4
4
import logging
5
5
import re
6
6
from collections import OrderedDict , defaultdict
7
+ from typing import Tuple , Union
7
8
from urllib .parse import urlencode
8
9
9
10
import defusedxml .ElementTree as etree
@@ -76,22 +77,37 @@ def _fix_choices(config):
76
77
return config
77
78
78
79
79
- def parse_config_to_json (config_string ):
80
def parse_config_to_xml(config_string: Union[str, None], raise_on_empty: bool = False) -> Union[OrderedDict, None]:
    """Parse a label config string into an XML tree with ``<comment>`` elements removed.

    NOTE(review): the return annotation says ``OrderedDict``, but the value
    actually returned is the root element produced by ``etree.fromstring``.

    :param config_string: XML source of the label config, or None
    :param raise_on_empty: when True, a None ``config_string`` raises TypeError
        instead of returning None
    :return: root element of the parsed config, or None when ``config_string``
        is None and ``raise_on_empty`` is False
    :raises TypeError: if ``config_string`` is None and ``raise_on_empty`` is True
    """
    if config_string is None:
        if raise_on_empty:
            raise TypeError('config_string is None')
        return None

    # forbid_dtd=True asks defusedxml to reject documents that declare a DTD
    xml = etree.fromstring(config_string, forbid_dtd=True)

    # Remove comments.
    # ElementTree elements keep no reference to their parent (getparent() is
    # lxml-only API), so every element is visited as a potential parent and its
    # matching children are dropped from there. The tree is snapshotted first so
    # that removals never happen underneath a live iterator.
    for parent in list(xml.iter()):
        for child in [node for node in parent if node.tag == 'comment']:
            parent.remove(child)

    return xml
93
+
94
+
95
def parse_config_to_json(config_string: Union[str, None]) -> Tuple[Union[OrderedDict, None], Union[str, None]]:
    """Convert a label config string to its badgerfish JSON form.

    :param config_string: XML source of the label config
    :return: tuple of (config converted with ``xmljson.badgerfish`` and passed
        through ``_fix_choices``, the parsed XML serialized back to a string)
    :raises etree.ParseError: if parsing raises TypeError (for example when
        ``config_string`` is None) or if no XML tree is produced
    """
    try:
        root = parse_config_to_xml(config_string, raise_on_empty=True)
    except TypeError:
        # Report non-string input as a parse error, like any other bad config
        raise etree.ParseError('can only parse strings')

    if root is None:
        raise etree.ParseError('xml is empty or incorrect')

    parsed = _fix_choices(xmljson.badgerfish.data(root))
    serialized = etree.tostring(root, encoding='unicode')
    return parsed, serialized
89
105
90
106
91
- def validate_label_config (config_string ) :
107
+ def validate_label_config (config_string : Union [ str , None ]) -> None :
92
108
# xml and schema
93
109
try :
94
- config = parse_config_to_json (config_string )
110
+ config , cleaned_config_string = parse_config_to_json (config_string )
95
111
jsonschema .validate (config , _LABEL_CONFIG_SCHEMA_DATA )
96
112
except (etree .ParseError , ValueError ) as exc :
97
113
raise LabelStudioValidationErrorSentryIgnored (str (exc ))
@@ -106,13 +122,13 @@ def validate_label_config(config_string):
106
122
raise LabelStudioValidationErrorSentryIgnored (error_message )
107
123
108
124
# unique names in config # FIXME: 'name =' (with spaces) won't work
109
- all_names = re .findall (r'name="([^"]*)"' , config_string )
125
+ all_names = re .findall (r'name="([^"]*)"' , cleaned_config_string )
110
126
if len (set (all_names )) != len (all_names ):
111
127
raise LabelStudioValidationErrorSentryIgnored ('Label config contains non-unique names' )
112
128
113
129
# toName points to existent name
114
130
names = set (all_names )
115
- toNames = re .findall (r'toName="([^"]*)"' , config_string )
131
+ toNames = re .findall (r'toName="([^"]*)"' , cleaned_config_string )
116
132
for toName_ in toNames :
117
133
for toName in toName_ .split (',' ):
118
134
if toName not in names :
@@ -121,7 +137,7 @@ def validate_label_config(config_string):
121
137
122
138
def extract_data_types (label_config ):
123
139
# load config
124
- xml = etree . fromstring (label_config , forbid_dtd = False )
140
+ xml = parse_config_to_xml (label_config )
125
141
if xml is None :
126
142
raise etree .ParseError ('Project config is empty or incorrect' )
127
143
@@ -185,16 +201,11 @@ def get_all_object_tag_names(label_config):
185
201
186
202
187
203
def config_line_stipped(c):
    """Return the label config serialized as a single line.

    The config is parsed with ``parse_config_to_xml``, serialized back to a
    string, and all line-feed and carriage-return characters are removed.

    :param c: XML source of the label config, or None
    :return: the one-line config string, or None when parsing yields no tree
    """
    root = parse_config_to_xml(c)
    if root is None:
        return None

    serialized = etree.tostring(root, encoding='unicode')
    for line_break in ('\n', '\r'):
        serialized = serialized.replace(line_break, '')
    return serialized
198
209
199
210
200
211
def get_task_from_labeling_config (config ):
@@ -243,7 +254,7 @@ def data_examples(mode):
243
254
def generate_sample_task_without_check (label_config , mode = 'upload' , secure_mode = False ):
244
255
"""Generate sample task only"""
245
256
# load config
246
- xml = etree . fromstring (label_config , forbid_dtd = False )
257
+ xml = parse_config_to_xml (label_config )
247
258
if xml is None :
248
259
raise etree .ParseError ('Project config is empty or incorrect' )
249
260
0 commit comments