10
10
parse_postfix_tokens , tokens_to_postfix ,
11
11
validate_tokens )
12
12
13
- RESERVED_CHARACTERS = frozenset ({'*' , '|' , '(' , ')' , '?' , ' ' , '\t ' , '&' , '+' , '.' })
13
+ RESERVED_CHARACTERS = frozenset ({'*' , '|' , '(' , ')' , '?' , ' ' , '\t ' , '&' , '+' , '.' , '^' })
14
14
15
15
16
16
class NFARegexBuilder :
@@ -44,7 +44,7 @@ def from_string_literal(cls, literal):
44
44
end_states .add (start_state + 1 )
45
45
46
46
final_state = cls .__get_next_state_name ()
47
- transitions [final_state ] = dict ()
47
+ transitions [final_state ] = {}
48
48
49
49
return cls (
50
50
transitions = transitions ,
@@ -63,7 +63,7 @@ def wildcard(cls, input_symbols):
63
63
64
64
transitions = {
65
65
initial_state : {symbol : {final_state } for symbol in input_symbols },
66
- final_state : dict ()
66
+ final_state : {}
67
67
}
68
68
69
69
return cls (
@@ -93,25 +93,24 @@ def intersection(self, other):
93
93
Apply the intersection operation to the NFA represented by this builder and other.
94
94
Use BFS to only traverse reachable part (keeps number of states down).
95
95
"""
96
- new_state_name_dict = dict ()
96
+ new_state_name_dict = {}
97
97
98
98
def get_state_name (state_name ):
99
99
return new_state_name_dict .setdefault (state_name , self .__get_next_state_name ())
100
100
101
101
new_final_states = set ()
102
- new_transitions = dict ()
102
+ new_transitions = {}
103
103
new_initial_state = (self ._initial_state , other ._initial_state )
104
104
105
105
new_initial_state_name = get_state_name (new_initial_state )
106
- new_input_symbols = set (chain .from_iterable (
107
- transition_dict .keys ()
108
- for transition_dict in chain (self ._transitions .values (), other ._transitions .values ())
109
- )) - {'' }
106
+ new_input_symbols = tuple (set (chain .from_iterable (
107
+ map (dict .keys , chain (self ._transitions .values (), other ._transitions .values ()))
108
+ )) - {'' })
110
109
111
110
queue = deque ()
112
111
113
112
queue .append (new_initial_state )
114
- new_transitions [new_initial_state_name ] = dict ()
113
+ new_transitions [new_initial_state_name ] = {}
115
114
116
115
while queue :
117
116
curr_state = queue .popleft ()
@@ -129,9 +128,9 @@ def get_state_name(state_name):
129
128
# Add epsilon transitions for first set of transitions
130
129
epsilon_transitions_a = transitions_a .get ('' )
131
130
if epsilon_transitions_a is not None :
132
- state_dict = new_transitions .setdefault (curr_state_name , dict () )
131
+ state_dict = new_transitions .setdefault (curr_state_name , {} )
133
132
state_dict .setdefault ('' , set ()).update (
134
- get_state_name ( state ) for state in product (epsilon_transitions_a , [q_b ])
133
+ map ( get_state_name , product (epsilon_transitions_a , [q_b ]) )
135
134
)
136
135
next_states_iterables .append (product (epsilon_transitions_a , [q_b ]))
137
136
@@ -140,9 +139,9 @@ def get_state_name(state_name):
140
139
# Add epsilon transitions for second set of transitions
141
140
epsilon_transitions_b = transitions_b .get ('' )
142
141
if epsilon_transitions_b is not None :
143
- state_dict = new_transitions .setdefault (curr_state_name , dict () )
142
+ state_dict = new_transitions .setdefault (curr_state_name , {} )
144
143
state_dict .setdefault ('' , set ()).update (
145
- get_state_name ( state ) for state in product ([q_a ], epsilon_transitions_b )
144
+ map ( get_state_name , product ([q_a ], epsilon_transitions_b ) )
146
145
)
147
146
next_states_iterables .append (product ([q_a ], epsilon_transitions_b ))
148
147
@@ -152,17 +151,17 @@ def get_state_name(state_name):
152
151
end_states_b = transitions_b .get (symbol )
153
152
154
153
if end_states_a is not None and end_states_b is not None :
155
- state_dict = new_transitions .setdefault (curr_state_name , dict () )
154
+ state_dict = new_transitions .setdefault (curr_state_name , {} )
156
155
state_dict .setdefault (symbol , set ()).update (
157
- get_state_name ( state ) for state in product (end_states_a , end_states_b )
156
+ map ( get_state_name , product (end_states_a , end_states_b ) )
158
157
)
159
158
next_states_iterables .append (product (end_states_a , end_states_b ))
160
159
161
160
# Finally, try visiting every state we found.
162
161
for product_state in chain .from_iterable (next_states_iterables ):
163
162
product_state_name = get_state_name (product_state )
164
163
if product_state_name not in new_transitions :
165
- new_transitions [product_state_name ] = dict ()
164
+ new_transitions [product_state_name ] = {}
166
165
queue .append (product_state )
167
166
168
167
self ._final_states = new_final_states
@@ -216,6 +215,37 @@ def option(self):
216
215
self ._initial_state = new_initial_state
217
216
self ._final_states .add (new_initial_state )
218
217
218
def shuffle_product(self, other):
    """
    Apply the shuffle operation to the NFA represented by this builder and other.

    This builder is updated in place: its initial state, transitions and
    final states are replaced by those of the shuffle product. ``other`` is
    only read. Every pair ``(q_a, q_b)`` of states keyed in the two
    transition tables becomes one state of the result, so no BFS over
    reachable states is performed.
    """
    new_state_name_dict = {}

    def get_state_name(state_pair):
        # Look up first and allocate only on a miss. Passing a freshly drawn
        # name as the ``setdefault`` default would pull a value from the
        # class-level counter on every call, even for already-named pairs.
        if state_pair not in new_state_name_dict:
            new_state_name_dict[state_pair] = self.__get_next_state_name()
        return new_state_name_dict[state_pair]

    self._initial_state = get_state_name((self._initial_state, other._initial_state))

    new_transitions = {}

    transition_product = product(self._transitions.items(), other._transitions.items())
    for (q_a, transitions_a), (q_b, transitions_b) in transition_product:
        state_dict = new_transitions.setdefault(get_state_name((q_a, q_b)), {})

        # Moves taken from this builder: q_a advances while q_b stays put.
        for symbol, end_states in transitions_a.items():
            state_dict.setdefault(symbol, set()).update(
                map(get_state_name, product(end_states, [q_b]))
            )

        # Moves taken from other: q_b advances while q_a stays put.
        for symbol, end_states in transitions_b.items():
            state_dict.setdefault(symbol, set()).update(
                map(get_state_name, product([q_a], end_states))
            )

    # A product state is final only when both of its components are final.
    self._final_states = set(map(get_state_name, product(self._final_states, other._final_states)))
    self._transitions = new_transitions
248
+
219
249
@classmethod
220
250
def __get_next_state_name (cls ):
221
251
return next (cls ._state_name_counter )
@@ -243,6 +273,17 @@ def op(self, left, right):
243
273
return left
244
274
245
275
276
class ShuffleToken(InfixOperator):
    """Infix operator token for the shuffle operator."""

    def get_precedence(self):
        """Return this operator's precedence level."""
        return 1

    def op(self, left, right):
        """Apply the shuffle product of ``right`` to ``left`` in place and return ``left``."""
        left.shuffle_product(right)
        return left
285
+
286
+
246
287
class KleeneStarToken (PostfixOperator ):
247
288
"""Subclass of postfix operator defining the kleene star operator."""
248
289
@@ -340,6 +381,7 @@ def get_regex_lexer(input_symbols):
340
381
lexer .register_token (StringToken , r'[A-Za-z0-9]' )
341
382
lexer .register_token (UnionToken , r'\|' )
342
383
lexer .register_token (IntersectionToken , r'\&' )
384
+ lexer .register_token (ShuffleToken , r'\^' )
343
385
lexer .register_token (KleeneStarToken , r'\*' )
344
386
lexer .register_token (KleenePlusToken , r'\+' )
345
387
lexer .register_token (OptionToken , r'\?' )
0 commit comments