@@ -149,6 +149,7 @@ def _validate_structured_output(self, params: SamplingParams) -> None:
149
149
"xgrammar" , "xgrammar:disable-any-whitespace" , "guidance" ,
150
150
"guidance:disable-any-whitespace" , "auto"
151
151
]
152
+
152
153
engine_level_backend = self .decoding_config .guided_decoding_backend
153
154
if engine_level_backend not in supported_backends :
154
155
raise ValueError (f"Only { supported_backends } structured output is "
@@ -169,8 +170,15 @@ def _validate_structured_output(self, params: SamplingParams) -> None:
169
170
if engine_level_backend .startswith ("xgrammar" ):
170
171
# xgrammar with no fallback
171
172
validate_xgrammar_grammar (params )
172
- params .guided_decoding .backend = engine_level_backend
173
- elif engine_level_backend == "auto" :
173
+ elif engine_level_backend .startswith ("guidance" ):
174
+ # TODO: ideally we would have the LLTokenizer here as Lark syntax
175
+ # allows <|special_token|> and similar, see
176
+ # https://github.com/guidance-ai/llguidance/blob/main/docs/syntax.md#special-tokens
177
+ # Without tokenizer these are disallowed in grammars.
178
+ validate_guidance_grammar (params , tokenizer = None )
179
+ else :
180
+ # NOTE: engine_level_backend must be "auto" here, because we have
181
+ # checked supported_backends above.
174
182
# "auto" is an opt-in to opinionated behavior where we try to
175
183
# choose a backend based on request contents. This is not the
176
184
# default as it is less predictable and subject to change
@@ -183,14 +191,6 @@ def _validate_structured_output(self, params: SamplingParams) -> None:
183
191
# are not supported in xgrammar. Fall back to guidance.
184
192
params .guided_decoding .backend = "guidance"
185
193
186
- if engine_level_backend .startswith ("guidance" ):
187
- # TODO ideally we would have the LLTokenizer here as Lark syntax
188
- # allows <|special_token|> and similar, see
189
- # https://github.com/guidance-ai/llguidance/blob/main/docs/syntax.md#special-tokens
190
- # Without tokenizer these are disallowed in grammars.
191
- validate_guidance_grammar (params , tokenizer = None )
192
- params .guided_decoding .backend = engine_level_backend
193
-
194
194
def process_inputs (
195
195
self ,
196
196
request_id : str ,
0 commit comments