Skip to content

Unlock mutual exclusivity of argument: tool-call-parser and reasoning-parser #3550

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions lmdeploy/cli/serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,8 @@ def add_parser_api_server():
ArgumentHelper.chat_template(parser)

# parsers
parser_group = parser.add_mutually_exclusive_group()
ArgumentHelper.tool_call_parser(parser_group)
ArgumentHelper.reasoning_parser(parser_group)
ArgumentHelper.tool_call_parser(parser)
ArgumentHelper.reasoning_parser(parser)

# model args
ArgumentHelper.revision(parser)
Expand Down
12 changes: 6 additions & 6 deletions lmdeploy/serve/openai/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,9 @@ async def completion_stream_generator() -> AsyncGenerator[str, None]:
streaming_tools = True
previous_text = current_text
previous_token_ids = current_token_ids
elif VariableInterface.reasoning_parser is not None:
elif request.tool_choice != 'none' and request.tools is not None and VariableInterface.tool_parser is None:
logger.error('Please lanuch the api_server with --tool-call-parser if you want to use tool.')
if VariableInterface.reasoning_parser is not None:
current_text = current_text + res.response
delta_token_ids = res.token_ids if res.token_ids is not None else []
current_token_ids = current_token_ids + delta_token_ids
Expand All @@ -506,8 +508,6 @@ async def completion_stream_generator() -> AsyncGenerator[str, None]:
delta_message.content = reasoning_delta.content
previous_text = current_text
previous_token_ids = current_token_ids
elif request.tool_choice != 'none' and request.tools is not None and VariableInterface.tool_parser is None:
logger.error('Please lanuch the api_server with --tool-call-parser if you want to use tool.')
response_json = create_stream_response_json(index=0,
delta_message=delta_message,
finish_reason=res.finish_reason,
Expand Down Expand Up @@ -557,12 +557,12 @@ async def completion_stream_generator() -> AsyncGenerator[str, None]:
except Exception as e:
logger.error(f'Failed to parse {text}. Exception: {e}.')
return create_error_response(HTTPStatus.BAD_REQUEST, 'Failed to parse fc related info to json format!')
# assume reasoning uncompatible with tool call
elif VariableInterface.reasoning_parser is not None:
reasoning_content, text = VariableInterface.reasoning_parser.extract_reasoning_content(text, request)
elif request.tool_choice != 'none' and request.tools is not None and VariableInterface.tool_parser is None:
logger.error('Please lanuch the api_server with --tool-call-parser if you want to use tool.')

if VariableInterface.reasoning_parser is not None:
reasoning_content, text = VariableInterface.reasoning_parser.extract_reasoning_content(text, request)

logprobs = None
if gen_logprobs and len(final_logprobs):
logprobs = _create_chat_completion_logprobs(VariableInterface.async_engine.tokenizer, final_token_ids,
Expand Down