Skip to content

Unlock mutual exclusivity of argument: tool-call-parser and reasoning-parser #3550

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions lmdeploy/cli/serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,8 @@ def add_parser_api_server():
ArgumentHelper.chat_template(parser)

# parsers
parser_group = parser.add_mutually_exclusive_group()
ArgumentHelper.tool_call_parser(parser_group)
ArgumentHelper.reasoning_parser(parser_group)
ArgumentHelper.tool_call_parser(parser)
ArgumentHelper.reasoning_parser(parser)

# model args
ArgumentHelper.revision(parser)
Expand Down
12 changes: 6 additions & 6 deletions lmdeploy/serve/openai/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,9 @@ async def completion_stream_generator() -> AsyncGenerator[str, None]:
streaming_tools = True
previous_text = current_text
previous_token_ids = current_token_ids
elif VariableInterface.reasoning_parser is not None:
elif request.tool_choice != 'none' and request.tools is not None and VariableInterface.tool_parser is None:
logger.error('Please lanuch the api_server with --tool-call-parser if you want to use tool.')
if VariableInterface.reasoning_parser is not None:
current_text = current_text + res.response
delta_token_ids = res.token_ids if res.token_ids is not None else []
current_token_ids = current_token_ids + delta_token_ids
Expand All @@ -506,8 +508,6 @@ async def completion_stream_generator() -> AsyncGenerator[str, None]:
delta_message.content = reasoning_delta.content
previous_text = current_text
previous_token_ids = current_token_ids
elif request.tool_choice != 'none' and request.tools is not None and VariableInterface.tool_parser is None:
logger.error('Please lanuch the api_server with --tool-call-parser if you want to use tool.')
response_json = create_stream_response_json(index=0,
delta_message=delta_message,
finish_reason=res.finish_reason,
Expand Down Expand Up @@ -557,12 +557,12 @@ async def completion_stream_generator() -> AsyncGenerator[str, None]:
except Exception as e:
logger.error(f'Failed to parse {text}. Exception: {e}.')
return create_error_response(HTTPStatus.BAD_REQUEST, 'Failed to parse fc related info to json format!')
# assume reasoning uncompatible with tool call
elif VariableInterface.reasoning_parser is not None:
reasoning_content, text = VariableInterface.reasoning_parser.extract_reasoning_content(text, request)
elif request.tool_choice != 'none' and request.tools is not None and VariableInterface.tool_parser is None:
logger.error('Please lanuch the api_server with --tool-call-parser if you want to use tool.')

if VariableInterface.reasoning_parser is not None:
reasoning_content, text = VariableInterface.reasoning_parser.extract_reasoning_content(text, request)

logprobs = None
if gen_logprobs and len(final_logprobs):
logprobs = _create_chat_completion_logprobs(VariableInterface.async_engine.tokenizer, final_token_ids,
Expand Down