Skip to content

Commit 4de3cfe

Browse files
CatherineSuetarinkk
authored and committed
Support max_completion_tokens for OpenAIChatCompletions (sgl-project#5857)
1 parent f00656b commit 4de3cfe

File tree

2 files changed

+11
-2
lines changed

2 files changed

+11
-2
lines changed

python/sglang/srt/openai_api/adapter.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1099,7 +1099,7 @@ def v1_chat_generate_request(
10991099

11001100
sampling_params = {
11011101
"temperature": request.temperature,
1102-
"max_new_tokens": request.max_tokens,
1102+
"max_new_tokens": request.max_tokens or request.max_completion_tokens,
11031103
"min_new_tokens": request.min_tokens,
11041104
"stop": stop,
11051105
"stop_token_ids": request.stop_token_ids,

python/sglang/srt/openai_api/protocol.py

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -320,7 +320,16 @@ class ChatCompletionRequest(BaseModel):
320320
logit_bias: Optional[Dict[str, float]] = None
321321
logprobs: bool = False
322322
top_logprobs: Optional[int] = None
323-
max_tokens: Optional[int] = None
323+
max_tokens: Optional[int] = Field(
324+
default=None,
325+
deprecated="max_tokens is deprecated in favor of the max_completion_tokens field",
326+
description="The maximum number of tokens that can be generated in the chat completion. ",
327+
)
328+
max_completion_tokens: Optional[int] = Field(
329+
default=None,
330+
description="The maximum number of completion tokens for a chat completion request, "
331+
"including visible output tokens and reasoning tokens. Input tokens are not included. ",
332+
)
324333
n: int = 1
325334
presence_penalty: float = 0.0
326335
response_format: Optional[Union[ResponseFormat, StructuralTagResponseFormat]] = None

0 commit comments

Comments (0)