
Commit 74d126a

Cangxihui authored and lk-chen committed
Add "/server_info" endpoint in api_server to retrieve the vllm_config.  (vllm-project#16572)
Signed-off-by: Xihui Cang <[email protected]>
1 parent a91aed8 commit 74d126a
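In dev mode the new route simply returns the stringified VllmConfig. A minimal sketch of querying it, assuming a server already running at http://localhost:8000 and started with VLLM_SERVER_DEV_MODE enabled (the api_server diff below only registers the route under that flag):

import json
import urllib.request

# Hypothetical local server; the /server_info route exists only when
# VLLM_SERVER_DEV_MODE is enabled.
with urllib.request.urlopen("http://localhost:8000/server_info") as resp:
    server_info = json.load(resp)

# The handler returns {"vllm_config": "<str(VllmConfig)>"}.
print(server_info["vllm_config"])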

File tree

7 files changed: +37 -6 lines changed


vllm/engine/async_llm_engine.py

Lines changed: 4 additions & 0 deletions
@@ -1167,6 +1167,10 @@ def _abort(self, request_id: str) -> None:
                 exception=asyncio.CancelledError,
                 verbose=self.log_requests)

+    async def get_vllm_config(self) -> VllmConfig:
+        """Get the vllm configuration of the vLLM engine."""
+        return self.engine.get_vllm_config()
+
     async def get_model_config(self) -> ModelConfig:
         """Get the model configuration of the vLLM engine."""
         return self.engine.get_model_config()

vllm/engine/llm_engine.py

Lines changed: 4 additions & 0 deletions
@@ -914,6 +914,10 @@ def abort_request(self, request_id: Union[str, Iterable[str]]) -> None:
             scheduler.abort_seq_group(
                 request_id, seq_id_to_seq_group=self.seq_id_to_seq_group)

+    def get_vllm_config(self) -> VllmConfig:
+        """Gets the vllm configuration."""
+        return self.vllm_config
+
     def get_model_config(self) -> ModelConfig:
         """Gets the model configuration."""
         return self.model_config

vllm/engine/multiprocessing/client.py

Lines changed: 4 additions & 0 deletions
@@ -93,6 +93,7 @@ def __init__(self, ipc_path: str, engine_config: VllmConfig,
         self._errored_with: Optional[BaseException] = None

         # Get the configs.
+        self.vllm_config = engine_config
         self.model_config = engine_config.model_config
         self.decoding_config = engine_config.decoding_config

@@ -377,6 +378,9 @@ async def get_input_preprocessor(self) -> InputPreprocessor:
     async def get_tokenizer(self, lora_request: Optional[LoRARequest] = None):
         return await self.tokenizer.get_lora_tokenizer_async(lora_request)

+    async def get_vllm_config(self) -> VllmConfig:
+        return self.vllm_config
+
     async def get_decoding_config(self) -> DecodingConfig:
         return self.decoding_config
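A design note on the hunk above: the multiprocessing client keeps the whole engine_config it received at construction, so get_vllm_config answers from that local copy with no IPC round trip to the engine process. A minimal sketch of the pattern, using the hypothetical name ConfigCachingClient (not a vLLM class):

class ConfigCachingClient:

    def __init__(self, engine_config):
        # Cache the full config once, at construction time.
        self.vllm_config = engine_config

    async def get_vllm_config(self):
        # Served locally; no message is sent to the engine process.
        return self.vllm_config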

vllm/engine/protocol.py

Lines changed: 6 additions & 1 deletion
@@ -5,7 +5,7 @@
 from typing import AsyncGenerator, List, Mapping, Optional

 from vllm.beam_search import BeamSearchSequence, create_sort_beams_key_function
-from vllm.config import DecodingConfig, ModelConfig
+from vllm.config import DecodingConfig, ModelConfig, VllmConfig
 from vllm.core.scheduler import SchedulerOutputs
 from vllm.inputs.data import PromptType, TokensPrompt
 from vllm.inputs.parse import is_explicit_encoder_decoder_prompt

@@ -220,6 +220,11 @@ async def abort(self, request_id: str) -> None:
         """
         ...

+    @abstractmethod
+    async def get_vllm_config(self) -> VllmConfig:
+        """Get the vllm configuration of the vLLM engine."""
+        ...
+
     @abstractmethod
     async def get_model_config(self) -> ModelConfig:
         """Get the model configuration of the vLLM engine."""

vllm/entrypoints/openai/api_server.py

Lines changed: 12 additions & 4 deletions
@@ -30,7 +30,7 @@
 from typing_extensions import assert_never

 import vllm.envs as envs
-from vllm.config import ModelConfig
+from vllm.config import VllmConfig
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.engine.async_llm_engine import AsyncLLMEngine  # type: ignore
 from vllm.engine.multiprocessing.client import MQLLMEngineClient

@@ -327,6 +327,7 @@ def mount_metrics(app: FastAPI):
             "/load",
             "/ping",
             "/version",
+            "/server_info",
         ],
         registry=registry,
     ).add().instrument(app).expose(app)

@@ -687,6 +688,11 @@ async def do_rerank_v2(request: RerankRequest, raw_request: Request):

 if envs.VLLM_SERVER_DEV_MODE:

+    @router.get("/server_info")
+    async def show_server_info(raw_request: Request):
+        server_info = {"vllm_config": str(raw_request.app.state.vllm_config)}
+        return JSONResponse(content=server_info)
+
     @router.post("/reset_prefix_cache")
     async def reset_prefix_cache(raw_request: Request):
         """

@@ -894,7 +900,7 @@ async def log_response(request: Request, call_next):

 async def init_app_state(
     engine_client: EngineClient,
-    model_config: ModelConfig,
+    vllm_config: VllmConfig,
     state: State,
     args: Namespace,
 ) -> None:

@@ -915,6 +921,8 @@ async def init_app_state(

     state.engine_client = engine_client
     state.log_stats = not args.disable_log_stats
+    state.vllm_config = vllm_config
+    model_config = vllm_config.model_config

     resolved_chat_template = load_chat_template(args.chat_template)
     if resolved_chat_template is not None:

@@ -1069,8 +1077,8 @@ def signal_handler(*_) -> None:
     async with build_async_engine_client(args) as engine_client:
         app = build_app(args)

-        model_config = await engine_client.get_model_config()
-        await init_app_state(engine_client, model_config, app.state, args)
+        vllm_config = await engine_client.get_vllm_config()
+        await init_app_state(engine_client, vllm_config, app.state, args)

     def _listen_addr(a: str) -> str:
         if is_valid_ipv6_address(a):
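Taken together, the hunks above wire the config through FastAPI application state: init_app_state stashes the VllmConfig on app.state at startup, and the handler reads it back from request.app.state. A self-contained sketch of that pattern (the app here is illustrative, not the real server):

from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse

app = FastAPI()
# Placeholder standing in for the real VllmConfig object.
app.state.vllm_config = "VllmConfig(...)"


@app.get("/server_info")
async def show_server_info(raw_request: Request):
    # Mirrors the diff: stringify whatever config was stored at startup.
    server_info = {"vllm_config": str(raw_request.app.state.vllm_config)}
    return JSONResponse(content=server_info)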

vllm/v1/engine/async_llm.py

Lines changed: 4 additions & 1 deletion
@@ -64,7 +64,7 @@ def __init__(
         assert start_engine_loop

         self.model_config = vllm_config.model_config
-
+        self.vllm_config = vllm_config
         self.log_requests = log_requests
         self.log_stats = log_stats

@@ -379,6 +379,9 @@ def encode(
     ):
         raise ValueError("Not Supported on V1 yet.")

+    async def get_vllm_config(self) -> VllmConfig:
+        return self.vllm_config
+
     async def get_model_config(self) -> ModelConfig:
         return self.model_config
vllm/v1/engine/llm_engine.py

Lines changed: 3 additions & 0 deletions
@@ -230,6 +230,9 @@ def step(self) -> list[RequestOutput]:

         return processed_outputs.request_outputs

+    def get_vllm_config(self):
+        return self.vllm_config
+
     def get_model_config(self):
         return self.model_config