
Add "/server_info" endpoint in api_server to retrieve the vllm_config.  #16572


Merged: 5 commits, Apr 15, 2025
Changes from 3 commits
4 changes: 4 additions & 0 deletions vllm/engine/async_llm_engine.py
@@ -1171,6 +1171,10 @@ async def get_model_config(self) -> ModelConfig:
         """Get the model configuration of the vLLM engine."""
         return self.engine.get_model_config()
 
+    async def get_vllm_config(self) -> VllmConfig:
+        """Get the vllm configuration of the vLLM engine."""
+        return self.engine.get_vllm_config()
+
     async def get_parallel_config(self) -> ParallelConfig:
         """Get the parallel configuration of the vLLM engine."""
         return self.engine.get_parallel_config()
4 changes: 4 additions & 0 deletions vllm/engine/llm_engine.py
@@ -918,6 +918,10 @@ def get_model_config(self) -> ModelConfig:
         """Gets the model configuration."""
         return self.model_config
 
+    def get_vllm_config(self) -> VllmConfig:
+        """Gets the vllm configuration."""
+        return self.vllm_config
+
     def get_parallel_config(self) -> ParallelConfig:
         """Gets the parallel configuration."""
         return self.parallel_config
4 changes: 4 additions & 0 deletions vllm/engine/multiprocessing/client.py
@@ -93,6 +93,7 @@ def __init__(self, ipc_path: str, engine_config: VllmConfig,
         self._errored_with: Optional[BaseException] = None
 
         # Get the configs.
+        self.vllm_config = engine_config
         self.model_config = engine_config.model_config
         self.decoding_config = engine_config.decoding_config
 
@@ -383,6 +384,9 @@ async def get_decoding_config(self) -> DecodingConfig:
     async def get_model_config(self) -> ModelConfig:
         return self.model_config
 
+    async def get_vllm_config(self) -> VllmConfig:
+        return self.vllm_config
+
     async def is_tracing_enabled(self) -> bool:
         return self.tracing_flag
 
16 changes: 12 additions & 4 deletions vllm/entrypoints/openai/api_server.py
@@ -30,7 +30,7 @@
 from typing_extensions import assert_never
 
 import vllm.envs as envs
-from vllm.config import ModelConfig
+from vllm.config import ModelConfig, VllmConfig
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.engine.async_llm_engine import AsyncLLMEngine  # type: ignore
 from vllm.engine.multiprocessing.client import MQLLMEngineClient
@@ -327,6 +327,7 @@
         "/load",
         "/ping",
         "/version",
+        "/server_info",
     ],
     registry=registry,
 ).add().instrument(app).expose(app)
@@ -728,6 +729,11 @@
     is_sleeping = await engine_client(raw_request).is_sleeping()
     return JSONResponse(content={"is_sleeping": is_sleeping})
 
+
+@router.get("/server_info")
+async def show_server_info(raw_request: Request):
+    server_info = {"vllm_config": str(raw_request.app.state.vllm_config)}
+    return JSONResponse(content=server_info)
Member commented: Place this at the top of the block since it's more "basic"?
 
 @router.post("/invocations", dependencies=[Depends(validate_json_request)])
 async def invocations(raw_request: Request):
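
For reference, a quick client-side check of the new endpoint. This is a hypothetical usage sketch, not part of the PR; it assumes a server already listening on localhost:8000 and the third-party requests package.

# Hypothetical usage sketch, not part of this PR: query the new endpoint.
# Assumes a vLLM OpenAI-compatible server is listening on localhost:8000.
import requests

resp = requests.get("http://localhost:8000/server_info")
resp.raise_for_status()

# The handler returns {"vllm_config": str(vllm_config)}, so the value is a
# single human-readable string rather than structured JSON.
print(resp.json()["vllm_config"])

An equivalent shell check would be a plain GET with curl against the same URL.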
@@ -894,7 +900,7 @@
 
 async def init_app_state(
     engine_client: EngineClient,
-    model_config: ModelConfig,
+    vllm_config: VllmConfig,
     state: State,
     args: Namespace,
 ) -> None:
@@ -915,6 +921,8 @@
 
     state.engine_client = engine_client
     state.log_stats = not args.disable_log_stats
+    state.vllm_config = vllm_config
+    model_config = vllm_config.model_config
 
     resolved_chat_template = load_chat_template(args.chat_template)
     if resolved_chat_template is not None:
@@ -1069,8 +1077,8 @@
     async with build_async_engine_client(args) as engine_client:
         app = build_app(args)
 
-        model_config = await engine_client.get_model_config()
-        await init_app_state(engine_client, model_config, app.state, args)
+        vllm_config = await engine_client.get_vllm_config()
Check failure on line 1080 in vllm/entrypoints/openai/api_server.py (GitHub Actions / pre-commit, reported 10 times): "EngineClient" has no attribute "get_vllm_config"; maybe "get_model_config"? [attr-defined]
+        await init_app_state(engine_client, vllm_config, app.state, args)
 
 def _listen_addr(a: str) -> str:
     if is_valid_ipv6_address(a):
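The pre-commit (mypy) failure above is consistent with this view showing changes from only 3 of the 5 commits: the concrete engines gain get_vllm_config, but the shared EngineClient interface that api_server.py is typed against does not yet declare it. Below is a minimal sketch of the kind of declaration that would satisfy the type checker, assuming EngineClient is the ABC in vllm/engine/protocol.py; the exact location and surrounding methods are assumptions, and the remaining commits presumably address this.

# Sketch only: declare the new accessor on the EngineClient ABC so that
# "await engine_client.get_vllm_config()" type-checks at the call site.
from abc import ABC, abstractmethod

from vllm.config import VllmConfig


class EngineClient(ABC):
    """Shared interface implemented by the engine clients (sketch)."""

    @abstractmethod
    async def get_vllm_config(self) -> VllmConfig:
        """Get the vllm configuration of the vLLM engine."""
        ...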
5 changes: 4 additions & 1 deletion vllm/v1/engine/async_llm.py
@@ -64,7 +64,7 @@ def __init__(
         assert start_engine_loop
 
         self.model_config = vllm_config.model_config
-
+        self.vllm_config = vllm_config
         self.log_requests = log_requests
         self.log_stats = log_stats
 
@@ -379,6 +379,9 @@ def encode(
     ):
         raise ValueError("Not Supported on V1 yet.")
 
+    async def get_vllm_config(self) -> VllmConfig:
+        return self.vllm_config
+
     async def get_model_config(self) -> ModelConfig:
         return self.model_config
 