Skip to content

Commit 77f1047

Browse files
committed
format
Signed-off-by: sfc-gh-zhwang <[email protected]>
1 parent ed2e464 commit 77f1047

File tree

2 files changed

+7
-16
lines changed

2 files changed

+7
-16
lines changed

vllm/engine/metrics.py

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -140,16 +140,13 @@ def __init__(self, labelnames: List[str], vllm_config: VllmConfig):
140140
name="vllm:generation_tokens_total",
141141
documentation="Number of generation tokens processed.",
142142
labelnames=labelnames)
143-
buckets = [1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096]
144-
if not vllm_config.model_config.enforce_eager:
145-
buckets = vllm_config.compilation_config.\
146-
cudagraph_capture_sizes.copy()
147-
buckets.sort()
148143
self.histogram_iteration_tokens = self._histogram_cls(
149144
name="vllm:iteration_tokens_total",
150145
documentation="Histogram of number of tokens per engine_step.",
151146
labelnames=labelnames,
152-
buckets=buckets)
147+
buckets=[
148+
1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096, 16192
149+
])
153150
self.histogram_time_to_first_token = self._histogram_cls(
154151
name="vllm:time_to_first_token_seconds",
155152
documentation="Histogram of time to first token in seconds.",

vllm/v1/metrics/loggers.py

Lines changed: 4 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -216,7 +216,10 @@ def __init__(self, vllm_config: VllmConfig, engine_index: int = 0):
216216
prometheus_client.Histogram(
217217
name="vllm:iteration_tokens_total",
218218
documentation="Histogram of number of tokens per engine_step.",
219-
buckets=build_cudagraph_buckets(vllm_config),
219+
buckets=[
220+
1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096,
221+
16192
222+
],
220223
labelnames=labelnames).labels(*labelvalues)
221224

222225
self.histogram_max_num_generation_tokens_request = \
@@ -470,12 +473,3 @@ def build_1_2_5_buckets(max_value: int) -> list[int]:
470473
"""
471474
return build_buckets([1, 2, 5], max_value)
472475

473-
474-
def build_cudagraph_buckets(vllm_config: VllmConfig) -> list[int]:
475-
if not vllm_config.model_config.enforce_eager:
476-
buckets = vllm_config.compilation_config.\
477-
cudagraph_capture_sizes.copy()
478-
buckets.sort()
479-
return buckets
480-
else:
481-
return [1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096]

0 commit comments

Comments
 (0)