Skip to content

Commit ef18f5f

Browse files
sfc-gh-zhwang authored and Yuqi Zhang committed
[Misc] Change buckets of histogram_iteration_tokens to [1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096] to represent number of tokens (vllm-project#17033)
Signed-off-by: sfc-gh-zhwang <[email protected]> Signed-off-by: Yuqi Zhang <[email protected]>
1 parent 22b7d2d commit ef18f5f

File tree

2 files changed

+7
-17
lines changed

2 files changed

+7
-17
lines changed

vllm/engine/metrics.py

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -140,16 +140,13 @@ def __init__(self, labelnames: List[str], vllm_config: VllmConfig):
140140
name="vllm:generation_tokens_total",
141141
documentation="Number of generation tokens processed.",
142142
labelnames=labelnames)
143-
buckets = [1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096]
144-
if not vllm_config.model_config.enforce_eager:
145-
buckets = vllm_config.compilation_config.\
146-
cudagraph_capture_sizes.copy()
147-
buckets.sort()
148143
self.histogram_iteration_tokens = self._histogram_cls(
149144
name="vllm:iteration_tokens_total",
150145
documentation="Histogram of number of tokens per engine_step.",
151146
labelnames=labelnames,
152-
buckets=buckets)
147+
buckets=[
148+
1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096, 16192
149+
])
153150
self.histogram_time_to_first_token = self._histogram_cls(
154151
name="vllm:time_to_first_token_seconds",
155152
documentation="Histogram of time to first token in seconds.",

vllm/v1/metrics/loggers.py

Lines changed: 4 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -232,7 +232,10 @@ def __init__(self, vllm_config: VllmConfig, engine_index: int = 0):
232232
prometheus_client.Histogram(
233233
name="vllm:iteration_tokens_total",
234234
documentation="Histogram of number of tokens per engine_step.",
235-
buckets=build_cudagraph_buckets(vllm_config),
235+
buckets=[
236+
1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096,
237+
16192
238+
],
236239
labelnames=labelnames).labels(*labelvalues)
237240

238241
self.histogram_max_num_generation_tokens_request = \
@@ -467,16 +470,6 @@ def build_1_2_5_buckets(max_value: int) -> list[int]:
467470
return build_buckets([1, 2, 5], max_value)
468471

469472

470-
def build_cudagraph_buckets(vllm_config: VllmConfig) -> list[int]:
471-
if not vllm_config.model_config.enforce_eager:
472-
buckets = vllm_config.compilation_config.\
473-
cudagraph_capture_sizes.copy()
474-
buckets.sort()
475-
return buckets
476-
else:
477-
return [1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096]
478-
479-
480473
def setup_default_loggers(
481474
vllm_config: VllmConfig,
482475
log_stats: bool,

0 commit comments

Comments
 (0)