format

sfc-gh-zhwang · sfc-gh-zhwang · commit 77f104738c55 · 2025-04-23T20:23:45.000-07:00
Signed-off-by: sfc-gh-zhwang &lt;flex.wang@snowflake.com&gt;
diff --git a/vllm/engine/metrics.py b/vllm/engine/metrics.py
@@ -140,16 +140,13 @@ def __init__(self, labelnames: List[str], vllm_config: VllmConfig):
             name="vllm:generation_tokens_total",
             documentation="Number of generation tokens processed.",
             labelnames=labelnames)
-        buckets = [1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096]
-        if not vllm_config.model_config.enforce_eager:
-            buckets = vllm_config.compilation_config.\
-                cudagraph_capture_sizes.copy()
-            buckets.sort()
         self.histogram_iteration_tokens = self._histogram_cls(
             name="vllm:iteration_tokens_total",
             documentation="Histogram of number of tokens per engine_step.",
             labelnames=labelnames,
-            buckets=buckets)
+            buckets=[
+                1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096, 16192
+            ])
         self.histogram_time_to_first_token = self._histogram_cls(
             name="vllm:time_to_first_token_seconds",
             documentation="Histogram of time to first token in seconds.",
diff --git a/vllm/v1/metrics/loggers.py b/vllm/v1/metrics/loggers.py
@@ -216,7 +216,10 @@ def __init__(self, vllm_config: VllmConfig, engine_index: int = 0):
             prometheus_client.Histogram(
                 name="vllm:iteration_tokens_total",
                 documentation="Histogram of number of tokens per engine_step.",
-                buckets=build_cudagraph_buckets(vllm_config),
+                buckets=[
+                    1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096,
+                    16192
+                ],
                 labelnames=labelnames).labels(*labelvalues)
 
         self.histogram_max_num_generation_tokens_request = \
@@ -470,12 +473,3 @@ def build_1_2_5_buckets(max_value: int) -> list[int]:
     """
     return build_buckets([1, 2, 5], max_value)
 
-
-def build_cudagraph_buckets(vllm_config: VllmConfig) -> list[int]:
-    if not vllm_config.model_config.enforce_eager:
-        buckets = vllm_config.compilation_config.\
-            cudagraph_capture_sizes.copy()
-        buckets.sort()
-        return buckets
-    else:
-        return [1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096]