[V1] Remove log noise when idle (vllm-project#16735)

russellb · dbyoung18 · commit 54e327ad6ba3 · 2025-04-29T02:12:11.000Z
Signed-off-by: Russell Bryant &lt;rbryant@redhat.com&gt;
diff --git a/vllm/v1/metrics/loggers.py b/vllm/v1/metrics/loggers.py
@@ -40,6 +40,8 @@ def __init__(self, engine_index: int = 0):
         # TODO: Make the interval configurable.
         self.prefix_caching_metrics = PrefixCachingMetrics()
         self.spec_decoding_metrics = SpecDecodingMetrics()
+        self.last_prompt_throughput: float = 0.0
+        self.last_generation_throughput: float = 0.0
 
     def _reset(self, now):
         self.last_log_time = now
@@ -83,8 +85,17 @@ def log(self):
 
         scheduler_stats = self.last_scheduler_stats
 
+        log_fn = logger.info
+        if not any(
+            (prompt_throughput, generation_throughput,
+             self.last_prompt_throughput, self.last_generation_throughput)):
+            # Avoid log noise on an idle production system
+            log_fn = logger.debug
+        self.last_generation_throughput = generation_throughput
+        self.last_prompt_throughput = prompt_throughput
+
         # Format and print output.
-        logger.info(
+        log_fn(
             "Engine %03d: "
             "Avg prompt throughput: %.1f tokens/s, "
             "Avg generation throughput: %.1f tokens/s, "
@@ -101,7 +112,7 @@ def log(self):
         )
 
         if scheduler_stats.spec_decoding_stats is not None:
-            self.spec_decoding_metrics.log()
+            self.spec_decoding_metrics.log(log_fn=log_fn)
 
 
 class PrometheusStatLogger(StatLoggerBase):
diff --git a/vllm/v1/spec_decode/metrics.py b/vllm/v1/spec_decode/metrics.py
@@ -43,14 +43,14 @@ def observe(self, spec_decoding_stats: SpecDecodingStats):
         self.num_accepted_tokens.append(
             spec_decoding_stats.num_accepted_tokens)
 
-    def log(self):
+    def log(self, log_fn=logger.info):
         num_draft_tokens = np.sum(self.num_draft_tokens)
         num_accepted_tokens = np.sum(self.num_accepted_tokens)
 
         draft_acceptance_rate = (num_accepted_tokens / num_draft_tokens *
                                  100 if num_draft_tokens > 0 else float("nan"))
 
-        logger.info(
+        log_fn(
             "SpecDecoding metrics: "
             "Draft acceptance rate: %.1f%%, "
             "Accepted: %d tokens, "