Skip to content

Commit 54e327a

Browse files
russellb authored and dbyoung18 committed
[V1] Remove log noise when idle (vllm-project#16735)
Signed-off-by: Russell Bryant <[email protected]>
1 parent 5608de3 commit 54e327a

File tree

2 files changed

+15
-4
lines changed

2 files changed

+15
-4
lines changed

vllm/v1/metrics/loggers.py

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,8 @@ def __init__(self, engine_index: int = 0):
4040
# TODO: Make the interval configurable.
4141
self.prefix_caching_metrics = PrefixCachingMetrics()
4242
self.spec_decoding_metrics = SpecDecodingMetrics()
43+
self.last_prompt_throughput: float = 0.0
44+
self.last_generation_throughput: float = 0.0
4345

4446
def _reset(self, now):
4547
self.last_log_time = now
@@ -83,8 +85,17 @@ def log(self):
8385

8486
scheduler_stats = self.last_scheduler_stats
8587

88+
log_fn = logger.info
89+
if not any(
90+
(prompt_throughput, generation_throughput,
91+
self.last_prompt_throughput, self.last_generation_throughput)):
92+
# Avoid log noise on an idle production system
93+
log_fn = logger.debug
94+
self.last_generation_throughput = generation_throughput
95+
self.last_prompt_throughput = prompt_throughput
96+
8697
# Format and print output.
87-
logger.info(
98+
log_fn(
8899
"Engine %03d: "
89100
"Avg prompt throughput: %.1f tokens/s, "
90101
"Avg generation throughput: %.1f tokens/s, "
@@ -101,7 +112,7 @@ def log(self):
101112
)
102113

103114
if scheduler_stats.spec_decoding_stats is not None:
104-
self.spec_decoding_metrics.log()
115+
self.spec_decoding_metrics.log(log_fn=log_fn)
105116

106117

107118
class PrometheusStatLogger(StatLoggerBase):

vllm/v1/spec_decode/metrics.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -43,14 +43,14 @@ def observe(self, spec_decoding_stats: SpecDecodingStats):
4343
self.num_accepted_tokens.append(
4444
spec_decoding_stats.num_accepted_tokens)
4545

46-
def log(self):
46+
def log(self, log_fn=logger.info):
4747
num_draft_tokens = np.sum(self.num_draft_tokens)
4848
num_accepted_tokens = np.sum(self.num_accepted_tokens)
4949

5050
draft_acceptance_rate = (num_accepted_tokens / num_draft_tokens *
5151
100 if num_draft_tokens > 0 else float("nan"))
5252

53-
logger.info(
53+
log_fn(
5454
"SpecDecoding metrics: "
5555
"Draft acceptance rate: %.1f%%, "
5656
"Accepted: %d tokens, "

0 commit comments

Comments
 (0)