@@ -40,6 +40,8 @@ def __init__(self, engine_index: int = 0):
40
40
# TODO: Make the interval configurable.
41
41
self .prefix_caching_metrics = PrefixCachingMetrics ()
42
42
self .spec_decoding_metrics = SpecDecodingMetrics ()
43
+ self .last_prompt_throughput : float = 0.0
44
+ self .last_generation_throughput : float = 0.0
43
45
44
46
def _reset (self , now ):
45
47
self .last_log_time = now
@@ -83,8 +85,17 @@ def log(self):
83
85
84
86
scheduler_stats = self .last_scheduler_stats
85
87
88
+ log_fn = logger .info
89
+ if not any (
90
+ (prompt_throughput , generation_throughput ,
91
+ self .last_prompt_throughput , self .last_generation_throughput )):
92
+ # Avoid log noise on an idle production system: use debug only when both this interval and the previous one had zero throughput
93
+ log_fn = logger .debug
94
+ self .last_generation_throughput = generation_throughput
95
+ self .last_prompt_throughput = prompt_throughput
96
+
86
97
# Format and print output.
87
- logger . info (
98
+ log_fn (
88
99
"Engine %03d: "
89
100
"Avg prompt throughput: %.1f tokens/s, "
90
101
"Avg generation throughput: %.1f tokens/s, "
@@ -101,7 +112,7 @@ def log(self):
101
112
)
102
113
103
114
if scheduler_stats .spec_decoding_stats is not None :
104
- self .spec_decoding_metrics .log ()
115
+ self .spec_decoding_metrics .log (log_fn = log_fn )
105
116
106
117
107
118
class PrometheusStatLogger (StatLoggerBase ):
0 commit comments