Commit 7cf3a56 (1 parent: 01d446c)
python/sglang/srt/layers/quantization/w8a8_fp8.py
@@ -37,7 +37,7 @@ class W8A8Fp8Config(QuantizationConfig):
     Note:
     - For models without offline quantization, weights will be quantized during model loading
     - If CUTLASS is supported: Per-channel weight quantization is used
-    - If CUTLASS is not supported: Falls back to per-token weight quantization
+    - If CUTLASS is not supported: Falls back to per-tensor weight quantization
     """

     def __init__(self, is_checkpoint_fp8_serialized: bool = False):
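
The docstring fix above corrects the fallback description: when the CUTLASS kernel is not available, the non-offline-quantized path uses per-tensor, not per-token, weight quantization. As a rough illustration of the two granularities only (this is not the sglang code path; the function names, the FP8_MAX constant, and the use of torch.float8_e4m3fn are assumptions for the sketch):

```python
# Minimal sketch of per-channel vs. per-tensor FP8 weight quantization.
# Illustrative only; not the sglang implementation.
import torch

FP8_MAX = 448.0  # largest finite value of torch.float8_e4m3fn


def quantize_per_tensor(weight: torch.Tensor):
    """One scale shared by the entire weight matrix (the non-CUTLASS fallback)."""
    scale = weight.abs().max().clamp(min=1e-12) / FP8_MAX
    q = (weight / scale).clamp(-FP8_MAX, FP8_MAX).to(torch.float8_e4m3fn)
    return q, scale


def quantize_per_channel(weight: torch.Tensor):
    """One scale per output channel, i.e. per row of the weight matrix (the CUTLASS path)."""
    scale = weight.abs().amax(dim=1, keepdim=True).clamp(min=1e-12) / FP8_MAX
    q = (weight / scale).clamp(-FP8_MAX, FP8_MAX).to(torch.float8_e4m3fn)
    return q, scale


w = torch.randn(4096, 4096)
q_t, s_t = quantize_per_tensor(w)    # s_t: a single scalar scale
q_c, s_c = quantize_per_channel(w)   # s_c: shape (4096, 1), one scale per output row
```

Per-channel scales track the dynamic range of each output row and generally preserve more accuracy, while a single per-tensor scale is simpler and serves as the fallback when the CUTLASS kernel cannot be used.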