Commit 7cf3a56 (1 parent: 01d446c)
python/sglang/srt/layers/quantization/w8a8_fp8.py
@@ -37,7 +37,7 @@ class W8A8Fp8Config(QuantizationConfig):
     Note:
     - For models without offline quantization, weights will be quantized during model loading
     - If CUTLASS is supported: Per-channel weight quantization is used
-    - If CUTLASS is not supported: Falls back to per-token weight quantization
+    - If CUTLASS is not supported: Falls back to per-tensor weight quantization
     """

     def __init__(self, is_checkpoint_fp8_serialized: bool = False):
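
The docstring fix above corrects the fallback description: when the CUTLASS kernel is not available, the non-offline-quantized path uses per-tensor, not per-token, weight quantization. As a rough illustration of the two granularities only (this is not the sglang code path; the function names, the FP8_MAX constant, and the use of torch.float8_e4m3fn are assumptions for the sketch):

```python
# Minimal sketch of per-channel vs. per-tensor FP8 weight quantization.
# Illustrative only; not the sglang implementation.
import torch

FP8_MAX = 448.0  # largest finite value of torch.float8_e4m3fn


def quantize_per_tensor(weight: torch.Tensor):
    """One scale shared by the entire weight matrix (the non-CUTLASS fallback)."""
    scale = weight.abs().max().clamp(min=1e-12) / FP8_MAX
    q = (weight / scale).clamp(-FP8_MAX, FP8_MAX).to(torch.float8_e4m3fn)
    return q, scale


def quantize_per_channel(weight: torch.Tensor):
    """One scale per output channel, i.e. per row of the weight matrix (the CUTLASS path)."""
    scale = weight.abs().amax(dim=1, keepdim=True).clamp(min=1e-12) / FP8_MAX
    q = (weight / scale).clamp(-FP8_MAX, FP8_MAX).to(torch.float8_e4m3fn)
    return q, scale


w = torch.randn(4096, 4096)
q_t, s_t = quantize_per_tensor(w)    # s_t: a single scalar scale
q_c, s_c = quantize_per_channel(w)   # s_c: shape (4096, 1), one scale per output row
```

Per-channel scales track the dynamic range of each output row and generally preserve more accuracy, while a single per-tensor scale is simpler and serves as the fallback when the CUTLASS kernel cannot be used.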