Skip to content

Commit 91a6868

Browse files
kkHuang-amd authored and thyecust committed
Fix torch compile errors (sgl-project#5158)
1 parent 376e926 commit 91a6868

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

python/sglang/srt/layers/quantization/fp8_utils.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -243,8 +243,8 @@ def apply_fp8_linear(
 243 243   if _is_cuda:
 244 244       qinput, x_scale = sglang_per_token_quant_fp8(input_2d)
 245 245   else:
 246     -     qinput, x_scale = per_token_group_quant_fp8(
 247     -         input_2d, group_size=input_2d.shape[1]
     246 +     qinput, x_scale = ops.scaled_fp8_quant(
     247 +         input_2d, input_scale, use_per_token_if_dynamic=use_per_token_if_dynamic
 248 248       )
 249 249
 250 250   if cutlass_fp8_supported:

0 commit comments

Comments
 (0)