
Commit 1b1b47a

Fix w8a8_int8 model shared experts fusion load weights error (#5120)
1 parent 3c9740d commit 1b1b47a

File tree: 1 file changed (+18 −8 lines)

python/sglang/srt/models/deepseek_v2.py

Lines changed: 18 additions & 8 deletions
@@ -1480,14 +1480,24 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
         if self.n_share_experts_fusion is not None and self.n_share_experts_fusion > 0:
             weights_list = list(weights)
             weights_dict = dict(weights_list)
-            suffix_list = [
-                "down_proj.weight",
-                "down_proj.weight_scale_inv",
-                "gate_proj.weight",
-                "gate_proj.weight_scale_inv",
-                "up_proj.weight",
-                "up_proj.weight_scale_inv",
-            ]
+            if self.quant_config.get_name() == "w8a8_int8":
+                suffix_list = [
+                    "down_proj.weight",
+                    "down_proj.weight_scale",
+                    "gate_proj.weight",
+                    "gate_proj.weight_scale",
+                    "up_proj.weight",
+                    "up_proj.weight_scale",
+                ]
+            else:
+                suffix_list = [
+                    "down_proj.weight",
+                    "down_proj.weight_scale_inv",
+                    "gate_proj.weight",
+                    "gate_proj.weight_scale_inv",
+                    "up_proj.weight",
+                    "up_proj.weight_scale_inv",
+                ]
             names_to_remove = []
             for moe_layer in tqdm(
                 range(