Skip to content

Commit 759eebe

Browse files
BBuf and wenju.li
authored and committed
fused moe triton tuning script support qwen3 (sgl-project#5842)
1 parent fcc44a2 commit 759eebe

File tree

3 files changed

+18
-1
lines changed

3 files changed

+18
-1
lines changed

benchmark/kernels/fused_moe_triton/README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,13 @@ python benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py \
2020
--dtype fp8_w8a8 \
2121
--tune
2222

23+
# Tune Qwen3-235B-A22B-FP8 with TP=4
24+
python benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py \
25+
--model Qwen/Qwen3-235B-A22B-FP8 \
26+
--tp-size 4 \
27+
--dtype fp8_w8a8 \
28+
--tune
29+
2330
# Tune DeepSeek-V3 with FP8, TP=8 and n_share_experts_fusion=8
2431
python benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py \
2532
--model deepseek-ai/DeepSeek-V3-0324 \

benchmark/kernels/fused_moe_triton/benchmark_torch_compile_fused_moe.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,15 @@ def get_model_config(model_name: str, tp_size: int):
3030
topk = config.num_experts_per_tok
3131
intermediate_size = config.moe_intermediate_size
3232
shard_intermediate_size = 2 * intermediate_size // tp_size
33+
elif config.architectures[0] == "Qwen3MoeForCausalLM":
34+
E = config.num_experts
35+
topk = config.num_experts_per_tok
36+
intermediate_size = config.moe_intermediate_size
37+
shard_intermediate_size = 2 * intermediate_size // tp_size
3338
elif config.architectures[0] in ["DeepseekV2ForCausalLM", "DeepseekV3ForCausalLM"]:
3439
E = config.n_routed_experts
3540
topk = config.num_experts_per_tok
36-
intermediate_size = config.intermediate_size
41+
intermediate_size = config.moe_intermediate_size
3742
shard_intermediate_size = 2 * intermediate_size // tp_size
3843
elif config.architectures[0] in [
3944
"Grok1ForCausalLM",

benchmark/kernels/fused_moe_triton/benchmark_vllm_vs_sglang_fused_moe_triton.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@ def get_model_config(model_name: str, tp_size: int):
3030
topk = config.num_experts_per_tok
3131
intermediate_size = config.moe_intermediate_size
3232
shard_intermediate_size = 2 * intermediate_size // tp_size
33+
elif config.architectures[0] == "Qwen3MoeForCausalLM":
34+
E = config.num_experts
35+
topk = config.num_experts_per_tok
36+
intermediate_size = config.moe_intermediate_size
37+
shard_intermediate_size = 2 * intermediate_size // tp_size
3338
elif config.architectures[0] in ["DeepseekV2ForCausalLM", "DeepseekV3ForCausalLM"]:
3439
E = config.n_routed_experts
3540
topk = config.num_experts_per_tok

0 commit comments

Comments
 (0)