Skip to content

Commit 54faf72

Browse files
zhyncs authored and DiweiSun committed
fix: use DeepEPDispatcher on CUDA (sgl-project#5180)
1 parent 154b862 commit 54faf72

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

python/sglang/srt/models/deepseek_v2.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,6 @@
50 50   )
51 51   from sglang.srt.layers.logits_processor import LogitsProcessor
52 52   from sglang.srt.layers.moe.ep_moe.layer import DeepEPMoE, EPMoE
53    - from sglang.srt.layers.moe.ep_moe.token_dispatcher import DeepEPDispatcher
54 53   from sglang.srt.layers.moe.fused_moe_triton import FusedMoE
55 54   from sglang.srt.layers.moe.topk import select_experts
56 55   from sglang.srt.layers.quantization.base_config import QuantizationConfig
@@ -79,6 +78,8 @@
79 78
80 79   if _is_cuda:
81 80       from sgl_kernel import awq_dequantize, bmm_fp8
   81 +
   82 +     from sglang.srt.layers.moe.ep_moe.token_dispatcher import DeepEPDispatcher
82 83   else:
83 84       from vllm import _custom_ops as ops
84 85
0 commit comments

Comments
 (0)