We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 154b862 commit 54faf72 (Copy full SHA for 54faf72)
python/sglang/srt/models/deepseek_v2.py
@@ -50,7 +50,6 @@
50
)
51
from sglang.srt.layers.logits_processor import LogitsProcessor
52
from sglang.srt.layers.moe.ep_moe.layer import DeepEPMoE, EPMoE
53
-from sglang.srt.layers.moe.ep_moe.token_dispatcher import DeepEPDispatcher
54
from sglang.srt.layers.moe.fused_moe_triton import FusedMoE
55
from sglang.srt.layers.moe.topk import select_experts
56
from sglang.srt.layers.quantization.base_config import QuantizationConfig
@@ -79,6 +78,8 @@
79
78
80
if _is_cuda:
81
from sgl_kernel import awq_dequantize, bmm_fp8
+
82
+ from sglang.srt.layers.moe.ep_moe.token_dispatcher import DeepEPDispatcher
83
else:
84
from vllm import _custom_ops as ops
85
0 commit comments