
Commit 4f44bc0

feat: add dp attention support for Qwen 2/3 MoE models, fixes sgl-project#6088 (sgl-project#6121)

Authored by Fr4nk1inCs, kingzevin, and yizhang2077.
Co-authored-by: King.Zevin <[email protected]>
Co-authored-by: Yi Zhang <[email protected]>

Parent: 0c6c343

File tree: 4 files changed, +449, -70 lines

python/sglang/bench_one_batch.py

Lines changed: 1 addition & 0 deletions

@@ -269,6 +269,7 @@ def _maybe_prepare_dp_attn_batch(batch: ScheduleBatch, model_runner):
             batch,
             dp_size=model_runner.server_args.dp_size,
             attn_tp_size=1,
+            moe_dense_tp_size=model_runner.server_args.moe_dense_tp_size,
             tp_cpu_group=model_runner.tp_group.cpu_group,
             get_idle_batch=None,
             disable_cuda_graph=model_runner.server_args.disable_cuda_graph,
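For context: the only change in this file is plumbing the `moe_dense_tp_size` server argument into the DP-attention batch preparation, alongside the existing `dp_size` and `attn_tp_size`. A minimal hedged sketch of the idea behind that argument (the helper below is an illustrative assumption, not the repo's exact code):

    # Illustrative sketch only -- assumed helper, not sglang's implementation.
    from typing import Optional

    def dense_layer_tp_size(tp_size: int, moe_dense_tp_size: Optional[int]) -> int:
        # moe_dense_tp_size lets the dense (non-MoE) MLP layers of a MoE
        # model run at a different tensor-parallel size than the MoE layers;
        # None means "follow the global TP size".
        return tp_size if moe_dense_tp_size is None else moe_dense_tp_size

Passing it through here means the batch-preparation path can account for dense layers that are sharded differently from the attention/MoE layers.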

python/sglang/srt/layers/dp_attention.py

Lines changed: 0 additions & 10 deletions

@@ -142,16 +142,6 @@ def get_local_attention_dp_size():
     return _LOCAL_ATTN_DP_SIZE


-def get_local_attention_dp_rank():
-    assert _LOCAL_ATTN_DP_RANK is not None, "dp attention not initialized!"
-    return _LOCAL_ATTN_DP_RANK
-
-
-def get_local_attention_dp_size():
-    assert _LOCAL_ATTN_DP_SIZE is not None, "dp attention not initialized!"
-    return _LOCAL_ATTN_DP_SIZE
-
-
 @contextmanager
 def disable_dp_size():
     """Patch the tp group temporarily until this function ends.
