Skip to content

Commit 97e49d4

Browse files
committed
[Bug] Bump SGLang version to 0.4.6.post4; Fix AsyncSGLangRollout
Similar to sgl-project/sglang#5997: in the PP PR sgl-project/sglang#5724, the broadcast_pyobj function changed its condition from checking rank==0 (whether the rank is local rank 0 of the passed ProcessGroup) to rank==src (whether the rank is global rank src), which breaks VerlEngine's broadcast logic when dp>1 and tp>1. Signed-off-by: Hollow Man <[email protected]>
1 parent bc9062d commit 97e49d4

File tree

5 files changed

+10
-9
lines changed

5 files changed

+10
-9
lines changed

docker/Dockerfile.sglang

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@ RUN pip config set global.index-url "${PIP_INDEX}" && \
3636
pip config set global.extra-index-url "${PIP_INDEX}" && \
3737
python -m pip install --upgrade pip
3838

39-
# Install sglang-0.4.6.post1 and torch-memory-saver
40-
RUN pip install "sglang[all]==0.4.6.post1" --no-cache-dir --find-links https://flashinfer.ai/whl/cu124/torch2.6/flashinfer-python && pip install torch-memory-saver --no-cache-dir
39+
# Install sglang-0.4.6.post4 and torch-memory-saver
40+
RUN pip install "sglang[all]==0.4.6.post4" --no-cache-dir --find-links https://flashinfer.ai/whl/cu124/torch2.6/flashinfer-python && pip install torch-memory-saver --no-cache-dir
4141

4242
# Install torch-2.6.0
4343
RUN pip install --no-cache-dir torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 tensordict torchdata \

docker/Dockerfile.stage2.megatron

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ RUN apt-get update && \
77
apt-get install -y aria2 libfreeimage3 libfreeimage-dev zlib1g
88

99
# 4. Install Sglang
10-
RUN pip install --no-deps "sglang[all]>=0.4.5.post3"
10+
RUN pip install --no-deps "sglang[all]>=0.4.6.post3"
1111

1212
# 5. Install cudnn
1313
RUN aria2c --max-tries=9999 https://developer.download.nvidia.com/compute/cudnn/9.8.0/local_installers/cudnn-local-repo-ubuntu2204-9.8.0_1.0-1_amd64.deb && \

requirements_sglang.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,5 @@ torchdata
1717
torchvision
1818
transformers
1919
wandb
20-
sglang[all]==0.4.4.post4
20+
sglang[all]==0.4.6.post4
2121
torch-memory-saver>=0.0.5

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151
VLLM_REQUIRES = ["tensordict<=0.6.2", "vllm<=0.8.3"]
5252
SGLANG_REQUIRES = [
5353
"tensordict<=0.6.2",
54-
"sglang[srt,openai]==0.4.6.post1",
54+
"sglang[srt,openai]==0.4.6.post4",
5555
"torch-memory-saver>=0.0.5",
5656
]
5757

verl/workers/rollout/sglang_rollout/async_sglang_rollout.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ def initialize_tools(tools_config) -> list:
191191
dist.all_gather_object(visible_devices, os.environ["CUDA_VISIBLE_DEVICES"], device_mesh_cpu.get_group("tp"))
192192
os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(visible_devices)
193193

194+
self._rank = device_mesh_cpu["tp"].get_rank()
194195
# initialize the inference engine
195196
monkey_patch_torch_reductions()
196197
nnodes = -(-tp_size // len(visible_devices))
@@ -199,7 +200,7 @@ def initialize_tools(tools_config) -> list:
199200
port = get_open_port() if port is None else port
200201
[ip, port] = broadcast_pyobj(
201202
[ip, port],
202-
rank=self._tp_rank,
203+
rank=self._rank,
203204
dist_group=device_mesh_cpu.get_group("tp"),
204205
src=device_mesh_cpu["tp"].mesh[0].item(),
205206
force_cpu_device=False,
@@ -423,7 +424,7 @@ def generate_sequences(self, prompts: DataProto, **kwargs) -> DataProto:
423424

424425
# free cache engine
425426
if self.config.free_cache_engine and self._engine is not None:
426-
self._engine.tokenizer_manager.flush_cache()
427+
self._engine.flush_cache()
427428

428429
return DataProto(batch=batch)
429430

@@ -591,7 +592,7 @@ def generate_sequences_with_tools(self, prompts: DataProto, **kwargs) -> DataPro
591592

592593
[sorted_output_req_list] = broadcast_pyobj(
593594
data=[sorted_output_req_list],
594-
rank=self._tp_rank,
595+
rank=self._rank,
595596
dist_group=self._device_mesh_cpu["tp"].get_group(),
596597
src=self._device_mesh_cpu["tp"].mesh[0].item(),
597598
force_cpu_device=False,
@@ -681,7 +682,7 @@ def generate_sequences_with_tools(self, prompts: DataProto, **kwargs) -> DataPro
681682

682683
# free cache engine
683684
if self.config.free_cache_engine and self._engine is not None and self._tp_rank == 0:
684-
self._engine.tokenizer_manager.flush_cache()
685+
self._engine.flush_cache()
685686

686687
return DataProto(batch=batch, non_tensor_batch={"messages": np.array(messages), "reward_scores": np.array(reward_scores)})
687688

0 commit comments

Comments (0)