
Commit ea6df45

Update vllm 0.8.2 with megatron 0.11.0 (volcengine#1054)
Parts of volcengine#851, covering a minimal set of upgrades: 1. vLLM 0.8.2 with Megatron; 2. part of the per-tensor allgather and load-weights support; 3. a fix for context-parallel bugs caused by the dataloader random seed, whose behavior appears to have changed in torch 2.6.0.
1 parent b17bce7 commit ea6df45

31 files changed, +390 -155 lines
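Item 2 of the commit message refers to syncing Megatron weights into the vLLM 0.8.2 engine one tensor at a time: each parameter's tensor-parallel shards are all-gathered and the reassembled tensor is handed to vLLM. The sketch below is only an illustration of that pattern; the function name, the `tp_group` handle, and the dim-0 sharding assumption are hypothetical, not verl's actual implementation.

    import torch
    import torch.distributed as dist

    def gather_and_load(megatron_named_params, vllm_model, tp_group):
        """Illustrative per-tensor allgather: reassemble one parameter at a time
        from its tensor-parallel shards and hand it to vLLM's load_weights()."""
        tp_size = dist.get_world_size(group=tp_group)
        for name, shard in megatron_named_params:
            # Gather this parameter's shards from every tensor-parallel rank.
            buffers = [torch.empty_like(shard) for _ in range(tp_size)]
            dist.all_gather(buffers, shard.detach(), group=tp_group)
            # Assume sharding along dim 0; real layers differ (column- vs. row-parallel).
            full_weight = torch.cat(buffers, dim=0)
            # vLLM model classes accept an iterable of (name, tensor) pairs.
            vllm_model.load_weights([(name, full_weight)])
            del buffers, full_weight  # keep peak memory to one full tensor at a time

Gathering per tensor rather than materializing a whole state dict keeps peak memory to a single full parameter, which is the point of the partial change described here.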

.github/workflows/checkpoints.yml

Lines changed: 2 additions & 3 deletions
@@ -22,7 +22,7 @@ permissions:
   contents: read
 
 jobs:
-  e2e_gsm8k_megatron:
+  checkpoints:
     runs-on: [self-hosted, l20-0]
     timeout-minutes: 40 # Increase this timeout value as needed
     env:
@@ -31,7 +31,7 @@ jobs:
       NO_PROXY: "localhost,127.0.0.1"
       HF_HUB_ENABLE_HF_TRANSFER: 1
     container:
-      image: whatcanyousee/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te2.0-megatron0.11.0-v0.0.6
+      image: whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0
       options: --gpus all --shm-size=10g
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -47,7 +47,6 @@ jobs:
       - name: Running Checkpoint Integration Test (Qwen Megatron)
         run: |
           ray stop --force
-          export PYTHONPATH=$PYTHONPATH:/opt/nvidia/Megatron-LM
           bash tests/checkpoint/run_qwen_megatron_ckpt.sh
       - name: Running Checkpoint Integration Test (Deepseek Megatron)
         run: |

.github/workflows/dataset.yml

Lines changed: 1 addition & 1 deletion
@@ -32,7 +32,7 @@ jobs:
       NO_PROXY: "localhost,127.0.0.1"
       HF_HUB_ENABLE_HF_TRANSFER: 1
     container:
-      image: verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3
+      image: whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0
       options: --gpus all --shm-size=10g
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

.github/workflows/e2e_eval_aime24.yml

Lines changed: 1 addition & 1 deletion
@@ -28,7 +28,7 @@ jobs:
       NO_PROXY: "localhost,127.0.0.1"
       HF_HUB_ENABLE_HF_TRANSFER: 1
     container:
-      image: hiyouga/verl:ngc-th2.6.0-cu120-vllm0.8.2
+      image: whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0
       options: --gpus all --shm-size=10g
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

.github/workflows/e2e_grpo.yml

Lines changed: 4 additions & 4 deletions
@@ -25,7 +25,7 @@ permissions:
   contents: read
 
 jobs:
-  e2e_gsm8k_megatron-l20-0:
+  e2e_grpo-l20-0:
     runs-on: [self-hosted, l20-0]
     timeout-minutes: 40 # Increase this timeout value as needed
     env:
@@ -34,7 +34,7 @@ jobs:
       NO_PROXY: "localhost,127.0.0.1"
       HF_HUB_ENABLE_HF_TRANSFER: 1
     container:
-      image: whatcanyousee/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te2.0-megatron0.11.0-v0.0.6
+      image: whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0
       options: --gpus all --shm-size=10g
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -55,7 +55,7 @@ jobs:
         run: |
           ray stop --force
           bash tests/e2e/run_qwen_grpo_megatron.sh
-  e2e_gsm8k_megatron-l20-1:
+  e2e_grpo-l20-1:
     runs-on: [self-hosted, l20-1]
     timeout-minutes: 40 # Increase this timeout value as needed
     env:
@@ -64,7 +64,7 @@ jobs:
       NO_PROXY: "localhost,127.0.0.1"
       HF_HUB_ENABLE_HF_TRANSFER: 1
     container:
-      image: whatcanyousee/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te2.0-megatron0.11.0-v0.0.6
+      image: whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0
       options: --gpus all --shm-size=10g
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

.github/workflows/e2e_gsm8k.yml

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@ jobs:
       NO_PROXY: "localhost,127.0.0.1"
       HF_HUB_ENABLE_HF_TRANSFER: 1
     container:
-      image: hiyouga/verl:ngc-th2.6.0-cu120-vllm0.8.2
+      image: whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0
       options: --gpus all --shm-size=10g
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

.github/workflows/e2e_gsm8k_megatron.yml

Lines changed: 25 additions & 3 deletions
@@ -1,5 +1,5 @@
 name: e2e_gsm8k_megatron
-# latest version: Megatron-LM core_r0.11.0 https://github.com/NVIDIA/Megatron-LM/tree/core_r0.11.0
+# latest version: Megatron-LM v0.11.0 https://github.com/NVIDIA/Megatron-LM/tree/v0.11.0
 
 on:
   # Trigger the workflow on push or pull request,
@@ -27,7 +27,7 @@ permissions:
   contents: read
 
 jobs:
-  e2e_gsm8k_megatron:
+  e2e_gsm8k_megatron-l20-0:
     runs-on: [self-hosted, l20-0]
     timeout-minutes: 40 # Increase this timeout value as needed
     env:
@@ -36,7 +36,7 @@ jobs:
       NO_PROXY: "localhost,127.0.0.1"
       HF_HUB_ENABLE_HF_TRANSFER: 1
     container:
-      image: whatcanyousee/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te2.0-megatron0.11.0-v0.0.6
+      image: whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0
       options: --gpus all --shm-size=10g
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -53,6 +53,28 @@ jobs:
         run: |
           ray stop --force
           bash tests/e2e/run_deepseek_megatron_parallelism.sh
+  e2e_gsm8k_megatron-l20-1:
+    runs-on: [self-hosted, l20-1]
+    timeout-minutes: 40 # Increase this timeout value as needed
+    env:
+      HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
+      HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
+      NO_PROXY: "localhost,127.0.0.1"
+      HF_HUB_ENABLE_HF_TRANSFER: 1
+    container:
+      image: whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0
+      options: --gpus all --shm-size=10g
+    steps:
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          fetch-depth: 0
+      - name: Install the current repository
+        run: |
+          pip3 install hf_transfer
+          pip3 install -e .[test]
+      - name: Prepare gsm8k dataset
+        run: |
+          python3 examples/data_preprocess/gsm8k.py
       - name: Running gsm8k e2e training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen)
         run: |
           ray stop --force

.github/workflows/e2e_gsm8k_prime.yml

Lines changed: 1 addition & 1 deletion
@@ -31,7 +31,7 @@ jobs:
       NO_PROXY: "localhost,127.0.0.1"
       HF_HUB_ENABLE_HF_TRANSFER: 1
     container:
-      image: hiyouga/verl:ngc-th2.6.0-cu120-vllm0.8.2
+      image: whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0
       options: --gpus all --shm-size=10g
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

.github/workflows/e2e_lora.yml

Lines changed: 1 addition & 1 deletion
@@ -33,7 +33,7 @@ jobs:
       NO_PROXY: "localhost,127.0.0.1"
       HF_HUB_ENABLE_HF_TRANSFER: 1
     container:
-      image: verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3
+      image: whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0
       options: --gpus all --shm-size=10g
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

.github/workflows/e2e_sft.yml

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@ jobs:
       NO_PROXY: "localhost,127.0.0.1"
       HF_HUB_ENABLE_HF_TRANSFER: 1
     container:
-      image: verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3
+      image: whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0
       options: --gpus all --shm-size=10g
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

.github/workflows/e2e_vlm_geo3k.yml

Lines changed: 1 addition & 1 deletion
@@ -28,7 +28,7 @@ jobs:
       NO_PROXY: "localhost,127.0.0.1"
       HF_HUB_ENABLE_HF_TRANSFER: 1
     container:
-      image: hiyouga/verl:ngc-th2.6.0-cu120-vllm0.8.2
+      image: whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0
       options: --gpus all --shm-size=40g
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

.github/workflows/model.yml

Lines changed: 1 addition & 1 deletion
@@ -28,7 +28,7 @@ jobs:
       NO_PROXY: "localhost,127.0.0.1"
       HF_HUB_ENABLE_HF_TRANSFER: 1
     container:
-      image: whatcanyousee/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te2.0-megatron0.11.0-v0.0.6
+      image: whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0
       options: --gpus all --shm-size=10g
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

.github/workflows/ray_test.yml

Lines changed: 2 additions & 2 deletions
@@ -31,14 +31,14 @@ permissions:
 jobs:
   ray:
     runs-on: [self-hosted, l20-0]
-    timeout-minutes: 5 # Increase this timeout value as needed
+    timeout-minutes: 10 # Increase this timeout value as needed
     env:
       HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
       HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
       NO_PROXY: "localhost,127.0.0.1"
       HF_HUB_ENABLE_HF_TRANSFER: 1
     container:
-      image: verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3
+      image: whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0
       options: --gpus all --shm-size=10g
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

.github/workflows/sandbox.yml

Lines changed: 2 additions & 2 deletions
@@ -23,14 +23,14 @@ permissions:
 jobs:
   sandbox:
     runs-on: [self-hosted, l20-0]
-    timeout-minutes: 3 # Increase this timeout value as needed
+    timeout-minutes: 10 # Increase this timeout value as needed
    env:
      HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
      HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
      NO_PROXY: "localhost,127.0.0.1"
      HF_HUB_ENABLE_HF_TRANSFER: 1
    container:
-      image: verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3
+      image: whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0
      options: --gpus all --shm-size=10g
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

docker/Dockerfile.megatron

Lines changed: 39 additions & 6 deletions
@@ -1,9 +1,42 @@
-FROM verlai/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te1.7-v0.0.3
+FROM hiyouga/verl:ngc-th2.6.0-cu120-vllm0.8.2
 
-RUN pip install git+https://github.com/NVIDIA/TransformerEngine.git@stable
+# Define environments
+ENV MAX_JOBS=64
 
-RUN cd /opt/nvidia && git clone --single-branch --branch core_r0.11.0 https://github.com/NVIDIA/Megatron-LM.git Megatron-LM
+RUN apt-get update && \
+    apt-get install -y aria2
 
-# only config pip index with https://pypi.tuna.tsinghua.edu.cn/simple if needed
-# unset for now
-RUN cd /opt/nvidia/Megatron-LM && pip3 install --no-deps -e .
+# 1. Reinstall CUDA 12.4
+RUN aria2c https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-ubuntu2204.pin && \
+    mv cuda-ubuntu2204.pin /etc/apt/preferences.d/cuda-repository-pin-600
+
+RUN aria2c --always-resume=true --max-tries=99999 https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda-repo-ubuntu2204-12-4-local_12.4.1-550.54.15-1_amd64.deb
+
+RUN dpkg -i cuda-repo-ubuntu2204-12-4-local_12.4.1-550.54.15-1_amd64.deb
+
+RUN cp /var/cuda-repo-ubuntu2204-12-4-local/cuda-*-keyring.gpg /usr/share/keyrings/
+
+RUN apt-get update
+
+RUN apt-get -y install cuda-toolkit-12-4
+
+RUN rm cuda-repo-ubuntu2204-12-4-local_12.4.1-550.54.15-1_amd64.deb
+
+RUN update-alternatives --set cuda /usr/local/cuda-12.4
+
+# 2. Reinstall Flash attn 2.7.3
+RUN pip uninstall -y flash-attn && \
+    wget -nv https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl && \
+    pip install --no-cache-dir flash_attn-2.7.3+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl && \
+    rm flash_attn-2.7.3+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
+
+# 3. Install Apex
+RUN git clone https://github.com/NVIDIA/apex.git && \
+    cd apex && \
+    pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" ./
+
+# 4. Install TransformerEngine
+RUN export NVTE_FRAMEWORK=pytorch && pip3 install --no-deps git+https://github.com/NVIDIA/TransformerEngine.git@v2.0
+
+# 5. Install Megatron-LM
+RUN pip3 install git+https://github.com/NVIDIA/Megatron-LM.git@core_v0.11.0

docs/advance/checkpoint.rst

Lines changed: 5 additions & 4 deletions
@@ -84,10 +84,11 @@ So example use of Megatron model merger is:
 
 .. code:: bash
 
-    python3 scripts/model_merger.py --backend megatron \
-        --is-value-model \
-        --hf_model_path Qwen/Qwen2-7B \
-        --local_dir checkpoints/verl_megatron_gsm8k_examples/deepseek_megatron_checkpoint_saveload/global_step_1/actor/model
+    python scripts/model_merger.py \
+        --backend megatron \
+        --tie-word-embedding \
+        --hf_model_path Qwen/Qwen2.5-0.5B \
+        --local_dir checkpoints/verl_megatron_gsm8k_examples/qwen2_5_0b5_megatron_saveload/global_step_1/actor
 
 Megatron Merger details
 -----------------------

docs/examples/config.rst

Lines changed: 5 additions & 0 deletions
@@ -105,6 +105,7 @@ Actor/Rollout/Reference Policy
     kl_loss_coef: 0.001 # for grpo
     kl_loss_type: low_var_kl # for grpo
     ppo_epochs: 1
+    data_loader_seed: null
     shuffle: False
     ulysses_sequence_parallel_size: 1 # sp size
     optim:
@@ -206,6 +207,10 @@ Actor/Rollout/Reference Policy
 - ``actor_rollout_ref.actor.ppo_epochs``: Number of epochs for PPO
   updates on one set of sampled data
 
+- ``actor_rollout_ref.actor.data_loader_seed``: From torch 2.6.0 Megatron backend can get wrong seed generated by pytorch
+  between cp ranks and cause misalignment between data on these ranks, so we shall manually set the seed to avoid hanging
+  issue. if ``actor_rollout_ref.actor.shuffle`` is not null, this must be set.
+
 - ``actor_rollout_ref.actor.shuffle``: Whether to shuffle data when
   there are multiple epochs
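The failure mode the new ``data_loader_seed`` option addresses comes down to the sampler's RNG: under torch 2.6.0, the dataloader on different context-parallel ranks can end up seeded differently, so the ranks draw different shuffles and their batches no longer line up. A minimal sketch of the idea with a plain PyTorch DataLoader follows; the helper name and batch size are illustrative, not verl's actual dataloader code.

    import torch
    from torch.utils.data import DataLoader, RandomSampler, TensorDataset

    def build_loader(dataset, data_loader_seed=None):
        # With shuffling, the sampler draws its permutation from this generator.
        # Leaving it unseeded lets CP ranks produce different permutations,
        # which is the mismatch data_loader_seed is meant to prevent.
        generator = torch.Generator()
        if data_loader_seed is not None:
            generator.manual_seed(data_loader_seed)  # identical order on every rank
        sampler = RandomSampler(dataset, generator=generator)
        return DataLoader(dataset, batch_size=4, sampler=sampler)

    # Every context-parallel rank that builds its loader this way sees the same batches.
    loader = build_loader(TensorDataset(torch.arange(32, dtype=torch.float32)), data_loader_seed=1)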

docs/start/install.rst

Lines changed: 14 additions & 14 deletions
@@ -19,7 +19,7 @@ Choices of Backend Engines
 
 We recommend using **FSDP** backend to investigate, research and prototype different models, datasets and RL algorithms. The guide for using FSDP backend can be found in :doc:`FSDP Workers<../workers/fsdp_workers>`.
 
-For users who pursue better scalability, we recommend using **Megatron-LM** backend. Currently, we support Megatron-LM v0.11 [1]_. The guide for using Megatron-LM backend can be found in :doc:`Megatron-LM Workers<../workers/megatron_workers>`.
+For users who pursue better scalability, we recommend using **Megatron-LM** backend. Currently, we support `Megatron-LM v0.11<https://github.com/NVIDIA/Megatron-LM/tree/v0.11.0>`_. The guide for using Megatron-LM backend can be found in :doc:`Megatron-LM Workers<../workers/megatron_workers>`.
 
 .. note::
 
@@ -39,19 +39,19 @@ Install from docker image
 
 We provide pre-built Docker images for quick setup.
 
-For latest vllm, please use ``hiyouga/verl:ngc-th2.6.0-cu120-vllm0.8.2-verl0.3.0.post1`` with vllm v0.8.2 with FSDP.
-
-For users who need latest Megatron, please use ``whatcanyousee/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te2.0-megatron0.11.0-v0.0.6`` for vllm v0.6.3 with Megatron/FSDP.
+For latest vllm and Megatron or FSDP, please use ``whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0``.
 
 For SGLang with FSDP, please use ``ocss884/verl-sglang:ngc-th2.5.1-cu126-sglang0.4.4.post4`` which is provided SGLang RL Group.
 
 See files under ``docker/`` for NGC-based image or if you want to build your own.
 
-1. Launch the desired Docker image:
+1. Launch the desired Docker image and attach into it:
 
 .. code:: bash
 
-    docker run --runtime=nvidia -it --rm --shm-size="10g" --cap-add=SYS_ADMIN -v <image:tag>
+    docker create --runtime=nvidia --gpus all --net=host --shm-size="10g" --cap-add=SYS_ADMIN -v .:/workspace/verl --name verl <image:tag>
+    docker start verl
+    docker exec -it verl bash
 
 
 2. Inside the container, install latest verl:
@@ -65,16 +65,16 @@ See files under ``docker/`` for NGC-based image or if you want to build your own
 
 .. note::
 
-    The Docker image ``whatcanyousee/verl:vemlp-th2.4.0-cu124-vllm0.6.3-ray2.10-te2.0-megatron0.11.0-v0.0.6`` is built with the following configurations:
+    The Docker image ``whatcanyousee/verl:ngc-th2.6.0-cu124-vllm0.8.2-mcore0.11.0-te2.0`` is built with the following configurations:
 
-    - **PyTorch**: 2.4.0+cu124
+    - **PyTorch**: 2.6.0+cu124
     - **CUDA**: 12.4
-    - **Megatron-LM**: core_r0.11.0
-    - **vLLM**: 0.6.3
-    - **Ray**: 2.10.0
-    - **TransformerEngine**: 2.0.0+754d2a0
+    - **Megatron-LM**: v0.11.0
+    - **vLLM**: 0.8.2
+    - **Ray**: 2.44.0
+    - **TransformerEngine**: 2.0.0
 
-    Now verl has been **compatible to Megatron-LM core_r0.11.0**, and there is **no need to apply patches** to Megatron-LM. Also, the image has integrated **Megatron-LM core_r0.11.0**, located at ``/opt/nvidia/Meagtron-LM``. One more thing, because verl only use ``megatron.core`` module for now, there is **no need to modify** ``PATH`` if you have installed Megatron-LM with this docker image.
+    Now verl has been **compatible to Megatron-LM v0.11.0**, and there is **no need to apply patches** to Megatron-LM. Also, the image has integrated **Megatron-LM v0.11.0**, located at ``/opt/nvidia/Meagtron-LM``. One more thing, because verl only use ``megatron.core`` module for now, there is **no need to modify** ``PATH`` if you have installed Megatron-LM with this docker image.
 
 
 Install from custom environment
@@ -94,7 +94,7 @@ own post-training jobs.
 .. code:: bash
 
     # install verl together with some lightweight dependencies in setup.py
-    pip3 install torch==2.4.0 --index-url https://download.pytorch.org/whl/cu124
+    pip3 install torch==2.6.0 --index-url https://download.pytorch.org/whl/cu126
    pip3 install flash-attn --no-build-isolation
    git clone https://github.com/volcengine/verl.git
    cd verl

scripts/model_merger.py

Lines changed: 2 additions & 1 deletion
@@ -86,7 +86,8 @@ def convert_fsdp_checkpoints_to_hfmodels():
         assert world_size, "No model file with the proper format"
 
         state_dict = torch.load(os.path.join(local_dir, f'model_world_size_{world_size}_rank_{rank}.pt'),
-                                map_location='cpu')
+                                map_location='cpu',
+                                weights_only=False)
         pivot_key = sorted(list(state_dict.keys()))[0]
         weight = state_dict[pivot_key]
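For context on the ``weights_only=False`` addition: PyTorch 2.6.0 flipped the default of ``torch.load`` to ``weights_only=True``, which refuses to unpickle the non-tensor objects stored in these FSDP checkpoint files. A minimal illustration of the same call (the checkpoint filename here is hypothetical):

    import torch

    ckpt = "model_world_size_8_rank_0.pt"  # hypothetical checkpoint path

    # Under torch >= 2.6 the new default weights_only=True can raise an
    # UnpicklingError for checkpoints that contain arbitrary Python objects,
    # so the merger now opts out explicitly for this trusted, locally produced file.
    state_dict = torch.load(ckpt, map_location="cpu", weights_only=False)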
