InternLM · lvhan028 · Jun 2, 2025 · May 30, 2025
diff --git a/lmdeploy/pytorch/engine/executor/ray_executor.py b/lmdeploy/pytorch/engine/executor/ray_executor.py
@@ -147,13 +147,19 @@ def init_ray_cluster(world_size: int, ray_address: str = None, dp: int = 1):
 
 def _get_master_addr():
     """get master addr."""
+    addr = _envs.dist_master_addr
+    if addr:  # None or empty override counts as unset; fall back to the ray GCS host
+        return addr
     gcs_addr = ray.get_runtime_context().gcs_address
     master_addr = gcs_addr.split(':')[0]
     return master_addr
 
 
 def _get_master_port():
     """get master port."""
+    port = _envs.dist_master_port
+    if port:  # None or empty override counts as unset; pick a free port instead
+        return int(port)  # env values are str; normalise to an int port number
     return find_available_port()
 
 

diff --git a/lmdeploy/pytorch/envs.py b/lmdeploy/pytorch/envs.py
@@ -63,3 +63,7 @@ def env_to_int(
 # ray timeline
 ray_timeline_enable = env_to_bool('LMDEPLOY_RAY_TIMELINE_ENABLE', False)
 ray_timeline_output_path = os.getenv('LMDEPLOY_RAY_TIMELINE_OUT_PATH', 'ray_timeline.json')
+
+# dist: master addr/port taken from the environment; raw str when set, None when unset
+dist_master_addr = os.getenv('LMDEPLOY_DIST_MASTER_ADDR', None)
+dist_master_port = os.getenv('LMDEPLOY_DIST_MASTER_PORT', None)
diff --git a/lmdeploy/pytorch/models/deepseek_v2.py b/lmdeploy/pytorch/models/deepseek_v2.py
@@ -577,11 +577,7 @@ def forward(
 class MoEGate(nn.Module):
     """Deepseek Gate."""
 
-    def __init__(self,
-                 config: Any,
-                 dtype: torch.dtype = None,
-                 device: torch.device = None,
-                 info: Any = None):
+    def __init__(self, config: Any, dtype: torch.dtype = None, device: torch.device = None, info: Any = None):
         super().__init__()
         self.config = config
         self.top_k = config.num_experts_per_tok