[deepep] fix: shared experts are not initialized when shared experts fusion is enabled (sgl-project#5072)

ch-wan · jimoosciuc · commit 8f4359b8ffbc · 2025-04-17T11:59:23.000+08:00
diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py
@@ -197,7 +197,7 @@ class ServerArgs:
     enable_flashmla: bool = False
     flashinfer_mla_disable_ragged: bool = False
     warmups: Optional[str] = None
-    n_share_experts_fusion: Optional[int] = None
+    n_share_experts_fusion: int = 0
     disable_shared_experts_fusion: bool = False
 
     # KV cache transfer
@@ -1134,7 +1134,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
         parser.add_argument(
             "--n-share-experts-fusion",
             type=int,
-            default=None,
+            default=0,
             help="The number of shared_experts need to be replica to fuse with normal experts in deepseek v3/r1 "
             "we use tp_size by default.",
         )