[doc] add a note for --n-share-experts-fusion args (sgl-project#6154)

BBuf · xwu-intel · commit ed238c6c1786 · 2025-06-17T06:06:19.000+03:00
diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py
@@ -1194,7 +1194,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
             type=int,
             default=0,
             help="The number of shared_experts need to be replicated to fuse with normal experts in deepseek v3/r1, "
-            "set it to tp_size can get best optimized performance.",
+            "set it to tp_size can get best optimized performance. Note that for architectures with SM==90, we have enabled the shared experts fusion optimization by default for DeepSeek V3/R1, with n_share_experts_fusion automatically set to the TP size.",
         )
         parser.add_argument(
             "--disable-chunked-prefix-cache",

Original file line number	Diff line number	Diff line change
`@@ -1194,7 +1194,7 @@ def add_cli_args(parser: argparse.ArgumentParser):`
`1194`	`1194`	`type=int,`
`1195`	`1195`	`default=0,`
`1196`	`1196`	`help="The number of shared_experts need to be replicated to fuse with normal experts in deepseek v3/r1, "`
`1197`		`- "set it to tp_size can get best optimized performance.",`
	`1197`	`+ "set it to tp_size can get best optimized performance. Note that for architectures with SM==90, we have enabled the shared experts fusion optimization by default for DeepSeek V3/R1, with n_share_experts_fusion automatically set to the TP size.",`
`1198`	`1198`	`)`
`1199`	`1199`	`parser.add_argument(`
`1200`	`1200`	`"--disable-chunked-prefix-cache",`