We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 9f0f616, commit 8f4359b (Copy full SHA for 8f4359b)
python/sglang/srt/server_args.py
@@ -197,7 +197,7 @@ class ServerArgs:
197
enable_flashmla: bool = False
198
flashinfer_mla_disable_ragged: bool = False
199
warmups: Optional[str] = None
200
- n_share_experts_fusion: Optional[int] = None
+ n_share_experts_fusion: int = 0
201
disable_shared_experts_fusion: bool = False
202
203
# KV cache transfer
@@ -1134,7 +1134,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
1134
parser.add_argument(
1135
"--n-share-experts-fusion",
1136
type=int,
1137
- default=None,
+ default=0,
1138
help="The number of shared_experts need to be replica to fuse with normal experts in deepseek v3/r1 "
1139
"we use tp_size by default.",
1140
)
0 commit comments