We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent dc687d6 · commit 0fa2f9f — Copy full SHA for 0fa2f9f
python/sglang/srt/model_executor/cuda_graph_runner.py
@@ -134,7 +134,8 @@ def get_batch_sizes_to_capture(model_runner: ModelRunner):
134
)
135
136
gpu_mem = get_device_memory_capacity()
137
- if gpu_mem is not None and gpu_mem > 81920:
+ # Batch size of each rank will not become so large when DP is on
138
+ if gpu_mem is not None and gpu_mem > 81920 and server_args.dp_size == 1:
139
capture_bs += list(range(160, 257, 8))
140
141
if max(capture_bs) > model_runner.req_to_token_pool.size:
0 commit comments