File tree Expand file tree Collapse file tree 1 file changed +5
-1
lines changed
python/sglang/srt/layers/attention Expand file tree Collapse file tree 1 file changed +5
-1
lines changed Original file line number Diff line number Diff line change @@ -236,7 +236,11 @@ def make_local_attention_virtual_batches(
236
236
np .arange (pages_per_local_batch , dtype = np .int32 ),
237
237
(virtual_batches , pages_per_local_batch ),
238
238
) + np .expand_dims (block_starts , axis = 1 )
239
- block_indices = block_indices .flatten ()
239
+ # Clamp block_indices to the last valid column index of block_table (shape[1] - 1)
240
+ # This is a critical safety check that prevents index-out-of-bounds errors
241
+ # when dealing with large sequences (>8192 tokens) or when the block_table
242
+ # dimensions are smaller than what would be needed for the full attention chunk size.
243
+ block_indices = block_indices .flatten ().clip (max = block_table .shape [1 ] - 1 )
240
244
batch_indices = np .repeat (
241
245
np .arange (actual_batch_size , dtype = np .int32 ),
242
246
local_blocks * pages_per_local_batch ,
You can’t perform that action at this time.
0 commit comments