We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 7d64cac · commit 6ca2a67 — Copy full SHA for 6ca2a67
vllm/v1/worker/gpu_model_runner.py
@@ -540,9 +540,6 @@ def _prepare_inputs(
540
# because M (max_model_len) is not necessarily divisible by block_size.
541
block_table_indices = (req_indices * self.max_num_blocks_per_req +
542
positions_np // self.block_size)
543
- # NOTE(woosuk): We use torch.index_select instead of np.take here
544
- # because torch.index_select is much faster than np.take for large
545
- # tensors.
546
block_table_cpu = self.input_batch.block_table.get_cpu_tensor()
547
block_numbers = block_table_cpu.flatten()[block_table_indices].numpy()
548
block_offsets = positions_np % self.block_size
0 commit comments