We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 15020fa, commit 2d4d701 (Copy full SHA for 2d4d701)
vllm/v1/worker/gpu_model_runner.py
@@ -542,9 +542,6 @@ def _prepare_inputs(
542
# because M (max_model_len) is not necessarily divisible by block_size.
543
block_table_indices = (req_indices * self.max_num_blocks_per_req +
544
positions_np // self.block_size)
545
- # NOTE(woosuk): We use torch.index_select instead of np.take here
546
- # because torch.index_select is much faster than np.take for large
547
- # tensors.
548
block_table_cpu = self.input_batch.block_table.get_cpu_tensor()
549
block_numbers = block_table_cpu.flatten()[block_table_indices].numpy()
550
block_offsets = positions_np % self.block_size
0 commit comments