3 files changed: 14 additions (+14) and 2 deletions (-2).
Original file line number Diff line number Diff line change @@ -455,7 +455,10 @@ def add_one_req(
455
455
total_tokens = req .extend_input_len + min (
456
456
req .sampling_params .max_new_tokens , CLIP_MAX_NEW_TOKENS_ESTIMATION
457
457
)
458
- input_tokens = req .extend_input_len
458
+ input_tokens = (
459
+ - (- req .extend_input_len // self .tree_cache .page_size )
460
+ * self .tree_cache .page_size
461
+ )
459
462
prefix_len = len (req .prefix_indices )
460
463
461
464
if total_tokens >= self .rem_total_tokens :
@@ -477,7 +480,10 @@ def add_one_req(
477
480
req .last_node_global , req .prefix_indices
478
481
)
479
482
req .extend_input_len = len (req .fill_ids ) - len (req .prefix_indices )
480
- input_tokens = req .extend_input_len
483
+ input_tokens = (
484
+ - (- req .extend_input_len // self .tree_cache .page_size )
485
+ * self .tree_cache .page_size
486
+ )
481
487
prefix_len = len (req .prefix_indices )
482
488
483
489
if self .rem_chunk_tokens is None or input_tokens <= self .rem_chunk_tokens :
Original file line number Diff line number Diff line change @@ -507,7 +507,11 @@ def init_memory_pool_and_cache(self):
507
507
self .tree_cache = ChunkCache (
508
508
req_to_token_pool = self .req_to_token_pool ,
509
509
token_to_kv_pool_allocator = self .token_to_kv_pool_allocator ,
510
+ <<<<<<< HEAD
510
511
token_to_kv_pool_allocator_local = self .token_to_kv_pool_allocator_local ,
512
+ =======
513
+ page_size = self .page_size ,
514
+ >>>>>>> f8e46093 (Fix prefill OOM error in the case of large page size (#5081))
511
515
)
512
516
else :
513
517
if self .enable_hierarchical_cache :
Original file line number Diff line number Diff line change @@ -24,10 +24,12 @@ def __init__(
24
24
self ,
25
25
req_to_token_pool : ReqToTokenPool ,
26
26
token_to_kv_pool_allocator : TokenToKVPoolAllocator ,
27
+ page_size : int ,
27
28
token_to_kv_pool_allocator_local : TokenToKVPoolAllocator = None ,
28
29
):
29
30
self .req_to_token_pool = req_to_token_pool
30
31
self .token_to_kv_pool_allocator = token_to_kv_pool_allocator
32
+ self .page_size = page_size
31
33
self .token_to_kv_pool_allocator_local = token_to_kv_pool_allocator_local
32
34
33
35
def reset (self ):
You can’t perform that action at this time.
0 commit comments