File tree Expand file tree Collapse file tree 3 files changed +11
-2
lines changed Expand file tree Collapse file tree 3 files changed +11
-2
lines changed Original file line number Diff line number Diff line change @@ -455,7 +455,10 @@ def add_one_req(
455
455
total_tokens = req .extend_input_len + min (
456
456
req .sampling_params .max_new_tokens , CLIP_MAX_NEW_TOKENS_ESTIMATION
457
457
)
458
- input_tokens = req .extend_input_len
458
+ input_tokens = (
459
+ - (- req .extend_input_len // self .tree_cache .page_size )
460
+ * self .tree_cache .page_size
461
+ )
459
462
prefix_len = len (req .prefix_indices )
460
463
461
464
if total_tokens >= self .rem_total_tokens :
@@ -477,7 +480,10 @@ def add_one_req(
477
480
req .last_node_global , req .prefix_indices
478
481
)
479
482
req .extend_input_len = len (req .fill_ids ) - len (req .prefix_indices )
480
- input_tokens = req .extend_input_len
483
+ input_tokens = (
484
+ - (- req .extend_input_len // self .tree_cache .page_size )
485
+ * self .tree_cache .page_size
486
+ )
481
487
prefix_len = len (req .prefix_indices )
482
488
483
489
if self .rem_chunk_tokens is None or input_tokens <= self .rem_chunk_tokens :
Original file line number Diff line number Diff line change @@ -502,6 +502,7 @@ def init_memory_pool_and_cache(self):
502
502
self .tree_cache = ChunkCache (
503
503
req_to_token_pool = self .req_to_token_pool ,
504
504
token_to_kv_pool_allocator = self .token_to_kv_pool_allocator ,
505
+ page_size = self .page_size ,
505
506
)
506
507
else :
507
508
if self .enable_hierarchical_cache :
Original file line number Diff line number Diff line change @@ -24,9 +24,11 @@ def __init__(
24
24
self ,
25
25
req_to_token_pool : ReqToTokenPool ,
26
26
token_to_kv_pool_allocator : TokenToKVPoolAllocator ,
27
+ page_size : int ,
27
28
):
28
29
self .req_to_token_pool = req_to_token_pool
29
30
self .token_to_kv_pool_allocator = token_to_kv_pool_allocator
31
+ self .page_size = page_size
30
32
31
33
def reset (self ):
32
34
pass
You can’t perform that action at this time.
0 commit comments