3 files changed: +11 -2 lines changed
Original file line number Diff line number Diff line change @@ -463,7 +463,10 @@ def add_one_req(
463
463
total_tokens = req .extend_input_len + min (
464
464
req .sampling_params .max_new_tokens , CLIP_MAX_NEW_TOKENS_ESTIMATION
465
465
)
466
- input_tokens = req .extend_input_len
466
+ input_tokens = (
467
+ - (- req .extend_input_len // self .tree_cache .page_size )
468
+ * self .tree_cache .page_size
469
+ )
467
470
prefix_len = len (req .prefix_indices )
468
471
469
472
if total_tokens >= self .rem_total_tokens :
@@ -540,7 +543,10 @@ def add_one_req(
540
543
)
541
544
542
545
req .extend_input_len = len (req .fill_ids ) - len (req .prefix_indices )
543
- input_tokens = req .extend_input_len
546
+ input_tokens = (
547
+ - (- req .extend_input_len // self .tree_cache .page_size )
548
+ * self .tree_cache .page_size
549
+ )
544
550
prefix_len = len (req .prefix_indices )
545
551
546
552
if self .rem_chunk_tokens is None or input_tokens <= self .rem_chunk_tokens :
Original file line number Diff line number Diff line change @@ -506,6 +506,7 @@ def init_memory_pool_and_cache(self):
506
506
self .tree_cache = ChunkCache (
507
507
req_to_token_pool = self .req_to_token_pool ,
508
508
token_to_kv_pool_allocator = self .token_to_kv_pool_allocator ,
509
+ page_size = self .page_size ,
509
510
)
510
511
else :
511
512
if self .enable_hierarchical_cache :
Original file line number Diff line number Diff line change @@ -24,9 +24,11 @@ def __init__(
24
24
self ,
25
25
req_to_token_pool : ReqToTokenPool ,
26
26
token_to_kv_pool_allocator : TokenToKVPoolAllocator ,
27
+ page_size : int ,
27
28
):
28
29
self .req_to_token_pool = req_to_token_pool
29
30
self .token_to_kv_pool_allocator = token_to_kv_pool_allocator
31
+ self .page_size = page_size
30
32
31
33
def reset (self ):
32
34
pass
You can’t perform that action at this time.
0 commit comments