31
31
32
32
logger = logging .getLogger (__name__ )
33
33
34
+
34
35
def find_available_ports (base_port : int , count : int ) -> List [int ]:
35
36
"""Find consecutive available ports starting from base_port."""
36
37
available_ports = []
@@ -43,6 +44,7 @@ def find_available_ports(base_port: int, count: int) -> List[int]:
43
44
44
45
return available_ports
45
46
47
+
46
48
def group_concurrent_contiguous (
47
49
src_indices : npt .NDArray [np .int64 ], dst_indices : npt .NDArray [np .int64 ]
48
50
) -> Tuple [List [npt .NDArray [np .int64 ]], List [npt .NDArray [np .int64 ]]]:
@@ -265,7 +267,9 @@ def transfer_thread():
265
267
)
266
268
if ret != 0 :
267
269
self .request_status [kv_chunk .room ] = KVPoll .Failed
268
- self .sync_status_to_decode_endpoint (req .endpoint , req .dst_port , req .room )
270
+ self .sync_status_to_decode_endpoint (
271
+ req .endpoint , req .dst_port , req .room
272
+ )
269
273
continue
270
274
271
275
if kv_chunk .is_last :
@@ -279,7 +283,9 @@ def transfer_thread():
279
283
self .request_status [req .room ] = (
280
284
KVPoll .Success if ret == 0 else KVPoll .Failed
281
285
)
282
- self .sync_status_to_decode_endpoint (req .endpoint , req .dst_port , req .room )
286
+ self .sync_status_to_decode_endpoint (
287
+ req .endpoint , req .dst_port , req .room
288
+ )
283
289
self .transfer_infos .pop (req .room )
284
290
285
291
except queue .Empty :
@@ -443,13 +449,14 @@ def _get_prefill_info_from_bootstrap(self, tp_rank: int):
443
449
prefill_info = response .json ()
444
450
return prefill_info
445
451
else :
446
- logger .error (f"Failed to get prefill server info: { response .status_code } , { response .text } " )
452
+ logger .error (
453
+ f"Failed to get prefill server info: { response .status_code } , { response .text } "
454
+ )
447
455
return None
448
456
except Exception as e :
449
457
logger .error (f"Error fetching prefill info from bootstrap: { e } " )
450
458
return None
451
459
452
-
453
460
@cache
454
461
def _connect (self , endpoint : str ):
455
462
socket = zmq .Context ().socket (zmq .PUSH )
@@ -466,17 +473,25 @@ def init(self, kv_indices: npt.NDArray[np.int64], aux_index: Optional[int] = Non
466
473
)
467
474
if prefill_info is None :
468
475
logger .error (
469
- logger .error (f"Could not fetch prefill server info for tp_rank { self .kv_mgr .kv_args .engine_rank } " )
476
+ logger .error (
477
+ f"Could not fetch prefill server info for tp_rank { self .kv_mgr .kv_args .engine_rank } "
478
+ )
470
479
)
471
480
else :
472
- self .kv_mgr .connection_pool [self .kv_mgr .kv_args .engine_rank ] = prefill_info
481
+ self .kv_mgr .connection_pool [self .kv_mgr .kv_args .engine_rank ] = (
482
+ prefill_info
483
+ )
473
484
else :
474
485
prefill_info = self .kv_mgr .connection_pool [self .kv_mgr .kv_args .engine_rank ]
475
486
476
487
if prefill_info :
477
- self .prefill_server_url = f"{ prefill_info ['serve_ip' ]} :{ prefill_info ['serve_port' ]} "
488
+ self .prefill_server_url = (
489
+ f"{ prefill_info ['serve_ip' ]} :{ prefill_info ['serve_port' ]} "
490
+ )
478
491
479
- logger .info (f"Fetched prefill server info: { prefill_info } for tp_rank { self .kv_mgr .kv_args .engine_rank } " )
492
+ logger .info (
493
+ f"Fetched prefill server info: { prefill_info } for tp_rank { self .kv_mgr .kv_args .engine_rank } "
494
+ )
480
495
self .handshake_prefill_server (kv_indices , aux_index )
481
496
482
497
def handshake_prefill_server (
@@ -598,8 +613,13 @@ async def _handle_kv_route_put(self, request: web.Request):
598
613
# Add lock to make sure thread-safe
599
614
if role == "Prefill" :
600
615
async with self .lock :
601
- self .prefill_port_table [tp_rank ] = {"serve_ip" : serve_ip , "serve_port" : serve_port }
602
- logger .info (f"Registered Prefill tp_rank: { tp_rank } with serve_ip: { serve_ip } and serve_port: { serve_port } " )
616
+ self .prefill_port_table [tp_rank ] = {
617
+ "serve_ip" : serve_ip ,
618
+ "serve_port" : serve_port ,
619
+ }
620
+ logger .info (
621
+ f"Registered Prefill tp_rank: { tp_rank } with serve_ip: { serve_ip } and serve_port: { serve_port } "
622
+ )
603
623
604
624
return web .Response (text = "OK" , status = 200 )
605
625
0 commit comments