File tree Expand file tree Collapse file tree 3 files changed +27
-26
lines changed Expand file tree Collapse file tree 3 files changed +27
-26
lines changed Original file line number Diff line number Diff line change @@ -38,7 +38,7 @@ runtime_common = [
38
38
" pyzmq>=25.1.2" ,
39
39
" soundfile==0.13.1" ,
40
40
" torchao>=0.7.0" ,
41
- " transformers==4.51.0 " ,
41
+ " transformers==4.51.1 " ,
42
42
" uvicorn" ,
43
43
" uvloop" ,
44
44
" compressed-tensors" ,
@@ -50,6 +50,7 @@ srt = [
50
50
" sgl-kernel==0.0.8" ,
51
51
" flashinfer_python==0.2.3" ,
52
52
" torch==2.5.1" ,
53
+ " torchvision==0.20.1" ,
53
54
" cuda-python" ,
54
55
" outlines>=0.0.44,<=0.1.11" ,
55
56
" partial_json_parser" ,
Original file line number Diff line number Diff line change @@ -859,7 +859,6 @@ def handle_generate_request(
859
859
bootstrap_room = recv_req .bootstrap_room ,
860
860
)
861
861
req .tokenizer = self .tokenizer
862
- req .queue_time_start = time .time ()
863
862
864
863
if self .server_args .kv_transfer_config is not None :
865
864
req .pd_step = PDStep .PREFILL
@@ -884,7 +883,6 @@ def handle_generate_request(
884
883
# Create a new request from a previous session
885
884
session = self .sessions [recv_req .session_params .id ]
886
885
req = session .create_req (recv_req , self .tokenizer )
887
- req .queue_time_start = time .time ()
888
886
if isinstance (req .finished_reason , FINISH_ABORT ):
889
887
self ._add_request_to_queue (req )
890
888
return
@@ -987,6 +985,7 @@ def _add_request_to_queue(self, req: Req):
987
985
self .disagg_decode_prealloc_queue .add (req )
988
986
989
987
else :
988
+ req .queue_time_start = time .time ()
990
989
self .waiting_queue .append (req )
991
990
992
991
def _extend_requests_to_queue (self , reqs : List [Req ], is_retracted : bool = False ):
Original file line number Diff line number Diff line change @@ -682,29 +682,30 @@ def test_single_image_chat_completion(self):
682
682
pass
683
683
684
684
685
- class TestLlama4Server (TestOpenAIVisionServer ):
686
- @classmethod
687
- def setUpClass (cls ):
688
- cls .model = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
689
- cls .base_url = DEFAULT_URL_FOR_TEST
690
- cls .api_key = "sk-123456"
691
- cls .process = popen_launch_server (
692
- cls .model ,
693
- cls .base_url ,
694
- timeout = DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH ,
695
- other_args = [
696
- "--chat-template" ,
697
- "llama-4" ,
698
- "--mem-fraction-static" ,
699
- "0.8" ,
700
- "--tp-size=8" ,
701
- "--context-length=8192" ,
702
- ],
703
- )
704
- cls .base_url += "/v1"
705
-
706
- def test_video_chat_completion (self ):
707
- pass
685
+ # Skip for CI test
686
+ # class TestLlama4Server(TestOpenAIVisionServer):
687
+ # @classmethod
688
+ # def setUpClass(cls):
689
+ # cls.model = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
690
+ # cls.base_url = DEFAULT_URL_FOR_TEST
691
+ # cls.api_key = "sk-123456"
692
+ # cls.process = popen_launch_server(
693
+ # cls.model,
694
+ # cls.base_url,
695
+ # timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
696
+ # other_args=[
697
+ # "--chat-template",
698
+ # "llama-4",
699
+ # "--mem-fraction-static",
700
+ # "0.8",
701
+ # "--tp-size=8",
702
+ # "--context-length=8192",
703
+ # ],
704
+ # )
705
+ # cls.base_url += "/v1"
706
+
707
+ # def test_video_chat_completion(self):
708
+ # pass
708
709
709
710
710
711
class TestGemma3itServer (TestOpenAIVisionServer ):
You can’t perform that action at this time.
0 commit comments