Skip to content

Commit f497e33

Browse files
ispobock authored and jimoosciuc committed
Update deps for mllama4 (sgl-project#5215)
1 parent 74fafaa commit f497e33

File tree

3 files changed

+27
-26
lines changed

3 files changed

+27
-26
lines changed

python/pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ runtime_common = [
3838
"pyzmq>=25.1.2",
3939
"soundfile==0.13.1",
4040
"torchao>=0.7.0",
41-
"transformers==4.51.0",
41+
"transformers==4.51.1",
4242
"uvicorn",
4343
"uvloop",
4444
"compressed-tensors",
@@ -50,6 +50,7 @@ srt = [
5050
"sgl-kernel==0.0.8",
5151
"flashinfer_python==0.2.3",
5252
"torch==2.5.1",
53+
"torchvision==0.20.1",
5354
"cuda-python",
5455
"outlines>=0.0.44,<=0.1.11",
5556
"partial_json_parser",

python/sglang/srt/managers/scheduler.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -859,7 +859,6 @@ def handle_generate_request(
859859
bootstrap_room=recv_req.bootstrap_room,
860860
)
861861
req.tokenizer = self.tokenizer
862-
req.queue_time_start = time.time()
863862

864863
if self.server_args.kv_transfer_config is not None:
865864
req.pd_step = PDStep.PREFILL
@@ -884,7 +883,6 @@ def handle_generate_request(
884883
# Create a new request from a previous session
885884
session = self.sessions[recv_req.session_params.id]
886885
req = session.create_req(recv_req, self.tokenizer)
887-
req.queue_time_start = time.time()
888886
if isinstance(req.finished_reason, FINISH_ABORT):
889887
self._add_request_to_queue(req)
890888
return
@@ -987,6 +985,7 @@ def _add_request_to_queue(self, req: Req):
987985
self.disagg_decode_prealloc_queue.add(req)
988986

989987
else:
988+
req.queue_time_start = time.time()
990989
self.waiting_queue.append(req)
991990

992991
def _extend_requests_to_queue(self, reqs: List[Req], is_retracted: bool = False):

test/srt/test_vision_openai_server.py

Lines changed: 24 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -682,29 +682,30 @@ def test_single_image_chat_completion(self):
682682
pass
683683

684684

685-
class TestLlama4Server(TestOpenAIVisionServer):
686-
@classmethod
687-
def setUpClass(cls):
688-
cls.model = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
689-
cls.base_url = DEFAULT_URL_FOR_TEST
690-
cls.api_key = "sk-123456"
691-
cls.process = popen_launch_server(
692-
cls.model,
693-
cls.base_url,
694-
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
695-
other_args=[
696-
"--chat-template",
697-
"llama-4",
698-
"--mem-fraction-static",
699-
"0.8",
700-
"--tp-size=8",
701-
"--context-length=8192",
702-
],
703-
)
704-
cls.base_url += "/v1"
705-
706-
def test_video_chat_completion(self):
707-
pass
685+
## Skip for ci test
686+
# class TestLlama4Server(TestOpenAIVisionServer):
687+
# @classmethod
688+
# def setUpClass(cls):
689+
# cls.model = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
690+
# cls.base_url = DEFAULT_URL_FOR_TEST
691+
# cls.api_key = "sk-123456"
692+
# cls.process = popen_launch_server(
693+
# cls.model,
694+
# cls.base_url,
695+
# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
696+
# other_args=[
697+
# "--chat-template",
698+
# "llama-4",
699+
# "--mem-fraction-static",
700+
# "0.8",
701+
# "--tp-size=8",
702+
# "--context-length=8192",
703+
# ],
704+
# )
705+
# cls.base_url += "/v1"
706+
707+
# def test_video_chat_completion(self):
708+
# pass
708709

709710

710711
class TestGemma3itServer(TestOpenAIVisionServer):

0 commit comments

Comments (0)