From 9064246bd8c2c6e08fc0a2c8ca22778bcb17a402 Mon Sep 17 00:00:00 2001
From: David Heineman
Date: Wed, 16 Apr 2025 11:27:33 -0700
Subject: [PATCH 1/2] fix max len edge case

Signed-off-by: David Heineman
---
 vllm/engine/llm_engine.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index 2347cdee904..4644053785f 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -2062,7 +2062,7 @@ def _validate_model_input(
                 raise ValueError(f"The {prompt_type} prompt cannot be empty")
 
         max_prompt_len = self.model_config.max_model_len
-        if len(prompt_ids) >= max_prompt_len:
+        if len(prompt_ids) > max_prompt_len:
             if prompt_type == "encoder" and model_config.is_multimodal_model:
                 mm_registry = self.input_preprocessor.mm_registry
                 mm_processor = mm_registry.create_processor(

From 1539e74af120dea75ab942909657f8deb58b2a6c Mon Sep 17 00:00:00 2001
From: David Heineman
Date: Wed, 16 Apr 2025 11:54:14 -0700
Subject: [PATCH 2/2] fix max_prompt_len in v1 engine

Signed-off-by: David Heineman
---
 vllm/v1/engine/processor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/v1/engine/processor.py b/vllm/v1/engine/processor.py
index 225e78f53ea..43163aaa208 100644
--- a/vllm/v1/engine/processor.py
+++ b/vllm/v1/engine/processor.py
@@ -351,7 +351,7 @@ def _validate_model_input(
             raise ValueError(f"Token id {max_input_id} is out of vocabulary")
 
         max_prompt_len = self.model_config.max_model_len
-        if len(prompt_ids) >= max_prompt_len:
+        if len(prompt_ids) > max_prompt_len:
             if prompt_type == "encoder" and model_config.is_multimodal_model:
                 mm_registry = self.input_preprocessor.mm_registry
                 mm_processor = mm_registry.create_processor(