diff --git a/vllm/model_executor/models/kimi_vl.py b/vllm/model_executor/models/kimi_vl.py
index 8cb8bc22fc0..0629266860f 100644
--- a/vllm/model_executor/models/kimi_vl.py
+++ b/vllm/model_executor/models/kimi_vl.py
@@ -369,6 +369,9 @@ def _process_image_input(self,
         return self.multi_modal_projector(
             torch.cat(image_features)).split(lengths)
 
+    def get_language_model(self) -> torch.nn.Module:
+        return self.language_model
+
     def get_multimodal_embeddings(self,
                                   **kwargs: object) -> Optional[NestedTensors]:
         # Validate the multimodal input keyword arguments
diff --git a/vllm/model_executor/models/qwen2_5_omni_thinker.py b/vllm/model_executor/models/qwen2_5_omni_thinker.py
index 93c21fc55c5..c3988ee5708 100644
--- a/vllm/model_executor/models/qwen2_5_omni_thinker.py
+++ b/vllm/model_executor/models/qwen2_5_omni_thinker.py
@@ -809,6 +809,9 @@ def _parse_and_validate_multimodal_inputs(self, **kwargs: object) -> dict:
                 "audio"] = self._parse_and_validate_audio_input(**kwargs)
         return mm_input_by_modality
 
+    def get_language_model(self) -> torch.nn.Module:
+        return self.language_model
+
     def get_multimodal_embeddings(
             self, **kwargs: object) -> Optional[MultiModalEmbeddings]: