BerriAI · krrishdholakia · May 15, 2025 · May 7, 2025 · May 7, 2025 · May 12, 2025
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
@@ -3814,14 +3814,15 @@ async def embeddings(  # noqa: PLR0915
                     if m["model_name"] == data["model"] and (
                         m["litellm_params"]["model"] in litellm.open_ai_embedding_models
                         or m["litellm_params"]["model"].startswith("azure/")
+                        or m["litellm_params"]["model"].startswith("hosted_vllm/")
 import socket 
 DEFAULT_BATCH_SIZE = 512 
 LITELLM_CHAT_PROVIDERS = [ 
                     ):
                         pass
                     else:
                         # non-openai/azure embedding model called with token input
                         input_list = []
                         for i in data["input"]:
                             input_list.append(
-                                litellm.decode(model="gpt-3.5-turbo", tokens=i)
+                                litellm.decode(model=m["model_name"], tokens=i)
                             )
                         data["input"] = input_list
                         break