BerriAI · krrishdholakia · May 15, 2025 · May 7, 2025 · May 7, 2025 · May 12, 2025
diff --git a/litellm/constants.py b/litellm/constants.py
@@ -164,6 +164,12 @@
     "meta_llama",
 ]
 
+LITELLM_EMBEDDING_PROVIDERS_SUPPORTING_INPUT_ARRAY_OF_TOKENS = [
+    "openai",
+    "azure",
+    "hosted_vllm"
+]
+
 
 OPENAI_CHAT_COMPLETION_PARAMS = [
     "functions",

diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
@@ -28,6 +28,7 @@
 from litellm.constants import (
     DEFAULT_MAX_RECURSE_DEPTH,
     DEFAULT_SLACK_ALERTING_THRESHOLD,
+    LITELLM_EMBEDDING_PROVIDERS_SUPPORTING_INPUT_ARRAY_OF_TOKENS
 )
 from litellm.types.utils import (
     ModelResponse,
@@ -3808,24 +3809,26 @@ async def embeddings(  # noqa: PLR0915
             and isinstance(data["input"][0], list)
             and isinstance(data["input"][0][0], int)
         ):  # check if array of tokens passed in
-            # check if non-openai/azure model called - e.g. for langchain integration
+            # check if the provider accepts a list of tokens as input - e.g. for langchain integration
             if llm_model_list is not None and data["model"] in router_model_names:
                 for m in llm_model_list:
-                    if m["model_name"] == data["model"] and (
-                        m["litellm_params"]["model"] in litellm.open_ai_embedding_models
-                        or m["litellm_params"]["model"].startswith("azure/")
-                        or m["litellm_params"]["model"].startswith("hosted_vllm/")
-                    ):
-                        pass
-                    else:
-                        # non-openai/azure embedding model called with token input
-                        input_list = []
-                        for i in data["input"]:
-                            input_list.append(
-                                litellm.decode(model="gpt-3.5-turbo", tokens=i)
-                            )
-                        data["input"] = input_list
-                        break
+                    if m["model_name"] == data["model"]:
+                        if (m["litellm_params"]["model"] in litellm.open_ai_embedding_models
+                                or any(
+                                    m["litellm_params"]["model"].startswith(provider)
+                                    for provider in LITELLM_EMBEDDING_PROVIDERS_SUPPORTING_INPUT_ARRAY_OF_TOKENS
+                                )
+                        ):
+                            pass
+                        else:
+                            # provider does not accept token arrays - decode the tokens back into text
+                            input_list = []
+                            for i in data["input"]:
+                                input_list.append(
+                                    litellm.decode(model="gpt-3.5-turbo", tokens=i)
+                                )
+                            data["input"] = input_list
+                            break
 
         ### CALL HOOKS ### - modify incoming data / reject request before calling the model
         data = await proxy_logging_obj.pre_call_hook(