diff --git a/litellm/__init__.py b/litellm/__init__.py index 67bd50d6d386..bd61fa6af472 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -289,6 +289,9 @@ client_session: Optional[httpx.Client] = None aclient_session: Optional[httpx.AsyncClient] = None model_fallbacks: Optional[List] = None # Deprecated for 'litellm.fallbacks' +model_cost_map_url: str = ( + "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json" +) suppress_debug_info = False dynamodb_table_name: Optional[str] = None s3_callback_params: Optional[Dict] = None @@ -354,7 +357,9 @@ #### PII MASKING #### output_parse_pii: bool = False ############################################# +from litellm.litellm_core_utils.get_model_cost_map import get_model_cost_map +model_cost = get_model_cost_map(url=model_cost_map_url) custom_prompt_dict: Dict[str, dict] = {} check_provider_endpoint = False @@ -382,6 +387,460 @@ def identify(event_details): project = None config_path = None vertex_ai_safety_settings: Optional[dict] = None +BEDROCK_CONVERSE_MODELS = [ + "anthropic.claude-opus-4-20250514-v1:0", + "anthropic.claude-sonnet-4-20250514-v1:0", + "anthropic.claude-3-7-sonnet-20250219-v1:0", + "anthropic.claude-3-5-haiku-20241022-v1:0", + "anthropic.claude-3-5-sonnet-20241022-v2:0", + "anthropic.claude-3-5-sonnet-20240620-v1:0", + "anthropic.claude-3-opus-20240229-v1:0", + "anthropic.claude-3-sonnet-20240229-v1:0", + "anthropic.claude-3-haiku-20240307-v1:0", + "anthropic.claude-v2", + "anthropic.claude-v2:1", + "anthropic.claude-v1", + "anthropic.claude-instant-v1", + "ai21.jamba-instruct-v1:0", + "ai21.jamba-1-5-mini-v1:0", + "ai21.jamba-1-5-large-v1:0", + "meta.llama3-70b-instruct-v1:0", + "meta.llama3-8b-instruct-v1:0", + "meta.llama3-1-8b-instruct-v1:0", + "meta.llama3-1-70b-instruct-v1:0", + "meta.llama3-1-405b-instruct-v1:0", + "meta.llama3-70b-instruct-v1:0", + "mistral.mistral-large-2407-v1:0", + "mistral.mistral-large-2402-v1:0", + "mistral.mistral-small-2402-v1:0", + "meta.llama3-2-1b-instruct-v1:0", + "meta.llama3-2-3b-instruct-v1:0", + "meta.llama3-2-11b-instruct-v1:0", + "meta.llama3-2-90b-instruct-v1:0", +] + +####### COMPLETION MODELS ################### +open_ai_chat_completion_models: List = [] +open_ai_text_completion_models: List = [] +cohere_models: List = [] +cohere_chat_models: List = [] +mistral_chat_models: List = [] +text_completion_codestral_models: List = [] +anthropic_models: List = [] +openrouter_models: List = [] +datarobot_models: List = [] +vertex_language_models: List = [] +vertex_vision_models: List = [] +vertex_chat_models: List = [] +vertex_code_chat_models: List = [] +vertex_ai_image_models: List = [] +vertex_text_models: List = [] +vertex_code_text_models: List = [] +vertex_embedding_models: List = [] +vertex_anthropic_models: List = [] +vertex_llama3_models: List = [] +vertex_ai_ai21_models: List = [] +vertex_mistral_models: List = [] +ai21_models: List = [] +ai21_chat_models: List = [] +nlp_cloud_models: List = [] +aleph_alpha_models: List = [] +bedrock_models: List = [] +bedrock_converse_models: List = BEDROCK_CONVERSE_MODELS +fireworks_ai_models: List = [] +fireworks_ai_embedding_models: List = [] +deepinfra_models: List = [] +perplexity_models: List = [] +watsonx_models: List = [] +gemini_models: List = [] +xai_models: List = [] +deepseek_models: List = [] +azure_ai_models: List = [] +jina_ai_models: List = [] +voyage_models: List = [] +infinity_models: List = [] +databricks_models: List = [] +cloudflare_models: List = [] +codestral_models: List = [] +friendliai_models: List = [] +featherless_ai_models: List = [] +palm_models: List = [] +groq_models: List = [] +azure_models: List = [] +azure_text_models: List = [] +anyscale_models: List = [] +cerebras_models: List = [] +galadriel_models: List = [] +sambanova_models: List = [] +novita_models: List = [] +assemblyai_models: List = [] +snowflake_models: List = [] +llama_models: List = [] +nscale_models: List = [] +nebius_models: List = [] +nebius_embedding_models: List = [] +deepgram_models: List = [] +elevenlabs_models: List = [] + + +def is_bedrock_pricing_only_model(key: str) -> bool: + """ + Excludes keys with the pattern 'bedrock//'. These are in the model_prices_and_context_window.json file for pricing purposes only. + + Args: + key (str): A key to filter. + + Returns: + bool: True if the key matches the Bedrock pattern, False otherwise. + """ + # Regex to match 'bedrock//' + bedrock_pattern = re.compile(r"^bedrock/[a-zA-Z0-9_-]+/.+$") + + if "month-commitment" in key: + return True + + is_match = bedrock_pattern.match(key) + return is_match is not None + + +def is_openai_finetune_model(key: str) -> bool: + """ + Excludes model cost keys with the pattern 'ft:'. These are in the model_prices_and_context_window.json file for pricing purposes only. + + Args: + key (str): A key to filter. + + Returns: + bool: True if the key matches the OpenAI finetune pattern, False otherwise. + """ + return key.startswith("ft:") and not key.count(":") > 1 + + +def add_known_models(): + for key, value in model_cost.items(): + if value.get("litellm_provider") == "openai" and not is_openai_finetune_model( + key + ): + open_ai_chat_completion_models.append(key) + elif value.get("litellm_provider") == "text-completion-openai": + open_ai_text_completion_models.append(key) + elif value.get("litellm_provider") == "azure_text": + azure_text_models.append(key) + elif value.get("litellm_provider") == "cohere": + cohere_models.append(key) + elif value.get("litellm_provider") == "cohere_chat": + cohere_chat_models.append(key) + elif value.get("litellm_provider") == "mistral": + mistral_chat_models.append(key) + elif value.get("litellm_provider") == "anthropic": + anthropic_models.append(key) + elif value.get("litellm_provider") == "empower": + empower_models.append(key) + elif value.get("litellm_provider") == "openrouter": + openrouter_models.append(key) + elif value.get("litellm_provider") == "datarobot": + datarobot_models.append(key) + elif value.get("litellm_provider") == "vertex_ai-text-models": + vertex_text_models.append(key) + elif value.get("litellm_provider") == "vertex_ai-code-text-models": + vertex_code_text_models.append(key) + elif value.get("litellm_provider") == "vertex_ai-language-models": + vertex_language_models.append(key) + elif value.get("litellm_provider") == "vertex_ai-vision-models": + vertex_vision_models.append(key) + elif value.get("litellm_provider") == "vertex_ai-chat-models": + vertex_chat_models.append(key) + elif value.get("litellm_provider") == "vertex_ai-code-chat-models": + vertex_code_chat_models.append(key) + elif value.get("litellm_provider") == "vertex_ai-embedding-models": + vertex_embedding_models.append(key) + elif value.get("litellm_provider") == "vertex_ai-anthropic_models": + key = key.replace("vertex_ai/", "") + vertex_anthropic_models.append(key) + elif value.get("litellm_provider") == "vertex_ai-llama_models": + key = key.replace("vertex_ai/", "") + vertex_llama3_models.append(key) + elif value.get("litellm_provider") == "vertex_ai-mistral_models": + key = key.replace("vertex_ai/", "") + vertex_mistral_models.append(key) + elif value.get("litellm_provider") == "vertex_ai-ai21_models": + key = key.replace("vertex_ai/", "") + vertex_ai_ai21_models.append(key) + elif value.get("litellm_provider") == "vertex_ai-image-models": + key = key.replace("vertex_ai/", "") + vertex_ai_image_models.append(key) + elif value.get("litellm_provider") == "ai21": + if value.get("mode") == "chat": + ai21_chat_models.append(key) + else: + ai21_models.append(key) + elif value.get("litellm_provider") == "nlp_cloud": + nlp_cloud_models.append(key) + elif value.get("litellm_provider") == "aleph_alpha": + aleph_alpha_models.append(key) + elif value.get( + "litellm_provider" + ) == "bedrock" and not is_bedrock_pricing_only_model(key): + bedrock_models.append(key) + elif value.get("litellm_provider") == "bedrock_converse": + bedrock_converse_models.append(key) + elif value.get("litellm_provider") == "deepinfra": + deepinfra_models.append(key) + elif value.get("litellm_provider") == "perplexity": + perplexity_models.append(key) + elif value.get("litellm_provider") == "watsonx": + watsonx_models.append(key) + elif value.get("litellm_provider") == "gemini": + gemini_models.append(key) + elif value.get("litellm_provider") == "fireworks_ai": + # ignore the 'up-to', '-to-' model names -> not real models. just for cost tracking based on model params. + if "-to-" not in key and "fireworks-ai-default" not in key: + fireworks_ai_models.append(key) + elif value.get("litellm_provider") == "fireworks_ai-embedding-models": + # ignore the 'up-to', '-to-' model names -> not real models. just for cost tracking based on model params. + if "-to-" not in key: + fireworks_ai_embedding_models.append(key) + elif value.get("litellm_provider") == "text-completion-codestral": + text_completion_codestral_models.append(key) + elif value.get("litellm_provider") == "xai": + xai_models.append(key) + elif value.get("litellm_provider") == "deepseek": + deepseek_models.append(key) + elif value.get("litellm_provider") == "meta_llama": + llama_models.append(key) + elif value.get("litellm_provider") == "nscale": + nscale_models.append(key) + elif value.get("litellm_provider") == "azure_ai": + azure_ai_models.append(key) + elif value.get("litellm_provider") == "voyage": + voyage_models.append(key) + elif value.get("litellm_provider") == "infinity": + infinity_models.append(key) + elif value.get("litellm_provider") == "databricks": + databricks_models.append(key) + elif value.get("litellm_provider") == "cloudflare": + cloudflare_models.append(key) + elif value.get("litellm_provider") == "codestral": + codestral_models.append(key) + elif value.get("litellm_provider") == "friendliai": + friendliai_models.append(key) + elif value.get("litellm_provider") == "palm": + palm_models.append(key) + elif value.get("litellm_provider") == "groq": + groq_models.append(key) + elif value.get("litellm_provider") == "azure": + azure_models.append(key) + elif value.get("litellm_provider") == "anyscale": + anyscale_models.append(key) + elif value.get("litellm_provider") == "cerebras": + cerebras_models.append(key) + elif value.get("litellm_provider") == "galadriel": + galadriel_models.append(key) + elif value.get("litellm_provider") == "sambanova": + sambanova_models.append(key) + elif value.get("litellm_provider") == "novita": + novita_models.append(key) + elif value.get("litellm_provider") == "nebius-chat-models": + nebius_models.append(key) + elif value.get("litellm_provider") == "nebius-embedding-models": + nebius_embedding_models.append(key) + elif value.get("litellm_provider") == "assemblyai": + assemblyai_models.append(key) + elif value.get("litellm_provider") == "jina_ai": + jina_ai_models.append(key) + elif value.get("litellm_provider") == "snowflake": + snowflake_models.append(key) + elif value.get("litellm_provider") == "featherless_ai": + featherless_ai_models.append(key) + elif value.get("litellm_provider") == "deepgram": + deepgram_models.append(key) + elif value.get("litellm_provider") == "elevenlabs": + elevenlabs_models.append(key) + + +add_known_models() +# known openai compatible endpoints - we'll eventually move this list to the model_prices_and_context_window.json dictionary + +# this is maintained for Exception Mapping + + +# used for Cost Tracking & Token counting +# https://azure.microsoft.com/en-in/pricing/details/cognitive-services/openai-service/ +# Azure returns gpt-35-turbo in their responses, we need to map this to azure/gpt-3.5-turbo for token counting +azure_llms = { + "gpt-35-turbo": "azure/gpt-35-turbo", + "gpt-35-turbo-16k": "azure/gpt-35-turbo-16k", + "gpt-35-turbo-instruct": "azure/gpt-35-turbo-instruct", +} + +azure_embedding_models = { + "ada": "azure/ada", +} + +petals_models = [ + "petals-team/StableBeluga2", +] + +ollama_models = ["llama2"] + +maritalk_models = ["maritalk"] + + +model_list = ( + open_ai_chat_completion_models + + open_ai_text_completion_models + + cohere_models + + cohere_chat_models + + anthropic_models + + replicate_models + + openrouter_models + + datarobot_models + + huggingface_models + + vertex_chat_models + + vertex_text_models + + ai21_models + + ai21_chat_models + + together_ai_models + + baseten_models + + aleph_alpha_models + + nlp_cloud_models + + ollama_models + + bedrock_models + + deepinfra_models + + perplexity_models + + maritalk_models + + vertex_language_models + + watsonx_models + + gemini_models + + text_completion_codestral_models + + xai_models + + deepseek_models + + azure_ai_models + + voyage_models + + infinity_models + + databricks_models + + cloudflare_models + + codestral_models + + friendliai_models + + palm_models + + groq_models + + azure_models + + anyscale_models + + cerebras_models + + galadriel_models + + sambanova_models + + azure_text_models + + novita_models + + assemblyai_models + + jina_ai_models + + snowflake_models + + llama_models + + featherless_ai_models + + nscale_models + + deepgram_models + + elevenlabs_models +) + +model_list_set = set(model_list) + +provider_list: List[Union[LlmProviders, str]] = list(LlmProviders) + + +models_by_provider: dict = { + "openai": open_ai_chat_completion_models + open_ai_text_completion_models, + "text-completion-openai": open_ai_text_completion_models, + "cohere": cohere_models + cohere_chat_models, + "cohere_chat": cohere_chat_models, + "anthropic": anthropic_models, + "replicate": replicate_models, + "huggingface": huggingface_models, + "together_ai": together_ai_models, + "baseten": baseten_models, + "openrouter": openrouter_models, + "datarobot": datarobot_models, + "vertex_ai": vertex_chat_models + + vertex_text_models + + vertex_anthropic_models + + vertex_vision_models + + vertex_language_models, + "ai21": ai21_models, + "bedrock": bedrock_models + bedrock_converse_models, + "petals": petals_models, + "ollama": ollama_models, + "deepinfra": deepinfra_models, + "perplexity": perplexity_models, + "maritalk": maritalk_models, + "watsonx": watsonx_models, + "gemini": gemini_models, + "fireworks_ai": fireworks_ai_models + fireworks_ai_embedding_models, + "aleph_alpha": aleph_alpha_models, + "text-completion-codestral": text_completion_codestral_models, + "xai": xai_models, + "deepseek": deepseek_models, + "mistral": mistral_chat_models, + "azure_ai": azure_ai_models, + "voyage": voyage_models, + "infinity": infinity_models, + "databricks": databricks_models, + "cloudflare": cloudflare_models, + "codestral": codestral_models, + "nlp_cloud": nlp_cloud_models, + "friendliai": friendliai_models, + "palm": palm_models, + "groq": groq_models, + "azure": azure_models + azure_text_models, + "azure_text": azure_text_models, + "anyscale": anyscale_models, + "cerebras": cerebras_models, + "galadriel": galadriel_models, + "sambanova": sambanova_models, + "novita": novita_models, + "nebius": nebius_models + nebius_embedding_models, + "assemblyai": assemblyai_models, + "jina_ai": jina_ai_models, + "snowflake": snowflake_models, + "meta_llama": llama_models, + "nscale": nscale_models, + "featherless_ai": featherless_ai_models, + "deepgram": deepgram_models, + "elevenlabs": elevenlabs_models, +} + +# mapping for those models which have larger equivalents +longer_context_model_fallback_dict: dict = { + # openai chat completion models + "gpt-3.5-turbo": "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0301": "gpt-3.5-turbo-16k-0301", + "gpt-3.5-turbo-0613": "gpt-3.5-turbo-16k-0613", + "gpt-4": "gpt-4-32k", + "gpt-4-0314": "gpt-4-32k-0314", + "gpt-4-0613": "gpt-4-32k-0613", + # anthropic + "claude-instant-1": "claude-2", + "claude-instant-1.2": "claude-2", + # vertexai + "chat-bison": "chat-bison-32k", + "chat-bison@001": "chat-bison-32k", + "codechat-bison": "codechat-bison-32k", + "codechat-bison@001": "codechat-bison-32k", + # openrouter + "openrouter/openai/gpt-3.5-turbo": "openrouter/openai/gpt-3.5-turbo-16k", + "openrouter/anthropic/claude-instant-v1": "openrouter/anthropic/claude-2", +} + +####### EMBEDDING MODELS ################### + +all_embedding_models = ( + open_ai_embedding_models + + cohere_embedding_models + + bedrock_embedding_models + + vertex_embedding_models + + fireworks_ai_embedding_models + + nebius_embedding_models +) + +####### IMAGE GENERATION MODELS ################### +openai_image_generation_models = ["dall-e-2", "dall-e-3"] from .timeout import timeout from .cost_calculator import completion_cost diff --git a/litellm/model_registry.py b/litellm/model_registry.py deleted file mode 100644 index 6e1f66c48ced..000000000000 --- a/litellm/model_registry.py +++ /dev/null @@ -1,475 +0,0 @@ -import re -from typing import List, Union - -from litellm.constants import ( - baseten_models, - bedrock_embedding_models, - cohere_embedding_models, - empower_models, - huggingface_models, - open_ai_embedding_models, - replicate_models, - together_ai_models, -) -from litellm.types.utils import LlmProviders - -BEDROCK_CONVERSE_MODELS = [ - "anthropic.claude-opus-4-20250514-v1:0", - "anthropic.claude-sonnet-4-20250514-v1:0", - "anthropic.claude-3-7-sonnet-20250219-v1:0", - "anthropic.claude-3-5-haiku-20241022-v1:0", - "anthropic.claude-3-5-sonnet-20241022-v2:0", - "anthropic.claude-3-5-sonnet-20240620-v1:0", - "anthropic.claude-3-opus-20240229-v1:0", - "anthropic.claude-3-sonnet-20240229-v1:0", - "anthropic.claude-3-haiku-20240307-v1:0", - "anthropic.claude-v2", - "anthropic.claude-v2:1", - "anthropic.claude-v1", - "anthropic.claude-instant-v1", - "ai21.jamba-instruct-v1:0", - "ai21.jamba-1-5-mini-v1:0", - "ai21.jamba-1-5-large-v1:0", - "meta.llama3-70b-instruct-v1:0", - "meta.llama3-8b-instruct-v1:0", - "meta.llama3-1-8b-instruct-v1:0", - "meta.llama3-1-70b-instruct-v1:0", - "meta.llama3-1-405b-instruct-v1:0", - "meta.llama3-70b-instruct-v1:0", - "mistral.mistral-large-2407-v1:0", - "mistral.mistral-large-2402-v1:0", - "mistral.mistral-small-2402-v1:0", - "meta.llama3-2-1b-instruct-v1:0", - "meta.llama3-2-3b-instruct-v1:0", - "meta.llama3-2-11b-instruct-v1:0", - "meta.llama3-2-90b-instruct-v1:0", -] - -####### COMPLETION MODELS ################### -open_ai_chat_completion_models: List = [] -open_ai_text_completion_models: List = [] -cohere_models: List = [] -cohere_chat_models: List = [] -mistral_chat_models: List = [] -text_completion_codestral_models: List = [] -anthropic_models: List = [] -openrouter_models: List = [] -datarobot_models: List = [] -vertex_language_models: List = [] -vertex_vision_models: List = [] -vertex_chat_models: List = [] -vertex_code_chat_models: List = [] -vertex_ai_image_models: List = [] -vertex_text_models: List = [] -vertex_code_text_models: List = [] -vertex_embedding_models: List = [] -vertex_anthropic_models: List = [] -vertex_llama3_models: List = [] -vertex_ai_ai21_models: List = [] -vertex_mistral_models: List = [] -ai21_models: List = [] -ai21_chat_models: List = [] -nlp_cloud_models: List = [] -aleph_alpha_models: List = [] -bedrock_models: List = [] -bedrock_converse_models: List = BEDROCK_CONVERSE_MODELS -fireworks_ai_models: List = [] -fireworks_ai_embedding_models: List = [] -deepinfra_models: List = [] -perplexity_models: List = [] -watsonx_models: List = [] -gemini_models: List = [] -xai_models: List = [] -deepseek_models: List = [] -azure_ai_models: List = [] -jina_ai_models: List = [] -voyage_models: List = [] -infinity_models: List = [] -databricks_models: List = [] -cloudflare_models: List = [] -codestral_models: List = [] -friendliai_models: List = [] -featherless_ai_models: List = [] -palm_models: List = [] -groq_models: List = [] -azure_models: List = [] -azure_text_models: List = [] -anyscale_models: List = [] -cerebras_models: List = [] -galadriel_models: List = [] -sambanova_models: List = [] -novita_models: List = [] -assemblyai_models: List = [] -snowflake_models: List = [] -llama_models: List = [] -nscale_models: List = [] -nebius_models: List = [] -nebius_embedding_models: List = [] -deepgram_models: List = [] -elevenlabs_models: List = [] - - -def is_bedrock_pricing_only_model(key: str) -> bool: - """ - Excludes keys with the pattern 'bedrock//'. These are in the model_prices_and_context_window.json file for pricing purposes only. - - Args: - key (str): A key to filter. - - Returns: - bool: True if the key matches the Bedrock pattern, False otherwise. - """ - # Regex to match 'bedrock//' - bedrock_pattern = re.compile(r"^bedrock/[a-zA-Z0-9_-]+/.+$") - - if "month-commitment" in key: - return True - - is_match = bedrock_pattern.match(key) - return is_match is not None - - -def is_openai_finetune_model(key: str) -> bool: - """ - Excludes model cost keys with the pattern 'ft:'. These are in the model_prices_and_context_window.json file for pricing purposes only. - - Args: - key (str): A key to filter. - - Returns: - bool: True if the key matches the OpenAI finetune pattern, False otherwise. - """ - return key.startswith("ft:") and not key.count(":") > 1 - -model_cost_map_url: str = ( - "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json" -) -from litellm.litellm_core_utils.get_model_cost_map import get_model_cost_map - -model_cost = get_model_cost_map(url=model_cost_map_url) - -def add_known_models(): # noqa: PLR0915 - for key, value in model_cost.items(): - if value.get("litellm_provider") == "openai" and not is_openai_finetune_model( - key - ): - open_ai_chat_completion_models.append(key) - elif value.get("litellm_provider") == "text-completion-openai": - open_ai_text_completion_models.append(key) - elif value.get("litellm_provider") == "azure_text": - azure_text_models.append(key) - elif value.get("litellm_provider") == "cohere": - cohere_models.append(key) - elif value.get("litellm_provider") == "cohere_chat": - cohere_chat_models.append(key) - elif value.get("litellm_provider") == "mistral": - mistral_chat_models.append(key) - elif value.get("litellm_provider") == "anthropic": - anthropic_models.append(key) - elif value.get("litellm_provider") == "empower": - empower_models.append(key) - elif value.get("litellm_provider") == "openrouter": - openrouter_models.append(key) - elif value.get("litellm_provider") == "datarobot": - datarobot_models.append(key) - elif value.get("litellm_provider") == "vertex_ai-text-models": - vertex_text_models.append(key) - elif value.get("litellm_provider") == "vertex_ai-code-text-models": - vertex_code_text_models.append(key) - elif value.get("litellm_provider") == "vertex_ai-language-models": - vertex_language_models.append(key) - elif value.get("litellm_provider") == "vertex_ai-vision-models": - vertex_vision_models.append(key) - elif value.get("litellm_provider") == "vertex_ai-chat-models": - vertex_chat_models.append(key) - elif value.get("litellm_provider") == "vertex_ai-code-chat-models": - vertex_code_chat_models.append(key) - elif value.get("litellm_provider") == "vertex_ai-embedding-models": - vertex_embedding_models.append(key) - elif value.get("litellm_provider") == "vertex_ai-anthropic_models": - key = key.replace("vertex_ai/", "") - vertex_anthropic_models.append(key) - elif value.get("litellm_provider") == "vertex_ai-llama_models": - key = key.replace("vertex_ai/", "") - vertex_llama3_models.append(key) - elif value.get("litellm_provider") == "vertex_ai-mistral_models": - key = key.replace("vertex_ai/", "") - vertex_mistral_models.append(key) - elif value.get("litellm_provider") == "vertex_ai-ai21_models": - key = key.replace("vertex_ai/", "") - vertex_ai_ai21_models.append(key) - elif value.get("litellm_provider") == "vertex_ai-image-models": - key = key.replace("vertex_ai/", "") - vertex_ai_image_models.append(key) - elif value.get("litellm_provider") == "ai21": - if value.get("mode") == "chat": - ai21_chat_models.append(key) - else: - ai21_models.append(key) - elif value.get("litellm_provider") == "nlp_cloud": - nlp_cloud_models.append(key) - elif value.get("litellm_provider") == "aleph_alpha": - aleph_alpha_models.append(key) - elif value.get( - "litellm_provider" - ) == "bedrock" and not is_bedrock_pricing_only_model(key): - bedrock_models.append(key) - elif value.get("litellm_provider") == "bedrock_converse": - bedrock_converse_models.append(key) - elif value.get("litellm_provider") == "deepinfra": - deepinfra_models.append(key) - elif value.get("litellm_provider") == "perplexity": - perplexity_models.append(key) - elif value.get("litellm_provider") == "watsonx": - watsonx_models.append(key) - elif value.get("litellm_provider") == "gemini": - gemini_models.append(key) - elif value.get("litellm_provider") == "fireworks_ai": - # ignore the 'up-to', '-to-' model names -> not real models. just for cost tracking based on model params. - if "-to-" not in key and "fireworks-ai-default" not in key: - fireworks_ai_models.append(key) - elif value.get("litellm_provider") == "fireworks_ai-embedding-models": - # ignore the 'up-to', '-to-' model names -> not real models. just for cost tracking based on model params. - if "-to-" not in key: - fireworks_ai_embedding_models.append(key) - elif value.get("litellm_provider") == "text-completion-codestral": - text_completion_codestral_models.append(key) - elif value.get("litellm_provider") == "xai": - xai_models.append(key) - elif value.get("litellm_provider") == "deepseek": - deepseek_models.append(key) - elif value.get("litellm_provider") == "meta_llama": - llama_models.append(key) - elif value.get("litellm_provider") == "nscale": - nscale_models.append(key) - elif value.get("litellm_provider") == "azure_ai": - azure_ai_models.append(key) - elif value.get("litellm_provider") == "voyage": - voyage_models.append(key) - elif value.get("litellm_provider") == "infinity": - infinity_models.append(key) - elif value.get("litellm_provider") == "databricks": - databricks_models.append(key) - elif value.get("litellm_provider") == "cloudflare": - cloudflare_models.append(key) - elif value.get("litellm_provider") == "codestral": - codestral_models.append(key) - elif value.get("litellm_provider") == "friendliai": - friendliai_models.append(key) - elif value.get("litellm_provider") == "palm": - palm_models.append(key) - elif value.get("litellm_provider") == "groq": - groq_models.append(key) - elif value.get("litellm_provider") == "azure": - azure_models.append(key) - elif value.get("litellm_provider") == "anyscale": - anyscale_models.append(key) - elif value.get("litellm_provider") == "cerebras": - cerebras_models.append(key) - elif value.get("litellm_provider") == "galadriel": - galadriel_models.append(key) - elif value.get("litellm_provider") == "sambanova": - sambanova_models.append(key) - elif value.get("litellm_provider") == "novita": - novita_models.append(key) - elif value.get("litellm_provider") == "nebius-chat-models": - nebius_models.append(key) - elif value.get("litellm_provider") == "nebius-embedding-models": - nebius_embedding_models.append(key) - elif value.get("litellm_provider") == "assemblyai": - assemblyai_models.append(key) - elif value.get("litellm_provider") == "jina_ai": - jina_ai_models.append(key) - elif value.get("litellm_provider") == "snowflake": - snowflake_models.append(key) - elif value.get("litellm_provider") == "featherless_ai": - featherless_ai_models.append(key) - elif value.get("litellm_provider") == "deepgram": - deepgram_models.append(key) - elif value.get("litellm_provider") == "elevenlabs": - elevenlabs_models.append(key) - - -add_known_models() -# known openai compatible endpoints - we'll eventually move this list to the model_prices_and_context_window.json dictionary - -# this is maintained for Exception Mapping - - -# used for Cost Tracking & Token counting -# https://azure.microsoft.com/en-in/pricing/details/cognitive-services/openai-service/ -# Azure returns gpt-35-turbo in their responses, we need to map this to azure/gpt-3.5-turbo for token counting -azure_llms = { - "gpt-35-turbo": "azure/gpt-35-turbo", - "gpt-35-turbo-16k": "azure/gpt-35-turbo-16k", - "gpt-35-turbo-instruct": "azure/gpt-35-turbo-instruct", -} - -azure_embedding_models = { - "ada": "azure/ada", -} - -petals_models = [ - "petals-team/StableBeluga2", -] - -ollama_models = ["llama2"] - -maritalk_models = ["maritalk"] - - -model_list = ( - open_ai_chat_completion_models - + open_ai_text_completion_models - + cohere_models - + cohere_chat_models - + anthropic_models - + replicate_models - + openrouter_models - + datarobot_models - + huggingface_models - + vertex_chat_models - + vertex_text_models - + ai21_models - + ai21_chat_models - + together_ai_models - + baseten_models - + aleph_alpha_models - + nlp_cloud_models - + ollama_models - + bedrock_models - + deepinfra_models - + perplexity_models - + maritalk_models - + vertex_language_models - + watsonx_models - + gemini_models - + text_completion_codestral_models - + xai_models - + deepseek_models - + azure_ai_models - + voyage_models - + infinity_models - + databricks_models - + cloudflare_models - + codestral_models - + friendliai_models - + palm_models - + groq_models - + azure_models - + anyscale_models - + cerebras_models - + galadriel_models - + sambanova_models - + azure_text_models - + novita_models - + assemblyai_models - + jina_ai_models - + snowflake_models - + llama_models - + featherless_ai_models - + nscale_models - + deepgram_models - + elevenlabs_models -) - -model_list_set = set(model_list) - -provider_list: List[Union[LlmProviders, str]] = list(LlmProviders) - - -models_by_provider: dict = { - "openai": open_ai_chat_completion_models + open_ai_text_completion_models, - "text-completion-openai": open_ai_text_completion_models, - "cohere": cohere_models + cohere_chat_models, - "cohere_chat": cohere_chat_models, - "anthropic": anthropic_models, - "replicate": replicate_models, - "huggingface": huggingface_models, - "together_ai": together_ai_models, - "baseten": baseten_models, - "openrouter": openrouter_models, - "datarobot": datarobot_models, - "vertex_ai": vertex_chat_models - + vertex_text_models - + vertex_anthropic_models - + vertex_vision_models - + vertex_language_models, - "ai21": ai21_models, - "bedrock": bedrock_models + bedrock_converse_models, - "petals": petals_models, - "ollama": ollama_models, - "deepinfra": deepinfra_models, - "perplexity": perplexity_models, - "maritalk": maritalk_models, - "watsonx": watsonx_models, - "gemini": gemini_models, - "fireworks_ai": fireworks_ai_models + fireworks_ai_embedding_models, - "aleph_alpha": aleph_alpha_models, - "text-completion-codestral": text_completion_codestral_models, - "xai": xai_models, - "deepseek": deepseek_models, - "mistral": mistral_chat_models, - "azure_ai": azure_ai_models, - "voyage": voyage_models, - "infinity": infinity_models, - "databricks": databricks_models, - "cloudflare": cloudflare_models, - "codestral": codestral_models, - "nlp_cloud": nlp_cloud_models, - "friendliai": friendliai_models, - "palm": palm_models, - "groq": groq_models, - "azure": azure_models + azure_text_models, - "azure_text": azure_text_models, - "anyscale": anyscale_models, - "cerebras": cerebras_models, - "galadriel": galadriel_models, - "sambanova": sambanova_models, - "novita": novita_models, - "nebius": nebius_models + nebius_embedding_models, - "assemblyai": assemblyai_models, - "jina_ai": jina_ai_models, - "snowflake": snowflake_models, - "meta_llama": llama_models, - "nscale": nscale_models, - "featherless_ai": featherless_ai_models, - "deepgram": deepgram_models, - "elevenlabs": elevenlabs_models, -} - -# mapping for those models which have larger equivalents -longer_context_model_fallback_dict: dict = { - # openai chat completion models - "gpt-3.5-turbo": "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0301": "gpt-3.5-turbo-16k-0301", - "gpt-3.5-turbo-0613": "gpt-3.5-turbo-16k-0613", - "gpt-4": "gpt-4-32k", - "gpt-4-0314": "gpt-4-32k-0314", - "gpt-4-0613": "gpt-4-32k-0613", - # anthropic - "claude-instant-1": "claude-2", - "claude-instant-1.2": "claude-2", - # vertexai - "chat-bison": "chat-bison-32k", - "chat-bison@001": "chat-bison-32k", - "codechat-bison": "codechat-bison-32k", - "codechat-bison@001": "codechat-bison-32k", - # openrouter - "openrouter/openai/gpt-3.5-turbo": "openrouter/openai/gpt-3.5-turbo-16k", - "openrouter/anthropic/claude-instant-v1": "openrouter/anthropic/claude-2", -} - -####### EMBEDDING MODELS ################### - -all_embedding_models = ( - open_ai_embedding_models - + cohere_embedding_models - + bedrock_embedding_models - + vertex_embedding_models - + fireworks_ai_embedding_models - + nebius_embedding_models -) - -####### IMAGE GENERATION MODELS ################### -openai_image_generation_models = ["dall-e-2", "dall-e-3"] \ No newline at end of file