BerriAI · krrishdholakia · Apr 26, 2025 · Apr 25, 2025 · Apr 25, 2025 · Apr 25, 2025
diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py
@@ -1000,9 +1000,9 @@ def set_llm_deployment_success_metrics(
     ):
         try:
             verbose_logger.debug("setting remaining tokens requests metric")
-            standard_logging_payload: Optional[StandardLoggingPayload] = (
-                request_kwargs.get("standard_logging_object")
-            )
+            standard_logging_payload: Optional[
+                StandardLoggingPayload
+            ] = request_kwargs.get("standard_logging_object")
 
             if standard_logging_payload is None:
                 return
@@ -1453,6 +1453,7 @@ async def fetch_keys(
                 user_id=None,
                 team_id=None,
                 key_alias=None,
+                key_hash=None,
                 exclude_team_id=UI_SESSION_TOKEN_TEAM_ID,
                 return_full_object=True,
                 organization_id=None,
@@ -1771,10 +1772,10 @@ def initialize_budget_metrics_cron_job(scheduler: AsyncIOScheduler):
         from litellm.integrations.custom_logger import CustomLogger
         from litellm.integrations.prometheus import PrometheusLogger
 
-        prometheus_loggers: List[CustomLogger] = (
-            litellm.logging_callback_manager.get_custom_loggers_for_type(
-                callback_type=PrometheusLogger
-            )
+        prometheus_loggers: List[
+            CustomLogger
+        ] = litellm.logging_callback_manager.get_custom_loggers_for_type(
+            callback_type=PrometheusLogger
         )
         # we need to get the initialized prometheus logger instance(s) and call logger.initialize_remaining_budget_metrics() on them
         verbose_logger.debug("found %s prometheus loggers", len(prometheus_loggers))

diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
@@ -356,7 +356,8 @@
         "supports_vision": true,
         "supports_prompt_caching": true,
         "supports_system_messages": true,
-        "supports_tool_choice": true
+        "supports_tool_choice": true,
+        "deprecation_date": "2025-07-14"
     },
     "gpt-4o-audio-preview": {
         "max_tokens": 16384,
@@ -1509,6 +1510,8 @@
     },
     "gpt-4o-transcribe": {
         "mode": "audio_transcription",
+        "max_input_tokens": 16000,
+        "max_output_tokens": 2000,
         "input_cost_per_token": 0.0000025,
         "input_cost_per_audio_token": 0.000006,
         "output_cost_per_token": 0.00001, 
@@ -1517,6 +1520,8 @@
     }, 
     "gpt-4o-mini-transcribe": {
         "mode": "audio_transcription",
+        "max_input_tokens": 16000,
+        "max_output_tokens": 2000,
         "input_cost_per_token": 0.00000125,
         "input_cost_per_audio_token": 0.000003,
         "output_cost_per_token": 0.000005, 
@@ -2439,7 +2444,8 @@
         "supports_response_schema": true,
         "supports_vision": true,
         "supports_prompt_caching": true,
-        "supports_tool_choice": true
+        "supports_tool_choice": true,
+        "deprecation_date": "2025-08-20"
     },
     "azure/us/gpt-4o-2024-08-06": {
         "max_tokens": 16384,
@@ -2479,13 +2485,15 @@
         "max_output_tokens": 16384,
         "input_cost_per_token": 0.0000025,
         "output_cost_per_token": 0.000010,
+        "cache_read_input_token_cost": 0.00000125,
         "litellm_provider": "azure",
         "mode": "chat",
         "supports_function_calling": true,
         "supports_parallel_function_calling": true,
         "supports_response_schema": true,
         "supports_vision": true,
-        "supports_tool_choice": true
+        "supports_tool_choice": true,
+        "deprecation_date": "2025-12-20"
     },
     "azure/global-standard/gpt-4o-mini": {
         "max_tokens": 16384,
@@ -5349,14 +5357,14 @@
         "input_cost_per_image": 0,
         "input_cost_per_video_per_second": 0,
         "input_cost_per_audio_per_second": 0,
-        "input_cost_per_token": 0,
+        "input_cost_per_token": 0.00000015,
         "input_cost_per_character": 0, 
         "input_cost_per_token_above_128k_tokens": 0, 
         "input_cost_per_character_above_128k_tokens": 0, 
         "input_cost_per_image_above_128k_tokens": 0,
         "input_cost_per_video_per_second_above_128k_tokens": 0,
         "input_cost_per_audio_per_second_above_128k_tokens": 0,
-        "output_cost_per_token": 0,
+        "output_cost_per_token": 0.0000006,
         "output_cost_per_character": 0,
         "output_cost_per_token_above_128k_tokens": 0,
         "output_cost_per_character_above_128k_tokens": 0,
@@ -5395,7 +5403,8 @@
         "supports_tool_choice": true,
         "supported_modalities": ["text", "image", "audio", "video"],
         "supported_output_modalities": ["text", "image"],
-        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing",
+        "deprecation_date": "2026-02-05"
     },
     "gemini-2.0-flash-thinking-exp": {
         "max_tokens": 8192,
@@ -5599,7 +5608,8 @@
         "supported_modalities": ["text", "image", "audio", "video"],
         "supported_output_modalities": ["text"],
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
-        "supports_tool_choice": true
+        "supports_tool_choice": true,
+        "deprecation_date": "2026-02-25"
     },
     "gemini-2.5-pro-preview-03-25": {
         "max_tokens": 65536,

diff --git a/litellm/proxy/management_endpoints/key_management_endpoints.py b/litellm/proxy/management_endpoints/key_management_endpoints.py
@@ -1861,6 +1861,7 @@ async def validate_key_list_check(
     team_id: Optional[str],
     organization_id: Optional[str],
     key_alias: Optional[str],
+    key_hash: Optional[str],
     prisma_client: PrismaClient,
 ) -> Optional[LiteLLM_UserTable]:
     if user_api_key_dict.user_role == LitellmUserRoles.PROXY_ADMIN.value:
@@ -1924,6 +1925,36 @@ async def validate_key_list_check(
                 param="organization_id",
                 code=status.HTTP_403_FORBIDDEN,
             )
+
+    # When filtering by key hash, the caller must be allowed to view that key.
+    if key_hash:
+        try:
+            key_info = await prisma_client.db.litellm_verificationtoken.find_unique(
+                where={"token": key_hash},
+            )
+        except Exception:
+            key_info = None  # a failed lookup is reported the same way as a miss
+        # find_unique returns None (it does not raise) when no row matches,
+        # so a miss has to be rejected here, before key_info is used below.
+        if key_info is None:
+            raise ProxyException(
+                message="Key Hash not found.",
+                type=ProxyErrorTypes.bad_request_error,
+                param="key_hash",
+                code=status.HTTP_403_FORBIDDEN,
+            )
+        can_user_query_key_info = await _can_user_query_key_info(
+            user_api_key_dict=user_api_key_dict,
+            key=key_hash,
+            key_info=key_info,
+        )
+        if not can_user_query_key_info:
+            raise HTTPException(
+                status_code=status.HTTP_403_FORBIDDEN,
+                detail="You are not allowed to access this key's info. Your role={}".format(
+                    user_api_key_dict.user_role
+                ),
+            )
     return complete_user_info
 
 
@@ -1972,6 +1998,7 @@ async def list_keys(
     organization_id: Optional[str] = Query(
         None, description="Filter keys by organization ID"
     ),
+    key_hash: Optional[str] = Query(None, description="Filter keys by key hash"),
     key_alias: Optional[str] = Query(None, description="Filter keys by key alias"),
     return_full_object: bool = Query(False, description="Return full key object"),
     include_team_keys: bool = Query(
@@ -2004,6 +2031,7 @@ async def list_keys(
             team_id=team_id,
             organization_id=organization_id,
             key_alias=key_alias,
+            key_hash=key_hash,
             prisma_client=prisma_client,
         )
 
@@ -2029,6 +2057,7 @@ async def list_keys(
             user_id=user_id,
             team_id=team_id,
             key_alias=key_alias,
+            key_hash=key_hash,
             return_full_object=return_full_object,
             organization_id=organization_id,
             admin_team_ids=admin_team_ids,
@@ -2065,6 +2094,7 @@ async def _list_key_helper(
     team_id: Optional[str],
     organization_id: Optional[str],
     key_alias: Optional[str],
+    key_hash: Optional[str],
     exclude_team_id: Optional[str] = None,
     return_full_object: bool = False,
     admin_team_ids: Optional[
@@ -2111,6 +2141,8 @@ async def _list_key_helper(
         user_condition["team_id"] = {"not": exclude_team_id}
     if organization_id and isinstance(organization_id, str):
         user_condition["organization_id"] = organization_id
+    if key_hash and isinstance(key_hash, str):
+        user_condition["token"] = key_hash
 
     if user_condition:
         or_conditions.append(user_condition)

diff --git a/tests/litellm/proxy/management_endpoints/test_key_management_endpoints.py b/tests/litellm/proxy/management_endpoints/test_key_management_endpoints.py
@@ -30,6 +30,7 @@ async def test_list_keys():
         "team_id": None,
         "organization_id": None,
         "key_alias": None,
+        "key_hash": None,
         "exclude_team_id": None,
         "return_full_object": True,
         "admin_team_ids": ["28bd3181-02c5-48f2-b408-ce790fb3d5ba"],

diff --git a/tests/proxy_admin_ui_tests/test_key_management.py b/tests/proxy_admin_ui_tests/test_key_management.py
@@ -989,6 +989,7 @@ async def test_list_key_helper(prisma_client):
         user_id=None,
         team_id=None,
         key_alias=None,
+        key_hash=None,
         organization_id=None,
     )
     assert len(result["keys"]) == 2, "Should return exactly 2 keys"
@@ -1004,6 +1005,7 @@ async def test_list_key_helper(prisma_client):
         user_id=test_user_id,
         team_id=None,
         key_alias=None,
+        key_hash=None,
         organization_id=None,
     )
     assert len(result["keys"]) == 3, "Should return exactly 3 keys for test user"
@@ -1016,6 +1018,7 @@ async def test_list_key_helper(prisma_client):
         user_id=None,
         team_id=test_team_id,
         key_alias=None,
+        key_hash=None,
         organization_id=None,
     )
     assert len(result["keys"]) == 2, "Should return exactly 2 keys for test team"
@@ -1028,6 +1031,7 @@ async def test_list_key_helper(prisma_client):
         user_id=None,
         team_id=None,
         key_alias=test_key_alias,
+        key_hash=None,
         organization_id=None,
     )
     assert len(result["keys"]) == 1, "Should return exactly 1 key with test alias"
@@ -1040,6 +1044,7 @@ async def test_list_key_helper(prisma_client):
         user_id=test_user_id,
         team_id=None,
         key_alias=None,
+        key_hash=None,
         return_full_object=True,
         organization_id=None,
     )
@@ -1141,6 +1146,7 @@ async def test_list_key_helper_team_filtering(prisma_client):
                 user_id=None,
                 team_id=None,
                 key_alias=None,
+                key_hash=None,
                 return_full_object=True,
                 organization_id=None,
             )