Commit e74ff23

Litellm dev 06 03 2025 p3 (#11388)
* fix(vertex_ai/common_utils.py): Close #11383
* feat(anthropic/batches): new transformation config for anthropic batches in transformation.py
* feat(anthropic/batches): working token tracking for anthropic batch calls via the `/anthropic` passthrough route
* fix(anthropic_passthrough_logging_handler.py): ruff check fixes
1 parent b21efd4 commit e74ff23

6 files changed (+184, -65 lines)

litellm/cost_calculator.py

Lines changed: 34 additions & 28 deletions
@@ -1209,28 +1209,7 @@ def batch_cost_calculator(
     return total_prompt_cost, total_completion_cost
 
 
-class RealtimeAPITokenUsageProcessor:
-    @staticmethod
-    def collect_usage_from_realtime_stream_results(
-        results: OpenAIRealtimeStreamList,
-    ) -> List[Usage]:
-        """
-        Collect usage from realtime stream results
-        """
-        response_done_events: List[OpenAIRealtimeStreamResponseBaseObject] = cast(
-            List[OpenAIRealtimeStreamResponseBaseObject],
-            [result for result in results if result["type"] == "response.done"],
-        )
-        usage_objects: List[Usage] = []
-        for result in response_done_events:
-            usage_object = (
-                ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
-                    result["response"].get("usage", {})
-                )
-            )
-            usage_objects.append(usage_object)
-        return usage_objects
-
+class BaseTokenUsageProcessor:
     @staticmethod
     def combine_usage_objects(usage_objects: List[Usage]) -> Usage:
         """
@@ -1266,13 +1245,17 @@ def combine_usage_objects(usage_objects: List[Usage]) -> Usage:
             combined.prompt_tokens_details = PromptTokensDetailsWrapper()
 
         # Check what keys exist in the model's prompt_tokens_details
-        for attr in dir(usage.prompt_tokens_details):
-            if not attr.startswith("_") and not callable(
-                getattr(usage.prompt_tokens_details, attr)
+        for attr in usage.prompt_tokens_details.model_fields:
+            if (
+                hasattr(usage.prompt_tokens_details, attr)
+                and not attr.startswith("_")
+                and not callable(getattr(usage.prompt_tokens_details, attr))
             ):
-                current_val = getattr(combined.prompt_tokens_details, attr, 0)
-                new_val = getattr(usage.prompt_tokens_details, attr, 0)
-                if new_val is not None:
+                current_val = (
+                    getattr(combined.prompt_tokens_details, attr, 0) or 0
+                )
+                new_val = getattr(usage.prompt_tokens_details, attr, 0) or 0
+                if new_val is not None and isinstance(new_val, (int, float)):
                     setattr(
                         combined.prompt_tokens_details,
                         attr,
@@ -1308,6 +1291,29 @@ def combine_usage_objects(usage_objects: List[Usage]) -> Usage:
 
         return combined
 
+
+class RealtimeAPITokenUsageProcessor(BaseTokenUsageProcessor):
+    @staticmethod
+    def collect_usage_from_realtime_stream_results(
+        results: OpenAIRealtimeStreamList,
+    ) -> List[Usage]:
+        """
+        Collect usage from realtime stream results
+        """
+        response_done_events: List[OpenAIRealtimeStreamResponseBaseObject] = cast(
+            List[OpenAIRealtimeStreamResponseBaseObject],
+            [result for result in results if result["type"] == "response.done"],
+        )
+        usage_objects: List[Usage] = []
+        for result in response_done_events:
+            usage_object = (
+                ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
+                    result["response"].get("usage", {})
+                )
+            )
+            usage_objects.append(usage_object)
+        return usage_objects
+
     @staticmethod
     def collect_and_combine_usage_from_realtime_stream_results(
         results: OpenAIRealtimeStreamList,
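For context, a minimal sketch of how the promoted BaseTokenUsageProcessor could be exercised on its own; the Usage constructor arguments are assumed from litellm.types.utils, and the values are illustrative:

from litellm.cost_calculator import BaseTokenUsageProcessor
from litellm.types.utils import Usage

# e.g. usage collected from two separate responses in a batch
usage_a = Usage(prompt_tokens=10, completion_tokens=5, total_tokens=15)
usage_b = Usage(prompt_tokens=20, completion_tokens=7, total_tokens=27)

combined = BaseTokenUsageProcessor.combine_usage_objects([usage_a, usage_b])
print(combined.prompt_tokens, combined.completion_tokens, combined.total_tokens)
# expected: 30 12 42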

litellm/llms/anthropic/__init__.py

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
+from typing import Type, Union
+
+from .batches.transformation import AnthropicBatchesConfig
+from .chat.transformation import AnthropicConfig
+
+__all__ = ["AnthropicBatchesConfig", "AnthropicConfig"]
+
+
+def get_anthropic_config(
+    url_route: str,
+) -> Union[Type[AnthropicBatchesConfig], Type[AnthropicConfig]]:
+    if "messages/batches" in url_route and "results" in url_route:
+        return AnthropicBatchesConfig
+    else:
+        return AnthropicConfig
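A quick sketch of the routing behavior; the batch-results path shape is assumed from Anthropic's /v1/messages/batches/{batch_id}/results endpoint:

from litellm.llms.anthropic import get_anthropic_config

# Batch results route -> batches-specific config
config_cls = get_anthropic_config("/v1/messages/batches/batch_abc123/results")
assert config_cls.__name__ == "AnthropicBatchesConfig"

# Plain chat route -> default chat config
config_cls = get_anthropic_config("/v1/messages")
assert config_cls.__name__ == "AnthropicConfig"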
litellm/llms/anthropic/batches/transformation.py

Lines changed: 76 additions & 0 deletions
@@ -0,0 +1,76 @@
+import json
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, cast
+
+from httpx import Response
+
+from litellm.types.llms.openai import AllMessageValues
+from litellm.utils import ModelResponse
+
+if TYPE_CHECKING:
+    from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+
+    LoggingClass = LiteLLMLoggingObj
+else:
+    LoggingClass = Any
+
+
+class AnthropicBatchesConfig:
+    def __init__(self):
+        from ..chat.transformation import AnthropicConfig
+
+        self.anthropic_chat_config = AnthropicConfig()  # initialize once
+
+    def transform_response(
+        self,
+        model: str,
+        raw_response: Response,
+        model_response: ModelResponse,
+        logging_obj: LoggingClass,
+        request_data: Dict,
+        messages: List[AllMessageValues],
+        optional_params: Dict,
+        litellm_params: dict,
+        encoding: Any,
+        api_key: Optional[str] = None,
+        json_mode: Optional[bool] = None,
+    ) -> ModelResponse:
+        from litellm.cost_calculator import BaseTokenUsageProcessor
+        from litellm.types.utils import Usage
+
+        response_text = raw_response.text.strip()
+        all_usage: List[Usage] = []
+
+        try:
+            # Split by newlines and try to parse each line as JSON
+            lines = response_text.split("\n")
+            for line in lines:
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    response_json = json.loads(line)
+                    # Update model_response with the parsed JSON
+                    completion_response = response_json["result"]["message"]
+                    transformed_response = (
+                        self.anthropic_chat_config.transform_parsed_response(
+                            completion_response=completion_response,
+                            raw_response=raw_response,
+                            model_response=model_response,
+                        )
+                    )
+
+                    transformed_response_usage = getattr(
+                        transformed_response, "usage", None
+                    )
+                    if transformed_response_usage:
+                        all_usage.append(cast(Usage, transformed_response_usage))
+                except json.JSONDecodeError:
+                    continue
+
+            ## SUM ALL USAGE
+            combined_usage = BaseTokenUsageProcessor.combine_usage_objects(all_usage)
+            setattr(model_response, "usage", combined_usage)
+
+            return model_response
+        except Exception as e:
+            raise e
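The parser above treats the batch results body as JSON Lines and reads result.message from each line. A sketch of one such line, with illustrative values following Anthropic's documented batch-results shape:

import json

# One line of an Anthropic batch results .jsonl body (illustrative values)
line = (
    '{"custom_id": "req-1", "result": {"type": "succeeded", "message": '
    '{"id": "msg_01XYZ", "type": "message", "role": "assistant", '
    '"model": "claude-3-5-haiku-20241022", '
    '"content": [{"type": "text", "text": "Hello!"}], '
    '"stop_reason": "end_turn", '
    '"usage": {"input_tokens": 12, "output_tokens": 6}}}}'
)

parsed = json.loads(line)
completion_response = parsed["result"]["message"]  # what gets transformed per line
print(completion_response["usage"])  # {'input_tokens': 12, 'output_tokens': 6}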

litellm/llms/anthropic/chat/transformation.py

Lines changed: 47 additions & 30 deletions
@@ -784,44 +784,17 @@ def calculate_usage(
         )
         return usage
 
-    def transform_response(
+    def transform_parsed_response(
         self,
-        model: str,
+        completion_response: dict,
         raw_response: httpx.Response,
         model_response: ModelResponse,
-        logging_obj: LoggingClass,
-        request_data: Dict,
-        messages: List[AllMessageValues],
-        optional_params: Dict,
-        litellm_params: dict,
-        encoding: Any,
-        api_key: Optional[str] = None,
         json_mode: Optional[bool] = None,
-    ) -> ModelResponse:
+    ):
         _hidden_params: Dict = {}
         _hidden_params["additional_headers"] = process_anthropic_headers(
             dict(raw_response.headers)
         )
-        ## LOGGING
-        logging_obj.post_call(
-            input=messages,
-            api_key=api_key,
-            original_response=raw_response.text,
-            additional_args={"complete_input_dict": request_data},
-        )
-
-        ## RESPONSE OBJECT
-        try:
-            completion_response = raw_response.json()
-        except Exception as e:
-            response_headers = getattr(raw_response, "headers", None)
-            raise AnthropicError(
-                message="Unable to get json response - {}, Original Response: {}".format(
-                    str(e), raw_response.text
-                ),
-                status_code=raw_response.status_code,
-                headers=response_headers,
-            )
         if "error" in completion_response:
             response_headers = getattr(raw_response, "headers", None)
             raise AnthropicError(
@@ -890,6 +863,50 @@ def transform_response(
         model_response.model = completion_response["model"]
 
         model_response._hidden_params = _hidden_params
+
+        return model_response
+
+    def transform_response(
+        self,
+        model: str,
+        raw_response: httpx.Response,
+        model_response: ModelResponse,
+        logging_obj: LoggingClass,
+        request_data: Dict,
+        messages: List[AllMessageValues],
+        optional_params: Dict,
+        litellm_params: dict,
+        encoding: Any,
+        api_key: Optional[str] = None,
+        json_mode: Optional[bool] = None,
+    ) -> ModelResponse:
+        ## LOGGING
+        logging_obj.post_call(
+            input=messages,
+            api_key=api_key,
+            original_response=raw_response.text,
+            additional_args={"complete_input_dict": request_data},
+        )
+
+        ## RESPONSE OBJECT
+        try:
+            completion_response = raw_response.json()
+        except Exception as e:
+            response_headers = getattr(raw_response, "headers", None)
+            raise AnthropicError(
+                message="Unable to get json response - {}, Original Response: {}".format(
+                    str(e), raw_response.text
+                ),
+                status_code=raw_response.status_code,
+                headers=response_headers,
+            )
+
+        model_response = self.transform_parsed_response(
+            completion_response=completion_response,
+            raw_response=raw_response,
+            model_response=model_response,
+            json_mode=json_mode,
+        )
         return model_response
 
     @staticmethod
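The refactor separates HTTP/JSON handling from the per-message transformation, so callers that already hold a parsed message dict (like the batches config above) can call transform_parsed_response directly. A minimal sketch; the message dict follows Anthropic's /v1/messages response shape with illustrative values, and which fields are strictly required is not verified here:

import httpx
import litellm
from litellm.llms.anthropic.chat.transformation import AnthropicConfig

completion_response = {  # illustrative Anthropic message payload
    "id": "msg_01XYZ",
    "type": "message",
    "role": "assistant",
    "model": "claude-3-5-haiku-20241022",
    "content": [{"type": "text", "text": "Hello!"}],
    "stop_reason": "end_turn",
    "stop_sequence": None,
    "usage": {"input_tokens": 12, "output_tokens": 6},
}

model_response = AnthropicConfig().transform_parsed_response(
    completion_response=completion_response,
    raw_response=httpx.Response(200),  # headers feed _hidden_params
    model_response=litellm.ModelResponse(),
)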

litellm/llms/vertex_ai/common_utils.py

Lines changed: 6 additions & 2 deletions
@@ -223,16 +223,20 @@ def _filter_anyof_fields(schema_dict: Dict[str, Any]) -> Dict[str, Any]:
     E.g. {"anyOf": [{"type": "string"}, {"type": "null"}], "default": "test", "title": "test"} -> {"anyOf": [{"type": "string", "title": "test"}, {"type": "null", "title": "test"}]}
     """
     title = schema_dict.get("title", None)
+    description = schema_dict.get("description", None)
 
     if isinstance(schema_dict, dict) and schema_dict.get("anyOf"):
         any_of = schema_dict["anyOf"]
         if (
-            title
+            (title or description)
             and isinstance(any_of, list)
             and all(isinstance(item, dict) for item in any_of)
         ):
             for item in any_of:
-                item["title"] = title
+                if title:
+                    item["title"] = title
+                if description:
+                    item["description"] = description
             return {"anyOf": any_of}
         else:
             return schema_dict
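The effect of the fix, sketched on a schema like the one in the docstring: a parent description, like a parent title, is now copied into each anyOf member before the wrapper keys are stripped.

from litellm.llms.vertex_ai.common_utils import _filter_anyof_fields

schema = {
    "anyOf": [{"type": "string"}, {"type": "null"}],
    "title": "name",
    "description": "The user's name",
}
print(_filter_anyof_fields(schema))
# {'anyOf': [{'type': 'string', 'title': 'name', 'description': "The user's name"},
#            {'type': 'null', 'title': 'name', 'description': "The user's name"}]}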

litellm/proxy/pass_through_endpoints/llm_provider_handlers/anthropic_passthrough_logging_handler.py

Lines changed: 6 additions & 5 deletions
@@ -7,10 +7,10 @@
 import litellm
 from litellm._logging import verbose_proxy_logger
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.llms.anthropic import get_anthropic_config
 from litellm.llms.anthropic.chat.handler import (
     ModelResponseIterator as AnthropicModelResponseIterator,
 )
-from litellm.llms.anthropic.chat.transformation import AnthropicConfig
 from litellm.proxy._types import PassThroughEndpointLoggingTypedDict
 from litellm.proxy.auth.auth_utils import get_end_user_id_from_request_body
 from litellm.types.passthrough_endpoints.pass_through_endpoints import (
@@ -43,7 +43,8 @@ def anthropic_passthrough_handler(
         Transforms Anthropic response to OpenAI response, generates a standard logging object so downstream logging can be handled
         """
         model = response_body.get("model", "")
-        litellm_model_response: ModelResponse = AnthropicConfig().transform_response(
+        anthropic_config = get_anthropic_config(url_route)
+        litellm_model_response: ModelResponse = anthropic_config().transform_response(
             raw_response=httpx_response,
             model_response=litellm.ModelResponse(),
             model=model,
@@ -124,9 +125,9 @@ def _create_anthropic_response_logging_payload(
             litellm_model_response.id = logging_obj.litellm_call_id
             litellm_model_response.model = model
             logging_obj.model_call_details["model"] = model
-            logging_obj.model_call_details["custom_llm_provider"] = (
-                litellm.LlmProviders.ANTHROPIC.value
-            )
+            logging_obj.model_call_details[
+                "custom_llm_provider"
+            ] = litellm.LlmProviders.ANTHROPIC.value
             return kwargs
         except Exception as e:
             verbose_proxy_logger.exception(
