Commit e74ff23

Litellm dev 06 03 2025 p3 (#11388)
* fix(vertex_ai/common_utils.py): Close #11383
* feat(anthropic/batches): new transformation config for anthropic batches in transformation.py
* feat(anthropic/batches): working token tracking for anthropic batch calls via the `/anthropic` passthrough route
* fix(anthropic_passthrough_logging_handler.py): ruff check fixes
1 parent b21efd4 commit e74ff23

6 files changed (+184, -65 lines)

litellm/cost_calculator.py

Lines changed: 34 additions & 28 deletions
@@ -1209,28 +1209,7 @@ def batch_cost_calculator(
     return total_prompt_cost, total_completion_cost
 
 
-class RealtimeAPITokenUsageProcessor:
-    @staticmethod
-    def collect_usage_from_realtime_stream_results(
-        results: OpenAIRealtimeStreamList,
-    ) -> List[Usage]:
-        """
-        Collect usage from realtime stream results
-        """
-        response_done_events: List[OpenAIRealtimeStreamResponseBaseObject] = cast(
-            List[OpenAIRealtimeStreamResponseBaseObject],
-            [result for result in results if result["type"] == "response.done"],
-        )
-        usage_objects: List[Usage] = []
-        for result in response_done_events:
-            usage_object = (
-                ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
-                    result["response"].get("usage", {})
-                )
-            )
-            usage_objects.append(usage_object)
-        return usage_objects
-
+class BaseTokenUsageProcessor:
     @staticmethod
     def combine_usage_objects(usage_objects: List[Usage]) -> Usage:
         """
@@ -1266,13 +1245,17 @@ def combine_usage_objects(usage_objects: List[Usage]) -> Usage:
             combined.prompt_tokens_details = PromptTokensDetailsWrapper()
 
         # Check what keys exist in the model's prompt_tokens_details
-        for attr in dir(usage.prompt_tokens_details):
-            if not attr.startswith("_") and not callable(
-                getattr(usage.prompt_tokens_details, attr)
+        for attr in usage.prompt_tokens_details.model_fields:
+            if (
+                hasattr(usage.prompt_tokens_details, attr)
+                and not attr.startswith("_")
+                and not callable(getattr(usage.prompt_tokens_details, attr))
             ):
-                current_val = getattr(combined.prompt_tokens_details, attr, 0)
-                new_val = getattr(usage.prompt_tokens_details, attr, 0)
-                if new_val is not None:
+                current_val = (
+                    getattr(combined.prompt_tokens_details, attr, 0) or 0
+                )
+                new_val = getattr(usage.prompt_tokens_details, attr, 0) or 0
+                if new_val is not None and isinstance(new_val, (int, float)):
                     setattr(
                         combined.prompt_tokens_details,
                         attr,
@@ -1308,6 +1291,29 @@ def combine_usage_objects(usage_objects: List[Usage]) -> Usage:
 
         return combined
 
+
+class RealtimeAPITokenUsageProcessor(BaseTokenUsageProcessor):
+    @staticmethod
+    def collect_usage_from_realtime_stream_results(
+        results: OpenAIRealtimeStreamList,
+    ) -> List[Usage]:
+        """
+        Collect usage from realtime stream results
+        """
+        response_done_events: List[OpenAIRealtimeStreamResponseBaseObject] = cast(
+            List[OpenAIRealtimeStreamResponseBaseObject],
+            [result for result in results if result["type"] == "response.done"],
+        )
+        usage_objects: List[Usage] = []
+        for result in response_done_events:
+            usage_object = (
+                ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
+                    result["response"].get("usage", {})
+                )
+            )
+            usage_objects.append(usage_object)
+        return usage_objects
+
     @staticmethod
     def collect_and_combine_usage_from_realtime_stream_results(
         results: OpenAIRealtimeStreamList,
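For context, a minimal sketch of how the promoted BaseTokenUsageProcessor could be exercised on its own; the Usage constructor arguments are assumed from litellm.types.utils, and the values are illustrative:

from litellm.cost_calculator import BaseTokenUsageProcessor
from litellm.types.utils import Usage

# e.g. usage collected from two separate responses in a batch
usage_a = Usage(prompt_tokens=10, completion_tokens=5, total_tokens=15)
usage_b = Usage(prompt_tokens=20, completion_tokens=7, total_tokens=27)

combined = BaseTokenUsageProcessor.combine_usage_objects([usage_a, usage_b])
print(combined.prompt_tokens, combined.completion_tokens, combined.total_tokens)
# expected: 30 12 42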

litellm/llms/anthropic/__init__.py

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
+from typing import Type, Union
+
+from .batches.transformation import AnthropicBatchesConfig
+from .chat.transformation import AnthropicConfig
+
+__all__ = ["AnthropicBatchesConfig", "AnthropicConfig"]
+
+
+def get_anthropic_config(
+    url_route: str,
+) -> Union[Type[AnthropicBatchesConfig], Type[AnthropicConfig]]:
+    if "messages/batches" in url_route and "results" in url_route:
+        return AnthropicBatchesConfig
+    else:
+        return AnthropicConfig
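A quick sketch of the routing behavior; the batch-results path shape is assumed from Anthropic's /v1/messages/batches/{batch_id}/results endpoint:

from litellm.llms.anthropic import get_anthropic_config

# Batch results route -> batches-specific config
config_cls = get_anthropic_config("/v1/messages/batches/batch_abc123/results")
assert config_cls.__name__ == "AnthropicBatchesConfig"

# Plain chat route -> default chat config
config_cls = get_anthropic_config("/v1/messages")
assert config_cls.__name__ == "AnthropicConfig"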
litellm/llms/anthropic/batches/transformation.py

Lines changed: 76 additions & 0 deletions
@@ -0,0 +1,76 @@
+import json
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, cast
+
+from httpx import Response
+
+from litellm.types.llms.openai import AllMessageValues
+from litellm.utils import ModelResponse
+
+if TYPE_CHECKING:
+    from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+
+    LoggingClass = LiteLLMLoggingObj
+else:
+    LoggingClass = Any
+
+
+class AnthropicBatchesConfig:
+    def __init__(self):
+        from ..chat.transformation import AnthropicConfig
+
+        self.anthropic_chat_config = AnthropicConfig()  # initialize once
+
+    def transform_response(
+        self,
+        model: str,
+        raw_response: Response,
+        model_response: ModelResponse,
+        logging_obj: LoggingClass,
+        request_data: Dict,
+        messages: List[AllMessageValues],
+        optional_params: Dict,
+        litellm_params: dict,
+        encoding: Any,
+        api_key: Optional[str] = None,
+        json_mode: Optional[bool] = None,
+    ) -> ModelResponse:
+        from litellm.cost_calculator import BaseTokenUsageProcessor
+        from litellm.types.utils import Usage
+
+        response_text = raw_response.text.strip()
+        all_usage: List[Usage] = []
+
+        try:
+            # Split by newlines and try to parse each line as JSON
+            lines = response_text.split("\n")
+            for line in lines:
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    response_json = json.loads(line)
+                    # Update model_response with the parsed JSON
+                    completion_response = response_json["result"]["message"]
+                    transformed_response = (
+                        self.anthropic_chat_config.transform_parsed_response(
+                            completion_response=completion_response,
+                            raw_response=raw_response,
+                            model_response=model_response,
+                        )
+                    )
+
+                    transformed_response_usage = getattr(
+                        transformed_response, "usage", None
+                    )
+                    if transformed_response_usage:
+                        all_usage.append(cast(Usage, transformed_response_usage))
+                except json.JSONDecodeError:
+                    continue
+
+            ## SUM ALL USAGE
+            combined_usage = BaseTokenUsageProcessor.combine_usage_objects(all_usage)
+            setattr(model_response, "usage", combined_usage)
+
+            return model_response
+        except Exception as e:
+            raise e
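The parser above treats the batch results body as JSON Lines and reads result.message from each line. A sketch of one such line, with illustrative values following Anthropic's documented batch-results shape:

import json

# One line of an Anthropic batch results .jsonl body (illustrative values)
line = (
    '{"custom_id": "req-1", "result": {"type": "succeeded", "message": '
    '{"id": "msg_01XYZ", "type": "message", "role": "assistant", '
    '"model": "claude-3-5-haiku-20241022", '
    '"content": [{"type": "text", "text": "Hello!"}], '
    '"stop_reason": "end_turn", '
    '"usage": {"input_tokens": 12, "output_tokens": 6}}}}'
)

parsed = json.loads(line)
completion_response = parsed["result"]["message"]  # what gets transformed per line
print(completion_response["usage"])  # {'input_tokens': 12, 'output_tokens': 6}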

litellm/llms/anthropic/chat/transformation.py

Lines changed: 47 additions & 30 deletions
@@ -784,44 +784,17 @@ def calculate_usage(
         )
         return usage
 
-    def transform_response(
+    def transform_parsed_response(
         self,
-        model: str,
+        completion_response: dict,
         raw_response: httpx.Response,
         model_response: ModelResponse,
-        logging_obj: LoggingClass,
-        request_data: Dict,
-        messages: List[AllMessageValues],
-        optional_params: Dict,
-        litellm_params: dict,
-        encoding: Any,
-        api_key: Optional[str] = None,
         json_mode: Optional[bool] = None,
-    ) -> ModelResponse:
+    ):
         _hidden_params: Dict = {}
         _hidden_params["additional_headers"] = process_anthropic_headers(
             dict(raw_response.headers)
         )
-        ## LOGGING
-        logging_obj.post_call(
-            input=messages,
-            api_key=api_key,
-            original_response=raw_response.text,
-            additional_args={"complete_input_dict": request_data},
-        )
-
-        ## RESPONSE OBJECT
-        try:
-            completion_response = raw_response.json()
-        except Exception as e:
-            response_headers = getattr(raw_response, "headers", None)
-            raise AnthropicError(
-                message="Unable to get json response - {}, Original Response: {}".format(
-                    str(e), raw_response.text
-                ),
-                status_code=raw_response.status_code,
-                headers=response_headers,
-            )
         if "error" in completion_response:
             response_headers = getattr(raw_response, "headers", None)
             raise AnthropicError(
@@ -890,6 +863,50 @@ def transform_response(
         model_response.model = completion_response["model"]
 
         model_response._hidden_params = _hidden_params
+
+        return model_response
+
+    def transform_response(
+        self,
+        model: str,
+        raw_response: httpx.Response,
+        model_response: ModelResponse,
+        logging_obj: LoggingClass,
+        request_data: Dict,
+        messages: List[AllMessageValues],
+        optional_params: Dict,
+        litellm_params: dict,
+        encoding: Any,
+        api_key: Optional[str] = None,
+        json_mode: Optional[bool] = None,
+    ) -> ModelResponse:
+        ## LOGGING
+        logging_obj.post_call(
+            input=messages,
+            api_key=api_key,
+            original_response=raw_response.text,
+            additional_args={"complete_input_dict": request_data},
+        )
+
+        ## RESPONSE OBJECT
+        try:
+            completion_response = raw_response.json()
+        except Exception as e:
+            response_headers = getattr(raw_response, "headers", None)
+            raise AnthropicError(
+                message="Unable to get json response - {}, Original Response: {}".format(
+                    str(e), raw_response.text
+                ),
+                status_code=raw_response.status_code,
+                headers=response_headers,
+            )
+
+        model_response = self.transform_parsed_response(
+            completion_response=completion_response,
+            raw_response=raw_response,
+            model_response=model_response,
+            json_mode=json_mode,
+        )
         return model_response
 
     @staticmethod
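The refactor separates HTTP/JSON handling from the per-message transformation, so callers that already hold a parsed message dict (like the batches config above) can call transform_parsed_response directly. A minimal sketch; the message dict follows Anthropic's /v1/messages response shape with illustrative values, and which fields are strictly required is not verified here:

import httpx
import litellm
from litellm.llms.anthropic.chat.transformation import AnthropicConfig

completion_response = {  # illustrative Anthropic message payload
    "id": "msg_01XYZ",
    "type": "message",
    "role": "assistant",
    "model": "claude-3-5-haiku-20241022",
    "content": [{"type": "text", "text": "Hello!"}],
    "stop_reason": "end_turn",
    "stop_sequence": None,
    "usage": {"input_tokens": 12, "output_tokens": 6},
}

model_response = AnthropicConfig().transform_parsed_response(
    completion_response=completion_response,
    raw_response=httpx.Response(200),  # headers feed _hidden_params
    model_response=litellm.ModelResponse(),
)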

litellm/llms/vertex_ai/common_utils.py

Lines changed: 6 additions & 2 deletions
@@ -223,16 +223,20 @@ def _filter_anyof_fields(schema_dict: Dict[str, Any]) -> Dict[str, Any]:
     E.g. {"anyOf": [{"type": "string"}, {"type": "null"}], "default": "test", "title": "test"} -> {"anyOf": [{"type": "string", "title": "test"}, {"type": "null", "title": "test"}]}
     """
     title = schema_dict.get("title", None)
+    description = schema_dict.get("description", None)
 
     if isinstance(schema_dict, dict) and schema_dict.get("anyOf"):
         any_of = schema_dict["anyOf"]
         if (
-            title
+            (title or description)
             and isinstance(any_of, list)
             and all(isinstance(item, dict) for item in any_of)
         ):
             for item in any_of:
-                item["title"] = title
+                if title:
+                    item["title"] = title
+                if description:
+                    item["description"] = description
             return {"anyOf": any_of}
         else:
             return schema_dict
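The effect of the fix, sketched on a schema like the one in the docstring: a parent description, like a parent title, is now copied into each anyOf member before the wrapper keys are stripped.

from litellm.llms.vertex_ai.common_utils import _filter_anyof_fields

schema = {
    "anyOf": [{"type": "string"}, {"type": "null"}],
    "title": "name",
    "description": "The user's name",
}
print(_filter_anyof_fields(schema))
# {'anyOf': [{'type': 'string', 'title': 'name', 'description': "The user's name"},
#            {'type': 'null', 'title': 'name', 'description': "The user's name"}]}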

litellm/proxy/pass_through_endpoints/llm_provider_handlers/anthropic_passthrough_logging_handler.py

Lines changed: 6 additions & 5 deletions
@@ -7,10 +7,10 @@
 import litellm
 from litellm._logging import verbose_proxy_logger
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.llms.anthropic import get_anthropic_config
 from litellm.llms.anthropic.chat.handler import (
     ModelResponseIterator as AnthropicModelResponseIterator,
 )
-from litellm.llms.anthropic.chat.transformation import AnthropicConfig
 from litellm.proxy._types import PassThroughEndpointLoggingTypedDict
 from litellm.proxy.auth.auth_utils import get_end_user_id_from_request_body
 from litellm.types.passthrough_endpoints.pass_through_endpoints import (
@@ -43,7 +43,8 @@ def anthropic_passthrough_handler(
         Transforms Anthropic response to OpenAI response, generates a standard logging object so downstream logging can be handled
         """
         model = response_body.get("model", "")
-        litellm_model_response: ModelResponse = AnthropicConfig().transform_response(
+        anthropic_config = get_anthropic_config(url_route)
+        litellm_model_response: ModelResponse = anthropic_config().transform_response(
             raw_response=httpx_response,
             model_response=litellm.ModelResponse(),
             model=model,
@@ -124,9 +125,9 @@ def _create_anthropic_response_logging_payload(
             litellm_model_response.id = logging_obj.litellm_call_id
             litellm_model_response.model = model
             logging_obj.model_call_details["model"] = model
-            logging_obj.model_call_details["custom_llm_provider"] = (
-                litellm.LlmProviders.ANTHROPIC.value
-            )
+            logging_obj.model_call_details[
+                "custom_llm_provider"
+            ] = litellm.LlmProviders.ANTHROPIC.value
             return kwargs
         except Exception as e:
             verbose_proxy_logger.exception(
