24
24
import litellm .litellm_core_utils
25
25
import litellm .litellm_core_utils .litellm_logging
26
26
from litellm import verbose_logger
27
+ from litellm .constants import (
28
+ DEFAULT_REASONING_EFFORT_HIGH_THINKING_BUDGET ,
29
+ DEFAULT_REASONING_EFFORT_LOW_THINKING_BUDGET ,
30
+ DEFAULT_REASONING_EFFORT_MEDIUM_THINKING_BUDGET ,
31
+ )
27
32
from litellm .litellm_core_utils .core_helpers import map_finish_reason
28
33
from litellm .llms .base_llm .chat .transformation import BaseConfig , BaseLLMException
29
34
from litellm .llms .custom_httpx .http_handler import (
30
35
AsyncHTTPHandler ,
31
36
HTTPHandler ,
32
37
get_async_httpx_client ,
33
38
)
39
+ from litellm .types .llms .anthropic import AnthropicThinkingParam
34
40
from litellm .types .llms .openai import (
35
41
AllMessageValues ,
36
42
ChatCompletionResponseMessage ,
45
51
ContentType ,
46
52
FunctionCallingConfig ,
47
53
FunctionDeclaration ,
54
+ GeminiThinkingConfig ,
48
55
GenerateContentResponseBody ,
49
56
HttpxPartType ,
50
57
LogprobsResult ,
59
66
TopLogprob ,
60
67
Usage ,
61
68
)
62
- from litellm .utils import CustomStreamWrapper , ModelResponse
69
+ from litellm .utils import CustomStreamWrapper , ModelResponse , supports_reasoning
63
70
64
71
from ....utils import _remove_additional_properties , _remove_strict_from_schema
65
72
from ..common_utils import VertexAIError , _build_vertex_schema
@@ -190,7 +197,7 @@ def get_config(cls):
190
197
return super ().get_config ()
191
198
192
199
def get_supported_openai_params (self , model : str ) -> List [str ]:
193
- return [
200
+ supported_params = [
194
201
"temperature" ,
195
202
"top_p" ,
196
203
"max_tokens" ,
@@ -210,6 +217,10 @@ def get_supported_openai_params(self, model: str) -> List[str]:
210
217
"top_logprobs" ,
211
218
"modalities" ,
212
219
]
220
+ if supports_reasoning (model ):
221
+ supported_params .append ("reasoning_effort" )
222
+ supported_params .append ("thinking" )
223
+ return supported_params
213
224
214
225
def map_tool_choice_values (
215
226
self , model : str , tool_choice : Union [str , dict ]
@@ -313,10 +324,14 @@ def _map_response_schema(self, value: dict) -> dict:
313
324
if isinstance (old_schema , list ):
314
325
for item in old_schema :
315
326
if isinstance (item , dict ):
316
- item = _build_vertex_schema (parameters = item , add_property_ordering = True )
327
+ item = _build_vertex_schema (
328
+ parameters = item , add_property_ordering = True
329
+ )
317
330
318
331
elif isinstance (old_schema , dict ):
319
- old_schema = _build_vertex_schema (parameters = old_schema , add_property_ordering = True )
332
+ old_schema = _build_vertex_schema (
333
+ parameters = old_schema , add_property_ordering = True
334
+ )
320
335
return old_schema
321
336
322
337
def apply_response_schema_transformation (self , value : dict , optional_params : dict ):
@@ -343,6 +358,43 @@ def apply_response_schema_transformation(self, value: dict, optional_params: dic
343
358
value = optional_params ["response_schema" ]
344
359
)
345
360
361
+ @staticmethod
362
+ def _map_reasoning_effort_to_thinking_budget (
363
+ reasoning_effort : str ,
364
+ ) -> GeminiThinkingConfig :
365
+ if reasoning_effort == "low" :
366
+ return {
367
+ "thinkingBudget" : DEFAULT_REASONING_EFFORT_LOW_THINKING_BUDGET ,
368
+ "includeThoughts" : True ,
369
+ }
370
+ elif reasoning_effort == "medium" :
371
+ return {
372
+ "thinkingBudget" : DEFAULT_REASONING_EFFORT_MEDIUM_THINKING_BUDGET ,
373
+ "includeThoughts" : True ,
374
+ }
375
+ elif reasoning_effort == "high" :
376
+ return {
377
+ "thinkingBudget" : DEFAULT_REASONING_EFFORT_HIGH_THINKING_BUDGET ,
378
+ "includeThoughts" : True ,
379
+ }
380
+ else :
381
+ raise ValueError (f"Invalid reasoning effort: { reasoning_effort } " )
382
+
383
+ @staticmethod
384
+ def _map_thinking_param (
385
+ thinking_param : AnthropicThinkingParam ,
386
+ ) -> GeminiThinkingConfig :
387
+ thinking_enabled = thinking_param .get ("type" ) == "enabled"
388
+ thinking_budget = thinking_param .get ("budget_tokens" )
389
+
390
+ params : GeminiThinkingConfig = {}
391
+ if thinking_enabled :
392
+ params ["includeThoughts" ] = True
393
+ if thinking_budget :
394
+ params ["thinkingBudget" ] = thinking_budget
395
+
396
+ return params
397
+
346
398
def map_openai_params (
347
399
self ,
348
400
non_default_params : Dict ,
@@ -399,6 +451,16 @@ def map_openai_params(
399
451
optional_params ["tool_choice" ] = _tool_choice_value
400
452
elif param == "seed" :
401
453
optional_params ["seed" ] = value
454
+ elif param == "reasoning_effort" and isinstance (value , str ):
455
+ optional_params [
456
+ "thinkingConfig"
457
+ ] = VertexGeminiConfig ._map_reasoning_effort_to_thinking_budget (value )
458
+ elif param == "thinking" :
459
+ optional_params [
460
+ "thinkingConfig"
461
+ ] = VertexGeminiConfig ._map_thinking_param (
462
+ cast (AnthropicThinkingParam , value )
463
+ )
402
464
elif param == "modalities" and isinstance (value , list ):
403
465
response_modalities = []
404
466
for modality in value :
@@ -514,19 +576,27 @@ def translate_exception_str(self, exception_string: str):
514
576
515
577
def get_assistant_content_message(
    self, parts: List[HttpxPartType]
) -> Tuple[Optional[str], Optional[str]]:
    """Split response parts into visible content and reasoning content.

    Each part contributes its ``text``, or a ``data:<mime>;base64,<data>`` URI
    built from its ``inlineData``. Parts flagged with ``thought`` set to
    ``True`` are routed to the reasoning string; all others go to the content
    string.

    Args:
        parts: Response parts from a single candidate.

    Returns:
        ``(content, reasoning_content)``. Each element is ``None`` when no
        part contributed any text to it.
    """
    content_fragments: List[str] = []
    reasoning_fragments: List[str] = []

    for part in parts:
        if "text" in part:
            fragment = part["text"]
        elif "inlineData" in part:  # base64 encoded image
            inline_data = part["inlineData"]
            fragment = "data:{};base64,{}".format(
                inline_data["mimeType"], inline_data["data"]
            )
        else:
            # Parts without text or inline data (e.g. function calls) must not
            # turn a missing content into an empty string.
            continue

        if not fragment:
            continue

        if part.get("thought") is True:
            reasoning_fragments.append(fragment)
        else:
            content_fragments.append(fragment)

    content_str = "".join(content_fragments) if content_fragments else None
    reasoning_content_str = (
        "".join(reasoning_fragments) if reasoning_fragments else None
    )
    return content_str, reasoning_content_str
530
600
531
601
def _transform_parts (
532
602
self ,
@@ -677,6 +747,7 @@ def _calculate_usage(
677
747
audio_tokens : Optional [int ] = None
678
748
text_tokens : Optional [int ] = None
679
749
prompt_tokens_details : Optional [PromptTokensDetailsWrapper ] = None
750
+ reasoning_tokens : Optional [int ] = None
680
751
if "cachedContentTokenCount" in completion_response ["usageMetadata" ]:
681
752
cached_tokens = completion_response ["usageMetadata" ][
682
753
"cachedContentTokenCount"
@@ -687,7 +758,10 @@ def _calculate_usage(
687
758
audio_tokens = detail ["tokenCount" ]
688
759
elif detail ["modality" ] == "TEXT" :
689
760
text_tokens = detail ["tokenCount" ]
690
-
761
+ if "thoughtsTokenCount" in completion_response ["usageMetadata" ]:
762
+ reasoning_tokens = completion_response ["usageMetadata" ][
763
+ "thoughtsTokenCount"
764
+ ]
691
765
prompt_tokens_details = PromptTokensDetailsWrapper (
692
766
cached_tokens = cached_tokens ,
693
767
audio_tokens = audio_tokens ,
@@ -703,6 +777,7 @@ def _calculate_usage(
703
777
),
704
778
total_tokens = completion_response ["usageMetadata" ].get ("totalTokenCount" , 0 ),
705
779
prompt_tokens_details = prompt_tokens_details ,
780
+ reasoning_tokens = reasoning_tokens ,
706
781
)
707
782
708
783
return usage
@@ -731,11 +806,16 @@ def _process_candidates(self, _candidates, model_response, litellm_params):
731
806
citation_metadata .append (candidate ["citationMetadata" ])
732
807
733
808
if "parts" in candidate ["content" ]:
734
- chat_completion_message [
735
- "content"
736
- ] = VertexGeminiConfig ().get_assistant_content_message (
809
+ (
810
+ content ,
811
+ reasoning_content ,
812
+ ) = VertexGeminiConfig ().get_assistant_content_message (
737
813
parts = candidate ["content" ]["parts" ]
738
814
)
815
+ if content is not None :
816
+ chat_completion_message ["content" ] = content
817
+ if reasoning_content is not None :
818
+ chat_completion_message ["reasoning_content" ] = reasoning_content
739
819
740
820
functions , tools = self ._transform_parts (
741
821
parts = candidate ["content" ]["parts" ],
0 commit comments