Fix otel - follow genai semantic conventions + support 'instructions' param for tts #10608

Merged: 4 commits (May 7, 2025)
38 changes: 19 additions & 19 deletions litellm/integrations/opentelemetry.py
@@ -417,7 +417,7 @@ def set_tools_attributes(self, span: Span, tools):
             if not function:
                 continue

-            prefix = f"{SpanAttributes.LLM_REQUEST_FUNCTIONS}.{i}"
+            prefix = f"{SpanAttributes.LLM_REQUEST_FUNCTIONS.value}.{i}"
             self.safe_set_attribute(
                 span=span,
                 key=f"{prefix}.name",
@@ -473,7 +473,7 @@ def _tool_calls_kv_pair(
             _value = _function.get(key)
             if _value:
                 kv_pairs[
-                    f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.function_call.{key}"
+                    f"{SpanAttributes.LLM_COMPLETIONS.value}.{idx}.function_call.{key}"
                 ] = _value

     return kv_pairs
@@ -525,58 +525,58 @@ def set_attributes(  # noqa: PLR0915
             if kwargs.get("model"):
                 self.safe_set_attribute(
                     span=span,
-                    key=SpanAttributes.LLM_REQUEST_MODEL,
+                    key=SpanAttributes.LLM_REQUEST_MODEL.value,
                     value=kwargs.get("model"),
                 )

             # The LLM request type
             self.safe_set_attribute(
                 span=span,
-                key=SpanAttributes.LLM_REQUEST_TYPE,
+                key=SpanAttributes.LLM_REQUEST_TYPE.value,
                 value=standard_logging_payload["call_type"],
             )

             # The Generative AI Provider: Azure, OpenAI, etc.
             self.safe_set_attribute(
                 span=span,
-                key=SpanAttributes.LLM_SYSTEM,
+                key=SpanAttributes.LLM_SYSTEM.value,
                 value=litellm_params.get("custom_llm_provider", "Unknown"),
             )

             # The maximum number of tokens the LLM generates for a request.
             if optional_params.get("max_tokens"):
                 self.safe_set_attribute(
                     span=span,
-                    key=SpanAttributes.LLM_REQUEST_MAX_TOKENS,
+                    key=SpanAttributes.LLM_REQUEST_MAX_TOKENS.value,
                     value=optional_params.get("max_tokens"),
                 )

             # The temperature setting for the LLM request.
             if optional_params.get("temperature"):
                 self.safe_set_attribute(
                     span=span,
-                    key=SpanAttributes.LLM_REQUEST_TEMPERATURE,
+                    key=SpanAttributes.LLM_REQUEST_TEMPERATURE.value,
                     value=optional_params.get("temperature"),
                 )

             # The top_p sampling setting for the LLM request.
             if optional_params.get("top_p"):
                 self.safe_set_attribute(
                     span=span,
-                    key=SpanAttributes.LLM_REQUEST_TOP_P,
+                    key=SpanAttributes.LLM_REQUEST_TOP_P.value,
                     value=optional_params.get("top_p"),
                 )

             self.safe_set_attribute(
                 span=span,
-                key=SpanAttributes.LLM_IS_STREAMING,
+                key=SpanAttributes.LLM_IS_STREAMING.value,
                 value=str(optional_params.get("stream", False)),
             )

             if optional_params.get("user"):
                 self.safe_set_attribute(
                     span=span,
-                    key=SpanAttributes.LLM_USER,
+                    key=SpanAttributes.LLM_USER.value,
                     value=optional_params.get("user"),
                 )

@@ -590,29 +590,29 @@ def set_attributes(  # noqa: PLR0915
             if response_obj and response_obj.get("model"):
                 self.safe_set_attribute(
                     span=span,
-                    key=SpanAttributes.LLM_RESPONSE_MODEL,
+                    key=SpanAttributes.LLM_RESPONSE_MODEL.value,
                     value=response_obj.get("model"),
                 )

             usage = response_obj and response_obj.get("usage")
             if usage:
                 self.safe_set_attribute(
                     span=span,
-                    key=SpanAttributes.LLM_USAGE_TOTAL_TOKENS,
+                    key=SpanAttributes.LLM_USAGE_TOTAL_TOKENS.value,
                     value=usage.get("total_tokens"),
                 )

                 # The number of tokens used in the LLM response (completion).
                 self.safe_set_attribute(
                     span=span,
-                    key=SpanAttributes.LLM_USAGE_COMPLETION_TOKENS,
+                    key=SpanAttributes.LLM_USAGE_COMPLETION_TOKENS.value,
                     value=usage.get("completion_tokens"),
                 )

                 # The number of tokens used in the LLM prompt.
                 self.safe_set_attribute(
                     span=span,
-                    key=SpanAttributes.LLM_USAGE_PROMPT_TOKENS,
+                    key=SpanAttributes.LLM_USAGE_PROMPT_TOKENS.value,
                     value=usage.get("prompt_tokens"),
                 )

@@ -634,7 +634,7 @@ def set_attributes(  # noqa: PLR0915
                     if prompt.get("role"):
                         self.safe_set_attribute(
                             span=span,
-                            key=f"{SpanAttributes.LLM_PROMPTS}.{idx}.role",
+                            key=f"{SpanAttributes.LLM_PROMPTS.value}.{idx}.role",
                             value=prompt.get("role"),
                         )

@@ -643,7 +643,7 @@ def set_attributes(  # noqa: PLR0915
                             prompt["content"] = str(prompt.get("content"))
                         self.safe_set_attribute(
                             span=span,
-                            key=f"{SpanAttributes.LLM_PROMPTS}.{idx}.content",
+                            key=f"{SpanAttributes.LLM_PROMPTS.value}.{idx}.content",
                             value=prompt.get("content"),
                         )
             #############################################
@@ -655,14 +655,14 @@ def set_attributes(  # noqa: PLR0915
                     if choice.get("finish_reason"):
                         self.safe_set_attribute(
                             span=span,
-                            key=f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.finish_reason",
+                            key=f"{SpanAttributes.LLM_COMPLETIONS.value}.{idx}.finish_reason",
                             value=choice.get("finish_reason"),
                         )
                     if choice.get("message"):
                         if choice.get("message").get("role"):
                             self.safe_set_attribute(
                                 span=span,
-                                key=f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.role",
+                                key=f"{SpanAttributes.LLM_COMPLETIONS.value}.{idx}.role",
                                 value=choice.get("message").get("role"),
                             )
                         if choice.get("message").get("content"):
@@ -674,7 +674,7 @@ def set_attributes(  # noqa: PLR0915
                             )
                             self.safe_set_attribute(
                                 span=span,
-                                key=f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.content",
+                                key=f"{SpanAttributes.LLM_COMPLETIONS.value}.{idx}.content",
                                 value=choice.get("message").get("content"),
                             )
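
Taken together, the .value changes make set_attributes emit flat, dotted keys in line with the GenAI semantic conventions. An illustrative sketch of the resulting attribute shape for a single-turn chat completion (key strings assume the conventional gen_ai.* values from the semantic-conventions package; the concrete values below are invented):

    # Illustration only: roughly what a span's attributes could look like after
    # set_attributes() runs for one chat completion. Values are made up.
    example_span_attributes = {
        "gen_ai.request.model": "gpt-4o-mini",
        "gen_ai.prompt.0.role": "user",
        "gen_ai.prompt.0.content": "Hello!",
        "gen_ai.completion.0.finish_reason": "stop",
        "gen_ai.completion.0.role": "assistant",
        "gen_ai.completion.0.content": "Hi there!",
    }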

28 changes: 15 additions & 13 deletions litellm/main.py
@@ -2722,9 +2722,9 @@ def completion(  # type: ignore # noqa: PLR0915
                     "aws_region_name" not in optional_params
                     or optional_params["aws_region_name"] is None
                 ):
-                    optional_params["aws_region_name"] = (
-                        aws_bedrock_client.meta.region_name
-                    )
+                    optional_params[
+                        "aws_region_name"
+                    ] = aws_bedrock_client.meta.region_name

             bedrock_route = BedrockModelInfo.get_bedrock_route(model)
             if bedrock_route == "converse":
@@ -4448,9 +4448,9 @@ def adapter_completion(
     new_kwargs = translation_obj.translate_completion_input_params(kwargs=kwargs)

     response: Union[ModelResponse, CustomStreamWrapper] = completion(**new_kwargs)  # type: ignore
-    translated_response: Optional[Union[BaseModel, AdapterCompletionStreamWrapper]] = (
-        None
-    )
+    translated_response: Optional[
+        Union[BaseModel, AdapterCompletionStreamWrapper]
+    ] = None
     if isinstance(response, ModelResponse):
         translated_response = translation_obj.translate_completion_output_params(
             response=response
@@ -5372,6 +5372,7 @@ def speech(  # noqa: PLR0915
     timeout: Optional[Union[float, httpx.Timeout]] = None,
     response_format: Optional[str] = None,
     speed: Optional[int] = None,
+    instructions: Optional[str] = None,
     client=None,
     headers: Optional[dict] = None,
     custom_llm_provider: Optional[str] = None,
@@ -5393,7 +5394,8 @@ def speech(  # noqa: PLR0915
         optional_params["response_format"] = response_format
     if speed is not None:
         optional_params["speed"] = speed  # type: ignore
-
+    if instructions is not None:
+        optional_params["instructions"] = instructions
     if timeout is None:
         timeout = litellm.request_timeout

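With instructions plumbed through optional_params, callers can steer TTS delivery alongside the usual speech arguments. A hedged usage sketch (model, voice, and text are placeholders, not values taken from this PR):

    import litellm

    # Hypothetical call showing the new instructions parameter; the chosen
    # model and voice are placeholders.
    response = litellm.speech(
        model="openai/gpt-4o-mini-tts",
        voice="alloy",
        input="Your order has shipped and should arrive on Thursday.",
        instructions="Speak in a calm, reassuring customer-support tone.",
    )
    response.stream_to_file("shipped.mp3")
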
@@ -5901,9 +5903,9 @@ def stream_chunk_builder(  # noqa: PLR0915
     ]

     if len(content_chunks) > 0:
-        response["choices"][0]["message"]["content"] = (
-            processor.get_combined_content(content_chunks)
-        )
+        response["choices"][0]["message"][
+            "content"
+        ] = processor.get_combined_content(content_chunks)

     reasoning_chunks = [
         chunk
@@ -5914,9 +5916,9 @@ def stream_chunk_builder(  # noqa: PLR0915
     ]

     if len(reasoning_chunks) > 0:
-        response["choices"][0]["message"]["reasoning_content"] = (
-            processor.get_combined_reasoning_content(reasoning_chunks)
-        )
+        response["choices"][0]["message"][
+            "reasoning_content"
+        ] = processor.get_combined_reasoning_content(reasoning_chunks)

     audio_chunks = [
         chunk
1 change: 1 addition & 0 deletions litellm/proxy/_new_secret_config.yaml
@@ -65,6 +65,7 @@ litellm_settings:
   num_retries: 0
   check_provider_endpoint: true
   cache: true
+  callbacks: ["otel"]

 files_settings:
   - custom_llm_provider: gemini
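
The config change registers the OpenTelemetry callback on the proxy. For reference, the SDK equivalent is to register the same callback in code; a sketch assuming exporter settings (for example OTEL_EXPORTER_OTLP_ENDPOINT) are supplied via environment variables:

    import litellm

    # Mirrors the proxy's callbacks: ["otel"] setting above; assumes OTEL
    # exporter configuration comes from environment variables.
    litellm.callbacks = ["otel"]

    resp = litellm.completion(
        model="gpt-4o-mini",  # placeholder model
        messages=[{"role": "user", "content": "ping"}],
    )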