Fix otel - follow genai semantic conventions + support 'instructions' param for tts #10608

Merged: 4 commits (May 7, 2025)
38 changes: 19 additions & 19 deletions litellm/integrations/opentelemetry.py
@@ -417,7 +417,7 @@ def set_tools_attributes(self, span: Span, tools):
             if not function:
                 continue

-            prefix = f"{SpanAttributes.LLM_REQUEST_FUNCTIONS}.{i}"
+            prefix = f"{SpanAttributes.LLM_REQUEST_FUNCTIONS.value}.{i}"
             self.safe_set_attribute(
                 span=span,
                 key=f"{prefix}.name",
@@ -473,7 +473,7 @@ def _tool_calls_kv_pair(
             _value = _function.get(key)
             if _value:
                 kv_pairs[
-                    f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.function_call.{key}"
+                    f"{SpanAttributes.LLM_COMPLETIONS.value}.{idx}.function_call.{key}"
                 ] = _value

     return kv_pairs
@@ -525,58 +525,58 @@ def set_attributes(  # noqa: PLR0915
             if kwargs.get("model"):
                 self.safe_set_attribute(
                     span=span,
-                    key=SpanAttributes.LLM_REQUEST_MODEL,
+                    key=SpanAttributes.LLM_REQUEST_MODEL.value,
                     value=kwargs.get("model"),
                 )

             # The LLM request type
             self.safe_set_attribute(
                 span=span,
-                key=SpanAttributes.LLM_REQUEST_TYPE,
+                key=SpanAttributes.LLM_REQUEST_TYPE.value,
                 value=standard_logging_payload["call_type"],
             )

             # The Generative AI Provider: Azure, OpenAI, etc.
             self.safe_set_attribute(
                 span=span,
-                key=SpanAttributes.LLM_SYSTEM,
+                key=SpanAttributes.LLM_SYSTEM.value,
                 value=litellm_params.get("custom_llm_provider", "Unknown"),
             )

             # The maximum number of tokens the LLM generates for a request.
             if optional_params.get("max_tokens"):
                 self.safe_set_attribute(
                     span=span,
-                    key=SpanAttributes.LLM_REQUEST_MAX_TOKENS,
+                    key=SpanAttributes.LLM_REQUEST_MAX_TOKENS.value,
                     value=optional_params.get("max_tokens"),
                 )

             # The temperature setting for the LLM request.
             if optional_params.get("temperature"):
                 self.safe_set_attribute(
                     span=span,
-                    key=SpanAttributes.LLM_REQUEST_TEMPERATURE,
+                    key=SpanAttributes.LLM_REQUEST_TEMPERATURE.value,
                     value=optional_params.get("temperature"),
                 )

             # The top_p sampling setting for the LLM request.
             if optional_params.get("top_p"):
                 self.safe_set_attribute(
                     span=span,
-                    key=SpanAttributes.LLM_REQUEST_TOP_P,
+                    key=SpanAttributes.LLM_REQUEST_TOP_P.value,
                     value=optional_params.get("top_p"),
                 )

             self.safe_set_attribute(
                 span=span,
-                key=SpanAttributes.LLM_IS_STREAMING,
+                key=SpanAttributes.LLM_IS_STREAMING.value,
                 value=str(optional_params.get("stream", False)),
             )

             if optional_params.get("user"):
                 self.safe_set_attribute(
                     span=span,
-                    key=SpanAttributes.LLM_USER,
+                    key=SpanAttributes.LLM_USER.value,
                     value=optional_params.get("user"),
                 )

@@ -590,29 +590,29 @@ def set_attributes(  # noqa: PLR0915
             if response_obj and response_obj.get("model"):
                 self.safe_set_attribute(
                     span=span,
-                    key=SpanAttributes.LLM_RESPONSE_MODEL,
+                    key=SpanAttributes.LLM_RESPONSE_MODEL.value,
                     value=response_obj.get("model"),
                 )

             usage = response_obj and response_obj.get("usage")
             if usage:
                 self.safe_set_attribute(
                     span=span,
-                    key=SpanAttributes.LLM_USAGE_TOTAL_TOKENS,
+                    key=SpanAttributes.LLM_USAGE_TOTAL_TOKENS.value,
                     value=usage.get("total_tokens"),
                 )

                 # The number of tokens used in the LLM response (completion).
                 self.safe_set_attribute(
                     span=span,
-                    key=SpanAttributes.LLM_USAGE_COMPLETION_TOKENS,
+                    key=SpanAttributes.LLM_USAGE_COMPLETION_TOKENS.value,
                     value=usage.get("completion_tokens"),
                 )

                 # The number of tokens used in the LLM prompt.
                 self.safe_set_attribute(
                     span=span,
-                    key=SpanAttributes.LLM_USAGE_PROMPT_TOKENS,
+                    key=SpanAttributes.LLM_USAGE_PROMPT_TOKENS.value,
                     value=usage.get("prompt_tokens"),
                 )

@@ -634,7 +634,7 @@ def set_attributes(  # noqa: PLR0915
                     if prompt.get("role"):
                         self.safe_set_attribute(
                             span=span,
-                            key=f"{SpanAttributes.LLM_PROMPTS}.{idx}.role",
+                            key=f"{SpanAttributes.LLM_PROMPTS.value}.{idx}.role",
                             value=prompt.get("role"),
                         )

@@ -643,7 +643,7 @@ def set_attributes(  # noqa: PLR0915
                             prompt["content"] = str(prompt.get("content"))
                         self.safe_set_attribute(
                             span=span,
-                            key=f"{SpanAttributes.LLM_PROMPTS}.{idx}.content",
+                            key=f"{SpanAttributes.LLM_PROMPTS.value}.{idx}.content",
                             value=prompt.get("content"),
                         )
             #############################################
@@ -655,14 +655,14 @@ def set_attributes(  # noqa: PLR0915
                     if choice.get("finish_reason"):
                         self.safe_set_attribute(
                             span=span,
-                            key=f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.finish_reason",
+                            key=f"{SpanAttributes.LLM_COMPLETIONS.value}.{idx}.finish_reason",
                             value=choice.get("finish_reason"),
                         )
                     if choice.get("message"):
                         if choice.get("message").get("role"):
                             self.safe_set_attribute(
                                 span=span,
-                                key=f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.role",
+                                key=f"{SpanAttributes.LLM_COMPLETIONS.value}.{idx}.role",
                                 value=choice.get("message").get("role"),
                             )
                         if choice.get("message").get("content"):
@@ -674,7 +674,7 @@ def set_attributes(  # noqa: PLR0915
                             )
                             self.safe_set_attribute(
                                 span=span,
-                                key=f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.content",
+                                key=f"{SpanAttributes.LLM_COMPLETIONS.value}.{idx}.content",
                                 value=choice.get("message").get("content"),
                             )
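
Taken together, the .value changes make set_attributes emit flat, dotted keys in line with the GenAI semantic conventions. An illustrative sketch of the resulting attribute shape for a single-turn chat completion (key strings assume the conventional gen_ai.* values from the semantic-conventions package; the concrete values below are invented):

    # Illustration only: roughly what a span's attributes could look like after
    # set_attributes() runs for one chat completion. Values are made up.
    example_span_attributes = {
        "gen_ai.request.model": "gpt-4o-mini",
        "gen_ai.prompt.0.role": "user",
        "gen_ai.prompt.0.content": "Hello!",
        "gen_ai.completion.0.finish_reason": "stop",
        "gen_ai.completion.0.role": "assistant",
        "gen_ai.completion.0.content": "Hi there!",
    }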

28 changes: 15 additions & 13 deletions litellm/main.py
@@ -2722,9 +2722,9 @@ def completion(  # type: ignore # noqa: PLR0915
                     "aws_region_name" not in optional_params
                     or optional_params["aws_region_name"] is None
                 ):
-                    optional_params["aws_region_name"] = (
-                        aws_bedrock_client.meta.region_name
-                    )
+                    optional_params[
+                        "aws_region_name"
+                    ] = aws_bedrock_client.meta.region_name

             bedrock_route = BedrockModelInfo.get_bedrock_route(model)
             if bedrock_route == "converse":
@@ -4448,9 +4448,9 @@ def adapter_completion(
     new_kwargs = translation_obj.translate_completion_input_params(kwargs=kwargs)

     response: Union[ModelResponse, CustomStreamWrapper] = completion(**new_kwargs)  # type: ignore
-    translated_response: Optional[Union[BaseModel, AdapterCompletionStreamWrapper]] = (
-        None
-    )
+    translated_response: Optional[
+        Union[BaseModel, AdapterCompletionStreamWrapper]
+    ] = None
     if isinstance(response, ModelResponse):
         translated_response = translation_obj.translate_completion_output_params(
             response=response
@@ -5372,6 +5372,7 @@ def speech(  # noqa: PLR0915
     timeout: Optional[Union[float, httpx.Timeout]] = None,
     response_format: Optional[str] = None,
     speed: Optional[int] = None,
+    instructions: Optional[str] = None,
     client=None,
     headers: Optional[dict] = None,
     custom_llm_provider: Optional[str] = None,
@@ -5393,7 +5394,8 @@ def speech(  # noqa: PLR0915
         optional_params["response_format"] = response_format
     if speed is not None:
         optional_params["speed"] = speed  # type: ignore
-
+    if instructions is not None:
+        optional_params["instructions"] = instructions
     if timeout is None:
         timeout = litellm.request_timeout

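With instructions plumbed through optional_params, callers can steer TTS delivery alongside the usual speech arguments. A hedged usage sketch (model, voice, and text are placeholders, not values taken from this PR):

    import litellm

    # Hypothetical call showing the new instructions parameter; the chosen
    # model and voice are placeholders.
    response = litellm.speech(
        model="openai/gpt-4o-mini-tts",
        voice="alloy",
        input="Your order has shipped and should arrive on Thursday.",
        instructions="Speak in a calm, reassuring customer-support tone.",
    )
    response.stream_to_file("shipped.mp3")
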
@@ -5901,9 +5903,9 @@ def stream_chunk_builder(  # noqa: PLR0915
     ]

     if len(content_chunks) > 0:
-        response["choices"][0]["message"]["content"] = (
-            processor.get_combined_content(content_chunks)
-        )
+        response["choices"][0]["message"][
+            "content"
+        ] = processor.get_combined_content(content_chunks)

     reasoning_chunks = [
         chunk
@@ -5914,9 +5916,9 @@ def stream_chunk_builder(  # noqa: PLR0915
     ]

     if len(reasoning_chunks) > 0:
-        response["choices"][0]["message"]["reasoning_content"] = (
-            processor.get_combined_reasoning_content(reasoning_chunks)
-        )
+        response["choices"][0]["message"][
+            "reasoning_content"
+        ] = processor.get_combined_reasoning_content(reasoning_chunks)

     audio_chunks = [
         chunk
1 change: 1 addition & 0 deletions litellm/proxy/_new_secret_config.yaml
@@ -65,6 +65,7 @@ litellm_settings:
   num_retries: 0
   check_provider_endpoint: true
   cache: true
+  callbacks: ["otel"]

 files_settings:
   - custom_llm_provider: gemini
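
The config change registers the OpenTelemetry callback on the proxy. For reference, the SDK equivalent is to register the same callback in code; a sketch assuming exporter settings (for example OTEL_EXPORTER_OTLP_ENDPOINT) are supplied via environment variables:

    import litellm

    # Mirrors the proxy's callbacks: ["otel"] setting above; assumes OTEL
    # exporter configuration comes from environment variables.
    litellm.callbacks = ["otel"]

    resp = litellm.completion(
        model="gpt-4o-mini",  # placeholder model
        messages=[{"role": "user", "content": "ping"}],
    )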