Merged · Changes from 2 commits
sentry_sdk/consts.py (6 additions, 0 deletions)

@@ -518,6 +518,12 @@ class SPANDATA:
Example: ["The weather in Paris is rainy and overcast, with temperatures around 57°F", "The weather in London is sunny and warm, with temperatures around 65°F"]
"""

+ GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN = "gen_ai.response.time_to_first_token"
+ """
+ The time, in seconds, it took to receive the first token from the model.
+ Example: 0.1
+ """
+
GEN_AI_RESPONSE_TOOL_CALLS = "gen_ai.response.tool_calls"
"""
The tool calls in the model's response.
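As a rough illustration of how this new attribute is meant to be consumed: measure elapsed seconds with a monotonic clock and attach the value to the active span. The sketch below is not part of this diff; the token stream is a hypothetical stand-in and the span op/name are placeholders.

import time

import sentry_sdk
from sentry_sdk.consts import SPANDATA


def fake_token_stream():
    # Hypothetical stand-in for a streaming model response.
    yield from ["Hello", ",", " world"]


sentry_sdk.init(traces_sample_rate=1.0)

with sentry_sdk.start_span(op="gen_ai.chat", name="chat demo") as span:
    start = time.perf_counter()
    next(fake_token_stream())  # block until the first token arrives
    # Seconds from request start to first token, e.g. 0.1
    span.set_data(
        SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN,
        time.perf_counter() - start,
    )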
sentry_sdk/integrations/openai.py (26 additions, 2 deletions)

@@ -1,5 +1,6 @@
import sys
from functools import wraps
+ import time

import sentry_sdk
from sentry_sdk import consts
@@ -249,6 +250,7 @@ def _set_output_data(
response: "Any",
kwargs: "dict[str, Any]",
integration: "OpenAIIntegration",
+ start_time: "Optional[float]" = None,
finish_span: bool = True,
) -> None:
if hasattr(response, "model"):
@@ -263,6 +265,8 @@
if messages is not None and isinstance(messages, str):
messages = [messages]

+ ttft: "Optional[float]" = None
+
if hasattr(response, "choices"):
if should_send_default_pii() and integration.include_prompts:
response_text = [
@@ -320,6 +324,7 @@
old_iterator = response._iterator

def new_iterator() -> "Iterator[ChatCompletionChunk]":
+ nonlocal ttft
count_tokens_manually = True
for x in old_iterator:
with capture_internal_exceptions():
@@ -330,6 +335,8 @@ def new_iterator() -> "Iterator[ChatCompletionChunk]":
if hasattr(choice, "delta") and hasattr(
choice.delta, "content"
):
+ if start_time is not None and ttft is None:
+     ttft = time.perf_counter() - start_time
content = choice.delta.content
if len(data_buf) <= choice_index:
data_buf.append([])
@@ -338,6 +345,8 @@ def new_iterator() -> "Iterator[ChatCompletionChunk]":

# OpenAI responses API
elif hasattr(x, "delta"):
+ if start_time is not None and ttft is None:
+     ttft = time.perf_counter() - start_time
if len(data_buf) == 0:
data_buf.append([])
data_buf[0].append(x.delta or "")
@@ -356,6 +365,10 @@ def new_iterator() -> "Iterator[ChatCompletionChunk]":
yield x

with capture_internal_exceptions():
+ if ttft is not None:
+     set_data_normalized(
+         span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
+     )
if len(data_buf) > 0:
all_responses = ["".join(chunk) for chunk in data_buf]
if should_send_default_pii() and integration.include_prompts:
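The core trick in new_iterator above is stamping ttft exactly once, on the first content-carrying chunk, using the start_time captured before the request was issued. A self-contained sketch of that pattern follows; record is illustrative, standing in for set_data_normalized.

import time
from typing import Callable, Iterator


def wrap_with_ttft(
    chunks: "Iterator[str]",
    start_time: float,
    record: "Callable[[float], None]",
) -> "Iterator[str]":
    # Yield chunks unchanged; record time-to-first-token exactly once.
    ttft = None
    for chunk in chunks:
        if ttft is None:  # first chunk only
            ttft = time.perf_counter() - start_time
            record(ttft)
        yield chunk


start = time.perf_counter()
stream = iter(["The", " answer", " is", " 42"])  # stand-in for model chunks
text = "".join(wrap_with_ttft(stream, start, record=print))
assert text == "The answer is 42"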
@@ -375,6 +388,7 @@ def new_iterator() -> "Iterator[ChatCompletionChunk]":
span.__exit__(None, None, None)

async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]":
+ nonlocal ttft
count_tokens_manually = True
async for x in old_iterator:
with capture_internal_exceptions():
@@ -385,6 +399,8 @@ async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]":
if hasattr(choice, "delta") and hasattr(
choice.delta, "content"
):
+ if start_time is not None and ttft is None:
+     ttft = time.perf_counter() - start_time
content = choice.delta.content
if len(data_buf) <= choice_index:
data_buf.append([])
@@ -393,6 +409,8 @@ async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]":

# OpenAI responses API
elif hasattr(x, "delta"):
+ if start_time is not None and ttft is None:
+     ttft = time.perf_counter() - start_time
if len(data_buf) == 0:
data_buf.append([])
data_buf[0].append(x.delta or "")
@@ -411,6 +429,10 @@ async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]":
yield x

with capture_internal_exceptions():
+ if ttft is not None:
+     set_data_normalized(
+         span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
+     )
if len(data_buf) > 0:
all_responses = ["".join(chunk) for chunk in data_buf]
if should_send_default_pii() and integration.include_prompts:
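new_iterator_async applies the same once-only stamp on the async path; because ttft is declared nonlocal in both iterators, whichever one actually consumes the stream records it. A minimal async rendering of the idea, with all names illustrative:

import asyncio
import time


async def fake_stream():
    # Hypothetical async chunk source simulating per-chunk network latency.
    for token in ["one", " two", " three"]:
        await asyncio.sleep(0.05)
        yield token


async def main() -> None:
    start = time.perf_counter()
    ttft = None
    async for _chunk in fake_stream():
        if ttft is None:  # stamp on the first chunk only
            ttft = time.perf_counter() - start
    print(f"time to first token: {ttft:.3f}s")


asyncio.run(main())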
@@ -465,9 +487,10 @@ def _new_chat_completion_common(f: "Any", *args: "Any", **kwargs: "Any") -> "Any":

_set_input_data(span, kwargs, operation, integration)

+ start_time = time.perf_counter()
response = yield f, args, kwargs

- _set_output_data(span, response, kwargs, integration, finish_span=True)
+ _set_output_data(span, response, kwargs, integration, start_time, finish_span=True)

return response

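_new_chat_completion_common is written as a generator: it yields the wrapped callable and its arguments out to a sync or async shim, and the shim sends the response back in, so one body serves both call styles. Capturing start_time immediately before the yield starts the clock as close to the actual request as possible. A stripped-down sketch of that shim pattern, with names that are illustrative rather than the SDK's:

import time


def _instrumented_common(f, *args, **kwargs):
    # Shared body: runs timing logic around a call performed by a shim.
    start_time = time.perf_counter()
    response = yield f, args, kwargs  # the shim calls f, then sends the result back
    print(f"call took {time.perf_counter() - start_time:.3f}s")
    return response


def sync_shim(f, *args, **kwargs):
    gen = _instrumented_common(f, *args, **kwargs)
    func, call_args, call_kwargs = next(gen)  # execute up to the yield
    try:
        gen.send(func(*call_args, **call_kwargs))  # resume after the yield
    except StopIteration as stop:
        return stop.value  # the generator's return value


print(sync_shim(lambda x: x * 2, 21))  # prints the timing line, then 42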
@@ -645,9 +668,10 @@ def _new_responses_create_common(f: "Any", *args: "Any", **kwargs: "Any") -> "Any":

_set_input_data(span, kwargs, operation, integration)

+ start_time = time.perf_counter()
response = yield f, args, kwargs

- _set_output_data(span, response, kwargs, integration, finish_span=True)
+ _set_output_data(span, response, kwargs, integration, start_time, finish_span=True)

return response

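With both wrappers passing start_time through, a streamed request made under the integration should get gen_ai.response.time_to_first_token stamped automatically once the first delta arrives. A hedged end-to-end sketch; the DSN and model are placeholders, and it assumes the openai package plus valid credentials:

import sentry_sdk
from openai import OpenAI

sentry_sdk.init(
    dsn="https://examplePublicKey@o0.ingest.sentry.io/0",  # placeholder DSN
    traces_sample_rate=1.0,
    send_default_pii=True,  # prompts/responses need PII; ttft is recorded regardless
)

client = OpenAI()  # needs OPENAI_API_KEY in the environment

with sentry_sdk.start_transaction(name="chat-demo"):
    stream = client.chat.completions.create(
        model="gpt-4o-mini",  # placeholder model
        messages=[{"role": "user", "content": "Say hi"}],
        stream=True,
    )
    for _chunk in stream:  # consuming the stream drives the wrapped iterator
        pass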