From 66e09627e1ec6a26c5a9169a20c21c5d47a75801 Mon Sep 17 00:00:00 2001
From: Fabian Schindler <fabian.schindler@sentry.io>
Date: Tue, 20 Jan 2026 14:42:27 +0100
Subject: [PATCH 1/5] feat(integrations): openai: detect and report the time to
 first token metric (TTFT) as `gen_ai.response.time_to_first_token`

---
 sentry_sdk/consts.py              |  6 ++++++
 sentry_sdk/integrations/openai.py | 28 ++++++++++++++++++++++++++--
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py
index e53533018f..a932a05a60 100644
--- a/sentry_sdk/consts.py
+++ b/sentry_sdk/consts.py
@@ -518,6 +518,12 @@ class SPANDATA:
     Example: ["The weather in Paris is rainy and overcast, with temperatures around 57°F", "The weather in London is sunny and warm, with temperatures around 65°F"]
     """
 
+    GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN = "gen_ai.response.time_to_first_token"
+    """
+    The time in seconds it took to receive the first token from the model.
+    Example: 0.1
+    """
+
     GEN_AI_RESPONSE_TOOL_CALLS = "gen_ai.response.tool_calls"
     """
     The tool calls in the model's response.
diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index 66dc4a1c48..abb64e6b88 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -1,5 +1,6 @@
 import sys
 from functools import wraps
+import time
 
 import sentry_sdk
 from sentry_sdk import consts
@@ -249,6 +250,7 @@ def _set_output_data(
     response: "Any",
     kwargs: "dict[str, Any]",
     integration: "OpenAIIntegration",
+    start_time: "Optional[float]" = None,
     finish_span: bool = True,
 ) -> None:
     if hasattr(response, "model"):
@@ -263,6 +265,8 @@ def _set_output_data(
     if messages is not None and isinstance(messages, str):
         messages = [messages]
 
+    ttft: "Optional[float]" = None
+
     if hasattr(response, "choices"):
         if should_send_default_pii() and integration.include_prompts:
             response_text = [
@@ -320,6 +324,7 @@ def _set_output_data(
         old_iterator = response._iterator
 
         def new_iterator() -> "Iterator[ChatCompletionChunk]":
+            nonlocal ttft
             count_tokens_manually = True
             for x in old_iterator:
                 with capture_internal_exceptions():
@@ -330,6 +335,8 @@ def new_iterator() -> "Iterator[ChatCompletionChunk]":
                             if hasattr(choice, "delta") and hasattr(
                                 choice.delta, "content"
                             ):
+                                if start_time is not None and ttft is None:
+                                    ttft = time.perf_counter() - start_time
                                 content = choice.delta.content
                                 if len(data_buf) <= choice_index:
                                     data_buf.append([])
@@ -338,6 +345,8 @@ def new_iterator() -> "Iterator[ChatCompletionChunk]":
 
                     # OpenAI responses API
                     elif hasattr(x, "delta"):
+                        if start_time is not None and ttft is None:
+                            ttft = time.perf_counter() - start_time
                         if len(data_buf) == 0:
                             data_buf.append([])
                         data_buf[0].append(x.delta or "")
@@ -356,6 +365,10 @@ def new_iterator() -> "Iterator[ChatCompletionChunk]":
                 yield x
 
             with capture_internal_exceptions():
+                if ttft is not None:
+                    set_data_normalized(
+                        span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
+                    )
                 if len(data_buf) > 0:
                     all_responses = ["".join(chunk) for chunk in data_buf]
                     if should_send_default_pii() and integration.include_prompts:
@@ -375,6 +388,7 @@ def new_iterator() -> "Iterator[ChatCompletionChunk]":
                 span.__exit__(None, None, None)
 
         async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]":
+            nonlocal ttft
             count_tokens_manually = True
             async for x in old_iterator:
                 with capture_internal_exceptions():
@@ -385,6 +399,8 @@ async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]":
                             if hasattr(choice, "delta") and hasattr(
                                 choice.delta, "content"
                             ):
+                                if start_time is not None and ttft is None:
+                                    ttft = time.perf_counter() - start_time
                                 content = choice.delta.content
                                 if len(data_buf) <= choice_index:
                                     data_buf.append([])
@@ -393,6 +409,8 @@ async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]":
 
                     # OpenAI responses API
                     elif hasattr(x, "delta"):
+                        if start_time is not None and ttft is None:
+                            ttft = time.perf_counter() - start_time
                         if len(data_buf) == 0:
                             data_buf.append([])
                         data_buf[0].append(x.delta or "")
@@ -411,6 +429,10 @@ async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]":
                 yield x
 
             with capture_internal_exceptions():
+                if ttft is not None:
+                    set_data_normalized(
+                        span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
+                    )
                 if len(data_buf) > 0:
                     all_responses = ["".join(chunk) for chunk in data_buf]
                     if should_send_default_pii() and integration.include_prompts:
@@ -465,9 +487,10 @@ def _new_chat_completion_common(f: "Any", *args: "Any", **kwargs: "Any") -> "Any
 
     _set_input_data(span, kwargs, operation, integration)
 
+    start_time = time.perf_counter()
     response = yield f, args, kwargs
 
-    _set_output_data(span, response, kwargs, integration, finish_span=True)
+    _set_output_data(span, response, kwargs, integration, start_time, finish_span=True)
 
     return response
 
@@ -645,9 +668,10 @@ def _new_responses_create_common(f: "Any", *args: "Any", **kwargs: "Any") -> "An
 
     _set_input_data(span, kwargs, operation, integration)
 
+    start_time = time.perf_counter()
     response = yield f, args, kwargs
 
-    _set_output_data(span, response, kwargs, integration, finish_span=True)
+    _set_output_data(span, response, kwargs, integration, start_time, finish_span=True)
 
     return response
 

From 77e9e2015a62fa3d23876c31702e4e22be70e403 Mon Sep 17 00:00:00 2001
From: Fabian Schindler <fabian.schindler@sentry.io>
Date: Tue, 27 Jan 2026 10:19:50 +0100
Subject: [PATCH 2/5] feat(integrations): openai-agents: record TTFT for
 ai_spans

---
 .../openai_agents/patches/models.py           | 15 ++-
 .../openai_agents/spans/ai_client.py          |  5 +
 .../openai_agents/test_openai_agents.py       | 97 +++++++++++++++++++
 3 files changed, 116 insertions(+), 1 deletion(-)

diff --git a/sentry_sdk/integrations/openai_agents/patches/models.py b/sentry_sdk/integrations/openai_agents/patches/models.py
index 5f18a859e2..063155858e 100644
--- a/sentry_sdk/integrations/openai_agents/patches/models.py
+++ b/sentry_sdk/integrations/openai_agents/patches/models.py
@@ -1,5 +1,5 @@
 import copy
-import sys
+import time
 from functools import wraps
 
 from sentry_sdk.integrations import DidNotEnable
@@ -149,8 +149,21 @@ async def wrapped_stream_response(*args: "Any", **kwargs: "Any") -> "Any":
                     span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, True)
 
                     streaming_response = None
+                    ttft_recorded = False
 
                     async for event in original_stream_response(*args, **kwargs):
+                        # Detect first content token (text delta event)
+                        if not ttft_recorded and hasattr(event, "delta"):
+                            start_time = getattr(
+                                agent, "_sentry_chat_ttft_start_time", None
+                            )
+                            if start_time is not None:
+                                ttft = time.perf_counter() - start_time
+                                span.set_data(
+                                    SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
+                                )
+                            ttft_recorded = True
+
                         # Capture the full response from ResponseCompletedEvent
                         if hasattr(event, "response"):
                             streaming_response = event.response
diff --git a/sentry_sdk/integrations/openai_agents/spans/ai_client.py b/sentry_sdk/integrations/openai_agents/spans/ai_client.py
index c099f133f4..364728bf23 100644
--- a/sentry_sdk/integrations/openai_agents/spans/ai_client.py
+++ b/sentry_sdk/integrations/openai_agents/spans/ai_client.py
@@ -1,3 +1,5 @@
+import time
+
 import sentry_sdk
 from sentry_sdk.consts import OP, SPANDATA
 
@@ -36,6 +38,9 @@ def ai_client_span(
     # TODO-anton: remove hardcoded stuff and replace something that also works for embedding and so on
     span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "chat")
 
+    # Store start time for TTFT calculation on the agent object
+    agent._sentry_chat_ttft_start_time = time.perf_counter()
+
     _set_agent_data(span, agent)
     _set_input_data(span, get_response_kwargs)
 
diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py
index c66c53b2ef..d62fd0dbd3 100644
--- a/tests/integrations/openai_agents/test_openai_agents.py
+++ b/tests/integrations/openai_agents/test_openai_agents.py
@@ -2202,3 +2202,100 @@ async def test_streaming_span_update_captures_response_data(
         assert span._data["gen_ai.usage.input_tokens"] == 10
         assert span._data["gen_ai.usage.output_tokens"] == 20
         assert span._data["gen_ai.response.model"] == "gpt-4-streaming"
+
+
+@pytest.mark.asyncio
+async def test_streaming_ttft_on_chat_span(sentry_init, test_agent):
+    """
+    Test that time-to-first-token (TTFT) is recorded on chat spans during streaming.
+
+    TTFT is triggered by events with a `delta` attribute, which includes:
+    - ResponseTextDeltaEvent (text output)
+    - ResponseAudioDeltaEvent (audio output)
+    - ResponseReasoningTextDeltaEvent (reasoning/thinking)
+    - ResponseFunctionCallArgumentsDeltaEvent (function call args)
+    - and other delta events...
+
+    Events WITHOUT delta (like ResponseCompletedEvent, ResponseCreatedEvent, etc.)
+    should NOT trigger TTFT.
+    """
+    import time
+
+    sentry_init(
+        integrations=[OpenAIAgentsIntegration()],
+        traces_sample_rate=1.0,
+    )
+
+    # Create a mock model that returns a stream_response generator
+    class MockModel:
+        model = "gpt-4"
+
+        async def stream_response(self, *args, **kwargs):
+            # First event: ResponseCreatedEvent (no delta - should NOT trigger TTFT)
+            created_event = MagicMock(spec=["type", "sequence_number"])
+            created_event.type = "response.created"
+            yield created_event
+
+            # Simulate server-side processing delay before first token
+            await asyncio.sleep(0.05)  # 50ms delay
+
+            # Second event: ResponseTextDeltaEvent (HAS delta - triggers TTFT)
+            # This simulates the first actual content token
+            text_delta_event = MagicMock(spec=["delta", "type", "content_index"])
+            text_delta_event.delta = "Hello"
+            text_delta_event.type = "response.output_text.delta"
+            yield text_delta_event
+            await asyncio.sleep(0.05)  # 50ms delay
+
+            # Third event: more text content (also has delta, but TTFT already recorded)
+            text_delta_event2 = MagicMock(spec=["delta", "type", "content_index"])
+            text_delta_event2.delta = " world!"
+            text_delta_event2.type = "response.output_text.delta"
+            yield text_delta_event2
+
+            # Final event: ResponseCompletedEvent (has response, no delta)
+            completed_event = MagicMock(spec=["response", "type", "sequence_number"])
+            completed_event.response = MagicMock()
+            completed_event.response.model = "gpt-4"
+            completed_event.response.usage = Usage(
+                requests=1,
+                input_tokens=10,
+                output_tokens=5,
+                total_tokens=15,
+            )
+            completed_event.response.output = []
+            yield completed_event
+
+    mock_model = MockModel()
+
+    with sentry_sdk.start_transaction(name="test_ttft", sampled=True) as transaction:
+        # Simulate calling the wrapped stream_response logic
+        from sentry_sdk.integrations.openai_agents.spans import ai_client_span
+
+        with ai_client_span(test_agent, {}) as span:
+            span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, True)
+
+            ttft_recorded = False
+            start_time = getattr(test_agent, "_sentry_chat_ttft_start_time", None)
+
+            async for event in mock_model.stream_response():
+                # This is the same logic used in the actual integration
+                if (
+                    not ttft_recorded
+                    and hasattr(event, "delta")
+                    and start_time is not None
+                ):
+                    ttft = time.perf_counter() - start_time
+                    span.set_data(SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft)
+                    ttft_recorded = True
+
+        # Verify TTFT is recorded on the chat span (inside transaction context)
+        chat_spans = [
+            s for s in transaction._span_recorder.spans if s.op == "gen_ai.chat"
+        ]
+        assert len(chat_spans) >= 1
+        chat_span = chat_spans[0]
+        assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in chat_span._data
+        ttft_value = chat_span._data[SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN]
+        # TTFT should be at least 40ms (our simulated delay minus some variance) but reasonable
+        assert 0.04 < ttft_value < 1.0, f"TTFT {ttft_value} should be around 50ms"

From f3f3f24cd47ce32aeb0707a0bcf66008f618281b Mon Sep 17 00:00:00 2001
From: Fabian Schindler <fabian.schindler@sentry.io>
Date: Tue, 27 Jan 2026 10:50:25 +0100
Subject: [PATCH 3/5] fix: use a local variable to track start time

---
 .../integrations/openai_agents/patches/models.py     | 12 +++++-------
 .../integrations/openai_agents/spans/ai_client.py    |  5 -----
 .../integrations/openai_agents/test_openai_agents.py |  9 +++------
 3 files changed, 8 insertions(+), 18 deletions(-)

diff --git a/sentry_sdk/integrations/openai_agents/patches/models.py b/sentry_sdk/integrations/openai_agents/patches/models.py
index 063155858e..9b57a55f1f 100644
--- a/sentry_sdk/integrations/openai_agents/patches/models.py
+++ b/sentry_sdk/integrations/openai_agents/patches/models.py
@@ -150,18 +150,16 @@ async def wrapped_stream_response(*args: "Any", **kwargs: "Any") -> "Any":
 
                     streaming_response = None
                     ttft_recorded = False
+                    # Capture start time locally to avoid race conditions with concurrent requests
+                    start_time = time.perf_counter()
 
                     async for event in original_stream_response(*args, **kwargs):
                         # Detect first content token (text delta event)
                         if not ttft_recorded and hasattr(event, "delta"):
-                            start_time = getattr(
-                                agent, "_sentry_chat_ttft_start_time", None
+                            ttft = time.perf_counter() - start_time
+                            span.set_data(
+                                SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
                             )
-                            if start_time is not None:
-                                ttft = time.perf_counter() - start_time
-                                span.set_data(
-                                    SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
-                                )
                             ttft_recorded = True
 
                         # Capture the full response from ResponseCompletedEvent
diff --git a/sentry_sdk/integrations/openai_agents/spans/ai_client.py b/sentry_sdk/integrations/openai_agents/spans/ai_client.py
index 364728bf23..c099f133f4 100644
--- a/sentry_sdk/integrations/openai_agents/spans/ai_client.py
+++ b/sentry_sdk/integrations/openai_agents/spans/ai_client.py
@@ -1,5 +1,3 @@
-import time
-
 import sentry_sdk
 from sentry_sdk.consts import OP, SPANDATA
 
@@ -38,9 +36,6 @@ def ai_client_span(
     # TODO-anton: remove hardcoded stuff and replace something that also works for embedding and so on
     span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "chat")
 
-    # Store start time for TTFT calculation on the agent object
-    agent._sentry_chat_ttft_start_time = time.perf_counter()
-
     _set_agent_data(span, agent)
     _set_input_data(span, get_response_kwargs)
 
diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py
index d62fd0dbd3..795637986b 100644
--- a/tests/integrations/openai_agents/test_openai_agents.py
+++ b/tests/integrations/openai_agents/test_openai_agents.py
@@ -2276,15 +2276,12 @@ async def stream_response(self, *args, **kwargs):
             span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, True)
 
             ttft_recorded = False
-            start_time = getattr(test_agent, "_sentry_chat_ttft_start_time", None)
+            # Capture start time locally (same as production code after race condition fix)
+            start_time = time.perf_counter()
 
             async for event in mock_model.stream_response():
                 # This is the same logic used in the actual integration
-                if (
-                    not ttft_recorded
-                    and hasattr(event, "delta")
-                    and start_time is not None
-                ):
+                if not ttft_recorded and hasattr(event, "delta"):
                     ttft = time.perf_counter() - start_time
                     span.set_data(SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft)
                     ttft_recorded = True

From 092635cba4aaf643f40ef94f9c4c86ce8f387596 Mon Sep 17 00:00:00 2001
From: Fabian Schindler <fabian.schindler@sentry.io>
Date: Tue, 27 Jan 2026 11:24:32 +0100
Subject: [PATCH 4/5] test: fix test case to be actually useful

---
 .../openai_agents/test_openai_agents.py       | 45 ++++++++++---------
 1 file changed, 24 insertions(+), 21 deletions(-)

diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py
index 795637986b..81f4b54e1d 100644
--- a/tests/integrations/openai_agents/test_openai_agents.py
+++ b/tests/integrations/openai_agents/test_openai_agents.py
@@ -2219,17 +2219,23 @@ async def test_streaming_ttft_on_chat_span(sentry_init, test_agent):
     Events WITHOUT delta (like ResponseCompletedEvent, ResponseCreatedEvent, etc.)
     should NOT trigger TTFT.
     """
-    import time
+    from sentry_sdk.integrations.openai_agents.patches.models import (
+        _create_get_model_wrapper,
+    )
 
     sentry_init(
         integrations=[OpenAIAgentsIntegration()],
         traces_sample_rate=1.0,
     )
 
-    # Create a mock model that returns a stream_response generator
+    # Create a mock model with stream_response and get_response
     class MockModel:
         model = "gpt-4"
 
+        async def get_response(self, *args, **kwargs):
+            # Not used in this test, but required by the wrapper
+            pass
+
         async def stream_response(self, *args, **kwargs):
             # First event: ResponseCreatedEvent (no delta - should NOT trigger TTFT)
             created_event = MagicMock(spec=["type", "sequence_number"])
@@ -2240,12 +2246,10 @@ async def stream_response(self, *args, **kwargs):
             await asyncio.sleep(0.05)  # 50ms delay
 
             # Second event: ResponseTextDeltaEvent (HAS delta - triggers TTFT)
-            # This simulates the first actual content token
             text_delta_event = MagicMock(spec=["delta", "type", "content_index"])
             text_delta_event.delta = "Hello"
             text_delta_event.type = "response.output_text.delta"
             yield text_delta_event
-            await asyncio.sleep(0.05)  # 50ms delay
 
             # Third event: more text content (also has delta, but TTFT already recorded)
             text_delta_event2 = MagicMock(spec=["delta", "type", "content_index"])
@@ -2266,33 +2270,32 @@ async def stream_response(self, *args, **kwargs):
             completed_event.response.output = []
             yield completed_event
 
-    mock_model = MockModel()
-
-    with sentry_sdk.start_transaction(name="test_ttft", sampled=True) as transaction:
-        # Simulate calling the wrapped stream_response logic
-        from sentry_sdk.integrations.openai_agents.spans import ai_client_span
+    # Create a mock original _get_model that returns our mock model
+    def mock_get_model(agent, run_config):
+        return MockModel()
 
-        with ai_client_span(test_agent, {}) as span:
-            span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, True)
+    # Wrap it with our integration wrapper
+    wrapped_get_model = _create_get_model_wrapper(mock_get_model)
 
-            ttft_recorded = False
-            # Capture start time locally (same as production code after race condition fix)
-            start_time = time.perf_counter()
+    with sentry_sdk.start_transaction(name="test_ttft", sampled=True) as transaction:
+        # Get the wrapped model (this applies the stream_response wrapper)
+        wrapped_model = wrapped_get_model(None, test_agent, MagicMock())
 
-            async for event in mock_model.stream_response():
-                # This is the same logic used in the actual integration
-                if not ttft_recorded and hasattr(event, "delta"):
-                    ttft = time.perf_counter() - start_time
-                    span.set_data(SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft)
-                    ttft_recorded = True
+        # Call the wrapped stream_response and consume all events
+        async for _event in wrapped_model.stream_response():
+            pass
 
-        # Verify TTFT is recorded on the chat span (inside transaction context)
+        # Verify TTFT is recorded on the chat span (must be inside transaction context)
         chat_spans = [
             s for s in transaction._span_recorder.spans if s.op == "gen_ai.chat"
         ]
         assert len(chat_spans) >= 1
         chat_span = chat_spans[0]
+
         assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in chat_span._data
         ttft_value = chat_span._data[SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN]
         # TTFT should be at least 40ms (our simulated delay minus some variance) but reasonable
         assert 0.04 < ttft_value < 1.0, f"TTFT {ttft_value} should be around 50ms"
+
+        # Verify streaming flag is set
+        assert chat_span._data.get(SPANDATA.GEN_AI_RESPONSE_STREAMING) is True

From 8a6c6f134209141f301ab9843e068a67f07fea3a Mon Sep 17 00:00:00 2001
From: Fabian Schindler <fabian.schindler@sentry.io>
Date: Tue, 27 Jan 2026 12:45:09 +0100
Subject: [PATCH 5/5] test: add TTFT tests for openai

---
 tests/integrations/openai/test_openai.py | 200 +++++++++++++++++++++++
 1 file changed, 200 insertions(+)

diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py
index 814289c887..505b2bb561 100644
--- a/tests/integrations/openai/test_openai.py
+++ b/tests/integrations/openai/test_openai.py
@@ -1559,3 +1559,203 @@ def test_openai_message_truncation(sentry_init, capture_events):
             if SPANDATA.GEN_AI_REQUEST_MESSAGES in span_meta:
                 messages_meta = span_meta[SPANDATA.GEN_AI_REQUEST_MESSAGES]
                 assert "len" in messages_meta.get("", {})
+
+
+# noinspection PyTypeChecker
+def test_streaming_chat_completion_ttft(sentry_init, capture_events):
+    """
+    Test that streaming chat completions capture time-to-first-token (TTFT).
+    """
+    sentry_init(
+        integrations=[OpenAIIntegration()],
+        traces_sample_rate=1.0,
+    )
+    events = capture_events()
+
+    client = OpenAI(api_key="z")
+    returned_stream = Stream(cast_to=None, response=None, client=client)
+    returned_stream._iterator = [
+        ChatCompletionChunk(
+            id="1",
+            choices=[
+                DeltaChoice(
+                    index=0, delta=ChoiceDelta(content="Hello"), finish_reason=None
+                )
+            ],
+            created=100000,
+            model="model-id",
+            object="chat.completion.chunk",
+        ),
+        ChatCompletionChunk(
+            id="1",
+            choices=[
+                DeltaChoice(
+                    index=0, delta=ChoiceDelta(content=" world"), finish_reason="stop"
+                )
+            ],
+            created=100000,
+            model="model-id",
+            object="chat.completion.chunk",
+        ),
+    ]
+
+    client.chat.completions._post = mock.Mock(return_value=returned_stream)
+
+    with start_transaction(name="openai tx"):
+        response_stream = client.chat.completions.create(
+            model="some-model", messages=[{"role": "user", "content": "Say hello"}]
+        )
+        # Consume the stream
+        for _ in response_stream:
+            pass
+
+    (tx,) = events
+    span = tx["spans"][0]
+    assert span["op"] == "gen_ai.chat"
+
+    # Verify TTFT is captured
+    assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"]
+    ttft = span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN]
+    assert isinstance(ttft, float)
+    assert ttft > 0
+
+
+# noinspection PyTypeChecker
+@pytest.mark.asyncio
+async def test_streaming_chat_completion_ttft_async(sentry_init, capture_events):
+    """
+    Test that async streaming chat completions capture time-to-first-token (TTFT).
+    """
+    sentry_init(
+        integrations=[OpenAIIntegration()],
+        traces_sample_rate=1.0,
+    )
+    events = capture_events()
+
+    client = AsyncOpenAI(api_key="z")
+    returned_stream = AsyncStream(cast_to=None, response=None, client=client)
+    returned_stream._iterator = async_iterator(
+        [
+            ChatCompletionChunk(
+                id="1",
+                choices=[
+                    DeltaChoice(
+                        index=0, delta=ChoiceDelta(content="Hello"), finish_reason=None
+                    )
+                ],
+                created=100000,
+                model="model-id",
+                object="chat.completion.chunk",
+            ),
+            ChatCompletionChunk(
+                id="1",
+                choices=[
+                    DeltaChoice(
+                        index=0,
+                        delta=ChoiceDelta(content=" world"),
+                        finish_reason="stop",
+                    )
+                ],
+                created=100000,
+                model="model-id",
+                object="chat.completion.chunk",
+            ),
+        ]
+    )
+
+    client.chat.completions._post = AsyncMock(return_value=returned_stream)
+
+    with start_transaction(name="openai tx"):
+        response_stream = await client.chat.completions.create(
+            model="some-model", messages=[{"role": "user", "content": "Say hello"}]
+        )
+        # Consume the stream
+        async for _ in response_stream:
+            pass
+
+    (tx,) = events
+    span = tx["spans"][0]
+    assert span["op"] == "gen_ai.chat"
+
+    # Verify TTFT is captured
+    assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"]
+    ttft = span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN]
+    assert isinstance(ttft, float)
+    assert ttft > 0
+
+
+# noinspection PyTypeChecker
+@pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available")
+def test_streaming_responses_api_ttft(sentry_init, capture_events):
+    """
+    Test that streaming responses API captures time-to-first-token (TTFT).
+    """
+    sentry_init(
+        integrations=[OpenAIIntegration()],
+        traces_sample_rate=1.0,
+    )
+    events = capture_events()
+
+    client = OpenAI(api_key="z")
+    returned_stream = Stream(cast_to=None, response=None, client=client)
+    returned_stream._iterator = EXAMPLE_RESPONSES_STREAM
+    client.responses._post = mock.Mock(return_value=returned_stream)
+
+    with start_transaction(name="openai tx"):
+        response_stream = client.responses.create(
+            model="some-model",
+            input="hello",
+            stream=True,
+        )
+        # Consume the stream
+        for _ in response_stream:
+            pass
+
+    (tx,) = events
+    span = tx["spans"][0]
+    assert span["op"] == "gen_ai.responses"
+
+    # Verify TTFT is captured
+    assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"]
+    ttft = span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN]
+    assert isinstance(ttft, float)
+    assert ttft > 0
+
+
+# noinspection PyTypeChecker
+@pytest.mark.asyncio
+@pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available")
+async def test_streaming_responses_api_ttft_async(sentry_init, capture_events):
+    """
+    Test that async streaming responses API captures time-to-first-token (TTFT).
+    """
+    sentry_init(
+        integrations=[OpenAIIntegration()],
+        traces_sample_rate=1.0,
+    )
+    events = capture_events()
+
+    client = AsyncOpenAI(api_key="z")
+    returned_stream = AsyncStream(cast_to=None, response=None, client=client)
+    returned_stream._iterator = async_iterator(EXAMPLE_RESPONSES_STREAM)
+    client.responses._post = AsyncMock(return_value=returned_stream)
+
+    with start_transaction(name="openai tx"):
+        response_stream = await client.responses.create(
+            model="some-model",
+            input="hello",
+            stream=True,
+        )
+        # Consume the stream
+        async for _ in response_stream:
+            pass
+
+    (tx,) = events
+    span = tx["spans"][0]
+    assert span["op"] == "gen_ai.responses"
+
+    # Verify TTFT is captured
+    assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"]
+    ttft = span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN]
+    assert isinstance(ttft, float)
+    assert ttft > 0