diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py
index 4b61a317fb..0e7ad18d91 100644
--- a/sentry_sdk/consts.py
+++ b/sentry_sdk/consts.py
@@ -518,6 +518,12 @@ class SPANDATA:
     Example: ["The weather in Paris is rainy and overcast, with temperatures around 57°F", "The weather in London is sunny and warm, with temperatures around 65°F"]
     """
 
+    GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN = "gen_ai.response.time_to_first_token"
+    """
+    The time it took to receive the first token from the model.
+    Example: 0.1
+    """
+
     GEN_AI_RESPONSE_TOOL_CALLS = "gen_ai.response.tool_calls"
     """
     The tool calls in the model's response.
diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py
index 66dc4a1c48..abb64e6b88 100644
--- a/sentry_sdk/integrations/openai.py
+++ b/sentry_sdk/integrations/openai.py
@@ -1,5 +1,6 @@
 import sys
 from functools import wraps
+import time
 
 import sentry_sdk
 from sentry_sdk import consts
@@ -249,6 +250,7 @@ def _set_output_data(
     response: "Any",
     kwargs: "dict[str, Any]",
     integration: "OpenAIIntegration",
+    start_time: "Optional[float]" = None,
     finish_span: bool = True,
 ) -> None:
     if hasattr(response, "model"):
@@ -263,6 +265,8 @@ def _set_output_data(
     if messages is not None and isinstance(messages, str):
         messages = [messages]
 
+    ttft: "Optional[float]" = None
+
     if hasattr(response, "choices"):
         if should_send_default_pii() and integration.include_prompts:
             response_text = [
@@ -320,6 +324,7 @@ def _set_output_data(
         old_iterator = response._iterator
 
         def new_iterator() -> "Iterator[ChatCompletionChunk]":
+            nonlocal ttft
             count_tokens_manually = True
             for x in old_iterator:
                 with capture_internal_exceptions():
@@ -330,6 +335,8 @@ def new_iterator() -> "Iterator[ChatCompletionChunk]":
                             if hasattr(choice, "delta") and hasattr(
                                 choice.delta, "content"
                             ):
+                                if start_time is not None and ttft is None:
+                                    ttft = time.perf_counter() - start_time
                                 content = choice.delta.content
                                 if len(data_buf) <= choice_index:
                                     data_buf.append([])
@@ -338,6 +345,8 @@ def new_iterator() -> "Iterator[ChatCompletionChunk]":
 
                     # OpenAI responses API
                     elif hasattr(x, "delta"):
+                        if start_time is not None and ttft is None:
+                            ttft = time.perf_counter() - start_time
                         if len(data_buf) == 0:
                             data_buf.append([])
                         data_buf[0].append(x.delta or "")
@@ -356,6 +365,10 @@ def new_iterator() -> "Iterator[ChatCompletionChunk]":
                 yield x
 
             with capture_internal_exceptions():
+                if ttft is not None:
+                    set_data_normalized(
+                        span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
+                    )
                 if len(data_buf) > 0:
                     all_responses = ["".join(chunk) for chunk in data_buf]
                     if should_send_default_pii() and integration.include_prompts:
@@ -375,6 +388,7 @@ def new_iterator() -> "Iterator[ChatCompletionChunk]":
                 span.__exit__(None, None, None)
 
         async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]":
+            nonlocal ttft
             count_tokens_manually = True
             async for x in old_iterator:
                 with capture_internal_exceptions():
@@ -385,6 +399,8 @@ async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]":
                             if hasattr(choice, "delta") and hasattr(
                                 choice.delta, "content"
                             ):
+                                if start_time is not None and ttft is None:
+                                    ttft = time.perf_counter() - start_time
                                 content = choice.delta.content
                                 if len(data_buf) <= choice_index:
                                     data_buf.append([])
@@ -393,6 +409,8 @@ async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]":
 
                     # OpenAI responses API
                     elif hasattr(x, "delta"):
+                        if start_time is not None and ttft is None:
+                            ttft = time.perf_counter() - start_time
                         if len(data_buf) == 0:
                             data_buf.append([])
                         data_buf[0].append(x.delta or "")
@@ -411,6 +429,10 @@ async def new_iterator_async() -> "AsyncIterator[ChatCompletionChunk]":
                 yield x
 
             with capture_internal_exceptions():
+                if ttft is not None:
+                    set_data_normalized(
+                        span, SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
+                    )
                 if len(data_buf) > 0:
                     all_responses = ["".join(chunk) for chunk in data_buf]
                     if should_send_default_pii() and integration.include_prompts:
@@ -465,9 +487,10 @@ def _new_chat_completion_common(f: "Any", *args: "Any", **kwargs: "Any") -> "Any
 
     _set_input_data(span, kwargs, operation, integration)
 
+    start_time = time.perf_counter()
     response = yield f, args, kwargs
 
-    _set_output_data(span, response, kwargs, integration, finish_span=True)
+    _set_output_data(span, response, kwargs, integration, start_time, finish_span=True)
 
     return response
 
@@ -645,9 +668,10 @@ def _new_responses_create_common(f: "Any", *args: "Any", **kwargs: "Any") -> "An
 
     _set_input_data(span, kwargs, operation, integration)
 
+    start_time = time.perf_counter()
     response = yield f, args, kwargs
 
-    _set_output_data(span, response, kwargs, integration, finish_span=True)
+    _set_output_data(span, response, kwargs, integration, start_time, finish_span=True)
 
     return response
 
diff --git a/sentry_sdk/integrations/openai_agents/patches/models.py b/sentry_sdk/integrations/openai_agents/patches/models.py
index 5f18a859e2..9b57a55f1f 100644
--- a/sentry_sdk/integrations/openai_agents/patches/models.py
+++ b/sentry_sdk/integrations/openai_agents/patches/models.py
@@ -1,5 +1,5 @@
 import copy
-import sys
+import time
 from functools import wraps
 
 from sentry_sdk.integrations import DidNotEnable
@@ -149,8 +149,19 @@ async def wrapped_stream_response(*args: "Any", **kwargs: "Any") -> "Any":
                     span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, True)
 
                     streaming_response = None
+                    ttft_recorded = False
+                    # Capture start time locally to avoid race conditions with concurrent requests
+                    start_time = time.perf_counter()
 
                     async for event in original_stream_response(*args, **kwargs):
+                        # Detect first content token (text delta event)
+                        if not ttft_recorded and hasattr(event, "delta"):
+                            ttft = time.perf_counter() - start_time
+                            span.set_data(
+                                SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft
+                            )
+                            ttft_recorded = True
+
                         # Capture the full response from ResponseCompletedEvent
                         if hasattr(event, "response"):
                             streaming_response = event.response
diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py
index 814289c887..505b2bb561 100644
--- a/tests/integrations/openai/test_openai.py
+++ b/tests/integrations/openai/test_openai.py
@@ -1559,3 +1559,203 @@ def test_openai_message_truncation(sentry_init, capture_events):
             if SPANDATA.GEN_AI_REQUEST_MESSAGES in span_meta:
                 messages_meta = span_meta[SPANDATA.GEN_AI_REQUEST_MESSAGES]
                 assert "len" in messages_meta.get("", {})
+
+
+# noinspection PyTypeChecker
+def test_streaming_chat_completion_ttft(sentry_init, capture_events):
+    """
+    Test that streaming chat completions capture time-to-first-token (TTFT).
+    """
+    sentry_init(
+        integrations=[OpenAIIntegration()],
+        traces_sample_rate=1.0,
+    )
+    events = capture_events()
+
+    client = OpenAI(api_key="z")
+    returned_stream = Stream(cast_to=None, response=None, client=client)
+    returned_stream._iterator = [
+        ChatCompletionChunk(
+            id="1",
+            choices=[
+                DeltaChoice(
+                    index=0, delta=ChoiceDelta(content="Hello"), finish_reason=None
+                )
+            ],
+            created=100000,
+            model="model-id",
+            object="chat.completion.chunk",
+        ),
+        ChatCompletionChunk(
+            id="1",
+            choices=[
+                DeltaChoice(
+                    index=0, delta=ChoiceDelta(content=" world"), finish_reason="stop"
+                )
+            ],
+            created=100000,
+            model="model-id",
+            object="chat.completion.chunk",
+        ),
+    ]
+
+    client.chat.completions._post = mock.Mock(return_value=returned_stream)
+
+    with start_transaction(name="openai tx"):
+        response_stream = client.chat.completions.create(
+            model="some-model", messages=[{"role": "user", "content": "Say hello"}]
+        )
+        # Consume the stream
+        for _ in response_stream:
+            pass
+
+    (tx,) = events
+    span = tx["spans"][0]
+    assert span["op"] == "gen_ai.chat"
+
+    # Verify TTFT is captured
+    assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"]
+    ttft = span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN]
+    assert isinstance(ttft, float)
+    assert ttft > 0
+
+
+# noinspection PyTypeChecker
+@pytest.mark.asyncio
+async def test_streaming_chat_completion_ttft_async(sentry_init, capture_events):
+    """
+    Test that async streaming chat completions capture time-to-first-token (TTFT).
+    """
+    sentry_init(
+        integrations=[OpenAIIntegration()],
+        traces_sample_rate=1.0,
+    )
+    events = capture_events()
+
+    client = AsyncOpenAI(api_key="z")
+    returned_stream = AsyncStream(cast_to=None, response=None, client=client)
+    returned_stream._iterator = async_iterator(
+        [
+            ChatCompletionChunk(
+                id="1",
+                choices=[
+                    DeltaChoice(
+                        index=0, delta=ChoiceDelta(content="Hello"), finish_reason=None
+                    )
+                ],
+                created=100000,
+                model="model-id",
+                object="chat.completion.chunk",
+            ),
+            ChatCompletionChunk(
+                id="1",
+                choices=[
+                    DeltaChoice(
+                        index=0,
+                        delta=ChoiceDelta(content=" world"),
+                        finish_reason="stop",
+                    )
+                ],
+                created=100000,
+                model="model-id",
+                object="chat.completion.chunk",
+            ),
+        ]
+    )
+
+    client.chat.completions._post = AsyncMock(return_value=returned_stream)
+
+    with start_transaction(name="openai tx"):
+        response_stream = await client.chat.completions.create(
+            model="some-model", messages=[{"role": "user", "content": "Say hello"}]
+        )
+        # Consume the stream
+        async for _ in response_stream:
+            pass
+
+    (tx,) = events
+    span = tx["spans"][0]
+    assert span["op"] == "gen_ai.chat"
+
+    # Verify TTFT is captured
+    assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"]
+    ttft = span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN]
+    assert isinstance(ttft, float)
+    assert ttft > 0
+
+
+# noinspection PyTypeChecker
+@pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available")
+def test_streaming_responses_api_ttft(sentry_init, capture_events):
+    """
+    Test that streaming responses API captures time-to-first-token (TTFT).
+    """
+    sentry_init(
+        integrations=[OpenAIIntegration()],
+        traces_sample_rate=1.0,
+    )
+    events = capture_events()
+
+    client = OpenAI(api_key="z")
+    returned_stream = Stream(cast_to=None, response=None, client=client)
+    returned_stream._iterator = EXAMPLE_RESPONSES_STREAM
+    client.responses._post = mock.Mock(return_value=returned_stream)
+
+    with start_transaction(name="openai tx"):
+        response_stream = client.responses.create(
+            model="some-model",
+            input="hello",
+            stream=True,
+        )
+        # Consume the stream
+        for _ in response_stream:
+            pass
+
+    (tx,) = events
+    span = tx["spans"][0]
+    assert span["op"] == "gen_ai.responses"
+
+    # Verify TTFT is captured
+    assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"]
+    ttft = span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN]
+    assert isinstance(ttft, float)
+    assert ttft > 0
+
+
+# noinspection PyTypeChecker
+@pytest.mark.asyncio
+@pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available")
+async def test_streaming_responses_api_ttft_async(sentry_init, capture_events):
+    """
+    Test that async streaming responses API captures time-to-first-token (TTFT).
+    """
+    sentry_init(
+        integrations=[OpenAIIntegration()],
+        traces_sample_rate=1.0,
+    )
+    events = capture_events()
+
+    client = AsyncOpenAI(api_key="z")
+    returned_stream = AsyncStream(cast_to=None, response=None, client=client)
+    returned_stream._iterator = async_iterator(EXAMPLE_RESPONSES_STREAM)
+    client.responses._post = AsyncMock(return_value=returned_stream)
+
+    with start_transaction(name="openai tx"):
+        response_stream = await client.responses.create(
+            model="some-model",
+            input="hello",
+            stream=True,
+        )
+        # Consume the stream
+        async for _ in response_stream:
+            pass
+
+    (tx,) = events
+    span = tx["spans"][0]
+    assert span["op"] == "gen_ai.responses"
+
+    # Verify TTFT is captured
+    assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in span["data"]
+    ttft = span["data"][SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN]
+    assert isinstance(ttft, float)
+    assert ttft > 0
diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py
index c66c53b2ef..81f4b54e1d 100644
--- a/tests/integrations/openai_agents/test_openai_agents.py
+++ b/tests/integrations/openai_agents/test_openai_agents.py
@@ -2202,3 +2202,100 @@ async def test_streaming_span_update_captures_response_data(
         assert span._data["gen_ai.usage.input_tokens"] == 10
         assert span._data["gen_ai.usage.output_tokens"] == 20
         assert span._data["gen_ai.response.model"] == "gpt-4-streaming"
+
+
+@pytest.mark.asyncio
+async def test_streaming_ttft_on_chat_span(sentry_init, test_agent):
+    """
+    Test that time-to-first-token (TTFT) is recorded on chat spans during streaming.
+
+    TTFT is triggered by events with a `delta` attribute, which includes:
+    - ResponseTextDeltaEvent (text output)
+    - ResponseAudioDeltaEvent (audio output)
+    - ResponseReasoningTextDeltaEvent (reasoning/thinking)
+    - ResponseFunctionCallArgumentsDeltaEvent (function call args)
+    - and other delta events...
+
+    Events WITHOUT delta (like ResponseCompletedEvent, ResponseCreatedEvent, etc.)
+    should NOT trigger TTFT.
+    """
+    from sentry_sdk.integrations.openai_agents.patches.models import (
+        _create_get_model_wrapper,
+    )
+
+    sentry_init(
+        integrations=[OpenAIAgentsIntegration()],
+        traces_sample_rate=1.0,
+    )
+
+    # Create a mock model with stream_response and get_response
+    class MockModel:
+        model = "gpt-4"
+
+        async def get_response(self, *args, **kwargs):
+            # Not used in this test, but required by the wrapper
+            pass
+
+        async def stream_response(self, *args, **kwargs):
+            # First event: ResponseCreatedEvent (no delta - should NOT trigger TTFT)
+            created_event = MagicMock(spec=["type", "sequence_number"])
+            created_event.type = "response.created"
+            yield created_event
+
+            # Simulate server-side processing delay before first token
+            await asyncio.sleep(0.05)  # 50ms delay
+
+            # Second event: ResponseTextDeltaEvent (HAS delta - triggers TTFT)
+            text_delta_event = MagicMock(spec=["delta", "type", "content_index"])
+            text_delta_event.delta = "Hello"
+            text_delta_event.type = "response.output_text.delta"
+            yield text_delta_event
+
+            # Third event: more text content (also has delta, but TTFT already recorded)
+            text_delta_event2 = MagicMock(spec=["delta", "type", "content_index"])
+            text_delta_event2.delta = " world!"
+            text_delta_event2.type = "response.output_text.delta"
+            yield text_delta_event2
+
+            # Final event: ResponseCompletedEvent (has response, no delta)
+            completed_event = MagicMock(spec=["response", "type", "sequence_number"])
+            completed_event.response = MagicMock()
+            completed_event.response.model = "gpt-4"
+            completed_event.response.usage = Usage(
+                requests=1,
+                input_tokens=10,
+                output_tokens=5,
+                total_tokens=15,
+            )
+            completed_event.response.output = []
+            yield completed_event
+
+    # Create a mock original _get_model that returns our mock model
+    def mock_get_model(agent, run_config):
+        return MockModel()
+
+    # Wrap it with our integration wrapper
+    wrapped_get_model = _create_get_model_wrapper(mock_get_model)
+
+    with sentry_sdk.start_transaction(name="test_ttft", sampled=True) as transaction:
+        # Get the wrapped model (this applies the stream_response wrapper)
+        wrapped_model = wrapped_get_model(None, test_agent, MagicMock())
+
+        # Call the wrapped stream_response and consume all events
+        async for _event in wrapped_model.stream_response():
+            pass
+
+        # Verify TTFT is recorded on the chat span (must be inside transaction context)
+        chat_spans = [
+            s for s in transaction._span_recorder.spans if s.op == "gen_ai.chat"
+        ]
+        assert len(chat_spans) >= 1
+        chat_span = chat_spans[0]
+
+        assert SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN in chat_span._data
+        ttft_value = chat_span._data[SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN]
+        # TTFT should be at least 40ms (our simulated delay minus some variance) but reasonable
+        assert 0.04 < ttft_value < 1.0, f"TTFT {ttft_value} should be around 50ms"
+
+        # Verify streaming flag is set
+        assert chat_span._data.get(SPANDATA.GEN_AI_RESPONSE_STREAMING) is True