diff --git a/core/llm/index.ts b/core/llm/index.ts
index ceea1153dcd..9097525b81b 100644
--- a/core/llm/index.ts
+++ b/core/llm/index.ts
@@ -1037,7 +1037,8 @@ export abstract class BaseLLM implements ILLM {
     return (
       this.providerName === "openai" &&
       typeof (this as any)._streamResponses === "function" &&
-      (this as any).isOSeriesOrGpt5Model(options.model)
+      (this as any).isOSeriesOrGpt5Model(options.model) &&
+      this._llmOptions.useLegacyCompletionsEndpoint !== true
     );
   }
 
diff --git a/docs/reference.mdx b/docs/reference.mdx
index 0e8cef73735..d7c28208b66 100644
--- a/docs/reference.mdx
+++ b/docs/reference.mdx
@@ -110,6 +110,7 @@ The `models` section defines the language models used in your configuration. Mod
   - `stop`: An array of stop tokens that will terminate the completion.
   - `reasoning`: Boolean to enable thinking/reasoning for Anthropic Claude 3.7+ and some Ollama models.
   - `reasoningBudgetTokens`: Budget tokens for thinking/reasoning in Anthropic Claude 3.7+ models.
+  - `stream`: Boolean to enable/disable streaming for the model.
   - `requestOptions`: HTTP request options specific to the model.
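For readability, here is a minimal standalone restatement of the new gate. The names `ResponsesGateInput` and `shouldUseResponsesEndpoint` are illustrative only (they do not exist in the Continue codebase); the real check lives on `BaseLLM` and reads provider internals directly. The intent of the change: the Responses-API streaming path is skipped whenever `useLegacyCompletionsEndpoint` is explicitly set to `true`, even for o-series / GPT-5 models.

```typescript
// Hypothetical, self-contained sketch of the condition added in the diff above.
interface ResponsesGateInput {
  providerName: string;
  // true when the provider implements _streamResponses
  hasStreamResponses: boolean;
  // true when isOSeriesOrGpt5Model(options.model) matches
  isOSeriesOrGpt5Model: boolean;
  // user-provided LLM option; undefined when not set
  useLegacyCompletionsEndpoint?: boolean;
}

// Route OpenAI o-series / GPT-5 models through the Responses endpoint only
// when the user has not opted into the legacy completions endpoint. The
// `!== true` comparison keeps an unset (undefined) option on the new path.
function shouldUseResponsesEndpoint(input: ResponsesGateInput): boolean {
  return (
    input.providerName === "openai" &&
    input.hasStreamResponses &&
    input.isOSeriesOrGpt5Model &&
    input.useLegacyCompletionsEndpoint !== true
  );
}

// Example: an o-series model with the legacy option set falls back.
shouldUseResponsesEndpoint({
  providerName: "openai",
  hasStreamResponses: true,
  isOSeriesOrGpt5Model: true,
  useLegacyCompletionsEndpoint: true,
}); // => false
```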