diff --git a/core/llm/index.ts b/core/llm/index.ts
index ceea1153dcd..9097525b81b 100644
--- a/core/llm/index.ts
+++ b/core/llm/index.ts
@@ -1037,7 +1037,8 @@ export abstract class BaseLLM implements ILLM {
     return (
       this.providerName === "openai" &&
       typeof (this as any)._streamResponses === "function" &&
-      (this as any).isOSeriesOrGpt5Model(options.model)
+      (this as any).isOSeriesOrGpt5Model(options.model) &&
+      this._llmOptions.useLegacyCompletionsEndpoint !== true
     );
   }
 
diff --git a/docs/reference.mdx b/docs/reference.mdx
index 0e8cef73735..d7c28208b66 100644
--- a/docs/reference.mdx
+++ b/docs/reference.mdx
@@ -110,6 +110,7 @@ The `models` section defines the language models used in your configuration. Mod
   - `stop`: An array of stop tokens that will terminate the completion.
   - `reasoning`: Boolean to enable thinking/reasoning for Anthropic Claude 3.7+ and some Ollama models.
   - `reasoningBudgetTokens`: Budget tokens for thinking/reasoning in Anthropic Claude 3.7+ models.
+  - `stream`: Boolean to enable/disable streaming for the model.
   - `requestOptions`: HTTP request options specific to the model.
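For readability, here is a minimal standalone restatement of the new gate. The names `ResponsesGateInput` and `shouldUseResponsesEndpoint` are illustrative only (they do not exist in the Continue codebase); the real check lives on `BaseLLM` and reads provider internals directly. The intent of the change: the Responses-API streaming path is skipped whenever `useLegacyCompletionsEndpoint` is explicitly set to `true`, even for o-series / GPT-5 models.

```typescript
// Hypothetical, self-contained sketch of the condition added in the diff above.
interface ResponsesGateInput {
  providerName: string;
  // true when the provider implements _streamResponses
  hasStreamResponses: boolean;
  // true when isOSeriesOrGpt5Model(options.model) matches
  isOSeriesOrGpt5Model: boolean;
  // user-provided LLM option; undefined when not set
  useLegacyCompletionsEndpoint?: boolean;
}

// Route OpenAI o-series / GPT-5 models through the Responses endpoint only
// when the user has not opted into the legacy completions endpoint. The
// `!== true` comparison keeps an unset (undefined) option on the new path.
function shouldUseResponsesEndpoint(input: ResponsesGateInput): boolean {
  return (
    input.providerName === "openai" &&
    input.hasStreamResponses &&
    input.isOSeriesOrGpt5Model &&
    input.useLegacyCompletionsEndpoint !== true
  );
}

// Example: an o-series model with the legacy option set falls back.
shouldUseResponsesEndpoint({
  providerName: "openai",
  hasStreamResponses: true,
  isOSeriesOrGpt5Model: true,
  useLegacyCompletionsEndpoint: true,
}); // => false
```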