mirror of
https://github.com/openai/codex.git
synced 2026-04-24 14:45:27 +00:00
convert to model provider
This commit is contained in:
@@ -444,7 +444,7 @@ Currently, `"vscode"` is the default, though Codex does not verify VS Code is in
|
||||
|
||||
## hide_agent_reasoning
|
||||
|
||||
Codex intermittently emits "reasoning" events that show the model’s internal "thinking" before it produces a final answer. Some users may find these events distracting, especially in CI logs or minimal terminal output.
|
||||
Codex intermittently emits "reasoning" events that show the model's internal "thinking" before it produces a final answer. Some users may find these events distracting, especially in CI logs or minimal terminal output.
|
||||
|
||||
Setting `hide_agent_reasoning` to `true` suppresses these events in **both** the TUI as well as the headless `exec` sub-command:
|
||||
|
||||
@@ -466,19 +466,33 @@ This is analogous to `model_context_window`, but for the maximum number of outpu
|
||||
|
||||
Maximum number of bytes to read from an `AGENTS.md` file to include in the instructions sent with the first turn of a session. Defaults to 32 KiB.
|
||||
|
||||
## openai_request_max_retries
|
||||
## Per-provider network tuning
|
||||
|
||||
The following optional settings control retry behaviour and streaming idle timeouts **per model provider**. They must be specified inside the corresponding `[model_providers.<id>]` block in `config.toml`. (Older releases accepted top‑level keys; those are now ignored.)
|
||||
|
||||
Example:
|
||||
|
||||
```toml
|
||||
[model_providers.openai]
|
||||
name = "OpenAI"
|
||||
base_url = "https://api.openai.com/v1"
|
||||
env_key = "OPENAI_API_KEY"
|
||||
# network tuning overrides (all optional; falls back to built‑in defaults)
|
||||
openai_request_max_retries = 4 # retry failed HTTP requests
|
||||
openai_stream_max_retries = 10 # retry dropped SSE streams
|
||||
openai_stream_idle_timeout_ms = 300000 # 5m idle timeout
|
||||
```
|
||||
|
||||
### openai_request_max_retries
|
||||
How many times Codex will retry a failed HTTP request to the model provider. Defaults to `4`.
|
||||
|
||||
## openai_stream_max_retries
|
||||
|
||||
### openai_stream_max_retries
|
||||
Number of times Codex will attempt to reconnect when a streaming response is interrupted. Defaults to `10`.
|
||||
|
||||
## openai_stream_idle_timeout_ms
|
||||
|
||||
### openai_stream_idle_timeout_ms
|
||||
How long Codex will wait for activity on a streaming response before treating the connection as lost. Defaults to `300000` (5 minutes).
|
||||
|
||||
## tui
|
||||
### tui
|
||||
|
||||
Options that are specific to the TUI.
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@ pub(crate) async fn stream_chat_completions(
|
||||
model: &str,
|
||||
client: &reqwest::Client,
|
||||
provider: &ModelProviderInfo,
|
||||
config: &Config,
|
||||
_config: &Config,
|
||||
) -> Result<ResponseStream> {
|
||||
// Build messages array
|
||||
let mut messages = Vec::<serde_json::Value>::new();
|
||||
@@ -121,6 +121,7 @@ pub(crate) async fn stream_chat_completions(
|
||||
);
|
||||
|
||||
let mut attempt = 0;
|
||||
let max_retries = provider.request_max_retries();
|
||||
loop {
|
||||
attempt += 1;
|
||||
|
||||
@@ -139,7 +140,7 @@ pub(crate) async fn stream_chat_completions(
|
||||
tokio::spawn(process_chat_sse(
|
||||
stream,
|
||||
tx_event,
|
||||
config.openai_stream_idle_timeout_ms,
|
||||
provider.stream_idle_timeout(),
|
||||
));
|
||||
return Ok(ResponseStream { rx_event });
|
||||
}
|
||||
@@ -150,7 +151,7 @@ pub(crate) async fn stream_chat_completions(
|
||||
return Err(CodexErr::UnexpectedStatus(status, body));
|
||||
}
|
||||
|
||||
if attempt > config.openai_request_max_retries {
|
||||
if attempt > max_retries {
|
||||
return Err(CodexErr::RetryLimit(status));
|
||||
}
|
||||
|
||||
@@ -166,7 +167,7 @@ pub(crate) async fn stream_chat_completions(
|
||||
tokio::time::sleep(delay).await;
|
||||
}
|
||||
Err(e) => {
|
||||
if attempt > config.openai_request_max_retries {
|
||||
if attempt > max_retries {
|
||||
return Err(e.into());
|
||||
}
|
||||
let delay = backoff(attempt);
|
||||
|
||||
@@ -29,7 +29,7 @@ use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::Result;
|
||||
use crate::flags::CODEX_RS_SSE_FIXTURE;
|
||||
use crate::flags::OPENAI_STREAM_IDLE_TIMEOUT_MS; // retained for default config
|
||||
use crate::flags::OPENAI_STREAM_IDLE_TIMEOUT_MS;
|
||||
use crate::model_provider_info::ModelProviderInfo;
|
||||
use crate::model_provider_info::WireApi;
|
||||
use crate::models::ResponseItem;
|
||||
@@ -140,6 +140,7 @@ impl ModelClient {
|
||||
);
|
||||
|
||||
let mut attempt = 0;
|
||||
let max_retries = self.provider.request_max_retries();
|
||||
loop {
|
||||
attempt += 1;
|
||||
|
||||
@@ -160,7 +161,7 @@ impl ModelClient {
|
||||
tokio::spawn(process_sse(
|
||||
stream,
|
||||
tx_event,
|
||||
self.config.openai_stream_idle_timeout_ms,
|
||||
self.provider.stream_idle_timeout(),
|
||||
));
|
||||
|
||||
return Ok(ResponseStream { rx_event });
|
||||
@@ -180,7 +181,7 @@ impl ModelClient {
|
||||
return Err(CodexErr::UnexpectedStatus(status, body));
|
||||
}
|
||||
|
||||
if attempt > self.config.openai_request_max_retries {
|
||||
if attempt > max_retries {
|
||||
return Err(CodexErr::RetryLimit(status));
|
||||
}
|
||||
|
||||
@@ -197,7 +198,7 @@ impl ModelClient {
|
||||
tokio::time::sleep(delay).await;
|
||||
}
|
||||
Err(e) => {
|
||||
if attempt > self.config.openai_request_max_retries {
|
||||
if attempt > max_retries {
|
||||
return Err(e.into());
|
||||
}
|
||||
let delay = backoff(attempt);
|
||||
@@ -327,7 +328,7 @@ async fn process_sse<S>(
|
||||
// duplicated `output` array embedded in the `response.completed`
|
||||
// payload. That produced two concrete issues:
|
||||
// 1. No real‑time streaming – the user only saw output after the
|
||||
// entire turn had finished, which broke the “typing” UX and
|
||||
// entire turn had finished, which broke the "typing" UX and
|
||||
// made long‑running turns look stalled.
|
||||
// 2. Duplicate `function_call_output` items – both the
|
||||
// individual *and* the completed array were forwarded, which
|
||||
|
||||
@@ -1025,12 +1025,14 @@ async fn run_turn(
|
||||
Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted),
|
||||
Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)),
|
||||
Err(e) => {
|
||||
if retries < sess.client.config().openai_stream_max_retries {
|
||||
// Use the configured provider-specific stream retry budget.
|
||||
let max_retries = sess.client.config().model_provider.stream_max_retries();
|
||||
if retries < max_retries {
|
||||
retries += 1;
|
||||
let delay = backoff(retries);
|
||||
warn!(
|
||||
"stream disconnected - retrying turn ({retries}/{} in {delay:?})...",
|
||||
sess.client.config().openai_stream_max_retries
|
||||
max_retries
|
||||
);
|
||||
|
||||
// Surface retry information to any UI/front‑end so the
|
||||
@@ -1040,8 +1042,7 @@ async fn run_turn(
|
||||
&sub_id,
|
||||
format!(
|
||||
"stream error: {e}; retrying {retries}/{} in {:?}…",
|
||||
sess.client.config().openai_stream_max_retries,
|
||||
delay
|
||||
max_retries, delay
|
||||
),
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -10,9 +10,6 @@ use crate::config_types::ShellEnvironmentPolicyToml;
|
||||
use crate::config_types::Tui;
|
||||
use crate::config_types::UriBasedFileOpener;
|
||||
use crate::flags::OPENAI_DEFAULT_MODEL;
|
||||
use crate::flags::OPENAI_REQUEST_MAX_RETRIES;
|
||||
use crate::flags::OPENAI_STREAM_IDLE_TIMEOUT_MS;
|
||||
use crate::flags::OPENAI_STREAM_MAX_RETRIES;
|
||||
use crate::model_provider_info::ModelProviderInfo;
|
||||
use crate::model_provider_info::built_in_model_providers;
|
||||
use crate::openai_model_info::get_model_info;
|
||||
@@ -23,7 +20,6 @@ use serde::Deserialize;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::time::Duration;
|
||||
use toml::Value as TomlValue;
|
||||
|
||||
/// Maximum number of bytes of the documentation that will be embedded. Larger
|
||||
@@ -141,15 +137,6 @@ pub struct Config {
|
||||
|
||||
/// Base URL for requests to ChatGPT (as opposed to the OpenAI API).
|
||||
pub chatgpt_base_url: String,
|
||||
|
||||
/// Maximum number of retries for failed HTTP requests to the model provider.
|
||||
pub openai_request_max_retries: u64,
|
||||
|
||||
/// Maximum number of retries for a dropped SSE stream.
|
||||
pub openai_stream_max_retries: u64,
|
||||
|
||||
/// Idle timeout for streaming responses.
|
||||
pub openai_stream_idle_timeout_ms: Duration,
|
||||
}
|
||||
|
||||
impl Config {
|
||||
@@ -334,10 +321,6 @@ pub struct ConfigToml {
|
||||
|
||||
/// Base URL for requests to ChatGPT (as opposed to the OpenAI API).
|
||||
pub chatgpt_base_url: Option<String>,
|
||||
|
||||
pub openai_request_max_retries: Option<u64>,
|
||||
pub openai_stream_max_retries: Option<u64>,
|
||||
pub openai_stream_idle_timeout_ms: Option<u64>,
|
||||
}
|
||||
|
||||
impl ConfigToml {
|
||||
@@ -511,17 +494,6 @@ impl Config {
|
||||
.chatgpt_base_url
|
||||
.or(cfg.chatgpt_base_url)
|
||||
.unwrap_or("https://chatgpt.com/backend-api/".to_string()),
|
||||
|
||||
openai_request_max_retries: cfg
|
||||
.openai_request_max_retries
|
||||
.unwrap_or(*OPENAI_REQUEST_MAX_RETRIES),
|
||||
openai_stream_max_retries: cfg
|
||||
.openai_stream_max_retries
|
||||
.unwrap_or(*OPENAI_STREAM_MAX_RETRIES),
|
||||
openai_stream_idle_timeout_ms: cfg
|
||||
.openai_stream_idle_timeout_ms
|
||||
.map(Duration::from_millis)
|
||||
.unwrap_or(*OPENAI_STREAM_IDLE_TIMEOUT_MS),
|
||||
};
|
||||
Ok(config)
|
||||
}
|
||||
@@ -750,6 +722,9 @@ disable_response_storage = true
|
||||
query_params: None,
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
openai_request_max_retries: None,
|
||||
openai_stream_max_retries: None,
|
||||
openai_stream_idle_timeout_ms: None,
|
||||
};
|
||||
let model_provider_map = {
|
||||
let mut model_provider_map = built_in_model_providers();
|
||||
@@ -828,9 +803,6 @@ disable_response_storage = true
|
||||
model_reasoning_summary: ReasoningSummary::Detailed,
|
||||
model_supports_reasoning_summaries: false,
|
||||
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
|
||||
openai_request_max_retries: *OPENAI_REQUEST_MAX_RETRIES,
|
||||
openai_stream_max_retries: *OPENAI_STREAM_MAX_RETRIES,
|
||||
openai_stream_idle_timeout_ms: *OPENAI_STREAM_IDLE_TIMEOUT_MS,
|
||||
},
|
||||
o3_profile_config
|
||||
);
|
||||
@@ -877,9 +849,6 @@ disable_response_storage = true
|
||||
model_reasoning_summary: ReasoningSummary::default(),
|
||||
model_supports_reasoning_summaries: false,
|
||||
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
|
||||
openai_request_max_retries: *OPENAI_REQUEST_MAX_RETRIES,
|
||||
openai_stream_max_retries: *OPENAI_STREAM_MAX_RETRIES,
|
||||
openai_stream_idle_timeout_ms: *OPENAI_STREAM_IDLE_TIMEOUT_MS,
|
||||
};
|
||||
|
||||
assert_eq!(expected_gpt3_profile_config, gpt3_profile_config);
|
||||
@@ -941,9 +910,6 @@ disable_response_storage = true
|
||||
model_reasoning_summary: ReasoningSummary::default(),
|
||||
model_supports_reasoning_summaries: false,
|
||||
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
|
||||
openai_request_max_retries: *OPENAI_REQUEST_MAX_RETRIES,
|
||||
openai_stream_max_retries: *OPENAI_STREAM_MAX_RETRIES,
|
||||
openai_stream_idle_timeout_ms: *OPENAI_STREAM_IDLE_TIMEOUT_MS,
|
||||
};
|
||||
|
||||
assert_eq!(expected_zdr_profile_config, zdr_profile_config);
|
||||
|
||||
@@ -9,8 +9,12 @@ use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use std::collections::HashMap;
|
||||
use std::env::VarError;
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::error::EnvVarError;
|
||||
use crate::flags::OPENAI_REQUEST_MAX_RETRIES;
|
||||
use crate::flags::OPENAI_STREAM_IDLE_TIMEOUT_MS;
|
||||
use crate::flags::OPENAI_STREAM_MAX_RETRIES;
|
||||
use crate::openai_api_key::get_openai_api_key;
|
||||
|
||||
/// Value for the `OpenAI-Originator` header that is sent with requests to
|
||||
@@ -26,7 +30,7 @@ const OPENAI_ORIGINATOR_HEADER: &str = "codex_cli_rs";
|
||||
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum WireApi {
|
||||
/// The experimental “Responses” API exposed by OpenAI at `/v1/responses`.
|
||||
/// The experimental "Responses" API exposed by OpenAI at `/v1/responses`.
|
||||
Responses,
|
||||
|
||||
/// Regular Chat Completions compatible with `/v1/chat/completions`.
|
||||
@@ -64,6 +68,21 @@ pub struct ModelProviderInfo {
|
||||
/// value should be used. If the environment variable is not set, or the
|
||||
/// value is empty, the header will not be included in the request.
|
||||
pub env_http_headers: Option<HashMap<String, String>>,
|
||||
|
||||
/// Maximum number of times to retry a failed HTTP request to this provider.
|
||||
/// When `None`, falls back to the global default from `OPENAI_REQUEST_MAX_RETRIES` (currently 4).
|
||||
#[serde(default)]
|
||||
pub openai_request_max_retries: Option<u64>,
|
||||
|
||||
/// Number of times to retry reconnecting a dropped streaming response before failing.
|
||||
/// When `None`, falls back to `OPENAI_STREAM_MAX_RETRIES` (currently 10).
|
||||
#[serde(default)]
|
||||
pub openai_stream_max_retries: Option<u64>,
|
||||
|
||||
/// Idle timeout (in milliseconds) to wait for activity on a streaming response before treating
|
||||
/// the connection as lost. When `None`, falls back to `OPENAI_STREAM_IDLE_TIMEOUT_MS` (currently 5m).
|
||||
#[serde(default)]
|
||||
pub openai_stream_idle_timeout_ms: Option<u64>,
|
||||
}
|
||||
|
||||
impl ModelProviderInfo {
|
||||
@@ -161,6 +180,25 @@ impl ModelProviderInfo {
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
/// Effective maximum number of request retries for this provider.
|
||||
pub fn request_max_retries(&self) -> u64 {
|
||||
self.openai_request_max_retries
|
||||
.unwrap_or(*OPENAI_REQUEST_MAX_RETRIES)
|
||||
}
|
||||
|
||||
/// Effective maximum number of stream reconnection attempts for this provider.
|
||||
pub fn stream_max_retries(&self) -> u64 {
|
||||
self.openai_stream_max_retries
|
||||
.unwrap_or(*OPENAI_STREAM_MAX_RETRIES)
|
||||
}
|
||||
|
||||
/// Effective idle timeout for streaming responses.
|
||||
pub fn stream_idle_timeout(&self) -> Duration {
|
||||
self.openai_stream_idle_timeout_ms
|
||||
.map(Duration::from_millis)
|
||||
.unwrap_or(*OPENAI_STREAM_IDLE_TIMEOUT_MS)
|
||||
}
|
||||
}
|
||||
|
||||
/// Built-in default provider list.
|
||||
@@ -205,6 +243,10 @@ pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
|
||||
.into_iter()
|
||||
.collect(),
|
||||
),
|
||||
// Use global defaults for retry/timeout unless overridden in config.toml.
|
||||
openai_request_max_retries: None,
|
||||
openai_stream_max_retries: None,
|
||||
openai_stream_idle_timeout_ms: None,
|
||||
},
|
||||
),
|
||||
]
|
||||
@@ -234,6 +276,9 @@ base_url = "http://localhost:11434/v1"
|
||||
query_params: None,
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
openai_request_max_retries: None,
|
||||
openai_stream_max_retries: None,
|
||||
openai_stream_idle_timeout_ms: None,
|
||||
};
|
||||
|
||||
let provider: ModelProviderInfo = toml::from_str(azure_provider_toml).unwrap();
|
||||
@@ -259,6 +304,9 @@ query_params = { api-version = "2025-04-01-preview" }
|
||||
}),
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
openai_request_max_retries: None,
|
||||
openai_stream_max_retries: None,
|
||||
openai_stream_idle_timeout_ms: None,
|
||||
};
|
||||
|
||||
let provider: ModelProviderInfo = toml::from_str(azure_provider_toml).unwrap();
|
||||
@@ -287,6 +335,9 @@ env_http_headers = { "X-Example-Env-Header" = "EXAMPLE_ENV_VAR" }
|
||||
env_http_headers: Some(maplit::hashmap! {
|
||||
"X-Example-Env-Header".to_string() => "EXAMPLE_ENV_VAR".to_string(),
|
||||
}),
|
||||
openai_request_max_retries: None,
|
||||
openai_stream_max_retries: None,
|
||||
openai_stream_idle_timeout_ms: None,
|
||||
};
|
||||
|
||||
let provider: ModelProviderInfo = toml::from_str(azure_provider_toml).unwrap();
|
||||
|
||||
@@ -46,9 +46,8 @@ async fn spawn_codex() -> Result<Codex, CodexErr> {
|
||||
);
|
||||
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&codex_home);
|
||||
config.openai_request_max_retries = 2;
|
||||
config.openai_stream_max_retries = 2;
|
||||
let config = load_default_config_for_test(&codex_home);
|
||||
// network retry/timeout tuning moved into ModelProviderInfo; using defaults
|
||||
let (agent, _init_id) = Codex::spawn(config, std::sync::Arc::new(Notify::new())).await?;
|
||||
|
||||
Ok(agent)
|
||||
@@ -67,7 +66,7 @@ async fn live_streaming_and_prev_id_reset() {
|
||||
|
||||
let codex = spawn_codex().await.unwrap();
|
||||
|
||||
// ---------- Task 1 ----------
|
||||
// ---------- Task 1 ----------
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![InputItem::Text {
|
||||
@@ -101,7 +100,7 @@ async fn live_streaming_and_prev_id_reset() {
|
||||
"Agent did not stream any AgentMessage before TaskComplete"
|
||||
);
|
||||
|
||||
// ---------- Task 2 (same session) ----------
|
||||
// ---------- Task 2 (same session) ----------
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![InputItem::Text {
|
||||
|
||||
@@ -102,13 +102,15 @@ async fn keeps_previous_response_id_between_tasks() {
|
||||
query_params: None,
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
// disable retries so we don't get duplicate calls in this test
|
||||
openai_request_max_retries: Some(0),
|
||||
openai_stream_max_retries: Some(0),
|
||||
openai_stream_idle_timeout_ms: None,
|
||||
};
|
||||
|
||||
// Init session
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&codex_home);
|
||||
config.openai_request_max_retries = 0;
|
||||
config.openai_stream_max_retries = 0;
|
||||
config.model_provider = model_provider;
|
||||
let ctrl_c = std::sync::Arc::new(tokio::sync::Notify::new());
|
||||
let (codex, _init_id) = Codex::spawn(config, ctrl_c.clone()).await.unwrap();
|
||||
|
||||
@@ -87,14 +87,15 @@ async fn retries_on_early_close() {
|
||||
query_params: None,
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
// exercise retry path: first attempt yields incomplete stream, so allow 1 retry
|
||||
openai_request_max_retries: Some(0),
|
||||
openai_stream_max_retries: Some(1),
|
||||
openai_stream_idle_timeout_ms: Some(2000),
|
||||
};
|
||||
|
||||
let ctrl_c = std::sync::Arc::new(tokio::sync::Notify::new());
|
||||
let codex_home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&codex_home);
|
||||
config.openai_request_max_retries = 0;
|
||||
config.openai_stream_max_retries = 1;
|
||||
config.openai_stream_idle_timeout_ms = Duration::from_millis(2000);
|
||||
config.model_provider = model_provider;
|
||||
let (codex, _init_id) = Codex::spawn(config, ctrl_c).await.unwrap();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user