convert to model provider

This commit is contained in:
Ahmed Ibrahim
2025-07-17 12:38:50 -07:00
parent 79825c08f1
commit 2bb8d37b12
9 changed files with 104 additions and 68 deletions

View File

@@ -444,7 +444,7 @@ Currently, `"vscode"` is the default, though Codex does not verify VS Code is in
## hide_agent_reasoning
Codex intermittently emits "reasoning" events that show the models internal "thinking" before it produces a final answer. Some users may find these events distracting, especially in CI logs or minimal terminal output.
Codex intermittently emits "reasoning" events that show the model's internal "thinking" before it produces a final answer. Some users may find these events distracting, especially in CI logs or minimal terminal output.
Setting `hide_agent_reasoning` to `true` suppresses these events in **both** the TUI as well as the headless `exec` sub-command:
@@ -466,19 +466,33 @@ This is analogous to `model_context_window`, but for the maximum number of outpu
Maximum number of bytes to read from an `AGENTS.md` file to include in the instructions sent with the first turn of a session. Defaults to 32 KiB.
## openai_request_max_retries
## Per-provider network tuning
The following optional settings control retry behaviour and streaming idle timeouts **per model provider**. They must be specified inside the corresponding `[model_providers.<id>]` block in `config.toml`. (Older releases accepted top-level keys; those are now ignored.)
Example:
```toml
[model_providers.openai]
name = "OpenAI"
base_url = "https://api.openai.com/v1"
env_key = "OPENAI_API_KEY"
# network tuning overrides (all optional; falls back to built-in defaults)
openai_request_max_retries = 4 # retry failed HTTP requests
openai_stream_max_retries = 10 # retry dropped SSE streams
openai_stream_idle_timeout_ms = 300000 # 5m idle timeout
```
### openai_request_max_retries
How many times Codex will retry a failed HTTP request to the model provider. Defaults to `4`.
## openai_stream_max_retries
### openai_stream_max_retries
Number of times Codex will attempt to reconnect when a streaming response is interrupted. Defaults to `10`.
## openai_stream_idle_timeout_ms
### openai_stream_idle_timeout_ms
How long Codex will wait for activity on a streaming response before treating the connection as lost. Defaults to `300000` (5 minutes).
## tui
### tui
Options that are specific to the TUI.

View File

@@ -33,7 +33,7 @@ pub(crate) async fn stream_chat_completions(
model: &str,
client: &reqwest::Client,
provider: &ModelProviderInfo,
config: &Config,
_config: &Config,
) -> Result<ResponseStream> {
// Build messages array
let mut messages = Vec::<serde_json::Value>::new();
@@ -121,6 +121,7 @@ pub(crate) async fn stream_chat_completions(
);
let mut attempt = 0;
let max_retries = provider.request_max_retries();
loop {
attempt += 1;
@@ -139,7 +140,7 @@ pub(crate) async fn stream_chat_completions(
tokio::spawn(process_chat_sse(
stream,
tx_event,
config.openai_stream_idle_timeout_ms,
provider.stream_idle_timeout(),
));
return Ok(ResponseStream { rx_event });
}
@@ -150,7 +151,7 @@ pub(crate) async fn stream_chat_completions(
return Err(CodexErr::UnexpectedStatus(status, body));
}
if attempt > config.openai_request_max_retries {
if attempt > max_retries {
return Err(CodexErr::RetryLimit(status));
}
@@ -166,7 +167,7 @@ pub(crate) async fn stream_chat_completions(
tokio::time::sleep(delay).await;
}
Err(e) => {
if attempt > config.openai_request_max_retries {
if attempt > max_retries {
return Err(e.into());
}
let delay = backoff(attempt);

View File

@@ -29,7 +29,7 @@ use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
use crate::error::CodexErr;
use crate::error::Result;
use crate::flags::CODEX_RS_SSE_FIXTURE;
use crate::flags::OPENAI_STREAM_IDLE_TIMEOUT_MS; // retained for default config
use crate::flags::OPENAI_STREAM_IDLE_TIMEOUT_MS;
use crate::model_provider_info::ModelProviderInfo;
use crate::model_provider_info::WireApi;
use crate::models::ResponseItem;
@@ -140,6 +140,7 @@ impl ModelClient {
);
let mut attempt = 0;
let max_retries = self.provider.request_max_retries();
loop {
attempt += 1;
@@ -160,7 +161,7 @@ impl ModelClient {
tokio::spawn(process_sse(
stream,
tx_event,
self.config.openai_stream_idle_timeout_ms,
self.provider.stream_idle_timeout(),
));
return Ok(ResponseStream { rx_event });
@@ -180,7 +181,7 @@ impl ModelClient {
return Err(CodexErr::UnexpectedStatus(status, body));
}
if attempt > self.config.openai_request_max_retries {
if attempt > max_retries {
return Err(CodexErr::RetryLimit(status));
}
@@ -197,7 +198,7 @@ impl ModelClient {
tokio::time::sleep(delay).await;
}
Err(e) => {
if attempt > self.config.openai_request_max_retries {
if attempt > max_retries {
return Err(e.into());
}
let delay = backoff(attempt);
@@ -327,7 +328,7 @@ async fn process_sse<S>(
// duplicated `output` array embedded in the `response.completed`
// payload. That produced two concrete issues:
// 1. No realtime streaming — the user only saw output after the
// entire turn had finished, which broke the typing UX and
// entire turn had finished, which broke the "typing" UX and
// made long-running turns look stalled.
// 2. Duplicate `function_call_output` items — both the
// individual *and* the completed array were forwarded, which

View File

@@ -1025,12 +1025,14 @@ async fn run_turn(
Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted),
Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)),
Err(e) => {
if retries < sess.client.config().openai_stream_max_retries {
// Use the configured provider-specific stream retry budget.
let max_retries = sess.client.config().model_provider.stream_max_retries();
if retries < max_retries {
retries += 1;
let delay = backoff(retries);
warn!(
"stream disconnected - retrying turn ({retries}/{} in {delay:?})...",
sess.client.config().openai_stream_max_retries
max_retries
);
// Surface retry information to any UI/frontend so the
@@ -1040,8 +1042,7 @@ async fn run_turn(
&sub_id,
format!(
"stream error: {e}; retrying {retries}/{} in {:?}",
sess.client.config().openai_stream_max_retries,
delay
max_retries, delay
),
)
.await;

View File

@@ -10,9 +10,6 @@ use crate::config_types::ShellEnvironmentPolicyToml;
use crate::config_types::Tui;
use crate::config_types::UriBasedFileOpener;
use crate::flags::OPENAI_DEFAULT_MODEL;
use crate::flags::OPENAI_REQUEST_MAX_RETRIES;
use crate::flags::OPENAI_STREAM_IDLE_TIMEOUT_MS;
use crate::flags::OPENAI_STREAM_MAX_RETRIES;
use crate::model_provider_info::ModelProviderInfo;
use crate::model_provider_info::built_in_model_providers;
use crate::openai_model_info::get_model_info;
@@ -23,7 +20,6 @@ use serde::Deserialize;
use std::collections::HashMap;
use std::path::Path;
use std::path::PathBuf;
use std::time::Duration;
use toml::Value as TomlValue;
/// Maximum number of bytes of the documentation that will be embedded. Larger
@@ -141,15 +137,6 @@ pub struct Config {
/// Base URL for requests to ChatGPT (as opposed to the OpenAI API).
pub chatgpt_base_url: String,
/// Maximum number of retries for failed HTTP requests to the model provider.
pub openai_request_max_retries: u64,
/// Maximum number of retries for a dropped SSE stream.
pub openai_stream_max_retries: u64,
/// Idle timeout for streaming responses.
pub openai_stream_idle_timeout_ms: Duration,
}
impl Config {
@@ -334,10 +321,6 @@ pub struct ConfigToml {
/// Base URL for requests to ChatGPT (as opposed to the OpenAI API).
pub chatgpt_base_url: Option<String>,
pub openai_request_max_retries: Option<u64>,
pub openai_stream_max_retries: Option<u64>,
pub openai_stream_idle_timeout_ms: Option<u64>,
}
impl ConfigToml {
@@ -511,17 +494,6 @@ impl Config {
.chatgpt_base_url
.or(cfg.chatgpt_base_url)
.unwrap_or("https://chatgpt.com/backend-api/".to_string()),
openai_request_max_retries: cfg
.openai_request_max_retries
.unwrap_or(*OPENAI_REQUEST_MAX_RETRIES),
openai_stream_max_retries: cfg
.openai_stream_max_retries
.unwrap_or(*OPENAI_STREAM_MAX_RETRIES),
openai_stream_idle_timeout_ms: cfg
.openai_stream_idle_timeout_ms
.map(Duration::from_millis)
.unwrap_or(*OPENAI_STREAM_IDLE_TIMEOUT_MS),
};
Ok(config)
}
@@ -750,6 +722,9 @@ disable_response_storage = true
query_params: None,
http_headers: None,
env_http_headers: None,
openai_request_max_retries: None,
openai_stream_max_retries: None,
openai_stream_idle_timeout_ms: None,
};
let model_provider_map = {
let mut model_provider_map = built_in_model_providers();
@@ -828,9 +803,6 @@ disable_response_storage = true
model_reasoning_summary: ReasoningSummary::Detailed,
model_supports_reasoning_summaries: false,
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
openai_request_max_retries: *OPENAI_REQUEST_MAX_RETRIES,
openai_stream_max_retries: *OPENAI_STREAM_MAX_RETRIES,
openai_stream_idle_timeout_ms: *OPENAI_STREAM_IDLE_TIMEOUT_MS,
},
o3_profile_config
);
@@ -877,9 +849,6 @@ disable_response_storage = true
model_reasoning_summary: ReasoningSummary::default(),
model_supports_reasoning_summaries: false,
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
openai_request_max_retries: *OPENAI_REQUEST_MAX_RETRIES,
openai_stream_max_retries: *OPENAI_STREAM_MAX_RETRIES,
openai_stream_idle_timeout_ms: *OPENAI_STREAM_IDLE_TIMEOUT_MS,
};
assert_eq!(expected_gpt3_profile_config, gpt3_profile_config);
@@ -941,9 +910,6 @@ disable_response_storage = true
model_reasoning_summary: ReasoningSummary::default(),
model_supports_reasoning_summaries: false,
chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
openai_request_max_retries: *OPENAI_REQUEST_MAX_RETRIES,
openai_stream_max_retries: *OPENAI_STREAM_MAX_RETRIES,
openai_stream_idle_timeout_ms: *OPENAI_STREAM_IDLE_TIMEOUT_MS,
};
assert_eq!(expected_zdr_profile_config, zdr_profile_config);

View File

@@ -9,8 +9,12 @@ use serde::Deserialize;
use serde::Serialize;
use std::collections::HashMap;
use std::env::VarError;
use std::time::Duration;
use crate::error::EnvVarError;
use crate::flags::OPENAI_REQUEST_MAX_RETRIES;
use crate::flags::OPENAI_STREAM_IDLE_TIMEOUT_MS;
use crate::flags::OPENAI_STREAM_MAX_RETRIES;
use crate::openai_api_key::get_openai_api_key;
/// Value for the `OpenAI-Originator` header that is sent with requests to
@@ -26,7 +30,7 @@ const OPENAI_ORIGINATOR_HEADER: &str = "codex_cli_rs";
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum WireApi {
/// The experimental Responses API exposed by OpenAI at `/v1/responses`.
/// The experimental "Responses" API exposed by OpenAI at `/v1/responses`.
Responses,
/// Regular Chat Completions compatible with `/v1/chat/completions`.
@@ -64,6 +68,21 @@ pub struct ModelProviderInfo {
/// value should be used. If the environment variable is not set, or the
/// value is empty, the header will not be included in the request.
pub env_http_headers: Option<HashMap<String, String>>,
/// Maximum number of times to retry a failed HTTP request to this provider.
/// When `None`, falls back to the global default from `OPENAI_REQUEST_MAX_RETRIES` (currently 4).
#[serde(default)]
pub openai_request_max_retries: Option<u64>,
/// Number of times to retry reconnecting a dropped streaming response before failing.
/// When `None`, falls back to `OPENAI_STREAM_MAX_RETRIES` (currently 10).
#[serde(default)]
pub openai_stream_max_retries: Option<u64>,
/// Idle timeout (in milliseconds) to wait for activity on a streaming response before treating
/// the connection as lost. When `None`, falls back to `OPENAI_STREAM_IDLE_TIMEOUT_MS` (currently 5m).
#[serde(default)]
pub openai_stream_idle_timeout_ms: Option<u64>,
}
impl ModelProviderInfo {
@@ -161,6 +180,25 @@ impl ModelProviderInfo {
None => Ok(None),
}
}
/// Effective maximum number of request retries for this provider.
pub fn request_max_retries(&self) -> u64 {
self.openai_request_max_retries
.unwrap_or(*OPENAI_REQUEST_MAX_RETRIES)
}
/// Effective maximum number of stream reconnection attempts for this provider.
pub fn stream_max_retries(&self) -> u64 {
self.openai_stream_max_retries
.unwrap_or(*OPENAI_STREAM_MAX_RETRIES)
}
/// Effective idle timeout for streaming responses.
pub fn stream_idle_timeout(&self) -> Duration {
self.openai_stream_idle_timeout_ms
.map(Duration::from_millis)
.unwrap_or(*OPENAI_STREAM_IDLE_TIMEOUT_MS)
}
}
/// Built-in default provider list.
@@ -205,6 +243,10 @@ pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
.into_iter()
.collect(),
),
// Use global defaults for retry/timeout unless overridden in config.toml.
openai_request_max_retries: None,
openai_stream_max_retries: None,
openai_stream_idle_timeout_ms: None,
},
),
]
@@ -234,6 +276,9 @@ base_url = "http://localhost:11434/v1"
query_params: None,
http_headers: None,
env_http_headers: None,
openai_request_max_retries: None,
openai_stream_max_retries: None,
openai_stream_idle_timeout_ms: None,
};
let provider: ModelProviderInfo = toml::from_str(azure_provider_toml).unwrap();
@@ -259,6 +304,9 @@ query_params = { api-version = "2025-04-01-preview" }
}),
http_headers: None,
env_http_headers: None,
openai_request_max_retries: None,
openai_stream_max_retries: None,
openai_stream_idle_timeout_ms: None,
};
let provider: ModelProviderInfo = toml::from_str(azure_provider_toml).unwrap();
@@ -287,6 +335,9 @@ env_http_headers = { "X-Example-Env-Header" = "EXAMPLE_ENV_VAR" }
env_http_headers: Some(maplit::hashmap! {
"X-Example-Env-Header".to_string() => "EXAMPLE_ENV_VAR".to_string(),
}),
openai_request_max_retries: None,
openai_stream_max_retries: None,
openai_stream_idle_timeout_ms: None,
};
let provider: ModelProviderInfo = toml::from_str(azure_provider_toml).unwrap();

View File

@@ -46,9 +46,8 @@ async fn spawn_codex() -> Result<Codex, CodexErr> {
);
let codex_home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&codex_home);
config.openai_request_max_retries = 2;
config.openai_stream_max_retries = 2;
let config = load_default_config_for_test(&codex_home);
// network retry/timeout tuning moved into ModelProviderInfo; using defaults
let (agent, _init_id) = Codex::spawn(config, std::sync::Arc::new(Notify::new())).await?;
Ok(agent)
@@ -67,7 +66,7 @@ async fn live_streaming_and_prev_id_reset() {
let codex = spawn_codex().await.unwrap();
// ---------- Task 1 ----------
// ---------- Task 1 ----------
codex
.submit(Op::UserInput {
items: vec![InputItem::Text {
@@ -101,7 +100,7 @@ async fn live_streaming_and_prev_id_reset() {
"Agent did not stream any AgentMessage before TaskComplete"
);
// ---------- Task 2 (same session) ----------
// ---------- Task 2 (same session) ----------
codex
.submit(Op::UserInput {
items: vec![InputItem::Text {

View File

@@ -102,13 +102,15 @@ async fn keeps_previous_response_id_between_tasks() {
query_params: None,
http_headers: None,
env_http_headers: None,
// disable retries so we don't get duplicate calls in this test
openai_request_max_retries: Some(0),
openai_stream_max_retries: Some(0),
openai_stream_idle_timeout_ms: None,
};
// Init session
let codex_home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&codex_home);
config.openai_request_max_retries = 0;
config.openai_stream_max_retries = 0;
config.model_provider = model_provider;
let ctrl_c = std::sync::Arc::new(tokio::sync::Notify::new());
let (codex, _init_id) = Codex::spawn(config, ctrl_c.clone()).await.unwrap();

View File

@@ -87,14 +87,15 @@ async fn retries_on_early_close() {
query_params: None,
http_headers: None,
env_http_headers: None,
// exercise retry path: first attempt yields incomplete stream, so allow 1 retry
openai_request_max_retries: Some(0),
openai_stream_max_retries: Some(1),
openai_stream_idle_timeout_ms: Some(2000),
};
let ctrl_c = std::sync::Arc::new(tokio::sync::Notify::new());
let codex_home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&codex_home);
config.openai_request_max_retries = 0;
config.openai_stream_max_retries = 1;
config.openai_stream_idle_timeout_ms = Duration::from_millis(2000);
config.model_provider = model_provider;
let (codex, _init_id) = Codex::spawn(config, ctrl_c).await.unwrap();