This commit is contained in:
Ahmed Ibrahim
2025-07-17 16:32:21 -07:00
parent 666a546adc
commit 9b3e1a8b56
10 changed files with 112 additions and 67 deletions

View File

@@ -92,6 +92,32 @@ http_headers = { "X-Example-Header" = "example-value" }
env_http_headers = { "X-Example-Features": "EXAMPLE_FEATURES" }
```
### Per-provider network tuning
The following optional settings control retry behavior and streaming idle timeouts **per model provider**. They must be specified inside the corresponding `[model_providers.<id>]` block in `config.toml`. (Older releases accepted top-level keys; those are now ignored.)
Example:
```toml
[model_providers.openai]
name = "OpenAI"
base_url = "https://api.openai.com/v1"
env_key = "OPENAI_API_KEY"
# network tuning overrides (all optional; fall back to built-in defaults)
request_max_retries = 4 # retry failed HTTP requests
openai_stream_max_retries = 10 # retry dropped SSE streams
openai_stream_idle_timeout_ms = 300000 # 5m idle timeout
```
#### request_max_retries
How many times Codex will retry a failed HTTP request to the model provider. Defaults to `4`.
#### openai_stream_max_retries
Number of times Codex will attempt to reconnect when a streaming response is interrupted. Defaults to `10`.
#### openai_stream_idle_timeout_ms
How long Codex will wait for activity on a streaming response before treating the connection as lost. Defaults to `300000` (5 minutes).
## model_provider
Identifies which provider to use from the `model_providers` map. Defaults to `"openai"`. You can override the `base_url` for the built-in `openai` provider via the `OPENAI_BASE_URL` environment variable.
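For example, to route requests through a different entry in the map (using the `ollama` built-in id as an assumed example; any key defined under `model_providers` works):
```toml
# Top-level key in config.toml; omit to use the default "openai" provider.
model_provider = "ollama"
```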
@@ -466,33 +492,7 @@ This is analogous to `model_context_window`, but for the maximum number of outpu
Maximum number of bytes to read from an `AGENTS.md` file to include in the instructions sent with the first turn of a session. Defaults to 32 KiB.
## Per-provider network tuning
The following optional settings control retry behaviour and streaming idle timeouts **per model provider**. They must be specified inside the corresponding `[model_providers.<id>]` block in `config.toml`. (Older releases accepted toplevel keys; those are now ignored.)
Example:
```toml
[model_providers.openai]
name = "OpenAI"
base_url = "https://api.openai.com/v1"
env_key = "OPENAI_API_KEY"
# network tuning overrides (all optional; falls back to builtin defaults)
openai_request_max_retries = 4 # retry failed HTTP requests
openai_stream_max_retries = 10 # retry dropped SSE streams
openai_stream_idle_timeout_ms = 300000 # 5m idle timeout
```
### openai_request_max_retries
How many times Codex will retry a failed HTTP request to the model provider. Defaults to `4`.
### openai_stream_max_retries
Number of times Codex will attempt to reconnect when a streaming response is interrupted. Defaults to `10`.
### openai_stream_idle_timeout_ms
How long Codex will wait for activity on a streaming response before treating the connection as lost. Defaults to `300000` (5 minutes).
### tui
## tui
Options that are specific to the TUI.

View File

@@ -19,7 +19,6 @@ use crate::ModelProviderInfo;
use crate::client_common::Prompt;
use crate::client_common::ResponseEvent;
use crate::client_common::ResponseStream;
use crate::config::Config;
use crate::error::CodexErr;
use crate::error::Result;
use crate::models::ContentItem;
@@ -33,7 +32,6 @@ pub(crate) async fn stream_chat_completions(
model: &str,
client: &reqwest::Client,
provider: &ModelProviderInfo,
_config: &Config,
) -> Result<ResponseStream> {
// Build messages array
let mut messages = Vec::<serde_json::Value>::new();

View File

@@ -29,7 +29,6 @@ use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
use crate::error::CodexErr;
use crate::error::Result;
use crate::flags::CODEX_RS_SSE_FIXTURE;
use crate::flags::OPENAI_STREAM_IDLE_TIMEOUT_MS;
use crate::model_provider_info::ModelProviderInfo;
use crate::model_provider_info::WireApi;
use crate::models::ResponseItem;
@@ -63,10 +62,6 @@ impl ModelClient {
}
}
pub fn config(&self) -> &Config {
&self.config
}
/// Dispatches to either the Responses or Chat implementation depending on
the provider config. Public callers always invoke `stream()`; the
/// specialised helpers are private to avoid accidental misuse.
@@ -80,7 +75,6 @@ impl ModelClient {
&self.config.model,
&self.client,
&self.provider,
&self.config,
)
.await?;
@@ -113,7 +107,7 @@ impl ModelClient {
if let Some(path) = &*CODEX_RS_SSE_FIXTURE {
// short circuit for tests
warn!(path, "Streaming from fixture");
return stream_from_fixture(path).await;
return stream_from_fixture(path, self.provider.clone()).await;
}
let full_instructions = prompt.get_full_instructions(&self.config.model);
@@ -207,6 +201,9 @@ impl ModelClient {
}
}
}
pub fn get_provider(&self) -> ModelProviderInfo {
self.provider.clone()
}
}
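Returning an owned clone here, rather than a `&ModelProviderInfo`, is presumably so callers such as `run_turn` (in a later file on this page) can read the retry budget without holding a borrow on the client across `await` points.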
#[derive(Debug, Deserialize, Serialize)]
@@ -401,7 +398,10 @@ async fn process_sse<S>(
}
/// used in tests to stream from a text SSE file
async fn stream_from_fixture(path: impl AsRef<Path>) -> Result<ResponseStream> {
async fn stream_from_fixture(
path: impl AsRef<Path>,
provider: ModelProviderInfo,
) -> Result<ResponseStream> {
let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(1600);
let f = std::fs::File::open(path.as_ref())?;
let lines = std::io::BufReader::new(f).lines();
@@ -418,7 +418,7 @@ async fn stream_from_fixture(path: impl AsRef<Path>) -> Result<ResponseStream> {
tokio::spawn(process_sse(
stream,
tx_event,
*OPENAI_STREAM_IDLE_TIMEOUT_MS,
provider.stream_idle_timeout(),
));
Ok(ResponseStream { rx_event })
}
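Both the fixture path and the real streaming path now take the idle timeout from the provider. The body of `stream_idle_timeout()` is outside this diff; a minimal sketch of what it presumably does, mirroring the `request_max_retries()` accessor shown later in this commit:
```rust
use std::time::Duration;

impl ModelProviderInfo {
    /// Effective idle timeout for streaming responses (assumed implementation;
    /// falls back to the global OPENAI_STREAM_IDLE_TIMEOUT_MS flag when unset).
    pub fn stream_idle_timeout(&self) -> Duration {
        self.openai_stream_idle_timeout_ms
            .map(Duration::from_millis)
            .unwrap_or(*OPENAI_STREAM_IDLE_TIMEOUT_MS)
    }
}
```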
@@ -439,7 +439,10 @@ mod tests {
/// Runs the SSE parser on pre-chunked byte slices and returns every event
/// (including any final `Err` from a stream-closure check).
async fn collect_events(chunks: &[&[u8]]) -> Vec<Result<ResponseEvent>> {
async fn collect_events(
chunks: &[&[u8]],
provider: ModelProviderInfo,
) -> Vec<Result<ResponseEvent>> {
let mut builder = IoBuilder::new();
for chunk in chunks {
builder.read(chunk);
@@ -448,7 +451,7 @@ mod tests {
let reader = builder.build();
let stream = ReaderStream::new(reader).map_err(CodexErr::Io);
let (tx, mut rx) = mpsc::channel::<Result<ResponseEvent>>(16);
tokio::spawn(process_sse(stream, tx, *OPENAI_STREAM_IDLE_TIMEOUT_MS));
tokio::spawn(process_sse(stream, tx, provider.stream_idle_timeout()));
let mut events = Vec::new();
while let Some(ev) = rx.recv().await {
@@ -459,7 +462,10 @@ mod tests {
/// Builds an in-memory SSE stream from JSON fixtures and returns only the
/// successfully parsed events (panics on internal channel errors).
async fn run_sse(events: Vec<serde_json::Value>) -> Vec<ResponseEvent> {
async fn run_sse(
events: Vec<serde_json::Value>,
provider: ModelProviderInfo,
) -> Vec<ResponseEvent> {
let mut body = String::new();
for e in events {
let kind = e
@@ -475,7 +481,7 @@ mod tests {
let (tx, mut rx) = mpsc::channel::<Result<ResponseEvent>>(8);
let stream = ReaderStream::new(std::io::Cursor::new(body)).map_err(CodexErr::Io);
tokio::spawn(process_sse(stream, tx, *OPENAI_STREAM_IDLE_TIMEOUT_MS));
tokio::spawn(process_sse(stream, tx, provider.stream_idle_timeout()));
let mut out = Vec::new();
while let Some(ev) = rx.recv().await {
@@ -520,7 +526,25 @@ mod tests {
let sse2 = format!("event: response.output_item.done\ndata: {item2}\n\n");
let sse3 = format!("event: response.completed\ndata: {completed}\n\n");
let events = collect_events(&[sse1.as_bytes(), sse2.as_bytes(), sse3.as_bytes()]).await;
let provider = ModelProviderInfo {
name: "test".to_string(),
base_url: "https://test.com".to_string(),
env_key: Some("TEST_API_KEY".to_string()),
env_key_instructions: None,
wire_api: WireApi::Responses,
query_params: None,
http_headers: None,
env_http_headers: None,
request_max_retries: Some(0),
openai_stream_max_retries: Some(0),
openai_stream_idle_timeout_ms: Some(1000),
};
let events = collect_events(
&[sse1.as_bytes(), sse2.as_bytes(), sse3.as_bytes()],
provider,
)
.await;
assert_eq!(events.len(), 3);
@@ -561,8 +585,21 @@ mod tests {
.to_string();
let sse1 = format!("event: response.output_item.done\ndata: {item1}\n\n");
let provider = ModelProviderInfo {
name: "test".to_string(),
base_url: "https://test.com".to_string(),
env_key: Some("TEST_API_KEY".to_string()),
env_key_instructions: None,
wire_api: WireApi::Responses,
query_params: None,
http_headers: None,
env_http_headers: None,
request_max_retries: Some(0),
openai_stream_max_retries: Some(0),
openai_stream_idle_timeout_ms: Some(1000),
};
let events = collect_events(&[sse1.as_bytes()]).await;
let events = collect_events(&[sse1.as_bytes()], provider).await;
assert_eq!(events.len(), 2);
@@ -650,7 +687,21 @@ mod tests {
let mut evs = vec![case.event];
evs.push(completed.clone());
let out = run_sse(evs).await;
let provider = ModelProviderInfo {
name: "test".to_string(),
base_url: "https://test.com".to_string(),
env_key: Some("TEST_API_KEY".to_string()),
env_key_instructions: None,
wire_api: WireApi::Responses,
query_params: None,
http_headers: None,
env_http_headers: None,
request_max_retries: Some(0),
openai_stream_max_retries: Some(0),
openai_stream_idle_timeout_ms: Some(1000),
};
let out = run_sse(evs, provider).await;
assert_eq!(out.len(), case.expected_len, "case {}", case.name);
assert!(
(case.expect_first)(&out[0]),
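The same `ModelProviderInfo` literal is repeated in all three tests above; a small constructor (hypothetical, not part of this commit) would keep the fixture in one place:
```rust
// Hypothetical test helper: retries disabled so a failure is never masked
// by a retry, with a short 1s idle timeout.
fn test_provider() -> ModelProviderInfo {
    ModelProviderInfo {
        name: "test".to_string(),
        base_url: "https://test.com".to_string(),
        env_key: Some("TEST_API_KEY".to_string()),
        env_key_instructions: None,
        wire_api: WireApi::Responses,
        query_params: None,
        http_headers: None,
        env_http_headers: None,
        request_max_retries: Some(0),
        openai_stream_max_retries: Some(0),
        openai_stream_idle_timeout_ms: Some(1000),
    }
}
```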

View File

@@ -1026,7 +1026,7 @@ async fn run_turn(
Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)),
Err(e) => {
// Use the configured provider-specific stream retry budget.
let max_retries = sess.client.config().model_provider.stream_max_retries();
let max_retries = sess.client.get_provider().stream_max_retries();
if retries < max_retries {
retries += 1;
let delay = backoff(retries);
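`backoff` is defined elsewhere in this file and not shown in the hunk; a representative sketch (assumed: capped exponential backoff, not necessarily the crate's exact curve):
```rust
use std::time::Duration;

// Hypothetical retry delay: 200ms doubling per attempt, capped at 10s.
fn backoff(attempt: u64) -> Duration {
    let base_ms: u64 = 200;
    let delay_ms = base_ms.saturating_mul(1u64 << attempt.min(6));
    Duration::from_millis(delay_ms.min(10_000))
}
```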

View File

@@ -682,6 +682,9 @@ name = "OpenAI using Chat Completions"
base_url = "https://api.openai.com/v1"
env_key = "OPENAI_API_KEY"
wire_api = "chat"
request_max_retries = 4 # retry failed HTTP requests
openai_stream_max_retries = 10 # retry dropped SSE streams
openai_stream_idle_timeout_ms = 300000 # 5m idle timeout
[profiles.o3]
model = "o3"
@@ -722,7 +725,7 @@ disable_response_storage = true
query_params: None,
http_headers: None,
env_http_headers: None,
openai_request_max_retries: None,
request_max_retries: None,
openai_stream_max_retries: None,
openai_stream_idle_timeout_ms: None,
};

View File

@@ -11,7 +11,7 @@ env_flags! {
pub OPENAI_TIMEOUT_MS: Duration = Duration::from_millis(300_000), |value| {
value.parse().map(Duration::from_millis)
};
pub OPENAI_REQUEST_MAX_RETRIES: u64 = 4;
pub REQUEST_MAX_RETRIES: u64 = 4;
pub OPENAI_STREAM_MAX_RETRIES: u64 = 10;
// We generally don't want to disconnect; this updates the timeout to be five minutes
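The `env_flags!` macro body isn't part of this diff; it appears to expand each entry into a lazily-initialized static whose value can be overridden by an environment variable of the same name, which is why call sites deref with `*REQUEST_MAX_RETRIES`. A rough sketch of that idea (assumed, not the macro's actual expansion):
```rust
use std::sync::LazyLock;

// Assumed expansion: parse the env var once on first use, else keep the default.
pub static REQUEST_MAX_RETRIES: LazyLock<u64> = LazyLock::new(|| {
    std::env::var("REQUEST_MAX_RETRIES")
        .ok()
        .and_then(|v| v.parse().ok())
        .unwrap_or(4)
});
```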

View File

@@ -12,9 +12,9 @@ use std::env::VarError;
use std::time::Duration;
use crate::error::EnvVarError;
use crate::flags::OPENAI_REQUEST_MAX_RETRIES;
use crate::flags::OPENAI_STREAM_IDLE_TIMEOUT_MS;
use crate::flags::OPENAI_STREAM_MAX_RETRIES;
use crate::flags::REQUEST_MAX_RETRIES;
use crate::openai_api_key::get_openai_api_key;
/// Value for the `OpenAI-Originator` header that is sent with requests to
@@ -70,18 +70,13 @@ pub struct ModelProviderInfo {
pub env_http_headers: Option<HashMap<String, String>>,
/// Maximum number of times to retry a failed HTTP request to this provider.
/// When `None`, falls back to the global default from `REQUEST_MAX_RETRIES` (currently 4).
#[serde(default)]
pub openai_request_max_retries: Option<u64>,
pub request_max_retries: Option<u64>,
/// Number of times to retry reconnecting a dropped streaming response before failing.
/// When `None`, falls back to `OPENAI_STREAM_MAX_RETRIES` (currently 10).
#[serde(default)]
pub openai_stream_max_retries: Option<u64>,
/// Idle timeout (in milliseconds) to wait for activity on a streaming response before treating
/// the connection as lost. When `None`, falls back to `OPENAI_STREAM_IDLE_TIMEOUT_MS` (currently 5m).
#[serde(default)]
/// the connection as lost.
pub openai_stream_idle_timeout_ms: Option<u64>,
}
@@ -183,8 +178,7 @@ impl ModelProviderInfo {
/// Effective maximum number of request retries for this provider.
pub fn request_max_retries(&self) -> u64 {
self.openai_request_max_retries
.unwrap_or(*OPENAI_REQUEST_MAX_RETRIES)
self.request_max_retries.unwrap_or(*REQUEST_MAX_RETRIES)
}
/// Effective maximum number of stream reconnection attempts for this provider.
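The body of this stream helper falls outside the hunk; presumably it follows the same pattern as `request_max_retries()` above (a sketch, assuming the same fallback logic against the global flag):
```rust
/// Effective maximum number of stream reconnection attempts for this provider.
pub fn stream_max_retries(&self) -> u64 {
    self.openai_stream_max_retries
        .unwrap_or(*OPENAI_STREAM_MAX_RETRIES)
}
```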
@@ -244,7 +238,7 @@ pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
.collect(),
),
// Use global defaults for retry/timeout unless overridden in config.toml.
openai_request_max_retries: None,
request_max_retries: None,
openai_stream_max_retries: None,
openai_stream_idle_timeout_ms: None,
},
@@ -276,7 +270,7 @@ base_url = "http://localhost:11434/v1"
query_params: None,
http_headers: None,
env_http_headers: None,
openai_request_max_retries: None,
request_max_retries: None,
openai_stream_max_retries: None,
openai_stream_idle_timeout_ms: None,
};
@@ -304,7 +298,7 @@ query_params = { api-version = "2025-04-01-preview" }
}),
http_headers: None,
env_http_headers: None,
openai_request_max_retries: None,
request_max_retries: None,
openai_stream_max_retries: None,
openai_stream_idle_timeout_ms: None,
};
@@ -335,7 +329,7 @@ env_http_headers = { "X-Example-Env-Header" = "EXAMPLE_ENV_VAR" }
env_http_headers: Some(maplit::hashmap! {
"X-Example-Env-Header".to_string() => "EXAMPLE_ENV_VAR".to_string(),
}),
openai_request_max_retries: None,
request_max_retries: None,
openai_stream_max_retries: None,
openai_stream_idle_timeout_ms: None,
};

View File

@@ -47,7 +47,6 @@ async fn spawn_codex() -> Result<Codex, CodexErr> {
let codex_home = TempDir::new().unwrap();
let config = load_default_config_for_test(&codex_home);
// network retry/timeout tuning moved into ModelProviderInfo; using defaults
let (agent, _init_id) = Codex::spawn(config, std::sync::Arc::new(Notify::new())).await?;
Ok(agent)

View File

@@ -88,7 +88,7 @@ async fn keeps_previous_response_id_between_tasks() {
.mount(&server)
.await;
// Configure retry behaviour explicitly to avoid mutating process-wide
// Configure retry behavior explicitly to avoid mutating process-wide
// environment variables.
let model_provider = ModelProviderInfo {
name: "openai".into(),
@@ -103,7 +103,7 @@ async fn keeps_previous_response_id_between_tasks() {
http_headers: None,
env_http_headers: None,
// disable retries so we don't get duplicate calls in this test
openai_request_max_retries: Some(0),
request_max_retries: Some(0),
openai_stream_max_retries: Some(0),
openai_stream_idle_timeout_ms: None,
};

View File

@@ -72,7 +72,7 @@ async fn retries_on_early_close() {
.mount(&server)
.await;
// Configure retry behaviour explicitly to avoid mutating process-wide
// Configure retry behavior explicitly to avoid mutating process-wide
// environment variables.
let model_provider = ModelProviderInfo {
@@ -88,7 +88,7 @@ async fn retries_on_early_close() {
http_headers: None,
env_http_headers: None,
// exercise retry path: first attempt yields incomplete stream, so allow 1 retry
openai_request_max_retries: Some(0),
request_max_retries: Some(0),
openai_stream_max_retries: Some(1),
openai_stream_idle_timeout_ms: Some(2000),
};