codex/prs/bolinfest/PR-1601.md
2025-09-02 15:17:45 -07:00


PR #1601: Refactor env settings into config

Description

Summary

  • add OpenAI retry and timeout fields to Config
  • inject these settings in tests instead of mutating env vars
  • plumb Config values through client and chat completions logic
  • document new configuration options

Testing

  • cargo test -p codex-core --no-run

https://chatgpt.com/codex/tasks/task_i_68792c5b04cc832195c03050c8b6ea94
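
For reference, a minimal sketch of the resulting user-facing configuration: the three new knobs live on each provider entry in `config.toml` (field names and defaults are taken from the `config.md` changes in the diff below; the values shown are simply the built-in defaults):

```toml
[model_providers.openai]
name = "OpenAI"
base_url = "https://api.openai.com/v1"
env_key = "OPENAI_API_KEY"
# All three settings are optional; omitting them falls back to the built-in defaults.
request_max_retries = 4            # retry failed HTTP requests
stream_max_retries = 10            # retry dropped SSE streams
stream_idle_timeout_ms = 300_000   # 5 minute idle timeout
```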

Full Diff

diff --git a/codex-rs/config.md b/codex-rs/config.md
index 438b7e767d..3d38ded1a5 100644
--- a/codex-rs/config.md
+++ b/codex-rs/config.md
@@ -92,6 +92,32 @@ http_headers = { "X-Example-Header" = "example-value" }
 env_http_headers = { "X-Example-Features": "EXAMPLE_FEATURES" }

+### Per-provider network tuning
+
+The following optional settings control retry behaviour and streaming idle timeouts **per model provider**. They must be specified inside the corresponding `[model_providers.<id>]` block in `config.toml`. (Older releases accepted toplevel keys; those are now ignored.)
+
+Example:
+
+```toml
+[model_providers.openai]
+name = "OpenAI"
+base_url = "https://api.openai.com/v1"
+env_key = "OPENAI_API_KEY"
+# network tuning overrides (all optional; falls back to builtin defaults)
+request_max_retries = 4            # retry failed HTTP requests
+stream_max_retries = 10            # retry dropped SSE streams
+stream_idle_timeout_ms = 300000    # 5m idle timeout
+```
+
+#### request_max_retries
+How many times Codex will retry a failed HTTP request to the model provider. Defaults to `4`.
+
+#### stream_max_retries
+Number of times Codex will attempt to reconnect when a streaming response is interrupted. Defaults to `10`.
+
+#### stream_idle_timeout_ms
+How long Codex will wait for activity on a streaming response before treating the connection as lost. Defaults to `300_000` (5 minutes).
+

model_provider

Identifies which provider to use from the model_providers map. Defaults to "openai". You can override the base_url for the built-in openai provider via the OPENAI_BASE_URL environment variable.

@@ -444,7 +470,7 @@ Currently, "vscode" is the default, though Codex does not verify VS Code is in

hide_agent_reasoning

-Codex intermittently emits "reasoning" events that show the models internal "thinking" before it produces a final answer. Some users may find these events distracting, especially in CI logs or minimal terminal output.
+Codex intermittently emits "reasoning" events that show the model's internal "thinking" before it produces a final answer. Some users may find these events distracting, especially in CI logs or minimal terminal output.

Setting hide_agent_reasoning to true suppresses these events in both the TUI as well as the headless exec sub-command:

diff --git a/codex-rs/core/src/chat_completions.rs b/codex-rs/core/src/chat_completions.rs
index ad7b55952a..35045c8e1b 100644
--- a/codex-rs/core/src/chat_completions.rs
+++ b/codex-rs/core/src/chat_completions.rs
@@ -21,8 +21,6 @@ use crate::client_common::ResponseEvent;
 use crate::client_common::ResponseStream;
 use crate::error::CodexErr;
 use crate::error::Result;
-use crate::flags::OPENAI_REQUEST_MAX_RETRIES;
-use crate::flags::OPENAI_STREAM_IDLE_TIMEOUT_MS;
 use crate::models::ContentItem;
 use crate::models::ResponseItem;
 use crate::openai_tools::create_tools_json_for_chat_completions_api;
@@ -121,6 +119,7 @@ pub(crate) async fn stream_chat_completions(
     );

 let mut attempt = 0;
+    let max_retries = provider.request_max_retries();
     loop {
         attempt += 1;

@@ -136,7 +135,11 @@ pub(crate) async fn stream_chat_completions(
             Ok(resp) if resp.status().is_success() => {
                 let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(1600);
                 let stream = resp.bytes_stream().map_err(CodexErr::Reqwest);

-                tokio::spawn(process_chat_sse(stream, tx_event));
+                tokio::spawn(process_chat_sse(
+                    stream,
+                    tx_event,
+                    provider.stream_idle_timeout(),
+                ));
               return Ok(ResponseStream { rx_event });
           }
           Ok(res) => {
    

@@ -146,7 +149,7 @@ pub(crate) async fn stream_chat_completions(
                     return Err(CodexErr::UnexpectedStatus(status, body));
                 }

-                if attempt > *OPENAI_REQUEST_MAX_RETRIES {
+                if attempt > max_retries {
                   return Err(CodexErr::RetryLimit(status));
               }
    

@@ -162,7 +165,7 @@ pub(crate) async fn stream_chat_completions(
                 tokio::time::sleep(delay).await;
             }
             Err(e) => {

-                if attempt > *OPENAI_REQUEST_MAX_RETRIES {
+                if attempt > max_retries {
                   return Err(e.into());
               }
               let delay = backoff(attempt);
    

@@ -175,14 +178,15 @@ pub(crate) async fn stream_chat_completions(
 /// Lightweight SSE processor for the Chat Completions streaming format. The
 /// output is mapped onto Codex's internal [ResponseEvent] so that the rest
 /// of the pipeline can stay agnostic of the underlying wire format.
-async fn process_chat_sse(stream: S, tx_event: mpsc::Sender<Result>)
-where
+async fn process_chat_sse(
+    stream: S,
+    tx_event: mpsc::Sender<Result>,
+    idle_timeout: Duration,
+) where
     S: Stream<Item = Result> + Unpin,
 {
     let mut stream = stream.eventsource();
-    let idle_timeout = *OPENAI_STREAM_IDLE_TIMEOUT_MS;

     // State to accumulate a function call across streaming chunks.
     // OpenAI may split the arguments string over multiple delta events
     // until the chunk whose finish_reason is tool_calls is emitted. We

diff --git a/codex-rs/core/src/client.rs b/codex-rs/core/src/client.rs
index ae7904b8ff..62fcabe05b 100644
--- a/codex-rs/core/src/client.rs
+++ b/codex-rs/core/src/client.rs
@@ -30,8 +30,6 @@ use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
 use crate::error::CodexErr;
 use crate::error::Result;
 use crate::flags::CODEX_RS_SSE_FIXTURE;
-use crate::flags::OPENAI_REQUEST_MAX_RETRIES;
-use crate::flags::OPENAI_STREAM_IDLE_TIMEOUT_MS;
 use crate::model_provider_info::ModelProviderInfo;
 use crate::model_provider_info::WireApi;
 use crate::models::ResponseItem;
@@ -113,7 +111,7 @@ impl ModelClient {
         if let Some(path) = &*CODEX_RS_SSE_FIXTURE {
             // short circuit for tests
             warn!(path, "Streaming from fixture");
-            return stream_from_fixture(path).await;
+            return stream_from_fixture(path, self.provider.clone()).await;
       }
    
       let full_instructions = prompt.get_full_instructions(&self.config.model);
    

@@ -140,6 +138,7 @@ impl ModelClient {
         );

     let mut attempt = 0;
+        let max_retries = self.provider.request_max_retries();
       loop {
           attempt += 1;
    

@@ -158,7 +157,11 @@ impl ModelClient {

                 // spawn task to process SSE
                 let stream = resp.bytes_stream().map_err(CodexErr::Reqwest);
-                tokio::spawn(process_sse(stream, tx_event));
+                tokio::spawn(process_sse(
+                    stream,
+                    tx_event,
+                    self.provider.stream_idle_timeout(),
+                ));
    
                   return Ok(ResponseStream { rx_event });
               }
    

@@ -177,7 +180,7 @@ impl ModelClient {
                         return Err(CodexErr::UnexpectedStatus(status, body));
                     }

-                    if attempt > *OPENAI_REQUEST_MAX_RETRIES {
+                    if attempt > max_retries {
                       return Err(CodexErr::RetryLimit(status));
                   }
    

@@ -194,7 +197,7 @@ impl ModelClient {
                 tokio::time::sleep(delay).await;
             }
             Err(e) => {

-                if attempt > *OPENAI_REQUEST_MAX_RETRIES {
+                if attempt > max_retries {
                       return Err(e.into());
                   }
                   let delay = backoff(attempt);
    

@@ -203,6 +206,10 @@ impl ModelClient {
             }
         }
     }
+

+    pub fn get_provider(&self) -> ModelProviderInfo {
+        self.provider.clone()
+    }
 }

 #[derive(Debug, Deserialize, Serialize)]
@@ -254,14 +261,16 @@
 struct ResponseCompletedOutputTokensDetails {
     reasoning_tokens: u64,
 }

-async fn process_sse(stream: S, tx_event: mpsc::Sender<Result>)
-where
+async fn process_sse(
+    stream: S,
+    tx_event: mpsc::Sender<Result>,
+    idle_timeout: Duration,
+) where
     S: Stream<Item = Result> + Unpin,
 {
     let mut stream = stream.eventsource();

     // If the stream stays completely silent for an extended period treat it as disconnected.
-    let idle_timeout = *OPENAI_STREAM_IDLE_TIMEOUT_MS;

     // The response id returned from the "complete" message.
     let mut response_completed: Option = None;

@@ -322,7 +331,7 @@ where
         //      duplicated output array embedded in the response.completed
         //      payload. That produced two concrete issues:
         //        1. No realtime streaming - the user only saw output after the

-        //      entire turn had finished, which broke the “typing” UX and
+        //      entire turn had finished, which broke the "typing" UX and
           //      made longrunning turns look stalled.
           //   2. Duplicate `function_call_output` items  both the
           //      individual *and* the completed array were forwarded, which
    

@@ -395,7 +404,10 @@ where
 }

 /// used in tests to stream from a text SSE file
-async fn stream_from_fixture(path: impl AsRef<Path>) -> Result<ResponseStream> {
+async fn stream_from_fixture(
+    path: impl AsRef<Path>,
+    provider: ModelProviderInfo,
+) -> Result<ResponseStream> {
     let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(1600);
     let f = std::fs::File::open(path.as_ref())?;
     let lines = std::io::BufReader::new(f).lines();
@@ -409,7 +421,11 @@ async fn stream_from_fixture(path: impl AsRef<Path>) -> Result<ResponseStream> {

     let rdr = std::io::Cursor::new(content);
     let stream = ReaderStream::new(rdr).map_err(CodexErr::Io);

-    tokio::spawn(process_sse(stream, tx_event));
+    tokio::spawn(process_sse(
+        stream,
+        tx_event,
+        provider.stream_idle_timeout(),
+    ));

     Ok(ResponseStream { rx_event })
 }

@@ -429,7 +445,10 @@ mod tests {

 /// Runs the SSE parser on pre-chunked byte slices and returns every event
 /// (including any final `Err` from a stream-closure check).
-    async fn collect_events(chunks: &[&[u8]]) -> Vec<Result> {
+    async fn collect_events(
+        chunks: &[&[u8]],
+        provider: ModelProviderInfo,
+    ) -> Vec<Result> {
         let mut builder = IoBuilder::new();
         for chunk in chunks {
             builder.read(chunk);
@@ -438,7 +457,7 @@ mod tests {
         let reader = builder.build();
         let stream = ReaderStream::new(reader).map_err(CodexErr::Io);
         let (tx, mut rx) = mpsc::channel::<Result>(16);
-        tokio::spawn(process_sse(stream, tx));
+        tokio::spawn(process_sse(stream, tx, provider.stream_idle_timeout()));
    
       let mut events = Vec::new();
       while let Some(ev) = rx.recv().await {
    

@@ -449,7 +468,10 @@ mod tests {

 /// Builds an in-memory SSE stream from JSON fixtures and returns only the
 /// successfully parsed events (panics on internal channel errors).
-    async fn run_sse(events: Vec<serde_json::Value>) -> Vec {
+    async fn run_sse(
+        events: Vec<serde_json::Value>,
+        provider: ModelProviderInfo,
+    ) -> Vec {
         let mut body = String::new();
         for e in events {
             let kind = e
@@ -465,7 +487,7 @@ mod tests {

    let (tx, mut rx) = mpsc::channel::<Result<ResponseEvent>>(8);
    let stream = ReaderStream::new(std::io::Cursor::new(body)).map_err(CodexErr::Io);
    
-        tokio::spawn(process_sse(stream, tx));
+        tokio::spawn(process_sse(stream, tx, provider.stream_idle_timeout()));
    
       let mut out = Vec::new();
       while let Some(ev) = rx.recv().await {
    

@@ -510,7 +532,25 @@ mod tests {
         let sse2 = format!("event: response.output_item.done\ndata: {item2}\n\n");
         let sse3 = format!("event: response.completed\ndata: {completed}\n\n");

-        let events = collect_events(&[sse1.as_bytes(), sse2.as_bytes(), sse3.as_bytes()]).await;
+        let provider = ModelProviderInfo {
+            name: "test".to_string(),
+            base_url: "https://test.com".to_string(),
+            env_key: Some("TEST_API_KEY".to_string()),
+            env_key_instructions: None,
+            wire_api: WireApi::Responses,
+            query_params: None,
+            http_headers: None,
+            env_http_headers: None,
+            request_max_retries: Some(0),
+            stream_max_retries: Some(0),
+            stream_idle_timeout_ms: Some(1000),
+        };
+
+        let events = collect_events(
+            &[sse1.as_bytes(), sse2.as_bytes(), sse3.as_bytes()],
+            provider,
+        )
+        .await;
    
       assert_eq!(events.len(), 3);
    

@@ -551,8 +591,21 @@ mod tests {
             .to_string();

     let sse1 = format!("event: response.output_item.done\ndata: {item1}\n\n");
+        let provider = ModelProviderInfo {
+            name: "test".to_string(),
+            base_url: "https://test.com".to_string(),
+            env_key: Some("TEST_API_KEY".to_string()),
+            env_key_instructions: None,
+            wire_api: WireApi::Responses,
+            query_params: None,
+            http_headers: None,
+            env_http_headers: None,
+            request_max_retries: Some(0),
+            stream_max_retries: Some(0),
+            stream_idle_timeout_ms: Some(1000),
+        };
-        let events = collect_events(&[sse1.as_bytes()]).await;
+        let events = collect_events(&[sse1.as_bytes()], provider).await;
    
       assert_eq!(events.len(), 2);
    

@@ -640,7 +693,21 @@ mod tests {
             let mut evs = vec![case.event];
             evs.push(completed.clone());

-            let out = run_sse(evs).await;
+            let provider = ModelProviderInfo {
+                name: "test".to_string(),
+                base_url: "https://test.com".to_string(),
+                env_key: Some("TEST_API_KEY".to_string()),
+                env_key_instructions: None,
+                wire_api: WireApi::Responses,
+                query_params: None,
+                http_headers: None,
+                env_http_headers: None,
+                request_max_retries: Some(0),
+                stream_max_retries: Some(0),
+                stream_idle_timeout_ms: Some(1000),
+            };
+
+            let out = run_sse(evs, provider).await;
           assert_eq!(out.len(), case.expected_len, "case {}", case.name);
           assert!(
               (case.expect_first)(&out[0]),
    

diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index 246198c006..df1ffad50d 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -49,7 +49,6 @@ use crate::exec::ExecToolCallOutput;
 use crate::exec::SandboxType;
 use crate::exec::process_exec_tool_call;
 use crate::exec_env::create_env;
-use crate::flags::OPENAI_STREAM_MAX_RETRIES;
 use crate::mcp_connection_manager::McpConnectionManager;
 use crate::mcp_tool_call::handle_mcp_tool_call;
 use crate::models::ContentItem;
@@ -1027,12 +1026,13 @@ async fn run_turn(
             Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted),
             Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)),
             Err(e) => {

-                if retries < *OPENAI_STREAM_MAX_RETRIES {
+                // Use the configured provider-specific stream retry budget.
+                let max_retries = sess.client.get_provider().stream_max_retries();
+                if retries < max_retries {
                   retries += 1;
                   let delay = backoff(retries);
                   warn!(
    
-                        "stream disconnected - retrying turn ({retries}/{} in {delay:?})...",
-                        *OPENAI_STREAM_MAX_RETRIES
+                        "stream disconnected - retrying turn ({retries}/{max_retries} in {delay:?})...",
                   );
    
                   // Surface retry information to any UI/frontend so the
    

@@ -1041,8 +1041,7 @@ async fn run_turn(
                     sess.notify_background_event(
                         &sub_id,
                         format!(

-                            "stream error: {e}; retrying {retries}/{} in {:?}…",
-                            *OPENAI_STREAM_MAX_RETRIES, delay
+                            "stream error: {e}; retrying {retries}/{max_retries} in {delay:?}…"
                       ),
                   )
                   .await;
    

@@ -1124,7 +1123,28 @@ async fn try_run_turn(
     let mut stream = sess.client.clone().stream(&prompt).await?;

 let mut output = Vec::new();
-    while let Some(Ok(event)) = stream.next().await {
+    loop {
+        // Poll the next item from the model stream. We must inspect *both* Ok and Err
+        // cases so that transient stream failures (e.g., dropped SSE connection before
+        // `response.completed`) bubble up and trigger the caller's retry logic.
+        let event = stream.next().await;
+        let Some(event) = event else {
+            // Channel closed without yielding a final Completed event or explicit error.
+            // Treat as a disconnected stream so the caller can retry.
+            return Err(CodexErr::Stream(
+                "stream closed before response.completed".into(),
+            ));
+        };
+        let event = match event {
+            Ok(ev) => ev,
+            Err(e) => {
+                // Propagate the underlying stream error to the caller (run_turn), which
+                // will apply the configured `stream_max_retries` policy.
+                return Err(e);
+            }
+        };
+        match event {
           ResponseEvent::Created => {
               let mut state = sess.state.lock().unwrap();
    

@@ -1165,7 +1185,7 @@ async fn try_run_turn(

             let mut state = sess.state.lock().unwrap();
             state.previous_response_id = Some(response_id);
-            break;
+            return Ok(output);
           }
           ResponseEvent::OutputTextDelta(delta) => {
               let event = Event {
    

@@ -1183,7 +1203,6 @@ async fn try_run_turn(
             }
         }
     }

-    Ok(output)
 }

 async fn handle_response_item(

diff --git a/codex-rs/core/src/config.rs b/codex-rs/core/src/config.rs
index d67e692fc8..d5b2845398 100644
--- a/codex-rs/core/src/config.rs
+++ b/codex-rs/core/src/config.rs
@@ -682,6 +682,9 @@
 name = "OpenAI using Chat Completions"
 base_url = "https://api.openai.com/v1"
 env_key = "OPENAI_API_KEY"
 wire_api = "chat"
+request_max_retries = 4            # retry failed HTTP requests
+stream_max_retries = 10            # retry dropped SSE streams
+stream_idle_timeout_ms = 300000    # 5m idle timeout

 [profiles.o3]
 model = "o3"
@@ -722,6 +725,9 @@ disable_response_storage = true
             query_params: None,
             http_headers: None,
             env_http_headers: None,

+            request_max_retries: Some(4),
+            stream_max_retries: Some(10),
+            stream_idle_timeout_ms: Some(300_000),
       };
       let model_provider_map = {
           let mut model_provider_map = built_in_model_providers();
    

diff --git a/codex-rs/core/src/flags.rs b/codex-rs/core/src/flags.rs
index c21ef67026..c150405491 100644
--- a/codex-rs/core/src/flags.rs
+++ b/codex-rs/core/src/flags.rs
@@ -11,14 +11,6 @@ env_flags! {
     pub OPENAI_TIMEOUT_MS: Duration = Duration::from_millis(300_000), |value| {
         value.parse().map(Duration::from_millis)
     };

-    pub OPENAI_REQUEST_MAX_RETRIES: u64 = 4;
-    pub OPENAI_STREAM_MAX_RETRIES: u64 = 10;
-
-    // We generally don't want to disconnect; this updates the timeout to be five minutes
-    // which matches the upstream typescript codex impl.
-    pub OPENAI_STREAM_IDLE_TIMEOUT_MS: Duration = Duration::from_millis(300_000), |value| {
-        value.parse().map(Duration::from_millis)
-    };

     /// Fixture path for offline tests (see client.rs).
     pub CODEX_RS_SSE_FIXTURE: Option<&str> = None;

diff --git a/codex-rs/core/src/model_provider_info.rs b/codex-rs/core/src/model_provider_info.rs
index b38c912d34..72ef58c60a 100644
--- a/codex-rs/core/src/model_provider_info.rs
+++ b/codex-rs/core/src/model_provider_info.rs
@@ -9,6 +9,7 @@ use serde::Deserialize;
 use serde::Serialize;
 use std::collections::HashMap;
 use std::env::VarError;
+use std::time::Duration;

 use crate::error::EnvVarError;
 use crate::openai_api_key::get_openai_api_key;
@@ -16,6 +17,9 @@ use crate::openai_api_key::get_openai_api_key;
 /// Value for the OpenAI-Originator header that is sent with requests to
 /// OpenAI.
 const OPENAI_ORIGINATOR_HEADER: &str = "codex_cli_rs";
+const DEFAULT_STREAM_IDLE_TIMEOUT_MS: u64 = 300_000;
+const DEFAULT_STREAM_MAX_RETRIES: u64 = 10;
+const DEFAULT_REQUEST_MAX_RETRIES: u64 = 4;

 /// Wire protocol that the provider speaks. Most third-party services only
 /// implement the classic OpenAI Chat Completions JSON schema, whereas OpenAI
@@ -26,7 +30,7 @@ const OPENAI_ORIGINATOR_HEADER: &str = "codex_cli_rs";
 #[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
 #[serde(rename_all = "lowercase")]
 pub enum WireApi {

-    /// The experimental “Responses” API exposed by OpenAI at /v1/responses.
+    /// The experimental "Responses" API exposed by OpenAI at /v1/responses.
     Responses,

     /// Regular Chat Completions compatible with /v1/chat/completions.
@@ -64,6 +68,16 @@ pub struct ModelProviderInfo {
     /// value should be used. If the environment variable is not set, or the
     /// value is empty, the header will not be included in the request.
     pub env_http_headers: Option<HashMap<String, String>>,

+
+    /// Maximum number of times to retry a failed HTTP request to this provider.
+    pub request_max_retries: Option<u64>,
+
+    /// Number of times to retry reconnecting a dropped streaming response before failing.
+    pub stream_max_retries: Option<u64>,
+
+    /// Idle timeout (in milliseconds) to wait for activity on a streaming response before treating
+    /// the connection as lost.
+    pub stream_idle_timeout_ms: Option<u64>,
 }

 impl ModelProviderInfo {
@@ -161,6 +175,25 @@ impl ModelProviderInfo {
             None => Ok(None),
         }
     }
+

+    /// Effective maximum number of request retries for this provider.
+    pub fn request_max_retries(&self) -> u64 {
+        self.request_max_retries
+            .unwrap_or(DEFAULT_REQUEST_MAX_RETRIES)
+    }
+
+    /// Effective maximum number of stream reconnection attempts for this provider.
+    pub fn stream_max_retries(&self) -> u64 {
+        self.stream_max_retries
+            .unwrap_or(DEFAULT_STREAM_MAX_RETRIES)
+    }
+
+    /// Effective idle timeout for streaming responses.
+    pub fn stream_idle_timeout(&self) -> Duration {
+        self.stream_idle_timeout_ms
+            .map(Duration::from_millis)
+            .unwrap_or(Duration::from_millis(DEFAULT_STREAM_IDLE_TIMEOUT_MS))
+    }
 }

 /// Built-in default provider list.
@@ -205,6 +238,10 @@ pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
                         .into_iter()
                         .collect(),
                 ),

+                // Use global defaults for retry/timeout unless overridden in config.toml.
+                request_max_retries: None,
+                stream_max_retries: None,
+                stream_idle_timeout_ms: None,
           },
       ),
    
     ]
@@ -234,6 +271,9 @@ base_url = "http://localhost:11434/v1"
         query_params: None,
         http_headers: None,
         env_http_headers: None,
+        request_max_retries: None,
+        stream_max_retries: None,
+        stream_idle_timeout_ms: None,
       };
    
       let provider: ModelProviderInfo = toml::from_str(azure_provider_toml).unwrap();
    

@@ -259,6 +299,9 @@ query_params = { api-version = "2025-04-01-preview" }
         }),
         http_headers: None,
         env_http_headers: None,

+        request_max_retries: None,
+        stream_max_retries: None,
+        stream_idle_timeout_ms: None,
       };
    
       let provider: ModelProviderInfo = toml::from_str(azure_provider_toml).unwrap();
    

@@ -287,6 +330,9 @@ env_http_headers = { "X-Example-Env-Header" = "EXAMPLE_ENV_VAR" }
         env_http_headers: Some(maplit::hashmap! {
             "X-Example-Env-Header".to_string() => "EXAMPLE_ENV_VAR".to_string(),
         }),

+        request_max_retries: None,
+        stream_max_retries: None,
+        stream_idle_timeout_ms: None,
       };
    
       let provider: ModelProviderInfo = toml::from_str(azure_provider_toml).unwrap();
    

diff --git a/codex-rs/core/tests/cli_stream.rs b/codex-rs/core/tests/cli_stream.rs
index 3669b93f51..23ee0a3cbc 100644
--- a/codex-rs/core/tests/cli_stream.rs
+++ b/codex-rs/core/tests/cli_stream.rs
@@ -173,7 +173,7 @@ async fn integration_creates_and_checks_session_file() {
     // 5. Sessions are written asynchronously; wait briefly for the directory to appear.
     let sessions_dir = home.path().join("sessions");
     let start = Instant::now();

-    while !sessions_dir.exists() && start.elapsed() < Duration::from_secs(2) {
+    while !sessions_dir.exists() && start.elapsed() < Duration::from_secs(3) {
         std::thread::sleep(Duration::from_millis(50));
     }

diff --git a/codex-rs/core/tests/client.rs b/codex-rs/core/tests/client.rs
index f4fb58f5a4..964710b83f 100644
--- a/codex-rs/core/tests/client.rs
+++ b/codex-rs/core/tests/client.rs
@@ -49,13 +49,6 @@ async fn includes_session_id_and_model_headers_in_request() {
         .mount(&server)
         .await;

-    // Environment
-    // Update environment  `set_var` is `unsafe` starting with the 2024
-    // edition so we group the calls into a single `unsafe { … }` block.
-    unsafe {
-        std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0");
-        std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "0");
-    }
     let model_provider = ModelProviderInfo {
         name: "openai".into(),
         base_url: format!("{}/v1", server.uri()),
@@ -72,6 +65,9 @@ async fn includes_session_id_and_model_headers_in_request() {
                 .collect(),
         ),
         env_http_headers: None,
+        request_max_retries: Some(0),
+        stream_max_retries: Some(0),
+        stream_idle_timeout_ms: None,
    

    };

     // Init session

diff --git a/codex-rs/core/tests/live_agent.rs b/codex-rs/core/tests/live_agent.rs
index c21f9d0032..26a5539dd7 100644
--- a/codex-rs/core/tests/live_agent.rs
+++ b/codex-rs/core/tests/live_agent.rs
@@ -45,22 +45,10 @@ async fn spawn_codex() -> Result<Codex, CodexErr> {
         "OPENAI_API_KEY must be set for live tests"
     );

-    // Environment tweaks to keep the tests snappy and inexpensive while still
-    // exercising retry/robustness logic.
-    //
-    // NOTE: Starting with the 2024 edition `std::env::set_var` is `unsafe`
-    // because changing the process environment races with any other threads
-    // that might be performing environment look-ups at the same time.
-    // Restrict the unsafety to this tiny block that happens at the very
-    // beginning of the test, before we spawn any background tasks that could
-    // observe the environment.
-    unsafe {
-        std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "2");
-        std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "2");
-    }
-
     let codex_home = TempDir::new().unwrap();
-    let config = load_default_config_for_test(&codex_home);
+    let mut config = load_default_config_for_test(&codex_home);
+    config.model_provider.request_max_retries = Some(2);
+    config.model_provider.stream_max_retries = Some(2);

     let (agent, _init_id) = Codex::spawn(config, std::sync::Arc::new(Notify::new())).await?;

     Ok(agent)
@@ -79,7 +67,7 @@ async fn live_streaming_and_prev_id_reset() {

    let codex = spawn_codex().await.unwrap();

-    // ---------- Task 1 ----------
+    // ---------- Task 1 ----------
     codex
         .submit(Op::UserInput {
             items: vec![InputItem::Text {
@@ -113,7 +101,7 @@ async fn live_streaming_and_prev_id_reset() {
         "Agent did not stream any AgentMessage before TaskComplete"
     );
-    // ---------- Task 2 (same session) ----------
+    // ---------- Task 2 (same session) ----------
     codex
         .submit(Op::UserInput {
             items: vec![InputItem::Text {

diff --git a/codex-rs/core/tests/previous_response_id.rs b/codex-rs/core/tests/previous_response_id.rs
index e64271a0ff..9630cc1028 100644
--- a/codex-rs/core/tests/previous_response_id.rs
+++ b/codex-rs/core/tests/previous_response_id.rs
@@ -88,13 +88,8 @@ async fn keeps_previous_response_id_between_tasks() {
         .mount(&server)
         .await;
-    // Environment
-    // Update environment  `set_var` is `unsafe` starting with the 2024
-    // edition so we group the calls into a single `unsafe { … }` block.
-    unsafe {
-        std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0");
-        std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "0");
-    }
+    // Configure retry behavior explicitly to avoid mutating process-wide
+    // environment variables.
     let model_provider = ModelProviderInfo {
         name: "openai".into(),
         base_url: format!("{}/v1", server.uri()),
@@ -107,6 +102,10 @@ async fn keeps_previous_response_id_between_tasks() {
         query_params: None,
         http_headers: None,
         env_http_headers: None,

+        // disable retries so we don't get duplicate calls in this test
+        request_max_retries: Some(0),
+        stream_max_retries: Some(0),
+        stream_idle_timeout_ms: None,
    

    };

     // Init session

diff --git a/codex-rs/core/tests/stream_no_completed.rs b/codex-rs/core/tests/stream_no_completed.rs
index 8883eff373..f2de5de188 100644
--- a/codex-rs/core/tests/stream_no_completed.rs
+++ b/codex-rs/core/tests/stream_no_completed.rs
@@ -32,8 +32,6 @@ fn sse_completed(id: &str) -> String {
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-// this test is flaky (has race conditions), so we ignore it for now
-#[ignore]
 async fn retries_on_early_close() {
     #![allow(clippy::unwrap_used)]

@@ -72,19 +70,8 @@ async fn retries_on_early_close() {
         .mount(&server)
         .await;

-    // Environment
-    //
-    // As of Rust 2024 `std::env::set_var` has been made `unsafe` because
-    // mutating the process environment is inherently racy when other threads
-    // are running.  We therefore have to wrap every call in an explicit
-    // `unsafe` block.  These are limited to the test-setup section so the
-    // scope is very small and clearly delineated.
-
-    unsafe {
-        std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0");
-        std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "1");
-        std::env::set_var("OPENAI_STREAM_IDLE_TIMEOUT_MS", "2000");
-    }
+    // Configure retry behavior explicitly to avoid mutating process-wide
+    // environment variables.
     let model_provider = ModelProviderInfo {
         name: "openai".into(),
@@ -98,6 +85,10 @@ async fn retries_on_early_close() {
         query_params: None,
         http_headers: None,
         env_http_headers: None,

+        // exercise retry path: first attempt yields incomplete stream, so allow 1 retry
+        request_max_retries: Some(0),
+        stream_max_retries: Some(1),
+        stream_idle_timeout_ms: Some(2000),
    

    };

    let ctrl_c = std::sync::Arc::new(tokio::sync::Notify::new());


## Review Comments

### codex-rs/config.md

- Created: 2025-07-17 21:01:49 UTC | Link: https://github.com/openai/codex/pull/1601#discussion_r2214299757

```diff
@@ -466,7 +466,33 @@ This is analogous to `model_context_window`, but for the maximum number of outpu
 
 Maximum number of bytes to read from an `AGENTS.md` file to include in the instructions sent with the first turn of a session. Defaults to 32 KiB.
 
-## tui
+## Per-provider network tuning
+
+The following optional settings control retry behaviour and streaming idle timeouts **per model provider**. They must be specified inside the corresponding `[model_providers.<id>]` block in `config.toml`. (Older releases accepted toplevel keys; those are now ignored.)
+
+Example:
+
+```toml
+[model_providers.openai]
+name = "OpenAI"
+base_url = "https://api.openai.com/v1"
+env_key = "OPENAI_API_KEY"
+# network tuning overrides (all optional; falls back to builtin defaults)
+openai_request_max_retries = 4            # retry failed HTTP requests
+openai_stream_max_retries = 10            # retry dropped SSE streams
+openai_stream_idle_timeout_ms = 300000    # 5m idle timeout
+```
+
+### openai_request_max_retries
+How many times Codex will retry a failed HTTP request to the model provider. Defaults to `4`.
+
+### openai_stream_max_retries
+Number of times Codex will attempt to reconnect when a streaming response is interrupted. Defaults to `10`.
+
+### openai_stream_idle_timeout_ms
+How long Codex will wait for activity on a streaming response before treating the connection as lost. Defaults to `300000` (5 minutes).
+
+### tui
```

Why did the header level change from ## to ###? I guess Codex did it?

@@ -466,7 +466,33 @@ This is analogous to `model_context_window`, but for the maximum number of outpu
 
 Maximum number of bytes to read from an `AGENTS.md` file to include in the instructions sent with the first turn of a session. Defaults to 32 KiB.
 
-## tui
+## Per-provider network tuning

I think this should be part of the existing model_providers documentation.

I'm also not sure whether these overrides will work for the built-in openai provider as things work today?

@@ -92,6 +92,32 @@ http_headers = { "X-Example-Header" = "example-value" }
 env_http_headers = { "X-Example-Features": "EXAMPLE_FEATURES" }

+### Per-provider network tuning
+
+The following optional settings control retry behaviour and streaming idle timeouts per model provider. They must be specified inside the corresponding [model_providers.<id>] block in config.toml. (Older releases accepted toplevel keys; those are now ignored.)
+
+Example:
+
+```toml
+[model_providers.openai]
+name = "OpenAI"
+base_url = "https://api.openai.com/v1"
+env_key = "OPENAI_API_KEY"
+# network tuning overrides (all optional; falls back to builtin defaults)
+request_max_retries = 4            # retry failed HTTP requests
+openai_stream_max_retries = 10     # retry dropped SSE streams
+openai_stream_idle_timeout_ms = 300000    # 5m idle timeout
+```
+
+#### request_max_retries
+How many times Codex will retry a failed HTTP request to the model provider. Defaults to 4.
+
+#### openai_stream_max_retries
+Number of times Codex will attempt to reconnect when a streaming response is interrupted. Defaults to 10.
+
+#### openai_stream_idle_timeout_ms
+How long Codex will wait for activity on a streaming response before treating the connection as lost. Defaults to 300000 (5 minutes).


> Note, TOML supports underscores in int literals, making them easier to read for large values:
> 
> ```suggestion
> How long Codex will wait for activity on a streaming response before treating the connection as lost. Defaults to `300_000` (5 minutes).
> ```

### codex-rs/core/src/chat_completions.rs

- Created: 2025-07-17 18:36:15 UTC | Link: https://github.com/openai/codex/pull/1601#discussion_r2214030972

```diff
@@ -136,7 +136,11 @@ pub(crate) async fn stream_chat_completions(
             Ok(resp) if resp.status().is_success() => {
                 let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(1600);
                 let stream = resp.bytes_stream().map_err(CodexErr::Reqwest);
-                tokio::spawn(process_chat_sse(stream, tx_event));
+                tokio::spawn(process_chat_sse(
```

Seeing how ModelProviderInfo is already in scope, should we instead define stream_idle_timeout_ms, request_max_retries, and stream_max_retries on ModelProviderInfo instead so it is configurable for all model providers?

@@ -34,6 +33,7 @@ pub(crate) async fn stream_chat_completions(
     model: &str,
     client: &reqwest::Client,
     provider: &ModelProviderInfo,
+    _config: &Config,

We can remove this new arg now, right?

### codex-rs/core/src/client.rs

@@ -29,8 +29,7 @@ use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
 use crate::error::CodexErr;
 use crate::error::Result;
 use crate::flags::CODEX_RS_SSE_FIXTURE;
-use crate::flags::OPENAI_REQUEST_MAX_RETRIES;
-use crate::flags::OPENAI_STREAM_IDLE_TIMEOUT_MS;
+use crate::flags::OPENAI_STREAM_IDLE_TIMEOUT_MS; // retained for default config

drop comment?

@@ -64,6 +63,10 @@ impl ModelClient {
         }
     }
 
+    pub fn config(&self) -> &Config {

Is this needed or was this from an earlier rev of this PR?

@@ -404,7 +415,11 @@ async fn stream_from_fixture(path: impl AsRef<Path>) -> Result<ResponseStream> {
 
     let rdr = std::io::Cursor::new(content);
     let stream = ReaderStream::new(rdr).map_err(CodexErr::Io);
-    tokio::spawn(process_sse(stream, tx_event));
+    tokio::spawn(process_sse(
+        stream,
+        tx_event,
+        *OPENAI_STREAM_IDLE_TIMEOUT_MS,

I thought we were passing everything through the provider?

@@ -198,6 +201,9 @@ impl ModelClient {
             }
         }
     }
+    pub fn get_provider(&self) -> ModelProviderInfo {

newline before? (I'm surprised just fmt doesn't do this...)

### codex-rs/core/src/codex.rs

@@ -1026,12 +1025,14 @@ async fn run_turn(
             Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted),
             Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)),
             Err(e) => {
-                if retries < *OPENAI_STREAM_MAX_RETRIES {
+                // Use the configured provider-specific stream retry budget.
+                let max_retries = sess.client.get_provider().stream_max_retries();
+                if retries < max_retries {
                     retries += 1;
                     let delay = backoff(retries);
                     warn!(
                         "stream disconnected - retrying turn ({retries}/{} in {delay:?})...",
-                        *OPENAI_STREAM_MAX_RETRIES
+                        max_retries

GitHub makes it hard to show this correctly...

                        "stream disconnected - retrying turn ({retries}/{max_retries} in {delay:?})...",

I thought the latest clippy flags this sort of thing, I should investigate...

@@ -1182,7 +1203,7 @@ async fn try_run_turn(
             }
         }
     }
-    Ok(output)
+    // unreachable: loop only exits via return statements above

can remove this comment; the compiler would flag any added code as unreachable, right?

@@ -1026,12 +1025,14 @@ async fn run_turn(
             Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted),
             Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)),
             Err(e) => {
-                if retries < *OPENAI_STREAM_MAX_RETRIES {
+                // Use the configured provider-specific stream retry budget.
+                let max_retries = sess.client.get_provider().stream_max_retries();
+                if retries < max_retries {
                     retries += 1;
                     let delay = backoff(retries);
                     warn!(
                         "stream disconnected - retrying turn ({retries}/{} in {delay:?})...",
-                        *OPENAI_STREAM_MAX_RETRIES
+                        "stream disconnected - retrying turn ({retries}/{max_retries} in {delay:?})...",

remove the line above?

@@ -1026,12 +1025,14 @@ async fn run_turn(
             Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted),
             Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)),
             Err(e) => {
-                if retries < *OPENAI_STREAM_MAX_RETRIES {
+                // Use the configured provider-specific stream retry budget.
+                let max_retries = sess.client.get_provider().stream_max_retries();
+                if retries < max_retries {
                     retries += 1;
                     let delay = backoff(retries);
                     warn!(
                         "stream disconnected - retrying turn ({retries}/{} in {delay:?})...",
-                        *OPENAI_STREAM_MAX_RETRIES
+                        "stream disconnected - retrying turn ({retries}/{max_retries} in {delay:?})...",

also curious if delay needs the :?

@@ -1123,7 +1123,28 @@ async fn try_run_turn(
     let mut stream = sess.client.clone().stream(&prompt).await?;
 
     let mut output = Vec::new();
-    while let Some(Ok(event)) = stream.next().await {
+    loop {
+        // Poll the next item from the model stream. We must inspect *both* Ok and Err
+        // cases so that transient stream failures (e.g., dropped SSE connection before
+        // `response.completed`) bubble up and trigger the caller's retry logic.
+        let next = stream.next().await;

maybe name it event instead of just next?

@@ -1123,7 +1123,28 @@ async fn try_run_turn(
     let mut stream = sess.client.clone().stream(&prompt).await?;
 
     let mut output = Vec::new();
-    while let Some(Ok(event)) = stream.next().await {

Hmm, so before, if it were None, we would just return whatever output we had thus far?

### codex-rs/core/src/config.rs

@@ -722,6 +722,9 @@ disable_response_storage = true
             query_params: None,
             http_headers: None,
             env_http_headers: None,
+            openai_request_max_retries: None,

Maybe you want to update toml on line 671 to exercise the parsing?

### codex-rs/core/src/model_provider_info.rs

@@ -64,6 +68,21 @@ pub struct ModelProviderInfo {
     /// value should be used. If the environment variable is not set, or the
     /// value is empty, the header will not be included in the request.
     pub env_http_headers: Option<HashMap<String, String>>,
+
+    /// Maximum number of times to retry a failed HTTP request to this provider.
+    /// When `None`, falls back to the global default from `OPENAI_REQUEST_MAX_RETRIES` (currently 4).
+    #[serde(default)]
+    pub openai_request_max_retries: Option<u64>,

These should not have an openai_ prefix, but should be generally applicable to all providers, right?

@@ -205,6 +243,10 @@ pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
                         .into_iter()
                         .collect(),
                 ),
+                // Use global defaults for retry/timeout unless overridden in config.toml.

I think we should only be looking at the OPENAI_ environment variables for the built-in OpenAI provider, not all providers, right?

@@ -64,6 +68,21 @@ pub struct ModelProviderInfo {
     /// value should be used. If the environment variable is not set, or the
     /// value is empty, the header will not be included in the request.
     pub env_http_headers: Option<HashMap<String, String>>,
+
+    /// Maximum number of times to retry a failed HTTP request to this provider.
+    /// When `None`, falls back to the global default from `OPENAI_REQUEST_MAX_RETRIES` (currently 4).
+    #[serde(default)]

This should be unnecessary for Optional?

@@ -205,6 +243,10 @@ pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
                         .into_iter()
                         .collect(),
                 ),
+                // Use global defaults for retry/timeout unless overridden in config.toml.

Taking a step back, I'm not sure we should be honoring OPENAI_STREAM_IDLE_TIMEOUT_MS, OPENAI_REQUEST_MAX_RETRIES, or OPENAI_STREAM_MAX_RETRIES at all. As best I can tell, these are not "standard" OpenAI environment variables, but ones that we made up for Codex?

I've been trying to maintain a consistency where the Config is the "one true way" to configure things, so supporting a small handful of environment variables confuses that.

@@ -161,6 +172,23 @@ impl ModelProviderInfo {
             None => Ok(None),
         }
     }
+
+    /// Effective maximum number of request retries for this provider.
+    pub fn request_max_retries(&self) -> u64 {
+        self.request_max_retries.unwrap_or(4)

I think the default values should be their own constants at the top of the file.

@@ -9,13 +9,17 @@ use serde::Deserialize;
 use serde::Serialize;
 use std::collections::HashMap;
 use std::env::VarError;
+use std::time::Duration;
 
 use crate::error::EnvVarError;
 use crate::openai_api_key::get_openai_api_key;
 
 /// Value for the `OpenAI-Originator` header that is sent with requests to
 /// OpenAI.
 const OPENAI_ORIGINATOR_HEADER: &str = "codex_cli_rs";
+const OPENAI_STREAM_IDLE_TIMEOUT_MS: u64 = 300_000;
+const OPENAI_STREAM_MAX_RETRIES: u64 = 10;
+const OPENAI_REQUEST_MAX_RETRIES: u64 = 4;

I think it's more appropriate for these to be:

const DEFAULT_STREAM_IDLE_TIMEOUT_MS: u64 = 300_000;
const DEFAULT_STREAM_MAX_RETRIES: u64 = 10;
const DEFAULT_REQUEST_MAX_RETRIES: u64 = 4;

### codex-rs/core/tests/live_agent.rs

@@ -45,22 +45,9 @@ async fn spawn_codex() -> Result<Codex, CodexErr> {
         "OPENAI_API_KEY must be set for live tests"
     );
 
-    // Environment tweaks to keep the tests snappy and inexpensive while still
-    // exercising retry/robustness logic.
-    //
-    // NOTE: Starting with the 2024 edition `std::env::set_var` is `unsafe`
-    // because changing the process environment races with any other threads
-    // that might be performing environment look-ups at the same time.
-    // Restrict the unsafety to this tiny block that happens at the very
-    // beginning of the test, before we spawn any background tasks that could
-    // observe the environment.
-    unsafe {
-        std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "2");
-        std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "2");
-    }
-
     let codex_home = TempDir::new().unwrap();
     let config = load_default_config_for_test(&codex_home);
+    // network retry/timeout tuning moved into ModelProviderInfo; using defaults

remove comment

@@ -45,20 +45,6 @@ async fn spawn_codex() -> Result<Codex, CodexErr> {
         "OPENAI_API_KEY must be set for live tests"
     );
 
-    // Environment tweaks to keep the tests snappy and inexpensive while still
-    // exercising retry/robustness logic.
-    //
-    // NOTE: Starting with the 2024 edition `std::env::set_var` is `unsafe`
-    // because changing the process environment races with any other threads
-    // that might be performing environment look-ups at the same time.
-    // Restrict the unsafety to this tiny block that happens at the very
-    // beginning of the test, before we spawn any background tasks that could
-    // observe the environment.
-    unsafe {

In these tests where you are deleting unsafe, I would expect there to be changes to a Config to set the corresponding values. Am I missing something?

### codex-rs/core/tests/previous_response_id.rs

@@ -88,13 +88,8 @@ async fn keeps_previous_response_id_between_tasks() {
         .mount(&server)
         .await;
 
-    // Environment
-    // Update environment  `set_var` is `unsafe` starting with the 2024
-    // edition so we group the calls into a single `unsafe { … }` block.
-    unsafe {
-        std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0");
-        std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "0");
-    }
+    // Configure retry behaviour explicitly to avoid mutating process-wide
    // Configure retry behavior explicitly to avoid mutating process-wide

### codex-rs/core/tests/stream_no_completed.rs

@@ -72,19 +72,8 @@ async fn retries_on_early_close() {
         .mount(&server)
         .await;
 
-    // Environment
-    //
-    // As of Rust 2024 `std::env::set_var` has been made `unsafe` because
-    // mutating the process environment is inherently racy when other threads
-    // are running.  We therefore have to wrap every call in an explicit
-    // `unsafe` block.  These are limited to the test-setup section so the
-    // scope is very small and clearly delineated.
-
-    unsafe {
-        std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0");
-        std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "1");
-        std::env::set_var("OPENAI_STREAM_IDLE_TIMEOUT_MS", "2000");
-    }
+    // Configure retry behaviour explicitly to avoid mutating process-wide
    // Configure retry behavior explicitly to avoid mutating process-wide