codex/prs/bolinfest/PR-1601.md
2025-09-02 15:17:45 -07:00


PR #1601: Refactor env settings into config

Description

Summary

  • add OpenAI retry and timeout fields to Config
  • inject these settings in tests instead of mutating env vars
  • plumb Config values through client and chat completions logic
  • document new configuration options

Testing

  • cargo test -p codex-core --no-run

https://chatgpt.com/codex/tasks/task_i_68792c5b04cc832195c03050c8b6ea94
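
For reference, a minimal sketch of the resulting user-facing configuration: the three new knobs live on each provider entry in `config.toml` (field names and defaults are taken from the `config.md` changes in the diff below; the values shown are simply the built-in defaults):

```toml
[model_providers.openai]
name = "OpenAI"
base_url = "https://api.openai.com/v1"
env_key = "OPENAI_API_KEY"
# All three settings are optional; omitting them falls back to the built-in defaults.
request_max_retries = 4            # retry failed HTTP requests
stream_max_retries = 10            # retry dropped SSE streams
stream_idle_timeout_ms = 300_000   # 5 minute idle timeout
```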

Full Diff

diff --git a/codex-rs/config.md b/codex-rs/config.md
index 438b7e767d..3d38ded1a5 100644
--- a/codex-rs/config.md
+++ b/codex-rs/config.md
@@ -92,6 +92,32 @@ http_headers = { "X-Example-Header" = "example-value" }
 env_http_headers = { "X-Example-Features": "EXAMPLE_FEATURES" }

+### Per-provider network tuning
+
+The following optional settings control retry behaviour and streaming idle timeouts **per model provider**. They must be specified inside the corresponding `[model_providers.<id>]` block in `config.toml`. (Older releases accepted toplevel keys; those are now ignored.)
+
+Example:
+
+```toml
+[model_providers.openai]
+name = "OpenAI"
+base_url = "https://api.openai.com/v1"
+env_key = "OPENAI_API_KEY"
+# network tuning overrides (all optional; falls back to builtin defaults)
+request_max_retries = 4            # retry failed HTTP requests
+stream_max_retries = 10            # retry dropped SSE streams
+stream_idle_timeout_ms = 300000    # 5m idle timeout
+```
+
+#### request_max_retries
+How many times Codex will retry a failed HTTP request to the model provider. Defaults to `4`.
+
+#### stream_max_retries
+Number of times Codex will attempt to reconnect when a streaming response is interrupted. Defaults to `10`.
+
+#### stream_idle_timeout_ms
+How long Codex will wait for activity on a streaming response before treating the connection as lost. Defaults to `300_000` (5 minutes).
+

model_provider

Identifies which provider to use from the model_providers map. Defaults to "openai". You can override the base_url for the built-in openai provider via the OPENAI_BASE_URL environment variable.

@@ -444,7 +470,7 @@ Currently, "vscode" is the default, though Codex does not verify VS Code is in

hide_agent_reasoning

-Codex intermittently emits "reasoning" events that show the models internal "thinking" before it produces a final answer. Some users may find these events distracting, especially in CI logs or minimal terminal output.
+Codex intermittently emits "reasoning" events that show the model's internal "thinking" before it produces a final answer. Some users may find these events distracting, especially in CI logs or minimal terminal output.

Setting hide_agent_reasoning to true suppresses these events in both the TUI as well as the headless exec sub-command:

diff --git a/codex-rs/core/src/chat_completions.rs b/codex-rs/core/src/chat_completions.rs
index ad7b55952a..35045c8e1b 100644
--- a/codex-rs/core/src/chat_completions.rs
+++ b/codex-rs/core/src/chat_completions.rs
@@ -21,8 +21,6 @@ use crate::client_common::ResponseEvent;
 use crate::client_common::ResponseStream;
 use crate::error::CodexErr;
 use crate::error::Result;
-use crate::flags::OPENAI_REQUEST_MAX_RETRIES;
-use crate::flags::OPENAI_STREAM_IDLE_TIMEOUT_MS;
 use crate::models::ContentItem;
 use crate::models::ResponseItem;
 use crate::openai_tools::create_tools_json_for_chat_completions_api;
@@ -121,6 +119,7 @@ pub(crate) async fn stream_chat_completions(
     );

 let mut attempt = 0;
+    let max_retries = provider.request_max_retries();
     loop {
         attempt += 1;

@@ -136,7 +135,11 @@ pub(crate) async fn stream_chat_completions(
             Ok(resp) if resp.status().is_success() => {
                 let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(1600);
                 let stream = resp.bytes_stream().map_err(CodexErr::Reqwest);

-                tokio::spawn(process_chat_sse(stream, tx_event));
+                tokio::spawn(process_chat_sse(
+                    stream,
+                    tx_event,
+                    provider.stream_idle_timeout(),
+                ));
               return Ok(ResponseStream { rx_event });
           }
           Ok(res) => {
    

@@ -146,7 +149,7 @@ pub(crate) async fn stream_chat_completions(
                     return Err(CodexErr::UnexpectedStatus(status, body));
                 }

-                if attempt > *OPENAI_REQUEST_MAX_RETRIES {
+                if attempt > max_retries {
                   return Err(CodexErr::RetryLimit(status));
               }
    

@@ -162,7 +165,7 @@ pub(crate) async fn stream_chat_completions(
                 tokio::time::sleep(delay).await;
             }
             Err(e) => {

-                if attempt > *OPENAI_REQUEST_MAX_RETRIES {
+                if attempt > max_retries {
                   return Err(e.into());
               }
               let delay = backoff(attempt);
    

@@ -175,14 +178,15 @@ pub(crate) async fn stream_chat_completions(
 /// Lightweight SSE processor for the Chat Completions streaming format. The
 /// output is mapped onto Codex's internal [ResponseEvent] so that the rest
 /// of the pipeline can stay agnostic of the underlying wire format.
-async fn process_chat_sse(stream: S, tx_event: mpsc::Sender<Result>)
-where
+async fn process_chat_sse(
+    stream: S,
+    tx_event: mpsc::Sender<Result>,
+    idle_timeout: Duration,
+) where
     S: Stream<Item = Result> + Unpin,
 {
     let mut stream = stream.eventsource();
-    let idle_timeout = *OPENAI_STREAM_IDLE_TIMEOUT_MS;

     // State to accumulate a function call across streaming chunks.
     // OpenAI may split the arguments string over multiple delta events
     // until the chunk whose finish_reason is tool_calls is emitted. We

diff --git a/codex-rs/core/src/client.rs b/codex-rs/core/src/client.rs
index ae7904b8ff..62fcabe05b 100644
--- a/codex-rs/core/src/client.rs
+++ b/codex-rs/core/src/client.rs
@@ -30,8 +30,6 @@ use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
 use crate::error::CodexErr;
 use crate::error::Result;
 use crate::flags::CODEX_RS_SSE_FIXTURE;
-use crate::flags::OPENAI_REQUEST_MAX_RETRIES;
-use crate::flags::OPENAI_STREAM_IDLE_TIMEOUT_MS;
 use crate::model_provider_info::ModelProviderInfo;
 use crate::model_provider_info::WireApi;
 use crate::models::ResponseItem;
@@ -113,7 +111,7 @@ impl ModelClient {
         if let Some(path) = &*CODEX_RS_SSE_FIXTURE {
             // short circuit for tests
             warn!(path, "Streaming from fixture");
-            return stream_from_fixture(path).await;
+            return stream_from_fixture(path, self.provider.clone()).await;
       }
    
       let full_instructions = prompt.get_full_instructions(&self.config.model);
    

@@ -140,6 +138,7 @@ impl ModelClient {
         );

     let mut attempt = 0;
+        let max_retries = self.provider.request_max_retries();
       loop {
           attempt += 1;
    

@@ -158,7 +157,11 @@ impl ModelClient {

                 // spawn task to process SSE
                 let stream = resp.bytes_stream().map_err(CodexErr::Reqwest);
-                tokio::spawn(process_sse(stream, tx_event));
+                tokio::spawn(process_sse(
+                    stream,
+                    tx_event,
+                    self.provider.stream_idle_timeout(),
+                ));
    
                   return Ok(ResponseStream { rx_event });
               }
    

@@ -177,7 +180,7 @@ impl ModelClient {
                         return Err(CodexErr::UnexpectedStatus(status, body));
                     }

-                    if attempt > *OPENAI_REQUEST_MAX_RETRIES {
+                    if attempt > max_retries {
                       return Err(CodexErr::RetryLimit(status));
                   }
    

@@ -194,7 +197,7 @@ impl ModelClient {
                 tokio::time::sleep(delay).await;
             }
             Err(e) => {

-                if attempt > *OPENAI_REQUEST_MAX_RETRIES {
+                if attempt > max_retries {
                       return Err(e.into());
                   }
                   let delay = backoff(attempt);
    

@@ -203,6 +206,10 @@ impl ModelClient {
             }
         }
     }
+

+    pub fn get_provider(&self) -> ModelProviderInfo {
+        self.provider.clone()
+    }
 }

 #[derive(Debug, Deserialize, Serialize)]
@@ -254,14 +261,16 @@
 struct ResponseCompletedOutputTokensDetails {
     reasoning_tokens: u64,
 }

-async fn process_sse(stream: S, tx_event: mpsc::Sender<Result>)
-where
+async fn process_sse(
+    stream: S,
+    tx_event: mpsc::Sender<Result>,
+    idle_timeout: Duration,
+) where
     S: Stream<Item = Result> + Unpin,
 {
     let mut stream = stream.eventsource();

     // If the stream stays completely silent for an extended period treat it as disconnected.
-    let idle_timeout = *OPENAI_STREAM_IDLE_TIMEOUT_MS;

     // The response id returned from the "complete" message.
     let mut response_completed: Option = None;

@@ -322,7 +331,7 @@ where
         //      duplicated output array embedded in the response.completed
         //      payload. That produced two concrete issues:
         //        1. No realtime streaming - the user only saw output after the

-        //      entire turn had finished, which broke the “typing” UX and
+        //      entire turn had finished, which broke the "typing" UX and
           //      made longrunning turns look stalled.
           //   2. Duplicate `function_call_output` items  both the
           //      individual *and* the completed array were forwarded, which
    

@@ -395,7 +404,10 @@ where
 }

 /// used in tests to stream from a text SSE file
-async fn stream_from_fixture(path: impl AsRef<Path>) -> Result<ResponseStream> {
+async fn stream_from_fixture(
+    path: impl AsRef<Path>,
+    provider: ModelProviderInfo,
+) -> Result<ResponseStream> {
     let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(1600);
     let f = std::fs::File::open(path.as_ref())?;
     let lines = std::io::BufReader::new(f).lines();
@@ -409,7 +421,11 @@ async fn stream_from_fixture(path: impl AsRef<Path>) -> Result<ResponseStream> {

     let rdr = std::io::Cursor::new(content);
     let stream = ReaderStream::new(rdr).map_err(CodexErr::Io);

-    tokio::spawn(process_sse(stream, tx_event));
+    tokio::spawn(process_sse(
+        stream,
+        tx_event,
+        provider.stream_idle_timeout(),
+    ));

     Ok(ResponseStream { rx_event })
 }

@@ -429,7 +445,10 @@ mod tests {

 /// Runs the SSE parser on pre-chunked byte slices and returns every event
 /// (including any final `Err` from a stream-closure check).
-    async fn collect_events(chunks: &[&[u8]]) -> Vec<Result> {
+    async fn collect_events(
+        chunks: &[&[u8]],
+        provider: ModelProviderInfo,
+    ) -> Vec<Result> {
         let mut builder = IoBuilder::new();
         for chunk in chunks {
             builder.read(chunk);
@@ -438,7 +457,7 @@ mod tests {
         let reader = builder.build();
         let stream = ReaderStream::new(reader).map_err(CodexErr::Io);
         let (tx, mut rx) = mpsc::channel::<Result>(16);
-        tokio::spawn(process_sse(stream, tx));
+        tokio::spawn(process_sse(stream, tx, provider.stream_idle_timeout()));
    
       let mut events = Vec::new();
       while let Some(ev) = rx.recv().await {
    

@@ -449,7 +468,10 @@ mod tests {

 /// Builds an in-memory SSE stream from JSON fixtures and returns only the
 /// successfully parsed events (panics on internal channel errors).
-    async fn run_sse(events: Vec<serde_json::Value>) -> Vec {
+    async fn run_sse(
+        events: Vec<serde_json::Value>,
+        provider: ModelProviderInfo,
+    ) -> Vec {
         let mut body = String::new();
         for e in events {
             let kind = e
@@ -465,7 +487,7 @@ mod tests {

    let (tx, mut rx) = mpsc::channel::<Result<ResponseEvent>>(8);
    let stream = ReaderStream::new(std::io::Cursor::new(body)).map_err(CodexErr::Io);
    
-        tokio::spawn(process_sse(stream, tx));
+        tokio::spawn(process_sse(stream, tx, provider.stream_idle_timeout()));
    
       let mut out = Vec::new();
       while let Some(ev) = rx.recv().await {
    

@@ -510,7 +532,25 @@ mod tests {
         let sse2 = format!("event: response.output_item.done\ndata: {item2}\n\n");
         let sse3 = format!("event: response.completed\ndata: {completed}\n\n");

-        let events = collect_events(&[sse1.as_bytes(), sse2.as_bytes(), sse3.as_bytes()]).await;
+        let provider = ModelProviderInfo {
+            name: "test".to_string(),
+            base_url: "https://test.com".to_string(),
+            env_key: Some("TEST_API_KEY".to_string()),
+            env_key_instructions: None,
+            wire_api: WireApi::Responses,
+            query_params: None,
+            http_headers: None,
+            env_http_headers: None,
+            request_max_retries: Some(0),
+            stream_max_retries: Some(0),
+            stream_idle_timeout_ms: Some(1000),
+        };
+
+        let events = collect_events(
+            &[sse1.as_bytes(), sse2.as_bytes(), sse3.as_bytes()],
+            provider,
+        )
+        .await;
    
       assert_eq!(events.len(), 3);
    

@@ -551,8 +591,21 @@ mod tests {
             .to_string();

     let sse1 = format!("event: response.output_item.done\ndata: {item1}\n\n");
+        let provider = ModelProviderInfo {
+            name: "test".to_string(),
+            base_url: "https://test.com".to_string(),
+            env_key: Some("TEST_API_KEY".to_string()),
+            env_key_instructions: None,
+            wire_api: WireApi::Responses,
+            query_params: None,
+            http_headers: None,
+            env_http_headers: None,
+            request_max_retries: Some(0),
+            stream_max_retries: Some(0),
+            stream_idle_timeout_ms: Some(1000),
+        };
-        let events = collect_events(&[sse1.as_bytes()]).await;
+        let events = collect_events(&[sse1.as_bytes()], provider).await;
    
       assert_eq!(events.len(), 2);
    

@@ -640,7 +693,21 @@ mod tests {
             let mut evs = vec![case.event];
             evs.push(completed.clone());

-            let out = run_sse(evs).await;
+            let provider = ModelProviderInfo {
+                name: "test".to_string(),
+                base_url: "https://test.com".to_string(),
+                env_key: Some("TEST_API_KEY".to_string()),
+                env_key_instructions: None,
+                wire_api: WireApi::Responses,
+                query_params: None,
+                http_headers: None,
+                env_http_headers: None,
+                request_max_retries: Some(0),
+                stream_max_retries: Some(0),
+                stream_idle_timeout_ms: Some(1000),
+            };
+
+            let out = run_sse(evs, provider).await;
           assert_eq!(out.len(), case.expected_len, "case {}", case.name);
           assert!(
               (case.expect_first)(&out[0]),
    

diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index 246198c006..df1ffad50d 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -49,7 +49,6 @@ use crate::exec::ExecToolCallOutput;
 use crate::exec::SandboxType;
 use crate::exec::process_exec_tool_call;
 use crate::exec_env::create_env;
-use crate::flags::OPENAI_STREAM_MAX_RETRIES;
 use crate::mcp_connection_manager::McpConnectionManager;
 use crate::mcp_tool_call::handle_mcp_tool_call;
 use crate::models::ContentItem;
@@ -1027,12 +1026,13 @@ async fn run_turn(
             Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted),
             Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)),
             Err(e) => {

-                if retries < *OPENAI_STREAM_MAX_RETRIES {
+                // Use the configured provider-specific stream retry budget.
+                let max_retries = sess.client.get_provider().stream_max_retries();
+                if retries < max_retries {
                   retries += 1;
                   let delay = backoff(retries);
                   warn!(
    
-                        "stream disconnected - retrying turn ({retries}/{} in {delay:?})...",
-                        *OPENAI_STREAM_MAX_RETRIES
+                        "stream disconnected - retrying turn ({retries}/{max_retries} in {delay:?})...",
                   );
    
                   // Surface retry information to any UI/frontend so the
    

@@ -1041,8 +1041,7 @@ async fn run_turn(
                     sess.notify_background_event(
                         &sub_id,
                         format!(

-                            "stream error: {e}; retrying {retries}/{} in {:?}…",
-                            *OPENAI_STREAM_MAX_RETRIES, delay
+                            "stream error: {e}; retrying {retries}/{max_retries} in {delay:?}…"
                       ),
                   )
                   .await;
    

@@ -1124,7 +1123,28 @@ async fn try_run_turn(
     let mut stream = sess.client.clone().stream(&prompt).await?;

 let mut output = Vec::new();
-    while let Some(Ok(event)) = stream.next().await {
+    loop {
+        // Poll the next item from the model stream. We must inspect *both* Ok and Err
+        // cases so that transient stream failures (e.g., dropped SSE connection before
+        // `response.completed`) bubble up and trigger the caller's retry logic.
+        let event = stream.next().await;
+        let Some(event) = event else {
+            // Channel closed without yielding a final Completed event or explicit error.
+            // Treat as a disconnected stream so the caller can retry.
+            return Err(CodexErr::Stream(
+                "stream closed before response.completed".into(),
+            ));
+        };
+        let event = match event {
+            Ok(ev) => ev,
+            Err(e) => {
+                // Propagate the underlying stream error to the caller (run_turn), which
+                // will apply the configured `stream_max_retries` policy.
+                return Err(e);
+            }
+        };
+        match event {
           ResponseEvent::Created => {
               let mut state = sess.state.lock().unwrap();
    

@@ -1165,7 +1185,7 @@ async fn try_run_turn(

             let mut state = sess.state.lock().unwrap();
             state.previous_response_id = Some(response_id);
-            break;
+            return Ok(output);
           }
           ResponseEvent::OutputTextDelta(delta) => {
               let event = Event {
    

@@ -1183,7 +1203,6 @@ async fn try_run_turn(
             }
         }
     }

-    Ok(output)
 }

 async fn handle_response_item(

diff --git a/codex-rs/core/src/config.rs b/codex-rs/core/src/config.rs
index d67e692fc8..d5b2845398 100644
--- a/codex-rs/core/src/config.rs
+++ b/codex-rs/core/src/config.rs
@@ -682,6 +682,9 @@
 name = "OpenAI using Chat Completions"
 base_url = "https://api.openai.com/v1"
 env_key = "OPENAI_API_KEY"
 wire_api = "chat"
+request_max_retries = 4            # retry failed HTTP requests
+stream_max_retries = 10            # retry dropped SSE streams
+stream_idle_timeout_ms = 300000    # 5m idle timeout

 [profiles.o3]
 model = "o3"
@@ -722,6 +725,9 @@ disable_response_storage = true
             query_params: None,
             http_headers: None,
             env_http_headers: None,

+            request_max_retries: Some(4),
+            stream_max_retries: Some(10),
+            stream_idle_timeout_ms: Some(300_000),
       };
       let model_provider_map = {
           let mut model_provider_map = built_in_model_providers();
    

diff --git a/codex-rs/core/src/flags.rs b/codex-rs/core/src/flags.rs
index c21ef67026..c150405491 100644
--- a/codex-rs/core/src/flags.rs
+++ b/codex-rs/core/src/flags.rs
@@ -11,14 +11,6 @@ env_flags! {
     pub OPENAI_TIMEOUT_MS: Duration = Duration::from_millis(300_000), |value| {
         value.parse().map(Duration::from_millis)
     };

-    pub OPENAI_REQUEST_MAX_RETRIES: u64 = 4;
-    pub OPENAI_STREAM_MAX_RETRIES: u64 = 10;
-
-    // We generally don't want to disconnect; this updates the timeout to be five minutes
-    // which matches the upstream typescript codex impl.
-    pub OPENAI_STREAM_IDLE_TIMEOUT_MS: Duration = Duration::from_millis(300_000), |value| {
-        value.parse().map(Duration::from_millis)
-    };

     /// Fixture path for offline tests (see client.rs).
     pub CODEX_RS_SSE_FIXTURE: Option<&str> = None;

diff --git a/codex-rs/core/src/model_provider_info.rs b/codex-rs/core/src/model_provider_info.rs
index b38c912d34..72ef58c60a 100644
--- a/codex-rs/core/src/model_provider_info.rs
+++ b/codex-rs/core/src/model_provider_info.rs
@@ -9,6 +9,7 @@ use serde::Deserialize;
 use serde::Serialize;
 use std::collections::HashMap;
 use std::env::VarError;
+use std::time::Duration;

 use crate::error::EnvVarError;
 use crate::openai_api_key::get_openai_api_key;
@@ -16,6 +17,9 @@ use crate::openai_api_key::get_openai_api_key;
 /// Value for the OpenAI-Originator header that is sent with requests to
 /// OpenAI.
 const OPENAI_ORIGINATOR_HEADER: &str = "codex_cli_rs";
+const DEFAULT_STREAM_IDLE_TIMEOUT_MS: u64 = 300_000;
+const DEFAULT_STREAM_MAX_RETRIES: u64 = 10;
+const DEFAULT_REQUEST_MAX_RETRIES: u64 = 4;

 /// Wire protocol that the provider speaks. Most third-party services only
 /// implement the classic OpenAI Chat Completions JSON schema, whereas OpenAI
@@ -26,7 +30,7 @@ const OPENAI_ORIGINATOR_HEADER: &str = "codex_cli_rs";
 #[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
 #[serde(rename_all = "lowercase")]
 pub enum WireApi {

-    /// The experimental “Responses” API exposed by OpenAI at /v1/responses.
+    /// The experimental "Responses" API exposed by OpenAI at /v1/responses.
     Responses,

     /// Regular Chat Completions compatible with /v1/chat/completions.
@@ -64,6 +68,16 @@ pub struct ModelProviderInfo {
     /// value should be used. If the environment variable is not set, or the
     /// value is empty, the header will not be included in the request.
     pub env_http_headers: Option<HashMap<String, String>>,

+
+    /// Maximum number of times to retry a failed HTTP request to this provider.
+    pub request_max_retries: Option<u64>,
+
+    /// Number of times to retry reconnecting a dropped streaming response before failing.
+    pub stream_max_retries: Option<u64>,
+
+    /// Idle timeout (in milliseconds) to wait for activity on a streaming response before treating
+    /// the connection as lost.
+    pub stream_idle_timeout_ms: Option<u64>,
 }

 impl ModelProviderInfo {
@@ -161,6 +175,25 @@ impl ModelProviderInfo {
             None => Ok(None),
         }
     }
+

+    /// Effective maximum number of request retries for this provider.
+    pub fn request_max_retries(&self) -> u64 {
+        self.request_max_retries
+            .unwrap_or(DEFAULT_REQUEST_MAX_RETRIES)
+    }
+
+    /// Effective maximum number of stream reconnection attempts for this provider.
+    pub fn stream_max_retries(&self) -> u64 {
+        self.stream_max_retries
+            .unwrap_or(DEFAULT_STREAM_MAX_RETRIES)
+    }
+
+    /// Effective idle timeout for streaming responses.
+    pub fn stream_idle_timeout(&self) -> Duration {
+        self.stream_idle_timeout_ms
+            .map(Duration::from_millis)
+            .unwrap_or(Duration::from_millis(DEFAULT_STREAM_IDLE_TIMEOUT_MS))
+    }
 }

 /// Built-in default provider list.
@@ -205,6 +238,10 @@ pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
                         .into_iter()
                         .collect(),
                 ),

+                // Use global defaults for retry/timeout unless overridden in config.toml.
+                request_max_retries: None,
+                stream_max_retries: None,
+                stream_idle_timeout_ms: None,
           },
       ),
    
     ]
@@ -234,6 +271,9 @@ base_url = "http://localhost:11434/v1"
         query_params: None,
         http_headers: None,
         env_http_headers: None,
+        request_max_retries: None,
+        stream_max_retries: None,
+        stream_idle_timeout_ms: None,
       };
    
       let provider: ModelProviderInfo = toml::from_str(azure_provider_toml).unwrap();
    

@@ -259,6 +299,9 @@ query_params = { api-version = "2025-04-01-preview" }
         }),
         http_headers: None,
         env_http_headers: None,

+        request_max_retries: None,
+        stream_max_retries: None,
+        stream_idle_timeout_ms: None,
       };
    
       let provider: ModelProviderInfo = toml::from_str(azure_provider_toml).unwrap();
    

@@ -287,6 +330,9 @@ env_http_headers = { "X-Example-Env-Header" = "EXAMPLE_ENV_VAR" }
         env_http_headers: Some(maplit::hashmap! {
             "X-Example-Env-Header".to_string() => "EXAMPLE_ENV_VAR".to_string(),
         }),

+        request_max_retries: None,
+        stream_max_retries: None,
+        stream_idle_timeout_ms: None,
       };
    
       let provider: ModelProviderInfo = toml::from_str(azure_provider_toml).unwrap();
    

diff --git a/codex-rs/core/tests/cli_stream.rs b/codex-rs/core/tests/cli_stream.rs
index 3669b93f51..23ee0a3cbc 100644
--- a/codex-rs/core/tests/cli_stream.rs
+++ b/codex-rs/core/tests/cli_stream.rs
@@ -173,7 +173,7 @@ async fn integration_creates_and_checks_session_file() {
     // 5. Sessions are written asynchronously; wait briefly for the directory to appear.
     let sessions_dir = home.path().join("sessions");
     let start = Instant::now();

-    while !sessions_dir.exists() && start.elapsed() < Duration::from_secs(2) {
+    while !sessions_dir.exists() && start.elapsed() < Duration::from_secs(3) {
         std::thread::sleep(Duration::from_millis(50));
     }

diff --git a/codex-rs/core/tests/client.rs b/codex-rs/core/tests/client.rs
index f4fb58f5a4..964710b83f 100644
--- a/codex-rs/core/tests/client.rs
+++ b/codex-rs/core/tests/client.rs
@@ -49,13 +49,6 @@ async fn includes_session_id_and_model_headers_in_request() {
         .mount(&server)
         .await;

-    // Environment
-    // Update environment  `set_var` is `unsafe` starting with the 2024
-    // edition so we group the calls into a single `unsafe { … }` block.
-    unsafe {
-        std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0");
-        std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "0");
-    }
     let model_provider = ModelProviderInfo {
         name: "openai".into(),
         base_url: format!("{}/v1", server.uri()),
@@ -72,6 +65,9 @@ async fn includes_session_id_and_model_headers_in_request() {
                 .collect(),
         ),
         env_http_headers: None,
+        request_max_retries: Some(0),
+        stream_max_retries: Some(0),
+        stream_idle_timeout_ms: None,
    

    };

     // Init session

diff --git a/codex-rs/core/tests/live_agent.rs b/codex-rs/core/tests/live_agent.rs
index c21f9d0032..26a5539dd7 100644
--- a/codex-rs/core/tests/live_agent.rs
+++ b/codex-rs/core/tests/live_agent.rs
@@ -45,22 +45,10 @@ async fn spawn_codex() -> Result<Codex, CodexErr> {
         "OPENAI_API_KEY must be set for live tests"
     );

-    // Environment tweaks to keep the tests snappy and inexpensive while still
-    // exercising retry/robustness logic.
-    //
-    // NOTE: Starting with the 2024 edition `std::env::set_var` is `unsafe`
-    // because changing the process environment races with any other threads
-    // that might be performing environment look-ups at the same time.
-    // Restrict the unsafety to this tiny block that happens at the very
-    // beginning of the test, before we spawn any background tasks that could
-    // observe the environment.
-    unsafe {
-        std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "2");
-        std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "2");
-    }
-
     let codex_home = TempDir::new().unwrap();
-    let config = load_default_config_for_test(&codex_home);
+    let mut config = load_default_config_for_test(&codex_home);
+    config.model_provider.request_max_retries = Some(2);
+    config.model_provider.stream_max_retries = Some(2);

     let (agent, _init_id) = Codex::spawn(config, std::sync::Arc::new(Notify::new())).await?;

     Ok(agent)
@@ -79,7 +67,7 @@ async fn live_streaming_and_prev_id_reset() {

    let codex = spawn_codex().await.unwrap();

-    // ---------- Task 1 ----------
+    // ---------- Task 1 ----------
     codex
         .submit(Op::UserInput {
             items: vec![InputItem::Text {
@@ -113,7 +101,7 @@ async fn live_streaming_and_prev_id_reset() {
         "Agent did not stream any AgentMessage before TaskComplete"
     );
-    // ---------- Task 2 (same session) ----------
+    // ---------- Task 2 (same session) ----------
     codex
         .submit(Op::UserInput {
             items: vec![InputItem::Text {

diff --git a/codex-rs/core/tests/previous_response_id.rs b/codex-rs/core/tests/previous_response_id.rs
index e64271a0ff..9630cc1028 100644
--- a/codex-rs/core/tests/previous_response_id.rs
+++ b/codex-rs/core/tests/previous_response_id.rs
@@ -88,13 +88,8 @@ async fn keeps_previous_response_id_between_tasks() {
         .mount(&server)
         .await;
-    // Environment
-    // Update environment  `set_var` is `unsafe` starting with the 2024
-    // edition so we group the calls into a single `unsafe { … }` block.
-    unsafe {
-        std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0");
-        std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "0");
-    }
+    // Configure retry behavior explicitly to avoid mutating process-wide
+    // environment variables.
     let model_provider = ModelProviderInfo {
         name: "openai".into(),
         base_url: format!("{}/v1", server.uri()),
@@ -107,6 +102,10 @@ async fn keeps_previous_response_id_between_tasks() {
         query_params: None,
         http_headers: None,
         env_http_headers: None,

+        // disable retries so we don't get duplicate calls in this test
+        request_max_retries: Some(0),
+        stream_max_retries: Some(0),
+        stream_idle_timeout_ms: None,
    

    };

     // Init session

diff --git a/codex-rs/core/tests/stream_no_completed.rs b/codex-rs/core/tests/stream_no_completed.rs
index 8883eff373..f2de5de188 100644
--- a/codex-rs/core/tests/stream_no_completed.rs
+++ b/codex-rs/core/tests/stream_no_completed.rs
@@ -32,8 +32,6 @@ fn sse_completed(id: &str) -> String {
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-// this test is flaky (has race conditions), so we ignore it for now
-#[ignore]
 async fn retries_on_early_close() {
     #![allow(clippy::unwrap_used)]

@@ -72,19 +70,8 @@ async fn retries_on_early_close() {
         .mount(&server)
         .await;

-    // Environment
-    //
-    // As of Rust 2024 `std::env::set_var` has been made `unsafe` because
-    // mutating the process environment is inherently racy when other threads
-    // are running.  We therefore have to wrap every call in an explicit
-    // `unsafe` block.  These are limited to the test-setup section so the
-    // scope is very small and clearly delineated.
-
-    unsafe {
-        std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0");
-        std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "1");
-        std::env::set_var("OPENAI_STREAM_IDLE_TIMEOUT_MS", "2000");
-    }
+    // Configure retry behavior explicitly to avoid mutating process-wide
+    // environment variables.
     let model_provider = ModelProviderInfo {
         name: "openai".into(),
@@ -98,6 +85,10 @@ async fn retries_on_early_close() {
         query_params: None,
         http_headers: None,
         env_http_headers: None,

+        // exercise retry path: first attempt yields incomplete stream, so allow 1 retry
+        request_max_retries: Some(0),
+        stream_max_retries: Some(1),
+        stream_idle_timeout_ms: Some(2000),
    

    };

    let ctrl_c = std::sync::Arc::new(tokio::sync::Notify::new());


## Review Comments

### codex-rs/config.md

- Created: 2025-07-17 21:01:49 UTC | Link: https://github.com/openai/codex/pull/1601#discussion_r2214299757

```diff
@@ -466,7 +466,33 @@ This is analogous to `model_context_window`, but for the maximum number of outpu
 
 Maximum number of bytes to read from an `AGENTS.md` file to include in the instructions sent with the first turn of a session. Defaults to 32 KiB.
 
-## tui
+## Per-provider network tuning
+
+The following optional settings control retry behaviour and streaming idle timeouts **per model provider**. They must be specified inside the corresponding `[model_providers.<id>]` block in `config.toml`. (Older releases accepted toplevel keys; those are now ignored.)
+
+Example:
+
+```toml
+[model_providers.openai]
+name = "OpenAI"
+base_url = "https://api.openai.com/v1"
+env_key = "OPENAI_API_KEY"
+# network tuning overrides (all optional; falls back to builtin defaults)
+openai_request_max_retries = 4            # retry failed HTTP requests
+openai_stream_max_retries = 10            # retry dropped SSE streams
+openai_stream_idle_timeout_ms = 300000    # 5m idle timeout
+```
+
+### openai_request_max_retries
+How many times Codex will retry a failed HTTP request to the model provider. Defaults to `4`.
+
+### openai_stream_max_retries
+Number of times Codex will attempt to reconnect when a streaming response is interrupted. Defaults to `10`.
+
+### openai_stream_idle_timeout_ms
+How long Codex will wait for activity on a streaming response before treating the connection as lost. Defaults to `300000` (5 minutes).
+
+### tui
```

Why did the header level change from ## to ###? I guess Codex did it?

@@ -466,7 +466,33 @@ This is analogous to `model_context_window`, but for the maximum number of outpu
 
 Maximum number of bytes to read from an `AGENTS.md` file to include in the instructions sent with the first turn of a session. Defaults to 32 KiB.
 
-## tui
+## Per-provider network tuning

I think this should be part of the existing model_providers documentation.

I'm also not sure whether these overrides will work for the built-in openai provider as things work today?

@@ -92,6 +92,32 @@ http_headers = { "X-Example-Header" = "example-value" }
 env_http_headers = { "X-Example-Features": "EXAMPLE_FEATURES" }

+### Per-provider network tuning
+
+The following optional settings control retry behaviour and streaming idle timeouts per model provider. They must be specified inside the corresponding [model_providers.<id>] block in config.toml. (Older releases accepted toplevel keys; those are now ignored.)
+
+Example:
+
+```toml
+[model_providers.openai]
+name = "OpenAI"
+base_url = "https://api.openai.com/v1"
+env_key = "OPENAI_API_KEY"
+# network tuning overrides (all optional; falls back to builtin defaults)
+request_max_retries = 4            # retry failed HTTP requests
+openai_stream_max_retries = 10     # retry dropped SSE streams
+openai_stream_idle_timeout_ms = 300000    # 5m idle timeout
+```
+
+#### request_max_retries
+How many times Codex will retry a failed HTTP request to the model provider. Defaults to 4.
+
+#### openai_stream_max_retries
+Number of times Codex will attempt to reconnect when a streaming response is interrupted. Defaults to 10.
+
+#### openai_stream_idle_timeout_ms
+How long Codex will wait for activity on a streaming response before treating the connection as lost. Defaults to 300000 (5 minutes).


> Note, TOML supports underscores in int literals, making them easier to read for large values:
> 
> ```suggestion
> How long Codex will wait for activity on a streaming response before treating the connection as lost. Defaults to `300_000` (5 minutes).
> ```

### codex-rs/core/src/chat_completions.rs

- Created: 2025-07-17 18:36:15 UTC | Link: https://github.com/openai/codex/pull/1601#discussion_r2214030972

```diff
@@ -136,7 +136,11 @@ pub(crate) async fn stream_chat_completions(
             Ok(resp) if resp.status().is_success() => {
                 let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(1600);
                 let stream = resp.bytes_stream().map_err(CodexErr::Reqwest);
-                tokio::spawn(process_chat_sse(stream, tx_event));
+                tokio::spawn(process_chat_sse(
```

Seeing how ModelProviderInfo is already in scope, should we instead define stream_idle_timeout_ms, request_max_retries, and stream_max_retries on ModelProviderInfo instead so it is configurable for all model providers?

@@ -34,6 +33,7 @@ pub(crate) async fn stream_chat_completions(
     model: &str,
     client: &reqwest::Client,
     provider: &ModelProviderInfo,
+    _config: &Config,

We can remove this new arg now, right?

### codex-rs/core/src/client.rs

@@ -29,8 +29,7 @@ use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
 use crate::error::CodexErr;
 use crate::error::Result;
 use crate::flags::CODEX_RS_SSE_FIXTURE;
-use crate::flags::OPENAI_REQUEST_MAX_RETRIES;
-use crate::flags::OPENAI_STREAM_IDLE_TIMEOUT_MS;
+use crate::flags::OPENAI_STREAM_IDLE_TIMEOUT_MS; // retained for default config

drop comment?

@@ -64,6 +63,10 @@ impl ModelClient {
         }
     }
 
+    pub fn config(&self) -> &Config {

Is this needed or was this from an earlier rev of this PR?

@@ -404,7 +415,11 @@ async fn stream_from_fixture(path: impl AsRef<Path>) -> Result<ResponseStream> {
 
     let rdr = std::io::Cursor::new(content);
     let stream = ReaderStream::new(rdr).map_err(CodexErr::Io);
-    tokio::spawn(process_sse(stream, tx_event));
+    tokio::spawn(process_sse(
+        stream,
+        tx_event,
+        *OPENAI_STREAM_IDLE_TIMEOUT_MS,

I thought we were passing everything through the provider?

@@ -198,6 +201,9 @@ impl ModelClient {
             }
         }
     }
+    pub fn get_provider(&self) -> ModelProviderInfo {

newline before? (I'm surprised just fmt doesn't do this...)

### codex-rs/core/src/codex.rs

@@ -1026,12 +1025,14 @@ async fn run_turn(
             Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted),
             Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)),
             Err(e) => {
-                if retries < *OPENAI_STREAM_MAX_RETRIES {
+                // Use the configured provider-specific stream retry budget.
+                let max_retries = sess.client.get_provider().stream_max_retries();
+                if retries < max_retries {
                     retries += 1;
                     let delay = backoff(retries);
                     warn!(
                         "stream disconnected - retrying turn ({retries}/{} in {delay:?})...",
-                        *OPENAI_STREAM_MAX_RETRIES
+                        max_retries

GitHub makes it hard to show this correctly...

                        "stream disconnected - retrying turn ({retries}/{max_retries} in {delay:?})...",

I thought the latest clippy flags this sort of thing, I should investigate...

@@ -1182,7 +1203,7 @@ async fn try_run_turn(
             }
         }
     }
-    Ok(output)
+    // unreachable: loop only exits via return statements above

can remove this comment; the compiler would flag any added code as unreachable, right?

@@ -1026,12 +1025,14 @@ async fn run_turn(
             Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted),
             Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)),
             Err(e) => {
-                if retries < *OPENAI_STREAM_MAX_RETRIES {
+                // Use the configured provider-specific stream retry budget.
+                let max_retries = sess.client.get_provider().stream_max_retries();
+                if retries < max_retries {
                     retries += 1;
                     let delay = backoff(retries);
                     warn!(
                         "stream disconnected - retrying turn ({retries}/{} in {delay:?})...",
-                        *OPENAI_STREAM_MAX_RETRIES
+                        "stream disconnected - retrying turn ({retries}/{max_retries} in {delay:?})...",

remove the line above?

@@ -1026,12 +1025,14 @@ async fn run_turn(
             Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted),
             Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)),
             Err(e) => {
-                if retries < *OPENAI_STREAM_MAX_RETRIES {
+                // Use the configured provider-specific stream retry budget.
+                let max_retries = sess.client.get_provider().stream_max_retries();
+                if retries < max_retries {
                     retries += 1;
                     let delay = backoff(retries);
                     warn!(
                         "stream disconnected - retrying turn ({retries}/{} in {delay:?})...",
-                        *OPENAI_STREAM_MAX_RETRIES
+                        "stream disconnected - retrying turn ({retries}/{max_retries} in {delay:?})...",

also curious if delay needs the :?

@@ -1123,7 +1123,28 @@ async fn try_run_turn(
     let mut stream = sess.client.clone().stream(&prompt).await?;
 
     let mut output = Vec::new();
-    while let Some(Ok(event)) = stream.next().await {
+    loop {
+        // Poll the next item from the model stream. We must inspect *both* Ok and Err
+        // cases so that transient stream failures (e.g., dropped SSE connection before
+        // `response.completed`) bubble up and trigger the caller's retry logic.
+        let next = stream.next().await;

maybe name it event instead of just next?

@@ -1123,7 +1123,28 @@ async fn try_run_turn(
     let mut stream = sess.client.clone().stream(&prompt).await?;
 
     let mut output = Vec::new();
-    while let Some(Ok(event)) = stream.next().await {

Hmm, so before, if it were None, we would just return whatever output we had thus far?

### codex-rs/core/src/config.rs

@@ -722,6 +722,9 @@ disable_response_storage = true
             query_params: None,
             http_headers: None,
             env_http_headers: None,
+            openai_request_max_retries: None,

Maybe you want to update toml on line 671 to exercise the parsing?

### codex-rs/core/src/model_provider_info.rs

@@ -64,6 +68,21 @@ pub struct ModelProviderInfo {
     /// value should be used. If the environment variable is not set, or the
     /// value is empty, the header will not be included in the request.
     pub env_http_headers: Option<HashMap<String, String>>,
+
+    /// Maximum number of times to retry a failed HTTP request to this provider.
+    /// When `None`, falls back to the global default from `OPENAI_REQUEST_MAX_RETRIES` (currently 4).
+    #[serde(default)]
+    pub openai_request_max_retries: Option<u64>,

These should not have an openai_ prefix, but should be generally applicable to all providers, right?

@@ -205,6 +243,10 @@ pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
                         .into_iter()
                         .collect(),
                 ),
+                // Use global defaults for retry/timeout unless overridden in config.toml.

I think we should only be looking at the OPENAI_ environment variables for the built-in OpenAI provider, not all providers, right?

@@ -64,6 +68,21 @@ pub struct ModelProviderInfo {
     /// value should be used. If the environment variable is not set, or the
     /// value is empty, the header will not be included in the request.
     pub env_http_headers: Option<HashMap<String, String>>,
+
+    /// Maximum number of times to retry a failed HTTP request to this provider.
+    /// When `None`, falls back to the global default from `OPENAI_REQUEST_MAX_RETRIES` (currently 4).
+    #[serde(default)]

This should be unnecessary for Optional?

@@ -205,6 +243,10 @@ pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
                         .into_iter()
                         .collect(),
                 ),
+                // Use global defaults for retry/timeout unless overridden in config.toml.

Taking a step back, I'm not sure we should be honoring OPENAI_STREAM_IDLE_TIMEOUT_MS, OPENAI_REQUEST_MAX_RETRIES, or OPENAI_STREAM_MAX_RETRIES at all. As best I can tell, these are not "standard" OpenAI environment variables, but ones that we made up for Codex?

I've been trying to maintain a consistency where the Config is the "one true way" to configure things, so supporting a small handful of environment variables confuses that.

@@ -161,6 +172,23 @@ impl ModelProviderInfo {
             None => Ok(None),
         }
     }
+
+    /// Effective maximum number of request retries for this provider.
+    pub fn request_max_retries(&self) -> u64 {
+        self.request_max_retries.unwrap_or(4)

I think the default values should be their own constants at the top of the file.

@@ -9,13 +9,17 @@ use serde::Deserialize;
 use serde::Serialize;
 use std::collections::HashMap;
 use std::env::VarError;
+use std::time::Duration;
 
 use crate::error::EnvVarError;
 use crate::openai_api_key::get_openai_api_key;
 
 /// Value for the `OpenAI-Originator` header that is sent with requests to
 /// OpenAI.
 const OPENAI_ORIGINATOR_HEADER: &str = "codex_cli_rs";
+const OPENAI_STREAM_IDLE_TIMEOUT_MS: u64 = 300_000;
+const OPENAI_STREAM_MAX_RETRIES: u64 = 10;
+const OPENAI_REQUEST_MAX_RETRIES: u64 = 4;

I think it's more appropriate for these to be:

const DEFAULT_STREAM_IDLE_TIMEOUT_MS: u64 = 300_000;
const DEFAULT_STREAM_MAX_RETRIES: u64 = 10;
const DEFAULT_REQUEST_MAX_RETRIES: u64 = 4;

### codex-rs/core/tests/live_agent.rs

@@ -45,22 +45,9 @@ async fn spawn_codex() -> Result<Codex, CodexErr> {
         "OPENAI_API_KEY must be set for live tests"
     );
 
-    // Environment tweaks to keep the tests snappy and inexpensive while still
-    // exercising retry/robustness logic.
-    //
-    // NOTE: Starting with the 2024 edition `std::env::set_var` is `unsafe`
-    // because changing the process environment races with any other threads
-    // that might be performing environment look-ups at the same time.
-    // Restrict the unsafety to this tiny block that happens at the very
-    // beginning of the test, before we spawn any background tasks that could
-    // observe the environment.
-    unsafe {
-        std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "2");
-        std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "2");
-    }
-
     let codex_home = TempDir::new().unwrap();
     let config = load_default_config_for_test(&codex_home);
+    // network retry/timeout tuning moved into ModelProviderInfo; using defaults

remove comment

@@ -45,20 +45,6 @@ async fn spawn_codex() -> Result<Codex, CodexErr> {
         "OPENAI_API_KEY must be set for live tests"
     );
 
-    // Environment tweaks to keep the tests snappy and inexpensive while still
-    // exercising retry/robustness logic.
-    //
-    // NOTE: Starting with the 2024 edition `std::env::set_var` is `unsafe`
-    // because changing the process environment races with any other threads
-    // that might be performing environment look-ups at the same time.
-    // Restrict the unsafety to this tiny block that happens at the very
-    // beginning of the test, before we spawn any background tasks that could
-    // observe the environment.
-    unsafe {

In these tests where you are deleting unsafe, I would expect there to be changes to a Config to set the corresponding values. Am I missing something?

### codex-rs/core/tests/previous_response_id.rs

@@ -88,13 +88,8 @@ async fn keeps_previous_response_id_between_tasks() {
         .mount(&server)
         .await;
 
-    // Environment
-    // Update environment  `set_var` is `unsafe` starting with the 2024
-    // edition so we group the calls into a single `unsafe { … }` block.
-    unsafe {
-        std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0");
-        std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "0");
-    }
+    // Configure retry behaviour explicitly to avoid mutating process-wide
    // Configure retry behavior explicitly to avoid mutating process-wide

### codex-rs/core/tests/stream_no_completed.rs

@@ -72,19 +72,8 @@ async fn retries_on_early_close() {
         .mount(&server)
         .await;
 
-    // Environment
-    //
-    // As of Rust 2024 `std::env::set_var` has been made `unsafe` because
-    // mutating the process environment is inherently racy when other threads
-    // are running.  We therefore have to wrap every call in an explicit
-    // `unsafe` block.  These are limited to the test-setup section so the
-    // scope is very small and clearly delineated.
-
-    unsafe {
-        std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0");
-        std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "1");
-        std::env::set_var("OPENAI_STREAM_IDLE_TIMEOUT_MS", "2000");
-    }
+    // Configure retry behaviour explicitly to avoid mutating process-wide
    // Configure retry behavior explicitly to avoid mutating process-wide