unified exec tests

2026-04-24 06:35:50 +00:00 · 2025-11-12 10:57:55 -08:00
15 changed files with 88 additions and 551 deletions
--- a/codex-rs/common/src/config_override.rs
+++ b/codex-rs/common/src/config_override.rs
@@ -151,15 +151,6 @@ mod tests {
        assert_eq!(v.as_integer(), Some(42));
    }

-    #[test]
-    fn parses_bool() {
-        let true_literal = parse_toml_value("true").expect("parse");
-        assert_eq!(true_literal.as_bool(), Some(true));
-
-        let false_literal = parse_toml_value("false").expect("parse");
-        assert_eq!(false_literal.as_bool(), Some(false));
-    }
-
    #[test]
    fn fails_on_unquoted_string() {
        assert!(parse_toml_value("hello").is_err());
--- a/codex-rs/core/src/bash.rs
+++ b/codex-rs/core/src/bash.rs
@@ -104,7 +104,7 @@ pub fn extract_bash_command(command: &[String]) -> Option<(&str, &str)> {
    let [shell, flag, script] = command else {
        return None;
    };
-    if !matches!(flag.as_str(), "-lc" | "-c") || !is_well_known_sh_shell(shell) {
+    if flag != "-lc" || !is_well_known_sh_shell(shell) {
        return None;
    }
    Some((shell, script))
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -97,7 +97,6 @@ use crate::protocol::Submission;
 use crate::protocol::TokenCountEvent;
 use crate::protocol::TokenUsage;
 use crate::protocol::TurnDiffEvent;
-use crate::protocol::WarningEvent;
 use crate::rollout::RolloutRecorder;
 use crate::rollout::RolloutRecorderParams;
 use crate::shell;
@@ -675,34 +674,6 @@ impl Session {
                let rollout_items = conversation_history.get_rollout_items();
                let persist = matches!(conversation_history, InitialHistory::Forked(_));

-                // If resuming, warn when the last recorded model differs from the current one.
-                if let InitialHistory::Resumed(_) = conversation_history
-                    && let Some(prev) = rollout_items.iter().rev().find_map(|it| {
-                        if let RolloutItem::TurnContext(ctx) = it {
-                            Some(ctx.model.as_str())
-                        } else {
-                            None
-                        }
-                    })
-                {
-                    let curr = turn_context.client.get_model();
-                    if prev != curr {
-                        warn!(
-                            "resuming session with different model: previous={prev}, current={curr}"
-                        );
-                        self.send_event(
-                                &turn_context,
-                                EventMsg::Warning(WarningEvent {
-                                    message: format!(
-                                        "This session was recorded with model `{prev}` but is resuming with `{curr}`. \
-                         Consider switching back to `{prev}` as it may affect Codex performance."
-                                    ),
-                                }),
-                            )
-                                .await;
-                    }
-                }
-
                // Always add response items to conversation history
                let reconstructed_history =
                    self.reconstruct_history_from_rollout(&turn_context, &rollout_items);
--- a/codex-rs/core/src/features.rs
+++ b/codex-rs/core/src/features.rs
@@ -29,9 +29,6 @@ pub enum Stage {
 pub enum Feature {
    /// Use the single unified PTY-backed exec tool.
    UnifiedExec,
-    /// Use the shell command tool that takes `command` as a single string of
-    /// shell instead of an array of args passed to `execvp(3)`.
-    ShellCommandTool,
    /// Enable experimental RMCP features such as OAuth login.
    RmcpClient,
    /// Include the freeform apply_patch tool.
@@ -253,12 +250,6 @@ pub const FEATURES: &[FeatureSpec] = &[
        stage: Stage::Experimental,
        default_enabled: false,
    },
-    FeatureSpec {
-        id: Feature::ShellCommandTool,
-        key: "shell_command_tool",
-        stage: Stage::Experimental,
-        default_enabled: false,
-    },
    FeatureSpec {
        id: Feature::RmcpClient,
        key: "rmcp_client",
--- a/codex-rs/core/src/shell.rs
+++ b/codex-rs/core/src/shell.rs
@@ -31,37 +31,16 @@ pub enum Shell {
 impl Shell {
    pub fn name(&self) -> Option<String> {
        match self {
-            Shell::Zsh(ZshShell { shell_path, .. }) | Shell::Bash(BashShell { shell_path, .. }) => {
-                std::path::Path::new(shell_path)
-                    .file_name()
-                    .map(|s| s.to_string_lossy().to_string())
-            }
+            Shell::Zsh(zsh) => std::path::Path::new(&zsh.shell_path)
+                .file_name()
+                .map(|s| s.to_string_lossy().to_string()),
+            Shell::Bash(bash) => std::path::Path::new(&bash.shell_path)
+                .file_name()
+                .map(|s| s.to_string_lossy().to_string()),
            Shell::PowerShell(ps) => Some(ps.exe.clone()),
            Shell::Unknown => None,
        }
    }
-
-    /// Takes a string of shell and returns the full list of command args to
-    /// use with `exec()` to run the shell command.
-    pub fn derive_exec_args(&self, command: &str, use_login_shell: bool) -> Vec<String> {
-        match self {
-            Shell::Zsh(ZshShell { shell_path, .. }) | Shell::Bash(BashShell { shell_path, .. }) => {
-                let arg = if use_login_shell { "-lc" } else { "-c" };
-                vec![shell_path.clone(), arg.to_string(), command.to_string()]
-            }
-            Shell::PowerShell(ps) => {
-                let mut args = vec![ps.exe.clone(), "-NoLogo".to_string()];
-                if !use_login_shell {
-                    args.push("-NoProfile".to_string());
-                }
-
-                args.push("-Command".to_string());
-                args.push(command.to_string());
-                args
-            }
-            Shell::Unknown => shlex::split(command).unwrap_or_else(|| vec![command.to_string()]),
-        }
-    }
 }

 #[cfg(unix)]
--- a/codex-rs/core/src/tasks/user_shell.rs
+++ b/codex-rs/core/src/tasks/user_shell.rs
@@ -63,10 +63,27 @@ impl SessionTask for UserShellCommandTask {
        // Execute the user's script under their default shell when known; this
        // allows commands that use shell features (pipes, &&, redirects, etc.).
        // We do not source rc files or otherwise reformat the script.
-        let use_login_shell = true;
-        let shell_invocation = session
-            .user_shell()
-            .derive_exec_args(&self.command, use_login_shell);
+        let shell_invocation = match session.user_shell() {
+            crate::shell::Shell::Zsh(zsh) => vec![
+                zsh.shell_path.clone(),
+                "-lc".to_string(),
+                self.command.clone(),
+            ],
+            crate::shell::Shell::Bash(bash) => vec![
+                bash.shell_path.clone(),
+                "-lc".to_string(),
+                self.command.clone(),
+            ],
+            crate::shell::Shell::PowerShell(ps) => vec![
+                ps.exe.clone(),
+                "-NoProfile".to_string(),
+                "-Command".to_string(),
+                self.command.clone(),
+            ],
+            crate::shell::Shell::Unknown => {
+                shlex::split(&self.command).unwrap_or_else(|| vec![self.command.clone()])
+            }
+        };

        let call_id = Uuid::new_v4().to_string();
        let raw_command = self.command.clone();
--- a/codex-rs/core/src/tools/handlers/mod.rs
+++ b/codex-rs/core/src/tools/handlers/mod.rs
@@ -19,7 +19,6 @@ pub use mcp::McpHandler;
 pub use mcp_resource::McpResourceHandler;
 pub use plan::PlanHandler;
 pub use read_file::ReadFileHandler;
-pub use shell::ShellCommandHandler;
 pub use shell::ShellHandler;
 pub use test_sync::TestSyncHandler;
 pub use unified_exec::UnifiedExecHandler;
--- a/codex-rs/core/src/tools/handlers/shell.rs
+++ b/codex-rs/core/src/tools/handlers/shell.rs
@@ -1,5 +1,4 @@
 use async_trait::async_trait;
-use codex_protocol::models::ShellCommandToolCallParams;
 use codex_protocol::models::ShellToolCallParams;
 use std::sync::Arc;

@@ -26,8 +25,6 @@ use crate::tools::sandboxing::ToolCtx;

 pub struct ShellHandler;

-pub struct ShellCommandHandler;
-
 impl ShellHandler {
    fn to_exec_params(params: ShellToolCallParams, turn_context: &TurnContext) -> ExecParams {
        ExecParams {
@@ -42,28 +39,6 @@ impl ShellHandler {
    }
 }

-impl ShellCommandHandler {
-    fn to_exec_params(
-        params: ShellCommandToolCallParams,
-        session: &crate::codex::Session,
-        turn_context: &TurnContext,
-    ) -> ExecParams {
-        let shell = session.user_shell();
-        let use_login_shell = true;
-        let command = shell.derive_exec_args(&params.command, use_login_shell);
-
-        ExecParams {
-            command,
-            cwd: turn_context.resolve_path(params.workdir.clone()),
-            timeout_ms: params.timeout_ms,
-            env: create_env(&turn_context.shell_environment_policy),
-            with_escalated_permissions: params.with_escalated_permissions,
-            justification: params.justification,
-            arg0: None,
-        }
-    }
-}
-
 #[async_trait]
 impl ToolHandler for ShellHandler {
    fn kind(&self) -> ToolKind {
@@ -127,49 +102,6 @@ impl ToolHandler for ShellHandler {
    }
 }

-#[async_trait]
-impl ToolHandler for ShellCommandHandler {
-    fn kind(&self) -> ToolKind {
-        ToolKind::Function
-    }
-
-    fn matches_kind(&self, payload: &ToolPayload) -> bool {
-        matches!(payload, ToolPayload::Function { .. })
-    }
-
-    async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> {
-        let ToolInvocation {
-            session,
-            turn,
-            tracker,
-            call_id,
-            tool_name,
-            payload,
-        } = invocation;
-
-        let ToolPayload::Function { arguments } = payload else {
-            return Err(FunctionCallError::RespondToModel(format!(
-                "unsupported payload for shell_command handler: {tool_name}"
-            )));
-        };
-
-        let params: ShellCommandToolCallParams = serde_json::from_str(&arguments).map_err(|e| {
-            FunctionCallError::RespondToModel(format!("failed to parse function arguments: {e:?}"))
-        })?;
-        let exec_params = Self::to_exec_params(params, session.as_ref(), turn.as_ref());
-        ShellHandler::run_exec_like(
-            tool_name.as_str(),
-            exec_params,
-            session,
-            turn,
-            tracker,
-            call_id,
-            false,
-        )
-        .await
-    }
-}
-
 impl ShellHandler {
    async fn run_exec_like(
        tool_name: &str,
@@ -308,49 +240,3 @@ impl ShellHandler {
        })
    }
 }
-
-#[cfg(test)]
-mod tests {
-    use crate::is_safe_command::is_known_safe_command;
-    use crate::shell::BashShell;
-    use crate::shell::Shell;
-    use crate::shell::ZshShell;
-
-    /// The logic for is_known_safe_command() has heuristics for known shells,
-    /// so we must ensure the commands generated by [ShellCommandHandler] can be
-    /// recognized as safe if the `command` is safe.
-    #[test]
-    fn commands_generated_by_shell_command_handler_can_be_matched_by_is_known_safe_command() {
-        let bash_shell = Shell::Bash(BashShell {
-            shell_path: "/bin/bash".to_string(),
-            bashrc_path: "/home/user/.bashrc".to_string(),
-        });
-        assert_safe(&bash_shell, "ls -la");
-
-        let zsh_shell = Shell::Zsh(ZshShell {
-            shell_path: "/bin/zsh".to_string(),
-            zshrc_path: "/home/user/.zshrc".to_string(),
-        });
-        assert_safe(&zsh_shell, "ls -la");
-
-        #[cfg(target_os = "windows")]
-        {
-            use crate::shell::PowerShellConfig;
-
-            let powershell = Shell::PowerShell(PowerShellConfig {
-                exe: "pwsh.exe".to_string(),
-                bash_exe_fallback: None,
-            });
-            assert_safe(&powershell, "ls -Name");
-        }
-    }
-
-    fn assert_safe(shell: &Shell, command: &str) {
-        assert!(is_known_safe_command(
-            &shell.derive_exec_args(command, /* use_login_shell */ true)
-        ));
-        assert!(is_known_safe_command(
-            &shell.derive_exec_args(command, /* use_login_shell */ false)
-        ));
-    }
-}
--- a/codex-rs/core/src/tools/spec.rs
+++ b/codex-rs/core/src/tools/spec.rs
@@ -20,8 +20,6 @@ pub enum ConfigShellToolType {
    Default,
    Local,
    UnifiedExec,
-    /// Takes a command as a single string to be run in the user's default shell.
-    ShellCommand,
 }

 #[derive(Debug, Clone)]
@@ -50,8 +48,6 @@ impl ToolsConfig {

        let shell_type = if features.enabled(Feature::UnifiedExec) {
            ConfigShellToolType::UnifiedExec
-        } else if features.enabled(Feature::ShellCommandTool) {
-            ConfigShellToolType::ShellCommand
        } else {
            model_family.shell_type.clone()
        };
@@ -306,53 +302,6 @@ fn create_shell_tool() -> ToolSpec {
    })
 }

-fn create_shell_command_tool() -> ToolSpec {
-    let mut properties = BTreeMap::new();
-    properties.insert(
-        "command".to_string(),
-        JsonSchema::String {
-            description: Some(
-                "The shell script to execute in the user's default shell".to_string(),
-            ),
-        },
-    );
-    properties.insert(
-        "workdir".to_string(),
-        JsonSchema::String {
-            description: Some("The working directory to execute the command in".to_string()),
-        },
-    );
-    properties.insert(
-        "timeout_ms".to_string(),
-        JsonSchema::Number {
-            description: Some("The timeout for the command in milliseconds".to_string()),
-        },
-    );
-    properties.insert(
-        "with_escalated_permissions".to_string(),
-        JsonSchema::Boolean {
-            description: Some("Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions".to_string()),
-        },
-    );
-    properties.insert(
-        "justification".to_string(),
-        JsonSchema::String {
-            description: Some("Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command.".to_string()),
-        },
-    );
-
-    ToolSpec::Function(ResponsesApiTool {
-        name: "shell_command".to_string(),
-        description: "Runs a shell command string and returns its output.".to_string(),
-        strict: false,
-        parameters: JsonSchema::Object {
-            properties,
-            required: Some(vec!["command".to_string()]),
-            additional_properties: Some(false.into()),
-        },
-    })
-}
-
 fn create_view_image_tool() -> ToolSpec {
    // Support only local filesystem path.
    let mut properties = BTreeMap::new();
@@ -942,7 +891,6 @@ pub(crate) fn build_specs(
    use crate::tools::handlers::McpResourceHandler;
    use crate::tools::handlers::PlanHandler;
    use crate::tools::handlers::ReadFileHandler;
-    use crate::tools::handlers::ShellCommandHandler;
    use crate::tools::handlers::ShellHandler;
    use crate::tools::handlers::TestSyncHandler;
    use crate::tools::handlers::UnifiedExecHandler;
@@ -958,7 +906,6 @@ pub(crate) fn build_specs(
    let view_image_handler = Arc::new(ViewImageHandler);
    let mcp_handler = Arc::new(McpHandler);
    let mcp_resource_handler = Arc::new(McpResourceHandler);
-    let shell_command_handler = Arc::new(ShellCommandHandler);

    match &config.shell_type {
        ConfigShellToolType::Default => {
@@ -973,16 +920,12 @@ pub(crate) fn build_specs(
            builder.register_handler("exec_command", unified_exec_handler.clone());
            builder.register_handler("write_stdin", unified_exec_handler);
        }
-        ConfigShellToolType::ShellCommand => {
-            builder.push_spec(create_shell_command_tool());
-        }
    }

    // Always register shell aliases so older prompts remain compatible.
    builder.register_handler("shell", shell_handler.clone());
    builder.register_handler("container.exec", shell_handler.clone());
    builder.register_handler("local_shell", shell_handler);
-    builder.register_handler("shell_command", shell_command_handler);

    builder.push_spec_with_parallel_support(create_list_mcp_resources_tool(), true);
    builder.push_spec_with_parallel_support(create_list_mcp_resource_templates_tool(), true);
@@ -1118,7 +1061,6 @@ mod tests {
            ConfigShellToolType::Default => Some("shell"),
            ConfigShellToolType::Local => Some("local_shell"),
            ConfigShellToolType::UnifiedExec => None,
-            ConfigShellToolType::ShellCommand => Some("shell_command"),
        }
    }

@@ -1351,22 +1293,6 @@ mod tests {
        assert_contains_tool_names(&tools, &subset);
    }

-    #[test]
-    fn test_build_specs_shell_command_present() {
-        assert_model_tools(
-            "codex-mini-latest",
-            Features::with_defaults().enable(Feature::ShellCommandTool),
-            &[
-                "shell_command",
-                "list_mcp_resources",
-                "list_mcp_resource_templates",
-                "read_mcp_resource",
-                "update_plan",
-                "view_image",
-            ],
-        );
-    }
-
    #[test]
    #[ignore]
    fn test_parallel_support_flags() {
@@ -1822,21 +1748,6 @@ mod tests {
        assert_eq!(description, expected);
    }

-    #[test]
-    fn test_shell_command_tool() {
-        let tool = super::create_shell_command_tool();
-        let ToolSpec::Function(ResponsesApiTool {
-            description, name, ..
-        }) = &tool
-        else {
-            panic!("expected function tool");
-        };
-        assert_eq!(name, "shell_command");
-
-        let expected = "Runs a shell command string and returns its output.";
-        assert_eq!(description, expected);
-    }
-
    #[test]
    fn test_get_openai_tools_mcp_tools_with_additional_properties_schema() {
        let model_family = find_family_for_model("gpt-5-codex")
--- a/codex-rs/core/tests/common/responses.rs
+++ b/codex-rs/core/tests/common/responses.rs
@@ -446,6 +446,12 @@ pub async fn mount_sse_once(server: &MockServer, body: String) -> ResponseMock {
    response_mock
 }

+pub async fn mount_sse(server: &MockServer, body: String) -> ResponseMock {
+    let (mock, response_mock) = base_mock();
+    mock.respond_with(sse_response(body)).mount(server).await;
+    response_mock
+}
+
 pub async fn start_mock_server() -> MockServer {
    MockServer::builder()
        .body_print_limit(BodyPrintLimit::Limited(80_000))
--- a/codex-rs/core/tests/suite/otel.rs
+++ b/codex-rs/core/tests/suite/otel.rs
@@ -9,6 +9,7 @@ use core_test_support::responses::ev_assistant_message;
 use core_test_support::responses::ev_completed;
 use core_test_support::responses::ev_custom_tool_call;
 use core_test_support::responses::ev_function_call;
+use core_test_support::responses::mount_sse;
 use core_test_support::responses::mount_sse_once;
 use core_test_support::responses::sse;
 use core_test_support::responses::start_mock_server;
@@ -102,6 +103,8 @@ async fn process_sse_emits_failed_event_on_parse_error() {
    let TestCodex { codex, .. } = test_codex()
        .with_config(move |config| {
            config.features.disable(Feature::GhostCommit);
+            config.model_provider.request_max_retries = Some(0);
+            config.model_provider.stream_max_retries = Some(0);
        })
        .build(&server)
        .await
@@ -141,6 +144,8 @@ async fn process_sse_records_failed_event_when_stream_closes_without_completed()
    let TestCodex { codex, .. } = test_codex()
        .with_config(move |config| {
            config.features.disable(Feature::GhostCommit);
+            config.model_provider.request_max_retries = Some(0);
+            config.model_provider.stream_max_retries = Some(0);
        })
        .build(&server)
        .await
@@ -188,18 +193,12 @@ async fn process_sse_failed_event_records_response_error_message() {
        })]),
    )
    .await;
-    mount_sse_once(
-        &server,
-        sse(vec![
-            ev_assistant_message("msg-1", "local shell done"),
-            ev_completed("done"),
-        ]),
-    )
-    .await;

    let TestCodex { codex, .. } = test_codex()
        .with_config(move |config| {
            config.features.disable(Feature::GhostCommit);
+            config.model_provider.request_max_retries = Some(0);
+            config.model_provider.stream_max_retries = Some(0);
        })
        .build(&server)
        .await
@@ -245,18 +244,12 @@ async fn process_sse_failed_event_logs_parse_error() {
        })]),
    )
    .await;
-    mount_sse_once(
-        &server,
-        sse(vec![
-            ev_assistant_message("msg-1", "local shell done"),
-            ev_completed("done"),
-        ]),
-    )
-    .await;

    let TestCodex { codex, .. } = test_codex()
        .with_config(move |config| {
            config.features.disable(Feature::GhostCommit);
+            config.model_provider.request_max_retries = Some(0);
+            config.model_provider.stream_max_retries = Some(0);
        })
        .build(&server)
        .await
@@ -301,6 +294,8 @@ async fn process_sse_failed_event_logs_missing_error() {
    let TestCodex { codex, .. } = test_codex()
        .with_config(move |config| {
            config.features.disable(Feature::GhostCommit);
+            config.model_provider.request_max_retries = Some(0);
+            config.model_provider.stream_max_retries = Some(0);
        })
        .build(&server)
        .await
@@ -342,18 +337,11 @@ async fn process_sse_failed_event_logs_response_completed_parse_error() {
    )
    .await;

-    mount_sse_once(
-        &server,
-        sse(vec![
-            ev_assistant_message("msg-1", "local shell done"),
-            ev_completed("done"),
-        ]),
-    )
-    .await;
-
    let TestCodex { codex, .. } = test_codex()
        .with_config(move |config| {
            config.features.disable(Feature::GhostCommit);
+            config.model_provider.request_max_retries = Some(0);
+            config.model_provider.stream_max_retries = Some(0);
        })
        .build(&server)
        .await
@@ -442,7 +430,7 @@ async fn process_sse_emits_completed_telemetry() {
 async fn handle_response_item_records_tool_result_for_custom_tool_call() {
    let server = start_mock_server().await;

-    mount_sse_once(
+    mount_sse(
        &server,
        sse(vec![
            ev_custom_tool_call(
@@ -454,18 +442,12 @@ async fn handle_response_item_records_tool_result_for_custom_tool_call() {
        ]),
    )
    .await;
-    mount_sse_once(
-        &server,
-        sse(vec![
-            ev_assistant_message("msg-1", "local shell done"),
-            ev_completed("done"),
-        ]),
-    )
-    .await;

    let TestCodex { codex, .. } = test_codex()
        .with_config(move |config| {
            config.features.disable(Feature::GhostCommit);
+            config.model_provider.request_max_retries = Some(0);
+            config.model_provider.stream_max_retries = Some(0);
        })
        .build(&server)
        .await
@@ -512,7 +494,7 @@ async fn handle_response_item_records_tool_result_for_custom_tool_call() {
 async fn handle_response_item_records_tool_result_for_function_call() {
    let server = start_mock_server().await;

-    mount_sse_once(
+    mount_sse(
        &server,
        sse(vec![
            ev_function_call("function-call", "nonexistent", "{\"value\":1}"),
@@ -521,18 +503,11 @@ async fn handle_response_item_records_tool_result_for_function_call() {
    )
    .await;

-    mount_sse_once(
-        &server,
-        sse(vec![
-            ev_assistant_message("msg-1", "local shell done"),
-            ev_completed("done"),
-        ]),
-    )
-    .await;
-
    let TestCodex { codex, .. } = test_codex()
        .with_config(move |config| {
            config.features.disable(Feature::GhostCommit);
+            config.model_provider.request_max_retries = Some(0);
+            config.model_provider.stream_max_retries = Some(0);
        })
        .build(&server)
        .await
@@ -579,7 +554,7 @@ async fn handle_response_item_records_tool_result_for_function_call() {
 async fn handle_response_item_records_tool_result_for_local_shell_missing_ids() {
    let server = start_mock_server().await;

-    mount_sse_once(
+    mount_sse(
        &server,
        sse(vec![
            serde_json::json!({
@@ -598,18 +573,11 @@ async fn handle_response_item_records_tool_result_for_local_shell_missing_ids()
    )
    .await;

-    mount_sse_once(
-        &server,
-        sse(vec![
-            ev_assistant_message("msg-1", "local shell done"),
-            ev_completed("done"),
-        ]),
-    )
-    .await;
-
    let TestCodex { codex, .. } = test_codex()
        .with_config(move |config| {
            config.features.disable(Feature::GhostCommit);
+            config.model_provider.request_max_retries = Some(0);
+            config.model_provider.stream_max_retries = Some(0);
        })
        .build(&server)
        .await
@@ -650,7 +618,7 @@ async fn handle_response_item_records_tool_result_for_local_shell_missing_ids()
 async fn handle_response_item_records_tool_result_for_local_shell_call() {
    let server = start_mock_server().await;

-    mount_sse_once(
+    mount_sse(
        &server,
        sse(vec![
            ev_local_shell_call("shell-call", "completed", vec!["/bin/echo", "shell"]),
@@ -659,18 +627,11 @@ async fn handle_response_item_records_tool_result_for_local_shell_call() {
    )
    .await;

-    mount_sse_once(
-        &server,
-        sse(vec![
-            ev_assistant_message("msg-1", "local shell done"),
-            ev_completed("done"),
-        ]),
-    )
-    .await;
-
    let TestCodex { codex, .. } = test_codex()
        .with_config(move |config| {
            config.features.disable(Feature::GhostCommit);
+            config.model_provider.request_max_retries = Some(0);
+            config.model_provider.stream_max_retries = Some(0);
        })
        .build(&server)
        .await
@@ -749,23 +710,10 @@ fn tool_decision_assertion<'a>(
 #[traced_test]
 async fn handle_container_exec_autoapprove_from_config_records_tool_decision() {
    let server = start_mock_server().await;
-    mount_sse_once(
+    mount_sse(
        &server,
        sse(vec![
-            ev_local_shell_call(
-                "auto_config_call",
-                "completed",
-                vec!["/bin/echo", "local shell"],
-            ),
-            ev_completed("done"),
-        ]),
-    )
-    .await;
-
-    mount_sse_once(
-        &server,
-        sse(vec![
-            ev_assistant_message("msg-1", "local shell done"),
+            ev_local_shell_call("auto_config_call", "completed", vec!["/bin/echo", "hello"]),
            ev_completed("done"),
        ]),
    )
@@ -775,6 +723,8 @@ async fn handle_container_exec_autoapprove_from_config_records_tool_decision() {
        .with_config(|config| {
            config.approval_policy = AskForApproval::OnRequest;
            config.sandbox_policy = SandboxPolicy::DangerFullAccess;
+            config.model_provider.request_max_retries = Some(0);
+            config.model_provider.stream_max_retries = Some(0);
        })
        .build(&server)
        .await
@@ -789,7 +739,7 @@ async fn handle_container_exec_autoapprove_from_config_records_tool_decision() {
        .await
        .unwrap();

-    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TokenCount(_))).await;

    logs_assert(tool_decision_assertion(
        "auto_config_call",
@@ -802,7 +752,7 @@ async fn handle_container_exec_autoapprove_from_config_records_tool_decision() {
 #[traced_test]
 async fn handle_container_exec_user_approved_records_tool_decision() {
    let server = start_mock_server().await;
-    mount_sse_once(
+    mount_sse(
        &server,
        sse(vec![
            ev_local_shell_call("user_approved_call", "completed", vec!["/bin/date"]),
@@ -811,18 +761,11 @@ async fn handle_container_exec_user_approved_records_tool_decision() {
    )
    .await;

-    mount_sse_once(
-        &server,
-        sse(vec![
-            ev_assistant_message("msg-1", "local shell done"),
-            ev_completed("done"),
-        ]),
-    )
-    .await;
-
    let TestCodex { codex, .. } = test_codex()
        .with_config(|config| {
            config.approval_policy = AskForApproval::UnlessTrusted;
+            config.model_provider.request_max_retries = Some(0);
+            config.model_provider.stream_max_retries = Some(0);
        })
        .build(&server)
        .await
@@ -861,7 +804,7 @@ async fn handle_container_exec_user_approved_records_tool_decision() {
 async fn handle_container_exec_user_approved_for_session_records_tool_decision() {
    let server = start_mock_server().await;

-    mount_sse_once(
+    mount_sse(
        &server,
        sse(vec![
            ev_local_shell_call("user_approved_session_call", "completed", vec!["/bin/date"]),
@@ -869,18 +812,12 @@ async fn handle_container_exec_user_approved_for_session_records_tool_decision()
        ]),
    )
    .await;
-    mount_sse_once(
-        &server,
-        sse(vec![
-            ev_assistant_message("msg-1", "local shell done"),
-            ev_completed("done"),
-        ]),
-    )
-    .await;

    let TestCodex { codex, .. } = test_codex()
        .with_config(|config| {
            config.approval_policy = AskForApproval::UnlessTrusted;
+            config.model_provider.request_max_retries = Some(0);
+            config.model_provider.stream_max_retries = Some(0);
        })
        .build(&server)
        .await
@@ -919,7 +856,7 @@ async fn handle_container_exec_user_approved_for_session_records_tool_decision()
 async fn handle_sandbox_error_user_approves_retry_records_tool_decision() {
    let server = start_mock_server().await;

-    mount_sse_once(
+    mount_sse(
        &server,
        sse(vec![
            ev_local_shell_call("sandbox_retry_call", "completed", vec!["/bin/date"]),
@@ -927,18 +864,12 @@ async fn handle_sandbox_error_user_approves_retry_records_tool_decision() {
        ]),
    )
    .await;
-    mount_sse_once(
-        &server,
-        sse(vec![
-            ev_assistant_message("msg-1", "local shell done"),
-            ev_completed("done"),
-        ]),
-    )
-    .await;

    let TestCodex { codex, .. } = test_codex()
        .with_config(|config| {
            config.approval_policy = AskForApproval::UnlessTrusted;
+            config.model_provider.request_max_retries = Some(0);
+            config.model_provider.stream_max_retries = Some(0);
        })
        .build(&server)
        .await
@@ -977,7 +908,7 @@ async fn handle_sandbox_error_user_approves_retry_records_tool_decision() {
 async fn handle_container_exec_user_denies_records_tool_decision() {
    let server = start_mock_server().await;

-    mount_sse_once(
+    mount_sse(
        &server,
        sse(vec![
            ev_local_shell_call("user_denied_call", "completed", vec!["/bin/date"]),
@@ -986,17 +917,11 @@ async fn handle_container_exec_user_denies_records_tool_decision() {
    )
    .await;

-    mount_sse_once(
-        &server,
-        sse(vec![
-            ev_assistant_message("msg-1", "local shell done"),
-            ev_completed("done"),
-        ]),
-    )
-    .await;
    let TestCodex { codex, .. } = test_codex()
        .with_config(|config| {
            config.approval_policy = AskForApproval::UnlessTrusted;
+            config.model_provider.request_max_retries = Some(0);
+            config.model_provider.stream_max_retries = Some(0);
        })
        .build(&server)
        .await
@@ -1035,7 +960,7 @@ async fn handle_container_exec_user_denies_records_tool_decision() {
 async fn handle_sandbox_error_user_approves_for_session_records_tool_decision() {
    let server = start_mock_server().await;

-    mount_sse_once(
+    mount_sse(
        &server,
        sse(vec![
            ev_local_shell_call("sandbox_session_call", "completed", vec!["/bin/date"]),
@@ -1043,18 +968,12 @@ async fn handle_sandbox_error_user_approves_for_session_records_tool_decision()
        ]),
    )
    .await;
-    mount_sse_once(
-        &server,
-        sse(vec![
-            ev_assistant_message("msg-1", "local shell done"),
-            ev_completed("done"),
-        ]),
-    )
-    .await;

    let TestCodex { codex, .. } = test_codex()
        .with_config(|config| {
            config.approval_policy = AskForApproval::UnlessTrusted;
+            config.model_provider.request_max_retries = Some(0);
+            config.model_provider.stream_max_retries = Some(0);
        })
        .build(&server)
        .await
@@ -1093,7 +1012,7 @@ async fn handle_sandbox_error_user_approves_for_session_records_tool_decision()
 async fn handle_sandbox_error_user_denies_records_tool_decision() {
    let server = start_mock_server().await;

-    mount_sse_once(
+    mount_sse(
        &server,
        sse(vec![
            ev_local_shell_call("sandbox_deny_call", "completed", vec!["/bin/date"]),
@@ -1102,18 +1021,11 @@ async fn handle_sandbox_error_user_denies_records_tool_decision() {
    )
    .await;

-    mount_sse_once(
-        &server,
-        sse(vec![
-            ev_assistant_message("msg-1", "local shell done"),
-            ev_completed("done"),
-        ]),
-    )
-    .await;
-
    let TestCodex { codex, .. } = test_codex()
        .with_config(|config| {
            config.approval_policy = AskForApproval::UnlessTrusted;
+            config.model_provider.request_max_retries = Some(0);
+            config.model_provider.stream_max_retries = Some(0);
        })
        .build(&server)
        .await
--- a/codex-rs/core/tests/suite/resume_warning.rs
+++ b/codex-rs/core/tests/suite/resume_warning.rs
@@ -1,70 +0,0 @@
-#![allow(clippy::unwrap_used, clippy::expect_used)]
-
-use codex_core::AuthManager;
-use codex_core::CodexAuth;
-use codex_core::ConversationManager;
-use codex_core::NewConversation;
-use codex_core::protocol::EventMsg;
-use codex_core::protocol::InitialHistory;
-use codex_core::protocol::ResumedHistory;
-use codex_core::protocol::RolloutItem;
-use codex_core::protocol::TurnContextItem;
-use codex_core::protocol::WarningEvent;
-use codex_protocol::ConversationId;
-use core::time::Duration;
-use core_test_support::load_default_config_for_test;
-use core_test_support::wait_for_event;
-use tempfile::TempDir;
-
-fn resume_history(config: &codex_core::config::Config, previous_model: &str, rollout_path: &std::path::Path) -> InitialHistory {
-    let turn_ctx = TurnContextItem {
-        cwd: config.cwd.clone(),
-        approval_policy: config.approval_policy,
-        sandbox_policy: config.sandbox_policy.clone(),
-        model: previous_model.to_string(),
-        effort: config.model_reasoning_effort,
-        summary: config.model_reasoning_summary,
-    };
-
-    InitialHistory::Resumed(ResumedHistory {
-        conversation_id: ConversationId::default(),
-        history: vec![RolloutItem::TurnContext(turn_ctx)],
-        rollout_path: rollout_path.to_path_buf(),
-    })
-}
-
-#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn emits_warning_when_resumed_model_differs() {
-    // Arrange a config with a current model and a prior rollout recorded under a different model.
-    let home = TempDir::new().expect("tempdir");
-    let mut config = load_default_config_for_test(&home);
-    config.model = "current-model".to_string();
-    // Ensure cwd is absolute (the helper sets it to the temp dir already).
-    assert!(config.cwd.is_absolute());
-
-    let rollout_path = home.path().join("rollout.jsonl");
-    std::fs::write(&rollout_path, "").expect("create rollout placeholder");
-
-    let initial_history = resume_history(&config, "previous-model", &rollout_path);
-
-    let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("test"));
-    let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("test"));
-
-    // Act: resume the conversation.
-    let NewConversation { conversation, .. } = conversation_manager
-        .resume_conversation_with_history(config, initial_history, auth_manager)
-        .await
-        .expect("resume conversation");
-
-    // Assert: a Warning event is emitted describing the model mismatch.
-    let warning = wait_for_event(&conversation, |ev| matches!(ev, EventMsg::Warning(_))).await;
-    let EventMsg::Warning(WarningEvent { message }) = warning else {
-        panic!("expected warning event");
-    };
-    assert!(message.contains("previous-model"));
-    assert!(message.contains("current-model"));
-
-    // Drain the TaskComplete/Shutdown window to avoid leaking tasks between tests.
-    // The warning is emitted during initialization, so a short sleep is sufficient.
-    tokio::time::sleep(Duration::from_millis(50)).await;
-}
--- a/codex-rs/core/tests/suite/unified_exec.rs
+++ b/codex-rs/core/tests/suite/unified_exec.rs
@@ -1,4 +1,3 @@
-#![cfg(not(target_os = "windows"))]
 use std::collections::HashMap;
 use std::sync::OnceLock;

--- a/codex-rs/protocol/src/models.rs
+++ b/codex-rs/protocol/src/models.rs
@@ -292,7 +292,7 @@ impl From<Vec<UserInput>> for ResponseInputItem {
 }

 /// If the `name` of a `ResponseItem::FunctionCall` is either `container.exec`
-/// or `shell`, the `arguments` field should deserialize to this struct.
+/// or `shell`, the `arguments` field should deserialize to this struct.
 #[derive(Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
 pub struct ShellToolCallParams {
    pub command: Vec<String>,
@@ -307,22 +307,6 @@ pub struct ShellToolCallParams {
    pub justification: Option<String>,
 }

-/// If the `name` of a `ResponseItem::FunctionCall` is `shell_command`, the
-/// `arguments` field should deserialize to this struct.
-#[derive(Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
-pub struct ShellCommandToolCallParams {
-    pub command: String,
-    pub workdir: Option<String>,
-
-    /// This is the maximum time in milliseconds that the command is allowed to run.
-    #[serde(alias = "timeout")]
-    pub timeout_ms: Option<u64>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub with_escalated_permissions: Option<bool>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub justification: Option<String>,
-}
-
 /// Responses API compatible content items that can be returned by a tool call.
 /// This is a subset of ContentItem with the types we support as function call outputs.
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
--- a/docs/prompts.md
+++ b/docs/prompts.md
@@ -42,55 +42,16 @@ Custom prompts turn your repeatable instructions into reusable slash commands, s

 ### Examples

-### Example 1: Basic named arguments
+**Draft PR helper**

-**File**: `~/.codex/prompts/ticket.md`
+`~/.codex/prompts/draftpr.md`

 ```markdown
 ---
-description: Generate a commit message for a ticket
-argument-hint: TICKET_ID=<id> TICKET_TITLE=<title>
+description: Create feature branch, commit and open draft PR.
 ---

-Please write a concise commit message for ticket $TICKET_ID: $TICKET_TITLE
+Create a branch named `tibo/<feature_name>`, commit the changes, and open a draft PR.
 ```

-**Usage**:
-
-```
-/prompts:ticket TICKET_ID=JIRA-1234 TICKET_TITLE="Fix login bug"
-```
-
-**Expanded prompt sent to Codex**:
-
-```
-Please write a concise commit message for ticket JIRA-1234: Fix login bug
-```
-
-**Note**: Both `TICKET_ID` and `TICKET_TITLE` are required. If either is missing, Codex will show a validation error. Values with spaces must be double-quoted.
-
-### Example 2: Mixed positional and named arguments
-
-**File**: `~/.codex/prompts/review.md`
-
-```markdown
---
-description: Review code in a specific file with focus area
-argument-hint: FILE=<path> [FOCUS=<section>]
---
-
-Review the code in $FILE. Pay special attention to $FOCUS.
-```
-
-**Usage**:
-
-```
-/prompts:review FILE=src/auth.js FOCUS="error handling"
-```
-
-**Expanded prompt**:
-
-```
-Review the code in src/auth.js. Pay special attention to error handling.
-
-```
+Usage: type `/prompts:draftpr` to have Codex perform the work.