Simplify

Fix
Auto-compact when running out of context
2026-05-13 15:52:40 +00:00 · 2025-09-22 19:34:06 -07:00 · 2025-09-22 17:18:30 -07:00 · 2025-09-22 17:00:23 -07:00 · 2025-09-22 23:12:26 +00:00 · 2025-09-22 22:24:31 +00:00
144 changed files with 4945 additions and 2248 deletions
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -4,6 +4,7 @@ In the codex-rs folder where the rust code lives:

 - Crate names are prefixed with `codex-`. For example, the `core` folder's crate is named `codex-core`
 - When using format! and you can inline variables into {}, always do that.
+- Before running instructions here, install any commands the repo relies on (for example `just`, `rg`, or `cargo-insta`) that aren't already available.
 - Never add or modify any code related to `CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR` or `CODEX_SANDBOX_ENV_VAR`.
  - You operate in a sandbox where `CODEX_SANDBOX_NETWORK_DISABLED=1` will be set whenever you use the `shell` tool. Any existing code that uses `CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR` was authored with this fact in mind. It is often used to early exit out of tests that the author knew you would not be able to run given your sandbox limitations.
  - Similarly, when you spawn a process using Seatbelt (`/usr/bin/sandbox-exec`), `CODEX_SANDBOX=seatbelt` will be set on the child process. Integration tests that want to run Seatbelt themselves cannot be run under Seatbelt, so checks for `CODEX_SANDBOX=seatbelt` are also often used to early exit out of tests, as appropriate.
--- a/codex-rs/Cargo.lock
+++ b/codex-rs/Cargo.lock
@@ -638,9 +638,11 @@ dependencies = [
 "codex-protocol",
 "codex-protocol-ts",
 "codex-tui",
+ "owo-colors",
 "predicates",
 "pretty_assertions",
 "serde_json",
+ "supports-color",
 "tempfile",
 "tokio",
 "tracing",
@@ -687,7 +689,7 @@ dependencies = [
 "portable-pty",
 "predicates",
 "pretty_assertions",
- "rand 0.9.2",
+ "rand",
 "regex-lite",
 "reqwest",
 "seccompiler",
@@ -797,7 +799,8 @@ dependencies = [
 "chrono",
 "codex-core",
 "codex-protocol",
- "rand 0.8.5",
+ "core_test_support",
+ "rand",
 "reqwest",
 "serde",
 "serde_json",
@@ -835,6 +838,7 @@ dependencies = [
 "codex-core",
 "codex-login",
 "codex-protocol",
+ "core_test_support",
 "mcp-types",
 "mcp_test_support",
 "os_info",
@@ -933,7 +937,7 @@ dependencies = [
 "pathdiff",
 "pretty_assertions",
 "pulldown-cmark",
- "rand 0.9.2",
+ "rand",
 "ratatui",
 "regex-lite",
 "serde",
@@ -1076,6 +1080,7 @@ dependencies = [
 "serde_json",
 "tempfile",
 "tokio",
+ "wiremock",
 ]

 [[package]]
@@ -3450,35 +3455,14 @@ dependencies = [
 "nibble_vec",
 ]

-[[package]]
-name = "rand"
-version = "0.8.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
-dependencies = [
- "libc",
- "rand_chacha 0.3.1",
- "rand_core 0.6.4",
-]
-
 [[package]]
 name = "rand"
 version = "0.9.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
 dependencies = [
- "rand_chacha 0.9.0",
- "rand_core 0.9.3",
-]
-
-[[package]]
-name = "rand_chacha"
-version = "0.3.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
-dependencies = [
- "ppv-lite86",
- "rand_core 0.6.4",
+ "rand_chacha",
+ "rand_core",
 ]

 [[package]]
@@ -3488,16 +3472,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
 dependencies = [
 "ppv-lite86",
- "rand_core 0.9.3",
-]
-
-[[package]]
-name = "rand_core"
-version = "0.6.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
-dependencies = [
- "getrandom 0.2.16",
+ "rand_core",
 ]

 [[package]]
--- a/codex-rs/Cargo.toml
+++ b/codex-rs/Cargo.toml
@@ -29,15 +29,167 @@ version = "0.0.0"
 # edition.
 edition = "2024"

+[workspace.dependencies]
+# Internal
+codex-ansi-escape = { path = "ansi-escape" }
+codex-apply-patch = { path = "apply-patch" }
+codex-arg0 = { path = "arg0" }
+codex-chatgpt = { path = "chatgpt" }
+codex-common = { path = "common" }
+codex-core = { path = "core" }
+codex-exec = { path = "exec" }
+codex-file-search = { path = "file-search" }
+codex-linux-sandbox = { path = "linux-sandbox" }
+codex-login = { path = "login" }
+codex-mcp-client = { path = "mcp-client" }
+codex-mcp-server = { path = "mcp-server" }
+codex-ollama = { path = "ollama" }
+codex-protocol = { path = "protocol" }
+codex-protocol-ts = { path = "protocol-ts" }
+codex-tui = { path = "tui" }
+core_test_support = { path = "core/tests/common" }
+mcp-types = { path = "mcp-types" }
+mcp_test_support = { path = "mcp-server/tests/common" }
+
+# External
+allocative = "0.3.3"
+ansi-to-tui = "7.0.0"
+anyhow = "1"
+arboard = "3"
+askama = "0.12"
+assert_cmd = "2"
+async-channel = "2.3.1"
+async-stream = "0.3.6"
+base64 = "0.22.1"
+bytes = "1.10.1"
+chrono = "0.4.40"
+clap = "4"
+clap_complete = "4"
+color-eyre = "0.6.3"
+crossterm = "0.28.1"
+derive_more = "2"
+diffy = "0.4.2"
+dirs = "6"
+dotenvy = "0.15.7"
+env-flags = "0.1.1"
+env_logger = "0.11.5"
+eventsource-stream = "0.2.3"
+futures = "0.3"
+icu_decimal = "2.0.0"
+icu_locale_core = "2.0.0"
+ignore = "0.4.23"
+image = { version = "^0.25.8", default-features = false }
+insta = "1.43.2"
+itertools = "0.14.0"
+landlock = "0.4.1"
+lazy_static = "1"
+libc = "0.2.175"
+log = "0.4"
+maplit = "1.0.2"
+mime_guess = "2.0.5"
+multimap = "0.10.0"
+nucleo-matcher = "0.3.1"
+once_cell = "1"
+openssl-sys = "*"
+os_info = "3.12.0"
+owo-colors = "4.2.0"
+path-absolutize = "3.1.1"
+path-clean = "1.0.1"
+pathdiff = "0.2"
+portable-pty = "0.9.0"
+predicates = "3"
+pretty_assertions = "1.4.1"
+pulldown-cmark = "0.10"
+rand = "0.9"
+ratatui = "0.29.0"
+regex-lite = "0.1.7"
+reqwest = "0.12"
+schemars = "0.8.22"
+seccompiler = "0.5.0"
+serde = "1"
+serde_json = "1"
+serde_with = "3.14"
+sha1 = "0.10.6"
+sha2 = "0.10"
+shlex = "1.3.0"
+similar = "2.7.0"
+starlark = "0.13.0"
+strum = "0.27.2"
+strum_macros = "0.27.2"
+supports-color = "3.0.2"
+sys-locale = "0.3.2"
+tempfile = "3.13.0"
+textwrap = "0.16.2"
+thiserror = "2.0.16"
+time = "0.3"
+tiny_http = "0.12"
+tokio = "1"
+tokio-stream = "0.1.17"
+tokio-test = "0.4"
+tokio-util = "0.7.16"
+toml = "0.9.5"
+toml_edit = "0.23.4"
+tracing = "0.1.41"
+tracing-appender = "0.2.3"
+tracing-subscriber = "0.3.20"
+tree-sitter = "0.25.9"
+tree-sitter-bash = "0.25.0"
+ts-rs = "11"
+unicode-segmentation = "1.12.0"
+unicode-width = "0.1"
+url = "2"
+urlencoding = "2.1"
+uuid = "1"
+vt100 = "0.16.2"
+walkdir = "2.5.0"
+webbrowser = "1.0"
+which = "6"
+wildmatch = "2.5.0"
+wiremock = "0.6"
+
 [workspace.lints]
 rust = {}

 [workspace.lints.clippy]
 expect_used = "deny"
+identity_op = "deny"
+manual_clamp = "deny"
+manual_filter = "deny"
+manual_find = "deny"
+manual_flatten = "deny"
+manual_map = "deny"
+manual_memcpy = "deny"
+manual_non_exhaustive = "deny"
+manual_ok_or = "deny"
+manual_range_contains = "deny"
+manual_retain = "deny"
+manual_strip = "deny"
+manual_try_fold = "deny"
+manual_unwrap_or = "deny"
+needless_borrow = "deny"
+needless_borrowed_reference = "deny"
+needless_collect = "deny"
+needless_late_init = "deny"
+needless_option_as_deref = "deny"
+needless_question_mark = "deny"
+needless_update = "deny"
 redundant_clone = "deny"
+redundant_closure = "deny"
+redundant_closure_for_method_calls = "deny"
+redundant_static_lifetimes = "deny"
+trivially_copy_pass_by_ref = "deny"
 uninlined_format_args = "deny"
+unnecessary_filter_map = "deny"
+unnecessary_lazy_evaluations = "deny"
+unnecessary_sort_by = "deny"
+unnecessary_to_owned = "deny"
 unwrap_used = "deny"

+# cargo-shear cannot see the platform-specific openssl-sys usage, so we
+# silence the false positive here instead of deleting a real dependency.
+[workspace.metadata.cargo-shear]
+ignored = ["openssl-sys"]
+
 [profile.release]
 lto = "fat"
 # Because we bundle some of these executables with the TypeScript CLI, we
--- a/codex-rs/ansi-escape/Cargo.toml
+++ b/codex-rs/ansi-escape/Cargo.toml
@@ -8,9 +8,9 @@ name = "codex_ansi_escape"
 path = "src/lib.rs"

 [dependencies]
-ansi-to-tui = "7.0.0"
-ratatui = { version = "0.29.0", features = [
+ansi-to-tui = { workspace = true }
+ratatui = { workspace = true, features = [
    "unstable-rendered-line-info",
    "unstable-widget-ref",
 ] }
-tracing = { version = "0.1.41", features = ["log"] }
+tracing = { workspace = true, features = ["log"] }
--- a/codex-rs/apply-patch/Cargo.toml
+++ b/codex-rs/apply-patch/Cargo.toml
@@ -15,14 +15,14 @@ path = "src/main.rs"
 workspace = true

 [dependencies]
-anyhow = "1"
-similar = "2.7.0"
-thiserror = "2.0.16"
-tree-sitter = "0.25.9"
-tree-sitter-bash = "0.25.0"
-once_cell = "1"
+anyhow = { workspace = true }
+similar = { workspace = true }
+thiserror = { workspace = true }
+tree-sitter = { workspace = true }
+tree-sitter-bash = { workspace = true }
+once_cell = { workspace = true }

 [dev-dependencies]
-assert_cmd = "2"
-pretty_assertions = "1.4.1"
-tempfile = "3.13.0"
+assert_cmd = { workspace = true }
+pretty_assertions = { workspace = true }
+tempfile = { workspace = true }
--- a/codex-rs/apply-patch/src/lib.rs
+++ b/codex-rs/apply-patch/src/lib.rs
@@ -648,21 +648,18 @@ fn derive_new_contents_from_chunks(
        }
    };

-    let mut original_lines: Vec<String> = original_contents
-        .split('\n')
-        .map(|s| s.to_string())
-        .collect();
+    let mut original_lines: Vec<String> = original_contents.split('\n').map(String::from).collect();

    // Drop the trailing empty element that results from the final newline so
    // that line counts match the behaviour of standard `diff`.
-    if original_lines.last().is_some_and(|s| s.is_empty()) {
+    if original_lines.last().is_some_and(String::is_empty) {
        original_lines.pop();
    }

    let replacements = compute_replacements(&original_lines, path, chunks)?;
    let new_lines = apply_replacements(original_lines, &replacements);
    let mut new_lines = new_lines;
-    if !new_lines.last().is_some_and(|s| s.is_empty()) {
+    if !new_lines.last().is_some_and(String::is_empty) {
        new_lines.push(String::new());
    }
    let new_contents = new_lines.join("\n");
@@ -706,7 +703,7 @@ fn compute_replacements(
        if chunk.old_lines.is_empty() {
            // Pure addition (no old lines). We'll add them at the end or just
            // before the final empty line if one exists.
-            let insertion_idx = if original_lines.last().is_some_and(|s| s.is_empty()) {
+            let insertion_idx = if original_lines.last().is_some_and(String::is_empty) {
                original_lines.len() - 1
            } else {
                original_lines.len()
@@ -732,11 +729,11 @@ fn compute_replacements(

        let mut new_slice: &[String] = &chunk.new_lines;

-        if found.is_none() && pattern.last().is_some_and(|s| s.is_empty()) {
+        if found.is_none() && pattern.last().is_some_and(String::is_empty) {
            // Retry without the trailing empty line which represents the final
            // newline in the file.
            pattern = &pattern[..pattern.len() - 1];
-            if new_slice.last().is_some_and(|s| s.is_empty()) {
+            if new_slice.last().is_some_and(String::is_empty) {
                new_slice = &new_slice[..new_slice.len() - 1];
            }

@@ -848,6 +845,7 @@ mod tests {
    use super::*;
    use pretty_assertions::assert_eq;
    use std::fs;
+    use std::string::ToString;
    use tempfile::tempdir;

    /// Helper to construct a patch with the given body.
@@ -856,7 +854,7 @@ mod tests {
    }

    fn strs_to_strings(strs: &[&str]) -> Vec<String> {
-        strs.iter().map(|s| s.to_string()).collect()
+        strs.iter().map(ToString::to_string).collect()
    }

    // Test helpers to reduce repetition when building bash -lc heredoc scripts
--- a/codex-rs/apply-patch/src/seek_sequence.rs
+++ b/codex-rs/apply-patch/src/seek_sequence.rs
@@ -112,9 +112,10 @@ pub(crate) fn seek_sequence(
 #[cfg(test)]
 mod tests {
    use super::seek_sequence;
+    use std::string::ToString;

    fn to_vec(strings: &[&str]) -> Vec<String> {
-        strings.iter().map(|s| s.to_string()).collect()
+        strings.iter().map(ToString::to_string).collect()
    }

    #[test]
--- a/codex-rs/arg0/Cargo.toml
+++ b/codex-rs/arg0/Cargo.toml
@@ -11,10 +11,10 @@ path = "src/lib.rs"
 workspace = true

 [dependencies]
-anyhow = "1"
-codex-apply-patch = { path = "../apply-patch" }
-codex-core = { path = "../core" }
-codex-linux-sandbox = { path = "../linux-sandbox" }
-dotenvy = "0.15.7"
-tempfile = "3"
-tokio = { version = "1", features = ["rt-multi-thread"] }
+anyhow = { workspace = true }
+codex-apply-patch = { workspace = true }
+codex-core = { workspace = true }
+codex-linux-sandbox = { workspace = true }
+dotenvy = { workspace = true }
+tempfile = { workspace = true }
+tokio = { workspace = true, features = ["rt-multi-thread"] }
--- a/codex-rs/arg0/src/lib.rs
+++ b/codex-rs/arg0/src/lib.rs
@@ -54,7 +54,7 @@ where

    let argv1 = args.next().unwrap_or_default();
    if argv1 == CODEX_APPLY_PATCH_ARG1 {
-        let patch_arg = args.next().and_then(|s| s.to_str().map(|s| s.to_owned()));
+        let patch_arg = args.next().and_then(|s| s.to_str().map(str::to_owned));
        let exit_code = match patch_arg {
            Some(patch_arg) => {
                let mut stdout = std::io::stdout();
--- a/codex-rs/chatgpt/Cargo.toml
+++ b/codex-rs/chatgpt/Cargo.toml
@@ -7,13 +7,13 @@ version = { workspace = true }
 workspace = true

 [dependencies]
-anyhow = "1"
-clap = { version = "4", features = ["derive"] }
-codex-common = { path = "../common", features = ["cli"] }
-codex-core = { path = "../core" }
-serde = { version = "1", features = ["derive"] }
-serde_json = "1"
-tokio = { version = "1", features = ["full"] }
+anyhow = { workspace = true }
+clap = { workspace = true, features = ["derive"] }
+codex-common = { workspace = true, features = ["cli"] }
+codex-core = { workspace = true }
+serde = { workspace = true, features = ["derive"] }
+serde_json = { workspace = true }
+tokio = { workspace = true, features = ["full"] }

 [dev-dependencies]
-tempfile = "3"
+tempfile = { workspace = true }
--- a/codex-rs/cli/Cargo.toml
+++ b/codex-rs/cli/Cargo.toml
@@ -15,32 +15,34 @@ path = "src/lib.rs"
 workspace = true

 [dependencies]
-anyhow = "1"
-clap = { version = "4", features = ["derive"] }
-clap_complete = "4"
-codex-arg0 = { path = "../arg0" }
-codex-chatgpt = { path = "../chatgpt" }
-codex-common = { path = "../common", features = ["cli"] }
-codex-core = { path = "../core" }
-codex-exec = { path = "../exec" }
-codex-login = { path = "../login" }
-codex-mcp-server = { path = "../mcp-server" }
-codex-protocol = { path = "../protocol" }
-codex-tui = { path = "../tui" }
-serde_json = "1"
-tokio = { version = "1", features = [
+anyhow = { workspace = true }
+clap = { workspace = true, features = ["derive"] }
+clap_complete = { workspace = true }
+codex-arg0 = { workspace = true }
+codex-chatgpt = { workspace = true }
+codex-common = { workspace = true, features = ["cli"] }
+codex-core = { workspace = true }
+codex-exec = { workspace = true }
+codex-login = { workspace = true }
+codex-mcp-server = { workspace = true }
+codex-protocol = { workspace = true }
+codex-protocol-ts = { workspace = true }
+codex-tui = { workspace = true }
+owo-colors = { workspace = true }
+serde_json = { workspace = true }
+supports-color = { workspace = true }
+tokio = { workspace = true, features = [
    "io-std",
    "macros",
    "process",
    "rt-multi-thread",
    "signal",
 ] }
-tracing = "0.1.41"
-tracing-subscriber = "0.3.20"
-codex-protocol-ts = { path = "../protocol-ts" }
+tracing = { workspace = true }
+tracing-subscriber = { workspace = true }

 [dev-dependencies]
-assert_cmd = "2"
-predicates = "3"
-pretty_assertions = "1"
-tempfile = "3"
+assert_cmd = { workspace = true }
+predicates = { workspace = true }
+pretty_assertions = { workspace = true }
+tempfile = { workspace = true }
--- a/codex-rs/cli/src/debug_sandbox.rs
+++ b/codex-rs/cli/src/debug_sandbox.rs
@@ -64,7 +64,6 @@ async fn run_command_under_sandbox(
    sandbox_type: SandboxType,
 ) -> anyhow::Result<()> {
    let sandbox_mode = create_sandbox_mode(full_auto);
-    let cwd = std::env::current_dir()?;
    let config = Config::load_with_cli_overrides(
        config_overrides
            .parse_overrides()
@@ -75,13 +74,29 @@ async fn run_command_under_sandbox(
            ..Default::default()
        },
    )?;
+
+    // In practice, this should be `std::env::current_dir()` because this CLI
+    // does not support `--cwd`, but let's use the config value for consistency.
+    let cwd = config.cwd.clone();
+    // For now, we always use the same cwd for both the command and the
+    // sandbox policy. In the future, we could add a CLI option to set them
+    // separately.
+    let sandbox_policy_cwd = cwd.clone();
+
    let stdio_policy = StdioPolicy::Inherit;
    let env = create_env(&config.shell_environment_policy);

    let mut child = match sandbox_type {
        SandboxType::Seatbelt => {
-            spawn_command_under_seatbelt(command, &config.sandbox_policy, cwd, stdio_policy, env)
-                .await?
+            spawn_command_under_seatbelt(
+                command,
+                cwd,
+                &config.sandbox_policy,
+                sandbox_policy_cwd.as_path(),
+                stdio_policy,
+                env,
+            )
+            .await?
        }
        SandboxType::Landlock => {
            #[expect(clippy::expect_used)]
@@ -91,8 +106,9 @@ async fn run_command_under_sandbox(
            spawn_command_under_linux_sandbox(
                codex_linux_sandbox_exe,
                command,
-                &config.sandbox_policy,
                cwd,
+                &config.sandbox_policy,
+                sandbox_policy_cwd.as_path(),
                stdio_policy,
                env,
            )
--- a/codex-rs/cli/src/main.rs
+++ b/codex-rs/cli/src/main.rs
@@ -14,8 +14,11 @@ use codex_cli::login::run_logout;
 use codex_cli::proto;
 use codex_common::CliConfigOverrides;
 use codex_exec::Cli as ExecCli;
+use codex_tui::AppExitInfo;
 use codex_tui::Cli as TuiCli;
+use owo_colors::OwoColorize;
 use std::path::PathBuf;
+use supports_color::Stream;

 mod mcp_cmd;

@@ -156,6 +159,41 @@ struct GenerateTsCommand {
    prettier: Option<PathBuf>,
 }

+fn format_exit_messages(exit_info: AppExitInfo, color_enabled: bool) -> Vec<String> {
+    let AppExitInfo {
+        token_usage,
+        conversation_id,
+    } = exit_info;
+
+    if token_usage.is_zero() {
+        return Vec::new();
+    }
+
+    let mut lines = vec![format!(
+        "{}",
+        codex_core::protocol::FinalOutput::from(token_usage)
+    )];
+
+    if let Some(session_id) = conversation_id {
+        let resume_cmd = format!("codex resume {session_id}");
+        let command = if color_enabled {
+            resume_cmd.cyan().to_string()
+        } else {
+            resume_cmd
+        };
+        lines.push(format!("To continue this session, run {command}."));
+    }
+
+    lines
+}
+
+fn print_exit_messages(exit_info: AppExitInfo) {
+    let color_enabled = supports_color::on(Stream::Stdout).is_some();
+    for line in format_exit_messages(exit_info, color_enabled) {
+        println!("{line}");
+    }
+}
+
 fn main() -> anyhow::Result<()> {
    arg0_dispatch_or_else(|codex_linux_sandbox_exe| async move {
        cli_main(codex_linux_sandbox_exe).await?;
@@ -176,10 +214,8 @@ async fn cli_main(codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()
                &mut interactive.config_overrides,
                root_config_overrides.clone(),
            );
-            let usage = codex_tui::run_main(interactive, codex_linux_sandbox_exe).await?;
-            if !usage.is_zero() {
-                println!("{}", codex_core::protocol::FinalOutput::from(usage));
-            }
+            let exit_info = codex_tui::run_main(interactive, codex_linux_sandbox_exe).await?;
+            print_exit_messages(exit_info);
        }
        Some(Subcommand::Exec(mut exec_cli)) => {
            prepend_config_flags(
@@ -369,6 +405,8 @@ fn print_completion(cmd: CompletionCommand) {
 #[cfg(test)]
 mod tests {
    use super::*;
+    use codex_core::protocol::TokenUsage;
+    use codex_protocol::mcp_protocol::ConversationId;

    fn finalize_from_args(args: &[&str]) -> TuiCli {
        let cli = MultitoolCli::try_parse_from(args).expect("parse");
@@ -390,6 +428,52 @@ mod tests {
        finalize_resume_interactive(interactive, root_overrides, session_id, last, resume_cli)
    }

+    fn sample_exit_info(conversation: Option<&str>) -> AppExitInfo {
+        let token_usage = TokenUsage {
+            output_tokens: 2,
+            total_tokens: 2,
+            ..Default::default()
+        };
+        AppExitInfo {
+            token_usage,
+            conversation_id: conversation
+                .map(ConversationId::from_string)
+                .map(Result::unwrap),
+        }
+    }
+
+    #[test]
+    fn format_exit_messages_skips_zero_usage() {
+        let exit_info = AppExitInfo {
+            token_usage: TokenUsage::default(),
+            conversation_id: None,
+        };
+        let lines = format_exit_messages(exit_info, false);
+        assert!(lines.is_empty());
+    }
+
+    #[test]
+    fn format_exit_messages_includes_resume_hint_without_color() {
+        let exit_info = sample_exit_info(Some("123e4567-e89b-12d3-a456-426614174000"));
+        let lines = format_exit_messages(exit_info, false);
+        assert_eq!(
+            lines,
+            vec![
+                "Token usage: total=2 input=0 output=2".to_string(),
+                "To continue this session, run codex resume 123e4567-e89b-12d3-a456-426614174000."
+                    .to_string(),
+            ]
+        );
+    }
+
+    #[test]
+    fn format_exit_messages_applies_color_when_enabled() {
+        let exit_info = sample_exit_info(Some("123e4567-e89b-12d3-a456-426614174000"));
+        let lines = format_exit_messages(exit_info, true);
+        assert_eq!(lines.len(), 2);
+        assert!(lines[1].contains("\u{1b}[36m"));
+    }
+
    #[test]
    fn resume_model_flag_applies_when_no_root_flags() {
        let interactive = finalize_from_args(["codex", "resume", "-m", "gpt-5-test"].as_ref());
--- a/codex-rs/cli/src/mcp_cmd.rs
+++ b/codex-rs/cli/src/mcp_cmd.rs
@@ -148,7 +148,8 @@ fn run_add(config_overrides: &CliConfigOverrides, add_args: AddArgs) -> Result<(
        command: command_bin,
        args: command_args,
        env: env_map,
-        startup_timeout_ms: None,
+        startup_timeout_sec: None,
+        tool_timeout_sec: None,
    };

    servers.insert(name.clone(), new_entry);
@@ -210,7 +211,12 @@ fn run_list(config_overrides: &CliConfigOverrides, list_args: ListArgs) -> Resul
                    "command": cfg.command,
                    "args": cfg.args,
                    "env": env,
-                    "startup_timeout_ms": cfg.startup_timeout_ms,
+                    "startup_timeout_sec": cfg
+                        .startup_timeout_sec
+                        .map(|timeout| timeout.as_secs_f64()),
+                    "tool_timeout_sec": cfg
+                        .tool_timeout_sec
+                        .map(|timeout| timeout.as_secs_f64()),
                })
            })
            .collect();
@@ -305,7 +311,12 @@ fn run_get(config_overrides: &CliConfigOverrides, get_args: GetArgs) -> Result<(
            "command": server.command,
            "args": server.args,
            "env": env,
-            "startup_timeout_ms": server.startup_timeout_ms,
+            "startup_timeout_sec": server
+                .startup_timeout_sec
+                .map(|timeout| timeout.as_secs_f64()),
+            "tool_timeout_sec": server
+                .tool_timeout_sec
+                .map(|timeout| timeout.as_secs_f64()),
        }))?;
        println!("{output}");
        return Ok(());
@@ -333,8 +344,11 @@ fn run_get(config_overrides: &CliConfigOverrides, get_args: GetArgs) -> Result<(
        }
    };
    println!("  env: {env_display}");
-    if let Some(timeout) = server.startup_timeout_ms {
-        println!("  startup_timeout_ms: {timeout}");
+    if let Some(timeout) = server.startup_timeout_sec {
+        println!("  startup_timeout_sec: {}", timeout.as_secs_f64());
+    }
+    if let Some(timeout) = server.tool_timeout_sec {
+        println!("  tool_timeout_sec: {}", timeout.as_secs_f64());
    }
    println!("  remove: codex mcp remove {}", get_args.name);

--- a/codex-rs/common/Cargo.toml
+++ b/codex-rs/common/Cargo.toml
@@ -7,11 +7,11 @@ version = { workspace = true }
 workspace = true

 [dependencies]
-clap = { version = "4", features = ["derive", "wrap_help"], optional = true }
-codex-core = { path = "../core" }
-codex-protocol = { path = "../protocol" }
-serde = { version = "1", optional = true }
-toml = { version = "0.9", optional = true }
+clap = { workspace = true, features = ["derive", "wrap_help"], optional = true }
+codex-core = { workspace = true }
+codex-protocol = { workspace = true }
+serde = { workspace = true, optional = true }
+toml = { workspace = true, optional = true }

 [features]
 # Separate feature so that `clap` is not a mandatory dependency.
--- a/codex-rs/core/Cargo.toml
+++ b/codex-rs/core/Cargo.toml
@@ -4,84 +4,89 @@ name = "codex-core"
 version = { workspace = true }

 [lib]
+doctest = false
 name = "codex_core"
 path = "src/lib.rs"
-doctest = false

 [lints]
 workspace = true

 [dependencies]
-anyhow = "1"
-askama = "0.12"
-async-channel = "2.3.1"
-base64 = "0.22"
-bytes = "1.10.1"
-chrono = { version = "0.4", features = ["serde"] }
-codex-apply-patch = { path = "../apply-patch" }
-codex-file-search = { path = "../file-search" }
-codex-mcp-client = { path = "../mcp-client" }
-codex-protocol = { path = "../protocol" }
-dirs = "6"
-env-flags = "0.1.1"
-eventsource-stream = "0.2.3"
-futures = "0.3"
-libc = "0.2.175"
-mcp-types = { path = "../mcp-types" }
-os_info = "3.12.0"
-portable-pty = "0.9.0"
-rand = "0.9"
-regex-lite = "0.1.7"
-reqwest = { version = "0.12", features = ["json", "stream"] }
-serde = { version = "1", features = ["derive"] }
-serde_json = "1"
-sha1 = "0.10.6"
-shlex = "1.3.0"
-similar = "2.7.0"
-strum_macros = "0.27.2"
-tempfile = "3"
-thiserror = "2.0.16"
-time = { version = "0.3", features = ["formatting", "parsing", "local-offset", "macros"] }
-tokio = { version = "1", features = [
+anyhow = { workspace = true }
+askama = { workspace = true }
+async-channel = { workspace = true }
+base64 = { workspace = true }
+bytes = { workspace = true }
+chrono = { workspace = true, features = ["serde"] }
+codex-apply-patch = { workspace = true }
+codex-file-search = { workspace = true }
+codex-mcp-client = { workspace = true }
+codex-protocol = { workspace = true }
+dirs = { workspace = true }
+env-flags = { workspace = true }
+eventsource-stream = { workspace = true }
+futures = { workspace = true }
+libc = { workspace = true }
+mcp-types = { workspace = true }
+os_info = { workspace = true }
+portable-pty = { workspace = true }
+rand = { workspace = true }
+regex-lite = { workspace = true }
+reqwest = { workspace = true, features = ["json", "stream"] }
+serde = { workspace = true, features = ["derive"] }
+serde_json = { workspace = true }
+sha1 = { workspace = true }
+shlex = { workspace = true }
+similar = { workspace = true }
+strum_macros = { workspace = true }
+tempfile = { workspace = true }
+thiserror = { workspace = true }
+time = { workspace = true, features = [
+    "formatting",
+    "parsing",
+    "local-offset",
+    "macros",
+] }
+tokio = { workspace = true, features = [
    "io-std",
    "macros",
    "process",
    "rt-multi-thread",
    "signal",
 ] }
-tokio-util = "0.7.16"
-toml = "0.9.5"
-toml_edit = "0.23.4"
-tracing = { version = "0.1.41", features = ["log"] }
-tree-sitter = "0.25.9"
-tree-sitter-bash = "0.25.0"
-uuid = { version = "1", features = ["serde", "v4"] }
-which = "6"
-wildmatch = "2.5.0"
+tokio-util = { workspace = true }
+toml = { workspace = true }
+toml_edit = { workspace = true }
+tracing = { workspace = true, features = ["log"] }
+tree-sitter = { workspace = true }
+tree-sitter-bash = { workspace = true }
+uuid = { workspace = true, features = ["serde", "v4"] }
+which = { workspace = true }
+wildmatch = { workspace = true }


 [target.'cfg(target_os = "linux")'.dependencies]
-landlock = "0.4.1"
-seccompiler = "0.5.0"
+landlock = { workspace = true }
+seccompiler = { workspace = true }

 # Build OpenSSL from source for musl builds.
 [target.x86_64-unknown-linux-musl.dependencies]
-openssl-sys = { version = "*", features = ["vendored"] }
+openssl-sys = { workspace = true, features = ["vendored"] }

 # Build OpenSSL from source for musl builds.
 [target.aarch64-unknown-linux-musl.dependencies]
-openssl-sys = { version = "*", features = ["vendored"] }
+openssl-sys = { workspace = true, features = ["vendored"] }

 [dev-dependencies]
-assert_cmd = "2"
-core_test_support = { path = "tests/common" }
-maplit = "1.0.2"
-predicates = "3"
-pretty_assertions = "1.4.1"
-tempfile = "3"
-tokio-test = "0.4"
-walkdir = "2.5.0"
-wiremock = "0.6"
+assert_cmd = { workspace = true }
+core_test_support = { workspace = true }
+maplit = { workspace = true }
+predicates = { workspace = true }
+pretty_assertions = { workspace = true }
+tempfile = { workspace = true }
+tokio-test = { workspace = true }
+walkdir = { workspace = true }
+wiremock = { workspace = true }

 [package.metadata.cargo-shear]
 ignored = ["openssl-sys"]
--- a/codex-rs/core/gpt_5_codex_prompt.md
+++ b/codex-rs/core/gpt_5_codex_prompt.md
@@ -26,37 +26,41 @@ When using the planning tool:

 ## Codex CLI harness, sandboxing, and approvals

-The Codex CLI harness supports several different sandboxing, and approval configurations that the user can choose from.
+The Codex CLI harness supports several different configurations for sandboxing and escalation approvals that the user can choose from.

-Filesystem sandboxing defines which files can be read or written. The options are:
- **read-only**: You can only read files.
- **workspace-write**: You can read files. You can write to files in this folder, but not outside it.
- **danger-full-access**: No filesystem sandboxing.
+Filesystem sandboxing defines which files can be read or written. The options for `sandbox_mode` are:
+- **read-only**: The sandbox only permits reading files.
+- **workspace-write**: The sandbox permits reading files, and editing files in `cwd` and `writable_roots`. Editing files in other directories requires approval.
+- **danger-full-access**: No filesystem sandboxing - all commands are permitted.

-Network sandboxing defines whether network can be accessed without approval. Options are
+Network sandboxing defines whether network can be accessed without approval. Options for `network_access` are:
 - **restricted**: Requires approval
 - **enabled**: No approval needed

-Approvals are your mechanism to get user consent to perform more privileged actions. Although they introduce friction to the user because your work is paused until the user responds, you should leverage them to accomplish your important work. Do not let these settings or the sandbox deter you from attempting to accomplish the user's task unless it is set to "never", in which case never ask for approvals.
-
-Approval options are
+Approvals are your mechanism to get user consent to run shell commands without the sandbox. Possible configuration options for `approval_policy` are:
 - **untrusted**: The harness will escalate most commands for user approval, apart from a limited allowlist of safe "read" commands.
 - **on-failure**: The harness will allow all commands to run in the sandbox (if enabled), and failures will be escalated to the user for approval to run again without the sandbox.
 - **on-request**: Commands will be run in the sandbox by default, and you can specify in your tool call if you want to escalate a command to run without sandboxing. (Note that this mode is not always available. If it is, you'll see parameters for it in the `shell` command description.)
 - **never**: This is a non-interactive mode where you may NEVER ask the user for approval to run commands. Instead, you must always persist and work around constraints to solve the task for the user. You MUST do your utmost best to finish the task and validate your work before yielding. If this mode is paired with `danger-full-access`, take advantage of it to deliver the best outcome for the user. Further, in this mode, your default testing philosophy is overridden: Even if you don't see local patterns for testing, you may add tests and scripts to validate your work. Just remove them before yielding.

-When you are running with approvals `on-request`, and sandboxing enabled, here are scenarios where you'll need to request approval:
- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /tmp)
+When you are running with `approval_policy == on-request` and sandboxing enabled, here are scenarios where you'll need to request approval:
+- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /var)
 - You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files.
 - You are running sandboxed and need to run a command that requires network access (e.g. installing packages)
- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval.
+- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. ALWAYS proceed to use the `with_escalated_permissions` and `justification` parameters - do not message the user before requesting approval for the command.
 - You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for
 - (for all of these, you should weigh alternative paths that do not require approval)

-When sandboxing is set to read-only, you'll need to request approval for any command that isn't a read.
+When `sandbox_mode` is set to read-only, you'll need to request approval for any command that isn't a read.

 You will be told what filesystem sandboxing, network sandboxing, and approval mode are active in a developer or user message. If you are not told about this, assume that you are running with workspace-write, network sandboxing enabled, and approval on-failure.

+Although approvals introduce friction to the user because your work is paused until the user responds, you should leverage them when necessary to accomplish important work. If completing the task requires escalated permissions, do not let these settings or the sandbox deter you from attempting to accomplish the user's task unless `approval_policy` is set to "never", in which case never ask for approvals.
+
+When requesting approval to execute a command that will require escalated privileges:
+  - Provide the `with_escalated_permissions` parameter with the boolean value true
+  - Include a short, one-sentence explanation for why you need to enable `with_escalated_permissions` in the `justification` parameter
+
 ## Special user requests

 - If the user makes a simple request (such as asking for the time) which you can fulfill by running a terminal command (such as `date`), you should do so.
--- a/codex-rs/core/src/bash.rs
+++ b/codex-rs/core/src/bash.rs
@@ -1,3 +1,4 @@
+use tree_sitter::Node;
 use tree_sitter::Parser;
 use tree_sitter::Tree;
 use tree_sitter_bash::LANGUAGE as BASH;
@@ -73,6 +74,9 @@ pub fn try_parse_word_only_commands_sequence(tree: &Tree, src: &str) -> Option<V
        }
    }

+    // Walk uses a stack (LIFO), so re-sort by position to restore source order.
+    command_nodes.sort_by_key(Node::start_byte);
+
    let mut commands = Vec::new();
    for node in command_nodes {
        if let Some(words) = parse_plain_command_from_node(node, src) {
@@ -150,10 +154,10 @@ mod tests {
        let src = "ls && pwd; echo 'hi there' | wc -l";
        let cmds = parse_seq(src).unwrap();
        let expected: Vec<Vec<String>> = vec![
-            vec!["wc".to_string(), "-l".to_string()],
-            vec!["echo".to_string(), "hi there".to_string()],
-            vec!["pwd".to_string()],
            vec!["ls".to_string()],
+            vec!["pwd".to_string()],
+            vec!["echo".to_string(), "hi there".to_string()],
+            vec!["wc".to_string(), "-l".to_string()],
        ];
        assert_eq!(cmds, expected);
    }
--- a/codex-rs/core/src/chat_completions.rs
+++ b/codex-rs/core/src/chat_completions.rs
@@ -21,6 +21,7 @@ use crate::client_common::ResponseEvent;
 use crate::client_common::ResponseStream;
 use crate::error::CodexErr;
 use crate::error::Result;
+use crate::error_codes::CONTEXT_LENGTH_EXCEEDED;
 use crate::model_family::ModelFamily;
 use crate::openai_tools::create_tools_json_for_chat_completions_api;
 use crate::util::backoff;
@@ -28,6 +29,19 @@ use codex_protocol::models::ContentItem;
 use codex_protocol::models::ReasoningItemContent;
 use codex_protocol::models::ResponseItem;

+// Minimal error body used to parse structured provider error codes on
+// Chat Completions non‑2xx responses.
+#[derive(serde::Deserialize)]
+struct ChatErrorBody {
+    error: ChatErrorInner,
+}
+
+#[derive(serde::Deserialize)]
+struct ChatErrorInner {
+    code: Option<String>,
+    message: Option<String>,
+}
+
 /// Implementation for the classic Chat Completions API.
 pub(crate) async fn stream_chat_completions(
    prompt: &Prompt,
@@ -309,6 +323,16 @@ pub(crate) async fn stream_chat_completions(
                let status = res.status();
                if !(status == StatusCode::TOO_MANY_REQUESTS || status.is_server_error()) {
                    let body = (res.text().await).unwrap_or_default();
+
+                    // Attempt to parse a structured error and map known codes.
+                    if let Ok(parsed) = serde_json::from_str::<ChatErrorBody>(&body)
+                        && parsed.error.code.as_deref() == Some(CONTEXT_LENGTH_EXCEEDED)
+                    {
+                        return Err(CodexErr::ContextLengthExceeded(
+                            parsed.error.message.unwrap_or_default(),
+                        ));
+                    }
+
                    return Err(CodexErr::UnexpectedStatus(status, body));
                }

@@ -462,7 +486,7 @@ async fn process_chat_sse<S>(
            if let Some(reasoning_val) = choice.get("delta").and_then(|d| d.get("reasoning")) {
                let mut maybe_text = reasoning_val
                    .as_str()
-                    .map(|s| s.to_string())
+                    .map(str::to_string)
                    .filter(|s| !s.is_empty());

                if maybe_text.is_none() && reasoning_val.is_object() {
@@ -716,6 +740,9 @@ where
                    // Not an assistant message – forward immediately.
                    return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(item))));
                }
+                Poll::Ready(Some(Ok(ResponseEvent::RateLimits(snapshot)))) => {
+                    return Poll::Ready(Some(Ok(ResponseEvent::RateLimits(snapshot))));
+                }
                Poll::Ready(Some(Ok(ResponseEvent::Completed {
                    response_id,
                    token_usage,
--- a/codex-rs/core/src/client.rs
+++ b/codex-rs/core/src/client.rs
@@ -4,6 +4,8 @@ use std::sync::OnceLock;
 use std::time::Duration;

 use crate::AuthManager;
+use crate::auth::CodexAuth;
+use crate::error_codes::CONTEXT_LENGTH_EXCEEDED;
 use bytes::Bytes;
 use codex_protocol::mcp_protocol::AuthMode;
 use codex_protocol::mcp_protocol::ConversationId;
@@ -11,6 +13,7 @@ use eventsource_stream::Eventsource;
 use futures::prelude::*;
 use regex_lite::Regex;
 use reqwest::StatusCode;
+use reqwest::header::HeaderMap;
 use serde::Deserialize;
 use serde::Serialize;
 use serde_json::Value;
@@ -40,6 +43,7 @@ use crate::model_provider_info::ModelProviderInfo;
 use crate::model_provider_info::WireApi;
 use crate::openai_model_info::get_model_info;
 use crate::openai_tools::create_tools_json_for_responses_api;
+use crate::protocol::RateLimitSnapshotEvent;
 use crate::protocol::TokenUsage;
 use crate::token_data::PlanType;
 use crate::util::backoff;
@@ -274,6 +278,15 @@ impl ModelClient {
                Ok(resp) if resp.status().is_success() => {
                    let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(1600);

+                    if let Some(snapshot) = parse_rate_limit_snapshot(resp.headers())
+                        && tx_event
+                            .send(Ok(ResponseEvent::RateLimits(snapshot)))
+                            .await
+                            .is_err()
+                    {
+                        debug!("receiver dropped rate limit snapshot event");
+                    }
+
                    // spawn task to process SSE
                    let stream = resp.bytes_stream().map_err(CodexErr::Reqwest);
                    tokio::spawn(process_sse(
@@ -326,7 +339,7 @@ impl ModelClient {
                                // token.
                                let plan_type = error
                                    .plan_type
-                                    .or_else(|| auth.as_ref().and_then(|a| a.get_plan_type()));
+                                    .or_else(|| auth.as_ref().and_then(CodexAuth::get_plan_type));
                                let resets_in_seconds = error.resets_in_seconds;
                                return Err(CodexErr::UsageLimitReached(UsageLimitReachedError {
                                    plan_type,
@@ -473,6 +486,38 @@ fn attach_item_ids(payload_json: &mut Value, original_items: &[ResponseItem]) {
    }
 }

+fn parse_rate_limit_snapshot(headers: &HeaderMap) -> Option<RateLimitSnapshotEvent> {
+    let primary_used_percent = parse_header_f64(headers, "x-codex-primary-used-percent")?;
+    let secondary_used_percent = parse_header_f64(headers, "x-codex-secondary-used-percent")?;
+    let primary_to_secondary_ratio_percent =
+        parse_header_f64(headers, "x-codex-primary-over-secondary-limit-percent")?;
+    let primary_window_minutes = parse_header_u64(headers, "x-codex-primary-window-minutes")?;
+    let secondary_window_minutes = parse_header_u64(headers, "x-codex-secondary-window-minutes")?;
+
+    Some(RateLimitSnapshotEvent {
+        primary_used_percent,
+        secondary_used_percent,
+        primary_to_secondary_ratio_percent,
+        primary_window_minutes,
+        secondary_window_minutes,
+    })
+}
+
+fn parse_header_f64(headers: &HeaderMap, name: &str) -> Option<f64> {
+    parse_header_str(headers, name)?
+        .parse::<f64>()
+        .ok()
+        .filter(|v| v.is_finite())
+}
+
+fn parse_header_u64(headers: &HeaderMap, name: &str) -> Option<u64> {
+    parse_header_str(headers, name)?.parse::<u64>().ok()
+}
+
+fn parse_header_str<'a>(headers: &'a HeaderMap, name: &str) -> Option<&'a str> {
+    headers.get(name)?.to_str().ok()
+}
+
 async fn process_sse<S>(
    stream: S,
    tx_event: mpsc::Sender<Result<ResponseEvent>>,
@@ -615,7 +660,11 @@ async fn process_sse<S>(
                            Ok(error) => {
                                let delay = try_parse_retry_after(&error);
                                let message = error.message.unwrap_or_default();
-                                response_error = Some(CodexErr::Stream(message, delay));
+                                if error.code.as_deref() == Some(CONTEXT_LENGTH_EXCEEDED) {
+                                    response_error = Some(CodexErr::ContextLengthExceeded(message));
+                                } else {
+                                    response_error = Some(CodexErr::Stream(message, delay));
+                                }
                            }
                            Err(e) => {
                                debug!("failed to parse ErrorResponse: {e}");
--- a/codex-rs/core/src/client_common.rs
+++ b/codex-rs/core/src/client_common.rs
@@ -1,6 +1,7 @@
 use crate::error::Result;
 use crate::model_family::ModelFamily;
 use crate::openai_tools::OpenAiTool;
+use crate::protocol::RateLimitSnapshotEvent;
 use crate::protocol::TokenUsage;
 use codex_apply_patch::APPLY_PATCH_TOOL_INSTRUCTIONS;
 use codex_protocol::config_types::ReasoningEffort as ReasoningEffortConfig;
@@ -34,13 +35,11 @@ pub struct Prompt {
 }

 impl Prompt {
-    pub(crate) fn get_full_instructions(&self, model: &ModelFamily) -> Cow<'_, str> {
+    pub(crate) fn get_full_instructions<'a>(&'a self, model: &'a ModelFamily) -> Cow<'a, str> {
        let base = self
            .base_instructions_override
            .as_deref()
            .unwrap_or(model.base_instructions.deref());
-        let mut sections: Vec<&str> = vec![base];
-
        // When there are no custom instructions, add apply_patch_tool_instructions if:
        // - the model needs special instructions (4.1)
        // AND
@@ -54,9 +53,10 @@ impl Prompt {
            && model.needs_special_apply_patch_instructions
            && !is_apply_patch_tool_present
        {
-            sections.push(APPLY_PATCH_TOOL_INSTRUCTIONS);
+            Cow::Owned(format!("{base}\n{APPLY_PATCH_TOOL_INSTRUCTIONS}"))
+        } else {
+            Cow::Borrowed(base)
        }
-        Cow::Owned(sections.join("\n"))
    }

    pub(crate) fn get_formatted_input(&self) -> Vec<ResponseItem> {
@@ -79,6 +79,7 @@ pub enum ResponseEvent {
    WebSearchCallBegin {
        call_id: String,
    },
+    RateLimits(RateLimitSnapshotEvent),
 }

 #[derive(Debug, Serialize)]
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -1,10 +1,9 @@
 use std::borrow::Cow;
 use std::collections::HashMap;
 use std::collections::HashSet;
+use std::path::Path;
 use std::path::PathBuf;
 use std::sync::Arc;
-use std::sync::Mutex;
-use std::sync::MutexGuard;
 use std::sync::atomic::AtomicU64;
 use std::time::Duration;

@@ -31,6 +30,7 @@ use mcp_types::CallToolResult;
 use serde::Deserialize;
 use serde::Serialize;
 use serde_json;
+use tokio::sync::Mutex;
 use tokio::sync::oneshot;
 use tokio::task::AbortHandle;
 use tracing::debug;
@@ -86,6 +86,7 @@ use crate::protocol::AgentReasoningSectionBreakEvent;
 use crate::protocol::ApplyPatchApprovalRequestEvent;
 use crate::protocol::AskForApproval;
 use crate::protocol::BackgroundEventEvent;
+use crate::protocol::CompactApprovalRequestEvent;
 use crate::protocol::ErrorEvent;
 use crate::protocol::Event;
 use crate::protocol::EventMsg;
@@ -98,6 +99,7 @@ use crate::protocol::ListCustomPromptsResponseEvent;
 use crate::protocol::Op;
 use crate::protocol::PatchApplyBeginEvent;
 use crate::protocol::PatchApplyEndEvent;
+use crate::protocol::RateLimitSnapshotEvent;
 use crate::protocol::ReviewDecision;
 use crate::protocol::ReviewOutputEvent;
 use crate::protocol::SandboxPolicy;
@@ -105,6 +107,7 @@ use crate::protocol::SessionConfiguredEvent;
 use crate::protocol::StreamErrorEvent;
 use crate::protocol::Submission;
 use crate::protocol::TaskCompleteEvent;
+use crate::protocol::TokenCountEvent;
 use crate::protocol::TokenUsage;
 use crate::protocol::TokenUsageInfo;
 use crate::protocol::TurnDiffEvent;
@@ -131,25 +134,10 @@ use codex_protocol::models::ResponseItem;
 use codex_protocol::models::ShellToolCallParams;
 use codex_protocol::protocol::InitialHistory;

-mod compact;
+pub mod compact;
 use self::compact::build_compacted_history;
 use self::compact::collect_user_messages;

-// A convenience extension trait for acquiring mutex locks where poisoning is
-// unrecoverable and should abort the program. This avoids scattered `.unwrap()`
-// calls on `lock()` while still surfacing a clear panic message when a lock is
-// poisoned.
-trait MutexExt<T> {
-    fn lock_unchecked(&self) -> MutexGuard<'_, T>;
-}
-
-impl<T> MutexExt<T> for Mutex<T> {
-    fn lock_unchecked(&self) -> MutexGuard<'_, T> {
-        #[expect(clippy::expect_used)]
-        self.lock().expect("poisoned lock")
-    }
-}
-
 /// The high-level interface to the Codex system.
 /// It operates as a queue pair where you send submissions and receive events.
 pub struct Codex {
@@ -272,7 +260,7 @@ struct State {
    pending_input: Vec<ResponseInputItem>,
    history: ConversationHistory,
    token_info: Option<TokenUsageInfo>,
-    next_internal_sub_id: u64,
+    latest_rate_limits: Option<RateLimitSnapshotEvent>,
 }

 /// Context for an initialized model agent
@@ -298,6 +286,7 @@ pub(crate) struct Session {
    codex_linux_sandbox_exe: Option<PathBuf>,
    user_shell: shell::Shell,
    show_raw_agent_reasoning: bool,
+    next_internal_sub_id: AtomicU64,
 }

 /// The context needed for a single turn of the conversation.
@@ -471,8 +460,6 @@ impl Session {
            client,
            tools_config: ToolsConfig::new(&ToolsConfigParams {
                model_family: &config.model_family,
-                approval_policy,
-                sandbox_policy: sandbox_policy.clone(),
                include_plan_tool: config.include_plan_tool,
                include_apply_patch_tool: config.include_apply_patch_tool,
                include_web_search_request: config.tools_web_search_request,
@@ -500,6 +487,7 @@ impl Session {
            codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(),
            user_shell: default_shell,
            show_raw_agent_reasoning: config.show_raw_agent_reasoning,
+            next_internal_sub_id: AtomicU64::new(0),
        });

        // Dispatch the SessionConfiguredEvent first and then report any errors.
@@ -528,16 +516,16 @@ impl Session {
        Ok((sess, turn_context))
    }

-    pub fn set_task(&self, task: AgentTask) {
-        let mut state = self.state.lock_unchecked();
+    pub async fn set_task(&self, task: AgentTask) {
+        let mut state = self.state.lock().await;
        if let Some(current_task) = state.current_task.take() {
            current_task.abort(TurnAbortReason::Replaced);
        }
        state.current_task = Some(task);
    }

-    pub fn remove_task(&self, sub_id: &str) {
-        let mut state = self.state.lock_unchecked();
+    pub async fn remove_task(&self, sub_id: &str) {
+        let mut state = self.state.lock().await;
        if let Some(task) = &state.current_task
            && task.sub_id == sub_id
        {
@@ -546,9 +534,9 @@ impl Session {
    }

    fn next_internal_sub_id(&self) -> String {
-        let mut state = self.state.lock_unchecked();
-        let id = state.next_internal_sub_id;
-        state.next_internal_sub_id += 1;
+        let id = self
+            .next_internal_sub_id
+            .fetch_add(1, std::sync::atomic::Ordering::SeqCst);
        format!("auto-compact-{id}")
    }

@@ -571,7 +559,7 @@ impl Session {
                let reconstructed_history =
                    self.reconstruct_history_from_rollout(turn_context, &rollout_items);
                if !reconstructed_history.is_empty() {
-                    self.record_into_history(&reconstructed_history);
+                    self.record_into_history(&reconstructed_history).await;
                }

                // If persisting, persist all rollout items as-is (recorder filters)
@@ -604,7 +592,7 @@ impl Session {
        let (tx_approve, rx_approve) = oneshot::channel();
        let event_id = sub_id.clone();
        let prev_entry = {
-            let mut state = self.state.lock_unchecked();
+            let mut state = self.state.lock().await;
            state.pending_approvals.insert(sub_id, tx_approve)
        };
        if prev_entry.is_some() {
@@ -636,7 +624,7 @@ impl Session {
        let (tx_approve, rx_approve) = oneshot::channel();
        let event_id = sub_id.clone();
        let prev_entry = {
-            let mut state = self.state.lock_unchecked();
+            let mut state = self.state.lock().await;
            state.pending_approvals.insert(sub_id, tx_approve)
        };
        if prev_entry.is_some() {
@@ -656,9 +644,9 @@ impl Session {
        rx_approve
    }

-    pub fn notify_approval(&self, sub_id: &str, decision: ReviewDecision) {
+    pub async fn notify_approval(&self, sub_id: &str, decision: ReviewDecision) {
        let entry = {
-            let mut state = self.state.lock_unchecked();
+            let mut state = self.state.lock().await;
            state.pending_approvals.remove(sub_id)
        };
        match entry {
@@ -671,15 +659,15 @@ impl Session {
        }
    }

-    pub fn add_approved_command(&self, cmd: Vec<String>) {
-        let mut state = self.state.lock_unchecked();
+    pub async fn add_approved_command(&self, cmd: Vec<String>) {
+        let mut state = self.state.lock().await;
        state.approved_commands.insert(cmd);
    }

    /// Records input items: always append to conversation history and
    /// persist these response items to rollout.
    async fn record_conversation_items(&self, items: &[ResponseItem]) {
-        self.record_into_history(items);
+        self.record_into_history(items).await;
        self.persist_rollout_response_items(items).await;
    }

@@ -711,11 +699,9 @@ impl Session {
    }

    /// Append ResponseItems to the in-memory conversation history only.
-    fn record_into_history(&self, items: &[ResponseItem]) {
-        self.state
-            .lock_unchecked()
-            .history
-            .record_items(items.iter());
+    async fn record_into_history(&self, items: &[ResponseItem]) {
+        let mut state = self.state.lock().await;
+        state.history.record_items(items.iter());
    }

    async fn persist_rollout_response_items(&self, items: &[ResponseItem]) {
@@ -727,7 +713,7 @@ impl Session {
        self.persist_rollout_items(&rollout_items).await;
    }

-    fn build_initial_context(&self, turn_context: &TurnContext) -> Vec<ResponseItem> {
+    pub(crate) fn build_initial_context(&self, turn_context: &TurnContext) -> Vec<ResponseItem> {
        let mut items = Vec::<ResponseItem>::with_capacity(2);
        if let Some(user_instructions) = turn_context.user_instructions.as_deref() {
            items.push(UserInstructions::new(user_instructions.to_string()).into());
@@ -743,8 +729,8 @@ impl Session {

    async fn persist_rollout_items(&self, items: &[RolloutItem]) {
        let recorder = {
-            let guard = self.rollout.lock_unchecked();
-            guard.as_ref().cloned()
+            let guard = self.rollout.lock().await;
+            guard.clone()
        };
        if let Some(rec) = recorder
            && let Err(e) = rec.record_items(items).await
@@ -753,19 +739,33 @@ impl Session {
        }
    }

-    fn update_token_usage_info(
+    async fn update_token_usage_info(
        &self,
        turn_context: &TurnContext,
-        token_usage: &Option<TokenUsage>,
-    ) -> Option<TokenUsageInfo> {
-        let mut state = self.state.lock_unchecked();
-        let info = TokenUsageInfo::new_or_append(
-            &state.token_info,
-            token_usage,
-            turn_context.client.get_model_context_window(),
-        );
-        state.token_info = info.clone();
-        info
+        token_usage: Option<&TokenUsage>,
+    ) {
+        let mut state = self.state.lock().await;
+        if let Some(token_usage) = token_usage {
+            let info = TokenUsageInfo::new_or_append(
+                &state.token_info,
+                &Some(token_usage.clone()),
+                turn_context.client.get_model_context_window(),
+            );
+            state.token_info = info;
+        }
+    }
+
+    async fn update_rate_limits(&self, new_rate_limits: RateLimitSnapshotEvent) {
+        let mut state = self.state.lock().await;
+        state.latest_rate_limits = Some(new_rate_limits);
+    }
+
+    async fn get_token_count_event(&self) -> TokenCountEvent {
+        let state = self.state.lock().await;
+        TokenCountEvent {
+            info: state.token_info.clone(),
+            rate_limits: state.latest_rate_limits.clone(),
+        }
    }

    /// Record a user input item to conversation history and also persist a
@@ -915,6 +915,7 @@ impl Session {
            exec_args.params,
            exec_args.sandbox_type,
            exec_args.sandbox_policy,
+            exec_args.sandbox_cwd,
            exec_args.codex_linux_sandbox_exe,
            exec_args.stdout_stream,
        )
@@ -973,13 +974,17 @@ impl Session {

    /// Build the full turn input by concatenating the current conversation
    /// history with additional items for this turn.
-    pub fn turn_input_with_history(&self, extra: Vec<ResponseItem>) -> Vec<ResponseItem> {
-        [self.state.lock_unchecked().history.contents(), extra].concat()
+    pub async fn turn_input_with_history(&self, extra: Vec<ResponseItem>) -> Vec<ResponseItem> {
+        let history = {
+            let state = self.state.lock().await;
+            state.history.contents()
+        };
+        [history, extra].concat()
    }

    /// Returns the input if there was no task running to inject into
-    pub fn inject_input(&self, input: Vec<InputItem>) -> Result<(), Vec<InputItem>> {
-        let mut state = self.state.lock_unchecked();
+    pub async fn inject_input(&self, input: Vec<InputItem>) -> Result<(), Vec<InputItem>> {
+        let mut state = self.state.lock().await;
        if state.current_task.is_some() {
            state.pending_input.push(input.into());
            Ok(())
@@ -988,8 +993,8 @@ impl Session {
        }
    }

-    pub fn get_pending_input(&self) -> Vec<ResponseInputItem> {
-        let mut state = self.state.lock_unchecked();
+    pub async fn get_pending_input(&self) -> Vec<ResponseInputItem> {
+        let mut state = self.state.lock().await;
        if state.pending_input.is_empty() {
            Vec::with_capacity(0)
        } else {
@@ -1004,16 +1009,15 @@ impl Session {
        server: &str,
        tool: &str,
        arguments: Option<serde_json::Value>,
-        timeout: Option<Duration>,
    ) -> anyhow::Result<CallToolResult> {
        self.mcp_connection_manager
-            .call_tool(server, tool, arguments, timeout)
+            .call_tool(server, tool, arguments)
            .await
    }

-    fn interrupt_task(&self) {
+    pub async fn interrupt_task(&self) {
        info!("interrupt received: abort current task, if any");
-        let mut state = self.state.lock_unchecked();
+        let mut state = self.state.lock().await;
        state.pending_approvals.clear();
        state.pending_input.clear();
        if let Some(task) = state.current_task.take() {
@@ -1021,6 +1025,16 @@ impl Session {
        }
    }

+    fn interrupt_task_sync(&self) {
+        if let Ok(mut state) = self.state.try_lock() {
+            state.pending_approvals.clear();
+            state.pending_input.clear();
+            if let Some(task) = state.current_task.take() {
+                task.abort(TurnAbortReason::Interrupted);
+            }
+        }
+    }
+
    /// Spawn the configured notifier (if any) with the given JSON payload as
    /// the last argument. Failures are logged but otherwise ignored so that
    /// notification issues do not interfere with the main workflow.
@@ -1053,7 +1067,7 @@ impl Session {

 impl Drop for Session {
    fn drop(&mut self) {
-        self.interrupt_task();
+        self.interrupt_task_sync();
    }
 }

@@ -1184,7 +1198,7 @@ async fn submission_loop(
        debug!(?sub, "Submission");
        match sub.op {
            Op::Interrupt => {
-                sess.interrupt_task();
+                sess.interrupt_task().await;
            }
            Op::OverrideTurnContext {
                cwd,
@@ -1238,8 +1252,6 @@ async fn submission_loop(

                let tools_config = ToolsConfig::new(&ToolsConfigParams {
                    model_family: &effective_family,
-                    approval_policy: new_approval_policy,
-                    sandbox_policy: new_sandbox_policy.clone(),
                    include_plan_tool: config.include_plan_tool,
                    include_apply_patch_tool: config.include_apply_patch_tool,
                    include_web_search_request: config.tools_web_search_request,
@@ -1277,11 +1289,11 @@ async fn submission_loop(
            }
            Op::UserInput { items } => {
                // attempt to inject input into current task
-                if let Err(items) = sess.inject_input(items) {
+                if let Err(items) = sess.inject_input(items).await {
                    // no current task, spawn a new one
                    let task =
                        AgentTask::spawn(sess.clone(), Arc::clone(&turn_context), sub.id, items);
-                    sess.set_task(task);
+                    sess.set_task(task).await;
                }
            }
            Op::UserTurn {
@@ -1294,7 +1306,7 @@ async fn submission_loop(
                summary,
            } => {
                // attempt to inject input into current task
-                if let Err(items) = sess.inject_input(items) {
+                if let Err(items) = sess.inject_input(items).await {
                    // Derive a fresh TurnContext for this turn using the provided overrides.
                    let provider = turn_context.client.get_provider();
                    let auth_manager = turn_context.client.get_auth_manager();
@@ -1326,8 +1338,6 @@ async fn submission_loop(
                        client,
                        tools_config: ToolsConfig::new(&ToolsConfigParams {
                            model_family: &model_family,
-                            approval_policy,
-                            sandbox_policy: sandbox_policy.clone(),
                            include_plan_tool: config.include_plan_tool,
                            include_apply_patch_tool: config.include_apply_patch_tool,
                            include_web_search_request: config.tools_web_search_request,
@@ -1360,20 +1370,20 @@ async fn submission_loop(
                    // no current task, spawn a new one with the per‑turn context
                    let task =
                        AgentTask::spawn(sess.clone(), Arc::clone(&turn_context), sub.id, items);
-                    sess.set_task(task);
+                    sess.set_task(task).await;
                }
            }
            Op::ExecApproval { id, decision } => match decision {
                ReviewDecision::Abort => {
-                    sess.interrupt_task();
+                    sess.interrupt_task().await;
                }
-                other => sess.notify_approval(&id, other),
+                other => sess.notify_approval(&id, other).await,
            },
            Op::PatchApproval { id, decision } => match decision {
                ReviewDecision::Abort => {
-                    sess.interrupt_task();
+                    sess.interrupt_task().await;
                }
-                other => sess.notify_approval(&id, other),
+                other => sess.notify_approval(&id, other).await,
            },
            Op::AddToHistory { text } => {
                let id = sess.conversation_id;
@@ -1452,15 +1462,19 @@ async fn submission_loop(
            }
            Op::Compact => {
                // Attempt to inject input into current task
-                if let Err(items) = sess.inject_input(vec![InputItem::Text {
-                    text: compact::COMPACT_TRIGGER_TEXT.to_string(),
-                }]) {
+                if let Err(items) = sess
+                    .inject_input(vec![InputItem::Text {
+                        text: compact::COMPACT_TRIGGER_TEXT.to_string(),
+                    }])
+                    .await
+                {
                    compact::spawn_compact_task(
                        sess.clone(),
                        Arc::clone(&turn_context),
                        sub.id,
                        items,
-                    );
+                    )
+                    .await;
                }
            }
            Op::Shutdown => {
@@ -1468,7 +1482,10 @@ async fn submission_loop(

                // Gracefully flush and shutdown rollout recorder on session end so tests
                // that inspect the rollout file do not race with the background writer.
-                let recorder_opt = sess.rollout.lock_unchecked().take();
+                let recorder_opt = {
+                    let mut guard = sess.rollout.lock().await;
+                    guard.take()
+                };
                if let Some(rec) = recorder_opt
                    && let Err(e) = rec.shutdown().await
                {
@@ -1493,7 +1510,7 @@ async fn submission_loop(
                let sub_id = sub.id.clone();
                // Flush rollout writes before returning the path so readers observe a consistent file.
                let (path, rec_opt) = {
-                    let guard = sess.rollout.lock_unchecked();
+                    let guard = sess.rollout.lock().await;
                    match guard.as_ref() {
                        Some(rec) => (rec.get_rollout_path(), Some(rec.clone())),
                        None => {
@@ -1547,8 +1564,6 @@ async fn spawn_review_thread(
        .unwrap_or_else(|| parent_turn_context.client.get_model_family());
    let tools_config = ToolsConfig::new(&ToolsConfigParams {
        model_family: &review_model_family,
-        approval_policy: parent_turn_context.approval_policy,
-        sandbox_policy: parent_turn_context.sandbox_policy.clone(),
        include_plan_tool: false,
        include_apply_patch_tool: config.include_apply_patch_tool,
        include_web_search_request: false,
@@ -1604,7 +1619,7 @@ async fn spawn_review_thread(
    // Clone sub_id for the upcoming announcement before moving it into the task.
    let sub_id_for_event = sub_id.clone();
    let task = AgentTask::review(sess.clone(), tc.clone(), sub_id, input);
-    sess.set_task(task);
+    sess.set_task(task).await;

    // Announce entering review mode so UIs can switch modes.
    sess.send_event(Event {
@@ -1675,6 +1690,7 @@ async fn run_task(
        // may support this, the model might not.
        let pending_input = sess
            .get_pending_input()
+            .await
            .into_iter()
            .map(ResponseItem::from)
            .collect::<Vec<ResponseItem>>();
@@ -1696,7 +1712,7 @@ async fn run_task(
            review_thread_history.clone()
        } else {
            sess.record_conversation_items(&pending_input).await;
-            sess.turn_input_with_history(pending_input)
+            sess.turn_input_with_history(pending_input).await
        };

        let turn_input_messages: Vec<String> = turn_input
@@ -1732,7 +1748,7 @@ async fn run_task(
                    .unwrap_or(i64::MAX);
                let total_usage_tokens = total_token_usage
                    .as_ref()
-                    .map(|usage| usage.tokens_in_context_window());
+                    .map(TokenUsage::tokens_in_context_window);
                let token_limit_reached = total_usage_tokens
                    .map(|tokens| (tokens as i64) >= limit)
                    .unwrap_or(false);
@@ -1908,7 +1924,7 @@ async fn run_task(
        .await;
    }

-    sess.remove_task(&sub_id);
+    sess.remove_task(&sub_id).await;
    let event = Event {
        id: sub_id,
        msg: EventMsg::TaskComplete(TaskCompleteEvent { last_agent_message }),
@@ -1969,6 +1985,19 @@ async fn run_turn(
                return Err(e);
            }
            Err(e) => {
+                // If we hit a context-window limit error (`ContextLengthExceeded`),
+                // ask the UI to offer a compact confirmation and stop retrying this turn.
+                if matches!(e, CodexErr::ContextLengthExceeded(_)) {
+                    let event = Event {
+                        id: sub_id.clone(),
+                        msg: EventMsg::CompactApprovalRequest(CompactApprovalRequestEvent {
+                            reason: "The chat has exceeded its limits. To continue, you need to compact the chat. Confirm running compact?".to_string(),
+                        }),
+                    };
+                    sess.send_event(event).await;
+                    // Non-transient – do not retry this turn further; let UI prompt.
+                    return Err(e);
+                }
                // Use the configured provider-specific stream retry budget.
                let max_retries = turn_context.client.get_provider().stream_max_retries();
                if retries < max_retries {
@@ -2137,15 +2166,22 @@ async fn try_run_turn(
                    })
                    .await;
            }
+            ResponseEvent::RateLimits(snapshot) => {
+                // Update internal state with latest rate limits, but defer sending until
+                // token usage is available to avoid duplicate TokenCount events.
+                sess.update_rate_limits(snapshot).await;
+            }
            ResponseEvent::Completed {
                response_id: _,
                token_usage,
            } => {
-                let info = sess.update_token_usage_info(turn_context, &token_usage);
+                sess.update_token_usage_info(turn_context, token_usage.as_ref())
+                    .await;
+                let token_event = sess.get_token_count_event().await;
                let _ = sess
                    .send_event(Event {
                        id: sub_id.to_string(),
-                        msg: EventMsg::TokenCount(crate::protocol::TokenCountEvent { info }),
+                        msg: EventMsg::TokenCount(token_event),
                    })
                    .await;

@@ -2475,7 +2511,10 @@ async fn handle_function_call(
                }
            };
            let abs = turn_context.resolve_path(Some(args.path));
-            let output = match sess.inject_input(vec![InputItem::LocalImage { path: abs }]) {
+            let output = match sess
+                .inject_input(vec![InputItem::LocalImage { path: abs }])
+                .await
+            {
                Ok(()) => FunctionCallOutputPayload {
                    content: "attached local image path".to_string(),
                    success: Some(true),
@@ -2570,12 +2609,7 @@ async fn handle_function_call(
        _ => {
            match sess.mcp_connection_manager.parse_tool_name(&name) {
                Some((server, tool_name)) => {
-                    // TODO(mbolin): Determine appropriate timeout for tool call.
-                    let timeout = None;
-                    handle_mcp_tool_call(
-                        sess, &sub_id, call_id, server, tool_name, arguments, timeout,
-                    )
-                    .await
+                    handle_mcp_tool_call(sess, &sub_id, call_id, server, tool_name, arguments).await
                }
                None => {
                    // Unknown function: reply with structured failure so the model can adapt.
@@ -2681,6 +2715,7 @@ pub struct ExecInvokeArgs<'a> {
    pub params: ExecParams,
    pub sandbox_type: SandboxType,
    pub sandbox_policy: &'a SandboxPolicy,
+    pub sandbox_cwd: &'a Path,
    pub codex_linux_sandbox_exe: &'a Option<PathBuf>,
    pub stdout_stream: Option<StdoutStream>,
 }
@@ -2711,6 +2746,21 @@ async fn handle_container_exec_with_params(
    sub_id: String,
    call_id: String,
 ) -> ResponseInputItem {
+    if params.with_escalated_permissions.unwrap_or(false)
+        && !matches!(turn_context.approval_policy, AskForApproval::OnRequest)
+    {
+        return ResponseInputItem::FunctionCallOutput {
+            call_id,
+            output: FunctionCallOutputPayload {
+                content: format!(
+                    "approval policy is {policy:?}; reject command — you should not ask for escalated permissions if the approval policy is {policy:?}",
+                    policy = turn_context.approval_policy
+                ),
+                success: None,
+            },
+        };
+    }
+
    // check if this was a patch, and apply it if so
    let apply_patch_exec = match maybe_parse_apply_patch_verified(&params.command, &params.cwd) {
        MaybeApplyPatchVerified::Body(changes) => {
@@ -2789,7 +2839,7 @@ async fn handle_container_exec_with_params(
        }
        None => {
            let safety = {
-                let state = sess.state.lock_unchecked();
+                let state = sess.state.lock().await;
                assess_command_safety(
                    &params.command,
                    turn_context.approval_policy,
@@ -2818,7 +2868,7 @@ async fn handle_container_exec_with_params(
            match rx_approve.await.unwrap_or_default() {
                ReviewDecision::Approved => (),
                ReviewDecision::ApprovedForSession => {
-                    sess.add_approved_command(params.command.clone());
+                    sess.add_approved_command(params.command.clone()).await;
                }
                ReviewDecision::Denied | ReviewDecision::Abort => {
                    return ResponseInputItem::FunctionCallOutput {
@@ -2872,6 +2922,7 @@ async fn handle_container_exec_with_params(
                params: params.clone(),
                sandbox_type,
                sandbox_policy: &turn_context.sandbox_policy,
+                sandbox_cwd: &turn_context.cwd,
                codex_linux_sandbox_exe: &sess.codex_linux_sandbox_exe,
                stdout_stream: if exec_command_context.apply_patch.is_some() {
                    None
@@ -2991,7 +3042,7 @@ async fn handle_sandbox_error(
            // remainder of the session so future
            // executions skip the sandbox directly.
            // TODO(ragona): Isn't this a bug? It always saves the command in an | fork?
-            sess.add_approved_command(params.command.clone());
+            sess.add_approved_command(params.command.clone()).await;
            // Inform UI we are retrying without sandbox.
            sess.notify_background_event(&sub_id, "retrying command without sandbox")
                .await;
@@ -3006,6 +3057,7 @@ async fn handle_sandbox_error(
                        params,
                        sandbox_type: SandboxType::None,
                        sandbox_policy: &turn_context.sandbox_policy,
+                        sandbox_cwd: &turn_context.cwd,
                        codex_linux_sandbox_exe: &sess.codex_linux_sandbox_exe,
                        stdout_stream: if exec_command_context.apply_patch.is_some() {
                            None
@@ -3293,7 +3345,7 @@ async fn exit_review_mode(
  <results>
  {findings_str}
  </results>
-</user_tool>
+</user_action>
 "#));
    } else {
        user_message.push_str(r#"<user_action>
@@ -3302,7 +3354,7 @@ async fn exit_review_mode(
  <results>
  None.
  </results>
-</user_tool>
+</user_action>
 "#);
    }

@@ -3315,6 +3367,9 @@ async fn exit_review_mode(
        .await;
 }

+#[cfg(test)]
+pub(crate) use tests::make_session_and_context;
+
 #[cfg(test)]
 mod tests {
    use super::*;
@@ -3327,6 +3382,7 @@ mod tests {
    use mcp_types::ContentBlock;
    use mcp_types::TextContent;
    use pretty_assertions::assert_eq;
+    use serde::Deserialize;
    use serde_json::json;
    use std::path::PathBuf;
    use std::sync::Arc;
@@ -3356,7 +3412,7 @@ mod tests {
            }),
        ));

-        let actual = session.state.lock_unchecked().history.contents();
+        let actual = tokio_test::block_on(async { session.state.lock().await.history.contents() });
        assert_eq!(expected, actual);
    }

@@ -3369,7 +3425,7 @@ mod tests {
            session.record_initial_history(&turn_context, InitialHistory::Forked(rollout_items)),
        );

-        let actual = session.state.lock_unchecked().history.contents();
+        let actual = tokio_test::block_on(async { session.state.lock().await.history.contents() });
        assert_eq!(expected, actual);
    }

@@ -3555,7 +3611,7 @@ mod tests {
        })
    }

-    fn make_session_and_context() -> (Session, TurnContext) {
+    pub(crate) fn make_session_and_context() -> (Session, TurnContext) {
        let (tx_event, _rx_event) = async_channel::unbounded();
        let codex_home = tempfile::tempdir().expect("create temp dir");
        let config = Config::load_from_base_config_with_overrides(
@@ -3576,8 +3632,6 @@ mod tests {
        );
        let tools_config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &config.model_family,
-            approval_policy: config.approval_policy,
-            sandbox_policy: config.sandbox_policy.clone(),
            include_plan_tool: config.include_plan_tool,
            include_apply_patch_tool: config.include_apply_patch_tool,
            include_web_search_request: config.tools_web_search_request,
@@ -3611,6 +3665,7 @@ mod tests {
            codex_linux_sandbox_exe: None,
            user_shell: shell::Shell::Unknown,
            show_raw_agent_reasoning: config.show_raw_agent_reasoning,
+            next_internal_sub_id: AtomicU64::new(0),
        };
        (session, turn_context)
    }
@@ -3716,4 +3771,105 @@ mod tests {

        (rollout_items, live_history.contents())
    }
+
+    #[tokio::test]
+    async fn rejects_escalated_permissions_when_policy_not_on_request() {
+        use crate::exec::ExecParams;
+        use crate::protocol::AskForApproval;
+        use crate::protocol::SandboxPolicy;
+        use crate::turn_diff_tracker::TurnDiffTracker;
+        use std::collections::HashMap;
+
+        let (session, mut turn_context) = make_session_and_context();
+        // Ensure policy is NOT OnRequest so the early rejection path triggers
+        turn_context.approval_policy = AskForApproval::OnFailure;
+
+        let params = ExecParams {
+            command: if cfg!(windows) {
+                vec![
+                    "cmd.exe".to_string(),
+                    "/C".to_string(),
+                    "echo hi".to_string(),
+                ]
+            } else {
+                vec![
+                    "/bin/sh".to_string(),
+                    "-c".to_string(),
+                    "echo hi".to_string(),
+                ]
+            },
+            cwd: turn_context.cwd.clone(),
+            timeout_ms: Some(1000),
+            env: HashMap::new(),
+            with_escalated_permissions: Some(true),
+            justification: Some("test".to_string()),
+        };
+
+        let params2 = ExecParams {
+            with_escalated_permissions: Some(false),
+            ..params.clone()
+        };
+
+        let mut turn_diff_tracker = TurnDiffTracker::new();
+
+        let sub_id = "test-sub".to_string();
+        let call_id = "test-call".to_string();
+
+        let resp = handle_container_exec_with_params(
+            params,
+            &session,
+            &turn_context,
+            &mut turn_diff_tracker,
+            sub_id,
+            call_id,
+        )
+        .await;
+
+        let ResponseInputItem::FunctionCallOutput { output, .. } = resp else {
+            panic!("expected FunctionCallOutput");
+        };
+
+        let expected = format!(
+            "approval policy is {policy:?}; reject command — you should not ask for escalated permissions if the approval policy is {policy:?}",
+            policy = turn_context.approval_policy
+        );
+
+        pretty_assertions::assert_eq!(output.content, expected);
+
+        // Now retry the same command WITHOUT escalated permissions; should succeed.
+        // Force DangerFullAccess to avoid platform sandbox dependencies in tests.
+        turn_context.sandbox_policy = SandboxPolicy::DangerFullAccess;
+
+        let resp2 = handle_container_exec_with_params(
+            params2,
+            &session,
+            &turn_context,
+            &mut turn_diff_tracker,
+            "test-sub".to_string(),
+            "test-call-2".to_string(),
+        )
+        .await;
+
+        let ResponseInputItem::FunctionCallOutput { output, .. } = resp2 else {
+            panic!("expected FunctionCallOutput on retry");
+        };
+
+        #[derive(Deserialize, PartialEq, Eq, Debug)]
+        struct ResponseExecMetadata {
+            exit_code: i32,
+        }
+
+        #[derive(Deserialize)]
+        struct ResponseExecOutput {
+            output: String,
+            metadata: ResponseExecMetadata,
+        }
+
+        let exec_output: ResponseExecOutput =
+            serde_json::from_str(&output.content).expect("valid exec output json");
+
+        pretty_assertions::assert_eq!(exec_output.metadata, ResponseExecMetadata { exit_code: 0 });
+        assert!(exec_output.output.contains("hi"));
+        pretty_assertions::assert_eq!(output.success, Some(true));
+    }
 }
--- a/codex-rs/core/src/codex/compact.rs
+++ b/codex-rs/core/src/codex/compact.rs
@@ -1,7 +1,6 @@
 use std::sync::Arc;

 use super::AgentTask;
-use super::MutexExt;
 use super::Session;
 use super::TurnContext;
 use super::get_last_assistant_message_from_turn;
@@ -19,6 +18,7 @@ use crate::protocol::InputMessageKind;
 use crate::protocol::TaskCompleteEvent;
 use crate::protocol::TaskStartedEvent;
 use crate::protocol::TurnContextItem;
+use crate::truncate::truncate_middle;
 use crate::util::backoff;
 use askama::Template;
 use codex_protocol::models::ContentItem;
@@ -29,6 +29,7 @@ use futures::prelude::*;

 pub(super) const COMPACT_TRIGGER_TEXT: &str = "Start Summarization";
 const SUMMARIZATION_PROMPT: &str = include_str!("../../templates/compact/prompt.md");
+const COMPACT_USER_MESSAGE_MAX_TOKENS: usize = 20_000;

 #[derive(Template)]
 #[template(path = "compact/history_bridge.md", escape = "none")]
@@ -37,7 +38,7 @@ struct HistoryBridgeTemplate<'a> {
    summary_text: &'a str,
 }

-pub(super) fn spawn_compact_task(
+pub(super) async fn spawn_compact_task(
    sess: Arc<Session>,
    turn_context: Arc<TurnContext>,
    sub_id: String,
@@ -50,7 +51,7 @@ pub(super) fn spawn_compact_task(
        input,
        SUMMARIZATION_PROMPT.to_string(),
    );
-    sess.set_task(task);
+    sess.set_task(task).await;
 }

 pub(super) async fn run_inline_auto_compact_task(
@@ -79,15 +80,29 @@ pub(super) async fn run_compact_task(
    input: Vec<InputItem>,
    compact_instructions: String,
 ) {
+    let start_event = Event {
+        id: sub_id.clone(),
+        msg: EventMsg::TaskStarted(TaskStartedEvent {
+            model_context_window: turn_context.client.get_model_context_window(),
+        }),
+    };
+    sess.send_event(start_event).await;
    run_compact_task_inner(
-        sess,
+        sess.clone(),
        turn_context,
-        sub_id,
+        sub_id.clone(),
        input,
        compact_instructions,
        true,
    )
    .await;
+    let event = Event {
+        id: sub_id,
+        msg: EventMsg::TaskComplete(TaskCompleteEvent {
+            last_agent_message: None,
+        }),
+    };
+    sess.send_event(event).await;
 }

 async fn run_compact_task_inner(
@@ -98,23 +113,15 @@ async fn run_compact_task_inner(
    compact_instructions: String,
    remove_task_on_completion: bool,
 ) {
-    let model_context_window = turn_context.client.get_model_context_window();
-    let start_event = Event {
-        id: sub_id.clone(),
-        msg: EventMsg::TaskStarted(TaskStartedEvent {
-            model_context_window,
-        }),
-    };
-    sess.send_event(start_event).await;
-
    let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input);
    let instructions_override = compact_instructions;
-    let turn_input = sess.turn_input_with_history(vec![initial_input_for_turn.clone().into()]);

-    let prompt = Prompt {
-        input: turn_input,
-        tools: Vec::new(),
-        base_instructions_override: Some(instructions_override),
+    // Build an in-memory snapshot of the current history; each context-length
+    // error pops the last item from this vector before retrying, without
+    // modifying the session transcript.
+    let mut working_history = {
+        let state = sess.state.lock().await;
+        state.history.contents()
    };

    let max_retries = turn_context.client.get_provider().stream_max_retries();
@@ -131,6 +138,17 @@ async fn run_compact_task_inner(
    sess.persist_rollout_items(&[rollout_item]).await;

    loop {
+        // Build prompt input = history + compact trigger
+        let mut turn_input: Vec<ResponseItem> = Vec::with_capacity(working_history.len() + 1);
+        turn_input.extend_from_slice(&working_history);
+        turn_input.push(initial_input_for_turn.clone().into());
+
+        let prompt = Prompt {
+            input: turn_input,
+            tools: Vec::new(),
+            base_instructions_override: Some(instructions_override.clone()),
+        };
+
        let attempt_result = drain_to_completed(&sess, turn_context.as_ref(), &prompt).await;

        match attempt_result {
@@ -141,6 +159,28 @@ async fn run_compact_task_inner(
                return;
            }
            Err(e) => {
+                // Special-case compaction overflows: trim and retry immediately with no backoff.
+                if matches!(e, CodexErr::ContextLengthExceeded(_)) {
+                    if working_history.pop().is_some() {
+                        sess.notify_stream_error(
+                            &sub_id,
+                            "compact input exceeds context window; retrying with 1 fewer item…",
+                        )
+                        .await;
+                        continue;
+                    } else {
+                        let event = Event {
+                            id: sub_id.clone(),
+                            msg: EventMsg::Error(ErrorEvent {
+                                message:
+                                    "Unable to compact: context window too small for any history"
+                                        .to_string(),
+                            }),
+                        };
+                        sess.send_event(event).await;
+                        return;
+                    }
+                }
                if retries < max_retries {
                    retries += 1;
                    let delay = backoff(retries);
@@ -168,10 +208,10 @@ async fn run_compact_task_inner(
    }

    if remove_task_on_completion {
-        sess.remove_task(&sub_id);
+        sess.remove_task(&sub_id).await;
    }
    let history_snapshot = {
-        let state = sess.state.lock_unchecked();
+        let state = sess.state.lock().await;
        state.history.contents()
    };
    let summary_text = get_last_assistant_message_from_turn(&history_snapshot).unwrap_or_default();
@@ -179,7 +219,7 @@ async fn run_compact_task_inner(
    let initial_context = sess.build_initial_context(turn_context.as_ref());
    let new_history = build_compacted_history(initial_context, &user_messages, &summary_text);
    {
-        let mut state = sess.state.lock_unchecked();
+        let mut state = sess.state.lock().await;
        state.history.replace(new_history);
    }

@@ -195,16 +235,9 @@ async fn run_compact_task_inner(
        }),
    };
    sess.send_event(event).await;
-    let event = Event {
-        id: sub_id.clone(),
-        msg: EventMsg::TaskComplete(TaskCompleteEvent {
-            last_agent_message: None,
-        }),
-    };
-    sess.send_event(event).await;
 }

-fn content_items_to_text(content: &[ContentItem]) -> Option<String> {
+pub fn content_items_to_text(content: &[ContentItem]) -> Option<String> {
    let mut pieces = Vec::new();
    for item in content {
        match item {
@@ -236,7 +269,7 @@ pub(crate) fn collect_user_messages(items: &[ResponseItem]) -> Vec<String> {
        .collect()
 }

-fn is_session_prefix_message(text: &str) -> bool {
+pub fn is_session_prefix_message(text: &str) -> bool {
    matches!(
        InputMessageKind::from(("user", text)),
        InputMessageKind::UserInstructions | InputMessageKind::EnvironmentContext
@@ -249,11 +282,17 @@ pub(crate) fn build_compacted_history(
    summary_text: &str,
 ) -> Vec<ResponseItem> {
    let mut history = initial_context;
-    let user_messages_text = if user_messages.is_empty() {
+    let mut user_messages_text = if user_messages.is_empty() {
        "(none)".to_string()
    } else {
        user_messages.join("\n\n")
    };
+    // Truncate the concatenated prior user messages so the bridge message
+    // stays well under the context window (approx. 4 bytes/token).
+    let max_bytes = COMPACT_USER_MESSAGE_MAX_TOKENS * 4;
+    if user_messages_text.len() > max_bytes {
+        user_messages_text = truncate_middle(&user_messages_text, max_bytes).0;
+    }
    let summary_text = if summary_text.is_empty() {
        "(no summary available)".to_string()
    } else {
@@ -290,7 +329,7 @@ async fn drain_to_completed(
        };
        match event {
            Ok(ResponseEvent::OutputItemDone(item)) => {
-                let mut state = sess.state.lock_unchecked();
+                let mut state = sess.state.lock().await;
                state.history.record_items(std::slice::from_ref(&item));
            }
            Ok(ResponseEvent::Completed { .. }) => {
@@ -397,4 +436,38 @@ mod tests {

        assert_eq!(vec!["real user message".to_string()], collected);
    }
+
+    #[test]
+    fn build_compacted_history_truncates_overlong_user_messages() {
+        // Prepare a very large prior user message so the aggregated
+        // `user_messages_text` exceeds the truncation threshold used by
+        // `build_compacted_history` (`COMPACT_USER_MESSAGE_MAX_TOKENS * 4` = 80,000 bytes).
+        let big = "X".repeat(200_000);
+        let history = build_compacted_history(Vec::new(), std::slice::from_ref(&big), "SUMMARY");
+
+        // No initial context was provided, so history should hold exactly one item: the bridge message.
+        assert_eq!(history.len(), 1);
+
+        // Extract the text content of the bridge message.
+        let bridge_text = match &history[0] {
+            ResponseItem::Message { role, content, .. } if role == "user" => {
+                content_items_to_text(content).unwrap_or_default()
+            }
+            other => panic!("unexpected item in history: {other:?}"),
+        };
+
+        // The bridge should contain the truncation marker and not the full original payload.
+        assert!(
+            bridge_text.contains("tokens truncated"),
+            "expected truncation marker in bridge message"
+        );
+        assert!(
+            !bridge_text.contains(&big),
+            "bridge should not include the full oversized user text"
+        );
+        assert!(
+            bridge_text.contains("SUMMARY"),
+            "bridge should include the provided summary text"
+        );
+    }
 }
--- a/codex-rs/core/src/config.rs
+++ b/codex-rs/core/src/config.rs
@@ -333,14 +333,12 @@ pub fn write_global_mcp_servers(
                entry["env"] = TomlItem::Table(env_table);
            }

-            if let Some(timeout) = config.startup_timeout_ms {
-                let timeout = i64::try_from(timeout).map_err(|_| {
-                    std::io::Error::new(
-                        std::io::ErrorKind::InvalidData,
-                        "startup_timeout_ms exceeds supported range",
-                    )
-                })?;
-                entry["startup_timeout_ms"] = toml_edit::value(timeout);
+            if let Some(timeout) = config.startup_timeout_sec {
+                entry["startup_timeout_sec"] = toml_edit::value(timeout.as_secs_f64());
+            }
+
+            if let Some(timeout) = config.tool_timeout_sec {
+                entry["tool_timeout_sec"] = toml_edit::value(timeout.as_secs_f64());
            }

            doc["mcp_servers"][name.as_str()] = TomlItem::Table(entry);
@@ -1163,10 +1161,12 @@ pub fn log_dir(cfg: &Config) -> std::io::Result<PathBuf> {
 #[cfg(test)]
 mod tests {
    use crate::config_types::HistoryPersistence;
+    use crate::config_types::Notifications;

    use super::*;
    use pretty_assertions::assert_eq;

+    use std::time::Duration;
    use tempfile::TempDir;

    #[test]
@@ -1201,6 +1201,19 @@ persistence = "none"
        );
    }

+    #[test]
+    fn tui_config_missing_notifications_field_defaults_to_disabled() {
+        let cfg = r#"
+[tui]
+"#;
+
+        let parsed = toml::from_str::<ConfigToml>(cfg)
+            .expect("TUI config without notifications should succeed");
+        let tui = parsed.tui.expect("config should include tui section");
+
+        assert_eq!(tui.notifications, Notifications::Enabled(false));
+    }
+
    #[test]
    fn test_sandbox_config_parsing() {
        let sandbox_full_access = r#"
@@ -1278,7 +1291,8 @@ exclude_slash_tmp = true
                command: "echo".to_string(),
                args: vec!["hello".to_string()],
                env: None,
-                startup_timeout_ms: None,
+                startup_timeout_sec: Some(Duration::from_secs(3)),
+                tool_timeout_sec: Some(Duration::from_secs(5)),
            },
        );

@@ -1289,6 +1303,8 @@ exclude_slash_tmp = true
        let docs = loaded.get("docs").expect("docs entry");
        assert_eq!(docs.command, "echo");
        assert_eq!(docs.args, vec!["hello".to_string()]);
+        assert_eq!(docs.startup_timeout_sec, Some(Duration::from_secs(3)));
+        assert_eq!(docs.tool_timeout_sec, Some(Duration::from_secs(5)));

        let empty = BTreeMap::new();
        write_global_mcp_servers(codex_home.path(), &empty)?;
@@ -1298,6 +1314,28 @@ exclude_slash_tmp = true
        Ok(())
    }

+    /// A config that only sets `startup_timeout_ms` must load, with the value
+    /// exposed through `startup_timeout_sec`.
+    #[test]
+    fn load_global_mcp_servers_accepts_legacy_ms_field() -> anyhow::Result<()> {
+        let codex_home = TempDir::new()?;
+        let legacy_toml = r#"
+[mcp_servers]
+[mcp_servers.docs]
+command = "echo"
+startup_timeout_ms = 2500
+"#;
+        std::fs::write(codex_home.path().join(CONFIG_TOML_FILE), legacy_toml)?;
+
+        let loaded = load_global_mcp_servers(codex_home.path())?;
+        let docs_server = loaded.get("docs").expect("docs entry");
+        let expected_timeout = Some(Duration::from_millis(2500));
+        assert_eq!(docs_server.startup_timeout_sec, expected_timeout);
+
+        Ok(())
+    }
+
    #[tokio::test]
    async fn persist_model_selection_updates_defaults() -> anyhow::Result<()> {
        let codex_home = TempDir::new()?;
--- a/codex-rs/core/src/config_edit.rs
+++ b/codex-rs/core/src/config_edit.rs
@@ -136,7 +136,7 @@ async fn persist_overrides_with_behavior(
    } else {
        doc.get("profile")
            .and_then(|i| i.as_str())
-            .map(|s| s.to_string())
+            .map(str::to_string)
    };

    let mut mutated = false;
--- a/codex-rs/core/src/config_types.rs
+++ b/codex-rs/core/src/config_types.rs
@@ -5,11 +5,15 @@

 use std::collections::HashMap;
 use std::path::PathBuf;
+use std::time::Duration;
 use wildmatch::WildMatchPattern;

 use serde::Deserialize;
+use serde::Deserializer;
+use serde::Serialize;
+use serde::de::Error as SerdeError;

-#[derive(Deserialize, Debug, Clone, PartialEq)]
+#[derive(Serialize, Debug, Clone, PartialEq)]
 pub struct McpServerConfig {
    pub command: String,

@@ -19,9 +23,84 @@ pub struct McpServerConfig {
    #[serde(default)]
    pub env: Option<HashMap<String, String>>,

-    /// Startup timeout in milliseconds for initializing MCP server & initially listing tools.
-    #[serde(default)]
-    pub startup_timeout_ms: Option<u64>,
+    /// Startup timeout in seconds for initializing MCP server & initially listing tools.
+    #[serde(
+        default,
+        with = "option_duration_secs",
+        skip_serializing_if = "Option::is_none"
+    )]
+    pub startup_timeout_sec: Option<Duration>,
+
+    /// Default timeout for MCP tool calls initiated via this server.
+    #[serde(default, with = "option_duration_secs")]
+    pub tool_timeout_sec: Option<Duration>,
+}
+
+/// Hand-written `Deserialize` so that `startup_timeout_ms` is accepted in
+/// addition to `startup_timeout_sec`.
+impl<'de> Deserialize<'de> for McpServerConfig {
+    /// Deserializes into an intermediate raw form, then resolves the startup
+    /// timeout: `startup_timeout_sec` (fractional seconds) wins when present,
+    /// otherwise `startup_timeout_ms` (whole milliseconds) is used.
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        #[derive(Deserialize)]
+        struct RawMcpServerConfig {
+            command: String,
+            #[serde(default)]
+            args: Vec<String>,
+            #[serde(default)]
+            env: Option<HashMap<String, String>>,
+            #[serde(default)]
+            startup_timeout_sec: Option<f64>,
+            #[serde(default)]
+            startup_timeout_ms: Option<u64>,
+            #[serde(default, with = "option_duration_secs")]
+            tool_timeout_sec: Option<Duration>,
+        }
+
+        let RawMcpServerConfig {
+            command,
+            args,
+            env,
+            startup_timeout_sec: secs,
+            startup_timeout_ms: millis,
+            tool_timeout_sec,
+        } = RawMcpServerConfig::deserialize(deserializer)?;
+
+        // A seconds value that cannot be represented as a `Duration` is a
+        // deserialization error; the milliseconds value is only a fallback.
+        let startup_timeout_sec = if let Some(secs) = secs {
+            Some(Duration::try_from_secs_f64(secs).map_err(SerdeError::custom)?)
+        } else {
+            millis.map(Duration::from_millis)
+        };
+
+        Ok(Self {
+            command,
+            args,
+            env,
+            startup_timeout_sec,
+            tool_timeout_sec,
+        })
+    }
+}
+
+/// Serde `with` adapter for `Option<Duration>` fields written as fractional seconds.
+mod option_duration_secs {
+    use serde::Deserialize;
+    use serde::Deserializer;
+    use serde::Serializer;
+    use std::time::Duration;
+
+    /// Serializes a present duration as `Some(f64 seconds)` and an absent one as `none`.
+    pub fn serialize<S>(value: &Option<Duration>, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        if let Some(duration) = value {
+            serializer.serialize_some(&duration.as_secs_f64())
+        } else {
+            serializer.serialize_none()
+        }
+    }
+
+    /// Reads an optional `f64` second count and converts it to a `Duration`,
+    /// reporting a failed conversion as a deserialization error.
+    pub fn deserialize<'de, D>(deserializer: D) -> Result<Option<Duration>, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        match Option::<f64>::deserialize(deserializer)? {
+            Some(secs) => Duration::try_from_secs_f64(secs)
+                .map(Some)
+                .map_err(serde::de::Error::custom),
+            None => Ok(None),
+        }
+    }
 }

 #[derive(Deserialize, Debug, Copy, Clone, PartialEq)]
@@ -94,6 +173,7 @@ impl Default for Notifications {
 pub struct Tui {
    /// Enable desktop notifications from the TUI when the terminal is unfocused.
    /// Defaults to `false`.
+    #[serde(default)]
    pub notifications: Notifications,
 }

--- a/codex-rs/core/src/conversation_manager.rs
+++ b/codex-rs/core/src/conversation_manager.rs
@@ -3,6 +3,8 @@ use crate::CodexAuth;
 use crate::codex::Codex;
 use crate::codex::CodexSpawnOk;
 use crate::codex::INITIAL_SUBMIT_ID;
+use crate::codex::compact::content_items_to_text;
+use crate::codex::compact::is_session_prefix_message;
 use crate::codex_conversation::CodexConversation;
 use crate::config::Config;
 use crate::error::CodexErr;
@@ -134,19 +136,19 @@ impl ConversationManager {
        self.conversations.write().await.remove(conversation_id)
    }

-    /// Fork an existing conversation by dropping the last `drop_last_messages`
-    /// user/assistant messages from its transcript and starting a new
+    /// Fork an existing conversation by taking messages up to the given position
+    /// (not including the message at the given position) and starting a new
    /// conversation with identical configuration (unless overridden by the
    /// caller's `config`). The new conversation will have a fresh id.
    pub async fn fork_conversation(
        &self,
-        num_messages_to_drop: usize,
+        nth_user_message: usize,
        config: Config,
        path: PathBuf,
    ) -> CodexResult<NewConversation> {
        // Compute the prefix up to the cut point.
        let history = RolloutRecorder::get_rollout_history(&path).await?;
-        let history = truncate_after_dropping_last_messages(history, num_messages_to_drop);
+        let history = truncate_before_nth_user_message(history, nth_user_message);

        // Spawn a new conversation with the computed initial history.
        let auth_manager = self.auth_manager.clone();
@@ -159,33 +161,30 @@ impl ConversationManager {
    }
 }

-/// Return a prefix of `items` obtained by dropping the last `n` user messages
-/// and all items that follow them.
-fn truncate_after_dropping_last_messages(history: InitialHistory, n: usize) -> InitialHistory {
-    if n == 0 {
-        return InitialHistory::Forked(history.get_rollout_items());
-    }
-
-    // Work directly on rollout items, and cut the vector at the nth-from-last user message input.
+/// Return the prefix of the rollout items that precedes the nth user message
+/// (0-based), dropping that message and every item that follows it.
+fn truncate_before_nth_user_message(history: InitialHistory, n: usize) -> InitialHistory {
+    // Work directly on rollout items, and cut the vector at the nth user message input.
    let items: Vec<RolloutItem> = history.get_rollout_items();

    // Find indices of user message inputs in rollout order.
    let mut user_positions: Vec<usize> = Vec::new();
    for (idx, item) in items.iter().enumerate() {
-        if let RolloutItem::ResponseItem(ResponseItem::Message { role, .. }) = item
+        if let RolloutItem::ResponseItem(ResponseItem::Message { role, content, .. }) = item
            && role == "user"
+            && content_items_to_text(content).is_some_and(|text| !is_session_prefix_message(&text))
        {
            user_positions.push(idx);
        }
    }

-    // If fewer than n user messages exist, treat as empty.
-    if user_positions.len() < n {
+    // If fewer than or equal to n user messages exist, treat as empty (out of range).
+    if user_positions.len() <= n {
        return InitialHistory::New;
    }

-    // Cut strictly before the nth-from-last user message (do not keep the nth itself).
-    let cut_idx = user_positions[user_positions.len() - n];
+    // Cut strictly before the nth user message (do not keep the nth itself).
+    let cut_idx = user_positions[n];
    let rolled: Vec<RolloutItem> = items.into_iter().take(cut_idx).collect();

    if rolled.is_empty() {
@@ -198,9 +197,11 @@ fn truncate_after_dropping_last_messages(history: InitialHistory, n: usize) -> I
 #[cfg(test)]
 mod tests {
    use super::*;
+    use crate::codex::make_session_and_context;
    use codex_protocol::models::ContentItem;
    use codex_protocol::models::ReasoningItemReasoningSummary;
    use codex_protocol::models::ResponseItem;
+    use pretty_assertions::assert_eq;

    fn user_msg(text: &str) -> ResponseItem {
        ResponseItem::Message {
@@ -252,7 +253,7 @@ mod tests {
            .cloned()
            .map(RolloutItem::ResponseItem)
            .collect();
-        let truncated = truncate_after_dropping_last_messages(InitialHistory::Forked(initial), 1);
+        let truncated = truncate_before_nth_user_message(InitialHistory::Forked(initial), 1);
        let got_items = truncated.get_rollout_items();
        let expected_items = vec![
            RolloutItem::ResponseItem(items[0].clone()),
@@ -269,7 +270,37 @@ mod tests {
            .cloned()
            .map(RolloutItem::ResponseItem)
            .collect();
-        let truncated2 = truncate_after_dropping_last_messages(InitialHistory::Forked(initial2), 2);
+        let truncated2 = truncate_before_nth_user_message(InitialHistory::Forked(initial2), 2);
        assert!(matches!(truncated2, InitialHistory::New));
    }
+
+    /// Builds a history of the initial context followed by two user/assistant
+    /// exchanges, and verifies that truncating before user message 1 keeps
+    /// exactly the first three items.
+    #[test]
+    fn ignores_session_prefix_messages_when_truncating() {
+        let (session, turn_context) = make_session_and_context();
+        let mut items = session.build_initial_context(&turn_context);
+        items.extend([
+            user_msg("feature request"),
+            assistant_msg("ack"),
+            user_msg("second question"),
+            assistant_msg("answer"),
+        ]);
+
+        // Wraps a slice of response items as rollout items.
+        let as_rollout = |slice: &[ResponseItem]| -> Vec<RolloutItem> {
+            slice
+                .iter()
+                .cloned()
+                .map(RolloutItem::ResponseItem)
+                .collect()
+        };
+
+        let truncated =
+            truncate_before_nth_user_message(InitialHistory::Forked(as_rollout(&items)), 1);
+        let got_items = truncated.get_rollout_items();
+        let expected = as_rollout(&items[..3]);
+
+        assert_eq!(
+            serde_json::to_value(&got_items).unwrap(),
+            serde_json::to_value(&expected).unwrap()
+        );
+    }
 }
--- a/codex-rs/core/src/custom_prompts.rs
+++ b/codex-rs/core/src/custom_prompts.rs
@@ -52,7 +52,7 @@ pub async fn discover_prompts_in_excluding(
        let Some(name) = path
            .file_stem()
            .and_then(|s| s.to_str())
-            .map(|s| s.to_string())
+            .map(str::to_string)
        else {
            continue;
        };
--- a/codex-rs/core/src/error.rs
+++ b/codex-rs/core/src/error.rs
@@ -104,6 +104,10 @@ pub enum CodexErr {
    #[error("codex-linux-sandbox was required but not provided")]
    LandlockSandboxExecutableNotProvided,

+    /// Provider reported the input exceeds the model's context window.
+    #[error("context_length_exceeded: {0}")]
+    ContextLengthExceeded(String),
+
    // -----------------------------------------------------------------
    // Automatic conversions for common external error types
    // -----------------------------------------------------------------
--- a/codex-rs/core/src/error_codes.rs
+++ b/codex-rs/core/src/error_codes.rs
@@ -0,0 +1,2 @@
+/// Structured error code a model provider returns when the input exceeds the model's context window.
+pub const CONTEXT_LENGTH_EXCEEDED: &str = "context_length_exceeded";
--- a/codex-rs/core/src/exec.rs
+++ b/codex-rs/core/src/exec.rs
@@ -3,6 +3,7 @@ use std::os::unix::process::ExitStatusExt;

 use std::collections::HashMap;
 use std::io;
+use std::path::Path;
 use std::path::PathBuf;
 use std::process::ExitStatus;
 use std::time::Duration;
@@ -44,7 +45,7 @@ const AGGREGATE_BUFFER_INITIAL_CAPACITY: usize = 8 * 1024; // 8 KiB
 /// Aggregation still collects full output; only the live event stream is capped.
 pub(crate) const MAX_EXEC_OUTPUT_DELTAS_PER_CALL: usize = 10_000;

-#[derive(Debug, Clone)]
+#[derive(Clone, Debug)]
 pub struct ExecParams {
    pub command: Vec<String>,
    pub cwd: PathBuf,
@@ -82,6 +83,7 @@ pub async fn process_exec_tool_call(
    params: ExecParams,
    sandbox_type: SandboxType,
    sandbox_policy: &SandboxPolicy,
+    sandbox_cwd: &Path,
    codex_linux_sandbox_exe: &Option<PathBuf>,
    stdout_stream: Option<StdoutStream>,
 ) -> Result<ExecToolCallOutput> {
@@ -94,12 +96,16 @@ pub async fn process_exec_tool_call(
        SandboxType::None => exec(params, sandbox_policy, stdout_stream.clone()).await,
        SandboxType::MacosSeatbelt => {
            let ExecParams {
-                command, cwd, env, ..
+                command,
+                cwd: command_cwd,
+                env,
+                ..
            } = params;
            let child = spawn_command_under_seatbelt(
                command,
+                command_cwd,
                sandbox_policy,
-                cwd,
+                sandbox_cwd,
                StdioPolicy::RedirectForShellTool,
                env,
            )
@@ -108,7 +114,10 @@ pub async fn process_exec_tool_call(
        }
        SandboxType::LinuxSeccomp => {
            let ExecParams {
-                command, cwd, env, ..
+                command,
+                cwd: command_cwd,
+                env,
+                ..
            } = params;

            let codex_linux_sandbox_exe = codex_linux_sandbox_exe
@@ -117,8 +126,9 @@ pub async fn process_exec_tool_call(
            let child = spawn_command_under_linux_sandbox(
                codex_linux_sandbox_exe,
                command,
+                command_cwd,
                sandbox_policy,
-                cwd,
+                sandbox_cwd,
                StdioPolicy::RedirectForShellTool,
                env,
            )
--- a/codex-rs/core/src/git_info.rs
+++ b/codex-rs/core/src/git_info.rs
@@ -108,6 +108,61 @@ pub async fn collect_git_info(cwd: &Path) -> Option<GitInfo> {
    Some(git_info)
 }

+/// A minimal commit summary (sha + timestamp + subject) used for pickers; built by `recent_commits`.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct CommitLogEntry {
+    pub sha: String,
+    /// Committer time (git `%ct`) as a Unix timestamp in seconds since the epoch.
+    pub timestamp: i64,
+    /// Single-line subject (git `%s`) of the commit message, trimmed.
+    pub subject: String,
+}
+
+/// Return up to `limit` commits from `git log` run in `cwd`.
+///
+/// Each entry carries the commit hash (`%H`), the committer timestamp in
+/// seconds (`%ct`), and the subject line (`%s`). A `limit` of 0 is clamped to 1
+/// before being passed to `git log -n`.
+///
+/// Returns an empty vector when the repository probe (`git rev-parse
+/// --git-dir`) or the `git log` call yields no output or exits unsuccessfully.
+pub async fn recent_commits(cwd: &Path, limit: usize) -> Vec<CommitLogEntry> {
+    // Probe for a repository first so the `git log` below does not produce noisy errors.
+    match run_git_command_with_timeout(&["rev-parse", "--git-dir"], cwd).await {
+        Some(probe) if probe.status.success() => {}
+        _ => return Vec::new(),
+    }
+
+    // Fields are joined with the unit separator (0x1f): <sha> US <commit_time> US <subject>.
+    let pretty = "--pretty=format:%H%x1f%ct%x1f%s";
+    let count = limit.max(1).to_string();
+    let log_args = ["log", "-n", count.as_str(), pretty];
+    let log_out = match run_git_command_with_timeout(&log_args, cwd).await {
+        Some(out) if out.status.success() => out,
+        _ => return Vec::new(),
+    };
+
+    String::from_utf8_lossy(&log_out.stdout)
+        .lines()
+        .filter_map(|line| {
+            // Split into at most three fields so that a unit separator inside
+            // the subject stays part of the subject instead of truncating it.
+            let mut fields = line.splitn(3, '\u{001f}');
+            let sha = fields.next().unwrap_or("").trim();
+            let ts_s = fields.next().unwrap_or("").trim();
+            let subject = fields.next().unwrap_or("").trim();
+            if sha.is_empty() || ts_s.is_empty() {
+                return None;
+            }
+            Some(CommitLogEntry {
+                sha: sha.to_string(),
+                // An unparsable timestamp is recorded as 0 rather than dropping the entry.
+                timestamp: ts_s.parse::<i64>().unwrap_or(0),
+                subject: subject.to_string(),
+            })
+        })
+        .collect()
+}
+
 /// Returns the closest git sha to HEAD that is on a remote as well as the diff to that sha.
 pub async fn git_diff_to_remote(cwd: &Path) -> Option<GitDiffToRemote> {
    get_git_repo_root(cwd)?;
@@ -145,7 +200,7 @@ async fn get_git_remotes(cwd: &Path) -> Option<Vec<String>> {
    let mut remotes: Vec<String> = String::from_utf8(output.stdout)
        .ok()?
        .lines()
-        .map(|s| s.to_string())
+        .map(str::to_string)
        .collect();
    if let Some(pos) = remotes.iter().position(|r| r == "origin") {
        let origin = remotes.remove(pos);
@@ -202,6 +257,11 @@ async fn get_default_branch(cwd: &Path) -> Option<String> {
    }

    // No remote-derived default; try common local defaults if they exist
+    get_default_branch_local(cwd).await
+}
+
+/// Attempt to determine the repository's default branch name from local branches.
+async fn get_default_branch_local(cwd: &Path) -> Option<String> {
    for candidate in ["main", "master"] {
        if let Some(verify) = run_git_command_with_timeout(
            &[
@@ -417,7 +477,7 @@ async fn diff_against_sha(cwd: &Path, sha: &GitSha) -> Option<String> {
        let untracked: Vec<String> = String::from_utf8(untracked_output.stdout)
            .ok()?
            .lines()
-            .map(|s| s.to_string())
+            .map(str::to_string)
            .filter(|s| !s.is_empty())
            .collect();

@@ -485,6 +545,46 @@ pub fn resolve_root_git_project_for_trust(cwd: &Path) -> Option<PathBuf> {
    git_dir_path.parent().map(Path::to_path_buf)
 }

+/// Lists local branches as reported by `git branch --format=%(refname:short)`.
+///
+/// Names are trimmed, blank lines are dropped, and the result is sorted. If
+/// `get_default_branch_local` names a branch that is in the list, that branch
+/// is moved to the front. A failed git invocation yields an empty list.
+pub async fn local_git_branches(cwd: &Path) -> Vec<String> {
+    let listing =
+        run_git_command_with_timeout(&["branch", "--format=%(refname:short)"], cwd).await;
+    let mut branches: Vec<String> = match listing {
+        Some(out) if out.status.success() => String::from_utf8_lossy(&out.stdout)
+            .lines()
+            .map(str::trim)
+            .filter(|name| !name.is_empty())
+            .map(str::to_string)
+            .collect(),
+        _ => Vec::new(),
+    };
+    branches.sort_unstable();
+
+    // Promote the default branch, if present, to the head of the list.
+    let default_branch = get_default_branch_local(cwd).await;
+    if let Some(base) = default_branch
+        && let Some(pos) = branches.iter().position(|name| *name == base)
+    {
+        let promoted = branches.remove(pos);
+        branches.insert(0, promoted);
+    }
+
+    branches
+}
+
+/// Returns the trimmed output of `git branch --show-current` run in `cwd`.
+///
+/// Yields `None` when the command produces no output handle, exits
+/// unsuccessfully, prints invalid UTF-8, or prints an empty name.
+pub async fn current_branch_name(cwd: &Path) -> Option<String> {
+    let output = run_git_command_with_timeout(&["branch", "--show-current"], cwd).await?;
+    if !output.status.success() {
+        return None;
+    }
+
+    let raw = String::from_utf8(output.stdout).ok()?;
+    let name = raw.trim();
+    if name.is_empty() {
+        None
+    } else {
+        Some(name.to_string())
+    }
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
@@ -551,6 +651,80 @@ mod tests {
        repo_path
    }

+    /// `recent_commits` must return an empty list for a fresh temporary directory.
+    #[tokio::test]
+    async fn test_recent_commits_non_git_directory_returns_empty() {
+        let scratch = TempDir::new().expect("Failed to create temp dir");
+        let commits = recent_commits(scratch.path(), 10).await;
+        assert!(commits.is_empty(), "expected no commits outside a git repo");
+    }
+
+    /// Creates three commits on top of the fixture repo and checks that
+    /// `recent_commits` reports them with the latest change first.
+    #[tokio::test]
+    async fn test_recent_commits_orders_and_limits() {
+        use tokio::time::Duration;
+        use tokio::time::sleep;
+
+        let temp_dir = TempDir::new().expect("Failed to create temp dir");
+        let repo_path = create_test_git_repo(&temp_dir).await;
+
+        // (file contents, commit subject) for each commit, oldest first.
+        let changes = [
+            ("one", "first change"),
+            ("two", "second change"),
+            ("three", "third change"),
+        ];
+        for (idx, (contents, subject)) in changes.into_iter().enumerate() {
+            // Space the commits more than a second apart so their commit timestamps differ.
+            if idx > 0 {
+                sleep(Duration::from_millis(1100)).await;
+            }
+
+            fs::write(repo_path.join("file.txt"), contents).unwrap();
+            for git_args in [vec!["add", "file.txt"], vec!["commit", "-m", subject]] {
+                let output = Command::new("git")
+                    .args(&git_args)
+                    .current_dir(&repo_path)
+                    .output()
+                    .await
+                    .expect("failed to run git");
+                // `output()` is `Ok` even when git exits non-zero, so check the status explicitly.
+                assert!(
+                    output.status.success(),
+                    "git {git_args:?} failed: {}",
+                    String::from_utf8_lossy(&output.stderr)
+                );
+            }
+        }
+
+        // Request the latest 3 commits; should be our three changes in reverse time order.
+        let entries = recent_commits(&repo_path, 3).await;
+        let subjects: Vec<&str> = entries.iter().map(|e| e.subject.as_str()).collect();
+        assert_eq!(
+            subjects,
+            vec!["third change", "second change", "first change"]
+        );
+        // Basic sanity on SHA formatting
+        for entry in &entries {
+            assert!(entry.sha.len() >= 7 && entry.sha.chars().all(|c| c.is_ascii_hexdigit()));
+        }
+    }
+
    async fn create_test_git_repo_with_remote(temp_dir: &TempDir) -> (PathBuf, String) {
        let repo_path = create_test_git_repo(temp_dir).await;
        let remote_path = temp_dir.path().join("remote.git");
--- a/codex-rs/core/src/is_safe_command.rs
+++ b/codex-rs/core/src/is_safe_command.rs
@@ -160,9 +160,10 @@ fn is_valid_sed_n_arg(arg: Option<&str>) -> bool {
 #[cfg(test)]
 mod tests {
    use super::*;
+    use std::string::ToString;

    fn vec_str(args: &[&str]) -> Vec<String> {
-        args.iter().map(|s| s.to_string()).collect()
+        args.iter().map(ToString::to_string).collect()
    }

    #[test]
--- a/codex-rs/core/src/landlock.rs
+++ b/codex-rs/core/src/landlock.rs
@@ -16,21 +16,22 @@ use tokio::process::Child;
 pub async fn spawn_command_under_linux_sandbox<P>(
    codex_linux_sandbox_exe: P,
    command: Vec<String>,
+    command_cwd: PathBuf,
    sandbox_policy: &SandboxPolicy,
-    cwd: PathBuf,
+    sandbox_policy_cwd: &Path,
    stdio_policy: StdioPolicy,
    env: HashMap<String, String>,
 ) -> std::io::Result<Child>
 where
    P: AsRef<Path>,
 {
-    let args = create_linux_sandbox_command_args(command, sandbox_policy, &cwd);
+    let args = create_linux_sandbox_command_args(command, sandbox_policy, sandbox_policy_cwd);
    let arg0 = Some("codex-linux-sandbox");
    spawn_child_async(
        codex_linux_sandbox_exe.as_ref().to_path_buf(),
        args,
        arg0,
-        cwd,
+        command_cwd,
        sandbox_policy,
        stdio_policy,
        env,
@@ -42,10 +43,13 @@ where
 fn create_linux_sandbox_command_args(
    command: Vec<String>,
    sandbox_policy: &SandboxPolicy,
-    cwd: &Path,
+    sandbox_policy_cwd: &Path,
 ) -> Vec<String> {
    #[expect(clippy::expect_used)]
-    let sandbox_policy_cwd = cwd.to_str().expect("cwd must be valid UTF-8").to_string();
+    let sandbox_policy_cwd = sandbox_policy_cwd
+        .to_str()
+        .expect("cwd must be valid UTF-8")
+        .to_string();

    #[expect(clippy::expect_used)]
    let sandbox_policy_json =
--- a/codex-rs/core/src/lib.rs
+++ b/codex-rs/core/src/lib.rs
@@ -54,6 +54,7 @@ pub use conversation_manager::NewConversation;
 pub use auth::AuthManager;
 pub use auth::CodexAuth;
 pub mod default_client;
+pub mod error_codes;
 pub mod model_family;
 mod openai_model_info;
 mod openai_tools;
@@ -92,6 +93,8 @@ pub use client_common::Prompt;
 pub use client_common::REVIEW_PROMPT;
 pub use client_common::ResponseEvent;
 pub use client_common::ResponseStream;
+pub use codex::compact::content_items_to_text;
+pub use codex::compact::is_session_prefix_message;
 pub use codex_protocol::models::ContentItem;
 pub use codex_protocol::models::LocalShellAction;
 pub use codex_protocol::models::LocalShellExecAction;
--- a/codex-rs/core/src/mcp_connection_manager.rs
+++ b/codex-rs/core/src/mcp_connection_manager.rs
@@ -40,6 +40,9 @@ const MAX_TOOL_NAME_LENGTH: usize = 64;
 /// Default timeout for initializing MCP server & initially listing tools.
 const DEFAULT_STARTUP_TIMEOUT: Duration = Duration::from_secs(10);

+/// Default timeout for individual tool calls.
+const DEFAULT_TOOL_TIMEOUT: Duration = Duration::from_secs(60);
+
 /// Map that holds a startup error for every MCP server that could **not** be
 /// spawned successfully.
 pub type ClientStartErrors = HashMap<String, anyhow::Error>;
@@ -85,6 +88,7 @@ struct ToolInfo {
 struct ManagedClient {
    client: Arc<McpClient>,
    startup_timeout: Duration,
+    tool_timeout: Option<Duration>,
 }

 /// A thin wrapper around a set of running [`McpClient`] instances.
@@ -132,10 +136,9 @@ impl McpConnectionManager {
                continue;
            }

-            let startup_timeout = cfg
-                .startup_timeout_ms
-                .map(Duration::from_millis)
-                .unwrap_or(DEFAULT_STARTUP_TIMEOUT);
+            let startup_timeout = cfg.startup_timeout_sec.unwrap_or(DEFAULT_STARTUP_TIMEOUT);
+
+            let tool_timeout = cfg.tool_timeout_sec.unwrap_or(DEFAULT_TOOL_TIMEOUT);

            join_set.spawn(async move {
                let McpServerConfig {
@@ -171,19 +174,19 @@ impl McpConnectionManager {
                            protocol_version: mcp_types::MCP_SCHEMA_VERSION.to_owned(),
                        };
                        let initialize_notification_params = None;
-                        match client
+                        let init_result = client
                            .initialize(
                                params,
                                initialize_notification_params,
                                Some(startup_timeout),
                            )
-                            .await
-                        {
-                            Ok(_response) => (server_name, Ok((client, startup_timeout))),
-                            Err(e) => (server_name, Err(e)),
-                        }
+                            .await;
+                        (
+                            (server_name, tool_timeout),
+                            init_result.map(|_| (client, startup_timeout)),
+                        )
                    }
-                    Err(e) => (server_name, Err(e.into())),
+                    Err(e) => ((server_name, tool_timeout), Err(e.into())),
                }
            });
        }
@@ -191,8 +194,8 @@ impl McpConnectionManager {
        let mut clients: HashMap<String, ManagedClient> = HashMap::with_capacity(join_set.len());

        while let Some(res) = join_set.join_next().await {
-            let (server_name, client_res) = match res {
-                Ok((server_name, client_res)) => (server_name, client_res),
+            let ((server_name, tool_timeout), client_res) = match res {
+                Ok(result) => result,
                Err(e) => {
                    warn!("Task panic when starting MCP server: {e:#}");
                    continue;
@@ -206,6 +209,7 @@ impl McpConnectionManager {
                        ManagedClient {
                            client: Arc::new(client),
                            startup_timeout,
+                            tool_timeout: Some(tool_timeout),
                        },
                    );
                }
@@ -243,14 +247,13 @@ impl McpConnectionManager {
        server: &str,
        tool: &str,
        arguments: Option<serde_json::Value>,
-        timeout: Option<Duration>,
    ) -> Result<mcp_types::CallToolResult> {
-        let client = self
+        let managed = self
            .clients
            .get(server)
-            .ok_or_else(|| anyhow!("unknown MCP server '{server}'"))?
-            .client
-            .clone();
+            .ok_or_else(|| anyhow!("unknown MCP server '{server}'"))?;
+        let client = managed.client.clone();
+        let timeout = managed.tool_timeout;

        client
            .call_tool(tool.to_string(), arguments, timeout)
--- a/codex-rs/core/src/mcp_tool_call.rs
+++ b/codex-rs/core/src/mcp_tool_call.rs
@@ -1,4 +1,3 @@
-use std::time::Duration;
 use std::time::Instant;

 use tracing::error;
@@ -21,7 +20,6 @@ pub(crate) async fn handle_mcp_tool_call(
    server: String,
    tool_name: String,
    arguments: String,
-    timeout: Option<Duration>,
 ) -> ResponseInputItem {
    // Parse the `arguments` as JSON. An empty string is OK, but invalid JSON
    // is not.
@@ -58,7 +56,7 @@ pub(crate) async fn handle_mcp_tool_call(
    let start = Instant::now();
    // Perform the tool call.
    let result = sess
-        .call_tool(&server, &tool_name, arguments_value.clone(), timeout)
+        .call_tool(&server, &tool_name, arguments_value.clone())
        .await
        .map_err(|e| format!("tool call error: {e}"));
    let tool_call_end_event = EventMsg::McpToolCallEnd(McpToolCallEndEvent {
--- a/codex-rs/core/src/openai_tools.rs
+++ b/codex-rs/core/src/openai_tools.rs
@@ -7,8 +7,6 @@ use std::collections::HashMap;

 use crate::model_family::ModelFamily;
 use crate::plan_tool::PLAN_TOOL;
-use crate::protocol::AskForApproval;
-use crate::protocol::SandboxPolicy;
 use crate::tool_apply_patch::ApplyPatchToolType;
 use crate::tool_apply_patch::create_apply_patch_freeform_tool;
 use crate::tool_apply_patch::create_apply_patch_json_tool;
@@ -57,10 +55,9 @@ pub(crate) enum OpenAiTool {

 #[derive(Debug, Clone)]
 pub enum ConfigShellToolType {
-    DefaultShell,
-    ShellWithRequest { sandbox_policy: SandboxPolicy },
-    LocalShell,
-    StreamableShell,
+    Default,
+    Local,
+    Streamable,
 }

 #[derive(Debug, Clone)]
@@ -75,8 +72,6 @@ pub(crate) struct ToolsConfig {

 pub(crate) struct ToolsConfigParams<'a> {
    pub(crate) model_family: &'a ModelFamily,
-    pub(crate) approval_policy: AskForApproval,
-    pub(crate) sandbox_policy: SandboxPolicy,
    pub(crate) include_plan_tool: bool,
    pub(crate) include_apply_patch_tool: bool,
    pub(crate) include_web_search_request: bool,
@@ -89,8 +84,6 @@ impl ToolsConfig {
    pub fn new(params: &ToolsConfigParams) -> Self {
        let ToolsConfigParams {
            model_family,
-            approval_policy,
-            sandbox_policy,
            include_plan_tool,
            include_apply_patch_tool,
            include_web_search_request,
@@ -98,18 +91,13 @@ impl ToolsConfig {
            include_view_image_tool,
            experimental_unified_exec_tool,
        } = params;
-        let mut shell_type = if *use_streamable_shell_tool {
-            ConfigShellToolType::StreamableShell
+        let shell_type = if *use_streamable_shell_tool {
+            ConfigShellToolType::Streamable
        } else if model_family.uses_local_shell_tool {
-            ConfigShellToolType::LocalShell
+            ConfigShellToolType::Local
        } else {
-            ConfigShellToolType::DefaultShell
+            ConfigShellToolType::Default
        };
-        if matches!(approval_policy, AskForApproval::OnRequest) && !use_streamable_shell_tool {
-            shell_type = ConfigShellToolType::ShellWithRequest {
-                sandbox_policy: sandbox_policy.clone(),
-            }
-        }

        let apply_patch_tool_type = match model_family.apply_patch_tool_type {
            Some(ApplyPatchToolType::Freeform) => Some(ApplyPatchToolType::Freeform),
@@ -170,40 +158,6 @@ pub(crate) enum JsonSchema {
    },
 }

-fn create_shell_tool() -> OpenAiTool {
-    let mut properties = BTreeMap::new();
-    properties.insert(
-        "command".to_string(),
-        JsonSchema::Array {
-            items: Box::new(JsonSchema::String { description: None }),
-            description: Some("The command to execute".to_string()),
-        },
-    );
-    properties.insert(
-        "workdir".to_string(),
-        JsonSchema::String {
-            description: Some("The working directory to execute the command in".to_string()),
-        },
-    );
-    properties.insert(
-        "timeout_ms".to_string(),
-        JsonSchema::Number {
-            description: Some("The timeout for the command in milliseconds".to_string()),
-        },
-    );
-
-    OpenAiTool::Function(ResponsesApiTool {
-        name: "shell".to_string(),
-        description: "Runs a shell command and returns its output".to_string(),
-        strict: false,
-        parameters: JsonSchema::Object {
-            properties,
-            required: Some(vec!["command".to_string()]),
-            additional_properties: Some(false),
-        },
-    })
-}
-
 fn create_unified_exec_tool() -> OpenAiTool {
    let mut properties = BTreeMap::new();
    properties.insert(
@@ -251,7 +205,7 @@ fn create_unified_exec_tool() -> OpenAiTool {
    })
 }

-fn create_shell_tool_for_sandbox(sandbox_policy: &SandboxPolicy) -> OpenAiTool {
+fn create_shell_tool() -> OpenAiTool {
    let mut properties = BTreeMap::new();
    properties.insert(
        "command".to_string(),
@@ -273,20 +227,18 @@ fn create_shell_tool_for_sandbox(sandbox_policy: &SandboxPolicy) -> OpenAiTool {
        },
    );

-    if !matches!(sandbox_policy, SandboxPolicy::DangerFullAccess) {
-        properties.insert(
+    properties.insert(
        "with_escalated_permissions".to_string(),
        JsonSchema::Boolean {
            description: Some("Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions".to_string()),
        },
    );
-        properties.insert(
+    properties.insert(
        "justification".to_string(),
        JsonSchema::String {
            description: Some("Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command.".to_string()),
        },
    );
-    }

    OpenAiTool::Function(ResponsesApiTool {
        name: "shell".to_string(),
@@ -333,7 +285,7 @@ pub(crate) struct ApplyPatchToolArgs {
 /// Responses API:
 /// https://platform.openai.com/docs/guides/function-calling?api-mode=responses
 pub fn create_tools_json_for_responses_api(
-    tools: &Vec<OpenAiTool>,
+    tools: &[OpenAiTool],
 ) -> crate::error::Result<Vec<serde_json::Value>> {
    let mut tools_json = Vec::new();

@@ -348,7 +300,7 @@ pub fn create_tools_json_for_responses_api(
 /// Chat Completions API:
 /// https://platform.openai.com/docs/guides/function-calling?api-mode=chat
 pub(crate) fn create_tools_json_for_chat_completions_api(
-    tools: &Vec<OpenAiTool>,
+    tools: &[OpenAiTool],
 ) -> crate::error::Result<Vec<serde_json::Value>> {
    // We start with the JSON for the Responses API and then rewrite it to match
    // the chat completions tool call format.
@@ -448,10 +400,7 @@ fn sanitize_json_schema(value: &mut JsonValue) {
            }

            // Normalize/ensure type
-            let mut ty = map
-                .get("type")
-                .and_then(|v| v.as_str())
-                .map(|s| s.to_string());
+            let mut ty = map.get("type").and_then(|v| v.as_str()).map(str::to_string);

            // If type is an array (union), pick first supported; else leave to inference
            if ty.is_none()
@@ -537,16 +486,13 @@ pub(crate) fn get_openai_tools(
        tools.push(create_unified_exec_tool());
    } else {
        match &config.shell_type {
-            ConfigShellToolType::DefaultShell => {
+            ConfigShellToolType::Default => {
                tools.push(create_shell_tool());
            }
-            ConfigShellToolType::ShellWithRequest { sandbox_policy } => {
-                tools.push(create_shell_tool_for_sandbox(sandbox_policy));
-            }
-            ConfigShellToolType::LocalShell => {
+            ConfigShellToolType::Local => {
                tools.push(OpenAiTool::LocalShell {});
            }
-            ConfigShellToolType::StreamableShell => {
+            ConfigShellToolType::Streamable => {
                tools.push(OpenAiTool::Function(
                    crate::exec_command::create_exec_command_tool_for_responses_api(),
                ));
@@ -636,8 +582,6 @@ mod tests {
            .expect("codex-mini-latest should be a valid model family");
        let config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &model_family,
-            approval_policy: AskForApproval::Never,
-            sandbox_policy: SandboxPolicy::ReadOnly,
            include_plan_tool: true,
            include_apply_patch_tool: false,
            include_web_search_request: true,
@@ -658,8 +602,6 @@ mod tests {
        let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
        let config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &model_family,
-            approval_policy: AskForApproval::Never,
-            sandbox_policy: SandboxPolicy::ReadOnly,
            include_plan_tool: true,
            include_apply_patch_tool: false,
            include_web_search_request: true,
@@ -680,8 +622,6 @@ mod tests {
        let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
        let config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &model_family,
-            approval_policy: AskForApproval::Never,
-            sandbox_policy: SandboxPolicy::ReadOnly,
            include_plan_tool: false,
            include_apply_patch_tool: false,
            include_web_search_request: true,
@@ -786,8 +726,6 @@ mod tests {
        let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
        let config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &model_family,
-            approval_policy: AskForApproval::Never,
-            sandbox_policy: SandboxPolicy::ReadOnly,
            include_plan_tool: false,
            include_apply_patch_tool: false,
            include_web_search_request: false,
@@ -864,8 +802,6 @@ mod tests {
        let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
        let config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &model_family,
-            approval_policy: AskForApproval::Never,
-            sandbox_policy: SandboxPolicy::ReadOnly,
            include_plan_tool: false,
            include_apply_patch_tool: false,
            include_web_search_request: true,
@@ -927,8 +863,6 @@ mod tests {
        let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
        let config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &model_family,
-            approval_policy: AskForApproval::Never,
-            sandbox_policy: SandboxPolicy::ReadOnly,
            include_plan_tool: false,
            include_apply_patch_tool: false,
            include_web_search_request: true,
@@ -985,8 +919,6 @@ mod tests {
        let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
        let config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &model_family,
-            approval_policy: AskForApproval::Never,
-            sandbox_policy: SandboxPolicy::ReadOnly,
            include_plan_tool: false,
            include_apply_patch_tool: false,
            include_web_search_request: true,
@@ -1046,8 +978,6 @@ mod tests {
        let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
        let config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &model_family,
-            approval_policy: AskForApproval::Never,
-            sandbox_policy: SandboxPolicy::ReadOnly,
            include_plan_tool: false,
            include_apply_patch_tool: false,
            include_web_search_request: true,
@@ -1100,14 +1030,8 @@ mod tests {
    }

    #[test]
-    fn test_shell_tool_for_sandbox_workspace_write() {
-        let sandbox_policy = SandboxPolicy::WorkspaceWrite {
-            writable_roots: vec!["workspace".into()],
-            network_access: false,
-            exclude_tmpdir_env_var: false,
-            exclude_slash_tmp: false,
-        };
-        let tool = super::create_shell_tool_for_sandbox(&sandbox_policy);
+    fn test_shell_tool() {
+        let tool = super::create_shell_tool();
        let OpenAiTool::Function(ResponsesApiTool {
            description, name, ..
        }) = &tool
@@ -1119,33 +1043,4 @@ mod tests {
        let expected = "Runs a shell command and returns its output.";
        assert_eq!(description, expected);
    }
-
-    #[test]
-    fn test_shell_tool_for_sandbox_readonly() {
-        let tool = super::create_shell_tool_for_sandbox(&SandboxPolicy::ReadOnly);
-        let OpenAiTool::Function(ResponsesApiTool {
-            description, name, ..
-        }) = &tool
-        else {
-            panic!("expected function tool");
-        };
-        assert_eq!(name, "shell");
-
-        let expected = "Runs a shell command and returns its output.";
-        assert_eq!(description, expected);
-    }
-
-    #[test]
-    fn test_shell_tool_for_sandbox_danger_full_access() {
-        let tool = super::create_shell_tool_for_sandbox(&SandboxPolicy::DangerFullAccess);
-        let OpenAiTool::Function(ResponsesApiTool {
-            description, name, ..
-        }) = &tool
-        else {
-            panic!("expected function tool");
-        };
-        assert_eq!(name, "shell");
-
-        assert_eq!(description, "Runs a shell command and returns its output.");
-    }
 }
--- a/codex-rs/core/src/parse_command.rs
+++ b/codex-rs/core/src/parse_command.rs
@@ -40,7 +40,7 @@ impl From<ParsedCommand> for codex_protocol::parse_command::ParsedCommand {
 }

 fn shlex_join(tokens: &[String]) -> String {
-    shlex_try_join(tokens.iter().map(|s| s.as_str()))
+    shlex_try_join(tokens.iter().map(String::as_str))
        .unwrap_or_else(|_| "<command included NUL byte>".to_string())
 }

@@ -72,13 +72,14 @@ pub fn parse_command(command: &[String]) -> Vec<ParsedCommand> {
 /// Tests are at the top to encourage using TDD + Codex to fix the implementation.
 mod tests {
    use super::*;
+    use std::string::ToString;

    fn shlex_split_safe(s: &str) -> Vec<String> {
-        shlex_split(s).unwrap_or_else(|| s.split_whitespace().map(|s| s.to_string()).collect())
+        shlex_split(s).unwrap_or_else(|| s.split_whitespace().map(ToString::to_string).collect())
    }

    fn vec_str(args: &[&str]) -> Vec<String> {
-        args.iter().map(|s| s.to_string()).collect()
+        args.iter().map(ToString::to_string).collect()
    }

    fn assert_parsed(args: &[String], expected: Vec<ParsedCommand>) {
@@ -894,7 +895,7 @@ fn simplify_once(commands: &[ParsedCommand]) -> Option<Vec<ParsedCommand>> {

    // echo ... && ...rest => ...rest
    if let ParsedCommand::Unknown { cmd } = &commands[0]
-        && shlex_split(cmd).is_some_and(|t| t.first().map(|s| s.as_str()) == Some("echo"))
+        && shlex_split(cmd).is_some_and(|t| t.first().map(String::as_str) == Some("echo"))
    {
        return Some(commands[1..].to_vec());
    }
@@ -902,7 +903,7 @@ fn simplify_once(commands: &[ParsedCommand]) -> Option<Vec<ParsedCommand>> {
    // cd foo && [any command] => [any command] (keep non-cd when a cd is followed by something)
    if let Some(idx) = commands.iter().position(|pc| match pc {
        ParsedCommand::Unknown { cmd } => {
-            shlex_split(cmd).is_some_and(|t| t.first().map(|s| s.as_str()) == Some("cd"))
+            shlex_split(cmd).is_some_and(|t| t.first().map(String::as_str) == Some("cd"))
        }
        _ => false,
    }) && commands.len() > idx + 1
@@ -1035,7 +1036,7 @@ fn short_display_path(path: &str) -> String {
    });
    parts
        .next()
-        .map(|s| s.to_string())
+        .map(str::to_string)
        .unwrap_or_else(|| trimmed.to_string())
 }

@@ -1156,10 +1157,8 @@ fn parse_bash_lc_commands(original: &[String]) -> Option<Vec<ParsedCommand>> {
        // bias toward the primary command when pipelines are present.
        // First, drop obvious small formatting helpers (e.g., wc/awk/etc).
        let had_multiple_commands = all_commands.len() > 1;
-        // The bash AST walker yields commands in right-to-left order for
-        // connector/pipeline sequences. Reverse to reflect actual execution order.
-        let mut filtered_commands = drop_small_formatting_commands(all_commands);
-        filtered_commands.reverse();
+        // Commands arrive in source order; drop formatting helpers while preserving it.
+        let filtered_commands = drop_small_formatting_commands(all_commands);
        if filtered_commands.is_empty() {
            return Some(vec![ParsedCommand::Unknown {
                cmd: script.clone(),
@@ -1192,8 +1191,8 @@ fn parse_bash_lc_commands(original: &[String]) -> Option<Vec<ParsedCommand>> {
                        if had_connectors {
                            let has_pipe = script_tokens.iter().any(|t| t == "|");
                            let has_sed_n = script_tokens.windows(2).any(|w| {
-                                w.first().map(|s| s.as_str()) == Some("sed")
-                                    && w.get(1).map(|s| s.as_str()) == Some("-n")
+                                w.first().map(String::as_str) == Some("sed")
+                                    && w.get(1).map(String::as_str) == Some("-n")
                            });
                            if has_pipe && has_sed_n {
                                ParsedCommand::Read {
@@ -1273,7 +1272,7 @@ fn is_small_formatting_command(tokens: &[String]) -> bool {
            // Keep `sed -n <range> file` (treated as a file read elsewhere);
            // otherwise consider it a formatting helper in a pipeline.
            tokens.len() < 4
-                || !(tokens[1] == "-n" && is_valid_sed_n_arg(tokens.get(2).map(|s| s.as_str())))
+                || !(tokens[1] == "-n" && is_valid_sed_n_arg(tokens.get(2).map(String::as_str)))
        }
        _ => false,
    }
@@ -1320,7 +1319,7 @@ fn summarize_main_tokens(main_cmd: &[String]) -> ParsedCommand {
                (None, non_flags.first().map(|s| short_display_path(s)))
            } else {
                (
-                    non_flags.first().cloned().map(|s| s.to_string()),
+                    non_flags.first().cloned().map(String::from),
                    non_flags.get(1).map(|s| short_display_path(s)),
                )
            };
@@ -1355,7 +1354,7 @@ fn summarize_main_tokens(main_cmd: &[String]) -> ParsedCommand {
                .collect();
            // Do not shorten the query: grep patterns may legitimately contain slashes
            // and should be preserved verbatim. Only paths should be shortened.
-            let query = non_flags.first().cloned().map(|s| s.to_string());
+            let query = non_flags.first().cloned().map(String::from);
            let path = non_flags.get(1).map(|s| short_display_path(s));
            ParsedCommand::Search {
                cmd: shlex_join(main_cmd),
@@ -1365,7 +1364,7 @@ fn summarize_main_tokens(main_cmd: &[String]) -> ParsedCommand {
        }
        Some((head, tail)) if head == "cat" => {
            // Support both `cat <file>` and `cat -- <file>` forms.
-            let effective_tail: &[String] = if tail.first().map(|s| s.as_str()) == Some("--") {
+            let effective_tail: &[String] = if tail.first().map(String::as_str) == Some("--") {
                &tail[1..]
            } else {
                tail
@@ -1481,7 +1480,7 @@ fn summarize_main_tokens(main_cmd: &[String]) -> ParsedCommand {
            if head == "sed"
                && tail.len() >= 3
                && tail[0] == "-n"
-                && is_valid_sed_n_arg(tail.get(1).map(|s| s.as_str())) =>
+                && is_valid_sed_n_arg(tail.get(1).map(String::as_str)) =>
        {
            if let Some(path) = tail.get(2) {
                let name = short_display_path(path);
--- a/codex-rs/core/src/review_format.rs
+++ b/codex-rs/core/src/review_format.rs
@@ -22,14 +22,14 @@ pub fn format_review_findings_block(
    selection: Option<&[bool]>,
 ) -> String {
    let mut lines: Vec<String> = Vec::new();
+    lines.push(String::new());

    // Header
-    let header = if findings.len() > 1 {
-        "Full review comments:"
+    if findings.len() > 1 {
+        lines.push("Full review comments:".to_string());
    } else {
-        "Review comment:"
-    };
-    lines.push(header.to_string());
+        lines.push("Review comment:".to_string());
+    }

    for (idx, item) in findings.iter().enumerate() {
        lines.push(String::new());
--- a/codex-rs/core/src/rollout/policy.rs
+++ b/codex-rs/core/src/rollout/policy.rs
@@ -60,6 +60,7 @@ pub(crate) fn should_persist_event_msg(ev: &EventMsg) -> bool {
        | EventMsg::ExecCommandEnd(_)
        | EventMsg::ExecApprovalRequest(_)
        | EventMsg::ApplyPatchApprovalRequest(_)
+        | EventMsg::CompactApprovalRequest(_)
        | EventMsg::BackgroundEvent(_)
        | EventMsg::StreamError(_)
        | EventMsg::PatchApplyBegin(_)
--- a/codex-rs/core/src/seatbelt.rs
+++ b/codex-rs/core/src/seatbelt.rs
@@ -18,19 +18,20 @@ const MACOS_PATH_TO_SEATBELT_EXECUTABLE: &str = "/usr/bin/sandbox-exec";

 pub async fn spawn_command_under_seatbelt(
    command: Vec<String>,
+    command_cwd: PathBuf,
    sandbox_policy: &SandboxPolicy,
-    cwd: PathBuf,
+    sandbox_policy_cwd: &Path,
    stdio_policy: StdioPolicy,
    mut env: HashMap<String, String>,
 ) -> std::io::Result<Child> {
-    let args = create_seatbelt_command_args(command, sandbox_policy, &cwd);
+    let args = create_seatbelt_command_args(command, sandbox_policy, sandbox_policy_cwd);
    let arg0 = None;
    env.insert(CODEX_SANDBOX_ENV_VAR.to_string(), "seatbelt".to_string());
    spawn_child_async(
        PathBuf::from(MACOS_PATH_TO_SEATBELT_EXECUTABLE),
        args,
        arg0,
-        cwd,
+        command_cwd,
        sandbox_policy,
        stdio_policy,
        env,
@@ -41,7 +42,7 @@ pub async fn spawn_command_under_seatbelt(
 fn create_seatbelt_command_args(
    command: Vec<String>,
    sandbox_policy: &SandboxPolicy,
-    cwd: &Path,
+    sandbox_policy_cwd: &Path,
 ) -> Vec<String> {
    let (file_write_policy, extra_cli_args) = {
        if sandbox_policy.has_full_disk_write_access() {
@@ -51,7 +52,7 @@ fn create_seatbelt_command_args(
                Vec::<String>::new(),
            )
        } else {
-            let writable_roots = sandbox_policy.get_writable_roots_with_cwd(cwd);
+            let writable_roots = sandbox_policy.get_writable_roots_with_cwd(sandbox_policy_cwd);

            let mut writable_folder_policies: Vec<String> = Vec::new();
            let mut cli_args: Vec<String> = Vec::new();
--- a/codex-rs/core/src/shell.rs
+++ b/codex-rs/core/src/shell.rs
@@ -32,15 +32,19 @@ pub enum Shell {
 impl Shell {
    pub fn format_default_shell_invocation(&self, command: Vec<String>) -> Option<Vec<String>> {
        match self {
-            Shell::Zsh(zsh) => {
-                format_shell_invocation_with_rc(&command, &zsh.shell_path, &zsh.zshrc_path)
-            }
-            Shell::Bash(bash) => {
-                format_shell_invocation_with_rc(&command, &bash.shell_path, &bash.bashrc_path)
-            }
+            Shell::Zsh(zsh) => format_shell_invocation_with_rc(
+                command.as_slice(),
+                &zsh.shell_path,
+                &zsh.zshrc_path,
+            ),
+            Shell::Bash(bash) => format_shell_invocation_with_rc(
+                command.as_slice(),
+                &bash.shell_path,
+                &bash.bashrc_path,
+            ),
            Shell::PowerShell(ps) => {
                // If model generated a bash command, prefer a detected bash fallback
-                if let Some(script) = strip_bash_lc(&command) {
+                if let Some(script) = strip_bash_lc(command.as_slice()) {
                    return match &ps.bash_exe_fallback {
                        Some(bash) => Some(vec![
                            bash.to_string_lossy().to_string(),
@@ -69,7 +73,7 @@ impl Shell {
                        return Some(command);
                    }

-                    let joined = shlex::try_join(command.iter().map(|s| s.as_str())).ok();
+                    let joined = shlex::try_join(command.iter().map(String::as_str)).ok();
                    return joined.map(|arg| {
                        vec![
                            ps.exe.clone(),
@@ -102,12 +106,12 @@ impl Shell {
 }

 fn format_shell_invocation_with_rc(
-    command: &Vec<String>,
+    command: &[String],
    shell_path: &str,
    rc_path: &str,
 ) -> Option<Vec<String>> {
    let joined = strip_bash_lc(command)
-        .or_else(|| shlex::try_join(command.iter().map(|s| s.as_str())).ok())?;
+        .or_else(|| shlex::try_join(command.iter().map(String::as_str)).ok())?;

    let rc_command = if std::path::Path::new(rc_path).exists() {
        format!("source {rc_path} && ({joined})")
@@ -118,8 +122,8 @@ fn format_shell_invocation_with_rc(
    Some(vec![shell_path.to_string(), "-lc".to_string(), rc_command])
 }

-fn strip_bash_lc(command: &Vec<String>) -> Option<String> {
-    match command.as_slice() {
+fn strip_bash_lc(command: &[String]) -> Option<String> {
+    match command {
        // exactly three items
        [first, second, third]
            // first two must be "bash", "-lc"
@@ -220,6 +224,7 @@ pub async fn default_user_shell() -> Shell {
 mod tests {
    use super::*;
    use std::process::Command;
+    use std::string::ToString;

    #[tokio::test]
    async fn test_current_shell_detects_zsh() {
@@ -323,7 +328,7 @@ mod tests {
            });

            let actual_cmd = shell
-                .format_default_shell_invocation(input.iter().map(|s| s.to_string()).collect());
+                .format_default_shell_invocation(input.iter().map(ToString::to_string).collect());
            let expected_cmd = expected_cmd
                .iter()
                .map(|s| s.replace("BASHRC_PATH", bashrc_path.to_str().unwrap()))
@@ -345,6 +350,7 @@ mod tests {
                },
                SandboxType::None,
                &SandboxPolicy::DangerFullAccess,
+                temp_home.path(),
                &None,
                None,
            )
@@ -366,6 +372,7 @@ mod tests {
 #[cfg(target_os = "macos")]
 mod macos_tests {
    use super::*;
+    use std::string::ToString;

    #[tokio::test]
    async fn test_run_with_profile_escaping_and_execution() {
@@ -429,7 +436,7 @@ mod macos_tests {
            });

            let actual_cmd = shell
-                .format_default_shell_invocation(input.iter().map(|s| s.to_string()).collect());
+                .format_default_shell_invocation(input.iter().map(ToString::to_string).collect());
            let expected_cmd = expected_cmd
                .iter()
                .map(|s| s.replace("ZSHRC_PATH", zshrc_path.to_str().unwrap()))
@@ -451,6 +458,7 @@ mod macos_tests {
                },
                SandboxType::None,
                &SandboxPolicy::DangerFullAccess,
+                temp_home.path(),
                &None,
                None,
            )
@@ -553,10 +561,10 @@ mod tests_windows {

        for (shell, input, expected_cmd) in cases {
            let actual_cmd = shell
-                .format_default_shell_invocation(input.iter().map(|s| s.to_string()).collect());
+                .format_default_shell_invocation(input.iter().map(|s| (*s).to_string()).collect());
            assert_eq!(
                actual_cmd,
-                Some(expected_cmd.iter().map(|s| s.to_string()).collect())
+                Some(expected_cmd.iter().map(|s| (*s).to_string()).collect())
            );
        }
    }
--- a/codex-rs/core/src/turn_diff_tracker.rs
+++ b/codex-rs/core/src/turn_diff_tracker.rs
@@ -65,7 +65,7 @@ impl TurnDiffTracker {
                let baseline_file_info = if path.exists() {
                    let mode = file_mode_for_path(path);
                    let mode_val = mode.unwrap_or(FileMode::Regular);
-                    let content = blob_bytes(path, &mode_val).unwrap_or_default();
+                    let content = blob_bytes(path, mode_val).unwrap_or_default();
                    let oid = if mode == Some(FileMode::Symlink) {
                        format!("{:x}", git_blob_sha1_hex_bytes(&content))
                    } else {
@@ -266,7 +266,7 @@ impl TurnDiffTracker {
        };

        let current_mode = file_mode_for_path(&current_external_path).unwrap_or(FileMode::Regular);
-        let right_bytes = blob_bytes(&current_external_path, &current_mode);
+        let right_bytes = blob_bytes(&current_external_path, current_mode);

        // Compute displays with &mut self before borrowing any baseline content.
        let left_display = self.relative_to_git_root_str(&baseline_external_path);
@@ -388,7 +388,7 @@ enum FileMode {
 }

 impl FileMode {
-    fn as_str(&self) -> &'static str {
+    fn as_str(self) -> &'static str {
        match self {
            FileMode::Regular => "100644",
            #[cfg(unix)]
@@ -427,9 +427,9 @@ fn file_mode_for_path(_path: &Path) -> Option<FileMode> {
    Some(FileMode::Regular)
 }

-fn blob_bytes(path: &Path, mode: &FileMode) -> Option<Vec<u8>> {
+fn blob_bytes(path: &Path, mode: FileMode) -> Option<Vec<u8>> {
    if path.exists() {
-        let contents = if *mode == FileMode::Symlink {
+        let contents = if mode == FileMode::Symlink {
            symlink_blob_bytes(path)
                .ok_or_else(|| anyhow!("failed to read symlink target for {}", path.display()))
        } else {
--- a/codex-rs/core/src/unified_exec/mod.rs
+++ b/codex-rs/core/src/unified_exec/mod.rs
@@ -557,6 +557,7 @@ mod tests {

    #[cfg(unix)]
    #[tokio::test]
+    #[ignore] // Ignored until we have a better way to test this.
    async fn requests_with_large_timeout_are_capped() -> Result<(), UnifiedExecError> {
        let manager = UnifiedExecSessionManager::default();

@@ -578,6 +579,7 @@ mod tests {

    #[cfg(unix)]
    #[tokio::test]
+    #[ignore] // Ignored until we have a better way to test this.
    async fn completed_commands_do_not_persist_sessions() -> Result<(), UnifiedExecError> {
        let manager = UnifiedExecSessionManager::default();
        let result = manager
--- a/codex-rs/core/tests/common/Cargo.toml
+++ b/codex-rs/core/tests/common/Cargo.toml
@@ -7,7 +7,8 @@ edition = "2024"
 path = "lib.rs"

 [dependencies]
-codex-core = { path = "../.." }
-serde_json = "1"
-tempfile = "3"
-tokio = { version = "1", features = ["time"] }
+codex-core = { workspace = true }
+serde_json = { workspace = true }
+tempfile = { workspace = true }
+tokio = { workspace = true, features = ["time"] }
+wiremock = { workspace = true }
--- a/codex-rs/core/tests/common/lib.rs
+++ b/codex-rs/core/tests/common/lib.rs
@@ -7,6 +7,8 @@ use codex_core::config::Config;
 use codex_core::config::ConfigOverrides;
 use codex_core::config::ConfigToml;

+pub mod responses;
+
 /// Returns a default `Config` whose on-disk state is confined to the provided
 /// temporary directory. Using a per-test directory keeps tests hermetic and
 /// avoids clobbering a developer’s real `~/.codex`.
@@ -124,3 +126,21 @@ where
        }
    }
 }
+
+#[macro_export]
+macro_rules! non_sandbox_test {
+    // For tests that return ()
+    () => {{
+        if ::std::env::var("CODEX_SANDBOX_NETWORK_DISABLED").is_ok() {
+            println!("Skipping test because it cannot execute when network is disabled in a Codex sandbox.");
+            return;
+        }
+    }};
+    // For tests that return Result<(), _>
+    (result $(,)?) => {{
+        if ::std::env::var("CODEX_SANDBOX_NETWORK_DISABLED").is_ok() {
+            println!("Skipping test because it cannot execute when network is disabled in a Codex sandbox.");
+            return ::core::result::Result::Ok(());
+        }
+    }};
+}
--- a/codex-rs/core/tests/common/responses.rs
+++ b/codex-rs/core/tests/common/responses.rs
@@ -0,0 +1,133 @@
+use serde_json::Value;
+use wiremock::BodyPrintLimit;
+use wiremock::Mock;
+use wiremock::MockServer;
+use wiremock::ResponseTemplate;
+use wiremock::matchers::method;
+use wiremock::matchers::path;
+
+/// Build an SSE stream body from a list of JSON events.
+pub fn sse(events: Vec<Value>) -> String {
+    use std::fmt::Write as _;
+    let mut out = String::new();
+    for ev in events {
+        let kind = ev.get("type").and_then(|v| v.as_str()).unwrap();
+        writeln!(&mut out, "event: {kind}").unwrap();
+        if !ev.as_object().map(|o| o.len() == 1).unwrap_or(false) {
+            write!(&mut out, "data: {ev}\n\n").unwrap();
+        } else {
+            out.push('\n');
+        }
+    }
+    out
+}
+
+/// Convenience: SSE event for a completed response with a specific id.
+pub fn ev_completed(id: &str) -> Value {
+    serde_json::json!({
+        "type": "response.completed",
+        "response": {
+            "id": id,
+            "usage": {"input_tokens":0,"input_tokens_details":null,"output_tokens":0,"output_tokens_details":null,"total_tokens":0}
+        }
+    })
+}
+
+pub fn ev_completed_with_tokens(id: &str, total_tokens: u64) -> Value {
+    serde_json::json!({
+        "type": "response.completed",
+        "response": {
+            "id": id,
+            "usage": {
+                "input_tokens": total_tokens,
+                "input_tokens_details": null,
+                "output_tokens": 0,
+                "output_tokens_details": null,
+                "total_tokens": total_tokens
+            }
+        }
+    })
+}
+
+/// Convenience: SSE event for a single assistant message output item.
+pub fn ev_assistant_message(id: &str, text: &str) -> Value {
+    serde_json::json!({
+        "type": "response.output_item.done",
+        "item": {
+            "type": "message",
+            "role": "assistant",
+            "id": id,
+            "content": [{"type": "output_text", "text": text}]
+        }
+    })
+}
+
+pub fn ev_function_call(call_id: &str, name: &str, arguments: &str) -> Value {
+    serde_json::json!({
+        "type": "response.output_item.done",
+        "item": {
+            "type": "function_call",
+            "call_id": call_id,
+            "name": name,
+            "arguments": arguments
+        }
+    })
+}
+
+/// Convenience: SSE event for an `apply_patch` custom tool call with raw patch
+/// text. This mirrors the payload produced by the Responses API when the model
+/// invokes `apply_patch` directly (before we convert it to a function call).
+pub fn ev_apply_patch_custom_tool_call(call_id: &str, patch: &str) -> Value {
+    serde_json::json!({
+        "type": "response.output_item.done",
+        "item": {
+            "type": "custom_tool_call",
+            "name": "apply_patch",
+            "input": patch,
+            "call_id": call_id
+        }
+    })
+}
+
+/// Convenience: SSE event for an `apply_patch` function call. The Responses API
+/// wraps the patch content in a JSON string under the `input` key; we recreate
+/// the same structure so downstream code exercises the full parsing path.
+pub fn ev_apply_patch_function_call(call_id: &str, patch: &str) -> Value {
+    let arguments = serde_json::json!({ "input": patch });
+    let arguments = serde_json::to_string(&arguments).expect("serialize apply_patch arguments");
+
+    serde_json::json!({
+        "type": "response.output_item.done",
+        "item": {
+            "type": "function_call",
+            "name": "apply_patch",
+            "arguments": arguments,
+            "call_id": call_id
+        }
+    })
+}
+
+pub fn sse_response(body: String) -> ResponseTemplate {
+    ResponseTemplate::new(200)
+        .insert_header("content-type", "text/event-stream")
+        .set_body_raw(body, "text/event-stream")
+}
+
+pub async fn mount_sse_once<M>(server: &MockServer, matcher: M, body: String)
+where
+    M: wiremock::Match + Send + Sync + 'static,
+{
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .and(matcher)
+        .respond_with(sse_response(body))
+        .mount(server)
+        .await;
+}
+
+pub async fn start_mock_server() -> MockServer {
+    MockServer::builder()
+        .body_print_limit(BodyPrintLimit::Limited(80_000))
+        .start()
+        .await
+}
--- a/codex-rs/core/tests/suite/cli_stream.rs
+++ b/codex-rs/core/tests/suite/cli_stream.rs
@@ -1,7 +1,7 @@
 use assert_cmd::Command as AssertCommand;
 use codex_core::RolloutRecorder;
 use codex_core::protocol::GitInfo;
-use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use core_test_support::non_sandbox_test;
 use std::time::Duration;
 use std::time::Instant;
 use tempfile::TempDir;
@@ -21,12 +21,7 @@ use wiremock::matchers::path;
 /// 4. Ensures the response is received exactly once and contains "hi"
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn chat_mode_stream_cli() {
-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return;
-    }
+    non_sandbox_test!();

    let server = MockServer::start().await;
    let sse = concat!(
@@ -102,12 +97,7 @@ async fn chat_mode_stream_cli() {
 /// received by a mock OpenAI Responses endpoint.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn exec_cli_applies_experimental_instructions_file() {
-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return;
-    }
+    non_sandbox_test!();

    // Start mock server which will capture the request and return a minimal
    // SSE stream for a single turn.
@@ -195,12 +185,7 @@ async fn exec_cli_applies_experimental_instructions_file() {
 /// 4. Ensures the fixture content is correctly streamed through the CLI
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn responses_api_stream_cli() {
-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return;
-    }
+    non_sandbox_test!();

    let fixture =
        std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/cli_responses_fixture.sse");
@@ -232,12 +217,7 @@ async fn responses_api_stream_cli() {
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn integration_creates_and_checks_session_file() {
    // Honor sandbox network restrictions for CI parity with the other tests.
-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return;
-    }
+    non_sandbox_test!();

    // 1. Temp home so we read/write isolated session files.
    let home = TempDir::new().unwrap();
--- a/codex-rs/core/tests/suite/client.rs
+++ b/codex-rs/core/tests/suite/client.rs
@@ -16,12 +16,13 @@ use codex_core::built_in_model_providers;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::InputItem;
 use codex_core::protocol::Op;
-use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
 use codex_protocol::mcp_protocol::ConversationId;
 use codex_protocol::models::ReasoningItemReasoningSummary;
 use codex_protocol::models::WebSearchAction;
 use core_test_support::load_default_config_for_test;
 use core_test_support::load_sse_fixture_with_id;
+use core_test_support::non_sandbox_test;
+use core_test_support::responses;
 use core_test_support::wait_for_event;
 use futures::StreamExt;
 use serde_json::json;
@@ -125,12 +126,7 @@ fn write_auth_json(

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn resume_includes_initial_messages_and_sends_prior_items() {
-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return;
-    }
+    non_sandbox_test!();

    // Create a fake rollout session file with prior user + system + assistant messages.
    let tmpdir = TempDir::new().unwrap();
@@ -296,12 +292,7 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn includes_conversation_id_and_model_headers_in_request() {
-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return;
-    }
+    non_sandbox_test!();

    // Mock server
    let server = MockServer::start().await;
@@ -426,12 +417,7 @@ async fn includes_base_instructions_override_in_request() {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn chatgpt_auth_sends_correct_request() {
-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return;
-    }
+    non_sandbox_test!();

    // Mock server
    let server = MockServer::start().await;
@@ -505,12 +491,7 @@ async fn chatgpt_auth_sends_correct_request() {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn prefers_apikey_when_config_prefers_apikey_even_with_chatgpt_tokens() {
-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return;
-    }
+    non_sandbox_test!();

    // Mock server
    let server = MockServer::start().await;
@@ -637,12 +618,7 @@ async fn includes_user_instructions_message_in_request() {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn azure_responses_request_includes_store_and_reasoning_ids() {
-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return;
-    }
+    non_sandbox_test!();

    let server = MockServer::start().await;

@@ -776,6 +752,100 @@ async fn azure_responses_request_includes_store_and_reasoning_ids() {
    assert_eq!(body["input"][5]["id"].as_str(), Some("custom-tool-id"));
 }

+/// Verifies that the `TokenCount` event emitted after a completed response
+/// carries both the token usage reported by the SSE stream and the rate-limit
+/// snapshot taken from the `x-codex-*` response headers.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn token_count_includes_rate_limits_snapshot() {
+    // This test talks to a local mock HTTP server, so apply the same sandbox
+    // guard as the other mock-server tests in this file.
+    non_sandbox_test!();
+
+    let server = MockServer::start().await;
+
+    let sse_body = responses::sse(vec![responses::ev_completed_with_tokens("resp_rate", 123)]);
+
+    // The rate-limit values travel in response headers, not in the SSE body.
+    let response = ResponseTemplate::new(200)
+        .insert_header("content-type", "text/event-stream")
+        .insert_header("x-codex-primary-used-percent", "12.5")
+        .insert_header("x-codex-secondary-used-percent", "40.0")
+        .insert_header("x-codex-primary-over-secondary-limit-percent", "75.0")
+        .insert_header("x-codex-primary-window-minutes", "10")
+        .insert_header("x-codex-secondary-window-minutes", "60")
+        .set_body_raw(sse_body, "text/event-stream");
+
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(response)
+        .expect(1)
+        .mount(&server)
+        .await;
+
+    let mut provider = built_in_model_providers()["openai"].clone();
+    provider.base_url = Some(format!("{}/v1", server.uri()));
+
+    let home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&home);
+    config.model_provider = provider;
+
+    let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("test"));
+    let codex = conversation_manager
+        .new_conversation(config)
+        .await
+        .expect("create conversation")
+        .conversation;
+
+    codex
+        .submit(Op::UserInput {
+            items: vec![InputItem::Text {
+                text: "hello".into(),
+            }],
+        })
+        .await
+        .unwrap();
+
+    let token_event = wait_for_event(&codex, |msg| matches!(msg, EventMsg::TokenCount(_))).await;
+    let final_payload = match token_event {
+        EventMsg::TokenCount(ev) => ev,
+        _ => unreachable!(),
+    };
+    // Assert full JSON for the final token count event (usage + rate limits)
+    let final_json = serde_json::to_value(&final_payload).unwrap();
+    pretty_assertions::assert_eq!(
+        final_json,
+        json!({
+            "info": {
+                "total_token_usage": {
+                    "input_tokens": 123,
+                    "cached_input_tokens": 0,
+                    "output_tokens": 0,
+                    "reasoning_output_tokens": 0,
+                    "total_tokens": 123
+                },
+                "last_token_usage": {
+                    "input_tokens": 123,
+                    "cached_input_tokens": 0,
+                    "output_tokens": 0,
+                    "reasoning_output_tokens": 0,
+                    "total_tokens": 123
+                },
+                // Default model is gpt-5 in tests → 272000 context window
+                "model_context_window": 272000
+            },
+            "rate_limits": {
+                "primary_used_percent": 12.5,
+                "secondary_used_percent": 40.0,
+                "primary_to_secondary_ratio_percent": 75.0,
+                "primary_window_minutes": 10,
+                "secondary_window_minutes": 60
+            }
+        })
+    );
+    let usage = final_payload
+        .info
+        .expect("token usage info should be recorded after completion");
+    assert_eq!(usage.total_token_usage.total_tokens, 123);
+    let final_snapshot = final_payload
+        .rate_limits
+        .expect("latest rate limit snapshot should be retained");
+    assert_eq!(final_snapshot.primary_used_percent, 12.5);
+
+    // Drain the turn so the conversation finishes before the test returns.
+    wait_for_event(&codex, |msg| matches!(msg, EventMsg::TaskComplete(_))).await;
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn azure_overrides_assign_properties_used_for_responses_url() {
    let existing_env_var_with_random_value = if cfg!(windows) { "USERNAME" } else { "USER" };
@@ -941,12 +1011,7 @@ fn create_dummy_codex_auth() -> CodexAuth {
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn history_dedupes_streamed_and_final_messages_across_turns() {
    // Skip under Codex sandbox network restrictions (mirrors other tests).
-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return;
-    }
+    non_sandbox_test!();

    // Mock server that will receive three sequential requests and return the same SSE stream
    // each time: a few deltas, then a final assistant message, then completed.
--- a/codex-rs/core/tests/suite/compact.rs
+++ b/codex-rs/core/tests/suite/compact.rs
@@ -1,5 +1,3 @@
-#![expect(clippy::unwrap_used)]
-
 use codex_core::CodexAuth;
 use codex_core::ConversationManager;
 use codex_core::ModelProviderInfo;
@@ -11,121 +9,32 @@ use codex_core::protocol::InputItem;
 use codex_core::protocol::Op;
 use codex_core::protocol::RolloutItem;
 use codex_core::protocol::RolloutLine;
-use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
 use core_test_support::load_default_config_for_test;
 use core_test_support::wait_for_event;
-use serde_json::Value;
 use tempfile::TempDir;
-use wiremock::BodyPrintLimit;
 use wiremock::Mock;
-use wiremock::MockServer;
 use wiremock::Request;
 use wiremock::Respond;
 use wiremock::ResponseTemplate;
 use wiremock::matchers::method;
 use wiremock::matchers::path;

+use core_test_support::non_sandbox_test;
+use core_test_support::responses::ev_assistant_message;
+use core_test_support::responses::ev_completed;
+use core_test_support::responses::ev_completed_with_tokens;
+use core_test_support::responses::ev_function_call;
+use core_test_support::responses::mount_sse_once;
+use core_test_support::responses::sse;
+use core_test_support::responses::sse_response;
+use core_test_support::responses::start_mock_server;
 use pretty_assertions::assert_eq;
 use std::sync::Arc;
 use std::sync::Mutex;
 use std::sync::atomic::AtomicUsize;
 use std::sync::atomic::Ordering;
-
 // --- Test helpers -----------------------------------------------------------

-/// Build an SSE stream body from a list of JSON events.
-pub(super) fn sse(events: Vec<Value>) -> String {
-    use std::fmt::Write as _;
-    let mut out = String::new();
-    for ev in events {
-        let kind = ev.get("type").and_then(|v| v.as_str()).unwrap();
-        writeln!(&mut out, "event: {kind}").unwrap();
-        if !ev.as_object().map(|o| o.len() == 1).unwrap_or(false) {
-            write!(&mut out, "data: {ev}\n\n").unwrap();
-        } else {
-            out.push('\n');
-        }
-    }
-    out
-}
-
-/// Convenience: SSE event for a completed response with a specific id.
-pub(super) fn ev_completed(id: &str) -> Value {
-    serde_json::json!({
-        "type": "response.completed",
-        "response": {
-            "id": id,
-            "usage": {"input_tokens":0,"input_tokens_details":null,"output_tokens":0,"output_tokens_details":null,"total_tokens":0}
-        }
-    })
-}
-
-fn ev_completed_with_tokens(id: &str, total_tokens: u64) -> Value {
-    serde_json::json!({
-        "type": "response.completed",
-        "response": {
-            "id": id,
-            "usage": {
-                "input_tokens": total_tokens,
-                "input_tokens_details": null,
-                "output_tokens": 0,
-                "output_tokens_details": null,
-                "total_tokens": total_tokens
-            }
-        }
-    })
-}
-
-/// Convenience: SSE event for a single assistant message output item.
-pub(super) fn ev_assistant_message(id: &str, text: &str) -> Value {
-    serde_json::json!({
-        "type": "response.output_item.done",
-        "item": {
-            "type": "message",
-            "role": "assistant",
-            "id": id,
-            "content": [{"type": "output_text", "text": text}]
-        }
-    })
-}
-
-fn ev_function_call(call_id: &str, name: &str, arguments: &str) -> Value {
-    serde_json::json!({
-        "type": "response.output_item.done",
-        "item": {
-            "type": "function_call",
-            "call_id": call_id,
-            "name": name,
-            "arguments": arguments
-        }
-    })
-}
-
-pub(super) fn sse_response(body: String) -> ResponseTemplate {
-    ResponseTemplate::new(200)
-        .insert_header("content-type", "text/event-stream")
-        .set_body_raw(body, "text/event-stream")
-}
-
-pub(super) async fn mount_sse_once<M>(server: &MockServer, matcher: M, body: String)
-where
-    M: wiremock::Match + Send + Sync + 'static,
-{
-    Mock::given(method("POST"))
-        .and(path("/v1/responses"))
-        .and(matcher)
-        .respond_with(sse_response(body))
-        .mount(server)
-        .await;
-}
-
-async fn start_mock_server() -> MockServer {
-    MockServer::builder()
-        .body_print_limit(BodyPrintLimit::Limited(80_000))
-        .start()
-        .await
-}
-
 pub(super) const FIRST_REPLY: &str = "FIRST_REPLY";
 pub(super) const SUMMARY_TEXT: &str = "SUMMARY_ONLY_CONTEXT";
 pub(super) const SUMMARIZE_TRIGGER: &str = "Start Summarization";
@@ -144,12 +53,7 @@ const DUMMY_CALL_ID: &str = "call-multi-auto";

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn summarize_context_three_requests_and_instructions() {
-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return;
-    }
+    non_sandbox_test!();

    // Set up a mock server that we can inspect after the run.
    let server = start_mock_server().await;
@@ -370,12 +274,7 @@ async fn summarize_context_three_requests_and_instructions() {
 #[cfg_attr(windows, tokio::test(flavor = "multi_thread", worker_threads = 4))]
 #[cfg_attr(not(windows), tokio::test(flavor = "multi_thread", worker_threads = 2))]
 async fn auto_compact_runs_after_token_limit_hit() {
-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return;
-    }
+    non_sandbox_test!();

    let server = start_mock_server().await;

@@ -511,12 +410,7 @@ async fn auto_compact_runs_after_token_limit_hit() {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn auto_compact_persists_rollout_entries() {
-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return;
-    }
+    non_sandbox_test!();

    let server = start_mock_server().await;

@@ -644,12 +538,7 @@ async fn auto_compact_persists_rollout_entries() {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn auto_compact_stops_after_failed_attempt() {
-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return;
-    }
+    non_sandbox_test!();

    let server = start_mock_server().await;

@@ -758,12 +647,7 @@ async fn auto_compact_stops_after_failed_attempt() {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_events() {
-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return;
-    }
+    non_sandbox_test!();

    let server = start_mock_server().await;

@@ -861,8 +745,18 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
        .await
        .unwrap();

+    let mut auto_compact_lifecycle_events = Vec::new();
    loop {
        let event = codex.next_event().await.unwrap();
+        if event.id.starts_with("auto-compact-")
+            && matches!(
+                event.msg,
+                EventMsg::TaskStarted(_) | EventMsg::TaskComplete(_)
+            )
+        {
+            auto_compact_lifecycle_events.push(event);
+            continue;
+        }
        if let EventMsg::TaskComplete(_) = &event.msg
            && !event.id.starts_with("auto-compact-")
        {
@@ -870,6 +764,11 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
        }
    }

+    assert!(
+        auto_compact_lifecycle_events.is_empty(),
+        "auto compact should not emit task lifecycle events"
+    );
+
    let request_bodies: Vec<String> = responder
        .recorded_requests()
        .into_iter()
@@ -897,3 +796,133 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
        "second auto compact request should reuse summarization instructions"
    );
 }
+/// Verifies that when a manual compact request fails with
+/// `context_length_exceeded`, the retry is sent with exactly one fewer input
+/// item than the failed attempt.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn compact_trims_history_on_context_limit_error() {
+    non_sandbox_test!();
+
+    let server = start_mock_server().await;
+
+    // Minimal completes for two initial user turns.
+    let sse_user_done = sse(vec![ev_completed("r-user")]);
+
+    // First compact attempt fails with context length exceeded.
+    let sse_compact_fail = sse(vec![serde_json::json!({
+        "type": "response.failed",
+        "response": { "error": { "code": "context_length_exceeded", "message": "too big" } }
+    })]);
+
+    // Second compact attempt succeeds.
+    let sse_compact_ok = sse(vec![
+        ev_assistant_message("m-sum", SUMMARY_TEXT),
+        ev_completed("r-sum"),
+    ]);
+
+    // Matchers for the two user turns and two compact attempts.
+    let m_user1 = |req: &Request| {
+        let body = std::str::from_utf8(&req.body).unwrap_or("");
+        body.contains("\"text\":\"u1\"") && !body.contains(SUMMARIZE_TRIGGER)
+    };
+    mount_sse_once(&server, m_user1, sse_user_done.clone()).await;
+
+    let m_user2 = |req: &Request| {
+        let body = std::str::from_utf8(&req.body).unwrap_or("");
+        body.contains("\"text\":\"u2\"") && !body.contains(SUMMARIZE_TRIGGER)
+    };
+    mount_sse_once(&server, m_user2, sse_user_done.clone()).await;
+
+    // First compact attempt: includes the trigger and will fail. Both compact
+    // mocks use the same matcher and wiremock answers with the first-mounted
+    // matching mock, so cap this one at a single response; otherwise the
+    // success response mounted below would never be served.
+    let m_compact1 = |req: &Request| {
+        let body = std::str::from_utf8(&req.body).unwrap_or("");
+        body.contains(SUMMARIZE_TRIGGER)
+    };
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .and(m_compact1)
+        .respond_with(sse_response(sse_compact_fail))
+        .up_to_n_times(1)
+        .mount(&server)
+        .await;
+
+    // Second compact attempt: also includes trigger; succeeds.
+    let m_compact2 = |req: &Request| {
+        let body = std::str::from_utf8(&req.body).unwrap_or("");
+        body.contains(SUMMARIZE_TRIGGER)
+    };
+    mount_sse_once(&server, m_compact2, sse_compact_ok).await;
+
+    // Build conversation
+    let model_provider = ModelProviderInfo {
+        base_url: Some(format!("{}/v1", server.uri())),
+        ..built_in_model_providers()["openai"].clone()
+    };
+    let home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&home);
+    config.model_provider = model_provider;
+    let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
+    let codex = conversation_manager
+        .new_conversation(config)
+        .await
+        .unwrap()
+        .conversation;
+
+    // Two user turns to seed history.
+    codex
+        .submit(Op::UserInput {
+            items: vec![InputItem::Text { text: "u1".into() }],
+        })
+        .await
+        .unwrap();
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    codex
+        .submit(Op::UserInput {
+            items: vec![InputItem::Text { text: "u2".into() }],
+        })
+        .await
+        .unwrap();
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    // Request compaction: first attempt fails with context_length_exceeded; retry trims history and succeeds.
+    codex.submit(Op::Compact).await.unwrap();
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    // Inspect requests to verify that there were two compaction attempts and the
+    // second had a smaller input array than the first.
+    let requests = server.received_requests().await.unwrap();
+    let mut compact_bodies: Vec<serde_json::Value> = Vec::new();
+    for req in &requests {
+        let body = req.body_json::<serde_json::Value>().unwrap();
+        // A request counts as a compact attempt when any `message` input item
+        // has the summarization trigger in its first content entry.
+        let is_compact = body
+            .get("input")
+            .and_then(|v| v.as_array())
+            .map(|arr| {
+                arr.iter().any(|it| {
+                    it.get("type").and_then(|t| t.as_str()) == Some("message")
+                        && it
+                            .get("content")
+                            .and_then(|c| c.as_array())
+                            .and_then(|a| a.first())
+                            .and_then(|t| t.get("text"))
+                            .and_then(|t| t.as_str())
+                            .map(|t| t.contains(SUMMARIZE_TRIGGER))
+                            .unwrap_or(false)
+                })
+            })
+            .unwrap_or(false);
+        if is_compact {
+            compact_bodies.push(body);
+        }
+    }
+    assert!(
+        compact_bodies.len() >= 2,
+        "expected at least two compact attempts (fail then success)"
+    );
+    // Compare the last two attempts: the failed one and its retry.
+    let n = compact_bodies.len();
+    let len1 = compact_bodies[n - 2]["input"]
+        .as_array()
+        .map(Vec::len)
+        .unwrap_or(0);
+    let len2 = compact_bodies[n - 1]["input"]
+        .as_array()
+        .map(Vec::len)
+        .unwrap_or(0);
+    assert_eq!(
+        len1,
+        len2 + 1,
+        "second compact attempt should trim exactly one item: {len1} -> {len2}"
+    );
+}
--- a/codex-rs/core/tests/suite/compact_resume_fork.rs
+++ b/codex-rs/core/tests/suite/compact_resume_fork.rs
@@ -10,10 +10,6 @@
 use super::compact::FIRST_REPLY;
 use super::compact::SUMMARIZE_TRIGGER;
 use super::compact::SUMMARY_TEXT;
-use super::compact::ev_assistant_message;
-use super::compact::ev_completed;
-use super::compact::mount_sse_once;
-use super::compact::sse;
 use codex_core::CodexAuth;
 use codex_core::CodexConversation;
 use codex_core::ConversationManager;
@@ -27,6 +23,10 @@ use codex_core::protocol::InputItem;
 use codex_core::protocol::Op;
 use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
 use core_test_support::load_default_config_for_test;
+use core_test_support::responses::ev_assistant_message;
+use core_test_support::responses::ev_completed;
+use core_test_support::responses::mount_sse_once;
+use core_test_support::responses::sse;
 use core_test_support::wait_for_event;
 use pretty_assertions::assert_eq;
 use serde_json::Value;
@@ -74,7 +74,7 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
        "compact+resume test expects resumed path {resumed_path:?} to exist",
    );

-    let forked = fork_conversation(&manager, &config, resumed_path, 1).await;
+    let forked = fork_conversation(&manager, &config, resumed_path, 2).await;
    user_turn(&forked, "AFTER_FORK").await;

    // 3. Capture the requests to the model and validate the history slices.
@@ -100,17 +100,15 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
        "after-resume input should have at least as many items as after-compact",
    );
    assert_eq!(compact_arr.as_slice(), &resume_arr[..compact_arr.len()]);
-    eprint!(
-        "len of compact: {}, len of fork: {}",
-        compact_arr.len(),
-        fork_arr.len()
-    );
-    eprintln!("input_after_fork:{}", json!(input_after_fork));
+
    assert!(
        compact_arr.len() <= fork_arr.len(),
        "after-fork input should have at least as many items as after-compact",
    );
-    assert_eq!(compact_arr.as_slice(), &fork_arr[..compact_arr.len()]);
+    assert_eq!(
+        &compact_arr.as_slice()[..compact_arr.len()],
+        &fork_arr[..compact_arr.len()]
+    );

    let prompt = requests[0]["instructions"]
        .as_str()
@@ -824,14 +822,15 @@ async fn resume_conversation(
    conversation
 }

+#[cfg(test)]
 async fn fork_conversation(
    manager: &ConversationManager,
    config: &Config,
    path: std::path::PathBuf,
-    back_steps: usize,
+    nth_user_message: usize,
 ) -> Arc<CodexConversation> {
    let NewConversation { conversation, .. } = manager
-        .fork_conversation(back_steps, config.clone(), path)
+        .fork_conversation(nth_user_message, config.clone(), path)
        .await
        .expect("fork conversation");
    conversation
--- a/codex-rs/core/tests/suite/exec.rs
+++ b/codex-rs/core/tests/suite/exec.rs
@@ -1,6 +1,7 @@
 #![cfg(target_os = "macos")]

 use std::collections::HashMap;
+use std::string::ToString;

 use codex_core::exec::ExecParams;
 use codex_core::exec::ExecToolCallOutput;
@@ -29,7 +30,7 @@ async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>) -> Result<ExecToolCallOutput
    assert_eq!(sandbox_type, SandboxType::MacosSeatbelt);

    let params = ExecParams {
-        command: cmd.iter().map(|s| s.to_string()).collect(),
+        command: cmd.iter().map(ToString::to_string).collect(),
        cwd: tmp.path().to_path_buf(),
        timeout_ms: Some(1000),
        env: HashMap::new(),
@@ -39,7 +40,7 @@ async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>) -> Result<ExecToolCallOutput

    let policy = SandboxPolicy::new_read_only_policy();

-    process_exec_tool_call(params, sandbox_type, &policy, &None, None).await
+    process_exec_tool_call(params, sandbox_type, &policy, tmp.path(), &None, None).await
 }

 /// Command succeeds with exit code 0 normally
--- a/codex-rs/core/tests/suite/exec_stream_events.rs
+++ b/codex-rs/core/tests/suite/exec_stream_events.rs
@@ -49,9 +49,10 @@ async fn test_exec_stdout_stream_events_echo() {
        "printf 'hello-world\n'".to_string(),
    ];

+    let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
    let params = ExecParams {
        command: cmd,
-        cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
+        cwd: cwd.clone(),
        timeout_ms: Some(5_000),
        env: HashMap::new(),
        with_escalated_permissions: None,
@@ -64,6 +65,7 @@ async fn test_exec_stdout_stream_events_echo() {
        params,
        SandboxType::None,
        &policy,
+        cwd.as_path(),
        &None,
        Some(stdout_stream),
    )
@@ -99,9 +101,10 @@ async fn test_exec_stderr_stream_events_echo() {
        "printf 'oops\n' 1>&2".to_string(),
    ];

+    let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
    let params = ExecParams {
        command: cmd,
-        cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
+        cwd: cwd.clone(),
        timeout_ms: Some(5_000),
        env: HashMap::new(),
        with_escalated_permissions: None,
@@ -114,6 +117,7 @@ async fn test_exec_stderr_stream_events_echo() {
        params,
        SandboxType::None,
        &policy,
+        cwd.as_path(),
        &None,
        Some(stdout_stream),
    )
@@ -152,9 +156,10 @@ async fn test_aggregated_output_interleaves_in_order() {
        "printf 'O1\\n'; sleep 0.01; printf 'E1\\n' 1>&2; sleep 0.01; printf 'O2\\n'; sleep 0.01; printf 'E2\\n' 1>&2".to_string(),
    ];

+    let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
    let params = ExecParams {
        command: cmd,
-        cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
+        cwd: cwd.clone(),
        timeout_ms: Some(5_000),
        env: HashMap::new(),
        with_escalated_permissions: None,
@@ -163,9 +168,16 @@ async fn test_aggregated_output_interleaves_in_order() {

    let policy = SandboxPolicy::new_read_only_policy();

-    let result = process_exec_tool_call(params, SandboxType::None, &policy, &None, None)
-        .await
-        .expect("process_exec_tool_call");
+    let result = process_exec_tool_call(
+        params,
+        SandboxType::None,
+        &policy,
+        cwd.as_path(),
+        &None,
+        None,
+    )
+    .await
+    .expect("process_exec_tool_call");

    assert_eq!(result.exit_code, 0);
    assert_eq!(result.stdout.text, "O1\nO2\n");
@@ -182,9 +194,10 @@ async fn test_exec_timeout_returns_partial_output() {
        "printf 'before\\n'; sleep 2; printf 'after\\n'".to_string(),
    ];

+    let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
    let params = ExecParams {
        command: cmd,
-        cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
+        cwd: cwd.clone(),
        timeout_ms: Some(200),
        env: HashMap::new(),
        with_escalated_permissions: None,
@@ -193,7 +206,15 @@ async fn test_exec_timeout_returns_partial_output() {

    let policy = SandboxPolicy::new_read_only_policy();

-    let result = process_exec_tool_call(params, SandboxType::None, &policy, &None, None).await;
+    let result = process_exec_tool_call(
+        params,
+        SandboxType::None,
+        &policy,
+        cwd.as_path(),
+        &None,
+        None,
+    )
+    .await;

    let Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) = result else {
        panic!("expected timeout error");
--- a/codex-rs/core/tests/suite/fork_conversation.rs
+++ b/codex-rs/core/tests/suite/fork_conversation.rs
@@ -5,6 +5,8 @@ use codex_core::ModelProviderInfo;
 use codex_core::NewConversation;
 use codex_core::ResponseItem;
 use codex_core::built_in_model_providers;
+use codex_core::content_items_to_text;
+use codex_core::is_session_prefix_message;
 use codex_core::protocol::ConversationPathResponseEvent;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::InputItem;
@@ -104,13 +106,16 @@ async fn fork_conversation_twice_drops_to_first_message() {
        items
    };

-    // Compute expected prefixes after each fork by truncating base rollout at nth-from-last user input.
+    // Compute expected prefixes after each fork by truncating base rollout
+    // strictly before the nth user input (0-based).
    let base_items = read_items(&base_path);
    let find_user_input_positions = |items: &[RolloutItem]| -> Vec<usize> {
        let mut pos = Vec::new();
        for (i, it) in items.iter().enumerate() {
            if let RolloutItem::ResponseItem(ResponseItem::Message { role, content, .. }) = it
                && role == "user"
+                && content_items_to_text(content)
+                    .is_some_and(|text| !is_session_prefix_message(&text))
            {
                // Consider any user message as an input boundary; recorder stores both EventMsg and ResponseItem.
                // We specifically look for input items, which are represented as ContentItem::InputText.
@@ -126,11 +131,8 @@ async fn fork_conversation_twice_drops_to_first_message() {
    };
    let user_inputs = find_user_input_positions(&base_items);

-    // After dropping last user input (n=1), cut strictly before that input if present, else empty.
-    let cut1 = user_inputs
-        .get(user_inputs.len().saturating_sub(1))
-        .copied()
-        .unwrap_or(0);
+    // After cutting at nth user input (n=1 → second user message), cut strictly before that input.
+    let cut1 = user_inputs.get(1).copied().unwrap_or(0);
    let expected_after_first: Vec<RolloutItem> = base_items[..cut1].to_vec();

    // After dropping again (n=1 on fork1), compute expected relative to fork1's rollout.
@@ -161,12 +163,12 @@ async fn fork_conversation_twice_drops_to_first_message() {
        serde_json::to_value(&expected_after_first).unwrap()
    );

-    // Fork again with n=1 → drops the (new) last user message, leaving only the first.
+    // Fork again with n=0 → drops the (new) last user message, leaving only the first.
    let NewConversation {
        conversation: codex_fork2,
        ..
    } = conversation_manager
-        .fork_conversation(1, config_for_fork.clone(), fork1_path.clone())
+        .fork_conversation(0, config_for_fork.clone(), fork1_path.clone())
        .await
        .expect("fork 2");

--- a/codex-rs/core/tests/suite/review.rs
+++ b/codex-rs/core/tests/suite/review.rs
@@ -20,9 +20,9 @@ use codex_core::protocol::ReviewOutputEvent;
 use codex_core::protocol::ReviewRequest;
 use codex_core::protocol::RolloutItem;
 use codex_core::protocol::RolloutLine;
-use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
 use core_test_support::load_default_config_for_test;
 use core_test_support::load_sse_fixture_with_id_from_str;
+use core_test_support::non_sandbox_test;
 use core_test_support::wait_for_event;
 use pretty_assertions::assert_eq;
 use std::path::PathBuf;
@@ -42,12 +42,7 @@ use wiremock::matchers::path;
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn review_op_emits_lifecycle_and_review_output() {
    // Skip under Codex sandbox network restrictions.
-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return;
-    }
+    non_sandbox_test!();

    // Start mock Responses API server. Return a single assistant message whose
    // text is a JSON-encoded ReviewOutputEvent.
@@ -172,12 +167,7 @@ async fn review_op_emits_lifecycle_and_review_output() {
 #[cfg_attr(windows, tokio::test(flavor = "multi_thread", worker_threads = 4))]
 #[cfg_attr(not(windows), tokio::test(flavor = "multi_thread", worker_threads = 2))]
 async fn review_op_with_plain_text_emits_review_fallback() {
-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return;
-    }
+    non_sandbox_test!();

    let sse_raw = r#"[
        {"type":"response.output_item.done", "item":{
@@ -226,12 +216,7 @@ async fn review_op_with_plain_text_emits_review_fallback() {
 #[cfg_attr(windows, tokio::test(flavor = "multi_thread", worker_threads = 4))]
 #[cfg_attr(not(windows), tokio::test(flavor = "multi_thread", worker_threads = 2))]
 async fn review_does_not_emit_agent_message_on_structured_output() {
-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return;
-    }
+    non_sandbox_test!();

    let review_json = serde_json::json!({
        "findings": [
@@ -303,12 +288,7 @@ async fn review_does_not_emit_agent_message_on_structured_output() {
 /// request uses that model (and not the main chat model).
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn review_uses_custom_review_model_from_config() {
-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return;
-    }
+    non_sandbox_test!();

    // Minimal stream: just a completed event
    let sse_raw = r#"[
@@ -361,12 +341,7 @@ async fn review_uses_custom_review_model_from_config() {
 #[cfg_attr(windows, tokio::test(flavor = "multi_thread", worker_threads = 4))]
 #[cfg_attr(not(windows), tokio::test(flavor = "multi_thread", worker_threads = 2))]
 async fn review_input_isolated_from_parent_history() {
-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return;
-    }
+    non_sandbox_test!();

    // Mock server for the single review request
    let sse_raw = r#"[
@@ -542,12 +517,7 @@ async fn review_input_isolated_from_parent_history() {
 /// messages in its request `input`.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn review_history_does_not_leak_into_parent_session() {
-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return;
-    }
+    non_sandbox_test!();

    // Respond to both the review request and the subsequent parent request.
    let sse_raw = r#"[
--- a/codex-rs/core/tests/suite/seatbelt.rs
+++ b/codex-rs/core/tests/suite/seatbelt.rs
@@ -171,6 +171,8 @@ async fn python_getpwuid_works_under_seatbelt() {

    // ReadOnly is sufficient here since we are only exercising user lookup.
    let policy = SandboxPolicy::ReadOnly;
+    let command_cwd = std::env::current_dir().expect("getcwd");
+    let sandbox_cwd = command_cwd.clone();

    let mut child = spawn_command_under_seatbelt(
        vec![
@@ -179,8 +181,9 @@ async fn python_getpwuid_works_under_seatbelt() {
            // Print the passwd struct; success implies lookup worked.
            "import pwd, os; print(pwd.getpwuid(os.getuid()))".to_string(),
        ],
+        command_cwd,
        &policy,
-        std::env::current_dir().expect("should be able to get current dir"),
+        sandbox_cwd.as_path(),
        StdioPolicy::RedirectForShellTool,
        HashMap::new(),
    )
@@ -216,13 +219,16 @@ fn create_test_scenario(tmp: &TempDir) -> TestScenario {
 /// Note that `path` must be absolute.
 async fn touch(path: &Path, policy: &SandboxPolicy) -> bool {
    assert!(path.is_absolute(), "Path must be absolute: {path:?}");
+    let command_cwd = std::env::current_dir().expect("getcwd");
+    let sandbox_cwd = command_cwd.clone();
    let mut child = spawn_command_under_seatbelt(
        vec![
            "/usr/bin/touch".to_string(),
            path.to_string_lossy().to_string(),
        ],
+        command_cwd,
        policy,
-        std::env::current_dir().expect("should be able to get current dir"),
+        sandbox_cwd.as_path(),
        StdioPolicy::RedirectForShellTool,
        HashMap::new(),
    )
--- a/codex-rs/core/tests/suite/stream_error_allows_next_turn.rs
+++ b/codex-rs/core/tests/suite/stream_error_allows_next_turn.rs
@@ -7,9 +7,9 @@ use codex_core::WireApi;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::InputItem;
 use codex_core::protocol::Op;
-use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
 use core_test_support::load_default_config_for_test;
 use core_test_support::load_sse_fixture_with_id;
+use core_test_support::non_sandbox_test;
 use core_test_support::wait_for_event_with_timeout;
 use tempfile::TempDir;
 use wiremock::Mock;
@@ -25,12 +25,7 @@ fn sse_completed(id: &str) -> String {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn continue_after_stream_error() {
-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return;
-    }
+    non_sandbox_test!();

    let server = MockServer::start().await;

--- a/codex-rs/core/tests/suite/stream_no_completed.rs
+++ b/codex-rs/core/tests/suite/stream_no_completed.rs
@@ -9,10 +9,10 @@ use codex_core::ModelProviderInfo;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::InputItem;
 use codex_core::protocol::Op;
-use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
 use core_test_support::load_default_config_for_test;
 use core_test_support::load_sse_fixture;
 use core_test_support::load_sse_fixture_with_id;
+use core_test_support::non_sandbox_test;
 use tempfile::TempDir;
 use tokio::time::timeout;
 use wiremock::Mock;
@@ -33,12 +33,7 @@ fn sse_completed(id: &str) -> String {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn retries_on_early_close() {
-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return;
-    }
+    non_sandbox_test!();

    let server = MockServer::start().await;

--- a/codex-rs/exec/Cargo.toml
+++ b/codex-rs/exec/Cargo.toml
@@ -15,37 +15,37 @@ path = "src/lib.rs"
 workspace = true

 [dependencies]
-anyhow = "1"
-chrono = "0.4.40"
-clap = { version = "4", features = ["derive"] }
-codex-arg0 = { path = "../arg0" }
-codex-common = { path = "../common", features = [
+anyhow = { workspace = true }
+chrono = { workspace = true }
+clap = { workspace = true, features = ["derive"] }
+codex-arg0 = { workspace = true }
+codex-common = { workspace = true, features = [
    "cli",
    "elapsed",
    "sandbox_summary",
 ] }
-codex-core = { path = "../core" }
-codex-ollama = { path = "../ollama" }
-codex-protocol = { path = "../protocol" }
-owo-colors = "4.2.0"
-serde_json = "1"
-shlex = "1.3.0"
-tokio = { version = "1", features = [
+codex-core = { workspace = true }
+codex-ollama = { workspace = true }
+codex-protocol = { workspace = true }
+owo-colors = { workspace = true }
+serde_json = { workspace = true }
+shlex = { workspace = true }
+tokio = { workspace = true, features = [
    "io-std",
    "macros",
    "process",
    "rt-multi-thread",
    "signal",
 ] }
-tracing = { version = "0.1.41", features = ["log"] }
-tracing-subscriber = { version = "0.3.20", features = ["env-filter"] }
+tracing = { workspace = true, features = ["log"] }
+tracing-subscriber = { workspace = true, features = ["env-filter"] }

 [dev-dependencies]
-assert_cmd = "2"
-core_test_support = { path = "../core/tests/common" }
-libc = "0.2"
-predicates = "3"
-tempfile = "3.13.0"
-uuid = "1"
-walkdir = "2"
-wiremock = "0.6"
+assert_cmd = { workspace = true }
+core_test_support = { workspace = true }
+libc = { workspace = true }
+predicates = { workspace = true }
+tempfile = { workspace = true }
+uuid = { workspace = true }
+walkdir = { workspace = true }
+wiremock = { workspace = true }
--- a/codex-rs/exec/src/event_processor_with_human_output.rs
+++ b/codex-rs/exec/src/event_processor_with_human_output.rs
@@ -558,7 +558,11 @@ impl EventProcessor for EventProcessorWithHumanOutput {
                TurnAbortReason::Replaced => {
                    ts_println!(self, "task aborted: replaced by a new task");
                }
+                TurnAbortReason::ReviewEnded => {
+                    ts_println!(self, "task aborted: review ended");
+                }
            },
+            EventMsg::CompactApprovalRequest(_) => {}
            EventMsg::ShutdownComplete => return CodexStatus::Shutdown,
            EventMsg::ConversationPath(_) => {}
            EventMsg::UserMessage(_) => {}
@@ -570,7 +574,7 @@ impl EventProcessor for EventProcessorWithHumanOutput {
 }

 fn escape_command(command: &[String]) -> String {
-    try_join(command.iter().map(|s| s.as_str())).unwrap_or_else(|_| command.join(" "))
+    try_join(command.iter().map(String::as_str)).unwrap_or_else(|_| command.join(" "))
 }

 fn format_file_change(change: &FileChange) -> &'static str {
--- a/codex-rs/exec/tests/fixtures/sse_apply_patch_add.json
+++ b/codex-rs/exec/tests/fixtures/sse_apply_patch_add.json
@@ -1,25 +0,0 @@
-[
-  {
-    "type": "response.output_item.done",
-    "item": {
-      "type": "custom_tool_call",
-      "name": "apply_patch",
-      "input": "*** Begin Patch\n*** Add File: test.md\n+Hello world\n*** End Patch",
-      "call_id": "__ID__"
-    }
-  },
-  {
-    "type": "response.completed",
-    "response": {
-      "id": "__ID__",
-      "usage": {
-        "input_tokens": 0,
-        "input_tokens_details": null,
-        "output_tokens": 0,
-        "output_tokens_details": null,
-        "total_tokens": 0
-      },
-      "output": []
-    }
-  }
-]
--- a/codex-rs/exec/tests/fixtures/sse_apply_patch_freeform_add.json
+++ b/codex-rs/exec/tests/fixtures/sse_apply_patch_freeform_add.json
@@ -1,25 +0,0 @@
-[
-  {
-    "type": "response.output_item.done",
-    "item": {
-      "type": "custom_tool_call",
-      "name": "apply_patch",
-      "input": "*** Begin Patch\n*** Add File: app.py\n+class BaseClass:\n+  def method():\n+    return False\n*** End Patch",
-      "call_id": "__ID__"
-    }
-  },
-  {
-    "type": "response.completed",
-    "response": {
-      "id": "__ID__",
-      "usage": {
-        "input_tokens": 0,
-        "input_tokens_details": null,
-        "output_tokens": 0,
-        "output_tokens_details": null,
-        "total_tokens": 0
-      },
-      "output": []
-    }
-  }
-]
--- a/codex-rs/exec/tests/fixtures/sse_apply_patch_freeform_update.json
+++ b/codex-rs/exec/tests/fixtures/sse_apply_patch_freeform_update.json
@@ -1,25 +0,0 @@
-[
-  {
-    "type": "response.output_item.done",
-    "item": {
-      "type": "custom_tool_call",
-      "name": "apply_patch",
-      "input": "*** Begin Patch\n*** Update File: app.py\n@@  def method():\n-    return False\n+\n+    return True\n*** End Patch",
-      "call_id": "__ID__"
-    }
-  },
-  {
-    "type": "response.completed",
-    "response": {
-      "id": "__ID__",
-      "usage": {
-        "input_tokens": 0,
-        "input_tokens_details": null,
-        "output_tokens": 0,
-        "output_tokens_details": null,
-        "total_tokens": 0
-      },
-      "output": []
-    }
-  }
-]
--- a/codex-rs/exec/tests/fixtures/sse_apply_patch_update.json
+++ b/codex-rs/exec/tests/fixtures/sse_apply_patch_update.json
@@ -1,25 +0,0 @@
-[
-  {
-    "type": "response.output_item.done",
-    "item": {
-      "type": "function_call",
-      "name": "apply_patch",
-      "arguments": "{\n  \"input\": \"*** Begin Patch\\n*** Update File: test.md\\n@@\\n-Hello world\\n+Final text\\n*** End Patch\"\n}",
-      "call_id": "__ID__"
-    }
-  },
-  {
-    "type": "response.completed",
-    "response": {
-      "id": "__ID__",
-      "usage": {
-        "input_tokens": 0,
-        "input_tokens_details": null,
-        "output_tokens": 0,
-        "output_tokens_details": null,
-        "total_tokens": 0
-      },
-      "output": []
-    }
-  }
-]
--- a/codex-rs/exec/tests/fixtures/sse_response_completed.json
+++ b/codex-rs/exec/tests/fixtures/sse_response_completed.json
@@ -1,16 +0,0 @@
-[
-  {
-    "type": "response.completed",
-    "response": {
-      "id": "__ID__",
-      "usage": {
-        "input_tokens": 0,
-        "input_tokens_details": null,
-        "output_tokens": 0,
-        "output_tokens_details": null,
-        "total_tokens": 0
-      },
-      "output": []
-    }
-  }
-]
--- a/codex-rs/exec/tests/suite/apply_patch.rs
+++ b/codex-rs/exec/tests/suite/apply_patch.rs
@@ -1,8 +1,12 @@
-#![allow(clippy::expect_used, clippy::unwrap_used)]
+#![allow(clippy::expect_used, clippy::unwrap_used, unused_imports)]

 use anyhow::Context;
 use assert_cmd::prelude::*;
 use codex_core::CODEX_APPLY_PATCH_ARG1;
+use core_test_support::responses::ev_apply_patch_custom_tool_call;
+use core_test_support::responses::ev_apply_patch_function_call;
+use core_test_support::responses::ev_completed;
+use core_test_support::responses::sse;
 use std::fs;
 use std::process::Command;
 use tempfile::tempdir;
@@ -44,26 +48,34 @@ fn test_standalone_exec_cli_can_use_apply_patch() -> anyhow::Result<()> {
 #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
 async fn test_apply_patch_tool() -> anyhow::Result<()> {
    use crate::suite::common::run_e2e_exec_test;
-    use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+    use core_test_support::non_sandbox_test;

-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return Ok(());
-    }
+    non_sandbox_test!(result);

    let tmp_cwd = tempdir().expect("failed to create temp dir");
    let tmp_path = tmp_cwd.path().to_path_buf();
-    run_e2e_exec_test(
-        tmp_cwd.path(),
-        vec![
-            include_str!("../fixtures/sse_apply_patch_add.json").to_string(),
-            include_str!("../fixtures/sse_apply_patch_update.json").to_string(),
-            include_str!("../fixtures/sse_response_completed.json").to_string(),
-        ],
-    )
-    .await;
+    let add_patch = r#"*** Begin Patch
+*** Add File: test.md
++Hello world
+*** End Patch"#;
+    let update_patch = r#"*** Begin Patch
+*** Update File: test.md
+@@
+-Hello world
++Final text
+*** End Patch"#;
+    let response_streams = vec![
+        sse(vec![
+            ev_apply_patch_custom_tool_call("request_0", add_patch),
+            ev_completed("request_0"),
+        ]),
+        sse(vec![
+            ev_apply_patch_function_call("request_1", update_patch),
+            ev_completed("request_1"),
+        ]),
+        sse(vec![ev_completed("request_2")]),
+    ];
+    run_e2e_exec_test(tmp_cwd.path(), response_streams).await;

    let final_path = tmp_path.join("test.md");
    let contents = std::fs::read_to_string(&final_path)
@@ -76,25 +88,36 @@ async fn test_apply_patch_tool() -> anyhow::Result<()> {
 #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
 async fn test_apply_patch_freeform_tool() -> anyhow::Result<()> {
    use crate::suite::common::run_e2e_exec_test;
-    use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+    use core_test_support::non_sandbox_test;

-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return Ok(());
-    }
+    non_sandbox_test!(result);

    let tmp_cwd = tempdir().expect("failed to create temp dir");
-    run_e2e_exec_test(
-        tmp_cwd.path(),
-        vec![
-            include_str!("../fixtures/sse_apply_patch_freeform_add.json").to_string(),
-            include_str!("../fixtures/sse_apply_patch_freeform_update.json").to_string(),
-            include_str!("../fixtures/sse_response_completed.json").to_string(),
-        ],
-    )
-    .await;
+    let freeform_add_patch = r#"*** Begin Patch
+*** Add File: app.py
++class BaseClass:
++  def method():
++    return False
+*** End Patch"#;
+    let freeform_update_patch = r#"*** Begin Patch
+*** Update File: app.py
+@@  def method():
+-    return False
++
++    return True
+*** End Patch"#;
+    let response_streams = vec![
+        sse(vec![
+            ev_apply_patch_custom_tool_call("request_0", freeform_add_patch),
+            ev_completed("request_0"),
+        ]),
+        sse(vec![
+            ev_apply_patch_custom_tool_call("request_1", freeform_update_patch),
+            ev_completed("request_1"),
+        ]),
+        sse(vec![ev_completed("request_2")]),
+    ];
+    run_e2e_exec_test(tmp_cwd.path(), response_streams).await;

    // Verify final file contents
    let final_path = tmp_cwd.path().join("app.py");
--- a/codex-rs/exec/tests/suite/common.rs
+++ b/codex-rs/exec/tests/suite/common.rs
@@ -4,7 +4,6 @@

 use anyhow::Context;
 use assert_cmd::prelude::*;
-use core_test_support::load_sse_fixture_with_id_from_str;
 use std::path::Path;
 use std::process::Command;
 use std::sync::atomic::AtomicUsize;
@@ -27,10 +26,7 @@ impl Respond for SeqResponder {
        match self.responses.get(call_num) {
            Some(body) => wiremock::ResponseTemplate::new(200)
                .insert_header("content-type", "text/event-stream")
-                .set_body_raw(
-                    load_sse_fixture_with_id_from_str(body, &format!("request_{call_num}")),
-                    "text/event-stream",
-                ),
+                .set_body_string(body.clone()),
            None => panic!("no response for {call_num}"),
        }
    }
--- a/codex-rs/exec/tests/suite/resume.rs
+++ b/codex-rs/exec/tests/suite/resume.rs
@@ -3,6 +3,7 @@ use anyhow::Context;
 use assert_cmd::prelude::*;
 use serde_json::Value;
 use std::process::Command;
+use std::string::ToString;
 use tempfile::TempDir;
 use uuid::Uuid;
 use walkdir::WalkDir;
@@ -45,7 +46,7 @@ fn find_session_file_containing_marker(
                && payload.get("type").and_then(|t| t.as_str()) == Some("message")
                && payload
                    .get("content")
-                    .map(|c| c.to_string())
+                    .map(ToString::to_string)
                    .unwrap_or_default()
                    .contains(marker)
            {
--- a/codex-rs/exec/tests/suite/sandbox.rs
+++ b/codex-rs/exec/tests/suite/sandbox.rs
@@ -4,27 +4,39 @@ use codex_core::spawn::StdioPolicy;
 use std::collections::HashMap;
 use std::future::Future;
 use std::io;
+use std::path::Path;
 use std::path::PathBuf;
 use std::process::ExitStatus;
+use tokio::fs::create_dir_all;
 use tokio::process::Child;

 #[cfg(target_os = "macos")]
 async fn spawn_command_under_sandbox(
    command: Vec<String>,
+    command_cwd: PathBuf,
    sandbox_policy: &SandboxPolicy,
-    cwd: PathBuf,
+    sandbox_cwd: &Path,
    stdio_policy: StdioPolicy,
    env: HashMap<String, String>,
 ) -> std::io::Result<Child> {
    use codex_core::seatbelt::spawn_command_under_seatbelt;
-    spawn_command_under_seatbelt(command, sandbox_policy, cwd, stdio_policy, env).await
+    spawn_command_under_seatbelt(
+        command,
+        command_cwd,
+        sandbox_policy,
+        sandbox_cwd,
+        stdio_policy,
+        env,
+    )
+    .await
 }

 #[cfg(target_os = "linux")]
 async fn spawn_command_under_sandbox(
    command: Vec<String>,
+    command_cwd: PathBuf,
    sandbox_policy: &SandboxPolicy,
-    cwd: PathBuf,
+    sandbox_cwd: &Path,
    stdio_policy: StdioPolicy,
    env: HashMap<String, String>,
 ) -> std::io::Result<Child> {
@@ -33,8 +45,9 @@ async fn spawn_command_under_sandbox(
    spawn_command_under_linux_sandbox(
        codex_linux_sandbox_exe,
        command,
+        command_cwd,
        sandbox_policy,
-        cwd,
+        sandbox_cwd,
        stdio_policy,
        env,
    )
@@ -74,14 +87,17 @@ if __name__ == '__main__':
    p.join()
 "#;

+    let command_cwd = std::env::current_dir().expect("should be able to get current dir");
+    let sandbox_cwd = command_cwd.clone();
    let mut child = spawn_command_under_sandbox(
        vec![
            "python3".to_string(),
            "-c".to_string(),
            python_code.to_string(),
        ],
+        command_cwd,
        &policy,
-        std::env::current_dir().expect("should be able to get current dir"),
+        sandbox_cwd.as_path(),
        StdioPolicy::Inherit,
        HashMap::new(),
    )
@@ -92,6 +108,88 @@ if __name__ == '__main__':
    assert!(status.success(), "python exited with {status:?}");
 }

+#[tokio::test]
+async fn sandbox_distinguishes_command_and_policy_cwds() {
+    let temp = tempfile::tempdir().expect("should be able to create temp dir");
+    let sandbox_root = temp.path().join("sandbox");
+    let command_root = temp.path().join("command");
+    create_dir_all(&sandbox_root).await.expect("mkdir");
+    create_dir_all(&command_root).await.expect("mkdir");
+    let canonical_sandbox_root = tokio::fs::canonicalize(&sandbox_root)
+        .await
+        .expect("canonicalize sandbox root");
+    let canonical_allowed_path = canonical_sandbox_root.join("allowed.txt");
+
+    let disallowed_path = command_root.join("forbidden.txt");
+
+    // Note writable_roots is empty: verify that `canonical_allowed_path` is
+    // writable only because it is under the sandbox policy cwd, not because it
+    // is under a writable root.
+    let policy = SandboxPolicy::WorkspaceWrite {
+        writable_roots: vec![],
+        network_access: false,
+        exclude_tmpdir_env_var: true,
+        exclude_slash_tmp: true,
+    };
+
+    // Attempt to write inside the command cwd, which is outside of the sandbox policy cwd.
+    let mut child = spawn_command_under_sandbox(
+        vec![
+            "bash".to_string(),
+            "-lc".to_string(),
+            "echo forbidden > forbidden.txt".to_string(),
+        ],
+        command_root.clone(),
+        &policy,
+        canonical_sandbox_root.as_path(),
+        StdioPolicy::Inherit,
+        HashMap::new(),
+    )
+    .await
+    .expect("should spawn command writing to forbidden path");
+
+    let status = child
+        .wait()
+        .await
+        .expect("should wait for forbidden command");
+    assert!(
+        !status.success(),
+        "sandbox unexpectedly allowed writing to command cwd: {status:?}"
+    );
+    let forbidden_exists = tokio::fs::try_exists(&disallowed_path)
+        .await
+        .expect("try_exists failed");
+    assert!(
+        !forbidden_exists,
+        "forbidden path should not have been created"
+    );
+
+    // Writing to the sandbox policy cwd after changing directories into it should succeed.
+    let mut child = spawn_command_under_sandbox(
+        vec![
+            "/usr/bin/touch".to_string(),
+            canonical_allowed_path.to_string_lossy().into_owned(),
+        ],
+        command_root,
+        &policy,
+        canonical_sandbox_root.as_path(),
+        StdioPolicy::Inherit,
+        HashMap::new(),
+    )
+    .await
+    .expect("should spawn command writing to sandbox root");
+
+    let status = child.wait().await.expect("should wait for allowed command");
+    assert!(
+        status.success(),
+        "sandbox blocked allowed write: {status:?}"
+    );
+    let allowed_exists = tokio::fs::try_exists(&canonical_allowed_path)
+        .await
+        .expect("try_exists allowed failed");
+    assert!(allowed_exists, "allowed path should exist");
+}
+
 fn unix_sock_body() {
    unsafe {
        let mut fds = [0i32; 2];
@@ -200,10 +298,13 @@ where
        cmds.push(test_selector.into());

        // Your existing launcher:
+        let command_cwd = std::env::current_dir().expect("should be able to get current dir");
+        let sandbox_cwd = command_cwd.clone();
        let mut child = spawn_command_under_sandbox(
            cmds,
+            command_cwd,
            policy,
-            std::env::current_dir().expect("should be able to get current dir"),
+            sandbox_cwd.as_path(),
            stdio_policy,
            HashMap::from([("IN_SANDBOX".into(), "1".into())]),
        )
--- a/codex-rs/execpolicy/Cargo.toml
+++ b/codex-rs/execpolicy/Cargo.toml
@@ -15,19 +15,19 @@ path = "src/lib.rs"
 workspace = true

 [dependencies]
-allocative = "0.3.3"
-anyhow = "1"
-clap = { version = "4", features = ["derive"] }
-derive_more = { version = "2", features = ["display"] }
-env_logger = "0.11.5"
-log = "0.4"
-multimap = "0.10.0"
-path-absolutize = "3.1.1"
-regex-lite = "0.1"
-serde = { version = "1", features = ["derive"] }
-serde_json = "1"
-serde_with = { version = "3", features = ["macros"] }
-starlark = "0.13.0"
+allocative = { workspace = true }
+anyhow = { workspace = true }
+clap = { workspace = true, features = ["derive"] }
+derive_more = { workspace = true, features = ["display"] }
+env_logger = { workspace = true }
+log = { workspace = true }
+multimap = { workspace = true }
+path-absolutize = { workspace = true }
+regex-lite = { workspace = true }
+serde = { workspace = true, features = ["derive"] }
+serde_json = { workspace = true }
+serde_with = { workspace = true, features = ["macros"] }
+starlark = { workspace = true }

 [dev-dependencies]
-tempfile = "3.13.0"
+tempfile = { workspace = true }
--- a/codex-rs/execpolicy/src/execv_checker.rs
+++ b/codex-rs/execpolicy/src/execv_checker.rs
@@ -1,3 +1,4 @@
+use std::borrow::Cow;
 use std::ffi::OsString;
 use std::path::Path;
 use std::path::PathBuf;
@@ -108,7 +109,7 @@ fn ensure_absolute_path(path: &str, cwd: &Option<OsString>) -> Result<PathBuf> {
        file.absolutize()
    };
    result
-        .map(|path| path.into_owned())
+        .map(Cow::into_owned)
        .map_err(|error| CannotCanonicalizePath {
            file: path.to_string(),
            error: error.kind(),
--- a/codex-rs/execpolicy/src/main.rs
+++ b/codex-rs/execpolicy/src/main.rs
@@ -10,6 +10,7 @@ use codex_execpolicy::get_default_policy;
 use serde::Deserialize;
 use serde::Serialize;
 use serde::de;
+use starlark::Error as StarlarkError;
 use std::path::PathBuf;
 use std::str::FromStr;

@@ -71,13 +72,13 @@ fn main() -> Result<()> {
        }
        None => get_default_policy(),
    };
-    let policy = policy.map_err(|err| err.into_anyhow())?;
+    let policy = policy.map_err(StarlarkError::into_anyhow)?;

    let exec = match args.command {
        Command::Check { command } => match command.split_first() {
            Some((first, rest)) => ExecArg {
                program: first.to_string(),
-                args: rest.iter().map(|s| s.to_string()).collect(),
+                args: rest.to_vec(),
            },
            None => {
                eprintln!("no command provided");
@@ -161,6 +162,6 @@ impl FromStr for ExecArg {
    type Err = anyhow::Error;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        serde_json::from_str(s).map_err(|e| e.into())
+        serde_json::from_str(s).map_err(Into::into)
    }
 }
--- a/codex-rs/execpolicy/src/program.rs
+++ b/codex-rs/execpolicy/src/program.rs
@@ -169,7 +169,7 @@ impl ProgramSpec {
            let mut options = self
                .required_options
                .difference(&matched_opt_names)
-                .map(|s| s.to_string())
+                .map(String::from)
                .collect::<Vec<_>>();
            options.sort();
            return Err(Error::MissingRequiredOptions {
--- a/codex-rs/file-search/Cargo.toml
+++ b/codex-rs/file-search/Cargo.toml
@@ -12,10 +12,10 @@ name = "codex_file_search"
 path = "src/lib.rs"

 [dependencies]
-anyhow = "1"
-clap = { version = "4", features = ["derive"] }
-ignore = "0.4.23"
-nucleo-matcher = "0.3.1"
-serde = { version = "1", features = ["derive"] }
-serde_json = "1"
-tokio = { version = "1", features = ["full"] }
+anyhow = { workspace = true }
+clap = { workspace = true, features = ["derive"] }
+ignore = { workspace = true }
+nucleo-matcher = { workspace = true }
+serde = { workspace = true, features = ["derive"] }
+serde_json = { workspace = true }
+tokio = { workspace = true, features = ["full"] }
--- a/codex-rs/linux-sandbox/Cargo.toml
+++ b/codex-rs/linux-sandbox/Cargo.toml
@@ -15,15 +15,15 @@ path = "src/lib.rs"
 workspace = true

 [target.'cfg(target_os = "linux")'.dependencies]
-clap = { version = "4", features = ["derive"] }
-codex-core = { path = "../core" }
-landlock = "0.4.1"
-libc = "0.2.175"
-seccompiler = "0.5.0"
+clap = { workspace = true, features = ["derive"] }
+codex-core = { workspace = true }
+landlock = { workspace = true }
+libc = { workspace = true }
+seccompiler = { workspace = true }

 [target.'cfg(target_os = "linux")'.dev-dependencies]
-tempfile = "3"
-tokio = { version = "1", features = [
+tempfile = { workspace = true }
+tokio = { workspace = true, features = [
    "io-std",
    "macros",
    "process",
--- a/codex-rs/linux-sandbox/tests/suite/landlock.rs
+++ b/codex-rs/linux-sandbox/tests/suite/landlock.rs
@@ -35,9 +35,11 @@ fn create_env_from_core_vars() -> HashMap<String, String> {

 #[expect(clippy::print_stdout, clippy::expect_used, clippy::unwrap_used)]
 async fn run_cmd(cmd: &[&str], writable_roots: &[PathBuf], timeout_ms: u64) {
+    let cwd = std::env::current_dir().expect("cwd should exist");
+    let sandbox_cwd = cwd.clone();
    let params = ExecParams {
-        command: cmd.iter().map(|elm| elm.to_string()).collect(),
-        cwd: std::env::current_dir().expect("cwd should exist"),
+        command: cmd.iter().copied().map(str::to_owned).collect(),
+        cwd,
        timeout_ms: Some(timeout_ms),
        env: create_env_from_core_vars(),
        with_escalated_permissions: None,
@@ -59,6 +61,7 @@ async fn run_cmd(cmd: &[&str], writable_roots: &[PathBuf], timeout_ms: u64) {
        params,
        SandboxType::LinuxSeccomp,
        &sandbox_policy,
+        sandbox_cwd.as_path(),
        &codex_linux_sandbox_exe,
        None,
    )
@@ -133,8 +136,9 @@ async fn test_timeout() {
 #[expect(clippy::expect_used)]
 async fn assert_network_blocked(cmd: &[&str]) {
    let cwd = std::env::current_dir().expect("cwd should exist");
+    let sandbox_cwd = cwd.clone();
    let params = ExecParams {
-        command: cmd.iter().map(|s| s.to_string()).collect(),
+        command: cmd.iter().copied().map(str::to_owned).collect(),
        cwd,
        // Give the tool a generous 2-second timeout so even slow DNS timeouts
        // do not stall the suite.
@@ -151,6 +155,7 @@ async fn assert_network_blocked(cmd: &[&str]) {
        params,
        SandboxType::LinuxSeccomp,
        &sandbox_policy,
+        sandbox_cwd.as_path(),
        &codex_linux_sandbox_exe,
        None,
    )
--- a/codex-rs/login/Cargo.toml
+++ b/codex-rs/login/Cargo.toml
@@ -7,27 +7,28 @@ version = { workspace = true }
 workspace = true

 [dependencies]
-base64 = "0.22"
-chrono = { version = "0.4", features = ["serde"] }
-codex-core = { path = "../core" }
-codex-protocol = { path = "../protocol" }
-rand = "0.8"
-reqwest = { version = "0.12", features = ["json", "blocking"] }
-serde = { version = "1", features = ["derive"] }
-serde_json = "1"
-sha2 = "0.10"
-tempfile = "3"
-tiny_http = "0.12"
-tokio = { version = "1", features = [
+base64 = { workspace = true }
+chrono = { workspace = true, features = ["serde"] }
+codex-core = { workspace = true }
+codex-protocol = { workspace = true }
+rand = { workspace = true }
+reqwest = { workspace = true, features = ["json", "blocking"] }
+serde = { workspace = true, features = ["derive"] }
+serde_json = { workspace = true }
+sha2 = { workspace = true }
+tempfile = { workspace = true }
+tiny_http = { workspace = true }
+tokio = { workspace = true, features = [
    "io-std",
    "macros",
    "process",
    "rt-multi-thread",
    "signal",
 ] }
-url = "2"
-urlencoding = "2.1"
-webbrowser = "1.0"
+url = { workspace = true }
+urlencoding = { workspace = true }
+webbrowser = { workspace = true }

 [dev-dependencies]
-tempfile = "3"
+core_test_support = { workspace = true }
+tempfile = { workspace = true }
--- a/codex-rs/login/src/pkce.rs
+++ b/codex-rs/login/src/pkce.rs
@@ -11,7 +11,7 @@ pub struct PkceCodes {

 pub fn generate_pkce() -> PkceCodes {
    let mut bytes = [0u8; 64];
-    rand::thread_rng().fill_bytes(&mut bytes);
+    rand::rng().fill_bytes(&mut bytes);

    // Verifier: URL-safe base64 without padding (43..128 chars)
    let code_verifier = base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(bytes);
--- a/codex-rs/login/src/server.rs
+++ b/codex-rs/login/src/server.rs
@@ -20,6 +20,7 @@ use codex_core::default_client::ORIGINATOR;
 use codex_core::token_data::TokenData;
 use codex_core::token_data::parse_id_token;
 use rand::RngCore;
+use serde_json::Value as JsonValue;
 use tiny_http::Header;
 use tiny_http::Request;
 use tiny_http::Response;
@@ -326,7 +327,7 @@ fn build_authorize_url(

 fn generate_state() -> String {
    let mut bytes = [0u8; 32];
-    rand::thread_rng().fill_bytes(&mut bytes);
+    rand::rng().fill_bytes(&mut bytes);
    base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(bytes)
 }

@@ -496,11 +497,11 @@ fn compose_success_url(port: u16, issuer: &str, id_token: &str, access_token: &s
        .unwrap_or("");
    let completed_onboarding = token_claims
        .get("completed_platform_onboarding")
-        .and_then(|v| v.as_bool())
+        .and_then(JsonValue::as_bool)
        .unwrap_or(false);
    let is_org_owner = token_claims
        .get("is_org_owner")
-        .and_then(|v| v.as_bool())
+        .and_then(JsonValue::as_bool)
        .unwrap_or(false);
    let needs_setup = (!completed_onboarding) && is_org_owner;
    let plan_type = access_claims
--- a/codex-rs/login/tests/suite/login_server_e2e.rs
+++ b/codex-rs/login/tests/suite/login_server_e2e.rs
@@ -8,10 +8,10 @@ use std::time::Duration;
 use base64::Engine;
 use codex_login::ServerOptions;
 use codex_login::run_login_server;
+use core_test_support::non_sandbox_test;
 use tempfile::tempdir;

 // See spawn.rs for details
-pub const CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR: &str = "CODEX_SANDBOX_NETWORK_DISABLED";

 fn start_mock_issuer() -> (SocketAddr, thread::JoinHandle<()>) {
    // Bind to a random available port
@@ -77,12 +77,7 @@ fn start_mock_issuer() -> (SocketAddr, thread::JoinHandle<()>) {

 #[tokio::test]
 async fn end_to_end_login_flow_persists_auth_json() {
-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return;
-    }
+    non_sandbox_test!();

    let (issuer_addr, issuer_handle) = start_mock_issuer();
    let issuer = format!("http://{}:{}", issuer_addr.ip(), issuer_addr.port());
@@ -152,12 +147,7 @@ async fn end_to_end_login_flow_persists_auth_json() {

 #[tokio::test]
 async fn creates_missing_codex_home_dir() {
-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return;
-    }
+    non_sandbox_test!();

    let (issuer_addr, _issuer_handle) = start_mock_issuer();
    let issuer = format!("http://{}:{}", issuer_addr.ip(), issuer_addr.port());
@@ -196,12 +186,7 @@ async fn creates_missing_codex_home_dir() {

 #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
 async fn cancels_previous_login_server_when_port_is_in_use() {
-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return;
-    }
+    non_sandbox_test!();

    let (issuer_addr, _issuer_handle) = start_mock_issuer();
    let issuer = format!("http://{}:{}", issuer_addr.ip(), issuer_addr.port());
--- a/codex-rs/mcp-client/Cargo.toml
+++ b/codex-rs/mcp-client/Cargo.toml
@@ -7,13 +7,13 @@ edition = "2024"
 workspace = true

 [dependencies]
-anyhow = "1"
-mcp-types = { path = "../mcp-types" }
-serde = { version = "1", features = ["derive"] }
-serde_json = "1"
-tracing = { version = "0.1.41", features = ["log"] }
-tracing-subscriber = { version = "0.3", features = ["fmt", "env-filter"] }
-tokio = { version = "1", features = [
+anyhow = { workspace = true }
+mcp-types = { workspace = true }
+serde = { workspace = true, features = ["derive"] }
+serde_json = { workspace = true }
+tracing = { workspace = true, features = ["log"] }
+tracing-subscriber = { workspace = true, features = ["fmt", "env-filter"] }
+tokio = { workspace = true, features = [
    "io-util",
    "macros",
    "process",
--- a/codex-rs/mcp-server/Cargo.toml
+++ b/codex-rs/mcp-server/Cargo.toml
@@ -15,34 +15,35 @@ path = "src/lib.rs"
 workspace = true

 [dependencies]
-anyhow = "1"
-codex-arg0 = { path = "../arg0" }
-codex-common = { path = "../common", features = ["cli"] }
-codex-core = { path = "../core" }
-codex-login = { path = "../login" }
-codex-protocol = { path = "../protocol" }
-mcp-types = { path = "../mcp-types" }
-schemars = "0.8.22"
-serde = { version = "1", features = ["derive"] }
-serde_json = "1"
-shlex = "1.3.0"
-tokio = { version = "1", features = [
+anyhow = { workspace = true }
+codex-arg0 = { workspace = true }
+codex-common = { workspace = true, features = ["cli"] }
+codex-core = { workspace = true }
+codex-login = { workspace = true }
+codex-protocol = { workspace = true }
+mcp-types = { workspace = true }
+schemars = { workspace = true }
+serde = { workspace = true, features = ["derive"] }
+serde_json = { workspace = true }
+shlex = { workspace = true }
+tokio = { workspace = true, features = [
    "io-std",
    "macros",
    "process",
    "rt-multi-thread",
    "signal",
 ] }
-toml = "0.9"
-tracing = { version = "0.1.41", features = ["log"] }
-tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] }
-uuid = { version = "1", features = ["serde", "v4"] }
+toml = { workspace = true }
+tracing = { workspace = true, features = ["log"] }
+tracing-subscriber = { workspace = true, features = ["env-filter", "fmt"] }
+uuid = { workspace = true, features = ["serde", "v7"] }

 [dev-dependencies]
-assert_cmd = "2"
-base64 = "0.22"
-mcp_test_support = { path = "tests/common" }
-os_info = "3.12.0"
-pretty_assertions = "1.4.1"
-tempfile = "3"
-wiremock = "0.6"
+assert_cmd = { workspace = true }
+base64 = { workspace = true }
+core_test_support = { workspace = true }
+mcp_test_support = { workspace = true }
+os_info = { workspace = true }
+pretty_assertions = { workspace = true }
+tempfile = { workspace = true }
+wiremock = { workspace = true }
--- a/codex-rs/mcp-server/src/codex_message_processor.rs
+++ b/codex-rs/mcp-server/src/codex_message_processor.rs
@@ -589,12 +589,14 @@ impl CodexMessageProcessor {
        let codex_linux_sandbox_exe = self.config.codex_linux_sandbox_exe.clone();
        let outgoing = self.outgoing.clone();
        let req_id = request_id;
+        let sandbox_cwd = self.config.cwd.clone();

        tokio::spawn(async move {
            match codex_core::exec::process_exec_tool_call(
                exec_params,
                sandbox_type,
                &effective_policy,
+                sandbox_cwd.as_path(),
                &codex_linux_sandbox_exe,
                None,
            )
@@ -814,7 +816,7 @@ impl CodexMessageProcessor {
            return;
        };

-        let required_suffix = format!("{}.jsonl", conversation_id.0);
+        let required_suffix = format!("{conversation_id}.jsonl");
        let Some(file_name) = canonical_rollout_path.file_name().map(OsStr::to_owned) else {
            let error = JSONRPCErrorError {
                code: INVALID_REQUEST_ERROR_CODE,
@@ -1414,13 +1416,13 @@ mod tests {
    #[test]
    fn extract_conversation_summary_prefers_plain_user_messages() {
        let conversation_id =
-            ConversationId(Uuid::parse_str("3f941c35-29b3-493b-b0a4-e25800d9aeb0").unwrap());
+            ConversationId::from_string("3f941c35-29b3-493b-b0a4-e25800d9aeb0").unwrap();
        let timestamp = Some("2025-09-05T16:53:11.850Z".to_string());
        let path = PathBuf::from("rollout.jsonl");

        let head = vec![
            json!({
-                "id": conversation_id.0,
+                "id": conversation_id.to_string(),
                "timestamp": timestamp,
                "cwd": "/",
                "originator": "codex",
--- a/codex-rs/mcp-server/src/codex_tool_runner.rs
+++ b/codex-rs/mcp-server/src/codex_tool_runner.rs
@@ -276,6 +276,7 @@ async fn run_codex_tool_session_inner(
                    | EventMsg::WebSearchEnd(_)
                    | EventMsg::GetHistoryEntryResponse(_)
                    | EventMsg::PlanUpdate(_)
+                    | EventMsg::CompactApprovalRequest(_)
                    | EventMsg::TurnAborted(_)
                    | EventMsg::ConversationPath(_)
                    | EventMsg::UserMessage(_)
--- a/codex-rs/mcp-server/src/exec_approval.rs
+++ b/codex-rs/mcp-server/src/exec_approval.rs
@@ -58,7 +58,7 @@ pub(crate) async fn handle_exec_approval_request(
    call_id: String,
 ) {
    let escaped_command =
-        shlex::try_join(command.iter().map(|s| s.as_str())).unwrap_or_else(|_| command.join(" "));
+        shlex::try_join(command.iter().map(String::as_str)).unwrap_or_else(|_| command.join(" "));
    let message = format!(
        "Allow Codex to run `{escaped_command}` in `{cwd}`?",
        cwd = cwd.to_string_lossy()
--- a/codex-rs/mcp-server/src/message_processor.rs
+++ b/codex-rs/mcp-server/src/message_processor.rs
@@ -36,7 +36,6 @@ use serde_json::json;
 use std::sync::Arc;
 use tokio::sync::Mutex;
 use tokio::task;
-use uuid::Uuid;

 pub(crate) struct MessageProcessor {
    codex_message_processor: CodexMessageProcessor,
@@ -484,8 +483,8 @@ impl MessageProcessor {
                return;
            }
        };
-        let conversation_id = match Uuid::parse_str(&conversation_id) {
-            Ok(id) => ConversationId::from(id),
+        let conversation_id = match ConversationId::from_string(&conversation_id) {
+            Ok(id) => id,
            Err(e) => {
                tracing::error!("Failed to parse conversation_id: {e}");
                let result = CallToolResult {
--- a/codex-rs/mcp-server/tests/common/Cargo.toml
+++ b/codex-rs/mcp-server/tests/common/Cargo.toml
@@ -7,20 +7,20 @@ version = { workspace = true }
 path = "lib.rs"

 [dependencies]
-anyhow = "1"
-assert_cmd = "2"
-codex-core = { path = "../../../core" }
-codex-mcp-server = { path = "../.." }
-codex-protocol = { path = "../../../protocol" }
-mcp-types = { path = "../../../mcp-types" }
-os_info = "3.12.0"
-pretty_assertions = "1.4.1"
-serde = { version = "1" }
-serde_json = "1"
-tokio = { version = "1", features = [
+anyhow = { workspace = true }
+assert_cmd = { workspace = true }
+codex-core = { workspace = true }
+codex-mcp-server = { workspace = true }
+codex-protocol = { workspace = true }
+mcp-types = { workspace = true }
+os_info = { workspace = true }
+pretty_assertions = { workspace = true }
+serde = { workspace = true }
+serde_json = { workspace = true }
+tokio = { workspace = true, features = [
    "io-std",
    "macros",
    "process",
    "rt-multi-thread",
 ] }
-wiremock = "0.6"
+wiremock = { workspace = true }
--- a/codex-rs/mcp-server/tests/suite/codex_tool.rs
+++ b/codex-rs/mcp-server/tests/suite/codex_tool.rs
@@ -24,6 +24,7 @@ use tempfile::TempDir;
 use tokio::time::timeout;
 use wiremock::MockServer;

+use core_test_support::non_sandbox_test;
 use mcp_test_support::McpProcess;
 use mcp_test_support::create_apply_patch_sse_response;
 use mcp_test_support::create_final_assistant_message_sse_response;
@@ -172,7 +173,7 @@ fn create_expected_elicitation_request(
 ) -> anyhow::Result<JSONRPCRequest> {
    let expected_message = format!(
        "Allow Codex to run `{}` in `{}`?",
-        shlex::try_join(command.iter().map(|s| s.as_ref()))?,
+        shlex::try_join(command.iter().map(std::convert::AsRef::as_ref))?,
        workdir.to_string_lossy()
    );
    Ok(JSONRPCRequest {
@@ -307,12 +308,7 @@ async fn patch_approval_triggers_elicitation() -> anyhow::Result<()> {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn test_codex_tool_passes_base_instructions() {
-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return;
-    }
+    non_sandbox_test!();

    // Apparently `#[tokio::test]` must return `()`, so we create a helper
    // function that returns `Result` so we can use `?` in favor of `unwrap`.
--- a/codex-rs/mcp-server/tests/suite/interrupt.rs
+++ b/codex-rs/mcp-server/tests/suite/interrupt.rs
@@ -4,7 +4,6 @@
 use std::path::Path;

 use codex_core::protocol::TurnAbortReason;
-use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
 use codex_protocol::mcp_protocol::AddConversationListenerParams;
 use codex_protocol::mcp_protocol::InterruptConversationParams;
 use codex_protocol::mcp_protocol::InterruptConversationResponse;
@@ -12,6 +11,7 @@ use codex_protocol::mcp_protocol::NewConversationParams;
 use codex_protocol::mcp_protocol::NewConversationResponse;
 use codex_protocol::mcp_protocol::SendUserMessageParams;
 use codex_protocol::mcp_protocol::SendUserMessageResponse;
+use core_test_support::non_sandbox_test;
 use mcp_types::JSONRPCResponse;
 use mcp_types::RequestId;
 use tempfile::TempDir;
@@ -26,12 +26,7 @@ const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn test_shell_command_interruption() {
-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return;
-    }
+    non_sandbox_test!();

    if let Err(err) = shell_command_interruption().await {
        panic!("failure: {err}");
--- a/codex-rs/mcp-server/tests/suite/list_resume.rs
+++ b/codex-rs/mcp-server/tests/suite/list_resume.rs
@@ -142,7 +142,7 @@ async fn test_list_and_resume_conversations() {
    } = to_response::<ResumeConversationResponse>(resume_resp)
        .expect("deserialize resumeConversation response");
    // conversation id should be a valid UUID
-    let _: uuid::Uuid = conversation_id.into();
+    assert!(!conversation_id.to_string().is_empty());
 }

 fn create_fake_rollout(codex_home: &Path, filename_ts: &str, meta_rfc3339: &str, preview: &str) {
--- a/codex-rs/mcp-server/tests/suite/send_message.rs
+++ b/codex-rs/mcp-server/tests/suite/send_message.rs
@@ -136,7 +136,7 @@ async fn test_send_message_session_not_found() {
        .expect("timeout")
        .expect("init");

-    let unknown = ConversationId(uuid::Uuid::new_v4());
+    let unknown = ConversationId::new();
    let req_id = mcp
        .send_send_user_message_request(SendUserMessageParams {
            conversation_id: unknown,
--- a/codex-rs/mcp-types/Cargo.toml
+++ b/codex-rs/mcp-types/Cargo.toml
@@ -7,6 +7,6 @@ version = { workspace = true }
 workspace = true

 [dependencies]
-serde = { version = "1", features = ["derive"] }
-serde_json = "1"
-ts-rs = { version = "11", features = ["serde-json-impl", "no-serde-warnings"] }
+serde = { workspace = true, features = ["derive"] }
+serde_json = { workspace = true }
+ts-rs = { workspace = true, features = ["serde-json-impl", "no-serde-warnings"] }
--- a/codex-rs/ollama/Cargo.toml
+++ b/codex-rs/ollama/Cargo.toml
@@ -11,20 +11,20 @@ path = "src/lib.rs"
 workspace = true

 [dependencies]
-async-stream = "0.3"
-bytes = "1.10.1"
-codex-core = { path = "../core" }
-futures = "0.3"
-reqwest = { version = "0.12", features = ["json", "stream"] }
-serde_json = "1"
-tokio = { version = "1", features = [
+async-stream = { workspace = true }
+bytes = { workspace = true }
+codex-core = { workspace = true }
+futures = { workspace = true }
+reqwest = { workspace = true, features = ["json", "stream"] }
+serde_json = { workspace = true }
+tokio = { workspace = true, features = [
    "io-std",
    "macros",
    "process",
    "rt-multi-thread",
    "signal",
 ] }
-tracing = { version = "0.1.41", features = ["log"] }
-wiremock = "0.6"
+tracing = { workspace = true, features = ["log"] }
+wiremock = { workspace = true }

 [dev-dependencies]
--- a/codex-rs/ollama/src/client.rs
+++ b/codex-rs/ollama/src/client.rs
@@ -117,7 +117,7 @@ impl OllamaClient {
            .map(|arr| {
                arr.iter()
                    .filter_map(|v| v.get("name").and_then(|n| n.as_str()))
-                    .map(|s| s.to_string())
+                    .map(str::to_string)
                    .collect::<Vec<_>>()
            })
            .unwrap_or_default();
--- a/codex-rs/ollama/src/parser.rs
+++ b/codex-rs/ollama/src/parser.rs
@@ -16,8 +16,8 @@ pub(crate) fn pull_events_from_value(value: &JsonValue) -> Vec<PullEvent> {
        .and_then(|d| d.as_str())
        .unwrap_or("")
        .to_string();
-    let total = value.get("total").and_then(|t| t.as_u64());
-    let completed = value.get("completed").and_then(|t| t.as_u64());
+    let total = value.get("total").and_then(JsonValue::as_u64);
+    let completed = value.get("completed").and_then(JsonValue::as_u64);
    if total.is_some() || completed.is_some() {
        events.push(PullEvent::ChunkProgress {
            digest,
--- a/codex-rs/protocol-ts/Cargo.toml
+++ b/codex-rs/protocol-ts/Cargo.toml
@@ -15,8 +15,8 @@ name = "codex-protocol-ts"
 path = "src/main.rs"

 [dependencies]
-anyhow = "1"
-mcp-types = { path = "../mcp-types" }
-codex-protocol = { path = "../protocol" }
-ts-rs = "11"
-clap = { version = "4", features = ["derive"] }
+anyhow = { workspace = true }
+mcp-types = { workspace = true }
+codex-protocol = { workspace = true }
+ts-rs = { workspace = true }
+clap = { workspace = true, features = ["derive"] }
--- a/codex-rs/protocol-ts/src/lib.rs
+++ b/codex-rs/protocol-ts/src/lib.rs
@@ -134,7 +134,7 @@ fn generate_index_ts(out_dir: &Path) -> Result<PathBuf> {
    }

    let mut content =
-        String::with_capacity(HEADER.len() + entries.iter().map(|s| s.len()).sum::<usize>());
+        String::with_capacity(HEADER.len() + entries.iter().map(String::len).sum::<usize>());
    content.push_str(HEADER);
    for line in &entries {
        content.push_str(line);
--- a/codex-rs/protocol/Cargo.toml
+++ b/codex-rs/protocol/Cargo.toml
@@ -11,24 +11,28 @@ path = "src/lib.rs"
 workspace = true

 [dependencies]
-base64 = "0.22.1"
-icu_decimal = "2.0.0"
-icu_locale_core = "2.0.0"
-mcp-types = { path = "../mcp-types" }
-mime_guess = "2.0.5"
-serde = { version = "1", features = ["derive"] }
-serde_json = "1"
-serde_with = { version = "3.14.0", features = ["macros", "base64"] }
-strum = "0.27.2"
-strum_macros = "0.27.2"
-sys-locale = "0.3.2"
-tracing = "0.1.41"
-ts-rs = { version = "11", features = ["uuid-impl", "serde-json-impl", "no-serde-warnings"] }
-uuid = { version = "1", features = ["serde", "v4"] }
+base64 = { workspace = true }
+icu_decimal = { workspace = true }
+icu_locale_core = { workspace = true }
+mcp-types = { workspace = true }
+mime_guess = { workspace = true }
+serde = { workspace = true, features = ["derive"] }
+serde_json = { workspace = true }
+serde_with = { workspace = true, features = ["macros", "base64"] }
+strum = { workspace = true }
+strum_macros = { workspace = true }
+sys-locale = { workspace = true }
+tracing = { workspace = true }
+ts-rs = { workspace = true, features = [
+    "uuid-impl",
+    "serde-json-impl",
+    "no-serde-warnings",
+] }
+uuid = { workspace = true, features = ["serde", "v7"] }

 [dev-dependencies]
-pretty_assertions = "1.4.1"
-tempfile = "3"
+pretty_assertions = { workspace = true }
+tempfile = { workspace = true }

 [package.metadata.cargo-shear]
 # Required because the not imported as strum_macros in non-nightly builds.
--- a/codex-rs/protocol/src/mcp_protocol.rs
+++ b/codex-rs/protocol/src/mcp_protocol.rs
@@ -19,13 +19,23 @@ use strum_macros::Display;
 use ts_rs::TS;
 use uuid::Uuid;

-#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, TS, Hash)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, TS, Hash)]
 #[ts(type = "string")]
-pub struct ConversationId(pub Uuid);
+pub struct ConversationId {
+    uuid: Uuid,
+}

 impl ConversationId {
    pub fn new() -> Self {
-        Self(Uuid::new_v4())
+        Self {
+            uuid: Uuid::now_v7(),
+        }
+    }
+
+    pub fn from_string(s: &str) -> Result<Self, uuid::Error> {
+        Ok(Self {
+            uuid: Uuid::parse_str(s)?,
+        })
    }
 }

@@ -37,19 +47,27 @@ impl Default for ConversationId {

 impl Display for ConversationId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{}", self.0)
+        write!(f, "{}", self.uuid)
    }
 }

-impl From<Uuid> for ConversationId {
-    fn from(value: Uuid) -> Self {
-        Self(value)
+impl Serialize for ConversationId {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        serializer.collect_str(&self.uuid)
    }
 }

-impl From<ConversationId> for Uuid {
-    fn from(value: ConversationId) -> Self {
-        value.0
+impl<'de> Deserialize<'de> for ConversationId {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: serde::Deserializer<'de>,
+    {
+        let value = String::deserialize(deserializer)?;
+        let uuid = Uuid::parse_str(&value).map_err(serde::de::Error::custom)?;
+        Ok(Self { uuid })
    }
 }

@@ -719,6 +737,27 @@ mod tests {
    #[test]
    fn test_conversation_id_default_is_not_zeroes() {
        let id = ConversationId::default();
-        assert_ne!(id.0, Uuid::nil());
+        assert_ne!(id.uuid, Uuid::nil());
+    }
+
+    #[test]
+    fn conversation_id_serializes_as_plain_string() {
+        let id = ConversationId::from_string("67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap();
+
+        assert_eq!(
+            json!("67e55044-10b1-426f-9247-bb680e5fe0c8"),
+            serde_json::to_value(id).unwrap()
+        );
+    }
+
+    #[test]
+    fn conversation_id_deserializes_from_plain_string() {
+        let id: ConversationId =
+            serde_json::from_value(json!("67e55044-10b1-426f-9247-bb680e5fe0c8")).unwrap();
+
+        assert_eq!(
+            ConversationId::from_string("67e55044-10b1-426f-9247-bb680e5fe0c8").unwrap(),
+            id,
+        );
    }
 }
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Daniel Edrisian	6ed3c92ed1	Simplify	2025-09-22 19:34:06 -07:00
Daniel Edrisian	a2cfb125dc	Fix	2025-09-22 17:18:30 -07:00
Daniel Edrisian	002d877c02	Auto-compact when running out of context	2025-09-22 17:00:23 -07:00
dedrisian-oai	c415827ac2	Truncate potentially long user messages in compact message. (#4068 ) If a prior user message is massive, any future `/compact` task would fail because we're verbatim copying the user message into the new chat.	2025-09-22 23:12:26 +00:00
Jeremy Rose	4e0550b995	fix codex resume message at end of session (#3957 ) This was only being printed when running the codex-tui executable directly, not via the codex-cli wrapper.	2025-09-22 22:24:31 +00:00
Jeremy Rose	f54a49157b	Fix pager overlay clear between pages (#3952 ) should fix characters sometimes hanging around while scrolling the transcript.	2025-09-22 15:12:29 -07:00
Ahmed Ibrahim	dd56750612	Change headers and struct of rate limits (#4060 )	2025-09-22 21:06:20 +00:00
dedrisian-oai	8bc73a2bfd	Fix branch mode prompt for /review (#4061 ) Updates `/review` branch mode to review against a branch's upstream.	2025-09-22 12:34:08 -07:00
jif-oai	be366a31ab	chore: clippy on redundant closure (#4058 ) Add redundant closure clippy rules and let Codex fix it by minimising FQP	2025-09-22 19:30:16 +00:00
Ahmed Ibrahim	c75920a071	Change limits warning copy (#4059 )	2025-09-22 18:52:45 +00:00
dedrisian-oai	8daba53808	feat: Add view stack to BottomPane (#4026 ) Adds a "View Stack" to the bottom pane to allow for pushing/popping bottom panels. `esc` will go back instead of dismissing. Benefit: We retain the "selection state" of a parent panel (e.g. the review panel).	2025-09-22 11:29:39 -07:00
Ahmed Ibrahim	d2940bd4c3	Remove /limits after moving to /status (#4055 ) Moved to /status #4053	2025-09-22 18:23:05 +00:00
friel-openai	76a9b11678	Tui: fix backtracking (#4020 ) Backtracking multiple times could drop earlier turns. We now derive the active user-turn positions from the transcript on demand (keying off the latest session header) instead of caching state. This keeps the replayed context intact during repeated edits and adds a regression test.	2025-09-22 11:16:25 -07:00
Jeremy Rose	fa80bbb587	simplify StreamController (#3928 ) no intended functional change, just simplifying the code.	2025-09-22 11:14:04 -07:00
Ahmed Ibrahim	434eb4fd49	Add limits to /status (#4053 ) Add limits to status <img width="579" height="430" alt="image" src="https://github.com/user-attachments/assets/d3794d92-ffca-47be-8011-b4452223cc89" />	2025-09-22 18:13:34 +00:00
Jeremy Rose	19f46439ae	timeouts for mcp tool calls (#3959 ) defaults to 60sec, overridable with MCP_TOOL_TIMEOUT or on a per-server basis in the config.	2025-09-22 10:30:59 -07:00
jif-oai	e258ca61b4	chore: more clippy rules 2 (#4057 ) The only file to watch is the cargo.toml All the others come from just fix + a few manual small fix The set of rules have been taken from the list of clippy rules arbitrarily while trying to optimise the learning and style of the code while limiting the loss of productivity	2025-09-22 17:16:02 +00:00
jif-oai	e5fe50d3ce	chore: unify cargo versions (#4044 ) Unify cargo versions at root	2025-09-22 16:47:01 +00:00
pakrym-oai	14a115d488	Add non_sandbox_test helper (#3880 ) Makes tests shorter	2025-09-22 14:50:41 +00:00
dedrisian-oai	5996ee0e5f	feat: Add more /review options (#3961 ) Adds the following options: 1. Review current changes 2. Review a specific commit 3. Review against a base branch (PR style) 4. Custom instructions <img width="487" height="330" alt="Screenshot 2025-09-20 at 2 11 36 PM" src="https://github.com/user-attachments/assets/edb0aaa5-5747-47fa-881f-cc4c4f7fe8bc" /> --- \+ Adds the following UI helpers: 1. Makes list selection searchable 2. Adds navigation to the bottom pane, so you could add a stack of popups 3. Basic custom prompt view	2025-09-21 20:18:35 -07:00
Ahmed Ibrahim	a4ebd069e5	Tui: Rate limits (#3977 ) ### /limits: show rate limits graph <img width="442" height="287" alt="image" src="https://github.com/user-attachments/assets/3e29a241-a4b0-4df8-bf71-43dc4dd805ca" /> ### Warning on close to rate limits: <img width="507" height="96" alt="image" src="https://github.com/user-attachments/assets/732a958b-d240-4a89-8289-caa92de83537" /> Based on #3965	2025-09-21 10:20:49 -07:00
Ahmed Ibrahim	04504d8218	Forward Rate limits to the UI (#3965 ) We currently get information about rate limits in the response headers. We want to forward them to the clients to have better transparency. UI/UX plans have been discussed and this information is needed.	2025-09-20 21:26:16 -07:00
Jeremy Rose	42d335deb8	Cache keyboard enhancement detection before event streams (#3950 ) Hopefully fixes incorrectly showing ^J instead of Shift+Enter in the key hints occasionally.	2025-09-19 21:38:36 +00:00
Jeremy Rose	ad0c2b4db3	don't clear screen on startup (#3925 )	2025-09-19 14:22:58 -07:00
Jeremy Rose	ff389dc52f	fix alignment in slash command popup (#3937 )	2025-09-19 19:08:04 +00:00
pakrym-oai	9b18875a42	Use helpers instead of fixtures (#3888 ) Move to using test helper method everywhere.	2025-09-19 06:46:25 -07:00
pakrym-oai	881c7978f1	Move responses mocking helpers to a shared lib (#3878 ) These are generally useful	2025-09-18 17:53:14 -07:00
Ahmed Ibrahim	a7fda70053	Use a unified shell tool to not break cache (#3814 ) Currently, we change the tool description according to the sandbox policy and approval policy. This breaks the cache when the user hits `/approvals`. This PR does the following: - Always use the shell with escalation parameter: - removes `create_shell_tool_for_sandbox` and always uses unified tool via `create_shell_tool` - Reject the func call when the model uses escalation parameter when it cannot.	2025-09-19 00:08:28 +00:00
Michael Bolin	de64f5f007	fix: update try_parse_word_only_commands_sequence() to return commands in order (#3881 ) Incidentally, we had a test for this in `accepts_multiple_commands_with_allowed_operators()`, but it was verifying the bad behavior. Oops!	2025-09-18 16:07:38 -07:00
Michael Bolin	8595237505	fix: ensure cwd for conversation and sandbox are separate concerns (#3874 ) Previous to this PR, both of these functions take a single `cwd`: `71038381aa/codex-rs/core/src/seatbelt.rs (L19-L25)` `71038381aa/codex-rs/core/src/landlock.rs (L16-L23)` whereas `cwd` and `sandbox_cwd` should be set independently (fixed in this PR). Added `sandbox_distinguishes_command_and_policy_cwds()` to `codex-rs/exec/tests/suite/sandbox.rs` to verify this.	2025-09-18 14:37:06 -07:00
dedrisian-oai	62258df92f	feat: /review (#3774 ) Adds `/review` action in TUI <img width="637" height="370" alt="Screenshot 2025-09-17 at 12 41 19 AM" src="https://github.com/user-attachments/assets/b1979a6e-844a-4b97-ab20-107c185aec1d" />	2025-09-18 14:14:16 -07:00
Jeremy Rose	b34e906396	Reland "refactor transcript view to handle HistoryCells" (#3753 ) Reland of #3538	2025-09-18 20:55:53 +00:00
Jeremy Rose	71038381aa	fix error on missing notifications in [tui] (#3867 ) Fixes #3811.	2025-09-18 11:25:09 -07:00
jif-oai	277fc6254e	chore: use tokio mutex and async function to prevent blocking a worker (#3850 ) ### Why Use `tokio::sync::Mutex` `std::sync::Mutex` are not _async-aware_. As a result, they will block the entire thread instead of just yielding the task. Furthermore they can be poisoned which is not the case of `tokio` Mutex. This allows the Tokio runtime to continue running other tasks while waiting for the lock, preventing deadlocks and performance bottlenecks. In general, this is preferred in async environment	2025-09-18 18:21:52 +01:00
jif-oai	992b531180	fix: some nit Rust reference issues (#3849 ) Fix some small references issue. No behavioural change. Just making the code cleaner	2025-09-18 18:18:06 +01:00
Jeremy Rose	84a0ba9bf5	hint for codex resume on tui exit (#3757 ) <img width="931" height="438" alt="Screenshot 2025-09-16 at 4 25 19 PM" src="https://github.com/user-attachments/assets/ccfb8df1-feaf-45b4-8f7f-56100de916d5" />	2025-09-18 09:28:32 -07:00
jif-oai	4a5d6f7c71	Make ESC button work when auto-compaction (#3857 ) Only emit a task finished when the compaction comes from a `/compact`	2025-09-18 15:34:16 +00:00
jif-oai	1b3c8b8e94	Unify animations (#3729 ) Unify the animation in a single code and add the CTRL + . in the onboarding	2025-09-18 16:27:15 +01:00
pakrym-oai	d4aba772cb	Switch to uuid_v7 and tighten ConversationId usage (#3819 ) Make sure conversations have a timestamp.	2025-09-18 14:37:03 +00:00
jif-oai	4c97eeb32a	bug: Ignore tests for now (#3777 ) Ignore flaky / long tests for now	2025-09-18 10:43:45 +01:00
Thibault Sottiaux	c9505488a1	chore: update "Codex CLI harness, sandboxing, and approvals" section (#3822 )	2025-09-17 16:48:20 -07:00
Jeremy Rose	530382db05	Use agent reply text in turn notifications (#3756 ) Instead of "Agent turn complete", turn-complete notifications now include the first handful of chars from the agent's final message.	2025-09-17 11:23:46 -07:00
Abhishek Bhardwaj	208089e58e	AGENTS.md: Add instruction to install missing commands (#3807 ) This change instructs the model to install any missing command. Else tokens are wasted when it tries to run commands that aren't available multiple times before installing them.	2025-09-17 11:06:59 -07:00