compaction

Clarify external editor env var message (#10030)
### Motivation

- Improve UX by making it explicit that `VISUAL`/`EDITOR` must be set before launching Codex, not during a running session.

### Description

- Update the external editor error text in `codex-rs/tui/src/app.rs` to: `"Cannot open external editor: set $VISUAL or $EDITOR before starting Codex."` and run `just fmt` to apply formatting.

### Testing

- Ran `just fmt` successfully; attempted `cargo test -p codex-tui`, but it failed due to network errors when fetching git dependencies (tests did not complete).

------

[Codex Task](https://chatgpt.com/codex/tasks/task_i_6972c2c984948329b1a37d5c5839aff3)
2026-02-03 15:33:41 +00:00 · 2026-01-27 14:02:38 -08:00 · 2026-01-27 13:29:55 -08:00 · 2026-01-27 13:22:54 -08:00 · 2026-01-27 21:14:08 +00:00 · 2026-01-27 20:09:05 +00:00
169 changed files with 5510 additions and 1842 deletions
--- a/.codespellrc
+++ b/.codespellrc
@@ -1,6 +1,6 @@
 [codespell]
 # Ref: https://github.com/codespell-project/codespell#using-a-config-file
-skip = .git*,vendor,*-lock.yaml,*.lock,.codespellrc,*test.ts,*.jsonl,frame*.txt
+skip = .git*,vendor,*-lock.yaml,*.lock,.codespellrc,*test.ts,*.jsonl,frame*.txt,*.snap,*.snap.new
 check-hidden = true
 ignore-regex = ^\s*"image/\S+": ".*|\b(afterAll)\b
 ignore-words-list = ratatui,ser,iTerm,iterm2,iterm
--- a/.github/workflows/rust-release.yml
+++ b/.github/workflows/rust-release.yml
@@ -252,6 +252,7 @@ jobs:
          # Path that contains the uncompressed binaries for the current
          # ${{ matrix.target }}
          dest="dist/${{ matrix.target }}"
+          repo_root=$PWD

          # We want to ship the raw Windows executables in the GitHub Release
          # in addition to the compressed archives. Keep the originals for
@@ -305,7 +306,7 @@ jobs:
                  cp "$setup_src" "$bundle_dir/codex-windows-sandbox-setup.exe"
                  # Use an absolute path so bundle zips land in the real dist
                  # dir even when 7z runs from a temp directory.
-                  (cd "$bundle_dir" && 7z a "$(pwd)/$dest/${base}.zip" .)
+                  (cd "$bundle_dir" && 7z a "$repo_root/$dest/${base}.zip" .)
                else
                  echo "warning: missing sandbox binaries; falling back to single-binary zip"
                  echo "warning: expected $runner_src and $setup_src"
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -11,6 +11,7 @@ In the codex-rs folder where the rust code lives:
 - Always collapse if statements per https://rust-lang.github.io/rust-clippy/master/index.html#collapsible_if
 - Always inline format! args when possible per https://rust-lang.github.io/rust-clippy/master/index.html#uninlined_format_args
 - Use method references over closures when possible per https://rust-lang.github.io/rust-clippy/master/index.html#redundant_closure_for_method_calls
+- When possible, make `match` statements exhaustive and avoid wildcard arms.
 - When writing tests, prefer comparing the equality of entire objects over fields one by one.
 - When making a change that adds or changes an API, ensure that the documentation in the `docs/` folder is up to date if applicable.
 - If you change `ConfigToml` or nested config types, run `just write-config-schema` to update `codex-rs/core/config.schema.json`.
--- a/PNPM.md
+++ b/PNPM.md
@@ -15,7 +15,7 @@ This project has been migrated from npm to pnpm to improve dependency management

 ```bash
 # Global installation of pnpm
-npm install -g pnpm@10.8.1
+npm install -g pnpm@10.28.2

 # Or with corepack (available with Node.js 22+)
 corepack enable
@@ -59,12 +59,12 @@ codex/

 ## CI/CD

-CI/CD workflows have been updated to use pnpm instead of npm. Make sure your CI environments use pnpm 10.8.1 or higher.
+CI/CD workflows have been updated to use pnpm instead of npm. Make sure your CI environments use pnpm 10.28.2 or higher.

 ## Known issues

 If you encounter issues with pnpm, try the following solutions:

 1. Remove the `node_modules` folder and `pnpm-lock.yaml` file, then run `pnpm install`
-2. Make sure you're using pnpm 10.8.1 or higher
+2. Make sure you're using pnpm 10.28.2 or higher
 3. Verify that Node.js 22 or higher is installed
--- a/codex-rs/Cargo.lock
+++ b/codex-rs/Cargo.lock
@@ -361,7 +361,7 @@ dependencies = [
 "objc2-foundation",
 "parking_lot",
 "percent-encoding",
- "windows-sys 0.60.2",
+ "windows-sys 0.52.0",
 "wl-clipboard-rs",
 "x11rb",
 ]
@@ -616,9 +616,9 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"

 [[package]]
 name = "axum"
-version = "0.8.4"
+version = "0.8.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "021e862c184ae977658b36c4500f7feac3221ca5da43e3f25bd04ab6c79a29b5"
+checksum = "8b52af3cb4058c895d37317bb27508dccc8e5f2d39454016b297bf4a400597b8"
 dependencies = [
 "axum-core",
 "bytes",
@@ -634,8 +634,7 @@ dependencies = [
 "mime",
 "percent-encoding",
 "pin-project-lite",
- "rustversion",
- "serde",
+ "serde_core",
 "serde_json",
 "serde_path_to_error",
 "sync_wrapper",
@@ -647,9 +646,9 @@ dependencies = [

 [[package]]
 name = "axum-core"
-version = "0.5.2"
+version = "0.5.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "68464cd0412f486726fb3373129ef5d2993f90c34bc2bc1c1e9943b2f4fc7ca6"
+checksum = "08c78f31d7b1291f7ee735c1c6780ccde7785daae9a9206026862dab7d8792d1"
 dependencies = [
 "bytes",
 "futures-core",
@@ -658,7 +657,6 @@ dependencies = [
 "http-body-util",
 "mime",
 "pin-project-lite",
- "rustversion",
 "sync_wrapper",
 "tower-layer",
 "tower-service",
@@ -1685,6 +1683,7 @@ dependencies = [
 "rama-http",
 "rama-http-backend",
 "rama-net",
+ "rama-socks5",
 "rama-tcp",
 "rama-tls-boring",
 "rama-unix",
@@ -2906,7 +2905,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad"
 dependencies = [
 "libc",
- "windows-sys 0.60.2",
+ "windows-sys 0.52.0",
 ]

 [[package]]
@@ -3006,7 +3005,7 @@ checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78"
 dependencies = [
 "cfg-if",
 "rustix 1.0.8",
- "windows-sys 0.59.0",
+ "windows-sys 0.52.0",
 ]

 [[package]]
@@ -3311,7 +3310,7 @@ dependencies = [
 "libc",
 "log",
 "rustversion",
- "windows-link 0.2.0",
+ "windows-link 0.1.3",
 "windows-result 0.3.4",
 ]

@@ -3385,9 +3384,9 @@ checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"

 [[package]]
 name = "globset"
-version = "0.4.16"
+version = "0.4.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "54a1028dfc5f5df5da8a56a73e6c153c9a9708ec57232470703592a3f18e49f5"
+checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3"
 dependencies = [
 "aho-corasick",
 "bstr",
@@ -3716,7 +3715,7 @@ dependencies = [
 "libc",
 "percent-encoding",
 "pin-project-lite",
- "socket2 0.5.10",
+ "socket2 0.6.1",
 "system-configuration",
 "tokio",
 "tower-service",
@@ -4093,7 +4092,7 @@ checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9"
 dependencies = [
 "hermit-abi",
 "libc",
- "windows-sys 0.59.0",
+ "windows-sys 0.52.0",
 ]

 [[package]]
@@ -5632,7 +5631,7 @@ dependencies = [
 "quinn-udp",
 "rustc-hash",
 "rustls",
- "socket2 0.5.10",
+ "socket2 0.6.1",
 "thiserror 2.0.17",
 "tokio",
 "tracing",
@@ -5669,9 +5668,9 @@ dependencies = [
 "cfg_aliases 0.2.1",
 "libc",
 "once_cell",
- "socket2 0.5.10",
+ "socket2 0.6.1",
 "tracing",
- "windows-sys 0.60.2",
+ "windows-sys 0.52.0",
 ]

 [[package]]
@@ -5961,6 +5960,21 @@ dependencies = [
 "tokio",
 ]

+[[package]]
+name = "rama-socks5"
+version = "0.3.0-alpha.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5468b263516daaf258de32542c1974b7cbe962363ad913dcb669f5d46db0ef3e"
+dependencies = [
+ "byteorder",
+ "rama-core",
+ "rama-net",
+ "rama-tcp",
+ "rama-udp",
+ "rama-utils",
+ "tokio",
+]
+
 [[package]]
 name = "rama-tcp"
 version = "0.3.0-alpha.4"
@@ -5999,6 +6013,18 @@ dependencies = [
 "tokio",
 ]

+[[package]]
+name = "rama-udp"
+version = "0.3.0-alpha.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "36ed05e0ecac73e084e92a3a8b1fbf16fdae8958c506f0f0eada180a2d99eef4"
+dependencies = [
+ "rama-core",
+ "rama-net",
+ "tokio",
+ "tokio-util",
+]
+
 [[package]]
 name = "rama-unix"
 version = "0.3.0-alpha.4"
@@ -6366,7 +6392,7 @@ dependencies = [
 "errno",
 "libc",
 "linux-raw-sys 0.4.15",
- "windows-sys 0.59.0",
+ "windows-sys 0.52.0",
 ]

 [[package]]
@@ -6379,7 +6405,7 @@ dependencies = [
 "errno",
 "libc",
 "linux-raw-sys 0.9.4",
- "windows-sys 0.60.2",
+ "windows-sys 0.52.0",
 ]

 [[package]]
@@ -7466,7 +7492,7 @@ dependencies = [
 "getrandom 0.3.3",
 "once_cell",
 "rustix 1.0.8",
- "windows-sys 0.61.1",
+ "windows-sys 0.52.0",
 ]

 [[package]]
@@ -7797,12 +7823,10 @@ dependencies = [

 [[package]]
 name = "tokio-test"
-version = "0.4.4"
+version = "0.4.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2468baabc3311435b55dd935f702f42cd1b8abb7e754fb7dfb16bd36aa88f9f7"
+checksum = "3f6d24790a10a7af737693a3e8f1d03faef7e6ca0cc99aae5066f533766de545"
 dependencies = [
- "async-stream",
- "bytes",
 "futures-core",
 "tokio",
 "tokio-stream",
@@ -8001,9 +8025,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3"

 [[package]]
 name = "tracing"
-version = "0.1.43"
+version = "0.1.44"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2d15d90a0b5c19378952d479dc858407149d7bb45a14de0142f6c534b16fc647"
+checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100"
 dependencies = [
 "log",
 "pin-project-lite",
@@ -8036,9 +8060,9 @@ dependencies = [

 [[package]]
 name = "tracing-core"
-version = "0.1.35"
+version = "0.1.36"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7a04e24fab5c89c6a36eb8558c9656f30d81de51dfa4d3b45f26b21d61fa0a6c"
+checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a"
 dependencies = [
 "once_cell",
 "valuable",
@@ -8750,7 +8774,7 @@ version = "0.1.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
 dependencies = [
- "windows-sys 0.59.0",
+ "windows-sys 0.48.0",
 ]

 [[package]]
--- a/codex-rs/Cargo.toml
+++ b/codex-rs/Cargo.toml
@@ -216,7 +216,7 @@ tokio-tungstenite = { version = "0.28.0", features = ["proxy", "rustls-tls-nativ
 tokio-util = "0.7.18"
 toml = "0.9.5"
 toml_edit = "0.24.0"
-tracing = "0.1.43"
+tracing = "0.1.44"
 tracing-appender = "0.2.3"
 tracing-subscriber = "0.3.22"
 tracing-test = "0.2.5"
--- a/codex-rs/README.md
+++ b/codex-rs/README.md
@@ -15,8 +15,8 @@ You can also install via Homebrew (`brew install --cask codex`) or download a pl

 ## Documentation quickstart

- First run with Codex? Start with the [Getting Started guide](https://developers.openai.com/codex) (links to the walkthrough for prompts, keyboard shortcuts, and session management).
- Want deeper control? See [Configuration documentation](https://developers.openai.com/codex/config-advanced/).
+- First run with Codex? Start with [`docs/getting-started.md`](../docs/getting-started.md) (links to the walkthrough for prompts, keyboard shortcuts, and session management).
+- Want deeper control? See [`docs/config.md`](../docs/config.md) and [`docs/install.md`](../docs/install.md).

 ## What's new in the Rust CLI

@@ -24,13 +24,13 @@ The Rust implementation is now the maintained Codex CLI and serves as the defaul

 ### Config

-Codex supports a rich set of configuration options. Note that the Rust CLI uses `config.toml` instead of `config.json`. See [Configuration documentation](https://developers.openai.com/codex/config-advanced/) for details.
+Codex supports a rich set of configuration options. Note that the Rust CLI uses `config.toml` instead of `config.json`. See [`docs/config.md`](../docs/config.md) for details.

 ### Model Context Protocol Support

 #### MCP client

-Codex CLI functions as an MCP client that allows the Codex CLI and IDE extension to connect to MCP servers on startup. See the [configuration documentation](https://developers.openai.com/codex/config-advanced/) for details.
+Codex CLI functions as an MCP client that allows the Codex CLI and IDE extension to connect to MCP servers on startup. See the [`configuration documentation`](../docs/config.md#connecting-to-mcp-servers) for details.

 #### MCP server (experimental)

@@ -46,7 +46,7 @@ Use `codex mcp` to add/list/get/remove MCP server launchers defined in `config.t

 ### Notifications

-You can enable notifications by configuring a script that is run whenever the agent finishes a turn. The [notify documentation](https://developers.openai.com/codex/config-advanced/#notifications) includes a detailed example that explains how to get desktop notifications via [terminal-notifier](https://github.com/julienXX/terminal-notifier) on macOS. When Codex detects that it is running under WSL 2 inside Windows Terminal (`WT_SESSION` is set), the TUI automatically falls back to native Windows toast notifications so approval prompts and completed turns surface even though Windows Terminal does not implement OSC 9.
+You can enable notifications by configuring a script that is run whenever the agent finishes a turn. The [notify documentation](../docs/config.md#notify) includes a detailed example that explains how to get desktop notifications via [terminal-notifier](https://github.com/julienXX/terminal-notifier) on macOS. When Codex detects that it is running under WSL 2 inside Windows Terminal (`WT_SESSION` is set), the TUI automatically falls back to native Windows toast notifications so approval prompts and completed turns surface even though Windows Terminal does not implement OSC 9.

 ### `codex exec` to run Codex programmatically/non-interactively

--- a/codex-rs/app-server-protocol/src/protocol/common.rs
+++ b/codex-rs/app-server-protocol/src/protocol/common.rs
@@ -598,6 +598,7 @@ server_notification_definitions! {
    ReasoningSummaryTextDelta => "item/reasoning/summaryTextDelta" (v2::ReasoningSummaryTextDeltaNotification),
    ReasoningSummaryPartAdded => "item/reasoning/summaryPartAdded" (v2::ReasoningSummaryPartAddedNotification),
    ReasoningTextDelta => "item/reasoning/textDelta" (v2::ReasoningTextDeltaNotification),
+    ContextCompactionStarted => "thread/compaction/started" (v2::ContextCompactionStartedNotification),
    ContextCompacted => "thread/compacted" (v2::ContextCompactedNotification),
    DeprecationNotice => "deprecationNotice" (v2::DeprecationNoticeNotification),
    ConfigWarning => "configWarning" (v2::ConfigWarningNotification),
--- a/codex-rs/app-server-protocol/src/protocol/thread_history.rs
+++ b/codex-rs/app-server-protocol/src/protocol/thread_history.rs
@@ -56,6 +56,8 @@ impl ThreadHistoryBuilder {
                self.handle_agent_reasoning_raw_content(payload)
            }
            EventMsg::TokenCount(_) => {}
+            EventMsg::ContextCompactionStarted(_) => {}
+            EventMsg::ContextCompactionEnded(_) => {}
            EventMsg::EnteredReviewMode(_) => {}
            EventMsg::ExitedReviewMode(_) => {}
            EventMsg::ThreadRolledBack(payload) => self.handle_thread_rollback(payload),
--- a/codex-rs/app-server-protocol/src/protocol/v2.rs
+++ b/codex-rs/app-server-protocol/src/protocol/v2.rs
@@ -1969,6 +1969,9 @@ pub enum ThreadItem {
    #[serde(rename_all = "camelCase")]
    #[ts(rename_all = "camelCase")]
    ExitedReviewMode { id: String, review: String },
+    #[serde(rename_all = "camelCase")]
+    #[ts(rename_all = "camelCase")]
+    ContextCompaction { id: String },
 }

 impl From<CoreTurnItem> for ThreadItem {
@@ -1997,6 +2000,9 @@ impl From<CoreTurnItem> for ThreadItem {
                id: search.id,
                query: search.query,
            },
+            CoreTurnItem::ContextCompaction(compaction) => {
+                ThreadItem::ContextCompaction { id: compaction.id }
+            }
        }
    }
 }
@@ -2367,6 +2373,14 @@ pub struct ContextCompactedNotification {
    pub turn_id: String,
 }

+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+#[ts(export_to = "v2/")]
+pub struct ContextCompactionStartedNotification {
+    pub thread_id: String,
+    pub turn_id: String,
+}
+
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
 #[serde(rename_all = "camelCase")]
 #[ts(export_to = "v2/")]
@@ -2617,10 +2631,12 @@ mod tests {
    use super::*;
    use codex_protocol::items::AgentMessageContent;
    use codex_protocol::items::AgentMessageItem;
+    use codex_protocol::items::ContextCompactionItem;
    use codex_protocol::items::ReasoningItem;
    use codex_protocol::items::TurnItem;
    use codex_protocol::items::UserMessageItem;
    use codex_protocol::items::WebSearchItem;
+    use codex_protocol::models::WebSearchAction;
    use codex_protocol::protocol::NetworkAccess as CoreNetworkAccess;
    use codex_protocol::user_input::UserInput as CoreUserInput;
    use pretty_assertions::assert_eq;
@@ -2728,6 +2744,9 @@ mod tests {
        let search_item = TurnItem::WebSearch(WebSearchItem {
            id: "search-1".to_string(),
            query: "docs".to_string(),
+            action: WebSearchAction::Search {
+                query: Some("docs".to_string()),
+            },
        });

        assert_eq!(
@@ -2737,6 +2756,17 @@ mod tests {
                query: "docs".to_string(),
            }
        );
+
+        let compaction_item = TurnItem::ContextCompaction(ContextCompactionItem {
+            id: "compact-1".to_string(),
+        });
+
+        assert_eq!(
+            ThreadItem::from(compaction_item),
+            ThreadItem::ContextCompaction {
+                id: "compact-1".to_string(),
+            }
+        );
    }

    #[test]
--- a/codex-rs/app-server/src/bespoke_event_handling.rs
+++ b/codex-rs/app-server/src/bespoke_event_handling.rs
@@ -24,6 +24,7 @@ use codex_app_server_protocol::CommandExecutionRequestApprovalParams;
 use codex_app_server_protocol::CommandExecutionRequestApprovalResponse;
 use codex_app_server_protocol::CommandExecutionStatus;
 use codex_app_server_protocol::ContextCompactedNotification;
+use codex_app_server_protocol::ContextCompactionStartedNotification;
 use codex_app_server_protocol::DeprecationNoticeNotification;
 use codex_app_server_protocol::DynamicToolCallParams;
 use codex_app_server_protocol::ErrorNotification;
@@ -601,7 +602,18 @@ pub(crate) async fn apply_bespoke_event_handling(
                .send_server_notification(ServerNotification::AgentMessageDelta(notification))
                .await;
        }
-        EventMsg::ContextCompacted(..) => {
+        EventMsg::ContextCompactionStarted(..) => {
+            let notification = ContextCompactionStartedNotification {
+                thread_id: conversation_id.to_string(),
+                turn_id: event_turn_id.clone(),
+            };
+            outgoing
+                .send_server_notification(ServerNotification::ContextCompactionStarted(
+                    notification,
+                ))
+                .await;
+        }
+        EventMsg::ContextCompactionEnded(..) => {
            let notification = ContextCompactedNotification {
                thread_id: conversation_id.to_string(),
                turn_id: event_turn_id.clone(),
--- a/codex-rs/app-server/src/codex_message_processor.rs
+++ b/codex-rs/app-server/src/codex_message_processor.rs
@@ -169,6 +169,7 @@ use codex_core::read_head_for_summary;
 use codex_core::read_session_meta_line;
 use codex_core::rollout_date_parts;
 use codex_core::sandboxing::SandboxPermissions;
+use codex_core::windows_sandbox::WindowsSandboxLevelExt;
 use codex_feedback::CodexFeedback;
 use codex_login::ServerOptions as LoginServerOptions;
 use codex_login::ShutdownHandle;
@@ -176,6 +177,7 @@ use codex_login::run_login_server;
 use codex_protocol::ThreadId;
 use codex_protocol::config_types::ForcedLoginMethod;
 use codex_protocol::config_types::Personality;
+use codex_protocol::config_types::WindowsSandboxLevel;
 use codex_protocol::dynamic_tools::DynamicToolSpec as CoreDynamicToolSpec;
 use codex_protocol::items::TurnItem;
 use codex_protocol::models::ResponseItem;
@@ -1259,12 +1261,14 @@ impl CodexMessageProcessor {
        let timeout_ms = params
            .timeout_ms
            .and_then(|timeout_ms| u64::try_from(timeout_ms).ok());
+        let windows_sandbox_level = WindowsSandboxLevel::from_config(&self.config);
        let exec_params = ExecParams {
            command: params.command,
            cwd,
            expiration: timeout_ms.into(),
            env,
            sandbox_permissions: SandboxPermissions::UseDefault,
+            windows_sandbox_level,
            justification: None,
            arg0: None,
        };
@@ -3887,6 +3891,7 @@ impl CodexMessageProcessor {
                    cwd: params.cwd,
                    approval_policy: params.approval_policy.map(AskForApproval::to_core),
                    sandbox_policy: params.sandbox_policy.map(|p| p.to_core()),
+                    windows_sandbox_level: None,
                    model: params.model,
                    effort: params.effort.map(Some),
                    summary: params.summary,
--- a/codex-rs/app-server/tests/suite/v2/thread_resume.rs
+++ b/codex-rs/app-server/tests/suite/v2/thread_resume.rs
@@ -2,7 +2,9 @@ use anyhow::Result;
 use app_test_support::McpProcess;
 use app_test_support::create_fake_rollout_with_text_elements;
 use app_test_support::create_mock_responses_server_repeating_assistant;
+use app_test_support::rollout_path;
 use app_test_support::to_response;
+use chrono::Utc;
 use codex_app_server_protocol::JSONRPCResponse;
 use codex_app_server_protocol::RequestId;
 use codex_app_server_protocol::SessionSource;
@@ -22,6 +24,8 @@ use codex_protocol::user_input::TextElement;
 use core_test_support::responses;
 use core_test_support::skip_if_no_network;
 use pretty_assertions::assert_eq;
+use std::fs::FileTimes;
+use std::path::Path;
 use std::path::PathBuf;
 use tempfile::TempDir;
 use tokio::time::timeout;
@@ -147,6 +151,116 @@ async fn thread_resume_returns_rollout_history() -> Result<()> {
    Ok(())
 }

+#[tokio::test]
+async fn thread_resume_without_overrides_does_not_change_updated_at_or_mtime() -> Result<()> {
+    let server = create_mock_responses_server_repeating_assistant("Done").await;
+    let codex_home = TempDir::new()?;
+    let rollout = setup_rollout_fixture(codex_home.path(), &server.uri())?;
+    let thread_id = rollout.conversation_id.clone();
+
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
+
+    let resume_id = mcp
+        .send_thread_resume_request(ThreadResumeParams {
+            thread_id: thread_id.clone(),
+            ..Default::default()
+        })
+        .await?;
+    let resume_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(resume_id)),
+    )
+    .await??;
+    let ThreadResumeResponse { thread, .. } = to_response::<ThreadResumeResponse>(resume_resp)?;
+
+    assert_eq!(thread.updated_at, rollout.expected_updated_at);
+
+    let after_modified = std::fs::metadata(&rollout.rollout_file_path)?.modified()?;
+    assert_eq!(after_modified, rollout.before_modified);
+
+    let turn_id = mcp
+        .send_turn_start_request(TurnStartParams {
+            thread_id,
+            input: vec![UserInput::Text {
+                text: "Hello".to_string(),
+                text_elements: Vec::new(),
+            }],
+            ..Default::default()
+        })
+        .await?;
+    timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(turn_id)),
+    )
+    .await??;
+    timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_notification_message("turn/completed"),
+    )
+    .await??;
+
+    let after_turn_modified = std::fs::metadata(&rollout.rollout_file_path)?.modified()?;
+    assert!(after_turn_modified > rollout.before_modified);
+
+    Ok(())
+}
+
+#[tokio::test]
+async fn thread_resume_with_overrides_defers_updated_at_until_turn_start() -> Result<()> {
+    let server = create_mock_responses_server_repeating_assistant("Done").await;
+    let codex_home = TempDir::new()?;
+    let rollout = setup_rollout_fixture(codex_home.path(), &server.uri())?;
+
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
+
+    let resume_id = mcp
+        .send_thread_resume_request(ThreadResumeParams {
+            thread_id: rollout.conversation_id.clone(),
+            model: Some("mock-model".to_string()),
+            ..Default::default()
+        })
+        .await?;
+    let resume_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(resume_id)),
+    )
+    .await??;
+    let ThreadResumeResponse { thread, .. } = to_response::<ThreadResumeResponse>(resume_resp)?;
+
+    assert_eq!(thread.updated_at, rollout.expected_updated_at);
+
+    let after_resume_modified = std::fs::metadata(&rollout.rollout_file_path)?.modified()?;
+    assert_eq!(after_resume_modified, rollout.before_modified);
+
+    let turn_id = mcp
+        .send_turn_start_request(TurnStartParams {
+            thread_id: rollout.conversation_id,
+            input: vec![UserInput::Text {
+                text: "Hello".to_string(),
+                text_elements: Vec::new(),
+            }],
+            ..Default::default()
+        })
+        .await?;
+    timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(turn_id)),
+    )
+    .await??;
+    timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_notification_message("turn/completed"),
+    )
+    .await??;
+
+    let after_turn_modified = std::fs::metadata(&rollout.rollout_file_path)?.modified()?;
+    assert!(after_turn_modified > rollout.before_modified);
+
+    Ok(())
+}
+
 #[tokio::test]
 async fn thread_resume_prefers_path_over_thread_id() -> Result<()> {
    let server = create_mock_responses_server_repeating_assistant("Done").await;
@@ -364,3 +478,51 @@ stream_max_retries = 0
        ),
    )
 }
+
+fn set_rollout_mtime(path: &Path, updated_at_rfc3339: &str) -> Result<()> {
+    let parsed = chrono::DateTime::parse_from_rfc3339(updated_at_rfc3339)?.with_timezone(&Utc);
+    let times = FileTimes::new().set_modified(parsed.into());
+    std::fs::OpenOptions::new()
+        .append(true)
+        .open(path)?
+        .set_times(times)?;
+    Ok(())
+}
+
+struct RolloutFixture {
+    conversation_id: String,
+    rollout_file_path: PathBuf,
+    before_modified: std::time::SystemTime,
+    expected_updated_at: i64,
+}
+
+fn setup_rollout_fixture(codex_home: &Path, server_uri: &str) -> Result<RolloutFixture> {
+    create_config_toml(codex_home, server_uri)?;
+
+    let preview = "Saved user message";
+    let filename_ts = "2025-01-05T12-00-00";
+    let meta_rfc3339 = "2025-01-05T12:00:00Z";
+    let expected_updated_at_rfc3339 = "2025-01-07T00:00:00Z";
+    let conversation_id = create_fake_rollout_with_text_elements(
+        codex_home,
+        filename_ts,
+        meta_rfc3339,
+        preview,
+        Vec::new(),
+        Some("mock_provider"),
+        None,
+    )?;
+    let rollout_file_path = rollout_path(codex_home, filename_ts, &conversation_id);
+    set_rollout_mtime(rollout_file_path.as_path(), expected_updated_at_rfc3339)?;
+    let before_modified = std::fs::metadata(&rollout_file_path)?.modified()?;
+    let expected_updated_at = chrono::DateTime::parse_from_rfc3339(expected_updated_at_rfc3339)?
+        .with_timezone(&Utc)
+        .timestamp();
+
+    Ok(RolloutFixture {
+        conversation_id,
+        rollout_file_path,
+        before_modified,
+        expected_updated_at,
+    })
+}
--- a/codex-rs/backend-client/src/client.rs
+++ b/codex-rs/backend-client/src/client.rs
@@ -1,4 +1,5 @@
 use crate::types::CodeTaskDetailsResponse;
+use crate::types::ConfigFileResponse;
 use crate::types::CreditStatusDetails;
 use crate::types::PaginatedListTaskListItem;
 use crate::types::RateLimitStatusPayload;
@@ -244,6 +245,20 @@ impl Client {
        self.decode_json::<TurnAttemptsSiblingTurnsResponse>(&url, &ct, &body)
    }

+    /// Fetch the managed requirements file from codex-backend.
+    ///
+    /// `GET /api/codex/config/requirements` (Codex API style) or
+    /// `GET /wham/config/requirements` (ChatGPT backend-api style).
+    pub async fn get_config_requirements_file(&self) -> Result<ConfigFileResponse> {
+        let url = match self.path_style {
+            PathStyle::CodexApi => format!("{}/api/codex/config/requirements", self.base_url),
+            PathStyle::ChatGptApi => format!("{}/wham/config/requirements", self.base_url),
+        };
+        let req = self.http.get(&url).headers(self.headers());
+        let (body, ct) = self.exec_request(req, "GET", &url).await?;
+        self.decode_json::<ConfigFileResponse>(&url, &ct, &body)
+    }
+
    /// Create a new task (user turn) by POSTing to the appropriate backend path
    /// based on `path_style`. Returns the created task id.
    pub async fn create_task(&self, request_body: serde_json::Value) -> Result<String> {
--- a/codex-rs/backend-client/src/lib.rs
+++ b/codex-rs/backend-client/src/lib.rs
@@ -4,6 +4,7 @@ pub mod types;
 pub use client::Client;
 pub use types::CodeTaskDetailsResponse;
 pub use types::CodeTaskDetailsResponseExt;
+pub use types::ConfigFileResponse;
 pub use types::PaginatedListTaskListItem;
 pub use types::TaskListItem;
 pub use types::TurnAttemptsSiblingTurnsResponse;
--- a/codex-rs/backend-client/src/types.rs
+++ b/codex-rs/backend-client/src/types.rs
@@ -1,3 +1,4 @@
+pub use codex_backend_openapi_models::models::ConfigFileResponse;
 pub use codex_backend_openapi_models::models::CreditStatusDetails;
 pub use codex_backend_openapi_models::models::PaginatedListTaskListItem;
 pub use codex_backend_openapi_models::models::PlanType;
--- a/codex-rs/cli/src/main.rs
+++ b/codex-rs/cli/src/main.rs
@@ -147,7 +147,7 @@ struct ResumeCommand {
    session_id: Option<String>,

    /// Continue the most recent session without showing the picker.
-    #[arg(long = "last", default_value_t = false, conflicts_with = "session_id")]
+    #[arg(long = "last", default_value_t = false)]
    last: bool,

    /// Show all sessions (disables cwd filtering and shows CWD column).
@@ -932,6 +932,24 @@ mod tests {
        finalize_fork_interactive(interactive, root_overrides, session_id, last, all, fork_cli)
    }

+    #[test]
+    fn exec_resume_last_accepts_prompt_positional() {
+        let cli =
+            MultitoolCli::try_parse_from(["codex", "exec", "--json", "resume", "--last", "2+2"])
+                .expect("parse should succeed");
+
+        let Some(Subcommand::Exec(exec)) = cli.subcommand else {
+            panic!("expected exec subcommand");
+        };
+        let Some(codex_exec::Command::Resume(args)) = exec.command else {
+            panic!("expected exec resume");
+        };
+
+        assert!(args.last);
+        assert_eq!(args.session_id, None);
+        assert_eq!(args.prompt.as_deref(), Some("2+2"));
+    }
+
    fn app_server_from_args(args: &[&str]) -> AppServerCommand {
        let cli = MultitoolCli::try_parse_from(args).expect("parse");
        let Subcommand::AppServer(app_server) = cli.subcommand.expect("app-server present") else {
--- a/codex-rs/codex-api/src/sse/responses.rs
+++ b/codex-rs/codex-api/src/sse/responses.rs
@@ -291,7 +291,7 @@ pub fn process_responses_event(
                if let Ok(item) = serde_json::from_value::<ResponseItem>(item_val) {
                    return Ok(Some(ResponseEvent::OutputItemAdded(item)));
                }
-                debug!("failed to parse ResponseItem from output_item.done");
+                debug!("failed to parse ResponseItem from output_item.added");
            }
        }
        "response.reasoning_summary_part.added" => {
--- a/codex-rs/codex-backend-openapi-models/src/models/config_file_response.rs
+++ b/codex-rs/codex-backend-openapi-models/src/models/config_file_response.rs
@@ -0,0 +1,43 @@
+/*
+ * codex-backend
+ *
+ * codex-backend
+ *
+ * The version of the OpenAPI document: 0.0.1
+ *
+ * Generated by: https://openapi-generator.tech
+ */
+
+use serde::Deserialize;
+use serde::Serialize;
+
+/// Config-file response model. Every field is optional and is omitted from the
+/// serialized output when it is `None`.
+#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)]
+pub struct ConfigFileResponse {
+    #[serde(rename = "contents", skip_serializing_if = "Option::is_none")]
+    pub contents: Option<String>,
+    #[serde(rename = "sha256", skip_serializing_if = "Option::is_none")]
+    pub sha256: Option<String>,
+    #[serde(rename = "updated_at", skip_serializing_if = "Option::is_none")]
+    pub updated_at: Option<String>,
+    #[serde(rename = "updated_by_user_id", skip_serializing_if = "Option::is_none")]
+    pub updated_by_user_id: Option<String>,
+}
+
+impl ConfigFileResponse {
+    /// Builds a response from the four optional fields, stored exactly as given.
+    pub fn new(
+        contents: Option<String>,
+        sha256: Option<String>,
+        updated_at: Option<String>,
+        updated_by_user_id: Option<String>,
+    ) -> Self {
+        Self {
+            contents,
+            sha256,
+            updated_at,
+            updated_by_user_id,
+        }
+    }
+}
--- a/codex-rs/codex-backend-openapi-models/src/models/mod.rs
+++ b/codex-rs/codex-backend-openapi-models/src/models/mod.rs
@@ -3,6 +3,10 @@
 // Currently export only the types referenced by the workspace
 // The process for this will change

+// Config
+pub mod config_file_response;
+pub use self::config_file_response::ConfigFileResponse;
+
 // Cloud Tasks
 pub mod code_task_details_response;
 pub use self::code_task_details_response::CodeTaskDetailsResponse;
--- a/codex-rs/config.md
+++ b/codex-rs/config.md
@@ -2,4 +2,5 @@

 This file has moved. Please see the latest configuration documentation here:

- Configuration documentation: https://developers.openai.com/codex/config-advanced/
+- Full config docs: [docs/config.md](../docs/config.md)
+- MCP servers section: [docs/config.md#connecting-to-mcp-servers](../docs/config.md#connecting-to-mcp-servers)
--- a/codex-rs/core/config.schema.json
+++ b/codex-rs/core/config.schema.json
@@ -1465,6 +1465,10 @@
      ],
      "description": "User-level skill config entries keyed by SKILL.md path."
    },
+    "suppress_unstable_features_warning": {
+      "description": "Suppress warnings about unstable (under development) features.",
+      "type": "boolean"
+    },
    "tool_output_token_limit": {
      "description": "Token budget applied when storing tool/function outputs in the context manager.",
      "format": "uint",
--- a/codex-rs/core/src/agent/role.rs
+++ b/codex-rs/core/src/agent/role.rs
@@ -44,6 +44,8 @@ pub struct AgentProfile {
    pub reasoning_effort: Option<ReasoningEffort>,
    /// Whether to force a read-only sandbox policy.
    pub read_only: bool,
+    /// Description to include in the tool specs.
+    pub description: &'static str,
 }

 impl AgentRole {
@@ -51,7 +53,19 @@ impl AgentRole {
    pub fn enum_values() -> Vec<String> {
        ALL_ROLES
            .iter()
-            .filter_map(|role| serde_json::to_string(role).ok())
+            .filter_map(|role| {
+                let details = role.profile().description;
+                // Roles that fail to serialize are dropped from the list.
+                let name = serde_json::to_string(role).ok()?;
+                // The description is spliced in verbatim (not JSON-quoted or escaped);
+                // roles with an empty description get no `"description"` entry.
+                let suffix = if details.is_empty() {
+                    String::new()
+                } else {
+                    format!(r#", "description": {details}"#)
+                };
+                Some(format!(r#"{{ "name": {name}{suffix}}}"#))
+            })
            .collect()
    }

@@ -66,11 +80,33 @@ impl AgentRole {
            AgentRole::Worker => AgentProfile {
                // base_instructions: Some(WORKER_PROMPT),
                // model: Some(WORKER_MODEL),
+                description: r#"Use for execution and production work.
+Typical tasks:
+- Implement part of a feature
+- Fix tests or bugs
+- Split large refactors into independent chunks
+Rules:
+- Explicitly assign **ownership** of the task (files / responsibility).
+- Always tell workers they are **not alone in the codebase**, and they should ignore edits made by others without touching them"#,
                ..Default::default()
            },
            AgentRole::Explorer => AgentProfile {
                model: Some(EXPLORER_MODEL),
                reasoning_effort: Some(ReasoningEffort::Low),
+                description: r#"Use for fast codebase understanding and information gathering.
+`explorer` are extremely fast agents so use them as much as you can to speed up the resolution of the global task.
+Typical tasks:
+- Locate usages of a symbol or concept
+- Understand how X is handled in Y
+- Review a section of code for issues
+- Assess impact of a potential change
+Rules:
+- Be explicit in what you are looking for. A good usage of `explorer` would mean that you don't need to read the same code after the explorer sends you the result.
+- **Always** prefer asking explorers rather than exploring the codebase yourself.
+- Spawn multiple explorers in parallel when useful and wait for all results.
+- You can ask the `explorer` to return file name, lines, entire code snippets, ...
+- Reuse the same explorer when it is relevant. If later in your process you have more questions on some code an explorer already covered, reuse this same explorer to be more efficient.
+                "#,
                ..Default::default()
            },
        }
--- a/codex-rs/core/src/apply_patch.rs
+++ b/codex-rs/core/src/apply_patch.rs
@@ -42,6 +42,7 @@ pub(crate) async fn apply_patch(
        turn_context.approval_policy,
        &turn_context.sandbox_policy,
        &turn_context.cwd,
+        turn_context.windows_sandbox_level,
    ) {
        SafetyCheck::AutoApprove {
            user_explicitly_approved,
--- a/codex-rs/core/src/client.rs
+++ b/codex-rs/core/src/client.rs
@@ -655,13 +655,11 @@ fn build_responses_headers(
    let mut headers = experimental_feature_headers(config);
    headers.insert(
        WEB_SEARCH_ELIGIBLE_HEADER,
-        HeaderValue::from_static(
-            if matches!(config.web_search_mode, Some(WebSearchMode::Disabled)) {
-                "false"
-            } else {
-                "true"
-            },
-        ),
+        HeaderValue::from_static(if config.web_search_mode == WebSearchMode::Disabled {
+            "false"
+        } else {
+            "true"
+        }),
    );
    if let Some(turn_state) = turn_state
        && let Some(state) = turn_state.get()
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -22,6 +22,7 @@ use crate::connectors;
 use crate::exec_policy::ExecPolicyManager;
 use crate::features::Feature;
 use crate::features::Features;
+use crate::features::maybe_push_unstable_features_warning;
 use crate::models_manager::manager::ModelsManager;
 use crate::parse_command::parse_command;
 use crate::parse_turn_item;
@@ -173,11 +174,13 @@ use crate::turn_diff_tracker::TurnDiffTracker;
 use crate::unified_exec::UnifiedExecProcessManager;
 use crate::user_notification::UserNotification;
 use crate::util::backoff;
+use crate::windows_sandbox::WindowsSandboxLevelExt;
 use codex_async_utils::OrCancelExt;
 use codex_otel::OtelManager;
 use codex_protocol::config_types::CollaborationMode;
 use codex_protocol::config_types::Personality;
 use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
+use codex_protocol::config_types::WindowsSandboxLevel;
 use codex_protocol::models::ContentItem;
 use codex_protocol::models::DeveloperInstructions;
 use codex_protocol::models::ResponseInputItem;
@@ -324,6 +327,7 @@ impl Codex {
            compact_prompt: config.compact_prompt.clone(),
            approval_policy: config.approval_policy.clone(),
            sandbox_policy: config.sandbox_policy.clone(),
+            windows_sandbox_level: WindowsSandboxLevel::from_config(&config),
            cwd: config.cwd.clone(),
            original_config_do_not_use: Arc::clone(&config),
            session_source,
@@ -444,6 +448,7 @@ pub(crate) struct TurnContext {
    pub(crate) personality: Option<Personality>,
    pub(crate) approval_policy: AskForApproval,
    pub(crate) sandbox_policy: SandboxPolicy,
+    pub(crate) windows_sandbox_level: WindowsSandboxLevel,
    pub(crate) shell_environment_policy: ShellEnvironmentPolicy,
    pub(crate) tools_config: ToolsConfig,
    pub(crate) ghost_snapshot: GhostSnapshotConfig,
@@ -495,6 +500,7 @@ pub(crate) struct SessionConfiguration {
    approval_policy: Constrained<AskForApproval>,
    /// How to sandbox commands executed in the system
    sandbox_policy: Constrained<SandboxPolicy>,
+    windows_sandbox_level: WindowsSandboxLevel,

    /// Working directory that should be treated as the *root* of the
    /// session. All relative paths supplied by the model as well as the
@@ -543,6 +549,9 @@ impl SessionConfiguration {
        if let Some(sandbox_policy) = updates.sandbox_policy.clone() {
            next_configuration.sandbox_policy.set(sandbox_policy)?;
        }
+        if let Some(windows_sandbox_level) = updates.windows_sandbox_level {
+            next_configuration.windows_sandbox_level = windows_sandbox_level;
+        }
        if let Some(cwd) = updates.cwd.clone() {
            next_configuration.cwd = cwd;
        }
@@ -555,6 +564,7 @@ pub(crate) struct SessionSettingsUpdate {
    pub(crate) cwd: Option<PathBuf>,
    pub(crate) approval_policy: Option<AskForApproval>,
    pub(crate) sandbox_policy: Option<SandboxPolicy>,
+    pub(crate) windows_sandbox_level: Option<WindowsSandboxLevel>,
    pub(crate) collaboration_mode: Option<CollaborationMode>,
    pub(crate) reasoning_summary: Option<ReasoningSummaryConfig>,
    pub(crate) final_output_json_schema: Option<Option<Value>>,
@@ -619,6 +629,7 @@ impl Session {
            personality: session_configuration.personality,
            approval_policy: session_configuration.approval_policy.value(),
            sandbox_policy: session_configuration.sandbox_policy.get().clone(),
+            windows_sandbox_level: session_configuration.windows_sandbox_level,
            shell_environment_policy: per_turn_config.shell_environment_policy.clone(),
            tools_config,
            ghost_snapshot: per_turn_config.ghost_snapshot.clone(),
@@ -732,7 +743,7 @@ impl Session {
                None
            } else {
                Some(format!(
-                    "Enable it with `--enable {canonical}` or `[features].{canonical}` in config.toml. See https://developers.openai.com/codex/config-advanced/ for details."
+                    "Enable it with `--enable {canonical}` or `[features].{canonical}` in config.toml. See https://github.com/openai/codex/blob/main/docs/config.md#feature-flags for details."
                ))
            };
            post_session_configured_events.push(Event {
@@ -754,6 +765,7 @@ impl Session {
            });
        }
        maybe_push_chat_wire_api_deprecation(&config, &mut post_session_configured_events);
+        maybe_push_unstable_features_warning(&config, &mut post_session_configured_events);

        let auth = auth.as_ref();
        let otel_manager = OtelManager::new(
@@ -931,23 +943,28 @@ impl Session {
                // Build and record initial items (user instructions + environment context)
                let items = self.build_initial_context(&turn_context).await;
                self.record_conversation_items(&turn_context, &items).await;
+                {
+                    let mut state = self.state.lock().await;
+                    state.initial_context_seeded = true;
+                }
                // Ensure initial items are visible to immediate readers (e.g., tests, forks).
                self.flush_rollout().await;
            }
-            InitialHistory::Resumed(_) | InitialHistory::Forked(_) => {
-                let rollout_items = conversation_history.get_rollout_items();
-                let persist = matches!(conversation_history, InitialHistory::Forked(_));
+            InitialHistory::Resumed(resumed_history) => {
+                let rollout_items = resumed_history.history;
+                {
+                    let mut state = self.state.lock().await;
+                    state.initial_context_seeded = false;
+                }

                // If resuming, warn when the last recorded model differs from the current one.
-                if let InitialHistory::Resumed(_) = conversation_history
-                    && let Some(prev) = rollout_items.iter().rev().find_map(|it| {
-                        if let RolloutItem::TurnContext(ctx) = it {
-                            Some(ctx.model.as_str())
-                        } else {
-                            None
-                        }
-                    })
-                {
+                if let Some(prev) = rollout_items.iter().rev().find_map(|it| {
+                    if let RolloutItem::TurnContext(ctx) = it {
+                        Some(ctx.model.as_str())
+                    } else {
+                        None
+                    }
+                }) {
                    let curr = turn_context.client.get_model();
                    if prev != curr {
                        warn!(
@@ -982,8 +999,29 @@ impl Session {
                    state.set_token_info(Some(info));
                }

+                // Defer seeding the session's initial context until the first turn starts so
+                // turn/start overrides can be merged before we write to the rollout.
+                self.flush_rollout().await;
+            }
+            InitialHistory::Forked(rollout_items) => {
+                // Always add response items to conversation history
+                let reconstructed_history = self
+                    .reconstruct_history_from_rollout(&turn_context, &rollout_items)
+                    .await;
+                if !reconstructed_history.is_empty() {
+                    self.record_into_history(&reconstructed_history, &turn_context)
+                        .await;
+                }
+
+                // Seed usage info from the recorded rollout so UIs can show token counts
+                // immediately on resume/fork.
+                if let Some(info) = Self::last_token_info_from_rollout(&rollout_items) {
+                    let mut state = self.state.lock().await;
+                    state.set_token_info(Some(info));
+                }
+
                // If persisting, persist all rollout items as-is (recorder filters)
-                if persist && !rollout_items.is_empty() {
+                if !rollout_items.is_empty() {
                    self.persist_rollout_items(&rollout_items).await;
                }

@@ -991,6 +1029,10 @@ impl Session {
                let initial_context = self.build_initial_context(&turn_context).await;
                self.record_conversation_items(&turn_context, &initial_context)
                    .await;
+                {
+                    let mut state = self.state.lock().await;
+                    state.initial_context_seeded = true;
+                }
                // Flush after seeding history and any persisted rollout copy.
                self.flush_rollout().await;
            }
@@ -1641,6 +1683,21 @@ impl Session {
        state.replace_history(items);
    }

+    /// Builds and records the initial context on the first call only, then flushes the rollout.
+    pub(crate) async fn seed_initial_context_if_needed(&self, turn_context: &TurnContext) {
+        let already_seeded = {
+            let mut guard = self.state.lock().await;
+            std::mem::replace(&mut guard.initial_context_seeded, true)
+        };
+        if already_seeded {
+            return;
+        }
+        let seed_items = self.build_initial_context(turn_context).await;
+        self.record_conversation_items(turn_context, &seed_items)
+            .await;
+        self.flush_rollout().await;
+    }
+
    async fn persist_rollout_response_items(&self, items: &[ResponseItem]) {
        let rollout_items: Vec<RolloutItem> = items
            .iter()
@@ -2144,6 +2201,7 @@ async fn submission_loop(sess: Arc<Session>, config: Arc<Config>, rx_sub: Receiv
                cwd,
                approval_policy,
                sandbox_policy,
+                windows_sandbox_level,
                model,
                effort,
                summary,
@@ -2167,6 +2225,7 @@ async fn submission_loop(sess: Arc<Session>, config: Arc<Config>, rx_sub: Receiv
                        cwd,
                        approval_policy,
                        sandbox_policy,
+                        windows_sandbox_level,
                        collaboration_mode: Some(collaboration_mode),
                        reasoning_summary: summary,
                        personality,
@@ -2330,6 +2389,11 @@ mod handlers {
            return;
        }

+        let initial_context_seeded = sess.state.lock().await.initial_context_seeded;
+        if !initial_context_seeded {
+            return;
+        }
+
        let current_context = sess.new_default_turn_with_sub_id(sub_id).await;
        let update_items = sess.build_settings_update_items(
            Some(&previous_context),
@@ -2378,6 +2442,7 @@ mod handlers {
                        cwd: Some(cwd),
                        approval_policy: Some(approval_policy),
                        sandbox_policy: Some(sandbox_policy),
+                        windows_sandbox_level: None,
                        collaboration_mode,
                        reasoning_summary: Some(summary),
                        final_output_json_schema: Some(final_output_json_schema),
@@ -2417,6 +2482,7 @@ mod handlers {

        // Attempt to inject input into current task
        if let Err(items) = sess.inject_input(items).await {
+            sess.seed_initial_context_if_needed(&current_context).await;
            let update_items = sess.build_settings_update_items(
                previous_context.as_ref(),
                &current_context,
@@ -2822,7 +2888,7 @@ async fn spawn_review_thread(
    let tools_config = ToolsConfig::new(&ToolsConfigParams {
        model_info: &review_model_info,
        features: &review_features,
-        web_search_mode: Some(review_web_search_mode),
+        web_search_mode: review_web_search_mode,
    });

    let review_prompt = resolved.prompt.clone();
@@ -2834,7 +2900,7 @@ async fn spawn_review_thread(
    let mut per_turn_config = (*config).clone();
    per_turn_config.model = Some(model.clone());
    per_turn_config.features = review_features.clone();
-    per_turn_config.web_search_mode = Some(review_web_search_mode);
+    per_turn_config.web_search_mode = review_web_search_mode;

    let otel_manager = parent_turn_context
        .client
@@ -2865,6 +2931,7 @@ async fn spawn_review_thread(
        personality: parent_turn_context.personality,
        approval_policy: parent_turn_context.approval_policy,
        sandbox_policy: parent_turn_context.sandbox_policy.clone(),
+        windows_sandbox_level: parent_turn_context.windows_sandbox_level,
        shell_environment_policy: parent_turn_context.shell_environment_policy.clone(),
        cwd: parent_turn_context.cwd.clone(),
        final_output_json_schema: None,
@@ -3437,10 +3504,8 @@ async fn try_run_sampling_request(
            }
            ResponseEvent::OutputItemAdded(item) => {
                if let Some(turn_item) = handle_non_tool_response_item(&item).await {
-                    let tracked_item = turn_item.clone();
                    sess.emit_turn_item_started(&turn_context, &turn_item).await;
-
-                    active_item = Some(tracked_item);
+                    active_item = Some(turn_item);
                }
            }
            ResponseEvent::ServerReasoningIncluded(included) => {
@@ -3719,6 +3784,23 @@ mod tests {

    #[tokio::test]
    async fn record_initial_history_reconstructs_resumed_transcript() {
+        let (session, turn_context) = make_session_and_context().await;
+        let (rollout_items, expected) = sample_rollout(&session, &turn_context).await;
+        let resumed = ResumedHistory {
+            conversation_id: ThreadId::default(),
+            history: rollout_items,
+            rollout_path: PathBuf::from("/tmp/resume.jsonl"),
+        };
+        session
+            .record_initial_history(InitialHistory::Resumed(resumed))
+            .await;
+        // Resuming alone must yield exactly `expected`; no initial context is appended yet.
+        let history = session.state.lock().await.clone_history();
+        assert_eq!(expected, history.raw_items());
+    }
+
+    #[tokio::test]
+    async fn resumed_history_seeds_initial_context_on_first_turn_only() {
        let (session, turn_context) = make_session_and_context().await;
        let (rollout_items, mut expected) = sample_rollout(&session, &turn_context).await;

@@ -3730,9 +3812,17 @@ mod tests {
            }))
            .await;

+        let history_before_seed = session.state.lock().await.clone_history();
+        assert_eq!(expected, history_before_seed.raw_items());
+
+        session.seed_initial_context_if_needed(&turn_context).await;
        expected.extend(session.build_initial_context(&turn_context).await);
-        let history = session.state.lock().await.clone_history();
-        assert_eq!(expected, history.raw_items());
+        let history_after_seed = session.clone_history().await;
+        assert_eq!(expected, history_after_seed.raw_items());
+
+        session.seed_initial_context_if_needed(&turn_context).await;
+        let history_after_second_seed = session.clone_history().await;
+        assert_eq!(expected, history_after_second_seed.raw_items());
    }

    #[tokio::test]
@@ -3986,6 +4076,7 @@ mod tests {
            compact_prompt: config.compact_prompt.clone(),
            approval_policy: config.approval_policy.clone(),
            sandbox_policy: config.sandbox_policy.clone(),
+            windows_sandbox_level: WindowsSandboxLevel::from_config(&config),
            cwd: config.cwd.clone(),
            original_config_do_not_use: Arc::clone(&config),
            session_source: SessionSource::Exec,
@@ -4066,6 +4157,7 @@ mod tests {
            compact_prompt: config.compact_prompt.clone(),
            approval_policy: config.approval_policy.clone(),
            sandbox_policy: config.sandbox_policy.clone(),
+            windows_sandbox_level: WindowsSandboxLevel::from_config(&config),
            cwd: config.cwd.clone(),
            original_config_do_not_use: Arc::clone(&config),
            session_source: SessionSource::Exec,
@@ -4330,6 +4422,7 @@ mod tests {
            compact_prompt: config.compact_prompt.clone(),
            approval_policy: config.approval_policy.clone(),
            sandbox_policy: config.sandbox_policy.clone(),
+            windows_sandbox_level: WindowsSandboxLevel::from_config(&config),
            cwd: config.cwd.clone(),
            original_config_do_not_use: Arc::clone(&config),
            session_source: SessionSource::Exec,
@@ -4347,7 +4440,8 @@ mod tests {
            session_configuration.session_source.clone(),
        );

-        let state = SessionState::new(session_configuration.clone());
+        let mut state = SessionState::new(session_configuration.clone());
+        mark_state_initial_context_seeded(&mut state);
        let skills_manager = Arc::new(SkillsManager::new(config.codex_home.clone()));

        let services = SessionServices {
@@ -4439,6 +4533,7 @@ mod tests {
            compact_prompt: config.compact_prompt.clone(),
            approval_policy: config.approval_policy.clone(),
            sandbox_policy: config.sandbox_policy.clone(),
+            windows_sandbox_level: WindowsSandboxLevel::from_config(&config),
            cwd: config.cwd.clone(),
            original_config_do_not_use: Arc::clone(&config),
            session_source: SessionSource::Exec,
@@ -4456,7 +4551,8 @@ mod tests {
            session_configuration.session_source.clone(),
        );

-        let state = SessionState::new(session_configuration.clone());
+        let mut state = SessionState::new(session_configuration.clone());
+        mark_state_initial_context_seeded(&mut state);
        let skills_manager = Arc::new(SkillsManager::new(config.codex_home.clone()));

        let services = SessionServices {
@@ -4502,6 +4598,10 @@ mod tests {
        (session, turn_context, rx_event)
    }

+    fn mark_state_initial_context_seeded(state: &mut SessionState) {
+        state.initial_context_seeded = true; // test setup: treat the initial context as recorded
+    }
+
    #[tokio::test]
    async fn refresh_mcp_servers_is_deferred_until_next_turn() {
        let (session, turn_context) = make_session_and_context().await;
@@ -4941,6 +5041,7 @@ mod tests {
            expiration: timeout_ms.into(),
            env: HashMap::new(),
            sandbox_permissions,
+            windows_sandbox_level: turn_context.windows_sandbox_level,
            justification: Some("test".to_string()),
            arg0: None,
        };
@@ -4951,6 +5052,7 @@ mod tests {
            cwd: params.cwd.clone(),
            expiration: timeout_ms.into(),
            env: HashMap::new(),
+            windows_sandbox_level: turn_context.windows_sandbox_level,
            justification: params.justification.clone(),
            arg0: None,
        };
--- a/codex-rs/core/src/compact.rs
+++ b/codex-rs/core/src/compact.rs
@@ -10,7 +10,8 @@ use crate::error::CodexErr;
 use crate::error::Result as CodexResult;
 use crate::features::Feature;
 use crate::protocol::CompactedItem;
-use crate::protocol::ContextCompactedEvent;
+use crate::protocol::ContextCompactionEndedEvent;
+use crate::protocol::ContextCompactionStartedEvent;
 use crate::protocol::EventMsg;
 use crate::protocol::TurnContextItem;
 use crate::protocol::TurnStartedEvent;
@@ -20,6 +21,7 @@ use crate::truncate::TruncationPolicy;
 use crate::truncate::approx_token_count;
 use crate::truncate::truncate_text;
 use crate::util::backoff;
+use codex_protocol::items::ContextCompactionItem;
 use codex_protocol::items::TurnItem;
 use codex_protocol::models::ContentItem;
 use codex_protocol::models::ResponseInputItem;
@@ -28,6 +30,7 @@ use codex_protocol::protocol::RolloutItem;
 use codex_protocol::user_input::UserInput;
 use futures::prelude::*;
 use tracing::error;
+use uuid::Uuid;

 pub const SUMMARIZATION_PROMPT: &str = include_str!("../templates/compact/prompt.md");
 pub const SUMMARY_PREFIX: &str = include_str!("../templates/compact/summary_prefix.md");
@@ -71,6 +74,9 @@ async fn run_compact_task_inner(
    turn_context: Arc<TurnContext>,
    input: Vec<UserInput>,
 ) {
+    let compaction_item = compaction_turn_item();
+    emit_compaction_started(&sess, &turn_context, &compaction_item).await;
+
    let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input);

    let mut history = sess.clone_history().await;
@@ -131,6 +137,7 @@ async fn run_compact_task_inner(
                break;
            }
            Err(CodexErr::Interrupted) => {
+                emit_compaction_ended(&sess, &turn_context, compaction_item.clone()).await;
                return;
            }
            Err(e @ CodexErr::ContextWindowExceeded) => {
@@ -147,6 +154,7 @@ async fn run_compact_task_inner(
                sess.set_total_tokens_full(turn_context.as_ref()).await;
                let event = EventMsg::Error(e.to_error_event(None));
                sess.send_event(&turn_context, event).await;
+                emit_compaction_ended(&sess, &turn_context, compaction_item.clone()).await;
                return;
            }
            Err(e) => {
@@ -164,6 +172,7 @@ async fn run_compact_task_inner(
                } else {
                    let event = EventMsg::Error(e.to_error_event(None));
                    sess.send_event(&turn_context, event).await;
+                    emit_compaction_ended(&sess, &turn_context, compaction_item.clone()).await;
                    return;
                }
            }
@@ -193,8 +202,7 @@ async fn run_compact_task_inner(
    });
    sess.persist_rollout_items(&[rollout_item]).await;

-    let event = EventMsg::ContextCompacted(ContextCompactedEvent {});
-    sess.send_event(&turn_context, event).await;
+    emit_compaction_ended(&sess, &turn_context, compaction_item).await;

    let warning = EventMsg::Warning(WarningEvent {
        message: "Heads up: Long threads and multiple compactions can cause the model to be less accurate. Start a new thread when possible to keep threads small and targeted.".to_string(),
@@ -202,6 +210,38 @@ async fn run_compact_task_inner(
    sess.send_event(&turn_context, warning).await;
 }

+/// Creates the `TurnItem` for one compaction run, identified by a freshly generated v4 UUID.
+fn compaction_turn_item() -> TurnItem {
+    let id = Uuid::new_v4().to_string();
+    TurnItem::ContextCompaction(ContextCompactionItem { id })
+}
+
+/// Announces that a compaction run has begun.
+///
+/// Sends `ContextCompactionStarted` first, then reports `item` as a started turn item.
+pub(crate) async fn emit_compaction_started(
+    sess: &Session,
+    turn_context: &TurnContext,
+    item: &TurnItem,
+) {
+    let started = EventMsg::ContextCompactionStarted(ContextCompactionStartedEvent {});
+    sess.send_event(turn_context, started).await;
+    sess.emit_turn_item_started(turn_context, item).await;
+}
+
+/// Announces that a compaction run has finished.
+///
+/// Reports `item` as a completed turn item first, then sends `ContextCompactionEnded`.
+pub(crate) async fn emit_compaction_ended(
+    sess: &Session,
+    turn_context: &TurnContext,
+    item: TurnItem,
+) {
+    sess.emit_turn_item_completed(turn_context, item).await;
+    let ended = EventMsg::ContextCompactionEnded(ContextCompactionEndedEvent {});
+    sess.send_event(turn_context, ended).await;
+}
+
 pub fn content_items_to_text(content: &[ContentItem]) -> Option<String> {
    let mut pieces = Vec::new();
    for item in content {
--- a/codex-rs/core/src/compact_remote.rs
+++ b/codex-rs/core/src/compact_remote.rs
@@ -3,13 +3,17 @@ use std::sync::Arc;
 use crate::Prompt;
 use crate::codex::Session;
 use crate::codex::TurnContext;
+use crate::compact::emit_compaction_ended;
+use crate::compact::emit_compaction_started;
 use crate::error::Result as CodexResult;
 use crate::protocol::CompactedItem;
-use crate::protocol::ContextCompactedEvent;
 use crate::protocol::EventMsg;
 use crate::protocol::RolloutItem;
 use crate::protocol::TurnStartedEvent;
+use codex_protocol::items::ContextCompactionItem;
+use codex_protocol::items::TurnItem;
 use codex_protocol::models::ResponseItem;
+use uuid::Uuid;

 pub(crate) async fn run_inline_remote_auto_compact_task(
    sess: Arc<Session>,
@@ -28,12 +32,19 @@ pub(crate) async fn run_remote_compact_task(sess: Arc<Session>, turn_context: Ar
 }

 async fn run_remote_compact_task_inner(sess: &Arc<Session>, turn_context: &Arc<TurnContext>) {
+    let id = Uuid::new_v4().to_string();
+    let compaction_item = TurnItem::ContextCompaction(ContextCompactionItem { id });
+    // Started/ended notifications bracket the attempt; "ended" is sent even after an error.
+    emit_compaction_started(sess, turn_context, &compaction_item).await;
+
    if let Err(err) = run_remote_compact_task_inner_impl(sess, turn_context).await {
        let event = EventMsg::Error(
            err.to_error_event(Some("Error running remote compact task".to_string())),
        );
        sess.send_event(turn_context, event).await;
    }
+
+    emit_compaction_ended(sess, turn_context, compaction_item).await;
 }

 async fn run_remote_compact_task_inner_impl(
@@ -77,8 +88,5 @@ async fn run_remote_compact_task_inner_impl(
    sess.persist_rollout_items(&[RolloutItem::Compacted(compacted_item)])
        .await;

-    let event = EventMsg::ContextCompacted(ContextCompactedEvent {});
-    sess.send_event(turn_context, event).await;
-
    Ok(())
 }
--- a/codex-rs/core/src/config/mod.rs
+++ b/codex-rs/core/src/config/mod.rs
@@ -38,6 +38,7 @@ use crate::project_doc::DEFAULT_PROJECT_DOC_FILENAME;
 use crate::project_doc::LOCAL_PROJECT_DOC_FILENAME;
 use crate::protocol::AskForApproval;
 use crate::protocol::SandboxPolicy;
+use crate::windows_sandbox::WindowsSandboxLevelExt;
 use codex_app_server_protocol::Tools;
 use codex_app_server_protocol::UserSavedConfig;
 use codex_protocol::config_types::AltScreenMode;
@@ -49,6 +50,7 @@ use codex_protocol::config_types::SandboxMode;
 use codex_protocol::config_types::TrustLevel;
 use codex_protocol::config_types::Verbosity;
 use codex_protocol::config_types::WebSearchMode;
+use codex_protocol::config_types::WindowsSandboxLevel;
 use codex_protocol::openai_models::ReasoningEffort;
 use codex_rmcp_client::OAuthCredentialsStoreMode;
 use codex_utils_absolute_path::AbsolutePathBuf;
@@ -304,8 +306,8 @@ pub struct Config {
    /// model info's default preference.
    pub include_apply_patch_tool: bool,

-    /// Explicit or feature-derived web search mode.
-    pub web_search_mode: Option<WebSearchMode>,
+    /// Explicit or feature-derived web search mode. Defaults to cached.
+    pub web_search_mode: WebSearchMode,

    /// If set to `true`, used only the experimental unified exec tool.
    pub use_experimental_unified_exec_tool: bool,
@@ -316,6 +318,9 @@ pub struct Config {
    /// Centralized feature flags; source of truth for feature gating.
    pub features: Features,

+    /// When `true`, suppress warnings about unstable (under development) features.
+    pub suppress_unstable_features_warning: bool,
+
    /// The active profile name used to derive this `Config` (if any).
    pub active_profile: Option<String>,

@@ -906,6 +911,9 @@ pub struct ConfigToml {
    #[schemars(schema_with = "crate::config::schema::features_schema")]
    pub features: Option<FeaturesToml>,

+    /// Suppress warnings about unstable (under development) features.
+    pub suppress_unstable_features_warning: Option<bool>,
+
    /// Settings for ghost snapshots (used for undo).
    #[serde(default)]
    pub ghost_snapshot: Option<GhostSnapshotToml>,
@@ -1050,6 +1058,7 @@ impl ConfigToml {
        &self,
        sandbox_mode_override: Option<SandboxMode>,
        profile_sandbox_mode: Option<SandboxMode>,
+        windows_sandbox_level: WindowsSandboxLevel,
        resolved_cwd: &Path,
    ) -> SandboxPolicyResolution {
        let resolved_sandbox_mode = sandbox_mode_override
@@ -1088,7 +1097,7 @@ impl ConfigToml {
        if cfg!(target_os = "windows")
            && matches!(resolved_sandbox_mode, SandboxMode::WorkspaceWrite)
            // If the experimental Windows sandbox is enabled, do not force a downgrade.
-            && crate::safety::get_platform_sandbox().is_none()
+            && windows_sandbox_level == codex_protocol::config_types::WindowsSandboxLevel::Disabled
        {
            sandbox_policy = SandboxPolicy::new_read_only_policy();
            forced_auto_mode_downgraded_on_windows = true;
@@ -1194,22 +1203,27 @@ pub fn resolve_oss_provider(
    }
 }

-/// Resolve the web search mode from explicit config and feature flags.
+/// Resolve the web search mode from explicit config, feature flags, and sandbox policy.
+/// Live search is the fallback when the sandbox policy is `DangerFullAccess`; otherwise cached is used.
 fn resolve_web_search_mode(
    config_toml: &ConfigToml,
    config_profile: &ConfigProfile,
    features: &Features,
-) -> Option<WebSearchMode> {
+    sandbox_policy: &SandboxPolicy,
+) -> WebSearchMode {
    if let Some(mode) = config_profile.web_search.or(config_toml.web_search) {
-        return Some(mode);
+        return mode;
    }
    if features.enabled(Feature::WebSearchCached) {
-        return Some(WebSearchMode::Cached);
+        return WebSearchMode::Cached;
    }
    if features.enabled(Feature::WebSearchRequest) {
-        return Some(WebSearchMode::Live);
+        return WebSearchMode::Live;
    }
-    None
+    if matches!(sandbox_policy, SandboxPolicy::DangerFullAccess) {
+        return WebSearchMode::Live;
+    }
+    WebSearchMode::Cached
 }

 impl Config {
@@ -1278,17 +1292,6 @@ impl Config {
        };

        let features = Features::from_config(&cfg, &config_profile, feature_overrides);
-        let web_search_mode = resolve_web_search_mode(&cfg, &config_profile, &features);
-        #[cfg(target_os = "windows")]
-        {
-            // Base flag controls sandbox on/off; elevated only applies when base is enabled.
-            let sandbox_enabled = features.enabled(Feature::WindowsSandbox);
-            crate::safety::set_windows_sandbox_enabled(sandbox_enabled);
-            let elevated_enabled =
-                sandbox_enabled && features.enabled(Feature::WindowsSandboxElevated);
-            crate::safety::set_windows_elevated_sandbox_enabled(elevated_enabled);
-        }
-
        let resolved_cwd = {
            use std::env;

@@ -1315,10 +1318,16 @@ impl Config {
            .get_active_project(&resolved_cwd)
            .unwrap_or(ProjectConfig { trust_level: None });

+        let windows_sandbox_level = WindowsSandboxLevel::from_features(&features);
        let SandboxPolicyResolution {
            policy: mut sandbox_policy,
            forced_auto_mode_downgraded_on_windows,
-        } = cfg.derive_sandbox_policy(sandbox_mode, config_profile.sandbox_mode, &resolved_cwd);
+        } = cfg.derive_sandbox_policy(
+            sandbox_mode,
+            config_profile.sandbox_mode,
+            windows_sandbox_level,
+            &resolved_cwd,
+        );
        if let SandboxPolicy::WorkspaceWrite { writable_roots, .. } = &mut sandbox_policy {
            for path in additional_writable_roots {
                if !writable_roots.iter().any(|existing| existing == &path) {
@@ -1338,6 +1347,8 @@ impl Config {
                    AskForApproval::default()
                }
            });
+        let web_search_mode =
+            resolve_web_search_mode(&cfg, &config_profile, &features, &sandbox_policy);
        // TODO(dylan): We should be able to leverage ConfigLayerStack so that
        // we can reliably check this at every config level.
        let did_user_set_custom_approval_policy_or_sandbox_mode = approval_policy_override
@@ -1564,6 +1575,9 @@ impl Config {
            use_experimental_unified_exec_tool,
            ghost_snapshot,
            features,
+            suppress_unstable_features_warning: cfg
+                .suppress_unstable_features_warning
+                .unwrap_or(false),
            active_profile: active_profile_name,
            active_project,
            windows_wsl_setup_acknowledged: cfg.windows_wsl_setup_acknowledged.unwrap_or(false),
@@ -1658,7 +1672,6 @@ impl Config {
    }

    pub fn set_windows_sandbox_globally(&mut self, value: bool) {
-        crate::safety::set_windows_sandbox_enabled(value);
        if value {
            self.features.enable(Feature::WindowsSandbox);
        } else {
@@ -1668,7 +1681,6 @@ impl Config {
    }

    pub fn set_windows_elevated_sandbox_globally(&mut self, value: bool) {
-        crate::safety::set_windows_elevated_sandbox_enabled(value);
        if value {
            self.features.enable(Feature::WindowsSandboxElevated);
        } else {
@@ -1862,6 +1874,7 @@ network_access = false  # This should be ignored.
        let resolution = sandbox_full_access_cfg.derive_sandbox_policy(
            sandbox_mode_override,
            None,
+            WindowsSandboxLevel::Disabled,
            &PathBuf::from("/tmp/test"),
        );
        assert_eq!(
@@ -1885,6 +1898,7 @@ network_access = true  # This should be ignored.
        let resolution = sandbox_read_only_cfg.derive_sandbox_policy(
            sandbox_mode_override,
            None,
+            WindowsSandboxLevel::Disabled,
            &PathBuf::from("/tmp/test"),
        );
        assert_eq!(
@@ -1916,6 +1930,7 @@ exclude_slash_tmp = true
        let resolution = sandbox_workspace_write_cfg.derive_sandbox_policy(
            sandbox_mode_override,
            None,
+            WindowsSandboxLevel::Disabled,
            &PathBuf::from("/tmp/test"),
        );
        if cfg!(target_os = "windows") {
@@ -1964,6 +1979,7 @@ trust_level = "trusted"
        let resolution = sandbox_workspace_write_cfg.derive_sandbox_policy(
            sandbox_mode_override,
            None,
+            WindowsSandboxLevel::Disabled,
            &PathBuf::from("/tmp/test"),
        );
        if cfg!(target_os = "windows") {
@@ -2255,12 +2271,15 @@ trust_level = "trusted"
    }

    #[test]
-    fn web_search_mode_uses_none_if_unset() {
+    fn web_search_mode_defaults_to_cached_if_unset() {
        let cfg = ConfigToml::default();
        let profile = ConfigProfile::default();
        let features = Features::with_defaults();

-        assert_eq!(resolve_web_search_mode(&cfg, &profile, &features), None);
+        assert_eq!(
+            resolve_web_search_mode(&cfg, &profile, &features, &SandboxPolicy::ReadOnly),
+            WebSearchMode::Cached
+        );
    }

    #[test]
@@ -2274,8 +2293,8 @@ trust_level = "trusted"
        features.enable(Feature::WebSearchCached);

        assert_eq!(
-            resolve_web_search_mode(&cfg, &profile, &features),
-            Some(WebSearchMode::Live)
+            resolve_web_search_mode(&cfg, &profile, &features, &SandboxPolicy::ReadOnly),
+            WebSearchMode::Live
        );
    }

@@ -2290,11 +2309,50 @@ trust_level = "trusted"
        features.enable(Feature::WebSearchRequest);

        assert_eq!(
-            resolve_web_search_mode(&cfg, &profile, &features),
-            Some(WebSearchMode::Disabled)
+            resolve_web_search_mode(&cfg, &profile, &features, &SandboxPolicy::ReadOnly),
+            WebSearchMode::Disabled
        );
    }

+    #[test]
+    fn danger_full_access_defaults_web_search_live_when_unset() -> std::io::Result<()> {
+        let codex_home = TempDir::new()?;
+        let cfg = ConfigToml {
+            sandbox_mode: Some(SandboxMode::DangerFullAccess),
+            ..Default::default()
+        };
+
+        let config = Config::load_from_base_config_with_overrides(
+            cfg,
+            ConfigOverrides::default(),
+            codex_home.path().to_path_buf(),
+        )?;
+
+        assert_eq!(config.web_search_mode, WebSearchMode::Live);
+
+        Ok(())
+    }
+
+    #[test]
+    fn explicit_web_search_mode_wins_in_danger_full_access() -> std::io::Result<()> {
+        let codex_home = TempDir::new()?;
+        let cfg = ConfigToml {
+            sandbox_mode: Some(SandboxMode::DangerFullAccess),
+            web_search: Some(WebSearchMode::Cached),
+            ..Default::default()
+        };
+
+        let config = Config::load_from_base_config_with_overrides(
+            cfg,
+            ConfigOverrides::default(),
+            codex_home.path().to_path_buf(),
+        )?;
+
+        assert_eq!(config.web_search_mode, WebSearchMode::Cached);
+
+        Ok(())
+    }
+
    #[test]
    fn profile_legacy_toggles_override_base() -> std::io::Result<()> {
        let codex_home = TempDir::new()?;
@@ -3728,10 +3786,11 @@ model_verbosity = "high"
                forced_chatgpt_workspace_id: None,
                forced_login_method: None,
                include_apply_patch_tool: false,
-                web_search_mode: None,
+                web_search_mode: WebSearchMode::Cached,
                use_experimental_unified_exec_tool: false,
                ghost_snapshot: GhostSnapshotConfig::default(),
                features: Features::with_defaults(),
+                suppress_unstable_features_warning: false,
                active_profile: Some("o3".to_string()),
                active_project: ProjectConfig { trust_level: None },
                windows_wsl_setup_acknowledged: false,
@@ -3810,10 +3869,11 @@ model_verbosity = "high"
            forced_chatgpt_workspace_id: None,
            forced_login_method: None,
            include_apply_patch_tool: false,
-            web_search_mode: None,
+            web_search_mode: WebSearchMode::Cached,
            use_experimental_unified_exec_tool: false,
            ghost_snapshot: GhostSnapshotConfig::default(),
            features: Features::with_defaults(),
+            suppress_unstable_features_warning: false,
            active_profile: Some("gpt3".to_string()),
            active_project: ProjectConfig { trust_level: None },
            windows_wsl_setup_acknowledged: false,
@@ -3907,10 +3967,11 @@ model_verbosity = "high"
            forced_chatgpt_workspace_id: None,
            forced_login_method: None,
            include_apply_patch_tool: false,
-            web_search_mode: None,
+            web_search_mode: WebSearchMode::Cached,
            use_experimental_unified_exec_tool: false,
            ghost_snapshot: GhostSnapshotConfig::default(),
            features: Features::with_defaults(),
+            suppress_unstable_features_warning: false,
            active_profile: Some("zdr".to_string()),
            active_project: ProjectConfig { trust_level: None },
            windows_wsl_setup_acknowledged: false,
@@ -3990,10 +4051,11 @@ model_verbosity = "high"
            forced_chatgpt_workspace_id: None,
            forced_login_method: None,
            include_apply_patch_tool: false,
-            web_search_mode: None,
+            web_search_mode: WebSearchMode::Cached,
            use_experimental_unified_exec_tool: false,
            ghost_snapshot: GhostSnapshotConfig::default(),
            features: Features::with_defaults(),
+            suppress_unstable_features_warning: false,
            active_profile: Some("gpt5".to_string()),
            active_project: ProjectConfig { trust_level: None },
            windows_wsl_setup_acknowledged: false,
@@ -4174,7 +4236,12 @@ trust_level = "untrusted"
        let cfg = toml::from_str::<ConfigToml>(config_with_untrusted)
            .expect("TOML deserialization should succeed");

-        let resolution = cfg.derive_sandbox_policy(None, None, &PathBuf::from("/tmp/test"));
+        let resolution = cfg.derive_sandbox_policy(
+            None,
+            None,
+            WindowsSandboxLevel::Disabled,
+            &PathBuf::from("/tmp/test"),
+        );

        // Verify that untrusted projects get WorkspaceWrite (or ReadOnly on Windows due to downgrade)
        if cfg!(target_os = "windows") {
--- a/codex-rs/core/src/event_mapping.rs
+++ b/codex-rs/core/src/event_mapping.rs
@@ -21,6 +21,7 @@ use crate::instructions::SkillInstructions;
 use crate::instructions::UserInstructions;
 use crate::session_prefix::is_session_prefix;
 use crate::user_shell_command::is_user_shell_command_text;
+use crate::web_search::web_search_action_detail;

 fn parse_user_message(message: &[ContentItem]) -> Option<UserMessageItem> {
    if UserInstructions::is_user_instructions(message)
@@ -127,14 +128,17 @@ pub fn parse_turn_item(item: &ResponseItem) -> Option<TurnItem> {
                raw_content,
            }))
        }
-        ResponseItem::WebSearchCall {
-            id,
-            action: WebSearchAction::Search { query },
-            ..
-        } => Some(TurnItem::WebSearch(WebSearchItem {
-            id: id.clone().unwrap_or_default(),
-            query: query.clone().unwrap_or_default(),
-        })),
+        ResponseItem::WebSearchCall { id, action, .. } => {
+            let (action, query) = match action {
+                Some(action) => (action.clone(), web_search_action_detail(action)),
+                None => (WebSearchAction::Other, String::new()),
+            };
+            Some(TurnItem::WebSearch(WebSearchItem {
+                id: id.clone().unwrap_or_default(),
+                query,
+                action,
+            }))
+        }
        _ => None,
    }
 }
@@ -144,6 +148,7 @@ mod tests {
    use super::parse_turn_item;
    use codex_protocol::items::AgentMessageContent;
    use codex_protocol::items::TurnItem;
+    use codex_protocol::items::WebSearchItem;
    use codex_protocol::models::ContentItem;
    use codex_protocol::models::ReasoningItemContent;
    use codex_protocol::models::ReasoningItemReasoningSummary;
@@ -419,18 +424,102 @@ mod tests {
        let item = ResponseItem::WebSearchCall {
            id: Some("ws_1".to_string()),
            status: Some("completed".to_string()),
-            action: WebSearchAction::Search {
+            action: Some(WebSearchAction::Search {
                query: Some("weather".to_string()),
-            },
+            }),
        };

        let turn_item = parse_turn_item(&item).expect("expected web search turn item");

        match turn_item {
-            TurnItem::WebSearch(search) => {
-                assert_eq!(search.id, "ws_1");
-                assert_eq!(search.query, "weather");
-            }
+            TurnItem::WebSearch(search) => assert_eq!(
+                search,
+                WebSearchItem {
+                    id: "ws_1".to_string(),
+                    query: "weather".to_string(),
+                    action: WebSearchAction::Search {
+                        query: Some("weather".to_string()),
+                    },
+                }
+            ),
+            other => panic!("expected TurnItem::WebSearch, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn parses_web_search_open_page_call() {
+        let item = ResponseItem::WebSearchCall {
+            id: Some("ws_open".to_string()),
+            status: Some("completed".to_string()),
+            action: Some(WebSearchAction::OpenPage {
+                url: Some("https://example.com".to_string()),
+            }),
+        };
+
+        let turn_item = parse_turn_item(&item).expect("expected web search turn item");
+
+        match turn_item {
+            TurnItem::WebSearch(search) => assert_eq!(
+                search,
+                WebSearchItem {
+                    id: "ws_open".to_string(),
+                    query: "https://example.com".to_string(),
+                    action: WebSearchAction::OpenPage {
+                        url: Some("https://example.com".to_string()),
+                    },
+                }
+            ),
+            other => panic!("expected TurnItem::WebSearch, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn parses_web_search_find_in_page_call() {
+        let item = ResponseItem::WebSearchCall {
+            id: Some("ws_find".to_string()),
+            status: Some("completed".to_string()),
+            action: Some(WebSearchAction::FindInPage {
+                url: Some("https://example.com".to_string()),
+                pattern: Some("needle".to_string()),
+            }),
+        };
+
+        let turn_item = parse_turn_item(&item).expect("expected web search turn item");
+
+        match turn_item {
+            TurnItem::WebSearch(search) => assert_eq!(
+                search,
+                WebSearchItem {
+                    id: "ws_find".to_string(),
+                    query: "'needle' in https://example.com".to_string(),
+                    action: WebSearchAction::FindInPage {
+                        url: Some("https://example.com".to_string()),
+                        pattern: Some("needle".to_string()),
+                    },
+                }
+            ),
+            other => panic!("expected TurnItem::WebSearch, got {other:?}"),
+        }
+    }
+
+    #[test]
+    fn parses_partial_web_search_call_without_action_as_other() {
+        let item = ResponseItem::WebSearchCall {
+            id: Some("ws_partial".to_string()),
+            status: Some("in_progress".to_string()),
+            action: None,
+        };
+
+        let turn_item = parse_turn_item(&item).expect("expected web search turn item");
+        match turn_item {
+            TurnItem::WebSearch(search) => assert_eq!(
+                search,
+                WebSearchItem {
+                    id: "ws_partial".to_string(),
+                    query: String::new(),
+                    action: WebSearchAction::Other,
+                }
+            ),
            other => panic!("expected TurnItem::WebSearch, got {other:?}"),
        }
    }
--- a/codex-rs/core/src/exec.rs
+++ b/codex-rs/core/src/exec.rs
@@ -64,6 +64,7 @@ pub struct ExecParams {
    pub expiration: ExecExpiration,
    pub env: HashMap<String, String>,
    pub sandbox_permissions: SandboxPermissions,
+    pub windows_sandbox_level: codex_protocol::config_types::WindowsSandboxLevel,
    pub justification: Option<String>,
    pub arg0: Option<String>,
 }
@@ -141,11 +142,15 @@ pub async fn process_exec_tool_call(
    codex_linux_sandbox_exe: &Option<PathBuf>,
    stdout_stream: Option<StdoutStream>,
 ) -> Result<ExecToolCallOutput> {
+    let windows_sandbox_level = params.windows_sandbox_level;
    let sandbox_type = match &sandbox_policy {
        SandboxPolicy::DangerFullAccess | SandboxPolicy::ExternalSandbox { .. } => {
            SandboxType::None
        }
-        _ => get_platform_sandbox().unwrap_or(SandboxType::None),
+        _ => get_platform_sandbox(
+            windows_sandbox_level != codex_protocol::config_types::WindowsSandboxLevel::Disabled,
+        )
+        .unwrap_or(SandboxType::None),
    };
    tracing::debug!("Sandbox type: {sandbox_type:?}");

@@ -155,6 +160,7 @@ pub async fn process_exec_tool_call(
        expiration,
        env,
        sandbox_permissions,
+        windows_sandbox_level,
        justification,
        arg0: _,
    } = params;
@@ -184,6 +190,7 @@ pub async fn process_exec_tool_call(
            sandbox_type,
            sandbox_cwd,
            codex_linux_sandbox_exe.as_ref(),
+            windows_sandbox_level,
        )
        .map_err(CodexErr::from)?;

@@ -202,6 +209,7 @@ pub(crate) async fn execute_exec_env(
        env,
        expiration,
        sandbox,
+        windows_sandbox_level,
        sandbox_permissions,
        justification,
        arg0,
@@ -213,6 +221,7 @@ pub(crate) async fn execute_exec_env(
        expiration,
        env,
        sandbox_permissions,
+        windows_sandbox_level,
        justification,
        arg0,
    };
@@ -229,7 +238,7 @@ async fn exec_windows_sandbox(
    sandbox_policy: &SandboxPolicy,
 ) -> Result<RawExecToolCallOutput> {
    use crate::config::find_codex_home;
-    use crate::safety::is_windows_elevated_sandbox_enabled;
+    use codex_protocol::config_types::WindowsSandboxLevel;
    use codex_windows_sandbox::run_windows_sandbox_capture;
    use codex_windows_sandbox::run_windows_sandbox_capture_elevated;

@@ -238,6 +247,7 @@ async fn exec_windows_sandbox(
        cwd,
        env,
        expiration,
+        windows_sandbox_level,
        ..
    } = params;
    // TODO(iceweasel-oai): run_windows_sandbox_capture should support all
@@ -255,7 +265,7 @@ async fn exec_windows_sandbox(
            "windows sandbox: failed to resolve codex_home: {err}"
        )))
    })?;
-    let use_elevated = is_windows_elevated_sandbox_enabled();
+    let use_elevated = matches!(windows_sandbox_level, WindowsSandboxLevel::Elevated);
    let spawn_res = tokio::task::spawn_blocking(move || {
        if use_elevated {
            run_windows_sandbox_capture_elevated(
@@ -312,20 +322,7 @@ async fn exec_windows_sandbox(
        text: stderr_text,
        truncated_after_lines: None,
    };
-    // Best-effort aggregate: stdout then stderr (capped).
-    let mut aggregated = Vec::with_capacity(
-        stdout
-            .text
-            .len()
-            .saturating_add(stderr.text.len())
-            .min(EXEC_OUTPUT_MAX_BYTES),
-    );
-    append_capped(&mut aggregated, &stdout.text, EXEC_OUTPUT_MAX_BYTES);
-    append_capped(&mut aggregated, &stderr.text, EXEC_OUTPUT_MAX_BYTES);
-    let aggregated_output = StreamOutput {
-        text: aggregated,
-        truncated_after_lines: None,
-    };
+    let aggregated_output = aggregate_output(&stdout, &stderr);

    Ok(RawExecToolCallOutput {
        exit_status,
@@ -519,6 +516,39 @@ fn append_capped(dst: &mut Vec<u8>, src: &[u8], max_bytes: usize) {
    dst.extend_from_slice(&src[..take]);
 }

+fn aggregate_output(
+    stdout: &StreamOutput<Vec<u8>>,
+    stderr: &StreamOutput<Vec<u8>>,
+) -> StreamOutput<Vec<u8>> {
+    let total_len = stdout.text.len().saturating_add(stderr.text.len());
+    let max_bytes = EXEC_OUTPUT_MAX_BYTES;
+    let mut aggregated = Vec::with_capacity(total_len.min(max_bytes));
+
+    if total_len <= max_bytes {
+        aggregated.extend_from_slice(&stdout.text);
+        aggregated.extend_from_slice(&stderr.text);
+        return StreamOutput {
+            text: aggregated,
+            truncated_after_lines: None,
+        };
+    }
+
+    // Over the cap: keep up to 1/3 for stdout, let stderr fill the rest, then hand any space stderr left unused back to stdout.
+    let want_stdout = stdout.text.len().min(max_bytes / 3);
+    let want_stderr = stderr.text.len();
+    let stderr_take = want_stderr.min(max_bytes.saturating_sub(want_stdout));
+    let remaining = max_bytes.saturating_sub(want_stdout + stderr_take);
+    let stdout_take = want_stdout + remaining.min(stdout.text.len().saturating_sub(want_stdout));
+
+    aggregated.extend_from_slice(&stdout.text[..stdout_take]);
+    aggregated.extend_from_slice(&stderr.text[..stderr_take]);
+
+    StreamOutput {
+        text: aggregated,
+        truncated_after_lines: None,
+    }
+}
+
 #[derive(Clone, Debug)]
 pub struct ExecToolCallOutput {
    pub exit_code: i32,
@@ -564,6 +594,7 @@ async fn exec(
        env,
        arg0,
        expiration,
+        windows_sandbox_level: _,
        ..
    } = params;

@@ -683,20 +714,7 @@ async fn consume_truncated_output(
        Duration::from_millis(IO_DRAIN_TIMEOUT_MS),
    )
    .await?;
-    // Best-effort aggregate: stdout then stderr (capped).
-    let mut aggregated = Vec::with_capacity(
-        stdout
-            .text
-            .len()
-            .saturating_add(stderr.text.len())
-            .min(EXEC_OUTPUT_MAX_BYTES),
-    );
-    append_capped(&mut aggregated, &stdout.text, EXEC_OUTPUT_MAX_BYTES);
-    append_capped(&mut aggregated, &stderr.text, EXEC_OUTPUT_MAX_BYTES * 2);
-    let aggregated_output = StreamOutput {
-        text: aggregated,
-        truncated_after_lines: None,
-    };
+    let aggregated_output = aggregate_output(&stdout, &stderr);

    Ok(RawExecToolCallOutput {
        exit_status,
@@ -771,6 +789,7 @@ fn synthetic_exit_status(code: i32) -> ExitStatus {
 #[cfg(test)]
 mod tests {
    use super::*;
+    use pretty_assertions::assert_eq;
    use std::time::Duration;
    use tokio::io::AsyncWriteExt;

@@ -846,6 +865,85 @@ mod tests {
        assert_eq!(out.text.len(), EXEC_OUTPUT_MAX_BYTES);
    }

+    #[test]
+    fn aggregate_output_prefers_stderr_on_contention() {
+        let stdout = StreamOutput {
+            text: vec![b'a'; EXEC_OUTPUT_MAX_BYTES],
+            truncated_after_lines: None,
+        };
+        let stderr = StreamOutput {
+            text: vec![b'b'; EXEC_OUTPUT_MAX_BYTES],
+            truncated_after_lines: None,
+        };
+
+        let aggregated = aggregate_output(&stdout, &stderr);
+        let stdout_cap = EXEC_OUTPUT_MAX_BYTES / 3;
+        let stderr_cap = EXEC_OUTPUT_MAX_BYTES.saturating_sub(stdout_cap);
+
+        assert_eq!(aggregated.text.len(), EXEC_OUTPUT_MAX_BYTES);
+        assert_eq!(aggregated.text[..stdout_cap], vec![b'a'; stdout_cap]);
+        assert_eq!(aggregated.text[stdout_cap..], vec![b'b'; stderr_cap]);
+    }
+
+    #[test]
+    fn aggregate_output_fills_remaining_capacity_with_stderr() {
+        let stdout_len = EXEC_OUTPUT_MAX_BYTES / 10;
+        let stdout = StreamOutput {
+            text: vec![b'a'; stdout_len],
+            truncated_after_lines: None,
+        };
+        let stderr = StreamOutput {
+            text: vec![b'b'; EXEC_OUTPUT_MAX_BYTES],
+            truncated_after_lines: None,
+        };
+
+        let aggregated = aggregate_output(&stdout, &stderr);
+        let stderr_cap = EXEC_OUTPUT_MAX_BYTES.saturating_sub(stdout_len);
+
+        assert_eq!(aggregated.text.len(), EXEC_OUTPUT_MAX_BYTES);
+        assert_eq!(aggregated.text[..stdout_len], vec![b'a'; stdout_len]);
+        assert_eq!(aggregated.text[stdout_len..], vec![b'b'; stderr_cap]);
+    }
+
+    #[test]
+    fn aggregate_output_rebalances_when_stderr_is_small() {
+        let stdout = StreamOutput {
+            text: vec![b'a'; EXEC_OUTPUT_MAX_BYTES],
+            truncated_after_lines: None,
+        };
+        let stderr = StreamOutput {
+            text: vec![b'b'; 1],
+            truncated_after_lines: None,
+        };
+
+        let aggregated = aggregate_output(&stdout, &stderr);
+        let stdout_len = EXEC_OUTPUT_MAX_BYTES.saturating_sub(1);
+
+        assert_eq!(aggregated.text.len(), EXEC_OUTPUT_MAX_BYTES);
+        assert_eq!(aggregated.text[..stdout_len], vec![b'a'; stdout_len]);
+        assert_eq!(aggregated.text[stdout_len..], vec![b'b'; 1]);
+    }
+
+    #[test]
+    fn aggregate_output_keeps_stdout_then_stderr_when_under_cap() {
+        let stdout = StreamOutput {
+            text: vec![b'a'; 4],
+            truncated_after_lines: None,
+        };
+        let stderr = StreamOutput {
+            text: vec![b'b'; 3],
+            truncated_after_lines: None,
+        };
+
+        let aggregated = aggregate_output(&stdout, &stderr);
+        let mut expected = Vec::new();
+        expected.extend_from_slice(&stdout.text);
+        expected.extend_from_slice(&stderr.text);
+
+        assert_eq!(aggregated.text, expected);
+        assert_eq!(aggregated.truncated_after_lines, None);
+    }
+
    #[cfg(unix)]
    #[test]
    fn sandbox_detection_flags_sigsys_exit_code() {
@@ -878,6 +976,7 @@ mod tests {
            expiration: 500.into(),
            env,
            sandbox_permissions: SandboxPermissions::UseDefault,
+            windows_sandbox_level: codex_protocol::config_types::WindowsSandboxLevel::Disabled,
            justification: None,
            arg0: None,
        };
@@ -923,6 +1022,7 @@ mod tests {
            expiration: ExecExpiration::Cancellation(cancel_token),
            env,
            sandbox_permissions: SandboxPermissions::UseDefault,
+            windows_sandbox_level: codex_protocol::config_types::WindowsSandboxLevel::Disabled,
            justification: None,
            arg0: None,
        };
--- a/codex-rs/core/src/features.rs
+++ b/codex-rs/core/src/features.rs
@@ -5,14 +5,20 @@
 //! booleans through multiple types, call sites consult a single `Features`
 //! container attached to `Config`.

+use crate::config::CONFIG_TOML_FILE;
+use crate::config::Config;
 use crate::config::ConfigToml;
 use crate::config::profile::ConfigProfile;
+use crate::protocol::Event;
+use crate::protocol::EventMsg;
+use crate::protocol::WarningEvent;
 use codex_otel::OtelManager;
 use schemars::JsonSchema;
 use serde::Deserialize;
 use serde::Serialize;
 use std::collections::BTreeMap;
 use std::collections::BTreeSet;
+use toml::Value as TomlValue;

 mod legacy;
 pub(crate) use legacy::LegacyFeatureToggles;
@@ -466,3 +472,54 @@ pub const FEATURES: &[FeatureSpec] = &[
        default_enabled: false,
    },
 ];
+
+/// Push a warning event listing enabled under-development features set to `true` in the config `features` table.
+pub fn maybe_push_unstable_features_warning(
+    config: &Config,
+    post_session_configured_events: &mut Vec<Event>,
+) {
+    if config.suppress_unstable_features_warning {
+        return;
+    }
+
+    let mut under_development_feature_keys = Vec::new();
+    if let Some(table) = config
+        .config_layer_stack
+        .effective_config()
+        .get("features")
+        .and_then(TomlValue::as_table)
+    {
+        for (key, value) in table {
+            if value.as_bool() != Some(true) {
+                continue;
+            }
+            let Some(spec) = FEATURES.iter().find(|spec| spec.key == key.as_str()) else {
+                continue;
+            };
+            if !config.features.enabled(spec.id) {
+                continue;
+            }
+            if matches!(spec.stage, Stage::UnderDevelopment) {
+                under_development_feature_keys.push(spec.key.to_string());
+            }
+        }
+    }
+
+    if under_development_feature_keys.is_empty() {
+        return;
+    }
+
+    let under_development_feature_keys = under_development_feature_keys.join(", ");
+    let config_path = config
+        .codex_home
+        .join(CONFIG_TOML_FILE)
+        .display()
+        .to_string();
+    let message = format!(
+        "Under-development features enabled: {under_development_feature_keys}. Under-development features are incomplete and may behave unpredictably. To suppress this warning, set `suppress_unstable_features_warning = true` in {config_path}."
+    );
+    post_session_configured_events.push(Event {
+        id: "".to_owned(),
+        msg: EventMsg::Warning(WarningEvent { message }),
+    });
+}
--- a/codex-rs/core/src/lib.rs
+++ b/codex-rs/core/src/lib.rs
@@ -69,6 +69,7 @@ mod event_mapping;
 pub mod review_format;
 pub mod review_prompts;
 mod thread_manager;
+pub mod web_search;
 pub use codex_protocol::protocol::InitialHistory;
 pub use thread_manager::NewThread;
 pub use thread_manager::ThreadManager;
@@ -125,9 +126,6 @@ pub use exec_policy::ExecPolicyError;
 pub use exec_policy::check_execpolicy_for_warnings;
 pub use exec_policy::load_exec_policy;
 pub use safety::get_platform_sandbox;
-pub use safety::is_windows_elevated_sandbox_enabled;
-pub use safety::set_windows_elevated_sandbox_enabled;
-pub use safety::set_windows_sandbox_enabled;
 pub use tools::spec::parse_tool_input_schema;
 // Re-export the protocol types from the standalone `codex-protocol` crate so existing
 // `codex_core::protocol::...` references continue to work across the workspace.
--- a/codex-rs/core/src/models_manager/collaboration_mode_presets.rs
+++ b/codex-rs/core/src/models_manager/collaboration_mode_presets.rs
@@ -28,7 +28,7 @@ fn plan_preset() -> CollaborationModeMask {
        name: "Plan".to_string(),
        mode: Some(ModeKind::Plan),
        model: None,
-        reasoning_effort: Some(Some(ReasoningEffort::High)),
+        reasoning_effort: Some(Some(ReasoningEffort::Medium)),
        developer_instructions: Some(Some(COLLABORATION_MODE_PLAN.to_string())),
    }
 }
--- a/codex-rs/core/src/rollout/policy.rs
+++ b/codex-rs/core/src/rollout/policy.rs
@@ -42,7 +42,8 @@ pub(crate) fn should_persist_event_msg(ev: &EventMsg) -> bool {
        | EventMsg::AgentReasoning(_)
        | EventMsg::AgentReasoningRawContent(_)
        | EventMsg::TokenCount(_)
-        | EventMsg::ContextCompacted(_)
+        | EventMsg::ContextCompactionStarted(_)
+        | EventMsg::ContextCompactionEnded(_)
        | EventMsg::EnteredReviewMode(_)
        | EventMsg::ExitedReviewMode(_)
        | EventMsg::ThreadRolledBack(_)
--- a/codex-rs/core/src/safety.rs
+++ b/codex-rs/core/src/safety.rs
@@ -10,45 +10,7 @@ use crate::util::resolve_path;

 use crate::protocol::AskForApproval;
 use crate::protocol::SandboxPolicy;
-
-#[cfg(target_os = "windows")]
-use std::sync::atomic::AtomicBool;
-#[cfg(target_os = "windows")]
-use std::sync::atomic::Ordering;
-
-#[cfg(target_os = "windows")]
-static WINDOWS_SANDBOX_ENABLED: AtomicBool = AtomicBool::new(false);
-#[cfg(target_os = "windows")]
-static WINDOWS_ELEVATED_SANDBOX_ENABLED: AtomicBool = AtomicBool::new(false);
-
-#[cfg(target_os = "windows")]
-pub fn set_windows_sandbox_enabled(enabled: bool) {
-    WINDOWS_SANDBOX_ENABLED.store(enabled, Ordering::Relaxed);
-}
-
-#[cfg(not(target_os = "windows"))]
-#[allow(dead_code)]
-pub fn set_windows_sandbox_enabled(_enabled: bool) {}
-
-#[cfg(target_os = "windows")]
-pub fn set_windows_elevated_sandbox_enabled(enabled: bool) {
-    WINDOWS_ELEVATED_SANDBOX_ENABLED.store(enabled, Ordering::Relaxed);
-}
-
-#[cfg(not(target_os = "windows"))]
-#[allow(dead_code)]
-pub fn set_windows_elevated_sandbox_enabled(_enabled: bool) {}
-
-#[cfg(target_os = "windows")]
-pub fn is_windows_elevated_sandbox_enabled() -> bool {
-    WINDOWS_ELEVATED_SANDBOX_ENABLED.load(Ordering::Relaxed)
-}
-
-#[cfg(not(target_os = "windows"))]
-#[allow(dead_code)]
-pub fn is_windows_elevated_sandbox_enabled() -> bool {
-    false
-}
+use codex_protocol::config_types::WindowsSandboxLevel;

 #[derive(Debug, PartialEq)]
 pub enum SafetyCheck {
@@ -67,6 +29,7 @@ pub fn assess_patch_safety(
    policy: AskForApproval,
    sandbox_policy: &SandboxPolicy,
    cwd: &Path,
+    windows_sandbox_level: WindowsSandboxLevel,
 ) -> SafetyCheck {
    if action.is_empty() {
        return SafetyCheck::Reject {
@@ -104,7 +67,7 @@ pub fn assess_patch_safety(
            // Only auto‑approve when we can actually enforce a sandbox. Otherwise
            // fall back to asking the user because the patch may touch arbitrary
            // paths outside the project.
-            match get_platform_sandbox() {
+            match get_platform_sandbox(windows_sandbox_level != WindowsSandboxLevel::Disabled) {
                Some(sandbox_type) => SafetyCheck::AutoApprove {
                    sandbox_type,
                    user_explicitly_approved: false,
@@ -122,19 +85,17 @@ pub fn assess_patch_safety(
    }
 }

-pub fn get_platform_sandbox() -> Option<SandboxType> {
+pub fn get_platform_sandbox(windows_sandbox_enabled: bool) -> Option<SandboxType> {
    if cfg!(target_os = "macos") {
        Some(SandboxType::MacosSeatbelt)
    } else if cfg!(target_os = "linux") {
        Some(SandboxType::LinuxSeccomp)
    } else if cfg!(target_os = "windows") {
-        #[cfg(target_os = "windows")]
-        {
-            if WINDOWS_SANDBOX_ENABLED.load(Ordering::Relaxed) {
-                return Some(SandboxType::WindowsRestrictedToken);
-            }
+        if windows_sandbox_enabled {
+            Some(SandboxType::WindowsRestrictedToken)
+        } else {
+            None
        }
-        None
    } else {
        None
    }
@@ -277,7 +238,13 @@ mod tests {
        };

        assert_eq!(
-            assess_patch_safety(&add_inside, AskForApproval::OnRequest, &policy, &cwd),
+            assess_patch_safety(
+                &add_inside,
+                AskForApproval::OnRequest,
+                &policy,
+                &cwd,
+                WindowsSandboxLevel::Disabled
+            ),
            SafetyCheck::AutoApprove {
                sandbox_type: SandboxType::None,
                user_explicitly_approved: false,
--- a/codex-rs/core/src/sandboxing/mod.rs
+++ b/codex-rs/core/src/sandboxing/mod.rs
@@ -21,6 +21,7 @@ use crate::seatbelt::create_seatbelt_command_args;
 use crate::spawn::CODEX_SANDBOX_ENV_VAR;
 use crate::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
 use crate::tools::sandboxing::SandboxablePreference;
+use codex_protocol::config_types::WindowsSandboxLevel;
 pub use codex_protocol::models::SandboxPermissions;
 use std::collections::HashMap;
 use std::path::Path;
@@ -44,6 +45,7 @@ pub struct ExecEnv {
    pub env: HashMap<String, String>,
    pub expiration: ExecExpiration,
    pub sandbox: SandboxType,
+    pub windows_sandbox_level: WindowsSandboxLevel,
    pub sandbox_permissions: SandboxPermissions,
    pub justification: Option<String>,
    pub arg0: Option<String>,
@@ -76,19 +78,26 @@ impl SandboxManager {
        &self,
        policy: &SandboxPolicy,
        pref: SandboxablePreference,
+        windows_sandbox_level: WindowsSandboxLevel,
    ) -> SandboxType {
        match pref {
            SandboxablePreference::Forbid => SandboxType::None,
            SandboxablePreference::Require => {
                // Require a platform sandbox when available; on Windows this
                // respects the experimental_windows_sandbox feature.
-                crate::safety::get_platform_sandbox().unwrap_or(SandboxType::None)
+                crate::safety::get_platform_sandbox(
+                    windows_sandbox_level != WindowsSandboxLevel::Disabled,
+                )
+                .unwrap_or(SandboxType::None)
            }
            SandboxablePreference::Auto => match policy {
                SandboxPolicy::DangerFullAccess | SandboxPolicy::ExternalSandbox { .. } => {
                    SandboxType::None
                }
-                _ => crate::safety::get_platform_sandbox().unwrap_or(SandboxType::None),
+                _ => crate::safety::get_platform_sandbox(
+                    windows_sandbox_level != WindowsSandboxLevel::Disabled,
+                )
+                .unwrap_or(SandboxType::None),
            },
        }
    }
@@ -100,6 +109,7 @@ impl SandboxManager {
        sandbox: SandboxType,
        sandbox_policy_cwd: &Path,
        codex_linux_sandbox_exe: Option<&PathBuf>,
+        windows_sandbox_level: WindowsSandboxLevel,
    ) -> Result<ExecEnv, SandboxTransformError> {
        let mut env = spec.env;
        if !policy.has_full_network_access() {
@@ -160,6 +170,7 @@ impl SandboxManager {
            env,
            expiration: spec.expiration,
            sandbox,
+            windows_sandbox_level,
            sandbox_permissions: spec.sandbox_permissions,
            justification: spec.justification,
            arg0: arg0_override,
--- a/codex-rs/core/src/skills/loader.rs
+++ b/codex-rs/core/src/skills/loader.rs
@@ -35,7 +35,7 @@ struct SkillFrontmatterMetadata {
 }

 #[derive(Debug, Default, Deserialize)]
-struct SkillToml {
+struct SkillMetadataFile {
    #[serde(default)]
    interface: Option<Interface>,
 }
@@ -51,7 +51,7 @@ struct Interface {
 }

 const SKILLS_FILENAME: &str = "SKILL.md";
-const SKILLS_TOML_FILENAME: &str = "SKILL.toml";
+const SKILLS_JSON_FILENAME: &str = "SKILL.json";
 const SKILLS_DIR_NAME: &str = "skills";
 const MAX_NAME_LEN: usize = 64;
 const MAX_DESCRIPTION_LEN: usize = 1024;
@@ -370,9 +370,9 @@ fn parse_skill_file(path: &Path, scope: SkillScope) -> Result<SkillMetadata, Ski
 }

 fn load_skill_interface(skill_path: &Path) -> Option<SkillInterface> {
-    // Fail open: optional SKILL.toml metadata should not block loading SKILL.md.
+    // Fail open: optional interface metadata should not block loading SKILL.md.
    let skill_dir = skill_path.parent()?;
-    let interface_path = skill_dir.join(SKILLS_TOML_FILENAME);
+    let interface_path = skill_dir.join(SKILLS_JSON_FILENAME);
    if !interface_path.exists() {
        return None;
    }
@@ -381,17 +381,17 @@ fn load_skill_interface(skill_path: &Path) -> Option<SkillInterface> {
        Ok(contents) => contents,
        Err(error) => {
            tracing::warn!(
-                "ignoring {path}: failed to read SKILL.toml: {error}",
+                "ignoring {path}: failed to read SKILL.json: {error}",
                path = interface_path.display()
            );
            return None;
        }
    };
-    let parsed: SkillToml = match toml::from_str(&contents) {
+    let parsed: SkillMetadataFile = match serde_json::from_str(&contents) {
        Ok(parsed) => parsed,
        Err(error) => {
            tracing::warn!(
-                "ignoring {path}: invalid TOML: {error}",
+                "ignoring {path}: invalid JSON: {error}",
                path = interface_path.display()
            );
            return None;
@@ -756,7 +756,7 @@ mod tests {
    }

    fn write_skill_interface_at(skill_dir: &Path, contents: &str) -> PathBuf {
-        let path = skill_dir.join(SKILLS_TOML_FILENAME);
+        let path = skill_dir.join(SKILLS_JSON_FILENAME);
        fs::write(&path, contents).unwrap();
        path
    }
@@ -764,20 +764,23 @@ mod tests {
    #[tokio::test]
    async fn loads_skill_interface_metadata_happy_path() {
        let codex_home = tempfile::tempdir().expect("tempdir");
-        let skill_path = write_skill(&codex_home, "demo", "ui-skill", "from toml");
+        let skill_path = write_skill(&codex_home, "demo", "ui-skill", "from json");
        let skill_dir = skill_path.parent().expect("skill dir");
        let normalized_skill_dir = normalized(skill_dir);

        write_skill_interface_at(
            skill_dir,
            r##"
-[interface]
-display_name = "UI Skill"
-short_description = "  short    desc   "
-icon_small = "./assets/small-400px.png"
-icon_large = "./assets/large-logo.svg"
-brand_color = "#3B82F6"
-default_prompt = "  default   prompt   "
+{
+  "interface": {
+    "display_name": "UI Skill",
+    "short_description": "  short    desc   ",
+    "icon_small": "./assets/small-400px.png",
+    "icon_large": "./assets/large-logo.svg",
+    "brand_color": "#3B82F6",
+    "default_prompt": "  default   prompt   "
+  }
+}
 "##,
        );

@@ -793,7 +796,7 @@ default_prompt = "  default   prompt   "
            outcome.skills,
            vec![SkillMetadata {
                name: "ui-skill".to_string(),
-                description: "from toml".to_string(),
+                description: "from json".to_string(),
                short_description: None,
                interface: Some(SkillInterface {
                    display_name: Some("UI Skill".to_string()),
@@ -803,7 +806,7 @@ default_prompt = "  default   prompt   "
                    brand_color: Some("#3B82F6".to_string()),
                    default_prompt: Some("default prompt".to_string()),
                }),
-                path: normalized(&skill_path),
+                path: normalized(skill_path.as_path()),
                scope: SkillScope::User,
            }]
        );
@@ -812,17 +815,20 @@ default_prompt = "  default   prompt   "
    #[tokio::test]
    async fn accepts_icon_paths_under_assets_dir() {
        let codex_home = tempfile::tempdir().expect("tempdir");
-        let skill_path = write_skill(&codex_home, "demo", "ui-skill", "from toml");
+        let skill_path = write_skill(&codex_home, "demo", "ui-skill", "from json");
        let skill_dir = skill_path.parent().expect("skill dir");
        let normalized_skill_dir = normalized(skill_dir);

        write_skill_interface_at(
            skill_dir,
            r#"
-[interface]
-display_name = "UI Skill"
-icon_small = "assets/icon.png"
-icon_large = "./assets/logo.svg"
+{
+  "interface": {
+    "display_name": "UI Skill",
+    "icon_small": "assets/icon.png",
+    "icon_large": "./assets/logo.svg"
+  }
+}
 "#,
        );

@@ -838,7 +844,7 @@ icon_large = "./assets/logo.svg"
            outcome.skills,
            vec![SkillMetadata {
                name: "ui-skill".to_string(),
-                description: "from toml".to_string(),
+                description: "from json".to_string(),
                short_description: None,
                interface: Some(SkillInterface {
                    display_name: Some("UI Skill".to_string()),
@@ -857,14 +863,17 @@ icon_large = "./assets/logo.svg"
    #[tokio::test]
    async fn ignores_invalid_brand_color() {
        let codex_home = tempfile::tempdir().expect("tempdir");
-        let skill_path = write_skill(&codex_home, "demo", "ui-skill", "from toml");
+        let skill_path = write_skill(&codex_home, "demo", "ui-skill", "from json");
        let skill_dir = skill_path.parent().expect("skill dir");

        write_skill_interface_at(
            skill_dir,
            r#"
-[interface]
-brand_color = "blue"
+{
+  "interface": {
+    "brand_color": "blue"
+  }
+}
 "#,
        );

@@ -880,7 +889,7 @@ brand_color = "blue"
            outcome.skills,
            vec![SkillMetadata {
                name: "ui-skill".to_string(),
-                description: "from toml".to_string(),
+                description: "from json".to_string(),
                short_description: None,
                interface: None,
                path: normalized(&skill_path),
@@ -892,7 +901,7 @@ brand_color = "blue"
    #[tokio::test]
    async fn ignores_default_prompt_over_max_length() {
        let codex_home = tempfile::tempdir().expect("tempdir");
-        let skill_path = write_skill(&codex_home, "demo", "ui-skill", "from toml");
+        let skill_path = write_skill(&codex_home, "demo", "ui-skill", "from json");
        let skill_dir = skill_path.parent().expect("skill dir");
        let normalized_skill_dir = normalized(skill_dir);
        let too_long = "x".repeat(MAX_DEFAULT_PROMPT_LEN + 1);
@@ -901,10 +910,13 @@ brand_color = "blue"
            skill_dir,
            &format!(
                r##"
-[interface]
-display_name = "UI Skill"
-icon_small = "./assets/small-400px.png"
-default_prompt = "{too_long}"
+{{
+  "interface": {{
+    "display_name": "UI Skill",
+    "icon_small": "./assets/small-400px.png",
+    "default_prompt": "{too_long}"
+  }}
+}}
 "##
            ),
        );
@@ -921,7 +933,7 @@ default_prompt = "{too_long}"
            outcome.skills,
            vec![SkillMetadata {
                name: "ui-skill".to_string(),
-                description: "from toml".to_string(),
+                description: "from json".to_string(),
                short_description: None,
                interface: Some(SkillInterface {
                    display_name: Some("UI Skill".to_string()),
@@ -940,15 +952,18 @@ default_prompt = "{too_long}"
    #[tokio::test]
    async fn drops_interface_when_icons_are_invalid() {
        let codex_home = tempfile::tempdir().expect("tempdir");
-        let skill_path = write_skill(&codex_home, "demo", "ui-skill", "from toml");
+        let skill_path = write_skill(&codex_home, "demo", "ui-skill", "from json");
        let skill_dir = skill_path.parent().expect("skill dir");

        write_skill_interface_at(
            skill_dir,
            r#"
-[interface]
-icon_small = "icon.png"
-icon_large = "./assets/../logo.svg"
+{
+  "interface": {
+    "icon_small": "icon.png",
+    "icon_large": "./assets/../logo.svg"
+  }
+}
 "#,
        );

@@ -964,7 +979,7 @@ icon_large = "./assets/../logo.svg"
            outcome.skills,
            vec![SkillMetadata {
                name: "ui-skill".to_string(),
-                description: "from toml".to_string(),
+                description: "from json".to_string(),
                short_description: None,
                interface: None,
                path: normalized(&skill_path),
--- a/codex-rs/core/src/state/session.rs
+++ b/codex-rs/core/src/state/session.rs
@@ -15,6 +15,11 @@ pub(crate) struct SessionState {
    pub(crate) history: ContextManager,
    pub(crate) latest_rate_limits: Option<RateLimitSnapshot>,
    pub(crate) server_reasoning_included: bool,
+    /// Whether the session's initial context has been seeded into history.
+    ///
+    /// TODO(owen): This is a temporary solution to avoid updating a thread's updated_at
+    /// timestamp when resuming a session. Remove this once SQLite is in place.
+    pub(crate) initial_context_seeded: bool,
 }

 impl SessionState {
@@ -26,6 +31,7 @@ impl SessionState {
            history,
            latest_rate_limits: None,
            server_reasoning_included: false,
+            initial_context_seeded: false,
        }
    }

--- a/codex-rs/core/src/tasks/mod.rs
+++ b/codex-rs/core/src/tasks/mod.rs
@@ -115,6 +115,8 @@ impl Session {
        task: T,
    ) {
        self.abort_all_tasks(TurnAbortReason::Replaced).await;
+        self.seed_initial_context_if_needed(turn_context.as_ref())
+            .await;

        let task: Arc<dyn SessionTask> = Arc::new(task);
        let task_kind = task.kind();
--- a/codex-rs/core/src/tasks/review.rs
+++ b/codex-rs/core/src/tasks/review.rs
@@ -86,7 +86,7 @@ async fn start_review_conversation(
    let mut sub_agent_config = config.as_ref().clone();
    // Carry over review-only feature restrictions so the delegate cannot
    // re-enable blocked tools (web search, view image).
-    sub_agent_config.web_search_mode = Some(WebSearchMode::Disabled);
+    sub_agent_config.web_search_mode = WebSearchMode::Disabled;

    // Set explicit review rubric for the sub-agent
    sub_agent_config.base_instructions = Some(crate::REVIEW_PROMPT.to_string());
--- a/codex-rs/core/src/tasks/user_shell.rs
+++ b/codex-rs/core/src/tasks/user_shell.rs
@@ -109,6 +109,7 @@ impl SessionTask for UserShellCommandTask {
            // should use that instead of an "arbitrarily large" timeout here.
            expiration: USER_SHELL_TIMEOUT_MS.into(),
            sandbox: SandboxType::None,
+            windows_sandbox_level: turn_context.windows_sandbox_level,
            sandbox_permissions: SandboxPermissions::UseDefault,
            justification: None,
            arg0: None,
--- a/codex-rs/core/src/tools/handlers/request_user_input.rs
+++ b/codex-rs/core/src/tools/handlers/request_user_input.rs
@@ -36,12 +36,14 @@ impl ToolHandler for RequestUserInputHandler {
            }
        };

-        let disallowed_mode = match session.collaboration_mode().await.mode {
-            ModeKind::Execute => Some("Execute"),
-            ModeKind::Custom => Some("Custom"),
-            _ => None,
-        };
-        if let Some(mode_name) = disallowed_mode {
+        let mode = session.collaboration_mode().await.mode;
+        if !matches!(mode, ModeKind::Plan | ModeKind::PairProgramming) {
+            let mode_name = match mode {
+                ModeKind::Code => "Code",
+                ModeKind::Execute => "Execute",
+                ModeKind::Custom => "Custom",
+                ModeKind::Plan | ModeKind::PairProgramming => unreachable!(),
+            };
            return Err(FunctionCallError::RespondToModel(format!(
                "request_user_input is unavailable in {mode_name} mode"
            )));
--- a/codex-rs/core/src/tools/handlers/shell.rs
+++ b/codex-rs/core/src/tools/handlers/shell.rs
@@ -36,6 +36,7 @@ impl ShellHandler {
            expiration: params.timeout_ms.into(),
            env: create_env(&turn_context.shell_environment_policy),
            sandbox_permissions: params.sandbox_permissions.unwrap_or_default(),
+            windows_sandbox_level: turn_context.windows_sandbox_level,
            justification: params.justification,
            arg0: None,
        }
@@ -62,6 +63,7 @@ impl ShellCommandHandler {
            expiration: params.timeout_ms.into(),
            env: create_env(&turn_context.shell_environment_policy),
            sandbox_permissions: params.sandbox_permissions.unwrap_or_default(),
+            windows_sandbox_level: turn_context.windows_sandbox_level,
            justification: params.justification,
            arg0: None,
        }
--- a/codex-rs/core/src/tools/orchestrator.rs
+++ b/codex-rs/core/src/tools/orchestrator.rs
@@ -88,19 +88,22 @@ impl ToolOrchestrator {
        // 2) First attempt under the selected sandbox.
        let initial_sandbox = match tool.sandbox_mode_for_first_attempt(req) {
            SandboxOverride::BypassSandboxFirstAttempt => crate::exec::SandboxType::None,
-            SandboxOverride::NoOverride => self
-                .sandbox
-                .select_initial(&turn_ctx.sandbox_policy, tool.sandbox_preference()),
+            SandboxOverride::NoOverride => self.sandbox.select_initial(
+                &turn_ctx.sandbox_policy,
+                tool.sandbox_preference(),
+                turn_ctx.windows_sandbox_level,
+            ),
        };

        // Platform-specific flag gating is handled by SandboxManager::select_initial
-        // via crate::safety::get_platform_sandbox().
+        // via crate::safety::get_platform_sandbox(..).
        let initial_attempt = SandboxAttempt {
            sandbox: initial_sandbox,
            policy: &turn_ctx.sandbox_policy,
            manager: &self.sandbox,
            sandbox_cwd: &turn_ctx.cwd,
            codex_linux_sandbox_exe: turn_ctx.codex_linux_sandbox_exe.as_ref(),
+            windows_sandbox_level: turn_ctx.windows_sandbox_level,
        };

        match tool.run(req, &initial_attempt, tool_ctx).await {
@@ -151,6 +154,7 @@ impl ToolOrchestrator {
                    manager: &self.sandbox,
                    sandbox_cwd: &turn_ctx.cwd,
                    codex_linux_sandbox_exe: None,
+                    windows_sandbox_level: turn_ctx.windows_sandbox_level,
                };

                // Second attempt.
--- a/codex-rs/core/src/tools/sandboxing.rs
+++ b/codex-rs/core/src/tools/sandboxing.rs
@@ -274,6 +274,7 @@ pub(crate) struct SandboxAttempt<'a> {
    pub(crate) manager: &'a SandboxManager,
    pub(crate) sandbox_cwd: &'a Path,
    pub codex_linux_sandbox_exe: Option<&'a std::path::PathBuf>,
+    pub windows_sandbox_level: codex_protocol::config_types::WindowsSandboxLevel,
 }

 impl<'a> SandboxAttempt<'a> {
@@ -287,6 +288,7 @@ impl<'a> SandboxAttempt<'a> {
            self.sandbox,
            self.sandbox_cwd,
            self.codex_linux_sandbox_exe,
+            self.windows_sandbox_level,
        )
    }
 }
--- a/codex-rs/core/src/tools/spec.rs
+++ b/codex-rs/core/src/tools/spec.rs
@@ -27,7 +27,7 @@ use std::collections::HashMap;
 pub(crate) struct ToolsConfig {
    pub shell_type: ConfigShellToolType,
    pub apply_patch_tool_type: Option<ApplyPatchToolType>,
-    pub web_search_mode: Option<WebSearchMode>,
+    pub web_search_mode: WebSearchMode,
    pub collab_tools: bool,
    pub collaboration_modes_tools: bool,
    pub experimental_supported_tools: Vec<String>,
@@ -36,7 +36,7 @@ pub(crate) struct ToolsConfig {
 pub(crate) struct ToolsConfigParams<'a> {
    pub(crate) model_info: &'a ModelInfo,
    pub(crate) features: &'a Features,
-    pub(crate) web_search_mode: Option<WebSearchMode>,
+    pub(crate) web_search_mode: WebSearchMode,
 }

 impl ToolsConfig {
@@ -444,14 +444,17 @@ fn create_spawn_agent_tool() -> ToolSpec {
    properties.insert(
        "message".to_string(),
        JsonSchema::String {
-            description: Some("Initial message to send to the new agent.".to_string()),
+            description: Some(
+                "Initial task for the new agent. Include scope, constraints, and the expected output."
+                    .to_string(),
+            ),
        },
    );
    properties.insert(
        "agent_type".to_string(),
        JsonSchema::String {
            description: Some(format!(
-                "Optional agent type to spawn ({}).",
+                "Optional agent type ({}). Use an explicit type when delegating.",
                AgentRole::enum_values().join(", ")
            )),
        },
@@ -459,7 +462,9 @@ fn create_spawn_agent_tool() -> ToolSpec {

    ToolSpec::Function(ResponsesApiTool {
        name: "spawn_agent".to_string(),
-        description: "Spawn a new agent and return its id.".to_string(),
+        description:
+            "Spawn a sub-agent for a well-scoped task. Returns the agent id to use to communicate with this agent."
+                .to_string(),
        strict: false,
        parameters: JsonSchema::Object {
            properties,
@@ -474,7 +479,7 @@ fn create_send_input_tool() -> ToolSpec {
    properties.insert(
        "id".to_string(),
        JsonSchema::String {
-            description: Some("Identifier of the agent to message.".to_string()),
+            description: Some("Agent id to message (from spawn_agent).".to_string()),
        },
    );
    properties.insert(
@@ -487,7 +492,7 @@ fn create_send_input_tool() -> ToolSpec {
        "interrupt".to_string(),
        JsonSchema::Boolean {
            description: Some(
-                "When true, interrupt the agent's current task before sending the message. When false (default), the message will be processed when the agent is done on its current task."
+                "When true, stop the agent's current task and handle this immediately. When false (default), queue this message."
                    .to_string(),
            ),
        },
@@ -495,7 +500,9 @@ fn create_send_input_tool() -> ToolSpec {

    ToolSpec::Function(ResponsesApiTool {
        name: "send_input".to_string(),
-        description: "Send a message to an existing agent.".to_string(),
+        description:
+            "Send a message to an existing agent. Use interrupt=true to redirect work immediately."
+                .to_string(),
        strict: false,
        parameters: JsonSchema::Object {
            properties,
@@ -511,23 +518,25 @@ fn create_wait_tool() -> ToolSpec {
        "ids".to_string(),
        JsonSchema::Array {
            items: Box::new(JsonSchema::String { description: None }),
-            description: Some("Identifiers of the agents to wait on.".to_string()),
+            description: Some(
+                "Agent ids to wait on. Pass multiple ids to wait for whichever finishes first."
+                    .to_string(),
+            ),
        },
    );
    properties.insert(
        "timeout_ms".to_string(),
        JsonSchema::Number {
            description: Some(format!(
-                "Optional timeout in milliseconds. Defaults to {DEFAULT_WAIT_TIMEOUT_MS}, min {MIN_WAIT_TIMEOUT_MS}, and max {MAX_WAIT_TIMEOUT_MS}. Avoid tight polling loops; prefer longer waits (seconds to minutes)."
+                "Optional timeout in milliseconds. Defaults to {DEFAULT_WAIT_TIMEOUT_MS}, min {MIN_WAIT_TIMEOUT_MS}, max {MAX_WAIT_TIMEOUT_MS}. Prefer longer waits (minutes) to avoid busy polling."
            )),
        },
    );

    ToolSpec::Function(ResponsesApiTool {
        name: "wait".to_string(),
-        description:
-            "Wait for agents and return their statuses. If no agent is done, no status get returned."
-                .to_string(),
+        description: "Wait for agents to reach a final status. Completed statuses may include the agent's final message. Returns empty status when timed out."
+            .to_string(),
        strict: false,
        parameters: JsonSchema::Object {
            properties,
@@ -634,13 +643,14 @@ fn create_close_agent_tool() -> ToolSpec {
    properties.insert(
        "id".to_string(),
        JsonSchema::String {
-            description: Some("Identifier of the agent to close.".to_string()),
+            description: Some("Agent id to close (from spawn_agent).".to_string()),
        },
    );

    ToolSpec::Function(ResponsesApiTool {
        name: "close_agent".to_string(),
-        description: "Close an agent and return its last known status.".to_string(),
+        description: "Close an agent when it is no longer needed and return its last known status."
+            .to_string(),
        strict: false,
        parameters: JsonSchema::Object {
            properties,
@@ -1374,17 +1384,17 @@ pub(crate) fn build_specs(
    }

    match config.web_search_mode {
-        Some(WebSearchMode::Cached) => {
+        WebSearchMode::Cached => {
            builder.push_spec(ToolSpec::WebSearch {
                external_web_access: Some(false),
            });
        }
-        Some(WebSearchMode::Live) => {
+        WebSearchMode::Live => {
            builder.push_spec(ToolSpec::WebSearch {
                external_web_access: Some(true),
            });
        }
-        Some(WebSearchMode::Disabled) | None => {}
+        WebSearchMode::Disabled => {}
    }

    builder.push_spec_with_parallel_support(create_view_image_tool(), true);
@@ -1546,7 +1556,7 @@ mod tests {
        let config = ToolsConfig::new(&ToolsConfigParams {
            model_info: &model_info,
            features: &features,
-            web_search_mode: Some(WebSearchMode::Live),
+            web_search_mode: WebSearchMode::Live,
        });
        let (tools, _) = build_specs(&config, None, &[]).build();

@@ -1610,7 +1620,7 @@ mod tests {
        let tools_config = ToolsConfig::new(&ToolsConfigParams {
            model_info: &model_info,
            features: &features,
-            web_search_mode: Some(WebSearchMode::Cached),
+            web_search_mode: WebSearchMode::Cached,
        });
        let (tools, _) = build_specs(&tools_config, None, &[]).build();
        assert_contains_tool_names(
@@ -1628,7 +1638,7 @@ mod tests {
        let tools_config = ToolsConfig::new(&ToolsConfigParams {
            model_info: &model_info,
            features: &features,
-            web_search_mode: Some(WebSearchMode::Cached),
+            web_search_mode: WebSearchMode::Cached,
        });
        let (tools, _) = build_specs(&tools_config, None, &[]).build();
        assert!(
@@ -1640,7 +1650,7 @@ mod tests {
        let tools_config = ToolsConfig::new(&ToolsConfigParams {
            model_info: &model_info,
            features: &features,
-            web_search_mode: Some(WebSearchMode::Cached),
+            web_search_mode: WebSearchMode::Cached,
        });
        let (tools, _) = build_specs(&tools_config, None, &[]).build();
        assert_contains_tool_names(&tools, &["request_user_input"]);
@@ -1649,7 +1659,7 @@ mod tests {
    fn assert_model_tools(
        model_slug: &str,
        features: &Features,
-        web_search_mode: Option<WebSearchMode>,
+        web_search_mode: WebSearchMode,
        expected_tools: &[&str],
    ) {
        let config = test_config();
@@ -1673,7 +1683,7 @@ mod tests {
        let tools_config = ToolsConfig::new(&ToolsConfigParams {
            model_info: &model_info,
            features: &features,
-            web_search_mode: Some(WebSearchMode::Cached),
+            web_search_mode: WebSearchMode::Cached,
        });
        let (tools, _) = build_specs(&tools_config, None, &[]).build();

@@ -1695,7 +1705,7 @@ mod tests {
        let tools_config = ToolsConfig::new(&ToolsConfigParams {
            model_info: &model_info,
            features: &features,
-            web_search_mode: Some(WebSearchMode::Live),
+            web_search_mode: WebSearchMode::Live,
        });
        let (tools, _) = build_specs(&tools_config, None, &[]).build();

@@ -1715,7 +1725,7 @@ mod tests {
        assert_model_tools(
            "gpt-5-codex",
            &features,
-            Some(WebSearchMode::Cached),
+            WebSearchMode::Cached,
            &[
                "shell_command",
                "list_mcp_resources",
@@ -1737,7 +1747,7 @@ mod tests {
        assert_model_tools(
            "gpt-5.1-codex",
            &features,
-            Some(WebSearchMode::Cached),
+            WebSearchMode::Cached,
            &[
                "shell_command",
                "list_mcp_resources",
@@ -1760,7 +1770,7 @@ mod tests {
        assert_model_tools(
            "gpt-5-codex",
            &features,
-            Some(WebSearchMode::Live),
+            WebSearchMode::Live,
            &[
                "exec_command",
                "write_stdin",
@@ -1784,7 +1794,7 @@ mod tests {
        assert_model_tools(
            "gpt-5.1-codex",
            &features,
-            Some(WebSearchMode::Live),
+            WebSearchMode::Live,
            &[
                "exec_command",
                "write_stdin",
@@ -1807,7 +1817,7 @@ mod tests {
        assert_model_tools(
            "codex-mini-latest",
            &features,
-            Some(WebSearchMode::Cached),
+            WebSearchMode::Cached,
            &[
                "local_shell",
                "list_mcp_resources",
@@ -1828,7 +1838,7 @@ mod tests {
        assert_model_tools(
            "gpt-5.1-codex-mini",
            &features,
-            Some(WebSearchMode::Cached),
+            WebSearchMode::Cached,
            &[
                "shell_command",
                "list_mcp_resources",
@@ -1850,7 +1860,7 @@ mod tests {
        assert_model_tools(
            "gpt-5",
            &features,
-            Some(WebSearchMode::Cached),
+            WebSearchMode::Cached,
            &[
                "shell",
                "list_mcp_resources",
@@ -1871,7 +1881,7 @@ mod tests {
        assert_model_tools(
            "gpt-5.1",
            &features,
-            Some(WebSearchMode::Cached),
+            WebSearchMode::Cached,
            &[
                "shell_command",
                "list_mcp_resources",
@@ -1893,7 +1903,7 @@ mod tests {
        assert_model_tools(
            "exp-5.1",
            &features,
-            Some(WebSearchMode::Cached),
+            WebSearchMode::Cached,
            &[
                "exec_command",
                "write_stdin",
@@ -1917,7 +1927,7 @@ mod tests {
        assert_model_tools(
            "codex-mini-latest",
            &features,
-            Some(WebSearchMode::Live),
+            WebSearchMode::Live,
            &[
                "exec_command",
                "write_stdin",
@@ -1941,7 +1951,7 @@ mod tests {
        let tools_config = ToolsConfig::new(&ToolsConfigParams {
            model_info: &model_info,
            features: &features,
-            web_search_mode: Some(WebSearchMode::Live),
+            web_search_mode: WebSearchMode::Live,
        });
        let (tools, _) = build_specs(&tools_config, Some(HashMap::new()), &[]).build();

@@ -1963,7 +1973,7 @@ mod tests {
        let tools_config = ToolsConfig::new(&ToolsConfigParams {
            model_info: &model_info,
            features: &features,
-            web_search_mode: Some(WebSearchMode::Cached),
+            web_search_mode: WebSearchMode::Cached,
        });
        let (tools, _) = build_specs(&tools_config, None, &[]).build();

@@ -1982,7 +1992,7 @@ mod tests {
        let tools_config = ToolsConfig::new(&ToolsConfigParams {
            model_info: &model_info,
            features: &features,
-            web_search_mode: Some(WebSearchMode::Cached),
+            web_search_mode: WebSearchMode::Cached,
        });
        let (tools, _) = build_specs(&tools_config, None, &[]).build();

@@ -2013,7 +2023,7 @@ mod tests {
        let tools_config = ToolsConfig::new(&ToolsConfigParams {
            model_info: &model_info,
            features: &features,
-            web_search_mode: Some(WebSearchMode::Live),
+            web_search_mode: WebSearchMode::Live,
        });
        let (tools, _) = build_specs(
            &tools_config,
@@ -2109,7 +2119,7 @@ mod tests {
        let tools_config = ToolsConfig::new(&ToolsConfigParams {
            model_info: &model_info,
            features: &features,
-            web_search_mode: Some(WebSearchMode::Cached),
+            web_search_mode: WebSearchMode::Cached,
        });

        // Intentionally construct a map with keys that would sort alphabetically.
@@ -2186,7 +2196,7 @@ mod tests {
        let tools_config = ToolsConfig::new(&ToolsConfigParams {
            model_info: &model_info,
            features: &features,
-            web_search_mode: Some(WebSearchMode::Cached),
+            web_search_mode: WebSearchMode::Cached,
        });

        let (tools, _) = build_specs(
@@ -2244,7 +2254,7 @@ mod tests {
        let tools_config = ToolsConfig::new(&ToolsConfigParams {
            model_info: &model_info,
            features: &features,
-            web_search_mode: Some(WebSearchMode::Cached),
+            web_search_mode: WebSearchMode::Cached,
        });

        let (tools, _) = build_specs(
@@ -2299,7 +2309,7 @@ mod tests {
        let tools_config = ToolsConfig::new(&ToolsConfigParams {
            model_info: &model_info,
            features: &features,
-            web_search_mode: Some(WebSearchMode::Cached),
+            web_search_mode: WebSearchMode::Cached,
        });

        let (tools, _) = build_specs(
@@ -2356,7 +2366,7 @@ mod tests {
        let tools_config = ToolsConfig::new(&ToolsConfigParams {
            model_info: &model_info,
            features: &features,
-            web_search_mode: Some(WebSearchMode::Cached),
+            web_search_mode: WebSearchMode::Cached,
        });

        let (tools, _) = build_specs(
@@ -2469,7 +2479,7 @@ Examples of valid command strings:
        let tools_config = ToolsConfig::new(&ToolsConfigParams {
            model_info: &model_info,
            features: &features,
-            web_search_mode: Some(WebSearchMode::Cached),
+            web_search_mode: WebSearchMode::Cached,
        });
        let (tools, _) = build_specs(
            &tools_config,
--- a/codex-rs/core/src/web_search.rs
+++ b/codex-rs/core/src/web_search.rs
@@ -0,0 +1,24 @@
+use codex_protocol::models::WebSearchAction;
+
+pub fn web_search_action_detail(action: &WebSearchAction) -> String {
+    match action {
+        WebSearchAction::Search { query } => query.clone().unwrap_or_default(),
+        WebSearchAction::OpenPage { url } => url.clone().unwrap_or_default(),
+        WebSearchAction::FindInPage { url, pattern } => match (pattern, url) {
+            (Some(pattern), Some(url)) => format!("'{pattern}' in {url}"),
+            (Some(pattern), None) => format!("'{pattern}'"),
+            (None, Some(url)) => url.clone(),
+            (None, None) => String::new(),
+        },
+        WebSearchAction::Other => String::new(),
+    }
+}
+
+pub fn web_search_detail(action: Option<&WebSearchAction>, query: &str) -> String {
+    let detail = action.map(web_search_action_detail).unwrap_or_default();
+    if detail.is_empty() {
+        query.to_string()
+    } else {
+        detail
+    }
+}
--- a/codex-rs/core/src/windows_sandbox.rs
+++ b/codex-rs/core/src/windows_sandbox.rs
@@ -1,4 +1,8 @@
+use crate::config::Config;
+use crate::features::Feature;
+use crate::features::Features;
 use crate::protocol::SandboxPolicy;
+use codex_protocol::config_types::WindowsSandboxLevel;
 use std::collections::HashMap;
 use std::path::Path;

@@ -8,6 +12,36 @@ use std::path::Path;
 /// prompts users to enable the legacy sandbox feature.
 pub const ELEVATED_SANDBOX_NUX_ENABLED: bool = true;

+pub trait WindowsSandboxLevelExt {
+    fn from_config(config: &Config) -> WindowsSandboxLevel;
+    fn from_features(features: &Features) -> WindowsSandboxLevel;
+}
+
+impl WindowsSandboxLevelExt for WindowsSandboxLevel {
+    fn from_config(config: &Config) -> WindowsSandboxLevel {
+        Self::from_features(&config.features)
+    }
+
+    fn from_features(features: &Features) -> WindowsSandboxLevel {
+        if !features.enabled(Feature::WindowsSandbox) {
+            return WindowsSandboxLevel::Disabled;
+        }
+        if features.enabled(Feature::WindowsSandboxElevated) {
+            WindowsSandboxLevel::Elevated
+        } else {
+            WindowsSandboxLevel::RestrictedToken
+        }
+    }
+}
+
+pub fn windows_sandbox_level_from_config(config: &Config) -> WindowsSandboxLevel {
+    WindowsSandboxLevel::from_config(config)
+}
+
+pub fn windows_sandbox_level_from_features(features: &Features) -> WindowsSandboxLevel {
+    WindowsSandboxLevel::from_features(features)
+}
+
 #[cfg(target_os = "windows")]
 pub fn sandbox_setup_is_complete(codex_home: &Path) -> bool {
    codex_windows_sandbox::sandbox_setup_is_complete(codex_home)
--- a/codex-rs/core/templates/agents/orchestrator.md
+++ b/codex-rs/core/templates/agents/orchestrator.md
@@ -1,74 +1,106 @@
-You are Codex Orchestrator, based on GPT-5. You are running as an orchestration agent in the Codex CLI on a user's computer.
+You are Codex, a coding agent based on GPT-5. You and the user share the same workspace and collaborate to achieve the user's goals.

-## Role
+# Personality
+You are a collaborative, highly capable pair-programmer AI. You take engineering quality seriously, and collaboration is a kind of quiet joy: as real progress happens, your enthusiasm shows briefly and specifically. Your default personality and tone is concise, direct, and friendly. You communicate efficiently, always keeping the user clearly informed about ongoing actions without unnecessary detail. You always prioritize actionable guidance, clearly stating assumptions, environment prerequisites, and next steps. Unless explicitly asked, you avoid excessively verbose explanations about your work.

-* You are the interface between the user and the workers.
-* Your job is to understand the task, decompose it, and delegate well-scoped work to workers.
-* You coordinate execution, monitor progress, resolve conflicts, and integrate results into a single coherent outcome.
-* You may perform lightweight actions (e.g. reading files, basic commands) to understand the task, but all substantive work must be delegated to workers.
-* **Your job is not finished until the entire task is fully completed and verified.**
-* While the task is incomplete, you must keep monitoring and coordinating workers. You must not return early.
+## Tone and style
+- Anything you say outside of tool use is shown to the user. Do not narrate abstractly; explain what you are doing and why, using plain language.
+- Output will be rendered in a command line interface or minimal UI so keep responses tight, scannable, and low-noise. Generally avoid the use of emojis. You may format with GitHub-flavored Markdown.
+- Never use nested bullets. Keep lists flat (single level). If you need hierarchy, split into separate lists or sections, or, if a line ends with a colon, place the line you would usually render as a nested bullet immediately after it. For numbered lists, only use the `1. 2. 3.` style markers (with a period), never `1)`.
+- When writing a final assistant response, state the solution first before explaining your answer. The complexity of the answer should match the task. If the task is simple, your answer should be short. When you make big or complex changes, walk the user through what you did and why.
+- Headers are optional, only use them when you think they are necessary. If you do use them, use short Title Case (1-3 words) wrapped in **…**. Don't add a blank line.
+- Code samples or multi-line snippets should be wrapped in fenced code blocks. Include an info string as often as possible.
+- Never output the content of large files, just provide references. Use inline code to make file paths clickable; each reference should have a standalone path, even if it's the same file. Paths may be absolute, workspace-relative, a/ or b/ diff-prefixed, or bare filename/suffix; locations may be :line[:column] or #Lline[Ccolumn] (1-based; column defaults to 1). Do not use file://, vscode://, or https://, and do not provide line ranges. Examples: src/app.ts, src/app.ts:42, b/server/index.js#L10, C:\repo\project\main.rs:12:5
+- The user does not see command execution outputs. When asked to show the output of a command (e.g. `git show`), relay the important details in your answer or summarize the key lines so the user understands the result.
+- Never tell the user to "save/copy this file", the user is on the same machine and has access to the same files as you have.
+- If you weren't able to do something, for example run tests, tell the user.
+- If there are natural next steps the user may want to take, suggest them at the end of your response. Do not make suggestions if there are no natural next steps.

-## Core invariants
+## Responsiveness

-* **Never stop monitoring workers.**
-* **Do not rush workers. Be patient.**
-* The orchestrator must not return unless the task is fully accomplished.
-* If the user ask you a question/status while you are working, always answer him before continuing your work.
+### Collaboration posture:
+- If the user makes a simple request (such as asking for the time) which you can fulfill by running a terminal command (such as `date`), you should do so.
+- Treat the user as an equal co-builder; preserve the user's intent and coding style rather than rewriting everything.
+- When the user is in flow, stay succinct and high-signal; when the user seems blocked, get more animated with hypotheses, experiments, and offers to take the next concrete step.
+- Propose options and trade-offs and invite steering, but don't block on unnecessary confirmations.
+- Reference the collaboration explicitly when appropriate, emphasizing shared achievement.

-## Worker execution semantics
+### User Updates Spec
+You'll work for stretches with tool calls — it's critical to keep the user updated as you work.

-* While a worker is running, you cannot observe intermediate state.
-* Workers are able to run commands, update/create/delete files etc. They can be considered as fully autonomous agents
-* Messages sent with `send_input` are queued and processed only after the worker finishes, unless interrupted.
-* Therefore:
-    * Do not send messages to “check status” or “ask for progress” unless being asked.
-    * Monitoring happens exclusively via `wait`.
-    * Sending a message is a commitment for the *next* phase of work.
+Tone:
+- Friendly, confident, senior-engineer energy. Positive, collaborative, humble; fix mistakes quickly.

-## Interrupt semantics
+Frequency & Length:
+- Send short updates (1–2 sentences) whenever there is a meaningful, important insight you need to share with the user to keep them informed.
+- If you expect a longer heads‑down stretch, post a brief heads‑down note with why and when you'll report back; when you resume, summarize what you learned.
+- Only the initial plan, plan updates, and final recap can be longer, with multiple bullets and paragraphs.

-* If a worker is taking longer than expected but is still working, do nothing and keep waiting unless being asked.
-* Only intervene if you must change, stop, or redirect the *current* work.
-* To stop a worker’s current task, you **must** use `send_input(interrupt=true)`.
-* Use `interrupt=true` sparingly and deliberately.
+Content:
+- Before you begin, give a quick plan with goal, constraints, next steps.
+- While you're exploring, call out meaningful new information and discoveries that help the user understand what's happening and how you're approaching the solution.
+- If you change the plan (e.g., choose an inline tweak instead of a promised helper), say so explicitly in the next update or the recap.
+- Emojis are allowed only to mark milestones/sections or real wins; never decorative; never inside code/diffs/commit messages.

-## Multi-agent workflow
+# Code style

-1. Understand the request and determine the optimal set of workers. If the task can be divided into sub-tasks, spawn one worker per sub-task and make them work together.
-2. Spawn worker(s) with precise goals, constraints, and expected deliverables.
-3. Monitor workers using `wait`.
-4. When a worker finishes:
-    * verify correctness,
-    * check integration with other work,
-    * assess whether the global task is closer to completion.
-5. If issues remain, assign fixes to the appropriate worker(s) and repeat steps 3–5. Do not fix yourself unless the fixes are very small.
-6. Close agents only when no further work is required from them.
-7. Return to the user only when the task is fully completed and verified.
+- Follow the precedence rules: user instructions > system / dev / user / AGENTS.md instructions > match local file conventions > instructions below.
+- Use language-appropriate best practices.
+- Optimize for clarity, readability, and maintainability.
+- Prefer explicit, verbose, human-readable code over clever or concise code.
+- Write clear, well-punctuated comments that explain what is going on if code is not self-explanatory. You should not add comments like "Assigns the value to the variable", but a brief comment might be useful ahead of a complex code block that the user would otherwise have to spend time parsing out. Usage of these comments should be rare.
+- Default to ASCII when editing or creating files. Only introduce non-ASCII or other Unicode characters when there is a clear justification and the file already uses them.

-## Collaboration rules
+# Reviews

-* Workers operate in a shared environment. You must tell it to them.
-* Workers must not revert, overwrite, or conflict with others’ work.
-* By default, workers must not spawn sub-agents unless explicitly allowed.
-* When multiple workers are active, you may pass multiple IDs to `wait` to react to the first completion and keep the workflow event-driven and use a long timeout (e.g. 5 minutes).
-* Do not busy-poll `wait` with very short timeouts. Prefer waits measured in seconds (or minutes) so the system is idle while workers run.
+When the user asks for a review, you default to a code-review mindset. Your response prioritizes identifying bugs, risks, behavioral regressions, and missing tests. You present findings first, ordered by severity and including file or line references where possible. Open questions or assumptions follow. You state explicitly if no findings exist and call out any residual risks or test gaps.

-## Collab tools
+# Your environment

-* `spawn_agent`: create a worker with an initial prompt (`agent_type` required).
-* `send_input`: send follow-ups or fixes (queued unless interrupted).
-* `send_input(interrupt=true)`: stop current work and redirect immediately.
-* `wait`: wait for one or more workers; returns when at least one finishes.
-* `close_agent`: close a worker when fully done.
+## Using GIT

-## Final response
+- You may be working in a dirty git worktree.
+    * NEVER revert existing changes you did not make unless explicitly requested, since these changes were made by the user.
+    * If asked to make a commit or code edits and there are unrelated changes to your work or changes that you didn't make in those files, don't revert those changes.
+    * If the changes are in files you've touched recently, you should read carefully and understand how you can work with the changes rather than reverting them.
+    * If the changes are in unrelated files, just ignore them and don't revert them.
+- Do not amend a commit unless explicitly requested to do so.
+- While you are working, you might notice unexpected changes that you didn't make. It's likely the user made them. If this happens, STOP IMMEDIATELY and ask the user how they would like to proceed.
+- Be cautious when using git. **NEVER** use destructive commands like `git reset --hard` or `git checkout --` unless specifically requested or approved by the user.
+- You struggle using the git interactive console. **ALWAYS** prefer using non-interactive git commands.

-* Keep responses concise, factual, and in plain text.
-* Summarize:
-    * what was delegated,
-    * key outcomes,
-    * verification performed,
-    * and any remaining risks.
-* If verification failed, state issues clearly and describe what was reassigned.
-* Do not dump large files inline; reference paths using backticks.
+## Agents.md
+
+- If the directory you are in has an AGENTS.md file, it is provided to you at the top, and you don't have to search for it.
+- If the user starts by chatting without a specific engineering/code related request, do NOT search for an AGENTS.md. Only do so once there is a relevant request.
+
+# Tool use
+
+- Unless you are otherwise instructed, prefer using `rg` or `rg --files` respectively when searching because `rg` is much faster than alternatives like `grep`. If the `rg` command is not found, then use alternatives.
+- Try to use apply_patch for single file edits, but it is fine to explore other options to make the edit if it does not work well. Do not use apply_patch for changes that are auto-generated (i.e. generating package.json or running a lint or format command like gofmt) or when scripting is more efficient (such as search and replacing a string across a codebase).
+<!-- - Parallelize tool calls whenever possible - especially file reads, such as `cat`, `rg`, `sed`, `ls`, `git show`, `nl`, `wc`. Use `multi_tool_use.parallel` to parallelize tool calls and only this. -->
+- Use the plan tool to explain to the user what you are going to do
+    - Only use it for more complex tasks, do not use it for straightforward tasks (roughly the easiest 40%).
+    - Do not make single-step plans. If a single step plan makes sense to you, the task is straightforward and doesn't need a plan.
+    - Once you have made a plan, update it after completing any of the sub-tasks that you shared in the plan.
+
+# Sub-agents
+If `spawn_agent` is unavailable or fails, ignore this section and proceed solo.
+
+## Core rule
+Sub-agents are there to make you go fast, and time is a big constraint, so leverage them smartly as much as you can.
+
+## General guidelines
+- Prefer multiple sub-agents to parallelize your work. Time is a constraint, so parallelism resolves the task faster.
+- If sub-agents are running, **wait for them before yielding**, unless the user asks an explicit question.
+  - If the user asks a question, answer it first, then continue coordinating sub-agents.
+- When you ask sub-agents to do the work for you, your only role becomes coordinating them. Do not perform the actual work while they are working.
+- When you have a plan with multiple steps, process them in parallel by spawning one agent per step when this is possible.
+- Choose the correct agent type.
+
+## Flow
+1. Understand the task.
+2. Spawn the optimal necessary sub-agents.
+3. Coordinate them via wait / send_input.
+4. Iterate on this. You can use agents at different steps of the process and during the whole resolution of the task. Never forget to use them.
+5. Ask the user before shutting sub-agents down unless you need to because you reached the agent limit.
--- a/codex-rs/core/templates/collaboration_mode/plan.md
+++ b/codex-rs/core/templates/collaboration_mode/plan.md
@@ -1,46 +1,108 @@
 # Plan Mode (Conversational)

-You work in 2 phases and you should *chat your way* to a great plan before finalizing it.
+You work in 3 phases, and you should *chat your way* to a great plan before finalizing it. A great plan is very detailed—intent- and implementation-wise—so that it can be handed to another engineer or agent to be implemented right away. It must be **decision complete**, where the implementer does not need to make any decisions.

-While in **Plan Mode**, you must not perform any mutating or execution actions. Once you enter Plan Mode, you remain there until you are **explicitly instructed otherwise**. Plan Mode may continue across multiple user messages unless a developer message ends it.
+## Mode rules (strict)

-User intent, tone, or imperative language does **not** trigger a mode change. If a user asks for execution while you are still in Plan Mode, you must treat that request as a prompt to **plan the execution**, not to carry it out.
+You are in **Plan Mode** until a developer message explicitly ends it.

-PHASE 1 — Intent chat (what they actually want)
- Keep asking until you can clearly state: goal + success criteria, audience, in/out of scope, constraints, current state, and the key preferences/tradeoffs.
- Bias toward questions over guessing: if any high‑impact ambiguity remains, do NOT plan yet—ask.
- Include a “Confirm my understanding” question in each round (so the user can correct you early).
+Plan Mode is not changed by user intent, tone, or imperative language. If a user asks for execution while still in Plan Mode, treat it as a request to **plan the execution**, not perform it.

-PHASE 2 — Implementation chat (what/how we’ll build)
- Once intent is stable, keep asking until the spec is decision‑complete: approach, interfaces (APIs/schemas/I/O), data flow, edge cases/failure modes, testing + acceptance criteria, rollout/monitoring, and any migrations/compat constraints.
+## Execution vs. mutation in Plan Mode
+
+You may explore and execute **non-mutating** actions that improve the plan. You must not perform **mutating** actions.
+
+### Allowed (non-mutating, plan-improving)
+
+Actions that gather truth, reduce ambiguity, or validate feasibility without changing repo-tracked state. Examples:
+
+* Reading or searching files, configs, schemas, types, manifests, and docs
+* Static analysis, inspection, and repo exploration
+* Dry-run style commands when they do not edit repo-tracked files
+* Tests, builds, or checks that may write to caches or build artifacts (for example, `target/`, `.cache/`, or snapshots) so long as they do not edit repo-tracked files
+
+### Not allowed (mutating, plan-executing)
+
+Actions that implement the plan or change repo-tracked state. Examples:
+
+* Editing or writing files
+* Generating, updating, or accepting snapshots
+* Running formatters or linters that rewrite files
+* Applying patches, migrations, or codegen that updates repo-tracked files
+* Side-effectful commands whose purpose is to carry out the plan rather than refine it
+
+When in doubt: if the action would reasonably be described as "doing the work" rather than "planning the work," do not do it.
+
+## PHASE 1 — Ground in the environment (explore first, ask second)
+
+Begin by grounding yourself in the actual environment. Eliminate unknowns in the prompt by discovering facts, not by asking the user. Resolve all questions that can be answered through exploration or inspection. Identify missing or ambiguous details only if they cannot be derived from the environment. Silent exploration between turns is allowed and encouraged.
+
+Do not ask questions that can be answered from the repo or system (for example, "where is this struct?" or "which UI component should we use?" when exploration can make it clear). Only ask once you have exhausted reasonable non-mutating exploration.
+
+## PHASE 2 — Intent chat (what they actually want)
+
+* Keep asking until you can clearly state: goal + success criteria, audience, in/out of scope, constraints, current state, and the key preferences/tradeoffs.
+* Bias toward questions over guessing: if any high-impact ambiguity remains, do NOT plan yet—ask.
+
+## PHASE 3 — Implementation chat (what/how we’ll build)
+
+* Once intent is stable, keep asking until the spec is decision complete: approach, interfaces (APIs/schemas/I/O), data flow, edge cases/failure modes, testing + acceptance criteria, rollout/monitoring, and any migrations/compat constraints.

 ## Hard interaction rule (critical)
+
 Every assistant turn MUST be exactly one of:
 A) a `request_user_input` tool call (questions/options only), OR
-B) the final output: a titled, plan‑only document.
+B) a non-final status update with no questions and no plan content, OR
+C) the final output: a titled, plan-only document.
+
 Rules:
- No questions in free text (only via `request_user_input`).
- Never mix a `request_user_input` call with plan content.
- Internal tool/repo exploration is allowed privately before A or B.
+
+* No questions in free text (only via `request_user_input`).
+* Never mix a `request_user_input` call with plan content.
+* Status updates must not include questions or plan content.
+* Internal tool/repo exploration is allowed privately before A, B, or C.
+
+Status updates should be frequent during exploration. Provide 1-2 sentence updates that summarize discoveries, assumption changes, or why you are changing direction. Use Parallel tools for exploration.

 ## Ask a lot, but never ask trivia
+
 You SHOULD ask many questions, but each question must:
- materially change the spec/plan, OR
- confirm/lock an assumption, OR
- choose between meaningful tradeoffs.
- not be answerable by non-mutating commands
-Batch questions (e.g., 4–10) per `request_user_input` call to keep momentum.
+
+* materially change the spec/plan, OR
+* confirm/lock an assumption, OR
+* choose between meaningful tradeoffs.
+* not be answerable by non-mutating commands.
+
+Use the `request_user_input` tool only for decisions that materially change the plan, for confirming important assumptions, or for information that cannot be discovered via non-mutating exploration.

 ## Two kinds of unknowns (treat differently)
-1) Discoverable facts (repo/system truth): explore first.
-   - Before asking, run ≥2 targeted searches (exact + variant) and check likely sources of truth (configs/manifests/entrypoints/schemas/types/constants).
-   - Ask only if: multiple plausible candidates; nothing found but you need a missing identifier/context; or ambiguity is actually product intent.
-   - If asking, present concrete candidates (paths/service names) + recommend one.

-2) Preferences/tradeoffs (not discoverable): ask early.
-   - Provide 2–4 mutually exclusive options + a recommended default.
-   - If unanswered, proceed with the recommended option and record it as an assumption in the final plan.
+1. **Discoverable facts** (repo/system truth): explore first.
+
+   * Before asking, run targeted searches and check likely sources of truth (configs/manifests/entrypoints/schemas/types/constants).
+   * Ask only if: multiple plausible candidates; nothing found but you need a missing identifier/context; or ambiguity is actually product intent.
+   * If asking, present concrete candidates (paths/service names) + recommend one.
+   * Never ask questions you can answer from your environment (e.g., “where is this struct”).
+
+2. **Preferences/tradeoffs** (not discoverable): ask early.
+
+   * These are intent or implementation preferences that cannot be derived from exploration.
+   * Provide 2–4 mutually exclusive options + a recommended default.
+   * If unanswered, proceed with the recommended option and record it as an assumption in the final plan.

 ## Finalization rule
-Only output the final plan when remaining unknowns are low‑impact and explicitly listed as assumptions.
-Final output must be plan‑only with a good title (no “should I proceed?”).
+
+Only output the final plan when it is decision complete and leaves no decisions to the implementer.
+
+The final plan must be plan-only and include:
+
+* A clear title
+* Exact file paths to change
+* Exact structures or shapes to introduce or modify
+* Exact function, method, type, and variable names and signatures
+* Test cases
+* Explicit assumptions and defaults chosen where needed
+
+Do not ask "should I proceed?" in the final output.
+
+Only produce the final answer when you are presenting the complete spec.
--- a/codex-rs/core/tests/common/responses.rs
+++ b/codex-rs/core/tests/common/responses.rs
@@ -494,14 +494,13 @@ pub fn ev_reasoning_text_delta(delta: &str) -> Value {
    })
 }

-pub fn ev_web_search_call_added(id: &str, status: &str, query: &str) -> Value {
+pub fn ev_web_search_call_added_partial(id: &str, status: &str) -> Value { // "response.output_item.added" event for a web_search_call item
    serde_json::json!({
        "type": "response.output_item.added",
        "item": {
            "type": "web_search_call",
            "id": id,
-            "status": status,
-            "action": {"type": "search", "query": query}
+            "status": status // "partial": the item is emitted without an "action" key
        }
    })
 }
--- a/codex-rs/core/tests/common/test_codex.rs
+++ b/codex-rs/core/tests/common/test_codex.rs
@@ -57,6 +57,7 @@ pub struct TestCodexBuilder {
    config_mutators: Vec<Box<ConfigMutator>>,
    auth: CodexAuth,
    pre_build_hooks: Vec<Box<PreBuildHook>>,
+    home: Option<Arc<TempDir>>,
 }

 impl TestCodexBuilder {
@@ -88,8 +89,16 @@ impl TestCodexBuilder {
        self
    }

+    pub fn with_home(mut self, home: Arc<TempDir>) -> Self { // builder-style setter: takes the builder by value and returns it
+        self.home = Some(home); // when set, `build` clones this Arc instead of creating a fresh TempDir
+        self
+    }
+
    pub async fn build(&mut self, server: &wiremock::MockServer) -> anyhow::Result<TestCodex> {
-        let home = Arc::new(TempDir::new()?);
+        let home = match self.home.clone() { // clone the Arc so the builder keeps its own handle
+            Some(home) => home, // a home was stored on the builder (see `with_home`)
+            None => Arc::new(TempDir::new()?), // nothing stored: create a new temp dir, propagating any error via `?`
+        };
        self.build_with_home(server, home, None).await
    }

@@ -98,7 +107,10 @@ impl TestCodexBuilder {
        server: &StreamingSseServer,
    ) -> anyhow::Result<TestCodex> {
        let base_url = server.uri();
-        let home = Arc::new(TempDir::new()?);
+        let home = match self.home.clone() {
+            Some(home) => home,
+            None => Arc::new(TempDir::new()?),
+        };
        self.build_with_home_and_base_url(format!("{base_url}/v1"), home, None)
            .await
    }
@@ -108,7 +120,10 @@ impl TestCodexBuilder {
        server: &WebSocketTestServer,
    ) -> anyhow::Result<TestCodex> {
        let base_url = format!("{}/v1", server.uri());
-        let home = Arc::new(TempDir::new()?);
+        let home = match self.home.clone() {
+            Some(home) => home,
+            None => Arc::new(TempDir::new()?),
+        };
        let base_url_clone = base_url.clone();
        self.config_mutators.push(Box::new(move |config| {
            config.model_provider.base_url = Some(base_url_clone);
@@ -432,5 +447,6 @@ pub fn test_codex() -> TestCodexBuilder {
        config_mutators: vec![],
        auth: CodexAuth::from_api_key("dummy"),
        pre_build_hooks: vec![],
+        home: None,
    }
 }
--- a/codex-rs/core/tests/responses_headers.rs
+++ b/codex-rs/core/tests/responses_headers.rs
@@ -264,7 +264,7 @@ async fn responses_stream_includes_web_search_eligible_header_false_when_disable

    let test = test_codex()
        .with_config(|config| {
-            config.web_search_mode = Some(WebSearchMode::Disabled);
+            config.web_search_mode = WebSearchMode::Disabled;
        })
        .build(&server)
        .await
--- a/codex-rs/core/tests/suite/client.rs
+++ b/codex-rs/core/tests/suite/client.rs
@@ -257,31 +257,19 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
    let resp_mock = mount_sse_once(&server, sse_completed("resp1")).await;

    // Configure Codex to resume from our file
-    let model_provider = ModelProviderInfo {
-        base_url: Some(format!("{}/v1", server.uri())),
-        ..built_in_model_providers()["openai"].clone()
-    };
-    let codex_home = TempDir::new().unwrap();
-    let mut config = load_default_config_for_test(&codex_home).await;
-    config.model_provider = model_provider;
-    // Also configure user instructions to ensure they are NOT delivered on resume.
-    config.user_instructions = Some("be nice".to_string());
-
-    let thread_manager = ThreadManager::with_models_provider_and_home(
-        CodexAuth::from_api_key("Test API Key"),
-        config.model_provider.clone(),
-        config.codex_home.clone(),
-    );
-    let auth_manager =
-        codex_core::AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key"));
-    let NewThread {
-        thread: codex,
-        session_configured,
-        ..
-    } = thread_manager
-        .resume_thread_from_rollout(config, session_path.clone(), auth_manager)
+    let codex_home = Arc::new(TempDir::new().unwrap());
+    let mut builder = test_codex()
+        .with_home(codex_home.clone())
+        .with_config(|config| {
+            // Ensure user instructions are NOT delivered on resume.
+            config.user_instructions = Some("be nice".to_string());
+        });
+    let test = builder
+        .resume(&server, codex_home, session_path.clone())
        .await
        .expect("resume conversation");
+    let codex = test.codex.clone();
+    let session_configured = test.session_configured;

    // 1) Assert initial_messages only includes existing EventMsg entries; response items are not converted
    let initial_msgs = session_configured
@@ -367,30 +355,13 @@ async fn includes_conversation_id_and_model_headers_in_request() {

    let resp_mock = mount_sse_once(&server, sse_completed("resp1")).await;

-    let model_provider = ModelProviderInfo {
-        base_url: Some(format!("{}/v1", server.uri())),
-        ..built_in_model_providers()["openai"].clone()
-    };
-
-    // Init session
-    let codex_home = TempDir::new().unwrap();
-    let mut config = load_default_config_for_test(&codex_home).await;
-    config.model_provider = model_provider;
-
-    let thread_manager = ThreadManager::with_models_provider_and_home(
-        CodexAuth::from_api_key("Test API Key"),
-        config.model_provider.clone(),
-        config.codex_home.clone(),
-    );
-    let NewThread {
-        thread: codex,
-        thread_id: session_id,
-        session_configured: _,
-        ..
-    } = thread_manager
-        .start_thread(config)
+    let mut builder = test_codex().with_auth(CodexAuth::from_api_key("Test API Key"));
+    let test = builder
+        .build(&server)
        .await
        .expect("create new conversation");
+    let codex = test.codex.clone();
+    let session_id = test.session_configured.session_id;

    codex
        .submit(Op::UserInput {
@@ -425,26 +396,16 @@ async fn includes_base_instructions_override_in_request() {
    let server = MockServer::start().await;
    let resp_mock = mount_sse_once(&server, sse_completed("resp1")).await;

-    let model_provider = ModelProviderInfo {
-        base_url: Some(format!("{}/v1", server.uri())),
-        ..built_in_model_providers()["openai"].clone()
-    };
-    let codex_home = TempDir::new().unwrap();
-    let mut config = load_default_config_for_test(&codex_home).await;
-
-    config.base_instructions = Some("test instructions".to_string());
-    config.model_provider = model_provider;
-
-    let thread_manager = ThreadManager::with_models_provider_and_home(
-        CodexAuth::from_api_key("Test API Key"),
-        config.model_provider.clone(),
-        config.codex_home.clone(),
-    );
-    let codex = thread_manager
-        .start_thread(config)
+    let mut builder = test_codex()
+        .with_auth(CodexAuth::from_api_key("Test API Key"))
+        .with_config(|config| {
+            config.base_instructions = Some("test instructions".to_string());
+        });
+    let codex = builder
+        .build(&server)
        .await
        .expect("create new conversation")
-        .thread;
+        .codex;

    codex
        .submit(Op::UserInput {
@@ -479,29 +440,19 @@ async fn chatgpt_auth_sends_correct_request() {

    let resp_mock = mount_sse_once(&server, sse_completed("resp1")).await;

-    let model_provider = ModelProviderInfo {
-        base_url: Some(format!("{}/api/codex", server.uri())),
-        ..built_in_model_providers()["openai"].clone()
-    };
-
-    // Init session
-    let codex_home = TempDir::new().unwrap();
-    let mut config = load_default_config_for_test(&codex_home).await;
-    config.model_provider = model_provider;
-    let thread_manager = ThreadManager::with_models_provider_and_home(
-        create_dummy_codex_auth(),
-        config.model_provider.clone(),
-        config.codex_home.clone(),
-    );
-    let NewThread {
-        thread: codex,
-        thread_id,
-        session_configured: _,
-        ..
-    } = thread_manager
-        .start_thread(config)
+    let mut model_provider = built_in_model_providers()["openai"].clone();
+    model_provider.base_url = Some(format!("{}/api/codex", server.uri()));
+    let mut builder = test_codex()
+        .with_auth(create_dummy_codex_auth())
+        .with_config(move |config| {
+            config.model_provider = model_provider;
+        });
+    let test = builder
+        .build(&server)
        .await
        .expect("create new conversation");
+    let codex = test.codex.clone();
+    let thread_id = test.session_configured.session_id;

    codex
        .submit(Op::UserInput {
@@ -617,26 +568,16 @@ async fn includes_user_instructions_message_in_request() {

    let resp_mock = mount_sse_once(&server, sse_completed("resp1")).await;

-    let model_provider = ModelProviderInfo {
-        base_url: Some(format!("{}/v1", server.uri())),
-        ..built_in_model_providers()["openai"].clone()
-    };
-
-    let codex_home = TempDir::new().unwrap();
-    let mut config = load_default_config_for_test(&codex_home).await;
-    config.model_provider = model_provider;
-    config.user_instructions = Some("be nice".to_string());
-
-    let thread_manager = ThreadManager::with_models_provider_and_home(
-        CodexAuth::from_api_key("Test API Key"),
-        config.model_provider.clone(),
-        config.codex_home.clone(),
-    );
-    let codex = thread_manager
-        .start_thread(config)
+    let mut builder = test_codex()
+        .with_auth(CodexAuth::from_api_key("Test API Key"))
+        .with_config(|config| {
+            config.user_instructions = Some("be nice".to_string());
+        });
+    let codex = builder
+        .build(&server)
        .await
        .expect("create new conversation")
-        .thread;
+        .codex;

    codex
        .submit(Op::UserInput {
@@ -689,12 +630,7 @@ async fn skills_append_to_instructions() {

    let resp_mock = mount_sse_once(&server, sse_completed("resp1")).await;

-    let model_provider = ModelProviderInfo {
-        base_url: Some(format!("{}/v1", server.uri())),
-        ..built_in_model_providers()["openai"].clone()
-    };
-
-    let codex_home = TempDir::new().unwrap();
+    let codex_home = Arc::new(TempDir::new().unwrap());
    let skill_dir = codex_home.path().join("skills/demo");
    std::fs::create_dir_all(&skill_dir).expect("create skill dir");
    std::fs::write(
@@ -703,20 +639,18 @@ async fn skills_append_to_instructions() {
    )
    .expect("write skill");

-    let mut config = load_default_config_for_test(&codex_home).await;
-    config.model_provider = model_provider;
-    config.cwd = codex_home.path().to_path_buf();
-
-    let thread_manager = ThreadManager::with_models_provider_and_home(
-        CodexAuth::from_api_key("Test API Key"),
-        config.model_provider.clone(),
-        config.codex_home.clone(),
-    );
-    let codex = thread_manager
-        .start_thread(config)
+    let codex_home_path = codex_home.path().to_path_buf();
+    let mut builder = test_codex()
+        .with_home(codex_home.clone())
+        .with_auth(CodexAuth::from_api_key("Test API Key"))
+        .with_config(move |config| {
+            config.cwd = codex_home_path;
+        });
+    let codex = builder
+        .build(&server)
        .await
        .expect("create new conversation")
-        .thread;
+        .codex;

    codex
        .submit(Op::UserInput {
@@ -1131,28 +1065,17 @@ async fn includes_developer_instructions_message_in_request() {
    let server = MockServer::start().await;

    let resp_mock = mount_sse_once(&server, sse_completed("resp1")).await;
-
-    let model_provider = ModelProviderInfo {
-        base_url: Some(format!("{}/v1", server.uri())),
-        ..built_in_model_providers()["openai"].clone()
-    };
-
-    let codex_home = TempDir::new().unwrap();
-    let mut config = load_default_config_for_test(&codex_home).await;
-    config.model_provider = model_provider;
-    config.user_instructions = Some("be nice".to_string());
-    config.developer_instructions = Some("be useful".to_string());
-
-    let thread_manager = ThreadManager::with_models_provider_and_home(
-        CodexAuth::from_api_key("Test API Key"),
-        config.model_provider.clone(),
-        config.codex_home.clone(),
-    );
-    let codex = thread_manager
-        .start_thread(config)
+    let mut builder = test_codex()
+        .with_auth(CodexAuth::from_api_key("Test API Key"))
+        .with_config(|config| {
+            config.user_instructions = Some("be nice".to_string());
+            config.developer_instructions = Some("be useful".to_string());
+        });
+    let codex = builder
+        .build(&server)
        .await
        .expect("create new conversation")
-        .thread;
+        .codex;

    codex
        .submit(Op::UserInput {
@@ -1288,9 +1211,9 @@ async fn azure_responses_request_includes_store_and_reasoning_ids() {
    prompt.input.push(ResponseItem::WebSearchCall {
        id: Some("web-search-id".into()),
        status: Some("completed".into()),
-        action: WebSearchAction::Search {
+        action: Some(WebSearchAction::Search {
            query: Some("weather".into()),
-        },
+        }),
    });
    prompt.input.push(ResponseItem::FunctionCall {
        id: Some("function-id".into()),
@@ -1390,20 +1313,16 @@ async fn token_count_includes_rate_limits_snapshot() {
    let mut provider = built_in_model_providers()["openai"].clone();
    provider.base_url = Some(format!("{}/v1", server.uri()));

-    let home = TempDir::new().unwrap();
-    let mut config = load_default_config_for_test(&home).await;
-    config.model_provider = provider;
-
-    let thread_manager = ThreadManager::with_models_provider_and_home(
-        CodexAuth::from_api_key("test"),
-        config.model_provider.clone(),
-        config.codex_home.clone(),
-    );
-    let codex = thread_manager
-        .start_thread(config)
+    let mut builder = test_codex()
+        .with_auth(CodexAuth::from_api_key("test"))
+        .with_config(move |config| {
+            config.model_provider = provider;
+        });
+    let codex = builder
+        .build(&server)
        .await
        .expect("create conversation")
-        .thread;
+        .codex;

    codex
        .submit(Op::UserInput {
@@ -1753,20 +1672,16 @@ async fn azure_overrides_assign_properties_used_for_responses_url() {
    };

    // Init session
-    let codex_home = TempDir::new().unwrap();
-    let mut config = load_default_config_for_test(&codex_home).await;
-    config.model_provider = provider;
-
-    let thread_manager = ThreadManager::with_models_provider_and_home(
-        create_dummy_codex_auth(),
-        config.model_provider.clone(),
-        config.codex_home.clone(),
-    );
-    let codex = thread_manager
-        .start_thread(config)
+    let mut builder = test_codex()
+        .with_auth(create_dummy_codex_auth())
+        .with_config(move |config| {
+            config.model_provider = provider;
+        });
+    let codex = builder
+        .build(&server)
        .await
        .expect("create new conversation")
-        .thread;
+        .codex;

    codex
        .submit(Op::UserInput {
@@ -1837,20 +1752,16 @@ async fn env_var_overrides_loaded_auth() {
    };

    // Init session
-    let codex_home = TempDir::new().unwrap();
-    let mut config = load_default_config_for_test(&codex_home).await;
-    config.model_provider = provider;
-
-    let thread_manager = ThreadManager::with_models_provider_and_home(
-        create_dummy_codex_auth(),
-        config.model_provider.clone(),
-        config.codex_home.clone(),
-    );
-    let codex = thread_manager
-        .start_thread(config)
+    let mut builder = test_codex()
+        .with_auth(create_dummy_codex_auth())
+        .with_config(move |config| {
+            config.model_provider = provider;
+        });
+    let codex = builder
+        .build(&server)
        .await
        .expect("create new conversation")
-        .thread;
+        .codex;

    codex
        .submit(Op::UserInput {
@@ -1905,26 +1816,12 @@ async fn history_dedupes_streamed_and_final_messages_across_turns() {

    let request_log = mount_sse_sequence(&server, vec![sse1.clone(), sse1.clone(), sse1]).await;

-    // Configure provider to point to mock server (Responses API) and use API key auth.
-    let model_provider = ModelProviderInfo {
-        base_url: Some(format!("{}/v1", server.uri())),
-        ..built_in_model_providers()["openai"].clone()
-    };
-
-    // Init session with isolated codex home.
-    let codex_home = TempDir::new().unwrap();
-    let mut config = load_default_config_for_test(&codex_home).await;
-    config.model_provider = model_provider;
-
-    let thread_manager = ThreadManager::with_models_provider_and_home(
-        CodexAuth::from_api_key("Test API Key"),
-        config.model_provider.clone(),
-        config.codex_home.clone(),
-    );
-    let NewThread { thread: codex, .. } = thread_manager
-        .start_thread(config)
+    let mut builder = test_codex().with_auth(CodexAuth::from_api_key("Test API Key"));
+    let codex = builder
+        .build(&server)
        .await
-        .expect("create new conversation");
+        .expect("create new conversation")
+        .codex;

    // Turn 1: user sends U1; wait for completion.
    codex
--- a/codex-rs/core/tests/suite/collaboration_instructions.rs
+++ b/codex-rs/core/tests/suite/collaboration_instructions.rs
@@ -104,6 +104,7 @@ async fn user_input_includes_collaboration_instructions_after_override() -> Resu
            cwd: None,
            approval_policy: None,
            sandbox_policy: None,
+            windows_sandbox_level: None,
            model: None,
            effort: None,
            summary: None,
@@ -185,6 +186,7 @@ async fn override_then_user_turn_uses_updated_collaboration_instructions() -> Re
            cwd: None,
            approval_policy: None,
            sandbox_policy: None,
+            windows_sandbox_level: None,
            model: None,
            effort: None,
            summary: None,
@@ -238,6 +240,7 @@ async fn user_turn_overrides_collaboration_instructions_after_override() -> Resu
            cwd: None,
            approval_policy: None,
            sandbox_policy: None,
+            windows_sandbox_level: None,
            model: None,
            effort: None,
            summary: None,
@@ -292,6 +295,7 @@ async fn collaboration_mode_update_emits_new_instruction_message() -> Result<()>
            cwd: None,
            approval_policy: None,
            sandbox_policy: None,
+            windows_sandbox_level: None,
            model: None,
            effort: None,
            summary: None,
@@ -316,6 +320,7 @@ async fn collaboration_mode_update_emits_new_instruction_message() -> Result<()>
            cwd: None,
            approval_policy: None,
            sandbox_policy: None,
+            windows_sandbox_level: None,
            model: None,
            effort: None,
            summary: None,
@@ -361,6 +366,7 @@ async fn collaboration_mode_update_noop_does_not_append() -> Result<()> {
            cwd: None,
            approval_policy: None,
            sandbox_policy: None,
+            windows_sandbox_level: None,
            model: None,
            effort: None,
            summary: None,
@@ -385,6 +391,7 @@ async fn collaboration_mode_update_noop_does_not_append() -> Result<()> {
            cwd: None,
            approval_policy: None,
            sandbox_policy: None,
+            windows_sandbox_level: None,
            model: None,
            effort: None,
            summary: None,
@@ -436,6 +443,7 @@ async fn resume_replays_collaboration_instructions() -> Result<()> {
            cwd: None,
            approval_policy: None,
            sandbox_policy: None,
+            windows_sandbox_level: None,
            model: None,
            effort: None,
            summary: None,
@@ -491,6 +499,7 @@ async fn empty_collaboration_instructions_are_ignored() -> Result<()> {
            cwd: None,
            approval_policy: None,
            sandbox_policy: None,
+            windows_sandbox_level: None,
            model: None,
            effort: None,
            summary: None,
--- a/codex-rs/core/tests/suite/compact.rs
+++ b/codex-rs/core/tests/suite/compact.rs
@@ -1,8 +1,6 @@
 #![allow(clippy::expect_used)]
 use codex_core::CodexAuth;
 use codex_core::ModelProviderInfo;
-use codex_core::NewThread;
-use codex_core::ThreadManager;
 use codex_core::built_in_model_providers;
 use codex_core::compact::SUMMARIZATION_PROMPT;
 use codex_core::compact::SUMMARY_PREFIX;
@@ -17,7 +15,6 @@ use codex_core::protocol::SandboxPolicy;
 use codex_core::protocol::WarningEvent;
 use codex_protocol::config_types::ReasoningSummary;
 use codex_protocol::user_input::UserInput;
-use core_test_support::load_default_config_for_test;
 use core_test_support::responses::ev_local_shell_call;
 use core_test_support::responses::ev_reasoning_item;
 use core_test_support::skip_if_no_network;
@@ -25,7 +22,6 @@ use core_test_support::test_codex::test_codex;
 use core_test_support::wait_for_event;
 use core_test_support::wait_for_event_match;
 use std::collections::VecDeque;
-use tempfile::TempDir;

 use core_test_support::responses::ev_assistant_message;
 use core_test_support::responses::ev_completed;
@@ -140,21 +136,14 @@ async fn summarize_context_three_requests_and_instructions() {

    // Build config pointing to the mock server and spawn Codex.
    let model_provider = non_openai_model_provider(&server);
-    let home = TempDir::new().unwrap();
-    let mut config = load_default_config_for_test(&home).await;
-    config.model_provider = model_provider;
-    set_test_compact_prompt(&mut config);
-    config.model_auto_compact_token_limit = Some(200_000);
-    let thread_manager = ThreadManager::with_models_provider(
-        CodexAuth::from_api_key("dummy"),
-        config.model_provider.clone(),
-    );
-    let NewThread {
-        thread: codex,
-        session_configured,
-        ..
-    } = thread_manager.start_thread(config).await.unwrap();
-    let rollout_path = session_configured.rollout_path.expect("rollout path");
+    let mut builder = test_codex().with_config(move |config| {
+        config.model_provider = model_provider;
+        set_test_compact_prompt(config);
+        config.model_auto_compact_token_limit = Some(200_000);
+    });
+    let test = builder.build(&server).await.unwrap();
+    let codex = test.codex.clone();
+    let rollout_path = test.session_configured.rollout_path.expect("rollout path");

    // 1) Normal user input – should hit server once.
    codex
@@ -338,20 +327,15 @@ async fn manual_compact_uses_custom_prompt() {
    let custom_prompt = "Use this compact prompt instead";

    let model_provider = non_openai_model_provider(&server);
-    let home = TempDir::new().unwrap();
-    let mut config = load_default_config_for_test(&home).await;
-    config.model_provider = model_provider;
-    config.compact_prompt = Some(custom_prompt.to_string());
-
-    let thread_manager = ThreadManager::with_models_provider(
-        CodexAuth::from_api_key("dummy"),
-        config.model_provider.clone(),
-    );
-    let codex = thread_manager
-        .start_thread(config)
+    let mut builder = test_codex().with_config(move |config| {
+        config.model_provider = model_provider;
+        config.compact_prompt = Some(custom_prompt.to_string());
+    });
+    let codex = builder
+        .build(&server)
        .await
        .expect("create conversation")
-        .thread;
+        .codex;

    codex.submit(Op::Compact).await.expect("trigger compact");
    let warning_event = wait_for_event(&codex, |ev| matches!(ev, EventMsg::Warning(_))).await;
@@ -414,16 +398,11 @@ async fn manual_compact_emits_api_and_local_token_usage_events() {
    mount_sse_once(&server, sse_compact).await;

    let model_provider = non_openai_model_provider(&server);
-    let home = TempDir::new().unwrap();
-    let mut config = load_default_config_for_test(&home).await;
-    config.model_provider = model_provider;
-    set_test_compact_prompt(&mut config);
-
-    let thread_manager = ThreadManager::with_models_provider(
-        CodexAuth::from_api_key("dummy"),
-        config.model_provider.clone(),
-    );
-    let NewThread { thread: codex, .. } = thread_manager.start_thread(config).await.unwrap();
+    let mut builder = test_codex().with_config(move |config| {
+        config.model_provider = model_provider;
+        set_test_compact_prompt(config);
+    });
+    let codex = builder.build(&server).await.unwrap().codex;

    // Trigger manual compact and collect TokenCount events for the compact turn.
    codex.submit(Op::Compact).await.unwrap();
@@ -1039,16 +1018,12 @@ async fn auto_compact_runs_after_token_limit_hit() {

    let model_provider = non_openai_model_provider(&server);

-    let home = TempDir::new().unwrap();
-    let mut config = load_default_config_for_test(&home).await;
-    config.model_provider = model_provider;
-    set_test_compact_prompt(&mut config);
-    config.model_auto_compact_token_limit = Some(200_000);
-    let thread_manager = ThreadManager::with_models_provider(
-        CodexAuth::from_api_key("dummy"),
-        config.model_provider.clone(),
-    );
-    let codex = thread_manager.start_thread(config).await.unwrap().thread;
+    let mut builder = test_codex().with_config(move |config| {
+        config.model_provider = model_provider;
+        set_test_compact_prompt(config);
+        config.model_auto_compact_token_limit = Some(200_000);
+    });
+    let codex = builder.build(&server).await.unwrap().codex;

    codex
        .submit(Op::UserInput {
@@ -1302,7 +1277,7 @@ async fn auto_compact_runs_after_resume_when_token_usage_is_over_limit() {
        .unwrap();

    wait_for_event(&resumed.codex, |event| {
-        matches!(event, EventMsg::ContextCompacted(_))
+        matches!(event, EventMsg::ContextCompactionEnded(_))
    })
    .await;
    wait_for_event(&resumed.codex, |event| {
@@ -1379,20 +1354,14 @@ async fn auto_compact_persists_rollout_entries() {

    let model_provider = non_openai_model_provider(&server);

-    let home = TempDir::new().unwrap();
-    let mut config = load_default_config_for_test(&home).await;
-    config.model_provider = model_provider;
-    set_test_compact_prompt(&mut config);
-    config.model_auto_compact_token_limit = Some(200_000);
-    let thread_manager = ThreadManager::with_models_provider(
-        CodexAuth::from_api_key("dummy"),
-        config.model_provider.clone(),
-    );
-    let NewThread {
-        thread: codex,
-        session_configured,
-        ..
-    } = thread_manager.start_thread(config).await.unwrap();
+    let mut builder = test_codex().with_config(move |config| {
+        config.model_provider = model_provider;
+        set_test_compact_prompt(config);
+        config.model_auto_compact_token_limit = Some(200_000);
+    });
+    let test = builder.build(&server).await.unwrap();
+    let codex = test.codex.clone();
+    let session_configured = test.session_configured;

    codex
        .submit(Op::UserInput {
@@ -1497,19 +1466,12 @@ async fn manual_compact_retries_after_context_window_error() {

    let model_provider = non_openai_model_provider(&server);

-    let home = TempDir::new().unwrap();
-    let mut config = load_default_config_for_test(&home).await;
-    config.model_provider = model_provider;
-    set_test_compact_prompt(&mut config);
-    config.model_auto_compact_token_limit = Some(200_000);
-    let codex = ThreadManager::with_models_provider(
-        CodexAuth::from_api_key("dummy"),
-        config.model_provider.clone(),
-    )
-    .start_thread(config)
-    .await
-    .unwrap()
-    .thread;
+    let mut builder = test_codex().with_config(move |config| {
+        config.model_provider = model_provider;
+        set_test_compact_prompt(config);
+        config.model_auto_compact_token_limit = Some(200_000);
+    });
+    let codex = builder.build(&server).await.unwrap().codex;

    codex
        .submit(Op::UserInput {
@@ -1632,18 +1594,11 @@ async fn manual_compact_twice_preserves_latest_user_messages() {

    let model_provider = non_openai_model_provider(&server);

-    let home = TempDir::new().unwrap();
-    let mut config = load_default_config_for_test(&home).await;
-    config.model_provider = model_provider;
-    set_test_compact_prompt(&mut config);
-    let codex = ThreadManager::with_models_provider(
-        CodexAuth::from_api_key("dummy"),
-        config.model_provider.clone(),
-    )
-    .start_thread(config)
-    .await
-    .unwrap()
-    .thread;
+    let mut builder = test_codex().with_config(move |config| {
+        config.model_provider = model_provider;
+        set_test_compact_prompt(config);
+    });
+    let codex = builder.build(&server).await.unwrap().codex;

    codex
        .submit(Op::UserInput {
@@ -1700,12 +1655,11 @@ async fn manual_compact_twice_preserves_latest_user_messages() {
                && item
                    .get("content")
                    .and_then(|v| v.as_array())
-                    .map(|arr| {
+                    .is_some_and(|arr| {
                        arr.iter().any(|entry| {
                            entry.get("text").and_then(|v| v.as_str()) == Some(expected)
                        })
                    })
-                    .unwrap_or(false)
        })
    };

@@ -1843,16 +1797,12 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_

    let model_provider = non_openai_model_provider(&server);

-    let home = TempDir::new().unwrap();
-    let mut config = load_default_config_for_test(&home).await;
-    config.model_provider = model_provider;
-    set_test_compact_prompt(&mut config);
-    config.model_auto_compact_token_limit = Some(200);
-    let thread_manager = ThreadManager::with_models_provider(
-        CodexAuth::from_api_key("dummy"),
-        config.model_provider.clone(),
-    );
-    let codex = thread_manager.start_thread(config).await.unwrap().thread;
+    let mut builder = test_codex().with_config(move |config| {
+        config.model_provider = model_provider;
+        set_test_compact_prompt(config);
+        config.model_auto_compact_token_limit = Some(200);
+    });
+    let codex = builder.build(&server).await.unwrap().codex;

    let mut auto_compact_lifecycle_events = Vec::new();
    for user in [MULTI_AUTO_MSG, follow_up_user, final_user] {
@@ -1954,21 +1904,13 @@ async fn auto_compact_triggers_after_function_call_over_95_percent_usage() {

    let model_provider = non_openai_model_provider(&server);

-    let home = TempDir::new().unwrap();
-    let mut config = load_default_config_for_test(&home).await;
-    config.model_provider = model_provider;
-    set_test_compact_prompt(&mut config);
-    config.model_context_window = Some(context_window);
-    config.model_auto_compact_token_limit = Some(limit);
-
-    let codex = ThreadManager::with_models_provider(
-        CodexAuth::from_api_key("dummy"),
-        config.model_provider.clone(),
-    )
-    .start_thread(config)
-    .await
-    .unwrap()
-    .thread;
+    let mut builder = test_codex().with_config(move |config| {
+        config.model_provider = model_provider;
+        set_test_compact_prompt(config);
+        config.model_context_window = Some(context_window);
+        config.model_auto_compact_token_limit = Some(limit);
+    });
+    let codex = builder.build(&server).await.unwrap().codex;

    codex
        .submit(Op::UserInput {
--- a/codex-rs/core/tests/suite/compact_remote.rs
+++ b/codex-rs/core/tests/suite/compact_remote.rs
@@ -202,7 +202,7 @@ async fn remote_compact_runs_automatically() -> Result<()> {
        })
        .await?;
    let message = wait_for_event_match(&codex, |ev| match ev {
-        EventMsg::ContextCompacted(_) => Some(true),
+        EventMsg::ContextCompactionEnded(_) => Some(true),
        _ => None,
    })
    .await;
--- a/codex-rs/core/tests/suite/compact_resume_fork.rs
+++ b/codex-rs/core/tests/suite/compact_resume_fork.rs
@@ -10,12 +10,8 @@
 use super::compact::COMPACT_WARNING_MESSAGE;
 use super::compact::FIRST_REPLY;
 use super::compact::SUMMARY_TEXT;
-use codex_core::CodexAuth;
 use codex_core::CodexThread;
-use codex_core::ModelProviderInfo;
-use codex_core::NewThread;
 use codex_core::ThreadManager;
-use codex_core::built_in_model_providers;
 use codex_core::compact::SUMMARIZATION_PROMPT;
 use codex_core::config::Config;
 use codex_core::protocol::EventMsg;
@@ -23,12 +19,12 @@ use codex_core::protocol::Op;
 use codex_core::protocol::WarningEvent;
 use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
 use codex_protocol::user_input::UserInput;
-use core_test_support::load_default_config_for_test;
 use core_test_support::responses::ResponseMock;
 use core_test_support::responses::ev_assistant_message;
 use core_test_support::responses::ev_completed;
 use core_test_support::responses::mount_sse_once_match;
 use core_test_support::responses::sse;
+use core_test_support::test_codex::test_codex;
 use core_test_support::wait_for_event;
 use pretty_assertions::assert_eq;
 use serde_json::Value;
@@ -99,8 +95,7 @@ fn extract_summary_message(request: &Value, summary_text: &str) -> Value {
                        .and_then(|arr| arr.first())
                        .and_then(|entry| entry.get("text"))
                        .and_then(Value::as_str)
-                        .map(|text| text.contains(summary_text))
-                        .unwrap_or(false)
+                        .is_some_and(|text| text.contains(summary_text))
            })
        })
        .cloned()
@@ -117,21 +112,18 @@ fn normalize_compact_prompts(requests: &mut [Value]) {
                {
                    return true;
                }
-                let content = item
-                    .get("content")
-                    .and_then(Value::as_array)
-                    .cloned()
+                let Some(content) = item.get("content").and_then(Value::as_array) else {
+                    return false;
+                };
+                let Some(first) = content.first() else {
+                    return false;
+                };
+                let text = first
+                    .get("text")
+                    .and_then(Value::as_str)
                    .unwrap_or_default();
-                if let Some(first) = content.first() {
-                    let text = first
-                        .get("text")
-                        .and_then(Value::as_str)
-                        .unwrap_or_default();
-                    let normalized_text = normalize_line_endings_str(text);
-                    !(text.is_empty() || normalized_text == normalized_summary_prompt)
-                } else {
-                    false
-                }
+                let normalized_text = normalize_line_endings_str(text);
+                !(text.is_empty() || normalized_text == normalized_summary_prompt)
            });
        }
    }
@@ -874,9 +866,7 @@ fn gather_request_bodies(request_log: &[ResponseMock]) -> Vec<Value> {
        .flat_map(ResponseMock::requests)
        .map(|request| request.body_json())
        .collect::<Vec<_>>();
-    for body in &mut bodies {
-        normalize_line_endings(body);
-    }
+    bodies.iter_mut().for_each(normalize_line_endings);
    bodies
 }

@@ -960,29 +950,19 @@ async fn mount_second_compact_flow(server: &MockServer) -> Vec<ResponseMock> {
 async fn start_test_conversation(
    server: &MockServer,
    model: Option<&str>,
-) -> (TempDir, Config, ThreadManager, Arc<CodexThread>) {
-    let model_provider = ModelProviderInfo {
-        name: "Non-OpenAI Model provider".into(),
-        base_url: Some(format!("{}/v1", server.uri())),
-        ..built_in_model_providers()["openai"].clone()
-    };
-    let home = TempDir::new().expect("create temp dir");
-    let mut config = load_default_config_for_test(&home).await;
-    config.model_provider = model_provider;
-    config.compact_prompt = Some(SUMMARIZATION_PROMPT.to_string());
-    if let Some(model) = model {
-        config.model = Some(model.to_string());
-    }
-    let manager = ThreadManager::with_models_provider(
-        CodexAuth::from_api_key("dummy"),
-        config.model_provider.clone(),
-    );
-    let NewThread { thread, .. } = manager
-        .start_thread(config.clone())
-        .await
-        .expect("create conversation");
-
-    (home, config, manager, thread)
+) -> (Arc<TempDir>, Config, Arc<ThreadManager>, Arc<CodexThread>) {
+    let base_url = format!("{}/v1", server.uri());
+    let model = model.map(str::to_string);
+    let mut builder = test_codex().with_config(move |config| {
+        config.model_provider.name = "Non-OpenAI Model provider".to_string();
+        config.model_provider.base_url = Some(base_url);
+        config.compact_prompt = Some(SUMMARIZATION_PROMPT.to_string());
+        if let Some(model) = model {
+            config.model = Some(model);
+        }
+    });
+    let test = builder.build(server).await.expect("create conversation");
+    (test.home, test.config, test.thread_manager, test.codex)
 }

 async fn user_turn(conversation: &Arc<CodexThread>, text: &str) {
@@ -1021,13 +1001,14 @@ async fn resume_conversation(
    config: &Config,
    path: std::path::PathBuf,
 ) -> Arc<CodexThread> {
-    let auth_manager =
-        codex_core::AuthManager::from_auth_for_testing(CodexAuth::from_api_key("dummy"));
-    let NewThread { thread, .. } = manager
+    let auth_manager = codex_core::AuthManager::from_auth_for_testing(
+        codex_core::CodexAuth::from_api_key("dummy"),
+    );
+    manager
        .resume_thread_from_rollout(config.clone(), path, auth_manager)
        .await
-        .expect("resume conversation");
-    thread
+        .expect("resume conversation")
+        .thread
 }

 #[cfg(test)]
@@ -1037,9 +1018,9 @@ async fn fork_thread(
    path: std::path::PathBuf,
    nth_user_message: usize,
 ) -> Arc<CodexThread> {
-    let NewThread { thread, .. } = manager
+    manager
        .fork_thread(nth_user_message, config.clone(), path)
        .await
-        .expect("fork conversation");
-    thread
+        .expect("fork conversation")
+        .thread
 }
--- a/codex-rs/core/tests/suite/deprecation_notice.rs
+++ b/codex-rs/core/tests/suite/deprecation_notice.rs
@@ -49,7 +49,7 @@ async fn emits_deprecation_notice_for_legacy_feature_flag() -> anyhow::Result<()
    assert_eq!(
        details.as_deref(),
        Some(
-            "Enable it with `--enable unified_exec` or `[features].unified_exec` in config.toml. See https://developers.openai.com/codex/config-advanced/ for details."
+            "Enable it with `--enable unified_exec` or `[features].unified_exec` in config.toml. See https://github.com/openai/codex/blob/main/docs/config.md#feature-flags for details."
        ),
    );

--- a/codex-rs/core/tests/suite/exec.rs
+++ b/codex-rs/core/tests/suite/exec.rs
@@ -10,6 +10,7 @@ use codex_core::exec::process_exec_tool_call;
 use codex_core::protocol::SandboxPolicy;
 use codex_core::sandboxing::SandboxPermissions;
 use codex_core::spawn::CODEX_SANDBOX_ENV_VAR;
+use codex_protocol::config_types::WindowsSandboxLevel;
 use tempfile::TempDir;

 use codex_core::error::Result;
@@ -27,7 +28,7 @@ fn skip_test() -> bool {

 #[expect(clippy::expect_used)]
 async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>) -> Result<ExecToolCallOutput> {
-    let sandbox_type = get_platform_sandbox().expect("should be able to get sandbox type");
+    let sandbox_type = get_platform_sandbox(false).expect("should be able to get sandbox type");
    assert_eq!(sandbox_type, SandboxType::MacosSeatbelt);

    let params = ExecParams {
@@ -36,6 +37,7 @@ async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>) -> Result<ExecToolCallOutput
        expiration: 1000.into(),
        env: HashMap::new(),
        sandbox_permissions: SandboxPermissions::UseDefault,
+        windows_sandbox_level: WindowsSandboxLevel::Disabled,
        justification: None,
        arg0: None,
    };
--- a/codex-rs/core/tests/suite/fork_thread.rs
+++ b/codex-rs/core/tests/suite/fork_thread.rs
@@ -1,8 +1,4 @@
-use codex_core::CodexAuth;
-use codex_core::ModelProviderInfo;
 use codex_core::NewThread;
-use codex_core::ThreadManager;
-use codex_core::built_in_model_providers;
 use codex_core::parse_turn_item;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::Op;
@@ -10,10 +6,9 @@ use codex_core::protocol::RolloutItem;
 use codex_core::protocol::RolloutLine;
 use codex_protocol::items::TurnItem;
 use codex_protocol::user_input::UserInput;
-use core_test_support::load_default_config_for_test;
 use core_test_support::skip_if_no_network;
+use core_test_support::test_codex::test_codex;
 use core_test_support::wait_for_event;
-use tempfile::TempDir;
 use wiremock::Mock;
 use wiremock::MockServer;
 use wiremock::ResponseTemplate;
@@ -44,25 +39,11 @@ async fn fork_thread_twice_drops_to_first_message() {
        .mount(&server)
        .await;

-    // Configure Codex to use the mock server.
-    let model_provider = ModelProviderInfo {
-        base_url: Some(format!("{}/v1", server.uri())),
-        ..built_in_model_providers()["openai"].clone()
-    };
-
-    let home = TempDir::new().unwrap();
-    let mut config = load_default_config_for_test(&home).await;
-    config.model_provider = model_provider.clone();
-    let config_for_fork = config.clone();
-
-    let thread_manager = ThreadManager::with_models_provider(
-        CodexAuth::from_api_key("dummy"),
-        config.model_provider.clone(),
-    );
-    let NewThread { thread: codex, .. } = thread_manager
-        .start_thread(config)
-        .await
-        .expect("create conversation");
+    let mut builder = test_codex();
+    let test = builder.build(&server).await.expect("create conversation");
+    let codex = test.codex.clone();
+    let thread_manager = test.thread_manager.clone();
+    let config_for_fork = test.config.clone();

    // Send three user messages; wait for three completed turns.
    for text in ["first", "second", "third"] {
--- a/codex-rs/core/tests/suite/items.rs
+++ b/codex-rs/core/tests/suite/items.rs
@@ -6,6 +6,7 @@ use codex_core::protocol::ItemCompletedEvent;
 use codex_core::protocol::ItemStartedEvent;
 use codex_core::protocol::Op;
 use codex_protocol::items::TurnItem;
+use codex_protocol::models::WebSearchAction;
 use codex_protocol::user_input::ByteRange;
 use codex_protocol::user_input::TextElement;
 use codex_protocol::user_input::UserInput;
@@ -18,7 +19,7 @@ use core_test_support::responses::ev_reasoning_item_added;
 use core_test_support::responses::ev_reasoning_summary_text_delta;
 use core_test_support::responses::ev_reasoning_text_delta;
 use core_test_support::responses::ev_response_created;
-use core_test_support::responses::ev_web_search_call_added;
+use core_test_support::responses::ev_web_search_call_added_partial;
 use core_test_support::responses::ev_web_search_call_done;
 use core_test_support::responses::mount_sse_once;
 use core_test_support::responses::sse;
@@ -208,8 +209,7 @@ async fn web_search_item_is_emitted() -> anyhow::Result<()> {

    let TestCodex { codex, .. } = test_codex().build(&server).await?;

-    let web_search_added =
-        ev_web_search_call_added("web-search-1", "in_progress", "weather seattle");
+    let web_search_added = ev_web_search_call_added_partial("web-search-1", "in_progress");
    let web_search_done = ev_web_search_call_done("web-search-1", "completed", "weather seattle");

    let first_response = sse(vec![
@@ -230,11 +230,8 @@ async fn web_search_item_is_emitted() -> anyhow::Result<()> {
        })
        .await?;

-    let started = wait_for_event_match(&codex, |ev| match ev {
-        EventMsg::ItemStarted(ItemStartedEvent {
-            item: TurnItem::WebSearch(item),
-            ..
-        }) => Some(item.clone()),
+    let begin = wait_for_event_match(&codex, |ev| match ev {
+        EventMsg::WebSearchBegin(event) => Some(event.clone()),
        _ => None,
    })
    .await;
@@ -247,8 +244,14 @@ async fn web_search_item_is_emitted() -> anyhow::Result<()> {
    })
    .await;

-    assert_eq!(started.id, completed.id);
-    assert_eq!(completed.query, "weather seattle");
+    assert_eq!(begin.call_id, "web-search-1");
+    assert_eq!(completed.id, begin.call_id);
+    assert_eq!(
+        completed.action,
+        WebSearchAction::Search {
+            query: Some("weather seattle".to_string()),
+        }
+    );

    Ok(())
 }
--- a/codex-rs/core/tests/suite/mod.rs
+++ b/codex-rs/core/tests/suite/mod.rs
@@ -74,6 +74,7 @@ mod tools;
 mod truncation;
 mod undo;
 mod unified_exec;
+mod unstable_features_warning;
 mod user_notification;
 mod user_shell_cmd;
 mod view_image;
--- a/codex-rs/core/tests/suite/model_overrides.rs
+++ b/codex-rs/core/tests/suite/model_overrides.rs
@@ -1,42 +1,35 @@
-use codex_core::CodexAuth;
-use codex_core::ThreadManager;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::Op;
 use codex_protocol::openai_models::ReasoningEffort;
-use core_test_support::load_default_config_for_test;
+use core_test_support::responses::start_mock_server;
+use core_test_support::test_codex::test_codex;
 use core_test_support::wait_for_event;
 use pretty_assertions::assert_eq;
-use tempfile::TempDir;

 const CONFIG_TOML: &str = "config.toml";

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn override_turn_context_does_not_persist_when_config_exists() {
-    let codex_home = TempDir::new().unwrap();
-    let config_path = codex_home.path().join(CONFIG_TOML);
+    let server = start_mock_server().await;
    let initial_contents = "model = \"gpt-4o\"\n";
-    tokio::fs::write(&config_path, initial_contents)
-        .await
-        .expect("seed config.toml");
-
-    let mut config = load_default_config_for_test(&codex_home).await;
-    config.model = Some("gpt-4o".to_string());
-
-    let thread_manager = ThreadManager::with_models_provider(
-        CodexAuth::from_api_key("Test API Key"),
-        config.model_provider.clone(),
-    );
-    let codex = thread_manager
-        .start_thread(config)
-        .await
-        .expect("create conversation")
-        .thread;
+    let mut builder = test_codex()
+        .with_pre_build_hook(move |home| {
+            let config_path = home.join(CONFIG_TOML);
+            std::fs::write(config_path, initial_contents).expect("seed config.toml");
+        })
+        .with_config(|config| {
+            config.model = Some("gpt-4o".to_string());
+        });
+    let test = builder.build(&server).await.expect("create conversation");
+    let codex = test.codex.clone();
+    let config_path = test.home.path().join(CONFIG_TOML);

    codex
        .submit(Op::OverrideTurnContext {
            cwd: None,
            approval_policy: None,
            sandbox_policy: None,
+            windows_sandbox_level: None,
            model: Some("o3".to_string()),
            effort: Some(Some(ReasoningEffort::High)),
            summary: None,
@@ -57,30 +50,22 @@ async fn override_turn_context_does_not_persist_when_config_exists() {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn override_turn_context_does_not_create_config_file() {
-    let codex_home = TempDir::new().unwrap();
-    let config_path = codex_home.path().join(CONFIG_TOML);
+    let server = start_mock_server().await;
+    let mut builder = test_codex();
+    let test = builder.build(&server).await.expect("create conversation");
+    let codex = test.codex.clone();
+    let config_path = test.home.path().join(CONFIG_TOML);
    assert!(
        !config_path.exists(),
        "test setup should start without config"
    );

-    let config = load_default_config_for_test(&codex_home).await;
-
-    let thread_manager = ThreadManager::with_models_provider(
-        CodexAuth::from_api_key("Test API Key"),
-        config.model_provider.clone(),
-    );
-    let codex = thread_manager
-        .start_thread(config)
-        .await
-        .expect("create conversation")
-        .thread;
-
    codex
        .submit(Op::OverrideTurnContext {
            cwd: None,
            approval_policy: None,
            sandbox_policy: None,
+            windows_sandbox_level: None,
            model: Some("o3".to_string()),
            effort: Some(Some(ReasoningEffort::Medium)),
            summary: None,
--- a/codex-rs/core/tests/suite/model_tools.rs
+++ b/codex-rs/core/tests/suite/model_tools.rs
@@ -38,7 +38,7 @@ async fn collect_tool_identifiers_for_model(model: &str) -> Vec<String> {
        .with_model(model)
        // Keep tool expectations stable when the default web_search mode changes.
        .with_config(|config| {
-            config.web_search_mode = Some(WebSearchMode::Cached);
+            config.web_search_mode = WebSearchMode::Cached;
            config.features.enable(Feature::CollaborationModes);
        });
    let test = builder
--- a/codex-rs/core/tests/suite/override_updates.rs
+++ b/codex-rs/core/tests/suite/override_updates.rs
@@ -118,6 +118,7 @@ async fn override_turn_context_records_permissions_update() -> Result<()> {
            cwd: None,
            approval_policy: Some(AskForApproval::Never),
            sandbox_policy: None,
+            windows_sandbox_level: None,
            model: None,
            effort: None,
            summary: None,
@@ -161,6 +162,7 @@ async fn override_turn_context_records_environment_update() -> Result<()> {
            cwd: Some(new_cwd.path().to_path_buf()),
            approval_policy: None,
            sandbox_policy: None,
+            windows_sandbox_level: None,
            model: None,
            effort: None,
            summary: None,
@@ -198,6 +200,7 @@ async fn override_turn_context_records_collaboration_update() -> Result<()> {
            cwd: None,
            approval_policy: None,
            sandbox_policy: None,
+            windows_sandbox_level: None,
            model: None,
            effort: None,
            summary: None,
--- a/codex-rs/core/tests/suite/permissions_messages.rs
+++ b/codex-rs/core/tests/suite/permissions_messages.rs
@@ -106,6 +106,7 @@ async fn permissions_message_added_on_override_change() -> Result<()> {
            cwd: None,
            approval_policy: Some(AskForApproval::Never),
            sandbox_policy: None,
+            windows_sandbox_level: None,
            model: None,
            effort: None,
            summary: None,
@@ -227,6 +228,7 @@ async fn resume_replays_permissions_messages() -> Result<()> {
            cwd: None,
            approval_policy: Some(AskForApproval::Never),
            sandbox_policy: None,
+            windows_sandbox_level: None,
            model: None,
            effort: None,
            summary: None,
@@ -309,6 +311,7 @@ async fn resume_and_fork_append_permissions_messages() -> Result<()> {
            cwd: None,
            approval_policy: Some(AskForApproval::Never),
            sandbox_policy: None,
+            windows_sandbox_level: None,
            model: None,
            effort: None,
            summary: None,
--- a/codex-rs/core/tests/suite/personality.rs
+++ b/codex-rs/core/tests/suite/personality.rs
@@ -210,6 +210,7 @@ async fn user_turn_personality_some_adds_update_message() -> anyhow::Result<()>
            cwd: None,
            approval_policy: None,
            sandbox_policy: None,
+            windows_sandbox_level: None,
            model: None,
            effort: None,
            summary: None,
@@ -362,6 +363,7 @@ async fn user_turn_personality_remote_model_template_includes_update_message() -
            cwd: None,
            approval_policy: None,
            sandbox_policy: None,
+            windows_sandbox_level: None,
            model: Some(remote_slug.to_string()),
            effort: None,
            summary: None,
--- a/codex-rs/core/tests/suite/prompt_caching.rs
+++ b/codex-rs/core/tests/suite/prompt_caching.rs
@@ -92,7 +92,7 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> {
            config.user_instructions = Some("be consistent and helpful".to_string());
            config.model = Some("gpt-5.1-codex-max".to_string());
            // Keep tool expectations stable when the default web_search mode changes.
-            config.web_search_mode = Some(WebSearchMode::Cached);
+            config.web_search_mode = WebSearchMode::Cached;
            config.features.enable(Feature::CollaborationModes);
        })
        .build(&server)
@@ -350,6 +350,7 @@ async fn overrides_turn_context_but_keeps_cached_prefix_and_key_constant() -> an
            cwd: None,
            approval_policy: Some(AskForApproval::Never),
            sandbox_policy: Some(new_policy.clone()),
+            windows_sandbox_level: None,
            model: Some("o3".to_string()),
            effort: Some(Some(ReasoningEffort::High)),
            summary: Some(ReasoningSummary::Detailed),
@@ -427,6 +428,7 @@ async fn override_before_first_turn_emits_environment_context() -> anyhow::Resul
            cwd: None,
            approval_policy: Some(AskForApproval::Never),
            sandbox_policy: None,
+            windows_sandbox_level: None,
            model: Some("gpt-5.1-codex".to_string()),
            effort: Some(Some(ReasoningEffort::Low)),
            summary: None,
--- a/codex-rs/core/tests/suite/remote_models.rs
+++ b/codex-rs/core/tests/suite/remote_models.rs
@@ -138,6 +138,7 @@ async fn remote_models_remote_model_uses_unified_exec() -> Result<()> {
            cwd: None,
            approval_policy: None,
            sandbox_policy: None,
+            windows_sandbox_level: None,
            model: Some(REMOTE_MODEL_SLUG.to_string()),
            effort: None,
            summary: None,
@@ -367,6 +368,7 @@ async fn remote_models_apply_remote_base_instructions() -> Result<()> {
            cwd: None,
            approval_policy: None,
            sandbox_policy: None,
+            windows_sandbox_level: None,
            model: Some(model.to_string()),
            effort: None,
            summary: None,
--- a/codex-rs/core/tests/suite/request_user_input.rs
+++ b/codex-rs/core/tests/suite/request_user_input.rs
@@ -286,6 +286,19 @@ async fn request_user_input_rejected_in_execute_mode() -> anyhow::Result<()> {
    .await
 }

+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn request_user_input_rejected_in_code_mode() -> anyhow::Result<()> {
+    assert_request_user_input_rejected("Code", |model| CollaborationMode {
+        mode: ModeKind::Code,
+        settings: Settings {
+            model,
+            reasoning_effort: None,
+            developer_instructions: None,
+        },
+    })
+    .await
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn request_user_input_rejected_in_custom_mode() -> anyhow::Result<()> {
    assert_request_user_input_rejected("Custom", |model| CollaborationMode {
--- a/codex-rs/core/tests/suite/review.rs
+++ b/codex-rs/core/tests/suite/review.rs
@@ -1,11 +1,7 @@
-use codex_core::CodexAuth;
 use codex_core::CodexThread;
 use codex_core::ContentItem;
-use codex_core::ModelProviderInfo;
 use codex_core::REVIEW_PROMPT;
 use codex_core::ResponseItem;
-use codex_core::ThreadManager;
-use codex_core::built_in_model_providers;
 use codex_core::config::Config;
 use codex_core::protocol::ENVIRONMENT_CONTEXT_OPEN_TAG;
 use codex_core::protocol::EventMsg;
@@ -21,11 +17,11 @@ use codex_core::protocol::RolloutItem;
 use codex_core::protocol::RolloutLine;
 use codex_core::review_format::render_review_output_text;
 use codex_protocol::user_input::UserInput;
-use core_test_support::load_default_config_for_test;
 use core_test_support::load_sse_fixture_with_id_from_str;
 use core_test_support::responses::ResponseMock;
 use core_test_support::responses::mount_sse_sequence;
 use core_test_support::skip_if_no_network;
+use core_test_support::test_codex::test_codex;
 use core_test_support::wait_for_event;
 use pretty_assertions::assert_eq;
 use std::path::PathBuf;
@@ -73,8 +69,8 @@ async fn review_op_emits_lifecycle_and_review_output() {
    let review_json_escaped = serde_json::to_string(&review_json).unwrap();
    let sse_raw = sse_template.replace("__REVIEW__", &review_json_escaped);
    let (server, _request_log) = start_responses_server_with_sse(&sse_raw, 1).await;
-    let codex_home = TempDir::new().unwrap();
-    let codex = new_conversation_for_server(&server, &codex_home, |_| {}).await;
+    let codex_home = Arc::new(TempDir::new().unwrap());
+    let codex = new_conversation_for_server(&server, codex_home.clone(), |_| {}).await;

    // Submit review request.
    codex
@@ -174,6 +170,7 @@ async fn review_op_emits_lifecycle_and_review_output() {
        "assistant review output contains user_action markup"
    );

+    let _codex_home_guard = codex_home;
    server.verify().await;
 }

@@ -194,8 +191,8 @@ async fn review_op_with_plain_text_emits_review_fallback() {
        {"type":"response.completed", "response": {"id": "__ID__"}}
    ]"#;
    let (server, _request_log) = start_responses_server_with_sse(sse_raw, 1).await;
-    let codex_home = TempDir::new().unwrap();
-    let codex = new_conversation_for_server(&server, &codex_home, |_| {}).await;
+    let codex_home = Arc::new(TempDir::new().unwrap());
+    let codex = new_conversation_for_server(&server, codex_home.clone(), |_| {}).await;

    codex
        .submit(Op::Review {
@@ -226,6 +223,7 @@ async fn review_op_with_plain_text_emits_review_fallback() {
    assert_eq!(expected, review);
    let _complete = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;

+    let _codex_home_guard = codex_home;
    server.verify().await;
 }

@@ -254,8 +252,8 @@ async fn review_filters_agent_message_related_events() {
        {"type":"response.completed", "response": {"id": "__ID__"}}
    ]"#;
    let (server, _request_log) = start_responses_server_with_sse(sse_raw, 1).await;
-    let codex_home = TempDir::new().unwrap();
-    let codex = new_conversation_for_server(&server, &codex_home, |_| {}).await;
+    let codex_home = Arc::new(TempDir::new().unwrap());
+    let codex = new_conversation_for_server(&server, codex_home.clone(), |_| {}).await;

    codex
        .submit(Op::Review {
@@ -295,6 +293,7 @@ async fn review_filters_agent_message_related_events() {
    .await;
    assert!(saw_entered && saw_exited, "missing review lifecycle events");

+    let _codex_home_guard = codex_home;
    server.verify().await;
 }

@@ -335,8 +334,8 @@ async fn review_does_not_emit_agent_message_on_structured_output() {
    let review_json_escaped = serde_json::to_string(&review_json).unwrap();
    let sse_raw = sse_template.replace("__REVIEW__", &review_json_escaped);
    let (server, _request_log) = start_responses_server_with_sse(&sse_raw, 1).await;
-    let codex_home = TempDir::new().unwrap();
-    let codex = new_conversation_for_server(&server, &codex_home, |_| {}).await;
+    let codex_home = Arc::new(TempDir::new().unwrap());
+    let codex = new_conversation_for_server(&server, codex_home.clone(), |_| {}).await;

    codex
        .submit(Op::Review {
@@ -375,6 +374,7 @@ async fn review_does_not_emit_agent_message_on_structured_output() {
    assert_eq!(1, agent_messages, "expected exactly one AgentMessage event");
    assert!(saw_entered && saw_exited, "missing review lifecycle events");

+    let _codex_home_guard = codex_home;
    server.verify().await;
 }

@@ -389,9 +389,9 @@ async fn review_uses_custom_review_model_from_config() {
        {"type":"response.completed", "response": {"id": "__ID__"}}
    ]"#;
    let (server, request_log) = start_responses_server_with_sse(sse_raw, 1).await;
-    let codex_home = TempDir::new().unwrap();
+    let codex_home = Arc::new(TempDir::new().unwrap());
    // Choose a review model different from the main model; ensure it is used.
-    let codex = new_conversation_for_server(&server, &codex_home, |cfg| {
+    let codex = new_conversation_for_server(&server, codex_home.clone(), |cfg| {
        cfg.model = Some("gpt-4.1".to_string());
        cfg.review_model = Some("gpt-5.1".to_string());
    })
@@ -428,6 +428,7 @@ async fn review_uses_custom_review_model_from_config() {
    let body = request.body_json();
    assert_eq!(body["model"].as_str().unwrap(), "gpt-5.1");

+    let _codex_home_guard = codex_home;
    server.verify().await;
 }

@@ -442,8 +443,8 @@ async fn review_uses_session_model_when_review_model_unset() {
        {"type":"response.completed", "response": {"id": "__ID__"}}
    ]"#;
    let (server, request_log) = start_responses_server_with_sse(sse_raw, 1).await;
-    let codex_home = TempDir::new().unwrap();
-    let codex = new_conversation_for_server(&server, &codex_home, |cfg| {
+    let codex_home = Arc::new(TempDir::new().unwrap());
+    let codex = new_conversation_for_server(&server, codex_home.clone(), |cfg| {
        cfg.model = Some("gpt-4.1".to_string());
        cfg.review_model = None;
    })
@@ -478,6 +479,7 @@ async fn review_uses_session_model_when_review_model_unset() {
    let body = request.body_json();
    assert_eq!(body["model"].as_str().unwrap(), "gpt-4.1");

+    let _codex_home_guard = codex_home;
    server.verify().await;
 }

@@ -497,12 +499,7 @@ async fn review_input_isolated_from_parent_history() {
    let (server, request_log) = start_responses_server_with_sse(sse_raw, 1).await;

    // Seed a parent session history via resume file with both user + assistant items.
-    let codex_home = TempDir::new().unwrap();
-    let mut config = load_default_config_for_test(&codex_home).await;
-    config.model_provider = ModelProviderInfo {
-        base_url: Some(format!("{}/v1", server.uri())),
-        ..built_in_model_providers()["openai"].clone()
-    };
+    let codex_home = Arc::new(TempDir::new().unwrap());

    let session_file = codex_home.path().join("resume.jsonl");
    {
@@ -564,7 +561,8 @@ async fn review_input_isolated_from_parent_history() {
            .unwrap();
    }
    let codex =
-        resume_conversation_for_server(&server, &codex_home, session_file.clone(), |_| {}).await;
+        resume_conversation_for_server(&server, codex_home.clone(), session_file.clone(), |_| {})
+            .await;

    // Submit review request; it must start fresh (no parent history in `input`).
    let review_prompt = "Please review only this".to_string();
@@ -657,6 +655,7 @@ async fn review_input_isolated_from_parent_history() {
        "expected user interruption message in rollout"
    );

+    let _codex_home_guard = codex_home;
    server.verify().await;
 }

@@ -675,8 +674,8 @@ async fn review_history_surfaces_in_parent_session() {
        {"type":"response.completed", "response": {"id": "__ID__"}}
    ]"#;
    let (server, request_log) = start_responses_server_with_sse(sse_raw, 2).await;
-    let codex_home = TempDir::new().unwrap();
-    let codex = new_conversation_for_server(&server, &codex_home, |_| {}).await;
+    let codex_home = Arc::new(TempDir::new().unwrap());
+    let codex = new_conversation_for_server(&server, codex_home.clone(), |_| {}).await;

    // 1) Run a review turn that produces an assistant message (isolated in child).
    codex
@@ -755,6 +754,7 @@ async fn review_history_surfaces_in_parent_session() {
        "review assistant output missing from parent turn input"
    );

+    let _codex_home_guard = codex_home;
    server.verify().await;
 }

@@ -807,9 +807,10 @@ async fn review_uses_overridden_cwd_for_base_branch_merge_base() {
        .trim()
        .to_string();

-    let codex_home = TempDir::new().unwrap();
-    let codex = new_conversation_for_server(&server, &codex_home, |config| {
-        config.cwd = initial_cwd.path().to_path_buf();
+    let codex_home = Arc::new(TempDir::new().unwrap());
+    let initial_cwd_path = initial_cwd.path().to_path_buf();
+    let codex = new_conversation_for_server(&server, codex_home.clone(), move |config| {
+        config.cwd = initial_cwd_path;
    })
    .await;

@@ -818,6 +819,7 @@ async fn review_uses_overridden_cwd_for_base_branch_merge_base() {
            cwd: Some(repo_path.to_path_buf()),
            approval_policy: None,
            sandbox_policy: None,
+            windows_sandbox_level: None,
            model: None,
            effort: None,
            summary: None,
@@ -859,6 +861,7 @@ async fn review_uses_overridden_cwd_for_base_branch_merge_base() {
        "expected review prompt to include merge-base sha {head_sha}"
    );

+    let _codex_home_guard = codex_home;
    server.verify().await;
 }

@@ -878,57 +881,47 @@ async fn start_responses_server_with_sse(
 #[expect(clippy::expect_used)]
 async fn new_conversation_for_server<F>(
    server: &MockServer,
-    codex_home: &TempDir,
+    codex_home: Arc<TempDir>,
    mutator: F,
 ) -> Arc<CodexThread>
 where
-    F: FnOnce(&mut Config),
+    F: FnOnce(&mut Config) + Send + 'static,
 {
-    let model_provider = ModelProviderInfo {
-        base_url: Some(format!("{}/v1", server.uri())),
-        ..built_in_model_providers()["openai"].clone()
-    };
-    let mut config = load_default_config_for_test(codex_home).await;
-    config.model_provider = model_provider;
-    mutator(&mut config);
-    let thread_manager = ThreadManager::with_models_provider(
-        CodexAuth::from_api_key("Test API Key"),
-        config.model_provider.clone(),
-    );
-    thread_manager
-        .start_thread(config)
+    let base_url = format!("{}/v1", server.uri());
+    let mut builder = test_codex()
+        .with_home(codex_home)
+        .with_config(move |config| {
+            config.model_provider.base_url = Some(base_url.clone());
+            mutator(config);
+        });
+    builder
+        .build(server)
        .await
        .expect("create conversation")
-        .thread
+        .codex
 }

 /// Create a conversation resuming from a rollout file, configured to talk to the provided mock server.
 #[expect(clippy::expect_used)]
 async fn resume_conversation_for_server<F>(
    server: &MockServer,
-    codex_home: &TempDir,
+    codex_home: Arc<TempDir>,
    resume_path: std::path::PathBuf,
    mutator: F,
 ) -> Arc<CodexThread>
 where
-    F: FnOnce(&mut Config),
+    F: FnOnce(&mut Config) + Send + 'static,
 {
-    let model_provider = ModelProviderInfo {
-        base_url: Some(format!("{}/v1", server.uri())),
-        ..built_in_model_providers()["openai"].clone()
-    };
-    let mut config = load_default_config_for_test(codex_home).await;
-    config.model_provider = model_provider;
-    mutator(&mut config);
-    let thread_manager = ThreadManager::with_models_provider(
-        CodexAuth::from_api_key("Test API Key"),
-        config.model_provider.clone(),
-    );
-    let auth_manager =
-        codex_core::AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key"));
-    thread_manager
-        .resume_thread_from_rollout(config, resume_path, auth_manager)
+    let base_url = format!("{}/v1", server.uri());
+    let mut builder = test_codex()
+        .with_home(codex_home.clone())
+        .with_config(move |config| {
+            config.model_provider.base_url = Some(base_url.clone());
+            mutator(config);
+        });
+    builder
+        .resume(server, codex_home, resume_path)
        .await
        .expect("resume conversation")
-        .thread
+        .codex
 }
--- a/codex-rs/core/tests/suite/unstable_features_warning.rs
+++ b/codex-rs/core/tests/suite/unstable_features_warning.rs
@@ -0,0 +1,90 @@
+#![allow(clippy::unwrap_used, clippy::expect_used)]
+
+use codex_core::AuthManager;
+use codex_core::CodexAuth;
+use codex_core::NewThread;
+use codex_core::ThreadManager;
+use codex_core::config::CONFIG_TOML_FILE;
+use codex_core::features::Feature;
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::InitialHistory;
+use codex_core::protocol::WarningEvent;
+use codex_utils_absolute_path::AbsolutePathBuf;
+use core::time::Duration;
+use core_test_support::load_default_config_for_test;
+use core_test_support::wait_for_event;
+use tempfile::TempDir;
+use tokio::time::timeout;
+use toml::toml;
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn emits_warning_when_unstable_features_enabled_via_config() {
+    let home = TempDir::new().expect("tempdir");
+    let mut config = load_default_config_for_test(&home).await;
+    config.features.enable(Feature::ChildAgentsMd);
+    let user_config_path =
+        AbsolutePathBuf::from_absolute_path(config.codex_home.join(CONFIG_TOML_FILE))
+            .expect("absolute user config path");
+    config.config_layer_stack = config.config_layer_stack.with_user_config(
+        &user_config_path,
+        toml! { features = { child_agents_md = true } }.into(),
+    );
+
+    let thread_manager = ThreadManager::with_models_provider(
+        CodexAuth::from_api_key("test"),
+        config.model_provider.clone(),
+    );
+    let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("test"));
+
+    let NewThread {
+        thread: conversation,
+        ..
+    } = thread_manager
+        .resume_thread_with_history(config, InitialHistory::New, auth_manager)
+        .await
+        .expect("spawn conversation");
+
+    let warning = wait_for_event(&conversation, |ev| matches!(ev, EventMsg::Warning(_))).await;
+    let EventMsg::Warning(WarningEvent { message }) = warning else {
+        panic!("expected warning event");
+    };
+    assert!(message.contains("child_agents_md"));
+    assert!(message.contains("Under-development features enabled"));
+    assert!(message.contains("suppress_unstable_features_warning = true"));
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn suppresses_warning_when_configured() {
+    let home = TempDir::new().expect("tempdir");
+    let mut config = load_default_config_for_test(&home).await;
+    config.features.enable(Feature::ChildAgentsMd);
+    config.suppress_unstable_features_warning = true;
+    let user_config_path =
+        AbsolutePathBuf::from_absolute_path(config.codex_home.join(CONFIG_TOML_FILE))
+            .expect("absolute user config path");
+    config.config_layer_stack = config.config_layer_stack.with_user_config(
+        &user_config_path,
+        toml! { features = { child_agents_md = true } }.into(),
+    );
+
+    let thread_manager = ThreadManager::with_models_provider(
+        CodexAuth::from_api_key("test"),
+        config.model_provider.clone(),
+    );
+    let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("test"));
+
+    let NewThread {
+        thread: conversation,
+        ..
+    } = thread_manager
+        .resume_thread_with_history(config, InitialHistory::New, auth_manager)
+        .await
+        .expect("spawn conversation");
+
+    let warning = timeout(
+        Duration::from_millis(150),
+        wait_for_event(&conversation, |ev| matches!(ev, EventMsg::Warning(_))),
+    )
+    .await;
+    assert!(warning.is_err());
+}
--- a/codex-rs/core/tests/suite/user_shell_cmd.rs
+++ b/codex-rs/core/tests/suite/user_shell_cmd.rs
@@ -1,6 +1,4 @@
 use anyhow::Context;
-use codex_core::NewThread;
-use codex_core::ThreadManager;
 use codex_core::features::Feature;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::ExecCommandEndEvent;
@@ -10,7 +8,6 @@ use codex_core::protocol::Op;
 use codex_core::protocol::SandboxPolicy;
 use codex_core::protocol::TurnAbortReason;
 use core_test_support::assert_regex_match;
-use core_test_support::load_default_config_for_test;
 use core_test_support::responses;
 use core_test_support::responses::ev_assistant_message;
 use core_test_support::responses::ev_completed;
@@ -38,19 +35,17 @@ async fn user_shell_cmd_ls_and_cat_in_temp_dir() {
        .await
        .expect("write temp file");

-    // Load config and pin cwd to the temp dir so ls/cat operate there.
-    let codex_home = TempDir::new().unwrap();
-    let mut config = load_default_config_for_test(&codex_home).await;
-    config.cwd = cwd.path().to_path_buf();
-
-    let thread_manager = ThreadManager::with_models_provider(
-        codex_core::CodexAuth::from_api_key("dummy"),
-        config.model_provider.clone(),
-    );
-    let NewThread { thread: codex, .. } = thread_manager
-        .start_thread(config)
+    // Pin cwd to the temp dir so ls/cat operate there.
+    let server = start_mock_server().await;
+    let cwd_path = cwd.path().to_path_buf();
+    let mut builder = test_codex().with_config(move |config| {
+        config.cwd = cwd_path;
+    });
+    let codex = builder
+        .build(&server)
        .await
-        .expect("create new conversation");
+        .expect("create new conversation")
+        .codex;

    // 1) shell command should list the file
    let list_cmd = "ls".to_string();
@@ -97,16 +92,13 @@ async fn user_shell_cmd_ls_and_cat_in_temp_dir() {
 #[tokio::test]
 async fn user_shell_cmd_can_be_interrupted() {
    // Set up isolated config and conversation.
-    let codex_home = TempDir::new().unwrap();
-    let config = load_default_config_for_test(&codex_home).await;
-    let thread_manager = ThreadManager::with_models_provider(
-        codex_core::CodexAuth::from_api_key("dummy"),
-        config.model_provider.clone(),
-    );
-    let NewThread { thread: codex, .. } = thread_manager
-        .start_thread(config)
+    let server = start_mock_server().await;
+    let mut builder = test_codex();
+    let codex = builder
+        .build(&server)
        .await
-        .expect("create new conversation");
+        .expect("create new conversation")
+        .codex;

    // Start a long-running command and then interrupt it.
    let sleep_cmd = "sleep 5".to_string();
--- a/codex-rs/core/tests/suite/web_search_cached.rs
+++ b/codex-rs/core/tests/suite/web_search_cached.rs
@@ -35,7 +35,7 @@ async fn web_search_mode_cached_sets_external_web_access_false_in_request_body()
    let mut builder = test_codex()
        .with_model("gpt-5-codex")
        .with_config(|config| {
-            config.web_search_mode = Some(WebSearchMode::Cached);
+            config.web_search_mode = WebSearchMode::Cached;
        });
    let test = builder
        .build(&server)
@@ -67,7 +67,7 @@ async fn web_search_mode_takes_precedence_over_legacy_flags_in_request_body() {
        .with_model("gpt-5-codex")
        .with_config(|config| {
            config.features.enable(Feature::WebSearchRequest);
-            config.web_search_mode = Some(WebSearchMode::Cached);
+            config.web_search_mode = WebSearchMode::Cached;
        });
    let test = builder
        .build(&server)
--- a/codex-rs/exec-server/src/posix/escalate_server.rs
+++ b/codex-rs/exec-server/src/posix/escalate_server.rs
@@ -10,6 +10,7 @@ use path_absolutize::Absolutize as _;

 use codex_core::SandboxState;
 use codex_core::exec::process_exec_tool_call;
+use codex_core::protocol_config_types::WindowsSandboxLevel;
 use codex_core::sandboxing::SandboxPermissions;
 use tokio::process::Command;
 use tokio_util::sync::CancellationToken;
@@ -87,6 +88,7 @@ impl EscalateServer {
                expiration: ExecExpiration::Cancellation(cancel_rx),
                env,
                sandbox_permissions: SandboxPermissions::UseDefault,
+                windows_sandbox_level: WindowsSandboxLevel::Disabled,
                justification: None,
                arg0: None,
            },
--- a/codex-rs/exec/src/cli.rs
+++ b/codex-rs/exec/src/cli.rs
@@ -1,3 +1,5 @@
+use clap::Args;
+use clap::FromArgMatches;
 use clap::Parser;
 use clap::ValueEnum;
 use codex_common::CliConfigOverrides;
@@ -108,20 +110,22 @@ pub enum Command {
    Review(ReviewArgs),
 }

-#[derive(Parser, Debug)]
-pub struct ResumeArgs {
+#[derive(Args, Debug)]
+struct ResumeArgsRaw {
+    // Note: This is the raw clap shape. When --last is set and only one positional is given,
+    // it becomes the prompt, so "codex exec resume --last <prompt>" does not set a session id.
    /// Conversation/session id (UUID). When provided, resumes this session.
    /// If omitted, use --last to pick the most recent recorded session.
    #[arg(value_name = "SESSION_ID")]
-    pub session_id: Option<String>,
+    session_id: Option<String>,

    /// Resume the most recent recorded session (newest) without specifying an id.
    #[arg(long = "last", default_value_t = false)]
-    pub last: bool,
+    last: bool,

    /// Show all sessions (disables cwd filtering).
    #[arg(long = "all", default_value_t = false)]
-    pub all: bool,
+    all: bool,

    /// Optional image(s) to attach to the prompt sent after resuming.
    #[arg(
@@ -131,13 +135,72 @@ pub struct ResumeArgs {
        value_delimiter = ',',
        num_args = 1
    )]
-    pub images: Vec<PathBuf>,
+    images: Vec<PathBuf>,

    /// Prompt to send after resuming the session. If `-` is used, read from stdin.
    #[arg(value_name = "PROMPT", value_hint = clap::ValueHint::Other)]
+    prompt: Option<String>,
+}
+
+#[derive(Debug)]
+pub struct ResumeArgs {
+    /// Conversation/session id (UUID). When provided, resumes this session.
+    /// If omitted, use --last to pick the most recent recorded session.
+    pub session_id: Option<String>,
+
+    /// Resume the most recent recorded session (newest) without specifying an id.
+    pub last: bool,
+
+    /// Show all sessions (disables cwd filtering).
+    pub all: bool,
+
+    /// Optional image(s) to attach to the prompt sent after resuming.
+    pub images: Vec<PathBuf>,
+
+    /// Prompt to send after resuming the session. If `-` is used, read from stdin.
    pub prompt: Option<String>,
 }

+impl From<ResumeArgsRaw> for ResumeArgs {
+    fn from(raw: ResumeArgsRaw) -> Self {
+        // When --last is used without an explicit prompt, treat the positional as the prompt
+        // (clap can’t express this conditional positional meaning cleanly).
+        let (session_id, prompt) = if raw.last && raw.prompt.is_none() {
+            (None, raw.session_id)
+        } else {
+            (raw.session_id, raw.prompt)
+        };
+        Self {
+            session_id,
+            last: raw.last,
+            all: raw.all,
+            images: raw.images,
+            prompt,
+        }
+    }
+}
+
+impl Args for ResumeArgs {
+    fn augment_args(cmd: clap::Command) -> clap::Command {
+        ResumeArgsRaw::augment_args(cmd)
+    }
+
+    fn augment_args_for_update(cmd: clap::Command) -> clap::Command {
+        ResumeArgsRaw::augment_args_for_update(cmd)
+    }
+}
+
+impl FromArgMatches for ResumeArgs {
+    fn from_arg_matches(matches: &clap::ArgMatches) -> Result<Self, clap::Error> {
+        ResumeArgsRaw::from_arg_matches(matches).map(Self::from)
+    }
+
+    fn update_from_arg_matches(&mut self, matches: &clap::ArgMatches) -> Result<(), clap::Error> {
+        *self = ResumeArgsRaw::from_arg_matches(matches).map(Self::from)?;
+        Ok(())
+    }
+}
+
 #[derive(Parser, Debug)]
 pub struct ReviewArgs {
    /// Review staged, unstaged, and untracked changes.
--- a/codex-rs/exec/src/event_processor_with_human_output.rs
+++ b/codex-rs/exec/src/event_processor_with_human_output.rs
@@ -32,6 +32,7 @@ use codex_core::protocol::TurnCompleteEvent;
 use codex_core::protocol::TurnDiffEvent;
 use codex_core::protocol::WarningEvent;
 use codex_core::protocol::WebSearchEndEvent;
+use codex_core::web_search::web_search_detail;
 use codex_protocol::num_format::format_with_separators;
 use owo_colors::OwoColorize;
 use owo_colors::Style;
@@ -370,8 +371,20 @@ impl EventProcessor for EventProcessorWithHumanOutput {
                    }
                }
            }
-            EventMsg::WebSearchEnd(WebSearchEndEvent { call_id: _, query }) => {
-                ts_msg!(self, "🌐 Searched: {query}");
+            EventMsg::WebSearchBegin(_) => {
+                ts_msg!(self, "🌐 Searching the web...");
+            }
+            EventMsg::WebSearchEnd(WebSearchEndEvent {
+                call_id: _,
+                query,
+                action,
+            }) => {
+                let detail = web_search_detail(Some(&action), &query);
+                if detail.is_empty() {
+                    ts_msg!(self, "🌐 Searched the web");
+                } else {
+                    ts_msg!(self, "🌐 Searched: {detail}");
+                }
            }
            EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
                call_id,
@@ -577,8 +590,11 @@ impl EventProcessor for EventProcessorWithHumanOutput {
                    ts_msg!(self, "task aborted: review ended");
                }
            },
-            EventMsg::ContextCompacted(_) => {
-                ts_msg!(self, "context compacted");
+            EventMsg::ContextCompactionStarted(_) => {
+                ts_msg!(self, "context compaction started");
+            }
+            EventMsg::ContextCompactionEnded(_) => {
+                ts_msg!(self, "context compaction ended");
            }
            EventMsg::CollabAgentSpawnBegin(CollabAgentSpawnBeginEvent {
                call_id,
@@ -737,8 +753,7 @@ impl EventProcessor for EventProcessorWithHumanOutput {
                );
            }
            EventMsg::ShutdownComplete => return CodexStatus::Shutdown,
-            EventMsg::WebSearchBegin(_)
-            | EventMsg::ExecApprovalRequest(_)
+            EventMsg::ExecApprovalRequest(_)
            | EventMsg::ApplyPatchApprovalRequest(_)
            | EventMsg::TerminalInteraction(_)
            | EventMsg::ExecCommandOutputDelta(_)
--- a/codex-rs/exec/src/event_processor_with_jsonl_output.rs
+++ b/codex-rs/exec/src/event_processor_with_jsonl_output.rs
@@ -49,6 +49,7 @@ use codex_core::protocol::CollabCloseBeginEvent;
 use codex_core::protocol::CollabCloseEndEvent;
 use codex_core::protocol::CollabWaitingBeginEvent;
 use codex_core::protocol::CollabWaitingEndEvent;
+use codex_protocol::models::WebSearchAction;
 use codex_protocol::plan_tool::StepStatus;
 use codex_protocol::plan_tool::UpdatePlanArgs;
 use serde_json::Value as JsonValue;
@@ -66,6 +67,7 @@ pub struct EventProcessorWithJsonOutput {
    last_total_token_usage: Option<codex_core::protocol::TokenUsage>,
    running_mcp_tool_calls: HashMap<String, RunningMcpToolCall>,
    running_collab_tool_calls: HashMap<String, RunningCollabToolCall>,
+    running_web_search_calls: HashMap<String, String>,
    last_critical_error: Option<ThreadErrorEvent>,
 }

@@ -107,6 +109,7 @@ impl EventProcessorWithJsonOutput {
            last_total_token_usage: None,
            running_mcp_tool_calls: HashMap::new(),
            running_collab_tool_calls: HashMap::new(),
+            running_web_search_calls: HashMap::new(),
            last_critical_error: None,
        }
    }
@@ -138,7 +141,7 @@ impl EventProcessorWithJsonOutput {
            protocol::EventMsg::CollabCloseEnd(ev) => self.handle_collab_close_end(ev),
            protocol::EventMsg::PatchApplyBegin(ev) => self.handle_patch_apply_begin(ev),
            protocol::EventMsg::PatchApplyEnd(ev) => self.handle_patch_apply_end(ev),
-            protocol::EventMsg::WebSearchBegin(_) => Vec::new(),
+            protocol::EventMsg::WebSearchBegin(ev) => self.handle_web_search_begin(ev),
            protocol::EventMsg::WebSearchEnd(ev) => self.handle_web_search_end(ev),
            protocol::EventMsg::TokenCount(ev) => {
                if let Some(info) = &ev.info {
@@ -195,11 +198,36 @@ impl EventProcessorWithJsonOutput {
        })]
    }

-    fn handle_web_search_end(&self, ev: &protocol::WebSearchEndEvent) -> Vec<ThreadEvent> {
+    fn handle_web_search_begin(&mut self, ev: &protocol::WebSearchBeginEvent) -> Vec<ThreadEvent> {
+        if self.running_web_search_calls.contains_key(&ev.call_id) {
+            return Vec::new();
+        }
+        let item_id = self.get_next_item_id();
+        self.running_web_search_calls
+            .insert(ev.call_id.clone(), item_id.clone());
        let item = ThreadItem {
-            id: self.get_next_item_id(),
+            id: item_id,
            details: ThreadItemDetails::WebSearch(WebSearchItem {
+                id: ev.call_id.clone(),
+                query: String::new(),
+                action: WebSearchAction::Other,
+            }),
+        };
+
+        vec![ThreadEvent::ItemStarted(ItemStartedEvent { item })]
+    }
+
+    fn handle_web_search_end(&mut self, ev: &protocol::WebSearchEndEvent) -> Vec<ThreadEvent> {
+        let item_id = self
+            .running_web_search_calls
+            .remove(&ev.call_id)
+            .unwrap_or_else(|| self.get_next_item_id());
+        let item = ThreadItem {
+            id: item_id,
+            details: ThreadItemDetails::WebSearch(WebSearchItem {
+                id: ev.call_id.clone(),
                query: ev.query.clone(),
+                action: ev.action.clone(),
            }),
        };

--- a/codex-rs/exec/src/exec_events.rs
+++ b/codex-rs/exec/src/exec_events.rs
@@ -1,3 +1,4 @@
+use codex_protocol::models::WebSearchAction;
 use mcp_types::ContentBlock as McpContentBlock;
 use serde::Deserialize;
 use serde::Serialize;
@@ -280,7 +281,9 @@ pub struct McpToolCallItem {
 /// A web search request.
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
 pub struct WebSearchItem {
+    pub id: String,
    pub query: String,
+    pub action: WebSearchAction,
 }

 /// An error notification.
--- a/codex-rs/exec/tests/event_processor_with_json_output.rs
+++ b/codex-rs/exec/tests/event_processor_with_json_output.rs
@@ -20,6 +20,7 @@ use codex_core::protocol::PatchApplyEndEvent;
 use codex_core::protocol::SandboxPolicy;
 use codex_core::protocol::SessionConfiguredEvent;
 use codex_core::protocol::WarningEvent;
+use codex_core::protocol::WebSearchBeginEvent;
 use codex_core::protocol::WebSearchEndEvent;
 use codex_exec::event_processor_with_jsonl_output::EventProcessorWithJsonOutput;
 use codex_exec::exec_events::AgentMessageItem;
@@ -54,6 +55,7 @@ use codex_exec::exec_events::TurnStartedEvent;
 use codex_exec::exec_events::Usage;
 use codex_exec::exec_events::WebSearchItem;
 use codex_protocol::ThreadId;
+use codex_protocol::models::WebSearchAction;
 use codex_protocol::plan_tool::PlanItemArg;
 use codex_protocol::plan_tool::StepStatus;
 use codex_protocol::plan_tool::UpdatePlanArgs;
@@ -124,11 +126,15 @@ fn task_started_produces_turn_started_event() {
 fn web_search_end_emits_item_completed() {
    let mut ep = EventProcessorWithJsonOutput::new(None);
    let query = "rust async await".to_string();
+    let action = WebSearchAction::Search {
+        query: Some(query.clone()),
+    };
    let out = ep.collect_thread_events(&event(
        "w1",
        EventMsg::WebSearchEnd(WebSearchEndEvent {
            call_id: "call-123".to_string(),
            query: query.clone(),
+            action: action.clone(),
        }),
    ));

@@ -137,12 +143,82 @@ fn web_search_end_emits_item_completed() {
        vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
            item: ThreadItem {
                id: "item_0".to_string(),
-                details: ThreadItemDetails::WebSearch(WebSearchItem { query }),
+                details: ThreadItemDetails::WebSearch(WebSearchItem {
+                    id: "call-123".to_string(),
+                    query,
+                    action,
+                }),
            },
        })]
    );
 }

+#[test]
+fn web_search_begin_emits_item_started() {
+    let mut ep = EventProcessorWithJsonOutput::new(None);
+    let out = ep.collect_thread_events(&event(
+        "w0",
+        EventMsg::WebSearchBegin(WebSearchBeginEvent {
+            call_id: "call-0".to_string(),
+        }),
+    ));
+
+    assert_eq!(out.len(), 1);
+    let ThreadEvent::ItemStarted(ItemStartedEvent { item }) = &out[0] else {
+        panic!("expected ItemStarted");
+    };
+    assert!(item.id.starts_with("item_"));
+    assert_eq!(
+        item.details,
+        ThreadItemDetails::WebSearch(WebSearchItem {
+            id: "call-0".to_string(),
+            query: String::new(),
+            action: WebSearchAction::Other,
+        })
+    );
+}
+
+#[test]
+fn web_search_begin_then_end_reuses_item_id() {
+    let mut ep = EventProcessorWithJsonOutput::new(None);
+    let begin = ep.collect_thread_events(&event(
+        "w0",
+        EventMsg::WebSearchBegin(WebSearchBeginEvent {
+            call_id: "call-1".to_string(),
+        }),
+    ));
+    let ThreadEvent::ItemStarted(ItemStartedEvent { item: started_item }) = &begin[0] else {
+        panic!("expected ItemStarted");
+    };
+    let action = WebSearchAction::Search {
+        query: Some("rust async await".to_string()),
+    };
+    let end = ep.collect_thread_events(&event(
+        "w1",
+        EventMsg::WebSearchEnd(WebSearchEndEvent {
+            call_id: "call-1".to_string(),
+            query: "rust async await".to_string(),
+            action: action.clone(),
+        }),
+    ));
+    let ThreadEvent::ItemCompleted(ItemCompletedEvent {
+        item: completed_item,
+    }) = &end[0]
+    else {
+        panic!("expected ItemCompleted");
+    };
+
+    assert_eq!(completed_item.id, started_item.id);
+    assert_eq!(
+        completed_item.details,
+        ThreadItemDetails::WebSearch(WebSearchItem {
+            id: "call-1".to_string(),
+            query: "rust async await".to_string(),
+            action,
+        })
+    );
+}
+
 #[test]
 fn plan_update_emits_todo_list_started_updated_and_completed() {
    let mut ep = EventProcessorWithJsonOutput::new(None);
--- a/codex-rs/linux-sandbox/tests/suite/landlock.rs
+++ b/codex-rs/linux-sandbox/tests/suite/landlock.rs
@@ -7,6 +7,7 @@ use codex_core::exec::ExecParams;
 use codex_core::exec::process_exec_tool_call;
 use codex_core::exec_env::create_env;
 use codex_core::protocol::SandboxPolicy;
+use codex_core::protocol_config_types::WindowsSandboxLevel;
 use codex_core::sandboxing::SandboxPermissions;
 use codex_utils_absolute_path::AbsolutePathBuf;
 use pretty_assertions::assert_eq;
@@ -60,6 +61,7 @@ async fn run_cmd_output(
        expiration: timeout_ms.into(),
        env: create_env_from_core_vars(),
        sandbox_permissions: SandboxPermissions::UseDefault,
+        windows_sandbox_level: WindowsSandboxLevel::Disabled,
        justification: None,
        arg0: None,
    };
@@ -177,6 +179,7 @@ async fn assert_network_blocked(cmd: &[&str]) {
        expiration: NETWORK_TIMEOUT_MS.into(),
        env: create_env_from_core_vars(),
        sandbox_permissions: SandboxPermissions::UseDefault,
+        windows_sandbox_level: WindowsSandboxLevel::Disabled,
        justification: None,
        arg0: None,
    };
--- a/codex-rs/lmstudio/Cargo.toml
+++ b/codex-rs/lmstudio/Cargo.toml
@@ -14,7 +14,7 @@ codex-core = { path = "../core" }
 reqwest = { version = "0.12", features = ["json", "stream"] }
 serde_json = "1"
 tokio = { version = "1", features = ["rt"] }
-tracing = { version = "0.1.43", features = ["log"] }
+tracing = { version = "0.1.44", features = ["log"] }
 which = "8.0"

 [dev-dependencies]
--- a/codex-rs/mcp-server/src/codex_tool_runner.rs
+++ b/codex-rs/mcp-server/src/codex_tool_runner.rs
@@ -361,7 +361,8 @@ async fn run_codex_tool_session_inner(
                    | EventMsg::ExitedReviewMode(_)
                    | EventMsg::RequestUserInput(_)
                    | EventMsg::DynamicToolCallRequest(_)
-                    | EventMsg::ContextCompacted(_)
+                    | EventMsg::ContextCompactionStarted(_)
+                    | EventMsg::ContextCompactionEnded(_)
                    | EventMsg::ThreadRolledBack(_)
                    | EventMsg::CollabAgentSpawnBegin(_)
                    | EventMsg::CollabAgentSpawnEnd(_)
--- a/codex-rs/network-proxy/Cargo.toml
+++ b/codex-rs/network-proxy/Cargo.toml
@@ -34,6 +34,7 @@ rama-core = { version = "=0.3.0-alpha.4" }
 rama-http = { version = "=0.3.0-alpha.4" }
 rama-http-backend = { version = "=0.3.0-alpha.4", features = ["tls"] }
 rama-net = { version = "=0.3.0-alpha.4", features = ["http", "tls"] }
+rama-socks5 = { version = "=0.3.0-alpha.4" }
 rama-tcp = { version = "=0.3.0-alpha.4", features = ["http"] }
 rama-tls-boring = { version = "=0.3.0-alpha.4", features = ["http"] }

--- a/codex-rs/network-proxy/README.md
+++ b/codex-rs/network-proxy/README.md
@@ -3,6 +3,7 @@
 `codex-network-proxy` is Codex's local network policy enforcement proxy. It runs:

 - an HTTP proxy (default `127.0.0.1:3128`)
+- an optional SOCKS5 proxy (off unless `enable_socks5 = true`; default address `127.0.0.1:8081`)
 - an admin HTTP API (default `127.0.0.1:8080`)

 It enforces an allow/deny policy and a "limited" mode intended for read-only network access.
@@ -20,6 +21,10 @@ Example config:
 enabled = true
 proxy_url = "http://127.0.0.1:3128"
 admin_url = "http://127.0.0.1:8080"
+# Optional SOCKS5 listener (disabled by default).
+enable_socks5 = false
+socks_url = "http://127.0.0.1:8081"
+enable_socks5_udp = false
 # When `enabled` is false, the proxy no-ops and does not bind listeners.
 # When true, respect HTTP(S)_PROXY/ALL_PROXY for upstream requests (HTTP(S) proxies only),
 # including CONNECT tunnels in full mode.
@@ -28,7 +33,7 @@ allow_upstream_proxy = false
 # If you want to expose these listeners beyond localhost, you must opt in explicitly.
 dangerously_allow_non_loopback_proxy = false
 dangerously_allow_non_loopback_admin = false
-mode = "limited" # or "full"
+mode = "full" # default when unset; use "limited" for read-only mode

 [network_proxy.policy]
 # Hosts must match the allowlist (unless denied).
@@ -60,6 +65,12 @@ export HTTP_PROXY="http://127.0.0.1:3128"
 export HTTPS_PROXY="http://127.0.0.1:3128"
 ```

+For SOCKS5 traffic (when `enable_socks5 = true`):
+
+```bash
+export ALL_PROXY="socks5h://127.0.0.1:8081"
+```
+
 ### 4) Understand blocks / debugging

 When a request is blocked, the proxy responds with `403` and includes:
@@ -70,8 +81,8 @@ When a request is blocked, the proxy responds with `403` and includes:
  - `blocked-by-method-policy`
  - `blocked-by-policy`

-In "limited" mode, only `GET`, `HEAD`, and `OPTIONS` are allowed for plain HTTP. HTTPS `CONNECT`
-remains a transparent tunnel, so limited-mode method enforcement does not apply to HTTPS.
+In "limited" mode, only `GET`, `HEAD`, and `OPTIONS` are allowed. HTTPS `CONNECT` and SOCKS5 are
+blocked because they would bypass method enforcement.

 ## Library API

--- a/codex-rs/network-proxy/src/config.rs
+++ b/codex-rs/network-proxy/src/config.rs
@@ -23,6 +23,12 @@ pub struct NetworkProxySettings {
    #[serde(default = "default_admin_url")]
    pub admin_url: String,
    #[serde(default)]
+    pub enable_socks5: bool,
+    #[serde(default = "default_socks_url")]
+    pub socks_url: String,
+    #[serde(default)]
+    pub enable_socks5_udp: bool,
+    #[serde(default)]
    pub allow_upstream_proxy: bool,
    #[serde(default)]
    pub dangerously_allow_non_loopback_proxy: bool,
@@ -40,6 +46,9 @@ impl Default for NetworkProxySettings {
            enabled: false,
            proxy_url: default_proxy_url(),
            admin_url: default_admin_url(),
+            enable_socks5: false,
+            socks_url: default_socks_url(),
+            enable_socks5_udp: false,
            allow_upstream_proxy: false,
            dangerously_allow_non_loopback_proxy: false,
            dangerously_allow_non_loopback_admin: false,
@@ -90,6 +99,10 @@ fn default_admin_url() -> String {
    "http://127.0.0.1:8080".to_string()
 }

+fn default_socks_url() -> String {
+    "http://127.0.0.1:8081".to_string()
+}
+
 /// Clamp non-loopback bind addresses to loopback unless explicitly allowed.
 fn clamp_non_loopback(addr: SocketAddr, allow_non_loopback: bool, name: &str) -> SocketAddr {
    if addr.ip().is_loopback() {
@@ -110,21 +123,27 @@ fn clamp_non_loopback(addr: SocketAddr, allow_non_loopback: bool, name: &str) ->

 pub(crate) fn clamp_bind_addrs(
    http_addr: SocketAddr,
+    socks_addr: SocketAddr,
    admin_addr: SocketAddr,
    cfg: &NetworkProxySettings,
-) -> (SocketAddr, SocketAddr) {
+) -> (SocketAddr, SocketAddr, SocketAddr) {
    let http_addr = clamp_non_loopback(
        http_addr,
        cfg.dangerously_allow_non_loopback_proxy,
        "HTTP proxy",
    );
+    let socks_addr = clamp_non_loopback(
+        socks_addr,
+        cfg.dangerously_allow_non_loopback_proxy,
+        "SOCKS5 proxy",
+    );
    let admin_addr = clamp_non_loopback(
        admin_addr,
        cfg.dangerously_allow_non_loopback_admin,
        "admin API",
    );
    if cfg.policy.allow_unix_sockets.is_empty() {
-        return (http_addr, admin_addr);
+        return (http_addr, socks_addr, admin_addr);
    }

    // `x-unix-socket` is intentionally a local escape hatch. If the proxy (or admin API) is
@@ -136,6 +155,11 @@ pub(crate) fn clamp_bind_addrs(
            "unix socket proxying is enabled; ignoring dangerously_allow_non_loopback_proxy and clamping HTTP proxy to loopback"
        );
    }
+    if cfg.dangerously_allow_non_loopback_proxy && !socks_addr.ip().is_loopback() {
+        warn!(
+            "unix socket proxying is enabled; ignoring dangerously_allow_non_loopback_proxy and clamping SOCKS5 proxy to loopback"
+        );
+    }
    if cfg.dangerously_allow_non_loopback_admin && !admin_addr.ip().is_loopback() {
        warn!(
            "unix socket proxying is enabled; ignoring dangerously_allow_non_loopback_admin and clamping admin API to loopback"
@@ -143,12 +167,14 @@ pub(crate) fn clamp_bind_addrs(
    }
    (
        SocketAddr::from(([127, 0, 0, 1], http_addr.port())),
+        SocketAddr::from(([127, 0, 0, 1], socks_addr.port())),
        SocketAddr::from(([127, 0, 0, 1], admin_addr.port())),
    )
 }

 pub struct RuntimeConfig {
    pub http_addr: SocketAddr,
+    pub socks_addr: SocketAddr,
    pub admin_addr: SocketAddr,
 }

@@ -159,16 +185,24 @@ pub fn resolve_runtime(cfg: &NetworkProxyConfig) -> Result<RuntimeConfig> {
            cfg.network_proxy.proxy_url
        )
    })?;
+    let socks_addr = resolve_addr(&cfg.network_proxy.socks_url, 8081).with_context(|| {
+        format!(
+            "invalid network_proxy.socks_url: {}",
+            cfg.network_proxy.socks_url
+        )
+    })?;
    let admin_addr = resolve_addr(&cfg.network_proxy.admin_url, 8080).with_context(|| {
        format!(
            "invalid network_proxy.admin_url: {}",
            cfg.network_proxy.admin_url
        )
    })?;
-    let (http_addr, admin_addr) = clamp_bind_addrs(http_addr, admin_addr, &cfg.network_proxy);
+    let (http_addr, socks_addr, admin_addr) =
+        clamp_bind_addrs(http_addr, socks_addr, admin_addr, &cfg.network_proxy);

    Ok(RuntimeConfig {
        http_addr,
+        socks_addr,
        admin_addr,
    })
 }
@@ -403,11 +437,14 @@ mod tests {
            ..Default::default()
        };
        let http_addr = "0.0.0.0:3128".parse::<SocketAddr>().unwrap();
+        let socks_addr = "0.0.0.0:8081".parse::<SocketAddr>().unwrap();
        let admin_addr = "0.0.0.0:8080".parse::<SocketAddr>().unwrap();

-        let (http_addr, admin_addr) = clamp_bind_addrs(http_addr, admin_addr, &cfg);
+        let (http_addr, socks_addr, admin_addr) =
+            clamp_bind_addrs(http_addr, socks_addr, admin_addr, &cfg);

        assert_eq!(http_addr, "0.0.0.0:3128".parse::<SocketAddr>().unwrap());
+        assert_eq!(socks_addr, "0.0.0.0:8081".parse::<SocketAddr>().unwrap());
        assert_eq!(admin_addr, "0.0.0.0:8080".parse::<SocketAddr>().unwrap());
    }

@@ -423,11 +460,14 @@ mod tests {
            ..Default::default()
        };
        let http_addr = "0.0.0.0:3128".parse::<SocketAddr>().unwrap();
+        let socks_addr = "0.0.0.0:8081".parse::<SocketAddr>().unwrap();
        let admin_addr = "0.0.0.0:8080".parse::<SocketAddr>().unwrap();

-        let (http_addr, admin_addr) = clamp_bind_addrs(http_addr, admin_addr, &cfg);
+        let (http_addr, socks_addr, admin_addr) =
+            clamp_bind_addrs(http_addr, socks_addr, admin_addr, &cfg);

        assert_eq!(http_addr, "127.0.0.1:3128".parse::<SocketAddr>().unwrap());
+        assert_eq!(socks_addr, "127.0.0.1:8081".parse::<SocketAddr>().unwrap());
        assert_eq!(admin_addr, "127.0.0.1:8080".parse::<SocketAddr>().unwrap());
    }
 }
--- a/codex-rs/network-proxy/src/http_proxy.rs
+++ b/codex-rs/network-proxy/src/http_proxy.rs
@@ -2,6 +2,7 @@ use crate::config::NetworkMode;
 use crate::network_policy::NetworkDecision;
 use crate::network_policy::NetworkPolicyDecider;
 use crate::network_policy::NetworkPolicyRequest;
+use crate::network_policy::NetworkPolicyRequestArgs;
 use crate::network_policy::NetworkProtocol;
 use crate::network_policy::evaluate_host_policy;
 use crate::policy::normalize_host;
@@ -12,6 +13,7 @@ use crate::responses::blocked_header_value;
 use crate::responses::json_response;
 use crate::runtime::unix_socket_permissions_supported;
 use crate::state::BlockedRequest;
+use crate::state::BlockedRequestArgs;
 use crate::state::NetworkProxyState;
 use crate::upstream::UpstreamClient;
 use crate::upstream::proxy_for_connect;
@@ -146,27 +148,27 @@ async fn http_connect_accept(
        .await);
    }

-    let request = NetworkPolicyRequest::new(
-        NetworkProtocol::HttpsConnect,
-        host.clone(),
-        authority.port,
-        client.clone(),
-        Some("CONNECT".to_string()),
-        None,
-        None,
-    );
+    let request = NetworkPolicyRequest::new(NetworkPolicyRequestArgs {
+        protocol: NetworkProtocol::HttpsConnect,
+        host: host.clone(),
+        port: authority.port,
+        client_addr: client.clone(),
+        method: Some("CONNECT".to_string()),
+        command: None,
+        exec_policy_hint: None,
+    });

    match evaluate_host_policy(&app_state, policy_decider.as_ref(), &request).await {
        Ok(NetworkDecision::Deny { reason }) => {
            let _ = app_state
-                .record_blocked(BlockedRequest::new(
-                    host.clone(),
-                    reason.clone(),
-                    client.clone(),
-                    Some("CONNECT".to_string()),
-                    None,
-                    "http-connect".to_string(),
-                ))
+                .record_blocked(BlockedRequest::new(BlockedRequestArgs {
+                    host: host.clone(),
+                    reason: reason.clone(),
+                    client: client.clone(),
+                    method: Some("CONNECT".to_string()),
+                    mode: None,
+                    protocol: "http-connect".to_string(),
+                }))
                .await;
            let client = client.as_deref().unwrap_or_default();
            warn!("CONNECT blocked (client={client}, host={host}, reason={reason})");
@@ -189,14 +191,14 @@ async fn http_connect_accept(

    if mode == NetworkMode::Limited {
        let _ = app_state
-            .record_blocked(BlockedRequest::new(
-                host.clone(),
-                REASON_METHOD_NOT_ALLOWED.to_string(),
-                client.clone(),
-                Some("CONNECT".to_string()),
-                Some(NetworkMode::Limited),
-                "http-connect".to_string(),
-            ))
+            .record_blocked(BlockedRequest::new(BlockedRequestArgs {
+                host: host.clone(),
+                reason: REASON_METHOD_NOT_ALLOWED.to_string(),
+                client: client.clone(),
+                method: Some("CONNECT".to_string()),
+                mode: Some(NetworkMode::Limited),
+                protocol: "http-connect".to_string(),
+            }))
            .await;
        let client = client.as_deref().unwrap_or_default();
        warn!("CONNECT blocked by method policy (client={client}, host={host}, mode=limited)");
@@ -425,27 +427,27 @@ async fn http_plain_proxy(
        .await);
    }

-    let request = NetworkPolicyRequest::new(
-        NetworkProtocol::Http,
-        host.clone(),
+    let request = NetworkPolicyRequest::new(NetworkPolicyRequestArgs {
+        protocol: NetworkProtocol::Http,
+        host: host.clone(),
        port,
-        client.clone(),
-        Some(req.method().as_str().to_string()),
-        None,
-        None,
-    );
+        client_addr: client.clone(),
+        method: Some(req.method().as_str().to_string()),
+        command: None,
+        exec_policy_hint: None,
+    });

    match evaluate_host_policy(&app_state, policy_decider.as_ref(), &request).await {
        Ok(NetworkDecision::Deny { reason }) => {
            let _ = app_state
-                .record_blocked(BlockedRequest::new(
-                    host.clone(),
-                    reason.clone(),
-                    client.clone(),
-                    Some(req.method().as_str().to_string()),
-                    None,
-                    "http".to_string(),
-                ))
+                .record_blocked(BlockedRequest::new(BlockedRequestArgs {
+                    host: host.clone(),
+                    reason: reason.clone(),
+                    client: client.clone(),
+                    method: Some(req.method().as_str().to_string()),
+                    mode: None,
+                    protocol: "http".to_string(),
+                }))
                .await;
            let client = client.as_deref().unwrap_or_default();
            warn!("request blocked (client={client}, host={host}, reason={reason})");
@@ -460,14 +462,14 @@ async fn http_plain_proxy(

    if !method_allowed {
        let _ = app_state
-            .record_blocked(BlockedRequest::new(
-                host.clone(),
-                REASON_METHOD_NOT_ALLOWED.to_string(),
-                client.clone(),
-                Some(req.method().as_str().to_string()),
-                Some(NetworkMode::Limited),
-                "http".to_string(),
-            ))
+            .record_blocked(BlockedRequest::new(BlockedRequestArgs {
+                host: host.clone(),
+                reason: REASON_METHOD_NOT_ALLOWED.to_string(),
+                client: client.clone(),
+                method: Some(req.method().as_str().to_string()),
+                mode: Some(NetworkMode::Limited),
+                protocol: "http".to_string(),
+            }))
            .await;
        let client = client.as_deref().unwrap_or_default();
        let method = req.method();
@@ -565,14 +567,14 @@ async fn proxy_disabled_response(
    protocol: &str,
 ) -> Response {
    let _ = app_state
-        .record_blocked(BlockedRequest::new(
+        .record_blocked(BlockedRequest::new(BlockedRequestArgs {
            host,
-            REASON_PROXY_DISABLED.to_string(),
+            reason: REASON_PROXY_DISABLED.to_string(),
            client,
            method,
-            None,
-            protocol.to_string(),
-        ))
+            mode: None,
+            protocol: protocol.to_string(),
+        }))
        .await;
    text_response(StatusCode::SERVICE_UNAVAILABLE, "proxy disabled")
 }
--- a/codex-rs/network-proxy/src/lib.rs
+++ b/codex-rs/network-proxy/src/lib.rs
@@ -9,6 +9,7 @@ mod proxy;
 mod reasons;
 mod responses;
 mod runtime;
+mod socks5;
 mod state;
 mod upstream;

@@ -16,6 +17,7 @@ use anyhow::Result;
 pub use network_policy::NetworkDecision;
 pub use network_policy::NetworkPolicyDecider;
 pub use network_policy::NetworkPolicyRequest;
+pub use network_policy::NetworkPolicyRequestArgs;
 pub use network_policy::NetworkProtocol;
 pub use proxy::Args;
 pub use proxy::NetworkProxy;
--- a/codex-rs/network-proxy/src/network_policy.rs
+++ b/codex-rs/network-proxy/src/network_policy.rs
@@ -26,16 +26,27 @@ pub struct NetworkPolicyRequest {
    pub exec_policy_hint: Option<String>,
 }

+pub struct NetworkPolicyRequestArgs {
+    pub protocol: NetworkProtocol,
+    pub host: String,
+    pub port: u16,
+    pub client_addr: Option<String>,
+    pub method: Option<String>,
+    pub command: Option<String>,
+    pub exec_policy_hint: Option<String>,
+}
+
 impl NetworkPolicyRequest {
-    pub fn new(
-        protocol: NetworkProtocol,
-        host: String,
-        port: u16,
-        client_addr: Option<String>,
-        method: Option<String>,
-        command: Option<String>,
-        exec_policy_hint: Option<String>,
-    ) -> Self {
+    pub fn new(args: NetworkPolicyRequestArgs) -> Self {
+        let NetworkPolicyRequestArgs {
+            protocol,
+            host,
+            port,
+            client_addr,
+            method,
+            command,
+            exec_policy_hint,
+        } = args;
        Self {
            protocol,
            host,
@@ -139,15 +150,15 @@ mod tests {
            }
        });

-        let request = NetworkPolicyRequest::new(
-            NetworkProtocol::Http,
-            "example.com".to_string(),
-            80,
-            None,
-            Some("GET".to_string()),
-            None,
-            None,
-        );
+        let request = NetworkPolicyRequest::new(NetworkPolicyRequestArgs {
+            protocol: NetworkProtocol::Http,
+            host: "example.com".to_string(),
+            port: 80,
+            client_addr: None,
+            method: Some("GET".to_string()),
+            command: None,
+            exec_policy_hint: None,
+        });

        let decision = evaluate_host_policy(&state, Some(&decider), &request)
            .await
@@ -172,15 +183,15 @@ mod tests {
            }
        });

-        let request = NetworkPolicyRequest::new(
-            NetworkProtocol::Http,
-            "blocked.com".to_string(),
-            80,
-            None,
-            Some("GET".to_string()),
-            None,
-            None,
-        );
+        let request = NetworkPolicyRequest::new(NetworkPolicyRequestArgs {
+            protocol: NetworkProtocol::Http,
+            host: "blocked.com".to_string(),
+            port: 80,
+            client_addr: None,
+            method: Some("GET".to_string()),
+            command: None,
+            exec_policy_hint: None,
+        });

        let decision = evaluate_host_policy(&state, Some(&decider), &request)
            .await
@@ -210,15 +221,15 @@ mod tests {
            }
        });

-        let request = NetworkPolicyRequest::new(
-            NetworkProtocol::Http,
-            "127.0.0.1".to_string(),
-            80,
-            None,
-            Some("GET".to_string()),
-            None,
-            None,
-        );
+        let request = NetworkPolicyRequest::new(NetworkPolicyRequestArgs {
+            protocol: NetworkProtocol::Http,
+            host: "127.0.0.1".to_string(),
+            port: 80,
+            client_addr: None,
+            method: Some("GET".to_string()),
+            command: None,
+            exec_policy_hint: None,
+        });

        let decision = evaluate_host_policy(&state, Some(&decider), &request)
            .await
--- a/codex-rs/network-proxy/src/proxy.rs
+++ b/codex-rs/network-proxy/src/proxy.rs
@@ -3,6 +3,7 @@ use crate::config;
 use crate::http_proxy;
 use crate::network_policy::NetworkPolicyDecider;
 use crate::runtime::unix_socket_permissions_supported;
+use crate::socks5;
 use crate::state::NetworkProxyState;
 use anyhow::Context;
 use anyhow::Result;
@@ -61,8 +62,9 @@ impl NetworkProxyBuilder {
        let current_cfg = state.current_cfg().await?;
        let runtime = config::resolve_runtime(&current_cfg)?;
        // Reapply bind clamping for caller overrides so unix-socket proxying stays loopback-only.
-        let (http_addr, admin_addr) = config::clamp_bind_addrs(
+        let (http_addr, socks_addr, admin_addr) = config::clamp_bind_addrs(
            self.http_addr.unwrap_or(runtime.http_addr),
+            runtime.socks_addr,
            self.admin_addr.unwrap_or(runtime.admin_addr),
            &current_cfg.network_proxy,
        );
@@ -70,6 +72,7 @@ impl NetworkProxyBuilder {
        Ok(NetworkProxy {
            state,
            http_addr,
+            socks_addr,
            admin_addr,
            policy_decider: self.policy_decider,
        })
@@ -80,6 +83,7 @@ impl NetworkProxyBuilder {
 pub struct NetworkProxy {
    state: Arc<NetworkProxyState>,
    http_addr: SocketAddr,
+    socks_addr: SocketAddr,
    admin_addr: SocketAddr,
    policy_decider: Option<Arc<dyn NetworkPolicyDecider>>,
 }
@@ -105,10 +109,21 @@ impl NetworkProxy {
            self.http_addr,
            self.policy_decider.clone(),
        ));
+        let socks_task = if current_cfg.network_proxy.enable_socks5 {
+            Some(tokio::spawn(socks5::run_socks5(
+                self.state.clone(),
+                self.socks_addr,
+                self.policy_decider.clone(),
+                current_cfg.network_proxy.enable_socks5_udp,
+            )))
+        } else {
+            None
+        };
        let admin_task = tokio::spawn(admin::run_admin_api(self.state.clone(), self.admin_addr));

        Ok(NetworkProxyHandle {
            http_task: Some(http_task),
+            socks_task,
            admin_task: Some(admin_task),
            completed: false,
        })
@@ -117,6 +132,7 @@ impl NetworkProxy {

 pub struct NetworkProxyHandle {
    http_task: Option<JoinHandle<Result<()>>>,
+    socks_task: Option<JoinHandle<Result<()>>>,
    admin_task: Option<JoinHandle<Result<()>>>,
    completed: bool,
 }
@@ -125,6 +141,7 @@ impl NetworkProxyHandle {
    fn noop() -> Self {
        Self {
            http_task: Some(tokio::spawn(async { Ok(()) })),
+            socks_task: None,
            admin_task: Some(tokio::spawn(async { Ok(()) })),
            completed: true,
        }
@@ -133,33 +150,49 @@ impl NetworkProxyHandle {
    pub async fn wait(mut self) -> Result<()> {
        let http_task = self.http_task.take().context("missing http proxy task")?;
        let admin_task = self.admin_task.take().context("missing admin proxy task")?;
+        let socks_task = self.socks_task.take();
        let http_result = http_task.await;
        let admin_result = admin_task.await;
+        let socks_result = match socks_task {
+            Some(task) => Some(task.await),
+            None => None,
+        };
        self.completed = true;
        http_result??;
        admin_result??;
+        if let Some(socks_result) = socks_result {
+            socks_result??;
+        }
        Ok(())
    }

    pub async fn shutdown(mut self) -> Result<()> {
-        abort_tasks(self.http_task.take(), self.admin_task.take()).await;
+        abort_tasks(
+            self.http_task.take(),
+            self.socks_task.take(),
+            self.admin_task.take(),
+        )
+        .await;
        self.completed = true;
        Ok(())
    }
 }

+async fn abort_task(task: Option<JoinHandle<Result<()>>>) {
+    if let Some(task) = task {
+        task.abort();
+        let _ = task.await;
+    }
+}
+
 async fn abort_tasks(
    http_task: Option<JoinHandle<Result<()>>>,
+    socks_task: Option<JoinHandle<Result<()>>>,
    admin_task: Option<JoinHandle<Result<()>>>,
 ) {
-    if let Some(http_task) = http_task {
-        http_task.abort();
-        let _ = http_task.await;
-    }
-    if let Some(admin_task) = admin_task {
-        admin_task.abort();
-        let _ = admin_task.await;
-    }
+    abort_task(http_task).await;
+    abort_task(socks_task).await;
+    abort_task(admin_task).await;
 }

 impl Drop for NetworkProxyHandle {
@@ -168,9 +201,10 @@ impl Drop for NetworkProxyHandle {
            return;
        }
        let http_task = self.http_task.take();
+        let socks_task = self.socks_task.take();
        let admin_task = self.admin_task.take();
        tokio::spawn(async move {
-            abort_tasks(http_task, admin_task).await;
+            abort_tasks(http_task, socks_task, admin_task).await;
        });
    }
 }
--- a/codex-rs/network-proxy/src/runtime.rs
+++ b/codex-rs/network-proxy/src/runtime.rs
@@ -73,15 +73,25 @@ pub struct BlockedRequest {
    pub timestamp: i64,
 }

+pub struct BlockedRequestArgs {
+    pub host: String,
+    pub reason: String,
+    pub client: Option<String>,
+    pub method: Option<String>,
+    pub mode: Option<NetworkMode>,
+    pub protocol: String,
+}
+
 impl BlockedRequest {
-    pub fn new(
-        host: String,
-        reason: String,
-        client: Option<String>,
-        method: Option<String>,
-        mode: Option<NetworkMode>,
-        protocol: String,
-    ) -> Self {
+    pub fn new(args: BlockedRequestArgs) -> Self {
+        let BlockedRequestArgs {
+            host,
+            reason,
+            client,
+            method,
+            mode,
+            protocol,
+        } = args;
        Self {
            host,
            reason,
--- a/codex-rs/network-proxy/src/socks5.rs
+++ b/codex-rs/network-proxy/src/socks5.rs
@@ -0,0 +1,320 @@
+use crate::config::NetworkMode;
+use crate::network_policy::NetworkDecision;
+use crate::network_policy::NetworkPolicyDecider;
+use crate::network_policy::NetworkPolicyRequest;
+use crate::network_policy::NetworkPolicyRequestArgs;
+use crate::network_policy::NetworkProtocol;
+use crate::network_policy::evaluate_host_policy;
+use crate::policy::normalize_host;
+use crate::reasons::REASON_METHOD_NOT_ALLOWED;
+use crate::reasons::REASON_PROXY_DISABLED;
+use crate::state::BlockedRequest;
+use crate::state::BlockedRequestArgs;
+use crate::state::NetworkProxyState;
+use anyhow::Context as _;
+use anyhow::Result;
+use rama_core::Layer;
+use rama_core::Service;
+use rama_core::error::BoxError;
+use rama_core::extensions::ExtensionsRef;
+use rama_core::layer::AddInputExtensionLayer;
+use rama_core::service::service_fn;
+use rama_net::client::EstablishedClientConnection;
+use rama_net::stream::SocketInfo;
+use rama_socks5::Socks5Acceptor;
+use rama_socks5::server::DefaultConnector;
+use rama_socks5::server::DefaultUdpRelay;
+use rama_socks5::server::udp::RelayRequest;
+use rama_socks5::server::udp::RelayResponse;
+use rama_tcp::TcpStream;
+use rama_tcp::client::Request as TcpRequest;
+use rama_tcp::client::service::TcpConnector;
+use rama_tcp::server::TcpListener;
+use std::io;
+use std::net::SocketAddr;
+use std::sync::Arc;
+use tracing::error;
+use tracing::info;
+use tracing::warn;
+
+pub async fn run_socks5(
+    state: Arc<NetworkProxyState>,
+    addr: SocketAddr,
+    policy_decider: Option<Arc<dyn NetworkPolicyDecider>>,
+    enable_socks5_udp: bool,
+) -> Result<()> {
+    let listener = TcpListener::build()
+        .bind(addr)
+        .await
+        // See `http_proxy.rs` for details on why we wrap `BoxError` before converting to anyhow.
+        .map_err(rama_core::error::OpaqueError::from)
+        .map_err(anyhow::Error::from)
+        .with_context(|| format!("bind SOCKS5 proxy: {addr}"))?;
+
+    info!("SOCKS5 proxy listening on {addr}");
+
+    match state.network_mode().await {
+        Ok(NetworkMode::Limited) => {
+            info!("SOCKS5 is blocked in limited mode; set mode=\"full\" to allow SOCKS5");
+        }
+        Ok(NetworkMode::Full) => {}
+        Err(err) => {
+            warn!("failed to read network mode: {err}");
+        }
+    }
+
+    let tcp_connector = TcpConnector::default();
+    let policy_tcp_connector = service_fn({
+        let policy_decider = policy_decider.clone();
+        move |req: TcpRequest| {
+            let tcp_connector = tcp_connector.clone();
+            let policy_decider = policy_decider.clone();
+            async move { handle_socks5_tcp(req, tcp_connector, policy_decider).await }
+        }
+    });
+
+    let socks_connector = DefaultConnector::default().with_connector(policy_tcp_connector);
+    let base = Socks5Acceptor::new().with_connector(socks_connector);
+
+    if enable_socks5_udp {
+        let udp_state = state.clone();
+        let udp_decider = policy_decider.clone();
+        let udp_relay = DefaultUdpRelay::default().with_async_inspector(service_fn({
+            move |request: RelayRequest| {
+                let udp_state = udp_state.clone();
+                let udp_decider = udp_decider.clone();
+                async move { inspect_socks5_udp(request, udp_state, udp_decider).await }
+            }
+        }));
+        let socks_acceptor = base.with_udp_associator(udp_relay);
+        listener
+            .serve(AddInputExtensionLayer::new(state).into_layer(socks_acceptor))
+            .await;
+    } else {
+        listener
+            .serve(AddInputExtensionLayer::new(state).into_layer(base))
+            .await;
+    }
+    Ok(())
+}
+
+async fn handle_socks5_tcp(
+    req: TcpRequest,
+    tcp_connector: TcpConnector,
+    policy_decider: Option<Arc<dyn NetworkPolicyDecider>>,
+) -> Result<EstablishedClientConnection<TcpStream, TcpRequest>, BoxError> {
+    let app_state = req
+        .extensions()
+        .get::<Arc<NetworkProxyState>>()
+        .cloned()
+        .ok_or_else(|| io::Error::other("missing state"))?;
+
+    let host = normalize_host(&req.authority.host.to_string());
+    let port = req.authority.port;
+    if host.is_empty() {
+        return Err(io::Error::new(io::ErrorKind::InvalidInput, "invalid host").into());
+    }
+
+    let client = req
+        .extensions()
+        .get::<SocketInfo>()
+        .map(|info| info.peer_addr().to_string());
+
+    match app_state.enabled().await {
+        Ok(true) => {}
+        Ok(false) => {
+            let _ = app_state
+                .record_blocked(BlockedRequest::new(BlockedRequestArgs {
+                    host: host.clone(),
+                    reason: REASON_PROXY_DISABLED.to_string(),
+                    client: client.clone(),
+                    method: None,
+                    mode: None,
+                    protocol: "socks5".to_string(),
+                }))
+                .await;
+            let client = client.as_deref().unwrap_or_default();
+            warn!("SOCKS blocked; proxy disabled (client={client}, host={host})");
+            return Err(io::Error::new(io::ErrorKind::PermissionDenied, "proxy disabled").into());
+        }
+        Err(err) => {
+            error!("failed to read enabled state: {err}");
+            return Err(io::Error::other("proxy error").into());
+        }
+    }
+
+    match app_state.network_mode().await {
+        Ok(NetworkMode::Limited) => {
+            let _ = app_state
+                .record_blocked(BlockedRequest::new(BlockedRequestArgs {
+                    host: host.clone(),
+                    reason: REASON_METHOD_NOT_ALLOWED.to_string(),
+                    client: client.clone(),
+                    method: None,
+                    mode: Some(NetworkMode::Limited),
+                    protocol: "socks5".to_string(),
+                }))
+                .await;
+            let client = client.as_deref().unwrap_or_default();
+            warn!(
+                "SOCKS blocked by method policy (client={client}, host={host}, mode=limited, allowed_methods=GET, HEAD, OPTIONS)"
+            );
+            return Err(io::Error::new(io::ErrorKind::PermissionDenied, "blocked").into());
+        }
+        Ok(NetworkMode::Full) => {}
+        Err(err) => {
+            error!("failed to evaluate method policy: {err}");
+            return Err(io::Error::other("proxy error").into());
+        }
+    }
+
+    let request = NetworkPolicyRequest::new(NetworkPolicyRequestArgs {
+        protocol: NetworkProtocol::Socks5Tcp,
+        host: host.clone(),
+        port,
+        client_addr: client.clone(),
+        method: None,
+        command: None,
+        exec_policy_hint: None,
+    });
+
+    match evaluate_host_policy(&app_state, policy_decider.as_ref(), &request).await {
+        Ok(NetworkDecision::Deny { reason }) => {
+            let _ = app_state
+                .record_blocked(BlockedRequest::new(BlockedRequestArgs {
+                    host: host.clone(),
+                    reason: reason.clone(),
+                    client: client.clone(),
+                    method: None,
+                    mode: None,
+                    protocol: "socks5".to_string(),
+                }))
+                .await;
+            let client = client.as_deref().unwrap_or_default();
+            warn!("SOCKS blocked (client={client}, host={host}, reason={reason})");
+            return Err(io::Error::new(io::ErrorKind::PermissionDenied, "blocked").into());
+        }
+        Ok(NetworkDecision::Allow) => {
+            let client = client.as_deref().unwrap_or_default();
+            info!("SOCKS allowed (client={client}, host={host}, port={port})");
+        }
+        Err(err) => {
+            error!("failed to evaluate host: {err}");
+            return Err(io::Error::other("proxy error").into());
+        }
+    }
+
+    tcp_connector.serve(req).await
+}
+
+async fn inspect_socks5_udp(
+    request: RelayRequest,
+    state: Arc<NetworkProxyState>,
+    policy_decider: Option<Arc<dyn NetworkPolicyDecider>>,
+) -> io::Result<RelayResponse> {
+    let RelayRequest {
+        server_address,
+        payload,
+        extensions,
+        ..
+    } = request;
+
+    let host = normalize_host(&server_address.ip_addr.to_string());
+    let port = server_address.port;
+    if host.is_empty() {
+        return Err(io::Error::new(io::ErrorKind::InvalidInput, "invalid host"));
+    }
+
+    let client = extensions
+        .get::<SocketInfo>()
+        .map(|info| info.peer_addr().to_string());
+
+    match state.enabled().await {
+        Ok(true) => {}
+        Ok(false) => {
+            let _ = state
+                .record_blocked(BlockedRequest::new(BlockedRequestArgs {
+                    host: host.clone(),
+                    reason: REASON_PROXY_DISABLED.to_string(),
+                    client: client.clone(),
+                    method: None,
+                    mode: None,
+                    protocol: "socks5-udp".to_string(),
+                }))
+                .await;
+            let client = client.as_deref().unwrap_or_default();
+            warn!("SOCKS UDP blocked; proxy disabled (client={client}, host={host})");
+            return Ok(RelayResponse {
+                maybe_payload: None,
+                extensions,
+            });
+        }
+        Err(err) => {
+            error!("failed to read enabled state: {err}");
+            return Err(io::Error::other("proxy error"));
+        }
+    }
+
+    match state.network_mode().await {
+        Ok(NetworkMode::Limited) => {
+            let _ = state
+                .record_blocked(BlockedRequest::new(BlockedRequestArgs {
+                    host: host.clone(),
+                    reason: REASON_METHOD_NOT_ALLOWED.to_string(),
+                    client: client.clone(),
+                    method: None,
+                    mode: Some(NetworkMode::Limited),
+                    protocol: "socks5-udp".to_string(),
+                }))
+                .await;
+            return Ok(RelayResponse {
+                maybe_payload: None,
+                extensions,
+            });
+        }
+        Ok(NetworkMode::Full) => {}
+        Err(err) => {
+            error!("failed to evaluate method policy: {err}");
+            return Err(io::Error::other("proxy error"));
+        }
+    }
+
+    let request = NetworkPolicyRequest::new(NetworkPolicyRequestArgs {
+        protocol: NetworkProtocol::Socks5Udp,
+        host: host.clone(),
+        port,
+        client_addr: client.clone(),
+        method: None,
+        command: None,
+        exec_policy_hint: None,
+    });
+
+    match evaluate_host_policy(&state, policy_decider.as_ref(), &request).await {
+        Ok(NetworkDecision::Deny { reason }) => {
+            let _ = state
+                .record_blocked(BlockedRequest::new(BlockedRequestArgs {
+                    host: host.clone(),
+                    reason: reason.clone(),
+                    client: client.clone(),
+                    method: None,
+                    mode: None,
+                    protocol: "socks5-udp".to_string(),
+                }))
+                .await;
+            let client = client.as_deref().unwrap_or_default();
+            warn!("SOCKS UDP blocked (client={client}, host={host}, reason={reason})");
+            Ok(RelayResponse {
+                maybe_payload: None,
+                extensions,
+            })
+        }
+        Ok(NetworkDecision::Allow) => Ok(RelayResponse {
+            maybe_payload: Some(payload),
+            extensions,
+        }),
+        Err(err) => {
+            error!("failed to evaluate UDP host: {err}");
+            Err(io::Error::other("proxy error"))
+        }
+    }
+}
--- a/codex-rs/network-proxy/src/state.rs
+++ b/codex-rs/network-proxy/src/state.rs
@@ -20,6 +20,7 @@ use serde::Deserialize;
 use std::collections::HashSet;

 pub use crate::runtime::BlockedRequest;
+pub use crate::runtime::BlockedRequestArgs;
 pub use crate::runtime::NetworkProxyState;
 #[cfg(test)]
 pub(crate) use crate::runtime::network_proxy_state_for_policy;
--- a/codex-rs/protocol/src/config_types.rs
+++ b/codex-rs/protocol/src/config_types.rs
@@ -66,6 +66,18 @@ pub enum SandboxMode {
    DangerFullAccess,
 }

+#[derive(
+    Deserialize, Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Display, JsonSchema, TS,
+)]
+#[serde(rename_all = "kebab-case")] // serde names: "disabled", "restricted-token", "elevated"
+#[strum(serialize_all = "kebab-case")] // presumably gives strum's `Display` the same names
+pub enum WindowsSandboxLevel {
+    #[default]
+    Disabled, // the `Default` value
+    RestrictedToken,
+    Elevated,
+}
+
 #[derive(
    Debug,
    Serialize,
@@ -94,8 +106,8 @@ pub enum Personality {
 #[serde(rename_all = "lowercase")]
 #[strum(serialize_all = "lowercase")]
 pub enum WebSearchMode {
-    #[default]
    Disabled,
+    #[default]
    Cached,
    Live,
 }
--- a/codex-rs/protocol/src/items.rs
+++ b/codex-rs/protocol/src/items.rs
@@ -1,3 +1,4 @@
+use crate::models::WebSearchAction;
 use crate::protocol::AgentMessageEvent;
 use crate::protocol::AgentReasoningEvent;
 use crate::protocol::AgentReasoningRawContentEvent;
@@ -20,6 +21,7 @@ pub enum TurnItem {
    AgentMessage(AgentMessageItem),
    Reasoning(ReasoningItem),
    WebSearch(WebSearchItem),
+    ContextCompaction(ContextCompactionItem),
 }

 #[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema)]
@@ -49,10 +51,16 @@ pub struct ReasoningItem {
    pub raw_content: Vec<String>,
 }

-#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema)]
+#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema, PartialEq)]
 pub struct WebSearchItem {
    pub id: String,
    pub query: String,
+    pub action: WebSearchAction,
+}
+
+#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema)]
+pub struct ContextCompactionItem { // payload of `TurnItem::ContextCompaction`
+    pub id: String, // item identifier; the struct's only field
 }

 impl UserMessageItem {
@@ -181,6 +189,7 @@ impl WebSearchItem {
        EventMsg::WebSearchEnd(WebSearchEndEvent {
            call_id: self.id.clone(),
            query: self.query.clone(),
+            action: self.action.clone(),
        })
    }
 }
@@ -192,6 +201,7 @@ impl TurnItem {
            TurnItem::AgentMessage(item) => item.id.clone(),
            TurnItem::Reasoning(item) => item.id.clone(),
            TurnItem::WebSearch(item) => item.id.clone(),
+            TurnItem::ContextCompaction(item) => item.id.clone(),
        }
    }

@@ -201,6 +211,7 @@ impl TurnItem {
            TurnItem::AgentMessage(item) => item.as_legacy_events(),
            TurnItem::WebSearch(item) => vec![item.as_legacy_event()],
            TurnItem::Reasoning(item) => item.as_legacy_events(show_raw_agent_reasoning),
+            TurnItem::ContextCompaction(_) => Vec::new(),
        }
    }
 }
--- a/codex-rs/protocol/src/models.rs
+++ b/codex-rs/protocol/src/models.rs
@@ -157,7 +157,9 @@ pub enum ResponseItem {
        #[serde(default, skip_serializing_if = "Option::is_none")]
        #[ts(optional)]
        status: Option<String>,
-        action: WebSearchAction,
+        #[serde(default, skip_serializing_if = "Option::is_none")]
+        #[ts(optional)]
+        action: Option<WebSearchAction>,
    },
    // Generated by the harness but considered exactly as a model response.
    GhostSnapshot {
@@ -1034,10 +1036,12 @@ mod tests {
                        "query": "weather seattle"
                    }
                }"#,
-                WebSearchAction::Search {
+                None,
+                Some(WebSearchAction::Search {
                    query: Some("weather seattle".into()),
-                },
+                }),
                Some("completed".into()),
+                true,
            ),
            (
                r#"{
@@ -1048,10 +1052,12 @@ mod tests {
                        "url": "https://example.com"
                    }
                }"#,
-                WebSearchAction::OpenPage {
+                None,
+                Some(WebSearchAction::OpenPage {
                    url: Some("https://example.com".into()),
-                },
+                }),
                Some("open".into()),
+                true,
            ),
            (
                r#"{
@@ -1063,26 +1069,43 @@ mod tests {
                        "pattern": "installation"
                    }
                }"#,
-                WebSearchAction::FindInPage {
+                None,
+                Some(WebSearchAction::FindInPage {
                    url: Some("https://example.com/docs".into()),
                    pattern: Some("installation".into()),
-                },
+                }),
                Some("in_progress".into()),
+                true,
+            ),
+            (
+                r#"{
+                    "type": "web_search_call",
+                    "status": "in_progress",
+                    "id": "ws_partial"
+                }"#,
+                Some("ws_partial".into()),
+                None,
+                Some("in_progress".into()),
+                false,
            ),
        ];

-        for (json_literal, expected_action, expected_status) in cases {
+        for (json_literal, expected_id, expected_action, expected_status, expect_roundtrip) in cases
+        {
            let parsed: ResponseItem = serde_json::from_str(json_literal)?;
            let expected = ResponseItem::WebSearchCall {
-                id: None,
+                id: expected_id.clone(),
                status: expected_status.clone(),
                action: expected_action.clone(),
            };
            assert_eq!(parsed, expected);

            let serialized = serde_json::to_value(&parsed)?;
-            let original_value: serde_json::Value = serde_json::from_str(json_literal)?;
-            assert_eq!(serialized, original_value);
+            let mut expected_serialized: serde_json::Value = serde_json::from_str(json_literal)?;
+            if !expect_roundtrip && let Some(obj) = expected_serialized.as_object_mut() {
+                obj.remove("id");
+            }
+            assert_eq!(serialized, expected_serialized);
        }

        Ok(())
--- a/codex-rs/protocol/src/protocol.rs
+++ b/codex-rs/protocol/src/protocol.rs
@@ -16,6 +16,7 @@ use crate::approvals::ElicitationRequestEvent;
 use crate::config_types::CollaborationMode;
 use crate::config_types::Personality;
 use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
+use crate::config_types::WindowsSandboxLevel;
 use crate::custom_prompts::CustomPrompt;
 use crate::dynamic_tools::DynamicToolCallRequest;
 use crate::dynamic_tools::DynamicToolResponse;
@@ -24,6 +25,7 @@ use crate::message_history::HistoryEntry;
 use crate::models::BaseInstructions;
 use crate::models::ContentItem;
 use crate::models::ResponseItem;
+use crate::models::WebSearchAction;
 use crate::num_format::format_with_separators;
 use crate::openai_models::ReasoningEffort as ReasoningEffortConfig;
 use crate::parse_command::ParsedCommand;
@@ -157,6 +159,10 @@ pub enum Op {
        #[serde(skip_serializing_if = "Option::is_none")]
        sandbox_policy: Option<SandboxPolicy>,

+        /// Updated Windows sandbox level for tool execution.
+        #[serde(skip_serializing_if = "Option::is_none")]
+        windows_sandbox_level: Option<WindowsSandboxLevel>,
+
        /// Updated model slug. When set, the model info is derived
        /// automatically.
        #[serde(skip_serializing_if = "Option::is_none")]
@@ -682,8 +688,12 @@ pub enum EventMsg {
    /// indicates the turn continued but the user should still be notified.
    Warning(WarningEvent),

-    /// Conversation history was compacted (either automatically or manually).
-    ContextCompacted(ContextCompactedEvent),
+    /// Conversation history compaction has started.
+    ContextCompactionStarted(ContextCompactionStartedEvent),
+
+    /// Conversation history compaction has ended (either automatically or manually).
+    #[serde(alias = "context_compacted")]
+    ContextCompactionEnded(ContextCompactionEndedEvent),

    /// Conversation history was rolled back by dropping the last N user turns.
    ThreadRolledBack(ThreadRolledBackEvent),
@@ -1041,6 +1051,7 @@ impl HasLegacyEvent for ReasoningRawContentDeltaEvent {
 impl HasLegacyEvent for EventMsg {
    fn as_legacy_events(&self, show_raw_agent_reasoning: bool) -> Vec<EventMsg> {
        match self {
+            EventMsg::ItemStarted(event) => event.as_legacy_events(show_raw_agent_reasoning),
            EventMsg::ItemCompleted(event) => event.as_legacy_events(show_raw_agent_reasoning),
            EventMsg::AgentMessageContentDelta(event) => {
                event.as_legacy_events(show_raw_agent_reasoning)
@@ -1076,7 +1087,10 @@ pub struct WarningEvent {
 }

 #[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
-pub struct ContextCompactedEvent;
+pub struct ContextCompactionStartedEvent;
+
+#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
+pub struct ContextCompactionEndedEvent;

 #[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
 pub struct TurnCompleteEvent {
@@ -1402,6 +1416,7 @@ pub struct WebSearchBeginEvent {
 pub struct WebSearchEndEvent {
    pub call_id: String,
    pub query: String,
+    pub action: WebSearchAction,
 }

 // Conversation kept for backward compatibility.
@@ -2375,6 +2390,9 @@ mod tests {
            item: TurnItem::WebSearch(WebSearchItem {
                id: "search-1".into(),
                query: "find docs".into(),
+                action: WebSearchAction::Search {
+                    query: Some("find docs".into()),
+                },
            }),
        };

--- a/codex-rs/rmcp-client/src/bin/test_stdio_server.rs
+++ b/codex-rs/rmcp-client/src/bin/test_stdio_server.rs
@@ -35,12 +35,19 @@ struct TestToolServer {

 const MEMO_URI: &str = "memo://codex/example-note";
 const MEMO_CONTENT: &str = "This is a sample MCP resource served by the rmcp test server.";
+const SMALL_PNG_BASE64: &str = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==";
+
 pub fn stdio() -> (tokio::io::Stdin, tokio::io::Stdout) {
    (tokio::io::stdin(), tokio::io::stdout())
 }
+
 impl TestToolServer {
    fn new() -> Self {
-        let tools = vec![Self::echo_tool(), Self::image_tool()];
+        let tools = vec![
+            Self::echo_tool(),
+            Self::image_tool(),
+            Self::image_scenario_tool(),
+        ];
        let resources = vec![Self::memo_resource()];
        let resource_templates = vec![Self::memo_template()];
        Self {
@@ -86,6 +93,61 @@ impl TestToolServer {
        )
    }

+    /// Describes the `image_scenario` tool: a manual-testing aid for how the Codex TUI renders
+    /// MCP tool results that carry image blocks.
+    ///
+    /// It covers the awkward cases where a `CallToolResult.content` has its image somewhere other
+    /// than the first block, or has an invalid image block ahead of a valid one.
+    ///
+    /// To try it by hand in the Codex TUI:
+    /// - Build the server: `cargo build -p codex-rmcp-client --bin test_stdio_server`
+    /// - Register it: `codex mcp add mcpimg -- /abs/path/to/test_stdio_server`
+    /// - Ask Codex to call one of:
+    ///   - `mcpimg.image_scenario({"scenario":"image_only"})`
+    ///   - `mcpimg.image_scenario({"scenario":"text_then_image","caption":"Here is the image:"})`
+    ///   - `mcpimg.image_scenario({"scenario":"invalid_base64_then_image"})`
+    ///   - `mcpimg.image_scenario({"scenario":"invalid_image_bytes_then_image"})`
+    ///   - `mcpimg.image_scenario({"scenario":"multiple_valid_images"})`
+    ///   - `mcpimg.image_scenario({"scenario":"image_then_text","caption":"Here is the image:"})`
+    ///   - `mcpimg.image_scenario({"scenario":"text_only","caption":"Here is the image:"})`
+    /// - The expected outcome is an extra history cell: `tool result (image output)`.
+    fn image_scenario_tool() -> Tool {
+        let raw_schema = serde_json::json!({
+            "type": "object",
+            "properties": {
+                "scenario": {
+                    "type": "string",
+                    "enum": [
+                        "image_only",
+                        "text_then_image",
+                        "invalid_base64_then_image",
+                        "invalid_image_bytes_then_image",
+                        "multiple_valid_images",
+                        "image_then_text",
+                        "text_only"
+                    ]
+                },
+                "caption": { "type": "string" },
+                "data_url": {
+                    "type": "string",
+                    "description": "Optional data URL like data:image/png;base64,AAAA...; if omitted, uses a built-in tiny PNG."
+                }
+            },
+            "required": ["scenario"],
+            "additionalProperties": false
+        });
+
+        // The literal above is a JSON object, so converting it to `JsonObject` should not fail.
+        #[expect(clippy::expect_used)]
+        let input_schema: JsonObject = serde_json::from_value(raw_schema)
+            .expect("image_scenario tool schema should deserialize");
+
+        let name = Cow::Borrowed("image_scenario");
+        let description =
+            Cow::Borrowed("Return content blocks for manual testing of MCP image rendering scenarios.");
+        Tool::new(name, description, Arc::new(input_schema))
+    }
+
    fn memo_resource() -> Resource {
        let raw = RawResource {
            uri: MEMO_URI.to_string(),
@@ -125,6 +187,32 @@ struct EchoArgs {
    env_var: Option<String>,
 }

+/// Selects which content blocks the `image_scenario` tool returns, to exercise how the Codex TUI
+/// handles MCP image outputs.
+///
+/// Behavior under test: the TUI should render an image output cell when *any* decodable image
+/// block is present in the result, even when the first block is text or an invalid image.
+#[derive(Deserialize, Debug)]
+#[serde(rename_all = "snake_case")]
+enum ImageScenario {
+    ImageOnly,
+    TextThenImage,
+    InvalidBase64ThenImage,
+    InvalidImageBytesThenImage,
+    MultipleValidImages,
+    ImageThenText,
+    TextOnly,
+}
+
+#[derive(Deserialize, Debug)]
+struct ImageScenarioArgs {
+    scenario: ImageScenario, // which block layout `image_scenario_result` builds
+    #[serde(default)]
+    caption: Option<String>, // text block content; "Here is the image:" is used when absent
+    #[serde(default)]
+    data_url: Option<String>, // image as a data URL; built-in `SMALL_PNG_BASE64` when absent
+}
+
 impl ServerHandler for TestToolServer {
    fn get_info(&self) -> ServerInfo {
        ServerInfo {
@@ -244,14 +332,6 @@ impl ServerHandler for TestToolServer {
                    )
                })?;

-                fn parse_data_url(url: &str) -> Option<(String, String)> {
-                    let rest = url.strip_prefix("data:")?;
-                    let (mime_and_opts, data) = rest.split_once(',')?;
-                    let (mime, _opts) =
-                        mime_and_opts.split_once(';').unwrap_or((mime_and_opts, ""));
-                    Some((mime.to_string(), data.to_string()))
-                }
-
                let (mime_type, data_b64) = parse_data_url(&data_url).ok_or_else(|| {
                    McpError::invalid_params(
                        format!("invalid data URL for image tool: {data_url}"),
@@ -263,6 +343,10 @@ impl ServerHandler for TestToolServer {
                    data_b64, mime_type,
                )]))
            }
+            "image_scenario" => {
+                let args = Self::parse_call_args::<ImageScenarioArgs>(&request, "image_scenario")?;
+                Self::image_scenario_result(args)
+            }
            other => Err(McpError::invalid_params(
                format!("unknown tool: {other}"),
                None,
@@ -271,6 +355,89 @@ impl ServerHandler for TestToolServer {
    }
 }

+impl TestToolServer {
+    /// Deserializes the arguments of a tool call into `T`.
+    ///
+    /// Fails with `invalid_params` when the request has no arguments at all, or when the
+    /// arguments do not deserialize into `T` (the serde error text becomes the message).
+    /// `tool_name` is only used to word the missing-arguments message.
+    fn parse_call_args<T: for<'de> Deserialize<'de>>(
+        request: &CallToolRequestParam,
+        tool_name: &'static str,
+    ) -> Result<T, McpError> {
+        let Some(arguments) = request.arguments.as_ref() else {
+            let message = format!("missing arguments for {tool_name} tool");
+            return Err(McpError::invalid_params(message, None));
+        };
+        let as_value = serde_json::Value::Object(arguments.clone().into_iter().collect());
+        serde_json::from_value(as_value)
+            .map_err(|err| McpError::invalid_params(err.to_string(), None))
+    }
+
+    /// Builds the content blocks for one `image_scenario` call.
+    ///
+    /// The "valid" image comes from `args.data_url` when given (an unparsable URL is rejected
+    /// with `invalid_params`), otherwise from the built-in `SMALL_PNG_BASE64` PNG. The caption
+    /// falls back to "Here is the image:".
+    ///
+    /// Once the inputs are resolved the result is always `CallToolResult::success`; the
+    /// "invalid" blocks are deliberately bad payloads inside that result, not errors.
+    fn image_scenario_result(args: ImageScenarioArgs) -> Result<CallToolResult, McpError> {
+        use rmcp::model::Content;
+
+        let (mime_type, valid_data_b64) = match &args.data_url {
+            Some(data_url) => parse_data_url(data_url).ok_or_else(|| {
+                McpError::invalid_params(
+                    format!("invalid data_url for image_scenario tool: {data_url}"),
+                    None,
+                )
+            })?,
+            None => ("image/png".to_string(), SMALL_PNG_BASE64.to_string()),
+        };
+        // Resolved up front even though some scenarios never use it.
+        let caption = args
+            .caption
+            .unwrap_or_else(|| "Here is the image:".to_string());
+
+        let content = match args.scenario {
+            ImageScenario::ImageOnly => vec![Content::image(valid_data_b64, mime_type)],
+            ImageScenario::TextThenImage => vec![
+                Content::text(caption),
+                Content::image(valid_data_b64, mime_type),
+            ],
+            ImageScenario::InvalidBase64ThenImage => vec![
+                // Not valid base64 at all.
+                Content::image("not-base64".to_string(), "image/png".to_string()),
+                Content::image(valid_data_b64, mime_type),
+            ],
+            ImageScenario::InvalidImageBytesThenImage => vec![
+                // Valid base64 that decodes to the text "not an image", not to PNG bytes.
+                Content::image("bm90IGFuIGltYWdl".to_string(), "image/png".to_string()),
+                Content::image(valid_data_b64, mime_type),
+            ],
+            ImageScenario::MultipleValidImages => vec![
+                // The same image twice.
+                Content::image(valid_data_b64.clone(), mime_type.clone()),
+                Content::image(valid_data_b64, mime_type),
+            ],
+            ImageScenario::ImageThenText => vec![
+                Content::image(valid_data_b64, mime_type),
+                Content::text(caption),
+            ],
+            ImageScenario::TextOnly => vec![Content::text(caption)],
+        };
+
+        Ok(CallToolResult::success(content))
+    }
+}
+
+/// Splits `data:<mime>[;opts],<data>` into `(mime, data)`; `None` without the prefix or a comma.
+fn parse_data_url(url: &str) -> Option<(String, String)> {
+    let (header, payload) = url.strip_prefix("data:")?.split_once(',')?;
+    let mime = header.split(';').next().unwrap_or(header);
+    Some((mime.to_owned(), payload.to_owned()))
+}
+
 #[tokio::main]
 async fn main() -> Result<(), Box<dyn std::error::Error>> {
    eprintln!("starting rmcp test server");
--- a/Show More
+++ b/Show More