Compare commits


54 Commits

Author SHA1 Message Date
Ahmed Ibrahim
e6074eeb5d release 2025-12-11 08:07:12 -08:00
jif-oai
29381ba5c2 feat: add shell snapshot for shell command (#7786) 2025-12-11 13:46:43 +00:00
jif-oai
b2280d6205 feat: warning for long snapshots (#7870) 2025-12-11 12:42:47 +00:00
Dylan Hurd
dca7f4cb60 fix(stuff) (#7855)
Co-authored-by: Ahmed Ibrahim <aibrahim@openai.com>
2025-12-11 00:39:47 -08:00
iceweasel-oai
13c0919bff Elevated Sandbox 2 (#7792)
- DPAPI helpers for storing Sandbox user passwords securely
- creation of Offline/Online sandbox users
- ACL setup for sandbox users
- firewall rule setup
2025-12-10 21:23:16 -08:00
pakrym-oai
83aac0f985 Only show "Worked for" after the final assistant message (#7854)
Before:
<img width="1908" height="246" alt="image"
src="https://github.com/user-attachments/assets/f4d5993a-8d37-4982-a6fd-d37f449215b2"
/>
After:
<img width="1102" height="586" alt="image"
src="https://github.com/user-attachments/assets/e833140d-690a-4c33-8bc7-e2b69b9dc92d"
/>
2025-12-10 21:13:13 -08:00
Eric Traut
057250020a Fixed regression that broke fuzzy matching for slash commands (#7859)
This addresses bug #7857 which was introduced recently as part of PR
#7704.
2025-12-10 20:42:45 -08:00
Michael Bolin
3fc8b2894f fix: remove inaccurate #[allow(dead_code)] marker (#7851)
Me reading this clippy warning:

<img width="263" height="191" alt="image"
src="https://github.com/user-attachments/assets/3a936a17-f91d-47bc-a08a-cafb154e9e32"
/>
2025-12-10 17:48:46 -08:00
Celia Chen
ce19dbbb22 [app-server] Update readme to include mcp endpoints (#7850)
n/a
2025-12-11 01:08:31 +00:00
Michael Bolin
038767af69 fix: add a hopefully-temporary sleep to reduce test flakiness (#7848)
Let's see if this `sleep()` call is good enough to fix the test
flakiness we currently see in CI. It will take me some time to upstream
a proper fix, and I would prefer not to disable this test in the
interim.
2025-12-11 00:51:33 +00:00
Celia Chen
7cabe54fc7 [app-server] make app server not throw error when login id is not found (#7831)
Our previous design of the cancellation endpoint was not idempotent, which
caused a number of flaky tests. Make the app server return a not_found
status instead of throwing an error if the login id is not found. Keep
the V1 endpoint behavior the same.
2025-12-10 16:19:40 -08:00
zhao-oai
c1367808fb fixing typo in execpolicy docs (#7847) 2025-12-10 16:11:46 -08:00
Michael Bolin
87f5b69b24 fix: ensure accept_elicitation_for_prompt_rule() test passes locally (#7832)
When I originally introduced `accept_elicitation_for_prompt_rule()` in
https://github.com/openai/codex/pull/7617, it worked for me locally
because I had run `codex-rs/exec-server/tests/suite/bash` once myself,
which had the side-effect of installing the corresponding DotSlash
artifact.

In CI, I added explicit logic to do this as part of
`.github/workflows/rust-ci.yml`, which meant the test also passed in CI,
but this logic should have been done as part of the test so that it
would work locally for devs who had not installed the DotSlash artifact
for `codex-rs/exec-server/tests/suite/bash` before. This PR updates the
test to do this (and deletes the setup logic from `rust-ci.yml`),
creating a new `DOTSLASH_CACHE` in a temp directory so that this is
handled independently for each test.

While here, also added a check to ensure that the `codex` binary has
been built prior to running the test, as we have to ensure it is
symlinked as `codex-linux-sandbox` on Linux in order for the integration
test to work on that platform.
2025-12-10 15:17:13 -08:00
Javi
e2559ab28d fix: thread/list returning fewer than the requested amount due to filtering CXA-293 (#7509)
This caused some conversations to not appear when they otherwise should.

Prior to this change, `thread/list`/`list_conversations_common` would:
- Fetch N conversations from `RolloutRecorder::list_conversations`
- Then it would filter those (like by the provided `model_providers`)
- This would make it potentially return fewer than N items.

With this change:
- `list_conversations_common` now continues fetching more conversations
from `RolloutRecorder::list_conversations` until it "fills up" the
`requested_page_size`.
- Ultimately this means that clients can rely on getting e.g. 20
conversations if they request 20 conversations.
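The fill-up strategy described above can be sketched as follows (an illustrative sketch, not the actual `list_conversations_common` signature):

```rust
/// Keep fetching batches until the filtered page reaches the requested size
/// or the underlying source is exhausted.
fn list_filtered<T>(
    mut fetch_batch: impl FnMut(usize) -> Vec<T>, // returns up to `n` items; empty when exhausted
    keep: impl Fn(&T) -> bool,                    // e.g. the `model_providers` filter
    requested_page_size: usize,
) -> Vec<T> {
    let mut page = Vec::new();
    loop {
        let batch = fetch_batch(requested_page_size);
        if batch.is_empty() {
            break; // nothing left to scan
        }
        page.extend(batch.into_iter().filter(|item| keep(item)));
        if page.len() >= requested_page_size {
            page.truncate(requested_page_size);
            break;
        }
    }
    page
}
```

With this shape, a filter that drops items simply causes another fetch rather than a short page.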
2025-12-10 23:06:32 +00:00
Josh McKinney
90f262e9a4 feat(tui2): copy tui crate and normalize snapshots (#7833)
Introduce a full codex-tui source snapshot under the new codex-tui2
crate so viewport work can be replayed in isolation.

This change copies the entire codex-rs/tui/src tree into
codex-rs/tui2/src in one atomic step, rather than piecemeal, to keep
future diffs vs the original viewport bookmark easy to reason about.

The goal is for codex-tui2 to render identically to the existing TUI
behind the `features.tui2` flag while we gradually port the
viewport/history commits from the joshka/viewport bookmark onto this
forked tree.

As part of this baseline change, we also ran the codex-tui2 snapshot test
suite and accepted all insta snapshots for the new crate, so the
snapshot files now use the codex-tui2 naming scheme and encode the
unmodified legacy TUI behavior. This keeps later viewport commits
focused on intentional behavior changes (and their snapshots) rather
than on mechanical snapshot renames.
2025-12-10 22:53:46 +00:00
Ahmed Ibrahim
321625072a Show the default model in model picker (#7838)
See the snapshot
2025-12-10 14:01:18 -08:00
xl-openai
b36ecb6c32 Inject SKILL.md when it's explicitly mentioned. (#7763)
1. Skills load once in core at session start; the cached outcome is
reused across core and surfaced to TUI via SessionConfigured.
2. TUI detects explicit skill selections, and core injects the matching
SKILL.md content into the turn when a selected skill is present.
2025-12-10 13:59:17 -08:00
pakrym-oai
eb2e5458cc Disable ansi codes in tui log file (#7836) 2025-12-10 13:56:48 -08:00
Celia Chen
bfb4d5710b [app-server-protocol] Add types for config (#7658)
Currently the config returned by `config/read` is untyped. Add types so
it's easier for clients to parse the config. Since configs are currently
all defined in snake case, we'll keep that instead of using camel case
like the rest of V2.

Sample output by testing using the app server test client:
```json
{
  "id": "f28449f4-b015-459b-b07b-eef06980165d",
  "result": {
    "config": {
      "approvalPolicy": null,
      "compactPrompt": null,
      "developerInstructions": null,
      "features": {
        "experimental_use_rmcp_client": true
      },
      "forcedChatgptWorkspaceId": null,
      "forcedLoginMethod": null,
      "instructions": null,
      "model": "gpt-5.1-codex-max",
      "modelAutoCompactTokenLimit": null,
      "modelContextWindow": null,
      "modelProvider": null,
      "modelReasoningEffort": null,
      "modelReasoningSummary": null,
      "modelVerbosity": null,
      "model_providers": {
        "local": {
          "base_url": "http://localhost:8061/api/codex",
          "env_http_headers": {
            "ChatGPT-Account-ID": "OPENAI_ACCOUNT_ID"
          },
          "env_key": "CHATGPT_TOKEN_STAGING",
          "name": "local",
          "wire_api": "responses"
        }
      },
      "model_reasoning_effort": "medium",
      "notice": {
        "hide_gpt-5.1-codex-max_migration_prompt": true,
        "hide_gpt5_1_migration_prompt": true
      },
      "profile": null,
      "profiles": {},
      "projects": {
        "/Users/celia/code": {
          "trust_level": "trusted"
        },
        "/Users/celia/code/codex": {
          "trust_level": "trusted"
        },
        "/Users/celia/code/openai": {
          "trust_level": "trusted"
        }
      },
      "reviewModel": null,
      "sandboxMode": null,
      "sandboxWorkspaceWrite": null,
      "tools": {
        "viewImage": null,
        "webSearch": null
      }
    },
    "origins": {
      "features.experimental_use_rmcp_client": {
        "name": "user",
        "source": "/Users/celia/.codex/config.toml",
        "version": "sha256:a1d8eaedb5d9db5dfdfa69f30fa9df2efec66bb4dd46aa67f149fcc67cd0711c"
      },
      "model": {
        "name": "user",
        "source": "/Users/celia/.codex/config.toml",
        "version": "sha256:a1d8eaedb5d9db5dfdfa69f30fa9df2efec66bb4dd46aa67f149fcc67cd0711c"
      },
      "model_providers.local.base_url": {
        "name": "user",
        "source": "/Users/celia/.codex/config.toml",
        "version": "sha256:a1d8eaedb5d9db5dfdfa69f30fa9df2efec66bb4dd46aa67f149fcc67cd0711c"
      },
      "model_providers.local.env_http_headers.ChatGPT-Account-ID": {
        "name": "user",
        "source": "/Users/celia/.codex/config.toml",
        "version": "sha256:a1d8eaedb5d9db5dfdfa69f30fa9df2efec66bb4dd46aa67f149fcc67cd0711c"
      },
      "model_providers.local.env_key": {
        "name": "user",
        "source": "/Users/celia/.codex/config.toml",
        "version": "sha256:a1d8eaedb5d9db5dfdfa69f30fa9df2efec66bb4dd46aa67f149fcc67cd0711c"
      },
      "model_providers.local.name": {
        "name": "user",
        "source": "/Users/celia/.codex/config.toml",
        "version": "sha256:a1d8eaedb5d9db5dfdfa69f30fa9df2efec66bb4dd46aa67f149fcc67cd0711c"
      },
      "model_providers.local.wire_api": {
        "name": "user",
        "source": "/Users/celia/.codex/config.toml",
        "version": "sha256:a1d8eaedb5d9db5dfdfa69f30fa9df2efec66bb4dd46aa67f149fcc67cd0711c"
      },
      "model_reasoning_effort": {
        "name": "user",
        "source": "/Users/celia/.codex/config.toml",
        "version": "sha256:a1d8eaedb5d9db5dfdfa69f30fa9df2efec66bb4dd46aa67f149fcc67cd0711c"
      },
      "notice.hide_gpt-5.1-codex-max_migration_prompt": {
        "name": "user",
        "source": "/Users/celia/.codex/config.toml",
        "version": "sha256:a1d8eaedb5d9db5dfdfa69f30fa9df2efec66bb4dd46aa67f149fcc67cd0711c"
      },
      "notice.hide_gpt5_1_migration_prompt": {
        "name": "user",
        "source": "/Users/celia/.codex/config.toml",
        "version": "sha256:a1d8eaedb5d9db5dfdfa69f30fa9df2efec66bb4dd46aa67f149fcc67cd0711c"
      },
      "projects./Users/celia/code.trust_level": {
        "name": "user",
        "source": "/Users/celia/.codex/config.toml",
        "version": "sha256:a1d8eaedb5d9db5dfdfa69f30fa9df2efec66bb4dd46aa67f149fcc67cd0711c"
      },
      "projects./Users/celia/code/codex.trust_level": {
        "name": "user",
        "source": "/Users/celia/.codex/config.toml",
        "version": "sha256:a1d8eaedb5d9db5dfdfa69f30fa9df2efec66bb4dd46aa67f149fcc67cd0711c"
      },
      "projects./Users/celia/code/openai.trust_level": {
        "name": "user",
        "source": "/Users/celia/.codex/config.toml",
        "version": "sha256:a1d8eaedb5d9db5dfdfa69f30fa9df2efec66bb4dd46aa67f149fcc67cd0711c"
      },
      "tools.web_search": {
        "name": "user",
        "source": "/Users/celia/.codex/config.toml",
        "version": "sha256:a1d8eaedb5d9db5dfdfa69f30fa9df2efec66bb4dd46aa67f149fcc67cd0711c"
      }
    }
  }
}
```
2025-12-10 21:35:31 +00:00
Ahmed Ibrahim
4953b2ae09 Error when trying to push a release while another release is in progress (#7834)
<img width="995" height="171" alt="image"
src="https://github.com/user-attachments/assets/7bab541a-a933-4064-a968-26e9566360ec"
/>

Currently, we just cancel the in-progress release, which can be annoying.
2025-12-10 12:15:39 -08:00
Robby He
1a5809624d fix: Prevent slash command popup from activating on invalid inputs (#7704)
## Slash Command popup issue

#7659

When recalling history, the composer
(`codex_tui::bottom_pane::chat_composer`) restores the previous
prompt text (which may start with `/`) and then calls
`sync_command_popup`. The logic in `sync_command_popup` treats any first
line that starts with `/` and has the caret inside the initial `/name`
token as an active slash command name:

```rust
let is_editing_slash_command_name = if first_line.starts_with('/') && caret_on_first_line {
    let token_end = first_line
        .char_indices()
        .find(|(_, c)| c.is_whitespace())
        .map(|(i, _)| i)
        .unwrap_or(first_line.len());
    cursor <= token_end
} else {
    false
};
```

This detection does not distinguish between an actual interactive slash
command being typed and a normal historical prompt that happens to begin
with `/`. As a result, after history recall, the restored prompt like `/
test` is interpreted as an "editing command name" context and the
slash-command popup is (re)activated. Once `active_popup` is
`ActivePopup::Command`, subsequent `Up` key presses are handled by
`handle_key_event_with_slash_popup` instead of
`handle_key_event_without_popup`, so they no longer trigger
`history.navigate_up(...)` and the session prompt history cannot be
scrolled.
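One way to close this gap is to gate popup activation on how the text entered the composer, not just its shape. The sketch below is hypothetical (the `TextSource` enum and this function signature are not the actual codex-tui API):

```rust
/// Only treat the caret-in-token case as an active slash command when the
/// text was typed interactively, so a recalled prompt that starts with '/'
/// no longer re-opens the popup.
#[derive(PartialEq)]
enum TextSource {
    Typed,
    HistoryRecall,
}

fn is_editing_slash_command_name(
    first_line: &str,
    cursor: usize,
    caret_on_first_line: bool,
    source: TextSource,
) -> bool {
    if source != TextSource::Typed || !caret_on_first_line || !first_line.starts_with('/') {
        return false;
    }
    // Same token-boundary logic as the snippet above: the caret must sit
    // inside the initial `/name` token.
    let token_end = first_line
        .char_indices()
        .find(|(_, c)| c.is_whitespace())
        .map(|(i, _)| i)
        .unwrap_or(first_line.len());
    cursor <= token_end
}
```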
2025-12-10 11:38:15 -08:00
Ahmed Ibrahim
cb9a189857 make model optional in config (#7769)
- Make Config.model optional and centralize default-selection logic in
ModelsManager, including a default_model helper (with
codex-auto-balanced when available) so sessions now carry an explicit
chosen model separate from the base config.
- Resolve `model` once in `core` and `tui` from config. Then store the
state of it on other structs.
- Move refreshing models to be before resolving the default model
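The resolution order implied by these bullets might look like this (a sketch; the function names and the base-default fallback are assumptions, `codex-auto-balanced` comes from the description above):

```rust
/// Sketch only: an explicitly configured model wins; otherwise fall back to
/// the default, preferring codex-auto-balanced when it is available.
fn default_model(auto_balanced_available: bool) -> &'static str {
    if auto_balanced_available {
        "codex-auto-balanced"
    } else {
        "gpt-5.1-codex-max" // assumed base default, for illustration
    }
}

fn resolve_model(configured: Option<String>, auto_balanced_available: bool) -> String {
    configured.unwrap_or_else(|| default_model(auto_balanced_available).to_string())
}
```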
2025-12-10 11:19:00 -08:00
Celia Chen
8a71f8b634 [app-server] Make sure that config writes preserve comments & order of configs (#7789)
Make sure that config writes preserve comments and order of configs by
utilizing the ConfigEditsBuilder in core.

Tested by running a real example and made sure that nothing in the
config file changes other than the configs to edit.
2025-12-10 19:14:27 +00:00
pakrym-oai
4b684c53ae Remove conversation_id and bring back request ID logging (#7830) 2025-12-10 10:44:12 -08:00
Koichi Shiraishi
9f40d6eeeb fix: remove duplicated parallel FeatureSpec (#7823)
regression: #7589

Signed-off-by: Koichi Shiraishi <zchee.io@gmail.com>
2025-12-10 10:23:01 -08:00
Amit Halfon
bd51d1b103 fix: Upgrade @modelcontextprotocol/sdk to ^1.24.0 (#7817)
## What?
Upgrades @modelcontextprotocol/sdk from ^1.20.2 to ^1.24.0 in the
TypeScript SDK's devDependencies.

## Why?
Related to #7737 - keeping development dependencies up to date with the
latest MCP SDK version that includes the fix for CVE-2025-66414.

Note: This change does not address the CVE for Codex users, as the MCP
SDK is only in devDependencies here. The actual MCP integration that
would be affected by the CVE is in the Rust codebase.

## How?
•  Updated dependency version in sdk/typescript/package.json
•  Ran pnpm install to update lockfile
•  Fixed formatting (added missing newline in package.json)

## Related Issue
Related to #7737

## Test Status
⚠️ After this upgrade, 2 additional tests timeout (1 test was already
failing on main):
•  tests/run.test.ts: "sends previous items when run is called twice" 
•  tests/run.test.ts: "resumes thread by id"
• tests/runStreamed.test.ts: "sends previous items when runStreamed is
called twice"

Marking as draft to investigate test timeouts. Maintainer guidance would
be appreciated.

Co-authored-by: HalfonA <amit@miggo.io>
2025-12-10 10:17:00 -08:00
jif-oai
f677d05871 fix: flaky tests 3 (#7826) 2025-12-10 17:57:53 +00:00
Eric Traut
c4af707e09 Removed experimental "command risk assessment" feature (#7799)
This experimental feature received lukewarm reception during internal
testing. Removing from the code base.
2025-12-10 09:48:11 -08:00
zhao-oai
e0fb3ca1db refactoring with_escalated_permissions to use SandboxPermissions instead (#7750)
Helpful in the future if we want more granularity when requesting
escalated permissions: e.g., when running in a read-only sandbox, the
model can request escalation to a sandbox that allows writes.
2025-12-10 17:18:48 +00:00
jif-oai
97b90094cd feat: use remote branch for review if local trails (#7813) 2025-12-10 17:04:52 +00:00
jif-oai
463249eff3 fix: flaky test 2 (#7818) 2025-12-10 16:35:28 +00:00
jif-oai
0ad54982ae chore: rework unified exec events (#7775) 2025-12-10 10:30:38 +00:00
Shijie Rao
d1c5db5796 chore: disable trusted signing pkg cache hit (#7807) 2025-12-09 22:14:14 -08:00
Gav Verma
6fa24d65f5 Express rate limit warning as % remaining (#7795)
<img width="342" height="264" alt="image"
src="https://github.com/user-attachments/assets/f1e932ff-c550-47b3-9035-0299ada4998d"
/>

Earlier, the warning was expressed as consumed% whereas status was
expressed as remaining%. This change brings the two into sync to
minimize confusion and improve visual consistency.
2025-12-09 21:17:57 -08:00
Shijie Rao
ab9ddcd50b Revert "Revert "feat: windows codesign with Azure trusted signing"" (#7806)
Reverts openai/codex#7804
2025-12-09 20:42:00 -08:00
Shijie Rao
f11520f5f1 Revert "feat: windows codesign with Azure trusted signing" (#7804)
Reverts openai/codex#7757
2025-12-09 20:19:37 -08:00
Shijie Rao
42e0817398 Revert "Revert "feat: windows codesign with Azure trusted signing"" (#7757)
Reverts openai/codex#7753

Updated the tag ref matching at
https://github.com/openai/openai/pull/594858 so that release with tag
change can be picked up correctly.
2025-12-09 19:31:46 -08:00
iceweasel-oai
fc4249313b Elevated Sandbox 1 (#7788)
- updating helpers, refactoring some functions that will be used in the
elevated sandbox
- better logging
- better and faster handling of ACL checks/writes
- No functional change—legacy restricted-token sandbox
remains the only path.
2025-12-09 19:00:33 -08:00
pakrym-oai
967d063f4b parse rg | head a search (#7797) 2025-12-09 18:30:16 -08:00
Shijie Rao
893f5261eb feat: support mcp in-session login (#7751)
### Summary
* Added `mcpServer/oauthLogin` in the app server to support in-session
MCP server login.
* Added `McpServerOauthLoginParams` and `McpServerOauthLoginResponse` to
support the above method, with the response returning the auth URL for the
consumer to open in a browser or display accordingly.
* Added `McpServerOauthLoginCompletedNotification`, which the app server
emits on MCP server login success or failure (e.g. timeout).
* Refactored the rmcp-client `oauth_login` to be able to start an auth
server, which the codex_message_processor uses for in-session auth.
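The flow above might look like the following JSON-RPC exchange (an illustrative sketch: only the method name and the param/response/notification type names come from this PR; the field names and values are assumptions):

```json
{"jsonrpc": "2.0", "id": 7, "method": "mcpServer/oauthLogin", "params": {"serverName": "github"}}
{"jsonrpc": "2.0", "id": 7, "result": {"authUrl": "https://github.com/login/oauth/authorize?client_id=..."}}
{"jsonrpc": "2.0", "method": "mcpServer/oauthLoginCompleted", "params": {"serverName": "github", "success": true}}
```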
2025-12-09 17:43:53 -08:00
Michael Bolin
fa4cac1e6b fix: introduce AbsolutePathBuf and resolve relative paths in config.toml (#7796)
This PR attempts to solve two problems by introducing an
`AbsolutePathBuf` type with a special deserializer:

- `AbsolutePathBuf` attempts to be a generally useful abstraction, as it
ensures, by construction, that it represents an absolute, normalized
path, which is a stronger guarantee than an arbitrary `PathBuf`.
- Values in `config.toml` that can be either an absolute or relative
path should, in the relative case, be resolved against the folder
containing the `config.toml`. This PR makes that easy to support: the
main cost is ensuring `AbsolutePathBufGuard` is used inside
`deserialize_config_toml_with_base()`.

While `AbsolutePathBufGuard` may seem slightly distasteful because it
relies on thread-local storage, this seems much cleaner to me than my
various experiments with
https://docs.rs/serde/latest/serde/de/trait.DeserializeSeed.html.
Further, since the `deserialize()` method from the `Deserialize` trait
is not async, we do not really have to worry about the deserialization
work being spread across multiple threads in a way that would interfere
with `AbsolutePathBufGuard`.

To start, this PR introduces the use of `AbsolutePathBuf` in
`OtelTlsConfig`. Note how this simplifies `otel_provider.rs` because it
no longer requires `settings.codex_home` to be threaded through.
Furthermore, this sets us up better for a world where multiple
`config.toml` files from different folders could be loaded and then
merged together, as the absolutifying of the paths must be done against
the correct parent folder.
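A minimal sketch of the thread-local guard pattern described here (simplified: the real type also normalizes paths and plugs into serde's `Deserialize` rather than a free `resolve` function):

```rust
use std::cell::RefCell;
use std::path::{Path, PathBuf};

thread_local! {
    // Base directory against which relative config paths are resolved.
    static BASE_DIR: RefCell<Option<PathBuf>> = RefCell::new(None);
}

/// Installs a base dir for the duration of deserialization; cleared on drop.
struct AbsolutePathBufGuard;

impl AbsolutePathBufGuard {
    fn new(base: PathBuf) -> Self {
        BASE_DIR.with(|b| *b.borrow_mut() = Some(base));
        AbsolutePathBufGuard
    }
}

impl Drop for AbsolutePathBufGuard {
    fn drop(&mut self) {
        BASE_DIR.with(|b| *b.borrow_mut() = None);
    }
}

/// What a simplified `AbsolutePathBuf` deserializer would do with a raw string.
fn resolve(raw: &str) -> PathBuf {
    let p = Path::new(raw);
    if p.is_absolute() {
        p.to_path_buf()
    } else {
        BASE_DIR.with(|b| b.borrow().as_ref().expect("guard installed").join(p))
    }
}
```

Because `deserialize()` runs synchronously on one thread, the thread-local is safe for the duration of the guard.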
2025-12-09 17:37:52 -08:00
Josh McKinney
0c8828c5e2 feat(tui2): add feature-flagged tui2 frontend (#7793)
Introduce a new codex-tui2 crate that re-exports the existing
interactive TUI surface and delegates run_main directly to codex-tui.
This keeps behavior identical while giving tui2 its own crate for future
viewport work.

Wire the codex CLI to select the frontend via the tui2 feature flag.
When the merged CLI overrides include features.tui2=true (e.g. via
--enable tui2), interactive runs are routed through
codex_tui2::run_main; otherwise they continue to use the original
codex_tui::run_main.

Register Feature::Tui2 in the core feature registry and add the tui2
crate and dependency entries so the new frontend builds alongside the
existing TUI.

This is a stub that only wires up the feature flag.

<img width="619" height="364" alt="image"
src="https://github.com/user-attachments/assets/4893f030-932f-471e-a443-63fe6b5d8ed9"
/>
2025-12-09 16:23:53 -08:00
Bryant Rolfe
225a5f7ffb Add vim-style navigation for CLI option selection (#7784)
## Summary

Support "j" and "k" keys as aliases for "down" and "up" so vim users
feel loved. Only support these keys when the selection is not
searchable.

## Testing
- env -u NO_COLOR TERM=xterm-256color cargo test -p codex-tui


------
[Codex
Task](https://chatgpt.com/codex/tasks/task_i_693771b53bc8833088669060dfac2083)
2025-12-09 22:41:10 +00:00
zhao-oai
05e546ee1f fix more typos in execpolicy.md (#7787) 2025-12-09 13:23:14 -08:00
jif-oai
7836aeddae feat: shell snapshotting (#7641) 2025-12-09 18:36:58 +00:00
Job Chong
ac3237721e Fix: gracefully error out for unsupported images (#7478)
Fix for #7459 
## What
Since codex errors out for unsupported images, stop attempting to
base64/attach them and instead emit a clear placeholder when the file
isn’t a supported image MIME.

## Why
Local uploads for unsupported formats (e.g., SVG/GIF/etc.) were
dead-ending after decode failures because of the 400 retry loop. Users
now get an explicit “cannot attach … unsupported image format …”
response.

## How
Replace the fallback read/encode path with MIME detection that bails out
for non-image or unsupported image types, returning a consistent
placeholder. Unreadable and invalid images still produce their existing
error placeholders.
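The bail-out described here can be sketched as follows (the supported MIME set and function name are assumptions for illustration; the real list lives in codex core):

```rust
// Assumed supported set, for illustration only.
const SUPPORTED_IMAGE_MIMES: &[&str] = &["image/png", "image/jpeg", "image/webp"];

/// Returns Some(placeholder) when the file cannot be attached, or None when
/// the normal base64/attach path should proceed.
fn attachment_placeholder(path: &str, mime: &str) -> Option<String> {
    if SUPPORTED_IMAGE_MIMES.contains(&mime) {
        None
    } else {
        Some(format!("cannot attach {path}: unsupported image format {mime}"))
    }
}
```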
2025-12-09 10:28:41 -08:00
Josh McKinney
9df70a0772 Add vim navigation keys to transcript pager (#7550)
## Summary
- add vim-style pager navigation for transcript overlays (j/k,
ctrl+f/b/d/u) without removing existing keys
- add shift-space to page up

------
[Codex
Task](https://chatgpt.com/codex/tasks/task_i_69309d26da508329908b2dc8ca40afb7)
2025-12-09 10:23:11 -08:00
Michael Bolin
a7e3e37da8 fix: allow sendmsg(2) and recvmsg(2) syscalls in our Linux sandbox (#7779)
This changes our default Landlock policy to allow `sendmsg(2)` and
`recvmsg(2)` syscalls. We believe these were originally denied out of an
abundance of caution, but given that both `send(2)` and `recv(2)` are allowed
today (which provide comparable capability to the `*msg` equivalents),
we do not believe allowing them grants any privileges beyond what we
already allow.

Rather than using the syscall as the security boundary, preventing
access to the potentially hazardous file descriptor in the first place
seems like the right layer of defense.

In particular, this makes it possible for `shell-tool-mcp` to run on
Linux when using a read-only sandbox for the Bash process, as
demonstrated by `accept_elicitation_for_prompt_rule()` now succeeding in
CI.
2025-12-09 09:24:01 -08:00
pakrym-oai
164265bed1 Vendor ConPtySystem (#7656)
The repo we were depending on is very large and we only need a very
small part of it.

---------

Co-authored-by: Pavel <pavel@krymets.com>
2025-12-09 17:23:51 +00:00
Tyler Anton
2237b701b6 Fix Nix cargo output hashes for rmcp and filedescriptor (#7762)
Fixes #7759:

- Drop the stale `rmcp` entry from `codex-rs/default.nix`’s
`cargoLock.outputHashes` since the crate now comes from crates.io and no
longer needs a git hash.
- Add the missing hash for the filedescriptor-0.8.3 git dependency (from
`pakrym/wezterm`) so `buildRustPackage` can vendor it.
2025-12-09 09:04:36 -08:00
jif-oai
6382dc2338 chore: enable parallel tc (#7589) 2025-12-09 17:00:56 +00:00
cassirer-openai
80140c6d9d Use codex-max prompt/tools for experimental models. (#7765) 2025-12-09 07:56:23 +00:00
muyuanjin
933e247e9f Fix transcript pager page continuity (#7363)
## What

Fix PageUp/PageDown behaviour in the Ctrl+T transcript overlay so that
paging is continuous and reversible, and add tests to lock in the
expected behaviour.

## Why

Today, paging in the transcript overlay uses the raw viewport height
instead of the effective content height after layout. Because the
overlay reserves some rows for chrome (header/footer), this can cause:

- PageDown to skip transcript lines between pages.
- PageUp/PageDown not to “round-trip” cleanly (PageDown then PageUp does
not always return to the same set of visible lines).

This shows up when inspecting longer transcripts via Ctrl+T; see #7356
for context.

## How

- Add a dedicated `PagerView::page_step` helper that computes the page
size from the last rendered content height and falls back to
`content_area(viewport_area).height` when that is not yet available.
- Use `page_step(...)` for both PageUp and PageDown (including SPACE) so
the scroll step always matches the actual content area height, not the
full viewport height.
- Add a focused test
`transcript_overlay_paging_is_continuous_and_round_trips` that:
  - Renders a synthetic transcript with numbered `line-NN` rows.
- Asserts that successive PageDown operations show continuous line
numbers (no gaps).
- Asserts that PageDown+PageUp and PageUp+PageDown round-trip correctly
from non-edge offsets.

The change is limited to `codex-rs/tui/src/pager_overlay.rs` and only
affects the transcript overlay paging semantics.
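The helper described above reduces to something like this (a sketch with assumed parameter names):

```rust
/// Page-step computation: prefer the content height actually rendered last
/// frame; fall back to the computed content-area height; never step by zero.
fn page_step(last_rendered_content_height: Option<u16>, content_area_height: u16) -> u16 {
    last_rendered_content_height
        .unwrap_or(content_area_height)
        .max(1)
}
```

Using the rendered height rather than the raw viewport height is what keeps paging continuous once the overlay reserves rows for chrome.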

## Related issue

- #7356

## Testing

On Windows 11, using PowerShell 7 in the repo root:

```powershell
cargo test
cargo clippy --tests
cargo fmt -- --config imports_granularity=Item
```

- All tests passed.
- `cargo clippy --tests` reported some pre-existing warnings that are
unrelated to this change; no new lints were introduced in the modified
code.

---------

Signed-off-by: muyuanjin <24222808+muyuanjin@users.noreply.github.com>
Co-authored-by: Eric Traut <etraut@openai.com>
2025-12-08 18:45:20 -08:00
Ahmed Ibrahim
68505abf0f use chatgpt provider for /models (#7756)
This endpoint only exists on ChatGPT.
2025-12-08 17:42:24 -08:00
948 changed files with 63031 additions and 2677 deletions


@@ -1 +1,2 @@
iTerm
psuedo


@@ -0,0 +1,55 @@
name: windows-code-sign
description: Sign Windows binaries with Azure Trusted Signing.
inputs:
  target:
    description: Target triple for the artifacts to sign.
    required: true
  client-id:
    description: Azure Trusted Signing client ID.
    required: true
  tenant-id:
    description: Azure tenant ID for Trusted Signing.
    required: true
  subscription-id:
    description: Azure subscription ID for Trusted Signing.
    required: true
  endpoint:
    description: Azure Trusted Signing endpoint.
    required: true
  account-name:
    description: Azure Trusted Signing account name.
    required: true
  certificate-profile-name:
    description: Certificate profile name for signing.
    required: true
runs:
  using: composite
  steps:
    - name: Azure login for Trusted Signing (OIDC)
      uses: azure/login@v2
      with:
        client-id: ${{ inputs.client-id }}
        tenant-id: ${{ inputs.tenant-id }}
        subscription-id: ${{ inputs.subscription-id }}
    - name: Sign Windows binaries with Azure Trusted Signing
      uses: azure/trusted-signing-action@v0
      with:
        endpoint: ${{ inputs.endpoint }}
        trusted-signing-account-name: ${{ inputs.account-name }}
        certificate-profile-name: ${{ inputs.certificate-profile-name }}
        exclude-environment-credential: true
        exclude-workload-identity-credential: true
        exclude-managed-identity-credential: true
        exclude-shared-token-cache-credential: true
        exclude-visual-studio-credential: true
        exclude-visual-studio-code-credential: true
        exclude-azure-cli-credential: false
        exclude-azure-powershell-credential: true
        exclude-azure-developer-cli-credential: true
        exclude-interactive-browser-credential: true
        cache-dependencies: false
        files: |
          ${{ github.workspace }}/codex-rs/target/${{ inputs.target }}/release/codex.exe
          ${{ github.workspace }}/codex-rs/target/${{ inputs.target }}/release/codex-responses-api-proxy.exe


@@ -412,14 +412,6 @@ jobs:
      - name: Install DotSlash
        uses: facebook/install-dotslash@v2
      - name: Pre-fetch DotSlash artifacts
        # The Bash wrapper is not available on Windows.
        if: ${{ !startsWith(matrix.runner, 'windows') }}
        shell: bash
        run: |
          set -euo pipefail
          dotslash -- fetch exec-server/tests/suite/bash
      - uses: dtolnay/rust-toolchain@1.90
        with:
          targets: ${{ matrix.target }}


@@ -110,6 +110,18 @@ jobs:
          target: ${{ matrix.target }}
          artifacts-dir: ${{ github.workspace }}/codex-rs/target/${{ matrix.target }}/release
      - if: ${{ contains(matrix.target, 'windows') }}
        name: Sign Windows binaries with Azure Trusted Signing
        uses: ./.github/actions/windows-code-sign
        with:
          target: ${{ matrix.target }}
          client-id: ${{ secrets.AZURE_TRUSTED_SIGNING_CLIENT_ID }}
          tenant-id: ${{ secrets.AZURE_TRUSTED_SIGNING_TENANT_ID }}
          subscription-id: ${{ secrets.AZURE_TRUSTED_SIGNING_SUBSCRIPTION_ID }}
          endpoint: ${{ secrets.AZURE_TRUSTED_SIGNING_ENDPOINT }}
          account-name: ${{ secrets.AZURE_TRUSTED_SIGNING_ACCOUNT_NAME }}
          certificate-profile-name: ${{ secrets.AZURE_TRUSTED_SIGNING_CERTIFICATE_PROFILE_NAME }}
      - if: ${{ matrix.runner == 'macos-15-xlarge' }}
        name: Configure Apple code signing
        shell: bash

codex-rs/Cargo.lock generated

@@ -238,48 +238,6 @@ dependencies = [
"term",
]
[[package]]
name = "askama"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f75363874b771be265f4ffe307ca705ef6f3baa19011c149da8674a87f1b75c4"
dependencies = [
"askama_derive",
"itoa",
"percent-encoding",
"serde",
"serde_json",
]
[[package]]
name = "askama_derive"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "129397200fe83088e8a68407a8e2b1f826cf0086b21ccdb866a722c8bcd3a94f"
dependencies = [
"askama_parser",
"basic-toml",
"memchr",
"proc-macro2",
"quote",
"rustc-hash",
"serde",
"serde_derive",
"syn 2.0.104",
]
[[package]]
name = "askama_parser"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6ab5630b3d5eaf232620167977f95eb51f3432fc76852328774afbd242d4358"
dependencies = [
"memchr",
"serde",
"serde_derive",
"winnow",
]
[[package]]
name = "assert-json-diff"
version = "2.0.2"
@@ -557,15 +515,6 @@ version = "0.22.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
[[package]]
name = "basic-toml"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba62675e8242a4c4e806d12f11d136e626e6c8361d6b829310732241652a178a"
dependencies = [
"serde",
]
[[package]]
name = "beef"
version = "0.5.2"
@@ -887,6 +836,7 @@ dependencies = [
"codex-file-search",
"codex-login",
"codex-protocol",
"codex-rmcp-client",
"codex-utils-json-to-toml",
"core_test_support",
"mcp-types",
@@ -900,7 +850,8 @@ dependencies = [
"shlex",
"tempfile",
"tokio",
"toml",
"toml 0.9.5",
"toml_edit",
"tracing",
"tracing-subscriber",
"uuid",
@@ -1040,6 +991,7 @@ dependencies = [
"codex-rmcp-client",
"codex-stdio-to-uds",
"codex-tui",
"codex-tui2",
"codex-windows-sandbox",
"ctor 0.5.0",
"libc",
@@ -1051,7 +1003,7 @@ dependencies = [
"supports-color 3.0.2",
"tempfile",
"tokio",
"toml",
"toml 0.9.5",
"tracing",
]
@@ -1127,7 +1079,7 @@ dependencies = [
"codex-ollama",
"codex-protocol",
"serde",
"toml",
"toml 0.9.5",
]
[[package]]
@@ -1135,7 +1087,6 @@ name = "codex-core"
version = "0.0.0"
dependencies = [
"anyhow",
"askama",
"assert_cmd",
"assert_matches",
"async-channel",
@@ -1148,6 +1099,7 @@ dependencies = [
"codex-apply-patch",
"codex-arg0",
"codex-async-utils",
"codex-client",
"codex-core",
"codex-execpolicy",
"codex-file-search",
@@ -1156,6 +1108,7 @@ dependencies = [
"codex-otel",
"codex-protocol",
"codex-rmcp-client",
"codex-utils-absolute-path",
"codex-utils-pty",
"codex-utils-readiness",
"codex-utils-string",
@@ -1205,7 +1158,7 @@ dependencies = [
"tokio",
"tokio-test",
"tokio-util",
"toml",
"toml 0.9.5",
"toml_edit",
"tracing",
"tracing-test",
@@ -1463,6 +1416,7 @@ dependencies = [
"chrono",
"codex-app-server-protocol",
"codex-protocol",
"codex-utils-absolute-path",
"eventsource-stream",
"http",
"opentelemetry",
@@ -1491,7 +1445,6 @@ name = "codex-protocol"
version = "0.0.0"
dependencies = [
"anyhow",
"base64",
"codex-git",
"codex-utils-image",
"icu_decimal",
@@ -1625,7 +1578,7 @@ dependencies = [
"tokio",
"tokio-stream",
"tokio-util",
"toml",
"toml 0.9.5",
"tracing",
"tracing-appender",
"tracing-subscriber",
@@ -1638,6 +1591,85 @@ dependencies = [
"vt100",
]
[[package]]
name = "codex-tui2"
version = "0.0.0"
dependencies = [
"anyhow",
"arboard",
"assert_matches",
"async-stream",
"base64",
"chrono",
"clap",
"codex-ansi-escape",
"codex-app-server-protocol",
"codex-arg0",
"codex-backend-client",
"codex-common",
"codex-core",
"codex-feedback",
"codex-file-search",
"codex-login",
"codex-protocol",
"codex-tui",
"codex-windows-sandbox",
"color-eyre",
"crossterm",
"derive_more 2.1.0",
"diffy",
"dirs",
"dunce",
"image",
"insta",
"itertools 0.14.0",
"lazy_static",
"libc",
"mcp-types",
"opentelemetry-appender-tracing",
"pathdiff",
"pretty_assertions",
"pulldown-cmark",
"rand 0.9.2",
"ratatui",
"ratatui-macros",
"regex-lite",
"reqwest",
"serde",
"serde_json",
"serial_test",
"shlex",
"strum 0.27.2",
"strum_macros 0.27.2",
"supports-color 3.0.2",
"tempfile",
"textwrap 0.16.2",
"tokio",
"tokio-stream",
"tokio-util",
"toml 0.9.5",
"tracing",
"tracing-appender",
"tracing-subscriber",
"tree-sitter-bash",
"tree-sitter-highlight",
"unicode-segmentation",
"unicode-width 0.2.1",
"url",
"uuid",
"vt100",
]
[[package]]
name = "codex-utils-absolute-path"
version = "0.0.0"
dependencies = [
"path-absolutize",
"serde",
"serde_json",
"tempfile",
]
[[package]]
name = "codex-utils-cache"
version = "0.0.0"
@@ -1665,7 +1697,7 @@ version = "0.0.0"
dependencies = [
"pretty_assertions",
"serde_json",
"toml",
"toml 0.9.5",
]
[[package]]
@@ -1673,8 +1705,13 @@ name = "codex-utils-pty"
version = "0.0.0"
dependencies = [
"anyhow",
"filedescriptor",
"lazy_static",
"log",
"portable-pty",
"shared_library",
"tokio",
"winapi",
]
[[package]]
@@ -1697,6 +1734,8 @@ name = "codex-windows-sandbox"
version = "0.0.0"
dependencies = [
"anyhow",
"base64",
"chrono",
"codex-protocol",
"dirs-next",
"dunce",
@@ -1704,7 +1743,9 @@ dependencies = [
"serde",
"serde_json",
"tempfile",
"windows 0.58.0",
"windows-sys 0.52.0",
"winres",
]
[[package]]
@@ -2563,7 +2604,7 @@ checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78"
dependencies = [
"cfg-if",
"rustix 1.0.8",
"windows-sys 0.52.0",
"windows-sys 0.59.0",
]
[[package]]
@@ -2578,7 +2619,8 @@ dependencies = [
[[package]]
name = "filedescriptor"
version = "0.8.3"
source = "git+https://github.com/pakrym/wezterm?branch=PSUEDOCONSOLE_INHERIT_CURSOR#fe38df8409545a696909aa9a09e63438630f217d"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e40758ed24c9b2eeb76c35fb0aebc66c626084edd827e07e1552279814c6682d"
dependencies = [
"libc",
"thiserror 1.0.69",
@@ -3136,7 +3178,7 @@ dependencies = [
"js-sys",
"log",
"wasm-bindgen",
"windows-core",
"windows-core 0.61.2",
]
[[package]]
@@ -3466,7 +3508,7 @@ checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9"
dependencies = [
"hermit-abi",
"libc",
"windows-sys 0.52.0",
"windows-sys 0.59.0",
]
[[package]]
@@ -4656,7 +4698,8 @@ dependencies = [
[[package]]
name = "portable-pty"
version = "0.9.0"
source = "git+https://github.com/pakrym/wezterm?branch=PSUEDOCONSOLE_INHERIT_CURSOR#fe38df8409545a696909aa9a09e63438630f217d"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4a596a2b3d2752d94f51fac2d4a96737b8705dddd311a32b9af47211f08671e"
dependencies = [
"anyhow",
"bitflags 1.3.2",
@@ -4665,7 +4708,7 @@ dependencies = [
"lazy_static",
"libc",
"log",
"nix 0.29.0",
"nix 0.28.0",
"serial2",
"shared_library",
"shell-words",
@@ -4774,7 +4817,7 @@ dependencies = [
"nix 0.30.1",
"tokio",
"tracing",
"windows",
"windows 0.61.3",
]
[[package]]
@@ -5254,7 +5297,7 @@ dependencies = [
"errno",
"libc",
"linux-raw-sys 0.4.15",
"windows-sys 0.52.0",
"windows-sys 0.59.0",
]
[[package]]
@@ -6640,6 +6683,15 @@ dependencies = [
"tokio",
]
[[package]]
name = "toml"
version = "0.5.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234"
dependencies = [
"serde",
]
[[package]]
name = "toml"
version = "0.9.5"
@@ -7435,7 +7487,7 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
dependencies = [
"windows-sys 0.52.0",
"windows-sys 0.59.0",
]
[[package]]
@@ -7444,6 +7496,16 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows"
version = "0.58.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd04d41d93c4992d421894c18c8b43496aa748dd4c081bac0dc93eb0489272b6"
dependencies = [
"windows-core 0.58.0",
"windows-targets 0.52.6",
]
[[package]]
name = "windows"
version = "0.61.3"
@@ -7451,7 +7513,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893"
dependencies = [
"windows-collections",
"windows-core",
"windows-core 0.61.2",
"windows-future",
"windows-link 0.1.3",
"windows-numerics",
@@ -7463,7 +7525,20 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8"
dependencies = [
"windows-core",
"windows-core 0.61.2",
]
[[package]]
name = "windows-core"
version = "0.58.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ba6d44ec8c2591c134257ce647b7ea6b20335bf6379a27dac5f1641fcf59f99"
dependencies = [
"windows-implement 0.58.0",
"windows-interface 0.58.0",
"windows-result 0.2.0",
"windows-strings 0.1.0",
"windows-targets 0.52.6",
]
[[package]]
@@ -7472,11 +7547,11 @@ version = "0.61.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3"
dependencies = [
"windows-implement",
"windows-interface",
"windows-implement 0.60.0",
"windows-interface 0.59.1",
"windows-link 0.1.3",
"windows-result",
"windows-strings",
"windows-result 0.3.4",
"windows-strings 0.4.2",
]
[[package]]
@@ -7485,11 +7560,22 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e"
dependencies = [
"windows-core",
"windows-core 0.61.2",
"windows-link 0.1.3",
"windows-threading",
]
[[package]]
name = "windows-implement"
version = "0.58.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.104",
]
[[package]]
name = "windows-implement"
version = "0.60.0"
@@ -7501,6 +7587,17 @@ dependencies = [
"syn 2.0.104",
]
[[package]]
name = "windows-interface"
version = "0.58.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "053c4c462dc91d3b1504c6fe5a726dd15e216ba718e84a0e46a88fbe5ded3515"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.104",
]
[[package]]
name = "windows-interface"
version = "0.59.1"
@@ -7530,7 +7627,7 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1"
dependencies = [
"windows-core",
"windows-core 0.61.2",
"windows-link 0.1.3",
]
@@ -7541,8 +7638,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e"
dependencies = [
"windows-link 0.1.3",
"windows-result",
"windows-strings",
"windows-result 0.3.4",
"windows-strings 0.4.2",
]
[[package]]
name = "windows-result"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e"
dependencies = [
"windows-targets 0.52.6",
]
[[package]]
@@ -7554,6 +7660,16 @@ dependencies = [
"windows-link 0.1.3",
]
[[package]]
name = "windows-strings"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10"
dependencies = [
"windows-result 0.2.0",
"windows-targets 0.52.6",
]
[[package]]
name = "windows-strings"
version = "0.4.2"
@@ -7877,6 +7993,15 @@ dependencies = [
"winapi",
]
[[package]]
name = "winres"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b68db261ef59e9e52806f688020631e987592bd83619edccda9c47d42cde4f6c"
dependencies = [
"toml 0.5.11",
]
[[package]]
name = "winsafe"
version = "0.0.19"


@@ -34,6 +34,8 @@ members = [
"stdio-to-uds",
"otel",
"tui",
"tui2",
"utils/absolute-path",
"utils/git",
"utils/cache",
"utils/image",
@@ -88,6 +90,8 @@ codex-responses-api-proxy = { path = "responses-api-proxy" }
codex-rmcp-client = { path = "rmcp-client" }
codex-stdio-to-uds = { path = "stdio-to-uds" }
codex-tui = { path = "tui" }
codex-tui2 = { path = "tui2" }
codex-utils-absolute-path = { path = "utils/absolute-path" }
codex-utils-cache = { path = "utils/cache" }
codex-utils-image = { path = "utils/image" }
codex-utils-json-to-toml = { path = "utils/json-to-toml" }
@@ -105,7 +109,6 @@ allocative = "0.3.3"
ansi-to-tui = "7.0.0"
anyhow = "1"
arboard = { version = "3", features = ["wayland-data-control"] }
askama = "0.14"
assert_cmd = "2"
assert_matches = "1.5.0"
async-channel = "2.3.1"
@@ -289,7 +292,6 @@ opt-level = 0
# Uncomment to debug local changes.
# ratatui = { path = "../../ratatui" }
crossterm = { git = "https://github.com/nornagon/crossterm", branch = "nornagon/color-query" }
portable-pty = { git = "https://github.com/pakrym/wezterm", branch = "PSUEDOCONSOLE_INHERIT_CURSOR" }
ratatui = { git = "https://github.com/nornagon/ratatui", branch = "nornagon-v0.29.0-patch" }
# Uncomment to debug local changes.


@@ -139,6 +139,11 @@ client_request_definitions! {
response: v2::ModelListResponse,
},
McpServerOauthLogin => "mcpServer/oauth/login" {
params: v2::McpServerOauthLoginParams,
response: v2::McpServerOauthLoginResponse,
},
McpServersList => "mcpServers/list" {
params: v2::ListMcpServersParams,
response: v2::ListMcpServersResponse,
@@ -522,8 +527,10 @@ server_notification_definitions! {
ItemCompleted => "item/completed" (v2::ItemCompletedNotification),
AgentMessageDelta => "item/agentMessage/delta" (v2::AgentMessageDeltaNotification),
CommandExecutionOutputDelta => "item/commandExecution/outputDelta" (v2::CommandExecutionOutputDeltaNotification),
TerminalInteraction => "item/commandExecution/terminalInteraction" (v2::TerminalInteractionNotification),
FileChangeOutputDelta => "item/fileChange/outputDelta" (v2::FileChangeOutputDeltaNotification),
McpToolCallProgress => "item/mcpToolCall/progress" (v2::McpToolCallProgressNotification),
McpServerOauthLoginCompleted => "mcpServer/oauthLogin/completed" (v2::McpServerOauthLoginCompletedNotification),
AccountUpdated => "account/updated" (v2::AccountUpdatedNotification),
AccountRateLimitsUpdated => "account/rateLimits/updated" (v2::AccountRateLimitsUpdatedNotification),
ReasoningSummaryTextDelta => "item/reasoning/summaryTextDelta" (v2::ReasoningSummaryTextDeltaNotification),
@@ -647,7 +654,6 @@ mod tests {
command: vec!["echo".to_string(), "hello".to_string()],
cwd: PathBuf::from("/tmp"),
reason: Some("because tests".to_string()),
risk: None,
parsed_cmd: vec![ParsedCommand::Unknown {
cmd: "echo hello".to_string(),
}],
@@ -667,7 +673,6 @@ mod tests {
"command": ["echo", "hello"],
"cwd": "/tmp",
"reason": "because tests",
"risk": null,
"parsedCmd": [
{
"type": "unknown",


@@ -13,7 +13,6 @@ use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::FileChange;
use codex_protocol::protocol::ReviewDecision;
use codex_protocol::protocol::SandboxCommandAssessment;
use codex_protocol::protocol::SandboxPolicy;
use codex_protocol::protocol::SessionSource;
use codex_protocol::protocol::TurnAbortReason;
@@ -226,7 +225,6 @@ pub struct ExecCommandApprovalParams {
pub command: Vec<String>,
pub cwd: PathBuf,
pub reason: Option<String>,
pub risk: Option<SandboxCommandAssessment>,
pub parsed_cmd: Vec<ParsedCommand>,
}


@@ -4,8 +4,10 @@ use std::path::PathBuf;
use crate::protocol::common::AuthMode;
use codex_protocol::account::PlanType;
use codex_protocol::approvals::ExecPolicyAmendment as CoreExecPolicyAmendment;
use codex_protocol::approvals::SandboxCommandAssessment as CoreSandboxCommandAssessment;
use codex_protocol::config_types::ForcedLoginMethod;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::config_types::SandboxMode as CoreSandboxMode;
use codex_protocol::config_types::Verbosity;
use codex_protocol::items::AgentMessageContent as CoreAgentMessageContent;
use codex_protocol::items::TurnItem as CoreTurnItem;
use codex_protocol::models::ResponseItem;
@@ -13,6 +15,7 @@ use codex_protocol::openai_models::ReasoningEffort;
use codex_protocol::parse_command::ParsedCommand as CoreParsedCommand;
use codex_protocol::plan_tool::PlanItemArg as CorePlanItemArg;
use codex_protocol::plan_tool::StepStatus as CorePlanStepStatus;
use codex_protocol::protocol::AskForApproval as CoreAskForApproval;
use codex_protocol::protocol::CodexErrorInfo as CoreCodexErrorInfo;
use codex_protocol::protocol::CreditsSnapshot as CoreCreditsSnapshot;
use codex_protocol::protocol::RateLimitSnapshot as CoreRateLimitSnapshot;
@@ -123,17 +126,68 @@ impl From<CoreCodexErrorInfo> for CodexErrorInfo {
}
}
v2_enum_from_core!(
pub enum AskForApproval from codex_protocol::protocol::AskForApproval {
UnlessTrusted, OnFailure, OnRequest, Never
}
);
#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, JsonSchema, TS)]
#[serde(rename_all = "kebab-case")]
#[ts(rename_all = "kebab-case", export_to = "v2/")]
pub enum AskForApproval {
#[serde(rename = "untrusted")]
#[ts(rename = "untrusted")]
UnlessTrusted,
OnFailure,
OnRequest,
Never,
}
v2_enum_from_core!(
pub enum SandboxMode from codex_protocol::config_types::SandboxMode {
ReadOnly, WorkspaceWrite, DangerFullAccess
impl AskForApproval {
pub fn to_core(self) -> CoreAskForApproval {
match self {
AskForApproval::UnlessTrusted => CoreAskForApproval::UnlessTrusted,
AskForApproval::OnFailure => CoreAskForApproval::OnFailure,
AskForApproval::OnRequest => CoreAskForApproval::OnRequest,
AskForApproval::Never => CoreAskForApproval::Never,
}
}
);
}
impl From<CoreAskForApproval> for AskForApproval {
fn from(value: CoreAskForApproval) -> Self {
match value {
CoreAskForApproval::UnlessTrusted => AskForApproval::UnlessTrusted,
CoreAskForApproval::OnFailure => AskForApproval::OnFailure,
CoreAskForApproval::OnRequest => AskForApproval::OnRequest,
CoreAskForApproval::Never => AskForApproval::Never,
}
}
}
#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, JsonSchema, TS)]
#[serde(rename_all = "kebab-case")]
#[ts(rename_all = "kebab-case", export_to = "v2/")]
pub enum SandboxMode {
ReadOnly,
WorkspaceWrite,
DangerFullAccess,
}
impl SandboxMode {
pub fn to_core(self) -> CoreSandboxMode {
match self {
SandboxMode::ReadOnly => CoreSandboxMode::ReadOnly,
SandboxMode::WorkspaceWrite => CoreSandboxMode::WorkspaceWrite,
SandboxMode::DangerFullAccess => CoreSandboxMode::DangerFullAccess,
}
}
}
impl From<CoreSandboxMode> for SandboxMode {
fn from(value: CoreSandboxMode) -> Self {
match value {
CoreSandboxMode::ReadOnly => SandboxMode::ReadOnly,
CoreSandboxMode::WorkspaceWrite => SandboxMode::WorkspaceWrite,
CoreSandboxMode::DangerFullAccess => SandboxMode::DangerFullAccess,
}
}
}
v2_enum_from_core!(
pub enum ReviewDelivery from codex_protocol::protocol::ReviewDelivery {
@@ -160,6 +214,72 @@ pub enum ConfigLayerName {
User,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Default, JsonSchema, TS)]
#[serde(rename_all = "snake_case")]
#[ts(export_to = "v2/")]
pub struct SandboxWorkspaceWrite {
#[serde(default)]
pub writable_roots: Vec<PathBuf>,
#[serde(default)]
pub network_access: bool,
#[serde(default)]
pub exclude_tmpdir_env_var: bool,
#[serde(default)]
pub exclude_slash_tmp: bool,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "snake_case")]
#[ts(export_to = "v2/")]
pub struct ToolsV2 {
#[serde(alias = "web_search_request")]
pub web_search: Option<bool>,
pub view_image: Option<bool>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "snake_case")]
#[ts(export_to = "v2/")]
pub struct ProfileV2 {
pub model: Option<String>,
pub model_provider: Option<String>,
pub approval_policy: Option<AskForApproval>,
pub model_reasoning_effort: Option<ReasoningEffort>,
pub model_reasoning_summary: Option<ReasoningSummary>,
pub model_verbosity: Option<Verbosity>,
pub chatgpt_base_url: Option<String>,
#[serde(default, flatten)]
pub additional: HashMap<String, JsonValue>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "snake_case")]
#[ts(export_to = "v2/")]
pub struct Config {
pub model: Option<String>,
pub review_model: Option<String>,
pub model_context_window: Option<i64>,
pub model_auto_compact_token_limit: Option<i64>,
pub model_provider: Option<String>,
pub approval_policy: Option<AskForApproval>,
pub sandbox_mode: Option<SandboxMode>,
pub sandbox_workspace_write: Option<SandboxWorkspaceWrite>,
pub forced_chatgpt_workspace_id: Option<String>,
pub forced_login_method: Option<ForcedLoginMethod>,
pub tools: Option<ToolsV2>,
pub profile: Option<String>,
#[serde(default)]
pub profiles: HashMap<String, ProfileV2>,
pub instructions: Option<String>,
pub developer_instructions: Option<String>,
pub compact_prompt: Option<String>,
pub model_reasoning_effort: Option<ReasoningEffort>,
pub model_reasoning_summary: Option<ReasoningSummary>,
pub model_verbosity: Option<Verbosity>,
#[serde(default, flatten)]
pub additional: HashMap<String, JsonValue>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
@@ -238,7 +358,7 @@ pub struct ConfigReadParams {
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub struct ConfigReadResponse {
pub config: JsonValue,
pub config: Config,
pub origins: HashMap<String, ConfigLayerMetadata>,
#[serde(skip_serializing_if = "Option::is_none")]
pub layers: Option<Vec<ConfigLayer>>,
@@ -275,14 +395,6 @@ pub struct ConfigEdit {
pub merge_strategy: MergeStrategy,
}
v2_enum_from_core!(
pub enum CommandRiskLevel from codex_protocol::approvals::SandboxRiskLevel {
Low,
Medium,
High
}
);
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
@@ -362,32 +474,6 @@ impl From<codex_protocol::protocol::SandboxPolicy> for SandboxPolicy {
}
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub struct SandboxCommandAssessment {
pub description: String,
pub risk_level: CommandRiskLevel,
}
impl SandboxCommandAssessment {
pub fn into_core(self) -> CoreSandboxCommandAssessment {
CoreSandboxCommandAssessment {
description: self.description,
risk_level: self.risk_level.to_core(),
}
}
}
impl From<CoreSandboxCommandAssessment> for SandboxCommandAssessment {
fn from(value: CoreSandboxCommandAssessment) -> Self {
Self {
description: value.description,
risk_level: CommandRiskLevel::from(value.risk_level),
}
}
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)]
#[serde(transparent)]
#[ts(type = "Array<string>", export_to = "v2/")]
@@ -582,10 +668,21 @@ pub struct CancelLoginAccountParams {
pub login_id: String,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub enum CancelLoginAccountStatus {
Canceled,
NotFound,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub struct CancelLoginAccountResponse {}
pub struct CancelLoginAccountResponse {
pub status: CancelLoginAccountStatus,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
@@ -688,6 +785,26 @@ pub struct ListMcpServersResponse {
pub next_cursor: Option<String>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub struct McpServerOauthLoginParams {
pub name: String,
#[serde(default, skip_serializing_if = "Option::is_none")]
#[ts(optional)]
pub scopes: Option<Vec<String>>,
#[serde(default, skip_serializing_if = "Option::is_none")]
#[ts(optional)]
pub timeout_secs: Option<i64>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub struct McpServerOauthLoginResponse {
pub authorization_url: String,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
@@ -1437,6 +1554,17 @@ pub struct ReasoningTextDeltaNotification {
pub content_index: i64,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub struct TerminalInteractionNotification {
pub thread_id: String,
pub turn_id: String,
pub item_id: String,
pub process_id: String,
pub stdin: String,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
@@ -1467,6 +1595,17 @@ pub struct McpToolCallProgressNotification {
pub message: String,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
pub struct McpServerOauthLoginCompletedNotification {
pub name: String,
pub success: bool,
#[serde(default, skip_serializing_if = "Option::is_none")]
#[ts(optional)]
pub error: Option<String>,
}
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
#[ts(export_to = "v2/")]
@@ -1493,8 +1632,6 @@ pub struct CommandExecutionRequestApprovalParams {
pub item_id: String,
/// Optional explanatory reason (e.g. request for network access).
pub reason: Option<String>,
/// Optional model-provided risk assessment describing the blocked command.
pub risk: Option<SandboxCommandAssessment>,
/// Optional proposed execpolicy amendment to allow similar commands without prompting.
pub proposed_execpolicy_amendment: Option<ExecPolicyAmendment>,
}


@@ -553,6 +553,10 @@ impl CodexClient {
print!("{}", delta.delta);
std::io::stdout().flush().ok();
}
ServerNotification::TerminalInteraction(delta) => {
println!("[stdin sent: {}]", delta.stdin);
std::io::stdout().flush().ok();
}
ServerNotification::ItemStarted(payload) => {
println!("\n< item started: {:?}", payload.item);
}
@@ -752,7 +756,6 @@ impl CodexClient {
turn_id,
item_id,
reason,
risk,
proposed_execpolicy_amendment,
} = params;
@@ -762,9 +765,6 @@ impl CodexClient {
if let Some(reason) = reason.as_deref() {
println!("< reason: {reason}");
}
if let Some(risk) = risk.as_ref() {
println!("< risk assessment: {risk:?}");
}
if let Some(execpolicy_amendment) = proposed_execpolicy_amendment.as_ref() {
println!("< proposed execpolicy amendment: {execpolicy_amendment:?}");
}


@@ -26,6 +26,7 @@ codex-login = { workspace = true }
codex-protocol = { workspace = true }
codex-app-server-protocol = { workspace = true }
codex-feedback = { workspace = true }
codex-rmcp-client = { workspace = true }
codex-utils-json-to-toml = { workspace = true }
chrono = { workspace = true }
serde = { workspace = true, features = ["derive"] }
@@ -34,6 +35,7 @@ sha2 = { workspace = true }
mcp-types = { workspace = true }
tempfile = { workspace = true }
toml = { workspace = true }
toml_edit = { workspace = true }
tokio = { workspace = true, features = [
"io-std",
"macros",


@@ -65,6 +65,8 @@ Example (from OpenAI's official VSCode extension):
- `review/start` — kick off Codex's automated reviewer for a thread; responds like `turn/start` and emits `item/started`/`item/completed` notifications with `enteredReviewMode` and `exitedReviewMode` items, plus a final assistant `agentMessage` containing the review.
- `command/exec` — run a single command under the server sandbox without starting a thread/turn (handy for utilities and validation).
- `model/list` — list available models (with reasoning effort options).
- `mcpServer/oauth/login` — start an OAuth login for a configured MCP server; returns an `authorization_url` and later emits `mcpServer/oauthLogin/completed` once the browser flow finishes.
- `mcpServers/list` — enumerate configured MCP servers with their tools, resources, resource templates, and auth status; supports cursor+limit pagination.
- `feedback/upload` — submit a feedback report (classification + optional reason/logs and conversation_id); returns the tracking thread id.
- `config/read` — fetch the effective config on disk after resolving config layering.
@@ -366,6 +368,8 @@ The JSON-RPC auth/account surface exposes request/response methods plus server-i
- `account/logout` — sign out; triggers `account/updated`.
- `account/updated` (notify) — emitted whenever auth mode changes (`authMode`: `apikey`, `chatgpt`, or `null`).
- `account/rateLimits/read` — fetch ChatGPT rate limits; updates arrive via `account/rateLimits/updated` (notify).
- `account/rateLimits/updated` (notify) — emitted whenever a user's ChatGPT rate limits change.
- `mcpServer/oauthLogin/completed` (notify) — emitted after a `mcpServer/oauth/login` flow finishes for a server; payload includes `{ name, success, error? }`.
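The OAuth endpoints above can be sketched as JSON-RPC messages. This is a minimal illustration, not output captured from the server: the `"docs-server"` name and the id value are invented, and the camelCase key spellings (`timeoutSecs`, `authorizationUrl`) are inferred from the `#[serde(rename_all = "camelCase")]` attributes on `McpServerOauthLoginParams` and `McpServerOauthLoginResponse`.

```python
import json

# Client -> server: start the OAuth flow for a configured MCP server.
# "scopes" and "timeoutSecs" are optional and omitted when unset.
request = {
    "jsonrpc": "2.0",
    "id": 1,
    "method": "mcpServer/oauth/login",
    "params": {"name": "docs-server", "timeoutSecs": 300},
}

# Server -> client: the response carries the URL to open in a browser.
response = {
    "jsonrpc": "2.0",
    "id": 1,
    "result": {"authorizationUrl": "https://auth.example.com/authorize?..."},
}

# Server -> client: once the browser flow finishes, a notification
# (no "id" field) reports the outcome; "error" appears only on failure.
notification = {
    "jsonrpc": "2.0",
    "method": "mcpServer/oauthLogin/completed",
    "params": {"name": "docs-server", "success": True},
}

print(json.dumps(request))
```

Note that the request/response pair is correlated by `id`, while the completion notification is matched by the server `name` instead, since it arrives outside any request context.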
### 1) Check auth state


@@ -34,9 +34,9 @@ use codex_app_server_protocol::PatchChangeKind as V2PatchChangeKind;
use codex_app_server_protocol::ReasoningSummaryPartAddedNotification;
use codex_app_server_protocol::ReasoningSummaryTextDeltaNotification;
use codex_app_server_protocol::ReasoningTextDeltaNotification;
use codex_app_server_protocol::SandboxCommandAssessment as V2SandboxCommandAssessment;
use codex_app_server_protocol::ServerNotification;
use codex_app_server_protocol::ServerRequestPayload;
use codex_app_server_protocol::TerminalInteractionNotification;
use codex_app_server_protocol::ThreadItem;
use codex_app_server_protocol::ThreadTokenUsage;
use codex_app_server_protocol::ThreadTokenUsageUpdatedNotification;
@@ -179,7 +179,6 @@ pub(crate) async fn apply_bespoke_event_handling(
command,
cwd,
reason,
risk,
proposed_execpolicy_amendment,
parsed_cmd,
}) => match api_version {
@@ -190,7 +189,6 @@ pub(crate) async fn apply_bespoke_event_handling(
command,
cwd,
reason,
risk,
parsed_cmd,
};
let rx = outgoing
@@ -218,7 +216,6 @@ pub(crate) async fn apply_bespoke_event_handling(
// and emit the corresponding EventMsg, we repurpose the call_id as the item_id.
item_id: item_id.clone(),
reason,
risk: risk.map(V2SandboxCommandAssessment::from),
proposed_execpolicy_amendment: proposed_execpolicy_amendment_v2,
};
let rx = outgoing
@@ -573,6 +570,20 @@ pub(crate) async fn apply_bespoke_event_handling(
.await;
}
}
EventMsg::TerminalInteraction(terminal_event) => {
let item_id = terminal_event.call_id.clone();
let notification = TerminalInteractionNotification {
thread_id: conversation_id.to_string(),
turn_id: event_turn_id.clone(),
item_id,
process_id: terminal_event.process_id,
stdin: terminal_event.stdin,
};
outgoing
.send_server_notification(ServerNotification::TerminalInteraction(notification))
.await;
}
EventMsg::ExecCommandEnd(exec_command_end_event) => {
let ExecCommandEndEvent {
call_id,
@@ -1199,7 +1210,7 @@ async fn construct_mcp_tool_call_notification(
}
}
/// simiilar to handle_mcp_tool_call_end in exec
/// similar to handle_mcp_tool_call_end in exec
async fn construct_mcp_tool_call_end_notification(
end_event: McpToolCallEndEvent,
thread_id: String,


@@ -19,6 +19,7 @@ use codex_app_server_protocol::AuthMode;
use codex_app_server_protocol::AuthStatusChangeNotification;
use codex_app_server_protocol::CancelLoginAccountParams;
use codex_app_server_protocol::CancelLoginAccountResponse;
use codex_app_server_protocol::CancelLoginAccountStatus;
use codex_app_server_protocol::CancelLoginChatGptResponse;
use codex_app_server_protocol::ClientRequest;
use codex_app_server_protocol::CommandExecParams;
@@ -55,6 +56,9 @@ use codex_app_server_protocol::LoginChatGptResponse;
use codex_app_server_protocol::LogoutAccountResponse;
use codex_app_server_protocol::LogoutChatGptResponse;
use codex_app_server_protocol::McpServer;
use codex_app_server_protocol::McpServerOauthLoginCompletedNotification;
use codex_app_server_protocol::McpServerOauthLoginParams;
use codex_app_server_protocol::McpServerOauthLoginResponse;
use codex_app_server_protocol::ModelListParams;
use codex_app_server_protocol::ModelListResponse;
use codex_app_server_protocol::NewConversationParams;
@@ -115,6 +119,7 @@ use codex_core::config::Config;
use codex_core::config::ConfigOverrides;
use codex_core::config::ConfigToml;
use codex_core::config::edit::ConfigEditsBuilder;
use codex_core::config::types::McpServerTransportConfig;
use codex_core::config_loader::load_config_as_toml;
use codex_core::default_client::get_codex_user_agent;
use codex_core::exec::ExecParams;
@@ -132,6 +137,7 @@ use codex_core::protocol::ReviewRequest;
use codex_core::protocol::ReviewTarget as CoreReviewTarget;
use codex_core::protocol::SessionConfiguredEvent;
use codex_core::read_head_for_summary;
use codex_core::sandboxing::SandboxPermissions;
use codex_feedback::CodexFeedback;
use codex_login::ServerOptions as LoginServerOptions;
use codex_login::ShutdownHandle;
@@ -147,6 +153,7 @@ use codex_protocol::protocol::RolloutItem;
use codex_protocol::protocol::SessionMetaLine;
use codex_protocol::protocol::USER_MESSAGE_BEGIN;
use codex_protocol::user_input::UserInput as CoreInputItem;
use codex_rmcp_client::perform_oauth_login_return_url;
use codex_utils_json_to_toml::json_to_toml;
use std::collections::HashMap;
use std::collections::HashSet;
@@ -161,6 +168,7 @@ use std::time::Duration;
use tokio::select;
use tokio::sync::Mutex;
use tokio::sync::oneshot;
use toml::Value as TomlValue;
use tracing::error;
use tracing::info;
use tracing::warn;
@@ -178,6 +186,9 @@ pub(crate) struct TurnSummary {
pub(crate) type TurnSummaryStore = Arc<Mutex<HashMap<ConversationId, TurnSummary>>>;
const THREAD_LIST_DEFAULT_LIMIT: usize = 25;
const THREAD_LIST_MAX_LIMIT: usize = 100;
// Duration before a ChatGPT login attempt is abandoned.
const LOGIN_CHATGPT_TIMEOUT: Duration = Duration::from_secs(10 * 60);
struct ActiveLogin {
@@ -185,6 +196,11 @@ struct ActiveLogin {
login_id: Uuid,
}
#[derive(Clone, Copy, Debug)]
enum CancelLoginError {
NotFound(Uuid),
}
impl Drop for ActiveLogin {
fn drop(&mut self) {
self.shutdown_handle.shutdown();
@@ -198,6 +214,7 @@ pub(crate) struct CodexMessageProcessor {
outgoing: Arc<OutgoingMessageSender>,
codex_linux_sandbox_exe: Option<PathBuf>,
config: Arc<Config>,
cli_overrides: Vec<(String, TomlValue)>,
conversation_listeners: HashMap<Uuid, oneshot::Sender<()>>,
active_login: Arc<Mutex<Option<ActiveLogin>>>,
// Queue of pending interrupt requests per conversation. We reply when TurnAborted arrives.
@@ -244,6 +261,7 @@ impl CodexMessageProcessor {
outgoing: Arc<OutgoingMessageSender>,
codex_linux_sandbox_exe: Option<PathBuf>,
config: Arc<Config>,
cli_overrides: Vec<(String, TomlValue)>,
feedback: CodexFeedback,
) -> Self {
Self {
@@ -252,6 +270,7 @@ impl CodexMessageProcessor {
outgoing,
codex_linux_sandbox_exe,
config,
cli_overrides,
conversation_listeners: HashMap::new(),
active_login: Arc::new(Mutex::new(None)),
pending_interrupts: Arc::new(Mutex::new(HashMap::new())),
@@ -261,6 +280,16 @@ impl CodexMessageProcessor {
}
}
async fn load_latest_config(&self) -> Result<Config, JSONRPCErrorError> {
Config::load_with_cli_overrides(self.cli_overrides.clone(), ConfigOverrides::default())
.await
.map_err(|err| JSONRPCErrorError {
code: INTERNAL_ERROR_CODE,
message: format!("failed to reload config: {err}"),
data: None,
})
}
fn review_request_from_target(
target: ApiReviewTarget,
) -> Result<(ReviewRequest, String), JSONRPCErrorError> {
@@ -369,6 +398,9 @@ impl CodexMessageProcessor {
ClientRequest::ModelList { request_id, params } => {
self.list_models(request_id, params).await;
}
ClientRequest::McpServerOauthLogin { request_id, params } => {
self.mcp_server_oauth_login(request_id, params).await;
}
ClientRequest::McpServersList { request_id, params } => {
self.list_mcp_servers(request_id, params).await;
}
@@ -802,7 +834,7 @@ impl CodexMessageProcessor {
async fn cancel_login_chatgpt_common(
&mut self,
login_id: Uuid,
) -> std::result::Result<(), JSONRPCErrorError> {
) -> std::result::Result<(), CancelLoginError> {
let mut guard = self.active_login.lock().await;
if guard.as_ref().map(|l| l.login_id) == Some(login_id) {
if let Some(active) = guard.take() {
@@ -810,11 +842,7 @@ impl CodexMessageProcessor {
}
Ok(())
} else {
Err(JSONRPCErrorError {
code: INVALID_REQUEST_ERROR_CODE,
message: format!("login id not found: {login_id}"),
data: None,
})
Err(CancelLoginError::NotFound(login_id))
}
}
@@ -825,7 +853,12 @@ impl CodexMessageProcessor {
.send_response(request_id, CancelLoginChatGptResponse {})
.await;
}
Err(error) => {
Err(CancelLoginError::NotFound(missing_login_id)) => {
let error = JSONRPCErrorError {
code: INVALID_REQUEST_ERROR_CODE,
message: format!("login id not found: {missing_login_id}"),
data: None,
};
self.outgoing.send_error(request_id, error).await;
}
}
@@ -834,16 +867,14 @@ impl CodexMessageProcessor {
async fn cancel_login_v2(&mut self, request_id: RequestId, params: CancelLoginAccountParams) {
let login_id = params.login_id;
match Uuid::parse_str(&login_id) {
Ok(uuid) => match self.cancel_login_chatgpt_common(uuid).await {
Ok(()) => {
self.outgoing
.send_response(request_id, CancelLoginAccountResponse {})
.await;
}
Err(error) => {
self.outgoing.send_error(request_id, error).await;
}
},
Ok(uuid) => {
let status = match self.cancel_login_chatgpt_common(uuid).await {
Ok(()) => CancelLoginAccountStatus::Canceled,
Err(CancelLoginError::NotFound(_)) => CancelLoginAccountStatus::NotFound,
};
let response = CancelLoginAccountResponse { status };
self.outgoing.send_response(request_id, response).await;
}
Err(_) => {
let error = JSONRPCErrorError {
code: INVALID_REQUEST_ERROR_CODE,
@@ -1169,7 +1200,7 @@ impl CodexMessageProcessor {
cwd,
expiration: timeout_ms.into(),
env,
with_escalated_permissions: None,
sandbox_permissions: SandboxPermissions::UseDefault,
justification: None,
arg0: None,
};
@@ -1485,10 +1516,12 @@ impl CodexMessageProcessor {
model_providers,
} = params;
let page_size = limit.unwrap_or(25).max(1) as usize;
let requested_page_size = limit
.map(|value| value as usize)
.unwrap_or(THREAD_LIST_DEFAULT_LIMIT)
.clamp(1, THREAD_LIST_MAX_LIMIT);
let (summaries, next_cursor) = match self
.list_conversations_common(page_size, cursor, model_providers)
.list_conversations_common(requested_page_size, cursor, model_providers)
.await
{
Ok(r) => r,
@@ -1499,7 +1532,6 @@ impl CodexMessageProcessor {
};
let data = summaries.into_iter().map(summary_to_thread).collect();
let response = ThreadListResponse { data, next_cursor };
self.outgoing.send_response(request_id, response).await;
}
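The hunk above replaces `limit.unwrap_or(25).max(1)` with a clamp against the shared `THREAD_LIST_DEFAULT_LIMIT`/`THREAD_LIST_MAX_LIMIT` constants, so oversized requests are capped rather than passed through. A minimal stdlib-only sketch of that behavior (constants copied from the diff):

```rust
const THREAD_LIST_DEFAULT_LIMIT: usize = 25;
const THREAD_LIST_MAX_LIMIT: usize = 100;

// Mirror of the new page-size computation: default when absent,
// then clamp into the [1, max] range.
fn requested_page_size(limit: Option<usize>) -> usize {
    limit
        .unwrap_or(THREAD_LIST_DEFAULT_LIMIT)
        .clamp(1, THREAD_LIST_MAX_LIMIT)
}

fn main() {
    assert_eq!(requested_page_size(None), 25); // default
    assert_eq!(requested_page_size(Some(0)), 1); // floor at 1
    assert_eq!(requested_page_size(Some(500)), 100); // cap at the max limit
    println!("ok");
}
```

The old `max(1)` form enforced only the lower bound; the clamp closes the gap where a client could request an unbounded page size.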
@@ -1777,10 +1809,12 @@ impl CodexMessageProcessor {
cursor,
model_providers,
} = params;
let page_size = page_size.unwrap_or(25).max(1);
let requested_page_size = page_size
.unwrap_or(THREAD_LIST_DEFAULT_LIMIT)
.clamp(1, THREAD_LIST_MAX_LIMIT);
match self
.list_conversations_common(page_size, cursor, model_providers)
.list_conversations_common(requested_page_size, cursor, model_providers)
.await
{
Ok((items, next_cursor)) => {
@@ -1795,12 +1829,15 @@ impl CodexMessageProcessor {
async fn list_conversations_common(
&self,
page_size: usize,
requested_page_size: usize,
cursor: Option<String>,
model_providers: Option<Vec<String>>,
) -> Result<(Vec<ConversationSummary>, Option<String>), JSONRPCErrorError> {
let cursor_obj: Option<RolloutCursor> = cursor.as_ref().and_then(|s| parse_cursor(s));
let cursor_ref = cursor_obj.as_ref();
let mut cursor_obj: Option<RolloutCursor> = cursor.as_ref().and_then(|s| parse_cursor(s));
let mut last_cursor = cursor_obj.clone();
let mut remaining = requested_page_size;
let mut items = Vec::with_capacity(requested_page_size);
let mut next_cursor: Option<String> = None;
let model_provider_filter = match model_providers {
Some(providers) => {
@@ -1814,55 +1851,76 @@ impl CodexMessageProcessor {
};
let fallback_provider = self.config.model_provider_id.clone();
let page = match RolloutRecorder::list_conversations(
&self.config.codex_home,
page_size,
cursor_ref,
INTERACTIVE_SESSION_SOURCES,
model_provider_filter.as_deref(),
fallback_provider.as_str(),
)
.await
{
Ok(p) => p,
Err(err) => {
return Err(JSONRPCErrorError {
code: INTERNAL_ERROR_CODE,
message: format!("failed to list conversations: {err}"),
data: None,
});
while remaining > 0 {
let page_size = remaining.min(THREAD_LIST_MAX_LIMIT);
let page = RolloutRecorder::list_conversations(
&self.config.codex_home,
page_size,
cursor_obj.as_ref(),
INTERACTIVE_SESSION_SOURCES,
model_provider_filter.as_deref(),
fallback_provider.as_str(),
)
.await
.map_err(|err| JSONRPCErrorError {
code: INTERNAL_ERROR_CODE,
message: format!("failed to list conversations: {err}"),
data: None,
})?;
let mut filtered = page
.items
.into_iter()
.filter_map(|it| {
let session_meta_line = it.head.first().and_then(|first| {
serde_json::from_value::<SessionMetaLine>(first.clone()).ok()
})?;
extract_conversation_summary(
it.path,
&it.head,
&session_meta_line.meta,
session_meta_line.git.as_ref(),
fallback_provider.as_str(),
)
})
.collect::<Vec<_>>();
if filtered.len() > remaining {
filtered.truncate(remaining);
}
};
items.extend(filtered);
remaining = requested_page_size.saturating_sub(items.len());
let items = page
.items
.into_iter()
.filter_map(|it| {
let session_meta_line = it.head.first().and_then(|first| {
serde_json::from_value::<SessionMetaLine>(first.clone()).ok()
})?;
extract_conversation_summary(
it.path,
&it.head,
&session_meta_line.meta,
session_meta_line.git.as_ref(),
fallback_provider.as_str(),
)
})
.collect::<Vec<_>>();
// Encode RolloutCursor into the JSON-RPC string form returned to clients.
let next_cursor_value = page.next_cursor.clone();
next_cursor = next_cursor_value
.as_ref()
.and_then(|cursor| serde_json::to_value(cursor).ok())
.and_then(|value| value.as_str().map(str::to_owned));
if remaining == 0 {
break;
}
// Encode next_cursor as a plain string
let next_cursor = page
.next_cursor
.and_then(|cursor| serde_json::to_value(&cursor).ok())
.and_then(|value| value.as_str().map(str::to_owned));
match next_cursor_value {
Some(cursor_val) if remaining > 0 => {
                        // Break if pagination would reuse the same cursor; this avoids
                        // an infinite loop when filtering drops everything on the page.
if last_cursor.as_ref() == Some(&cursor_val) {
next_cursor = None;
break;
}
last_cursor = Some(cursor_val.clone());
cursor_obj = Some(cursor_val);
}
_ => break,
}
}
Ok((items, next_cursor))
}
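The rewritten `list_conversations_common` keeps fetching pages until the requested count is filled, because provider filtering can drop every item on a page; it also breaks if the backend hands back the same cursor twice. A self-contained sketch of that fetch-until-filled pattern, with a slice of `(already-filtered items, next cursor)` pairs standing in for `RolloutRecorder::list_conversations` (names and types here are illustrative):

```rust
// Hypothetical page source: each entry is (items surviving the filter, next cursor).
fn fetch_page(cursor: usize, pages: &[(Vec<u32>, Option<usize>)]) -> (Vec<u32>, Option<usize>) {
    pages[cursor].clone()
}

fn collect(requested: usize, pages: &[(Vec<u32>, Option<usize>)]) -> Vec<u32> {
    let mut items = Vec::with_capacity(requested);
    let mut cursor = Some(0usize);
    let mut last_cursor: Option<usize> = None;
    while items.len() < requested {
        let Some(cur) = cursor else { break };
        let (mut page_items, next) = fetch_page(cur, pages);
        // Truncate so a page never overfills the requested count.
        page_items.truncate(requested - items.len());
        items.extend(page_items);
        match next {
            // Advance only if the cursor actually moved; a repeated cursor
            // would loop forever when filtering empties every page.
            Some(n) if last_cursor != Some(n) => {
                last_cursor = Some(n);
                cursor = Some(n);
            }
            _ => break,
        }
    }
    items
}

fn main() {
    // Fills across pages, including an all-filtered-out first page.
    let pages = [(vec![], Some(1)), (vec![1, 2], Some(2)), (vec![3, 4], None)];
    assert_eq!(collect(3, &pages), vec![1, 2, 3]);
    // Terminates when the backend repeats a cursor instead of advancing.
    let stuck = [(vec![1], Some(1)), (vec![], Some(1))];
    assert_eq!(collect(3, &stuck), vec![1]);
    println!("ok");
}
```

The real method additionally re-encodes the final `RolloutCursor` into its JSON-RPC string form, but the loop shape and the repeated-cursor guard are the same.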
async fn list_models(&self, request_id: RequestId, params: ModelListParams) {
let ModelListParams { limit, cursor } = params;
let models = supported_models(self.conversation_manager.clone()).await;
let models = supported_models(self.conversation_manager.clone(), &self.config).await;
let total = models.len();
if total == 0 {
@@ -1916,6 +1974,110 @@ impl CodexMessageProcessor {
self.outgoing.send_response(request_id, response).await;
}
async fn mcp_server_oauth_login(
&self,
request_id: RequestId,
params: McpServerOauthLoginParams,
) {
let config = match self.load_latest_config().await {
Ok(config) => config,
Err(error) => {
self.outgoing.send_error(request_id, error).await;
return;
}
};
if !config.features.enabled(Feature::RmcpClient) {
let error = JSONRPCErrorError {
code: INVALID_REQUEST_ERROR_CODE,
message: "OAuth login is only supported when [features].rmcp_client is true in config.toml".to_string(),
data: None,
};
self.outgoing.send_error(request_id, error).await;
return;
}
let McpServerOauthLoginParams {
name,
scopes,
timeout_secs,
} = params;
let Some(server) = config.mcp_servers.get(&name) else {
let error = JSONRPCErrorError {
code: INVALID_REQUEST_ERROR_CODE,
message: format!("No MCP server named '{name}' found."),
data: None,
};
self.outgoing.send_error(request_id, error).await;
return;
};
let (url, http_headers, env_http_headers) = match &server.transport {
McpServerTransportConfig::StreamableHttp {
url,
http_headers,
env_http_headers,
..
} => (url.clone(), http_headers.clone(), env_http_headers.clone()),
_ => {
let error = JSONRPCErrorError {
code: INVALID_REQUEST_ERROR_CODE,
message: "OAuth login is only supported for streamable HTTP servers."
.to_string(),
data: None,
};
self.outgoing.send_error(request_id, error).await;
return;
}
};
match perform_oauth_login_return_url(
&name,
&url,
config.mcp_oauth_credentials_store_mode,
http_headers,
env_http_headers,
scopes.as_deref().unwrap_or_default(),
timeout_secs,
)
.await
{
Ok(handle) => {
let authorization_url = handle.authorization_url().to_string();
let notification_name = name.clone();
let outgoing = Arc::clone(&self.outgoing);
tokio::spawn(async move {
let (success, error) = match handle.wait().await {
Ok(()) => (true, None),
Err(err) => (false, Some(err.to_string())),
};
let notification = ServerNotification::McpServerOauthLoginCompleted(
McpServerOauthLoginCompletedNotification {
name: notification_name,
success,
error,
},
);
outgoing.send_server_notification(notification).await;
});
let response = McpServerOauthLoginResponse { authorization_url };
self.outgoing.send_response(request_id, response).await;
}
Err(err) => {
let error = JSONRPCErrorError {
code: INTERNAL_ERROR_CODE,
message: format!("failed to login to MCP server '{name}': {err}"),
data: None,
};
self.outgoing.send_error(request_id, error).await;
}
}
}
async fn list_mcp_servers(&self, request_id: RequestId, params: ListMcpServersParams) {
let snapshot = collect_mcp_snapshot(self.config.as_ref()).await;
@@ -2669,7 +2831,7 @@ impl CodexMessageProcessor {
})?;
let mut config = self.config.as_ref().clone();
config.model = self.config.review_model.clone();
config.model = Some(self.config.review_model.clone());
let NewConversation {
conversation_id,


@@ -1,6 +1,6 @@
use crate::error_code::INTERNAL_ERROR_CODE;
use crate::error_code::INVALID_REQUEST_ERROR_CODE;
use anyhow::anyhow;
use codex_app_server_protocol::Config;
use codex_app_server_protocol::ConfigBatchWriteParams;
use codex_app_server_protocol::ConfigLayer;
use codex_app_server_protocol::ConfigLayerMetadata;
@@ -15,6 +15,8 @@ use codex_app_server_protocol::MergeStrategy;
use codex_app_server_protocol::OverriddenMetadata;
use codex_app_server_protocol::WriteStatus;
use codex_core::config::ConfigToml;
use codex_core::config::edit::ConfigEdit;
use codex_core::config::edit::ConfigEditsBuilder;
use codex_core::config_loader::LoadedConfigLayers;
use codex_core::config_loader::LoaderOverrides;
use codex_core::config_loader::load_config_layers_with_overrides;
@@ -26,9 +28,8 @@ use sha2::Sha256;
use std::collections::HashMap;
use std::path::Path;
use std::path::PathBuf;
use tempfile::NamedTempFile;
use tokio::task;
use toml::Value as TomlValue;
use toml_edit::Item as TomlItem;
const SESSION_FLAGS_SOURCE: &str = "--config";
const MDM_SOURCE: &str = "com.openai.codex/config_toml_base64";
@@ -75,8 +76,10 @@ impl ConfigApi {
let effective = layers.effective_config();
validate_config(&effective).map_err(|err| internal_error("invalid configuration", err))?;
let config: Config = serde_json::from_value(to_json_value(&effective))
.map_err(|err| internal_error("failed to deserialize configuration", err))?;
let response = ConfigReadResponse {
config: to_json_value(&effective),
config,
origins: layers.origins(),
layers: params.include_layers.then(|| layers.layers_high_to_low()),
};
@@ -141,19 +144,20 @@ impl ConfigApi {
}
let mut user_config = layers.user.config.clone();
let mut mutated = false;
let mut parsed_segments = Vec::new();
let mut config_edits = Vec::new();
for (key_path, value, strategy) in edits.into_iter() {
let segments = parse_key_path(&key_path).map_err(|message| {
config_write_error(ConfigWriteErrorCode::ConfigValidationError, message)
})?;
let original_value = value_at_path(&user_config, &segments).cloned();
let parsed_value = parse_value(value).map_err(|message| {
config_write_error(ConfigWriteErrorCode::ConfigValidationError, message)
})?;
let changed = apply_merge(&mut user_config, &segments, parsed_value.as_ref(), strategy)
.map_err(|err| match err {
apply_merge(&mut user_config, &segments, parsed_value.as_ref(), strategy).map_err(
|err| match err {
MergeError::PathNotFound => config_write_error(
ConfigWriteErrorCode::ConfigPathNotFound,
"Path not found",
@@ -161,9 +165,24 @@ impl ConfigApi {
MergeError::Validation(message) => {
config_write_error(ConfigWriteErrorCode::ConfigValidationError, message)
}
})?;
},
)?;
let updated_value = value_at_path(&user_config, &segments).cloned();
if original_value != updated_value {
let edit = match updated_value {
Some(value) => ConfigEdit::SetPath {
segments: segments.clone(),
value: toml_value_to_item(&value)
.map_err(|err| internal_error("failed to build config edits", err))?,
},
None => ConfigEdit::ClearPath {
segments: segments.clone(),
},
};
config_edits.push(edit);
}
mutated |= changed;
parsed_segments.push(segments);
}
@@ -183,8 +202,10 @@ impl ConfigApi {
)
})?;
if mutated {
self.persist_user_config(&user_config)
if !config_edits.is_empty() {
ConfigEditsBuilder::new(&self.codex_home)
.with_edits(config_edits)
.apply()
.await
.map_err(|err| internal_error("failed to persist config.toml", err))?;
}
@@ -253,25 +274,6 @@ impl ConfigApi {
mdm,
})
}
async fn persist_user_config(&self, user_config: &TomlValue) -> anyhow::Result<()> {
let codex_home = self.codex_home.clone();
let serialized = toml::to_string_pretty(user_config)?;
task::spawn_blocking(move || -> anyhow::Result<()> {
std::fs::create_dir_all(&codex_home)?;
let target = codex_home.join(CONFIG_FILE_NAME);
let tmp = NamedTempFile::new_in(&codex_home)?;
std::fs::write(tmp.path(), serialized.as_bytes())?;
tmp.persist(&target)?;
Ok(())
})
.await
.map_err(|err| anyhow!("config persistence task panicked: {err}"))??;
Ok(())
}
}
fn parse_value(value: JsonValue) -> Result<Option<TomlValue>, String> {
@@ -422,6 +424,44 @@ fn clear_path(root: &mut TomlValue, segments: &[String]) -> Result<bool, MergeEr
Ok(parent.remove(last).is_some())
}
fn toml_value_to_item(value: &TomlValue) -> anyhow::Result<TomlItem> {
match value {
TomlValue::Table(table) => {
let mut table_item = toml_edit::Table::new();
table_item.set_implicit(false);
for (key, val) in table {
table_item.insert(key, toml_value_to_item(val)?);
}
Ok(TomlItem::Table(table_item))
}
other => Ok(TomlItem::Value(toml_value_to_value(other)?)),
}
}
fn toml_value_to_value(value: &TomlValue) -> anyhow::Result<toml_edit::Value> {
match value {
TomlValue::String(val) => Ok(toml_edit::Value::from(val.clone())),
TomlValue::Integer(val) => Ok(toml_edit::Value::from(*val)),
TomlValue::Float(val) => Ok(toml_edit::Value::from(*val)),
TomlValue::Boolean(val) => Ok(toml_edit::Value::from(*val)),
TomlValue::Datetime(val) => Ok(toml_edit::Value::from(*val)),
TomlValue::Array(items) => {
let mut array = toml_edit::Array::new();
for item in items {
array.push(toml_value_to_value(item)?);
}
Ok(toml_edit::Value::Array(array))
}
TomlValue::Table(table) => {
let mut inline = toml_edit::InlineTable::new();
for (key, val) in table {
inline.insert(key, toml_value_to_value(val)?);
}
Ok(toml_edit::Value::InlineTable(inline))
}
}
}
#[derive(Clone)]
struct LayerState {
name: ConfigLayerName,
@@ -735,9 +775,105 @@ fn config_write_error(code: ConfigWriteErrorCode, message: impl Into<String>) ->
#[cfg(test)]
mod tests {
use super::*;
use anyhow::Result;
use codex_app_server_protocol::AskForApproval;
use pretty_assertions::assert_eq;
use tempfile::tempdir;
#[test]
fn toml_value_to_item_handles_nested_config_tables() {
let config = r#"
[mcp_servers.docs]
command = "docs-server"
[mcp_servers.docs.http_headers]
X-Doc = "42"
"#;
let value: TomlValue = toml::from_str(config).expect("parse config example");
let item = toml_value_to_item(&value).expect("convert to toml_edit item");
let root = item.as_table().expect("root table");
assert!(!root.is_implicit(), "root table should be explicit");
let mcp_servers = root
.get("mcp_servers")
.and_then(TomlItem::as_table)
.expect("mcp_servers table");
assert!(
!mcp_servers.is_implicit(),
"mcp_servers table should be explicit"
);
let docs = mcp_servers
.get("docs")
.and_then(TomlItem::as_table)
.expect("docs table");
assert_eq!(
docs.get("command")
.and_then(TomlItem::as_value)
.and_then(toml_edit::Value::as_str),
Some("docs-server")
);
let http_headers = docs
.get("http_headers")
.and_then(TomlItem::as_table)
.expect("http_headers table");
assert_eq!(
http_headers
.get("X-Doc")
.and_then(TomlItem::as_value)
.and_then(toml_edit::Value::as_str),
Some("42")
);
}
#[tokio::test]
async fn write_value_preserves_comments_and_order() -> Result<()> {
let tmp = tempdir().expect("tempdir");
let original = r#"# Codex user configuration
model = "gpt-5"
approval_policy = "on-request"
[notice]
# Preserve this comment
hide_full_access_warning = true
[features]
unified_exec = true
"#;
std::fs::write(tmp.path().join(CONFIG_FILE_NAME), original)?;
let api = ConfigApi::new(tmp.path().to_path_buf(), vec![]);
api.write_value(ConfigValueWriteParams {
file_path: Some(tmp.path().join(CONFIG_FILE_NAME).display().to_string()),
key_path: "features.remote_compaction".to_string(),
value: json!(true),
merge_strategy: MergeStrategy::Replace,
expected_version: None,
})
.await
.expect("write succeeds");
let updated =
std::fs::read_to_string(tmp.path().join(CONFIG_FILE_NAME)).expect("read config");
let expected = r#"# Codex user configuration
model = "gpt-5"
approval_policy = "on-request"
[notice]
# Preserve this comment
hide_full_access_warning = true
[features]
unified_exec = true
remote_compaction = true
"#;
assert_eq!(updated, expected);
Ok(())
}
#[tokio::test]
async fn read_includes_origins_and_layers() {
let tmp = tempdir().expect("tempdir");
@@ -763,10 +899,7 @@ mod tests {
.await
.expect("response");
assert_eq!(
response.config.get("approval_policy"),
Some(&json!("never"))
);
assert_eq!(response.config.approval_policy, Some(AskForApproval::Never));
assert_eq!(
response
@@ -821,8 +954,10 @@ mod tests {
})
.await
.expect("read");
let config_object = read_after.config.as_object().expect("object");
assert_eq!(config_object.get("approval_policy"), Some(&json!("never")));
assert_eq!(
read_after.config.approval_policy,
Some(AskForApproval::Never)
);
assert_eq!(
read_after
.origins
@@ -961,7 +1096,7 @@ mod tests {
.await
.expect("response");
assert_eq!(response.config.get("model"), Some(&json!("system")));
assert_eq!(response.config.model.as_deref(), Some("system"));
assert_eq!(
response.origins.get("model").expect("origin").name,
ConfigLayerName::System


@@ -59,6 +59,7 @@ impl MessageProcessor {
outgoing.clone(),
codex_linux_sandbox_exe,
Arc::clone(&config),
cli_overrides.clone(),
feedback,
);
let config_api = ConfigApi::new(config.codex_home.clone(), cli_overrides);

View File

@@ -3,12 +3,16 @@ use std::sync::Arc;
use codex_app_server_protocol::Model;
use codex_app_server_protocol::ReasoningEffortOption;
use codex_core::ConversationManager;
use codex_core::config::Config;
use codex_protocol::openai_models::ModelPreset;
use codex_protocol::openai_models::ReasoningEffortPreset;
pub async fn supported_models(conversation_manager: Arc<ConversationManager>) -> Vec<Model> {
pub async fn supported_models(
conversation_manager: Arc<ConversationManager>,
config: &Config,
) -> Vec<Model> {
conversation_manager
.list_models()
.list_models(config)
.await
.into_iter()
.map(model_from_preset)

View File

@@ -1,6 +1,7 @@
mod auth_fixtures;
mod mcp_process;
mod mock_model_server;
mod models_cache;
mod responses;
mod rollout;
@@ -11,9 +12,13 @@ pub use auth_fixtures::write_chatgpt_auth;
use codex_app_server_protocol::JSONRPCResponse;
pub use core_test_support::format_with_current_shell;
pub use core_test_support::format_with_current_shell_display;
pub use core_test_support::format_with_current_shell_display_non_login;
pub use core_test_support::format_with_current_shell_non_login;
pub use mcp_process::McpProcess;
pub use mock_model_server::create_mock_chat_completions_server;
pub use mock_model_server::create_mock_chat_completions_server_unchecked;
pub use models_cache::write_models_cache;
pub use models_cache::write_models_cache_with_models;
pub use responses::create_apply_patch_sse_response;
pub use responses::create_exec_command_sse_response;
pub use responses::create_final_assistant_message_sse_response;

View File

@@ -0,0 +1,74 @@
use chrono::DateTime;
use chrono::Utc;
use codex_core::openai_models::model_presets::all_model_presets;
use codex_protocol::openai_models::ClientVersion;
use codex_protocol::openai_models::ConfigShellToolType;
use codex_protocol::openai_models::ModelInfo;
use codex_protocol::openai_models::ModelPreset;
use codex_protocol::openai_models::ModelVisibility;
use serde_json::json;
use std::path::Path;
/// Convert a ModelPreset to ModelInfo for cache storage.
fn preset_to_info(preset: &ModelPreset, priority: i32) -> ModelInfo {
ModelInfo {
slug: preset.id.clone(),
display_name: preset.display_name.clone(),
description: Some(preset.description.clone()),
default_reasoning_level: preset.default_reasoning_effort,
supported_reasoning_levels: preset.supported_reasoning_efforts.clone(),
shell_type: ConfigShellToolType::ShellCommand,
visibility: if preset.show_in_picker {
ModelVisibility::List
} else {
ModelVisibility::Hide
},
minimal_client_version: ClientVersion(0, 1, 0),
supported_in_api: true,
priority,
upgrade: preset.upgrade.as_ref().map(|u| u.id.clone()),
base_instructions: None,
}
}
/// Write a models_cache.json file to the codex home directory.
/// This prevents ModelsManager from making network requests to refresh models.
/// The cache will be treated as fresh (within TTL) and used instead of fetching from the network.
/// Uses the built-in model presets from ModelsManager, converted to ModelInfo format.
pub fn write_models_cache(codex_home: &Path) -> std::io::Result<()> {
// Get all presets and filter for show_in_picker (same as builtin_model_presets does)
let presets: Vec<&ModelPreset> = all_model_presets()
.iter()
.filter(|preset| preset.show_in_picker)
.collect();
// Convert presets to ModelInfo, assigning priorities (higher = earlier in list)
// Priority is used for sorting, so first model gets highest priority
let models: Vec<ModelInfo> = presets
.iter()
.enumerate()
.map(|(idx, preset)| {
// Higher priority = earlier in list, so reverse the index
let priority = (presets.len() - idx) as i32;
preset_to_info(preset, priority)
})
.collect();
write_models_cache_with_models(codex_home, models)
}
/// Write a models_cache.json file with specific models.
/// Useful when tests need specific models to be available.
pub fn write_models_cache_with_models(
codex_home: &Path,
models: Vec<ModelInfo>,
) -> std::io::Result<()> {
let cache_path = codex_home.join("models_cache.json");
// DateTime<Utc> serializes to RFC3339 format by default with serde
let fetched_at: DateTime<Utc> = Utc::now();
let cache = json!({
"fetched_at": fetched_at,
"etag": null,
"models": models
});
std::fs::write(cache_path, serde_json::to_string_pretty(&cache)?)
}
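The priority assignment in `write_models_cache` reverses the index so the first preset receives the highest priority and therefore sorts first. A tiny sketch of that mapping:

```rust
// Descending priorities: index 0 gets the largest value, so sorting
// by priority (highest first) preserves the original preset order.
fn priorities(count: usize) -> Vec<i32> {
    (0..count).map(|idx| (count - idx) as i32).collect()
}

fn main() {
    assert_eq!(priorities(3), vec![3, 2, 1]);
    assert_eq!(priorities(0), Vec::<i32>::new());
    println!("ok");
}
```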


@@ -271,7 +271,6 @@ async fn test_send_user_turn_changes_approval_policy_behavior() -> Result<()> {
command: format_with_current_shell("python3 -c 'print(42)'"),
cwd: working_directory.clone(),
reason: None,
risk: None,
parsed_cmd: vec![ParsedCommand::Unknown {
cmd: "python3 -c 'print(42)'".to_string()
}],

View File

@@ -358,3 +358,81 @@ async fn test_list_and_resume_conversations() -> Result<()> {
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn list_conversations_fetches_through_filtered_pages() -> Result<()> {
let codex_home = TempDir::new()?;
// Only the last 3 conversations match the provider filter; request 3 and
// ensure pagination keeps fetching past non-matching pages.
let cases = [
(
"2025-03-04T12-00-00",
"2025-03-04T12:00:00Z",
"skip_provider",
),
(
"2025-03-03T12-00-00",
"2025-03-03T12:00:00Z",
"skip_provider",
),
(
"2025-03-02T12-00-00",
"2025-03-02T12:00:00Z",
"target_provider",
),
(
"2025-03-01T12-00-00",
"2025-03-01T12:00:00Z",
"target_provider",
),
(
"2025-02-28T12-00-00",
"2025-02-28T12:00:00Z",
"target_provider",
),
];
for (ts_file, ts_rfc, provider) in cases {
create_fake_rollout(
codex_home.path(),
ts_file,
ts_rfc,
"Hello",
Some(provider),
None,
)?;
}
let mut mcp = McpProcess::new(codex_home.path()).await?;
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
let req_id = mcp
.send_list_conversations_request(ListConversationsParams {
page_size: Some(3),
cursor: None,
model_providers: Some(vec!["target_provider".to_string()]),
})
.await?;
let resp: JSONRPCResponse = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_response_message(RequestId::Integer(req_id)),
)
.await??;
let ListConversationsResponse { items, next_cursor } =
to_response::<ListConversationsResponse>(resp)?;
assert_eq!(
items.len(),
3,
"should fetch across pages to satisfy the limit"
);
assert!(
items
.iter()
.all(|item| item.model_provider == "target_provider")
);
assert_eq!(next_cursor, None);
Ok(())
}

View File

@@ -1,8 +1,6 @@
use anyhow::Result;
use app_test_support::McpProcess;
use app_test_support::to_response;
use codex_app_server_protocol::CancelLoginChatGptParams;
use codex_app_server_protocol::CancelLoginChatGptResponse;
use codex_app_server_protocol::GetAuthStatusParams;
use codex_app_server_protocol::GetAuthStatusResponse;
use codex_app_server_protocol::JSONRPCError;
@@ -14,7 +12,6 @@ use codex_core::auth::AuthCredentialsStoreMode;
use codex_login::login_with_api_key;
use serial_test::serial;
use std::path::Path;
use std::time::Duration;
use tempfile::TempDir;
use tokio::time::timeout;
@@ -87,48 +84,6 @@ async fn logout_chatgpt_removes_auth() -> Result<()> {
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
// Serialize tests that launch the login server since it binds to a fixed port.
#[serial(login_port)]
async fn login_and_cancel_chatgpt() -> Result<()> {
let codex_home = TempDir::new()?;
create_config_toml(codex_home.path())?;
let mut mcp = McpProcess::new(codex_home.path()).await?;
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
let login_id = mcp.send_login_chat_gpt_request().await?;
let login_resp: JSONRPCResponse = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_response_message(RequestId::Integer(login_id)),
)
.await??;
let login: LoginChatGptResponse = to_response(login_resp)?;
let cancel_id = mcp
.send_cancel_login_chat_gpt_request(CancelLoginChatGptParams {
login_id: login.login_id,
})
.await?;
let cancel_resp: JSONRPCResponse = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_response_message(RequestId::Integer(cancel_id)),
)
.await??;
let _ok: CancelLoginChatGptResponse = to_response(cancel_resp)?;
// Optionally observe the completion notification; do not fail if it races.
let maybe_note = timeout(
Duration::from_secs(2),
mcp.read_stream_until_notification_message("codex/event/login_chat_gpt_complete"),
)
.await;
if maybe_note.is_err() {
eprintln!("warning: did not observe login_chat_gpt_complete notification after cancel");
}
Ok(())
}
fn create_config_toml_forced_login(codex_home: &Path, forced_method: &str) -> std::io::Result<()> {
let config_toml = codex_home.join("config.toml");
let contents = format!(

View File

@@ -241,7 +241,7 @@ async fn login_account_chatgpt_rejected_when_forced_api() -> Result<()> {
#[tokio::test]
// Serialize tests that launch the login server since it binds to a fixed port.
#[serial(login_port)]
async fn login_account_chatgpt_start() -> Result<()> {
async fn login_account_chatgpt_start_can_be_cancelled() -> Result<()> {
let codex_home = TempDir::new()?;
create_config_toml(codex_home.path(), CreateConfigTomlParams::default())?;

View File

@@ -1,6 +1,7 @@
use anyhow::Result;
use app_test_support::McpProcess;
use app_test_support::to_response;
use codex_app_server_protocol::AskForApproval;
use codex_app_server_protocol::ConfigBatchWriteParams;
use codex_app_server_protocol::ConfigEdit;
use codex_app_server_protocol::ConfigLayerName;
@@ -12,9 +13,12 @@ use codex_app_server_protocol::JSONRPCError;
use codex_app_server_protocol::JSONRPCResponse;
use codex_app_server_protocol::MergeStrategy;
use codex_app_server_protocol::RequestId;
use codex_app_server_protocol::SandboxMode;
use codex_app_server_protocol::ToolsV2;
use codex_app_server_protocol::WriteStatus;
use pretty_assertions::assert_eq;
use serde_json::json;
use std::path::PathBuf;
use tempfile::TempDir;
use tokio::time::timeout;
@@ -57,7 +61,7 @@ sandbox_mode = "workspace-write"
layers,
} = to_response(resp)?;
assert_eq!(config.get("model"), Some(&json!("gpt-user")));
assert_eq!(config.model.as_deref(), Some("gpt-user"));
assert_eq!(
origins.get("model").expect("origin").name,
ConfigLayerName::User
@@ -70,6 +74,64 @@ sandbox_mode = "workspace-write"
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn config_read_includes_tools() -> Result<()> {
let codex_home = TempDir::new()?;
write_config(
&codex_home,
r#"
model = "gpt-user"
[tools]
web_search = true
view_image = false
"#,
)?;
let mut mcp = McpProcess::new(codex_home.path()).await?;
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
let request_id = mcp
.send_config_read_request(ConfigReadParams {
include_layers: true,
})
.await?;
let resp: JSONRPCResponse = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
)
.await??;
let ConfigReadResponse {
config,
origins,
layers,
} = to_response(resp)?;
let tools = config.tools.expect("tools present");
assert_eq!(
tools,
ToolsV2 {
web_search: Some(true),
view_image: Some(false),
}
);
assert_eq!(
origins.get("tools.web_search").expect("origin").name,
ConfigLayerName::User
);
assert_eq!(
origins.get("tools.view_image").expect("origin").name,
ConfigLayerName::User
);
let layers = layers.expect("layers present");
assert_eq!(layers.len(), 2);
assert_eq!(layers[0].name, ConfigLayerName::SessionFlags);
assert_eq!(layers[1].name, ConfigLayerName::User);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn config_read_includes_system_layer_and_overrides() -> Result<()> {
let codex_home = TempDir::new()?;
@@ -123,30 +185,29 @@ writable_roots = ["/system"]
layers,
} = to_response(resp)?;
assert_eq!(config.get("model"), Some(&json!("gpt-system")));
assert_eq!(config.model.as_deref(), Some("gpt-system"));
assert_eq!(
origins.get("model").expect("origin").name,
ConfigLayerName::System
);
assert_eq!(config.get("approval_policy"), Some(&json!("never")));
assert_eq!(config.approval_policy, Some(AskForApproval::Never));
assert_eq!(
origins.get("approval_policy").expect("origin").name,
ConfigLayerName::System
);
assert_eq!(config.get("sandbox_mode"), Some(&json!("workspace-write")));
assert_eq!(config.sandbox_mode, Some(SandboxMode::WorkspaceWrite));
assert_eq!(
origins.get("sandbox_mode").expect("origin").name,
ConfigLayerName::User
);
assert_eq!(
config
.get("sandbox_workspace_write")
.and_then(|v| v.get("writable_roots")),
Some(&json!(["/system"]))
);
let sandbox = config
.sandbox_workspace_write
.as_ref()
.expect("sandbox workspace write");
assert_eq!(sandbox.writable_roots, vec![PathBuf::from("/system")]);
assert_eq!(
origins
.get("sandbox_workspace_write.writable_roots.0")
@@ -155,12 +216,7 @@ writable_roots = ["/system"]
ConfigLayerName::System
);
assert_eq!(
config
.get("sandbox_workspace_write")
.and_then(|v| v.get("network_access")),
Some(&json!(true))
);
assert!(sandbox.network_access);
assert_eq!(
origins
.get("sandbox_workspace_write.network_access")
@@ -242,7 +298,7 @@ model = "gpt-old"
)
.await??;
let verify: ConfigReadResponse = to_response(verify_resp)?;
assert_eq!(verify.config.get("model"), Some(&json!("gpt-new")));
assert_eq!(verify.config.model.as_deref(), Some("gpt-new"));
Ok(())
}
@@ -342,22 +398,14 @@ async fn config_batch_write_applies_multiple_edits() -> Result<()> {
)
.await??;
let read: ConfigReadResponse = to_response(read_resp)?;
assert_eq!(
read.config.get("sandbox_mode"),
Some(&json!("workspace-write"))
);
assert_eq!(
read.config
.get("sandbox_workspace_write")
.and_then(|v| v.get("writable_roots")),
Some(&json!(["/tmp"]))
);
assert_eq!(
read.config
.get("sandbox_workspace_write")
.and_then(|v| v.get("network_access")),
Some(&json!(false))
);
assert_eq!(read.config.sandbox_mode, Some(SandboxMode::WorkspaceWrite));
let sandbox = read
.config
.sandbox_workspace_write
.as_ref()
.expect("sandbox workspace write");
assert_eq!(sandbox.writable_roots, vec![PathBuf::from("/tmp")]);
assert!(!sandbox.network_access);
Ok(())
}
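The hunks above migrate the config tests from string-keyed JSON lookups like `config.get("model")` to typed fields such as `config.model.as_deref()`. A minimal, std-only sketch (hypothetical `UserConfig` type, not the real one) of the difference between the two assertion styles:

```rust
// Sketch of the two assertion styles the diff above migrates between:
// stringly-keyed lookup vs. typed struct fields. Types here are hypothetical.
use std::collections::HashMap;

// Hypothetical typed config mirroring the fields asserted in the tests above.
#[derive(Debug)]
pub struct UserConfig {
    pub model: Option<String>,
    pub sandbox_mode: Option<String>,
}

fn main() {
    // Untyped view: the key is a string, so a typo compiles fine and only
    // shows up at runtime as a `None`.
    let raw: HashMap<&str, &str> =
        HashMap::from([("model", "gpt-user"), ("sandbox_mode", "workspace-write")]);
    assert_eq!(raw.get("model").copied(), Some("gpt-user"));
    assert_eq!(raw.get("modle").copied(), None); // silent miss

    // Typed view: a misspelled field is a compile error, and `as_deref`
    // compares `Option<String>` against `Option<&str>` without cloning.
    let config = UserConfig {
        model: Some("gpt-user".to_string()),
        sandbox_mode: Some("workspace-write".to_string()),
    };
    assert_eq!(config.model.as_deref(), Some("gpt-user"));
    assert_eq!(config.sandbox_mode.as_deref(), Some("workspace-write"));
    println!("ok");
}
```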

View File

@@ -4,6 +4,7 @@ use anyhow::Result;
use anyhow::anyhow;
use app_test_support::McpProcess;
use app_test_support::to_response;
use app_test_support::write_models_cache;
use codex_app_server_protocol::JSONRPCError;
use codex_app_server_protocol::JSONRPCResponse;
use codex_app_server_protocol::Model;
@@ -22,6 +23,7 @@ const INVALID_REQUEST_ERROR_CODE: i64 = -32600;
#[tokio::test]
async fn list_models_returns_all_models_with_large_limit() -> Result<()> {
let codex_home = TempDir::new()?;
write_models_cache(codex_home.path())?;
let mut mcp = McpProcess::new(codex_home.path()).await?;
timeout(DEFAULT_TIMEOUT, mcp.initialize()).await??;
@@ -114,6 +116,37 @@ async fn list_models_returns_all_models_with_large_limit() -> Result<()> {
default_reasoning_effort: ReasoningEffort::Medium,
is_default: false,
},
Model {
id: "robin".to_string(),
model: "robin".to_string(),
display_name: "robin".to_string(),
description: "Robin".to_string(),
supported_reasoning_efforts: vec![
ReasoningEffortOption {
reasoning_effort: ReasoningEffort::Low,
description: "Balances speed with some reasoning; useful for straightforward \
queries and short explanations"
.to_string(),
},
ReasoningEffortOption {
reasoning_effort: ReasoningEffort::Medium,
description: "Provides a solid balance of reasoning depth and latency for \
general-purpose tasks"
.to_string(),
},
ReasoningEffortOption {
reasoning_effort: ReasoningEffort::High,
description: "Maximizes reasoning depth for complex or ambiguous problems"
.to_string(),
},
ReasoningEffortOption {
reasoning_effort: ReasoningEffort::XHigh,
description: "Extra high reasoning for complex problems".to_string(),
},
],
default_reasoning_effort: ReasoningEffort::Medium,
is_default: false,
},
Model {
id: "gpt-5.1".to_string(),
model: "gpt-5.1".to_string(),
@@ -151,6 +184,7 @@ async fn list_models_returns_all_models_with_large_limit() -> Result<()> {
#[tokio::test]
async fn list_models_pagination_works() -> Result<()> {
let codex_home = TempDir::new()?;
write_models_cache(codex_home.path())?;
let mut mcp = McpProcess::new(codex_home.path()).await?;
timeout(DEFAULT_TIMEOUT, mcp.initialize()).await??;
@@ -240,14 +274,37 @@ async fn list_models_pagination_works() -> Result<()> {
} = to_response::<ModelListResponse>(fourth_response)?;
assert_eq!(fourth_items.len(), 1);
assert_eq!(fourth_items[0].id, "gpt-5.1");
assert!(fourth_cursor.is_none());
assert_eq!(fourth_items[0].id, "robin");
let fifth_cursor = fourth_cursor.ok_or_else(|| anyhow!("cursor for fifth page"))?;
let fifth_request = mcp
.send_list_models_request(ModelListParams {
limit: Some(1),
cursor: Some(fifth_cursor.clone()),
})
.await?;
let fifth_response: JSONRPCResponse = timeout(
DEFAULT_TIMEOUT,
mcp.read_stream_until_response_message(RequestId::Integer(fifth_request)),
)
.await??;
let ModelListResponse {
data: fifth_items,
next_cursor: fifth_cursor,
} = to_response::<ModelListResponse>(fifth_response)?;
assert_eq!(fifth_items.len(), 1);
assert_eq!(fifth_items[0].id, "gpt-5.1");
assert!(fifth_cursor.is_none());
Ok(())
}
#[tokio::test]
async fn list_models_rejects_invalid_cursor() -> Result<()> {
let codex_home = TempDir::new()?;
write_models_cache(codex_home.path())?;
let mut mcp = McpProcess::new(codex_home.path()).await?;
timeout(DEFAULT_TIMEOUT, mcp.initialize()).await??;

View File

@@ -6,37 +6,96 @@ use codex_app_server_protocol::GitInfo as ApiGitInfo;
use codex_app_server_protocol::JSONRPCResponse;
use codex_app_server_protocol::RequestId;
use codex_app_server_protocol::SessionSource;
use codex_app_server_protocol::ThreadListParams;
use codex_app_server_protocol::ThreadListResponse;
use codex_protocol::protocol::GitInfo as CoreGitInfo;
use std::path::Path;
use std::path::PathBuf;
use tempfile::TempDir;
use tokio::time::timeout;
const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
async fn init_mcp(codex_home: &Path) -> Result<McpProcess> {
let mut mcp = McpProcess::new(codex_home).await?;
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
Ok(mcp)
}
async fn list_threads(
mcp: &mut McpProcess,
cursor: Option<String>,
limit: Option<u32>,
providers: Option<Vec<String>>,
) -> Result<ThreadListResponse> {
let request_id = mcp
.send_thread_list_request(codex_app_server_protocol::ThreadListParams {
cursor,
limit,
model_providers: providers,
})
.await?;
let resp: JSONRPCResponse = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
)
.await??;
to_response::<ThreadListResponse>(resp)
}
fn create_fake_rollouts<F, G>(
codex_home: &Path,
count: usize,
provider_for_index: F,
timestamp_for_index: G,
preview: &str,
) -> Result<Vec<String>>
where
F: Fn(usize) -> &'static str,
G: Fn(usize) -> (String, String),
{
let mut ids = Vec::with_capacity(count);
for i in 0..count {
let (ts_file, ts_rfc) = timestamp_for_index(i);
ids.push(create_fake_rollout(
codex_home,
&ts_file,
&ts_rfc,
preview,
Some(provider_for_index(i)),
None,
)?);
}
Ok(ids)
}
fn timestamp_at(
year: i32,
month: u32,
day: u32,
hour: u32,
minute: u32,
second: u32,
) -> (String, String) {
(
format!("{year:04}-{month:02}-{day:02}T{hour:02}-{minute:02}-{second:02}"),
format!("{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}Z"),
)
}
#[tokio::test]
async fn thread_list_basic_empty() -> Result<()> {
let codex_home = TempDir::new()?;
create_minimal_config(codex_home.path())?;
let mut mcp = McpProcess::new(codex_home.path()).await?;
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
let mut mcp = init_mcp(codex_home.path()).await?;
// List threads in an empty CODEX_HOME; should return an empty page with nextCursor: null.
let list_id = mcp
.send_thread_list_request(ThreadListParams {
cursor: None,
limit: Some(10),
model_providers: Some(vec!["mock_provider".to_string()]),
})
.await?;
let list_resp: JSONRPCResponse = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_response_message(RequestId::Integer(list_id)),
let ThreadListResponse { data, next_cursor } = list_threads(
&mut mcp,
None,
Some(10),
Some(vec!["mock_provider".to_string()]),
)
.await??;
let ThreadListResponse { data, next_cursor } = to_response::<ThreadListResponse>(list_resp)?;
.await?;
assert!(data.is_empty());
assert_eq!(next_cursor, None);
@@ -86,26 +145,19 @@ async fn thread_list_pagination_next_cursor_none_on_last_page() -> Result<()> {
None,
)?;
let mut mcp = McpProcess::new(codex_home.path()).await?;
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
let mut mcp = init_mcp(codex_home.path()).await?;
// Page 1: limit 2 → expect next_cursor Some.
let page1_id = mcp
.send_thread_list_request(ThreadListParams {
cursor: None,
limit: Some(2),
model_providers: Some(vec!["mock_provider".to_string()]),
})
.await?;
let page1_resp: JSONRPCResponse = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_response_message(RequestId::Integer(page1_id)),
)
.await??;
let ThreadListResponse {
data: data1,
next_cursor: cursor1,
} = to_response::<ThreadListResponse>(page1_resp)?;
} = list_threads(
&mut mcp,
None,
Some(2),
Some(vec!["mock_provider".to_string()]),
)
.await?;
assert_eq!(data1.len(), 2);
for thread in &data1 {
assert_eq!(thread.preview, "Hello");
@@ -119,22 +171,16 @@ async fn thread_list_pagination_next_cursor_none_on_last_page() -> Result<()> {
let cursor1 = cursor1.expect("expected nextCursor on first page");
// Page 2: with cursor → expect next_cursor None when no more results.
let page2_id = mcp
.send_thread_list_request(ThreadListParams {
cursor: Some(cursor1),
limit: Some(2),
model_providers: Some(vec!["mock_provider".to_string()]),
})
.await?;
let page2_resp: JSONRPCResponse = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_response_message(RequestId::Integer(page2_id)),
)
.await??;
let ThreadListResponse {
data: data2,
next_cursor: cursor2,
} = to_response::<ThreadListResponse>(page2_resp)?;
} = list_threads(
&mut mcp,
Some(cursor1),
Some(2),
Some(vec!["mock_provider".to_string()]),
)
.await?;
assert!(data2.len() <= 2);
for thread in &data2 {
assert_eq!(thread.preview, "Hello");
@@ -173,23 +219,16 @@ async fn thread_list_respects_provider_filter() -> Result<()> {
None,
)?;
let mut mcp = McpProcess::new(codex_home.path()).await?;
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
let mut mcp = init_mcp(codex_home.path()).await?;
// Filter to only other_provider; expect 1 item, nextCursor None.
let list_id = mcp
.send_thread_list_request(ThreadListParams {
cursor: None,
limit: Some(10),
model_providers: Some(vec!["other_provider".to_string()]),
})
.await?;
let resp: JSONRPCResponse = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_response_message(RequestId::Integer(list_id)),
let ThreadListResponse { data, next_cursor } = list_threads(
&mut mcp,
None,
Some(10),
Some(vec!["other_provider".to_string()]),
)
.await??;
let ThreadListResponse { data, next_cursor } = to_response::<ThreadListResponse>(resp)?;
.await?;
assert_eq!(data.len(), 1);
assert_eq!(next_cursor, None);
let thread = &data[0];
@@ -205,6 +244,146 @@ async fn thread_list_respects_provider_filter() -> Result<()> {
Ok(())
}
#[tokio::test]
async fn thread_list_fetches_until_limit_or_exhausted() -> Result<()> {
let codex_home = TempDir::new()?;
create_minimal_config(codex_home.path())?;
// Newest 16 conversations belong to a different provider; the older 8 are the
// only ones that match the filter. We request 8 so the server must keep
// paging past the first two pages to reach the desired count.
create_fake_rollouts(
codex_home.path(),
24,
|i| {
if i < 16 {
"skip_provider"
} else {
"target_provider"
}
},
|i| timestamp_at(2025, 3, 30 - i as u32, 12, 0, 0),
"Hello",
)?;
let mut mcp = init_mcp(codex_home.path()).await?;
// Request 8 threads for the target provider; the matches only start on the
// third page so we rely on pagination to reach the limit.
let ThreadListResponse { data, next_cursor } = list_threads(
&mut mcp,
None,
Some(8),
Some(vec!["target_provider".to_string()]),
)
.await?;
assert_eq!(
data.len(),
8,
"should keep paging until the requested count is filled"
);
assert!(
data.iter()
.all(|thread| thread.model_provider == "target_provider"),
"all returned threads must match the requested provider"
);
assert_eq!(
next_cursor, None,
"once the requested count is satisfied on the final page, nextCursor should be None"
);
Ok(())
}
#[tokio::test]
async fn thread_list_enforces_max_limit() -> Result<()> {
let codex_home = TempDir::new()?;
create_minimal_config(codex_home.path())?;
create_fake_rollouts(
codex_home.path(),
105,
|_| "mock_provider",
|i| {
let month = 5 + (i / 28);
let day = (i % 28) + 1;
timestamp_at(2025, month as u32, day as u32, 0, 0, 0)
},
"Hello",
)?;
let mut mcp = init_mcp(codex_home.path()).await?;
let ThreadListResponse { data, next_cursor } = list_threads(
&mut mcp,
None,
Some(200),
Some(vec!["mock_provider".to_string()]),
)
.await?;
assert_eq!(
data.len(),
100,
"limit should be clamped to the maximum page size"
);
assert!(
next_cursor.is_some(),
"when more than the maximum exist, nextCursor should continue pagination"
);
Ok(())
}
#[tokio::test]
async fn thread_list_stops_when_not_enough_filtered_results_exist() -> Result<()> {
let codex_home = TempDir::new()?;
create_minimal_config(codex_home.path())?;
// Only the last 7 conversations match the provider filter; we ask for 10 to
// ensure the server exhausts pagination without looping forever.
create_fake_rollouts(
codex_home.path(),
22,
|i| {
if i < 15 {
"skip_provider"
} else {
"target_provider"
}
},
|i| timestamp_at(2025, 4, 28 - i as u32, 8, 0, 0),
"Hello",
)?;
let mut mcp = init_mcp(codex_home.path()).await?;
// Request more threads than exist after filtering; expect all matches to be
// returned with nextCursor None.
let ThreadListResponse { data, next_cursor } = list_threads(
&mut mcp,
None,
Some(10),
Some(vec!["target_provider".to_string()]),
)
.await?;
assert_eq!(
data.len(),
7,
"all available filtered threads should be returned"
);
assert!(
data.iter()
.all(|thread| thread.model_provider == "target_provider"),
"results should still respect the provider filter"
);
assert_eq!(
next_cursor, None,
"when results are exhausted before reaching the limit, nextCursor should be None"
);
Ok(())
}
#[tokio::test]
async fn thread_list_includes_git_info() -> Result<()> {
let codex_home = TempDir::new()?;
@@ -224,22 +403,15 @@ async fn thread_list_includes_git_info() -> Result<()> {
Some(git_info),
)?;
let mut mcp = McpProcess::new(codex_home.path()).await?;
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
let mut mcp = init_mcp(codex_home.path()).await?;
let list_id = mcp
.send_thread_list_request(ThreadListParams {
cursor: None,
limit: Some(10),
model_providers: Some(vec!["mock_provider".to_string()]),
})
.await?;
let resp: JSONRPCResponse = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_response_message(RequestId::Integer(list_id)),
let ThreadListResponse { data, .. } = list_threads(
&mut mcp,
None,
Some(10),
Some(vec!["mock_provider".to_string()]),
)
.await??;
let ThreadListResponse { data, .. } = to_response::<ThreadListResponse>(resp)?;
.await?;
let thread = data
.iter()
.find(|t| t.id == conversation_id)

View File

@@ -112,7 +112,7 @@ fn classify_shell_name(shell: &str) -> Option<String> {
fn classify_shell(shell: &str, flag: &str) -> Option<ApplyPatchShell> {
classify_shell_name(shell).and_then(|name| match name.as_str() {
"bash" | "zsh" | "sh" if flag == "-lc" => Some(ApplyPatchShell::Unix),
"bash" | "zsh" | "sh" if matches!(flag, "-lc" | "-c") => Some(ApplyPatchShell::Unix),
"pwsh" | "powershell" if flag.eq_ignore_ascii_case("-command") => {
Some(ApplyPatchShell::PowerShell)
}
@@ -1097,6 +1097,13 @@ mod tests {
assert_match(&heredoc_script(""), None);
}
#[test]
fn test_heredoc_non_login_shell() {
let script = heredoc_script("");
let args = strs_to_strings(&["bash", "-c", &script]);
assert_match_args(args, None);
}
#[test]
fn test_heredoc_applypatch() {
let args = strs_to_strings(&[

View File
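The `classify_shell` change above widens the accepted flag set for Unix shells from `-lc` only to `-lc | -c`, which is what the new `test_heredoc_non_login_shell` test exercises. A simplified, std-only sketch of that matching logic (names here are stand-ins, not the real crate API):

```rust
// Std-only sketch of the flag matching widened in the classify_shell diff
// above: `-c` is now accepted alongside `-lc` for bash/zsh/sh, while
// PowerShell still matches `-Command` case-insensitively.
#[derive(Debug, PartialEq)]
pub enum ShellKind {
    Unix,
    PowerShell,
}

pub fn classify(shell: &str, flag: &str) -> Option<ShellKind> {
    match shell {
        // Both login (-lc) and non-login (-c) invocations count as Unix now.
        "bash" | "zsh" | "sh" if matches!(flag, "-lc" | "-c") => Some(ShellKind::Unix),
        "pwsh" | "powershell" if flag.eq_ignore_ascii_case("-command") => {
            Some(ShellKind::PowerShell)
        }
        _ => None,
    }
}

fn main() {
    assert_eq!(classify("bash", "-lc"), Some(ShellKind::Unix));
    assert_eq!(classify("bash", "-c"), Some(ShellKind::Unix)); // newly accepted
    assert_eq!(classify("pwsh", "-Command"), Some(ShellKind::PowerShell));
    assert_eq!(classify("bash", "-x"), None);
    println!("ok");
}
```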

@@ -36,6 +36,7 @@ codex-responses-api-proxy = { workspace = true }
codex-rmcp-client = { workspace = true }
codex-stdio-to-uds = { workspace = true }
codex-tui = { workspace = true }
codex-tui2 = { workspace = true }
ctor = { workspace = true }
libc = { workspace = true }
owo-colors = { workspace = true }

View File

@@ -25,6 +25,7 @@ use codex_responses_api_proxy::Args as ResponsesApiProxyArgs;
use codex_tui::AppExitInfo;
use codex_tui::Cli as TuiCli;
use codex_tui::update_action::UpdateAction;
use codex_tui2 as tui2;
use owo_colors::OwoColorize;
use std::path::PathBuf;
use supports_color::Stream;
@@ -37,6 +38,11 @@ use crate::mcp_cmd::McpCli;
use codex_core::config::Config;
use codex_core::config::ConfigOverrides;
use codex_core::config::find_codex_home;
use codex_core::config::load_config_as_toml_with_cli_overrides;
use codex_core::features::Feature;
use codex_core::features::FeatureOverrides;
use codex_core::features::Features;
use codex_core::features::is_known_feature_key;
/// Codex CLI
@@ -444,7 +450,7 @@ async fn cli_main(codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()
&mut interactive.config_overrides,
root_config_overrides.clone(),
);
let exit_info = codex_tui::run_main(interactive, codex_linux_sandbox_exe).await?;
let exit_info = run_interactive_tui(interactive, codex_linux_sandbox_exe).await?;
handle_app_exit(exit_info)?;
}
Some(Subcommand::Exec(mut exec_cli)) => {
@@ -499,7 +505,7 @@ async fn cli_main(codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()
all,
config_overrides,
);
let exit_info = codex_tui::run_main(interactive, codex_linux_sandbox_exe).await?;
let exit_info = run_interactive_tui(interactive, codex_linux_sandbox_exe).await?;
handle_app_exit(exit_info)?;
}
Some(Subcommand::Login(mut login_cli)) => {
@@ -650,6 +656,40 @@ fn prepend_config_flags(
.splice(0..0, cli_config_overrides.raw_overrides);
}
/// Run the interactive Codex TUI, dispatching to either the legacy implementation or the
/// experimental TUI v2 shim based on feature flags resolved from config.
async fn run_interactive_tui(
interactive: TuiCli,
codex_linux_sandbox_exe: Option<PathBuf>,
) -> std::io::Result<AppExitInfo> {
if is_tui2_enabled(&interactive).await? {
let result = tui2::run_main(interactive.into(), codex_linux_sandbox_exe).await?;
Ok(result.into())
} else {
codex_tui::run_main(interactive, codex_linux_sandbox_exe).await
}
}
/// Returns `Ok(true)` when the resolved configuration enables the `tui2` feature flag.
///
/// This performs a lightweight config load (honoring the same precedence as the lower-level TUI
/// bootstrap: `$CODEX_HOME`, config.toml, profile, and CLI `-c` overrides) solely to decide which
/// TUI frontend to launch. The full configuration is still loaded later by the interactive TUI.
async fn is_tui2_enabled(cli: &TuiCli) -> std::io::Result<bool> {
let raw_overrides = cli.config_overrides.raw_overrides.clone();
let overrides_cli = codex_common::CliConfigOverrides { raw_overrides };
let cli_kv_overrides = overrides_cli
.parse_overrides()
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidInput, e))?;
let codex_home = find_codex_home()?;
let config_toml = load_config_as_toml_with_cli_overrides(&codex_home, cli_kv_overrides).await?;
let config_profile = config_toml.get_config_profile(cli.config_profile.clone())?;
let overrides = FeatureOverrides::default();
let features = Features::from_config(&config_toml, &config_profile, overrides);
Ok(features.enabled(Feature::Tui2))
}
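The `run_interactive_tui`/`is_tui2_enabled` pair above resolves a feature set from config and then dispatches to one of two frontends. A std-only sketch of that dispatch shape (hypothetical names; the real code also loads `config.toml`, the profile, and CLI `-c` overrides first):

```rust
// Std-only sketch (hypothetical names) of the dispatch pattern used by
// run_interactive_tui above: resolve a feature set once, then pick a frontend.
use std::collections::HashSet;

#[derive(Hash, Eq, PartialEq)]
pub enum Feature {
    Tui2,
}

pub struct Features(pub HashSet<Feature>);

impl Features {
    pub fn enabled(&self, f: Feature) -> bool {
        self.0.contains(&f)
    }
}

// Returns which frontend would launch for a given resolved feature set.
pub fn pick_frontend(features: &Features) -> &'static str {
    if features.enabled(Feature::Tui2) { "tui2" } else { "tui" }
}

fn main() {
    let legacy = Features(HashSet::new());
    let v2 = Features(HashSet::from([Feature::Tui2]));
    assert_eq!(pick_frontend(&legacy), "tui");
    assert_eq!(pick_frontend(&v2), "tui2");
    println!("ok");
}
```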
/// Build the final `TuiCli` for a `codex resume` invocation.
fn finalize_resume_interactive(
mut interactive: TuiCli,

View File

@@ -0,0 +1,143 @@
use http::Error as HttpError;
use reqwest::IntoUrl;
use reqwest::Method;
use reqwest::Response;
use reqwest::header::HeaderMap;
use reqwest::header::HeaderName;
use reqwest::header::HeaderValue;
use serde::Serialize;
use std::collections::HashMap;
use std::fmt::Display;
use std::time::Duration;
#[derive(Clone, Debug)]
pub struct CodexHttpClient {
inner: reqwest::Client,
}
impl CodexHttpClient {
pub fn new(inner: reqwest::Client) -> Self {
Self { inner }
}
pub fn get<U>(&self, url: U) -> CodexRequestBuilder
where
U: IntoUrl,
{
self.request(Method::GET, url)
}
pub fn post<U>(&self, url: U) -> CodexRequestBuilder
where
U: IntoUrl,
{
self.request(Method::POST, url)
}
pub fn request<U>(&self, method: Method, url: U) -> CodexRequestBuilder
where
U: IntoUrl,
{
let url_str = url.as_str().to_string();
CodexRequestBuilder::new(self.inner.request(method.clone(), url), method, url_str)
}
}
#[must_use = "requests are not sent unless `send` is awaited"]
#[derive(Debug)]
pub struct CodexRequestBuilder {
builder: reqwest::RequestBuilder,
method: Method,
url: String,
}
impl CodexRequestBuilder {
fn new(builder: reqwest::RequestBuilder, method: Method, url: String) -> Self {
Self {
builder,
method,
url,
}
}
fn map(self, f: impl FnOnce(reqwest::RequestBuilder) -> reqwest::RequestBuilder) -> Self {
Self {
builder: f(self.builder),
method: self.method,
url: self.url,
}
}
pub fn headers(self, headers: HeaderMap) -> Self {
self.map(|builder| builder.headers(headers))
}
pub fn header<K, V>(self, key: K, value: V) -> Self
where
HeaderName: TryFrom<K>,
<HeaderName as TryFrom<K>>::Error: Into<HttpError>,
HeaderValue: TryFrom<V>,
<HeaderValue as TryFrom<V>>::Error: Into<HttpError>,
{
self.map(|builder| builder.header(key, value))
}
pub fn bearer_auth<T>(self, token: T) -> Self
where
T: Display,
{
self.map(|builder| builder.bearer_auth(token))
}
pub fn timeout(self, timeout: Duration) -> Self {
self.map(|builder| builder.timeout(timeout))
}
pub fn json<T>(self, value: &T) -> Self
where
T: ?Sized + Serialize,
{
self.map(|builder| builder.json(value))
}
pub async fn send(self) -> Result<Response, reqwest::Error> {
match self.builder.send().await {
Ok(response) => {
let request_ids = Self::extract_request_ids(&response);
tracing::debug!(
method = %self.method,
url = %self.url,
status = %response.status(),
request_ids = ?request_ids,
version = ?response.version(),
"Request completed"
);
Ok(response)
}
Err(error) => {
let status = error.status();
tracing::debug!(
method = %self.method,
url = %self.url,
status = status.map(|s| s.as_u16()),
error = %error,
"Request failed"
);
Err(error)
}
}
}
fn extract_request_ids(response: &Response) -> HashMap<String, String> {
["cf-ray", "x-request-id", "x-oai-request-id"]
.iter()
.filter_map(|&name| {
let header_name = HeaderName::from_static(name);
let value = response.headers().get(header_name)?;
let value = value.to_str().ok()?.to_owned();
Some((name.to_owned(), value))
})
.collect()
}
}
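The new `CodexRequestBuilder` above wraps `reqwest::RequestBuilder` so that the method and URL survive the builder chain and can be logged on `send`. Each delegated method funnels through a single `map` helper. A std-only sketch of that delegating-wrapper pattern (mock builder, not reqwest):

```rust
// Std-only sketch of the delegating-wrapper pattern from CodexRequestBuilder
// above: every chained call goes through `map`, so the metadata captured at
// construction (method, url) is still available when the request is "sent".
struct InnerBuilder {
    headers: Vec<(String, String)>,
}

struct LoggedBuilder {
    inner: InnerBuilder,
    method: String,
    url: String,
}

impl LoggedBuilder {
    fn new(method: &str, url: &str) -> Self {
        Self {
            inner: InnerBuilder { headers: Vec::new() },
            method: method.to_string(),
            url: url.to_string(),
        }
    }

    // Rebuilds the wrapper around a transformed inner builder, keeping metadata.
    fn map(self, f: impl FnOnce(InnerBuilder) -> InnerBuilder) -> Self {
        Self {
            inner: f(self.inner),
            method: self.method,
            url: self.url,
        }
    }

    fn header(self, key: &str, value: &str) -> Self {
        let (key, value) = (key.to_string(), value.to_string());
        self.map(|mut b| {
            b.headers.push((key, value));
            b
        })
    }

    // Stand-in for the tracing::debug! call in `send` above.
    fn send(self) -> String {
        format!("{} {} [{} header(s)]", self.method, self.url, self.inner.headers.len())
    }
}

fn main() {
    let log = LoggedBuilder::new("GET", "https://example.com")
        .header("accept", "application/json")
        .send();
    assert_eq!(log, "GET https://example.com [1 header(s)]");
    println!("ok");
}
```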

View File

@@ -1,3 +1,4 @@
mod default_client;
mod error;
mod request;
mod retry;
@@ -5,6 +6,8 @@ mod sse;
mod telemetry;
mod transport;
pub use crate::default_client::CodexHttpClient;
pub use crate::default_client::CodexRequestBuilder;
pub use crate::error::StreamError;
pub use crate::error::TransportError;
pub use crate::request::Request;

View File

@@ -1,3 +1,5 @@
use crate::default_client::CodexHttpClient;
use crate::default_client::CodexRequestBuilder;
use crate::error::TransportError;
use crate::request::Request;
use crate::request::Response;
@@ -28,15 +30,17 @@ pub trait HttpTransport: Send + Sync {
#[derive(Clone, Debug)]
pub struct ReqwestTransport {
client: reqwest::Client,
client: CodexHttpClient,
}
impl ReqwestTransport {
pub fn new(client: reqwest::Client) -> Self {
Self { client }
Self {
client: CodexHttpClient::new(client),
}
}
fn build(&self, req: Request) -> Result<reqwest::RequestBuilder, TransportError> {
fn build(&self, req: Request) -> Result<CodexRequestBuilder, TransportError> {
let mut builder = self
.client
.request(

View File

@@ -4,10 +4,10 @@ use codex_core::config::Config;
use crate::sandbox_summary::summarize_sandbox_policy;
/// Build a list of key/value pairs summarizing the effective configuration.
pub fn create_config_summary_entries(config: &Config) -> Vec<(&'static str, String)> {
pub fn create_config_summary_entries(config: &Config, model: &str) -> Vec<(&'static str, String)> {
let mut entries = vec![
("workdir", config.cwd.display().to_string()),
("model", config.model.clone()),
("model", model.to_string()),
("provider", config.model_provider_id.clone()),
("approval", config.approval_policy.to_string()),
("sandbox", summarize_sandbox_policy(&config.sandbox_policy)),

View File

@@ -1,8 +1,8 @@
[package]
name = "codex-core"
version.workspace = true
edition.workspace = true
license.workspace = true
name = "codex-core"
version.workspace = true
[lib]
doctest = false
@@ -14,16 +14,16 @@ workspace = true
[dependencies]
anyhow = { workspace = true }
askama = { workspace = true }
async-channel = { workspace = true }
async-trait = { workspace = true }
base64 = { workspace = true }
chrono = { workspace = true, features = ["serde"] }
chardetng = { workspace = true }
chrono = { workspace = true, features = ["serde"] }
codex-api = { workspace = true }
codex-app-server-protocol = { workspace = true }
codex-apply-patch = { workspace = true }
codex-async-utils = { workspace = true }
codex-api = { workspace = true }
codex-client = { workspace = true }
codex-execpolicy = { workspace = true }
codex-file-search = { workspace = true }
codex-git = { workspace = true }
@@ -31,14 +31,15 @@ codex-keyring-store = { workspace = true }
codex-otel = { workspace = true, features = ["otel"] }
codex-protocol = { workspace = true }
codex-rmcp-client = { workspace = true }
codex-utils-absolute-path = { workspace = true }
codex-utils-pty = { workspace = true }
codex-utils-readiness = { workspace = true }
codex-utils-string = { workspace = true }
codex-windows-sandbox = { package = "codex-windows-sandbox", path = "../windows-sandbox-rs" }
dirs = { workspace = true }
dunce = { workspace = true }
env-flags = { workspace = true }
encoding_rs = { workspace = true }
env-flags = { workspace = true }
eventsource-stream = { workspace = true }
futures = { workspace = true }
http = { workspace = true }
@@ -46,8 +47,10 @@ indexmap = { workspace = true }
keyring = { workspace = true, features = ["crypto-rust"] }
libc = { workspace = true }
mcp-types = { workspace = true }
once_cell = { workspace = true }
os_info = { workspace = true }
rand = { workspace = true }
regex = { workspace = true }
regex-lite = { workspace = true }
reqwest = { workspace = true, features = ["json", "stream"] }
serde = { workspace = true, features = ["derive"] }
@@ -58,9 +61,6 @@ sha2 = { workspace = true }
shlex = { workspace = true }
similar = { workspace = true }
strum_macros = { workspace = true }
url = { workspace = true }
once_cell = { workspace = true }
regex = { workspace = true }
tempfile = { workspace = true }
test-case = "3.3.1"
test-log = { workspace = true }
@@ -84,6 +84,7 @@ toml_edit = { workspace = true }
tracing = { workspace = true, features = ["log"] }
tree-sitter = { workspace = true }
tree-sitter-bash = { workspace = true }
url = { workspace = true }
uuid = { workspace = true, features = ["serde", "v4", "v5"] }
which = { workspace = true }
wildmatch = { workspace = true }
@@ -94,9 +95,9 @@ test-support = []
[target.'cfg(target_os = "linux")'.dependencies]
keyring = { workspace = true, features = ["linux-native-async-persistent"] }
landlock = { workspace = true }
seccompiler = { workspace = true }
keyring = { workspace = true, features = ["linux-native-async-persistent"] }
[target.'cfg(target_os = "macos")'.dependencies]
core-foundation = "0.9"

View File

@@ -48,7 +48,7 @@ When you are running with `approval_policy == on-request`, and sandboxing enable
- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /var)
- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files.
- You are running sandboxed and need to run a command that requires network access (e.g. installing packages)
- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. ALWAYS proceed to use the `with_escalated_permissions` and `justification` parameters - do not message the user before requesting approval for the command.
- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. ALWAYS proceed to use the `sandbox_permissions` and `justification` parameters - do not message the user before requesting approval for the command.
- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for
- (for all of these, you should weigh alternative paths that do not require approval)
@@ -59,8 +59,8 @@ You will be told what filesystem sandboxing, network sandboxing, and approval mo
Although they introduce friction to the user because your work is paused until the user responds, you should leverage them when necessary to accomplish important work. If completing the task requires escalated permissions, do not let these settings or the sandbox deter you from attempting to accomplish the user's task, unless the approval policy is set to "never", in which case never ask for approvals.
When requesting approval to execute a command that will require escalated privileges:
- Provide the `with_escalated_permissions` parameter with the boolean value true
- Include a short, 1 sentence explanation for why you need to enable `with_escalated_permissions` in the justification parameter
- Provide the `sandbox_permissions` parameter with the value `"require_escalated"`
- Include a short, 1 sentence explanation for why you need escalated permissions in the justification parameter
## Special user requests

View File

@@ -182,7 +182,7 @@ When you are running with `approval_policy == on-request`, and sandboxing enable
- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /var)
- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files.
- You are running sandboxed and need to run a command that requires network access (e.g. installing packages)
- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. ALWAYS proceed to use the `with_escalated_permissions` and `justification` parameters. Within this harness, prefer requesting approval via the tool over asking in natural language.
- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. ALWAYS proceed to use the `sandbox_permissions` and `justification` parameters. Within this harness, prefer requesting approval via the tool over asking in natural language.
- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for
- (for all of these, you should weigh alternative paths that do not require approval)
@@ -193,8 +193,8 @@ You will be told what filesystem sandboxing, network sandboxing, and approval mo
Although they introduce friction to the user because your work is paused until the user responds, you should leverage them when necessary to accomplish important work. If completing the task requires escalated permissions, do not let these settings or the sandbox deter you from attempting to accomplish the user's task, unless the approval policy is set to "never", in which case never ask for approvals.
When requesting approval to execute a command that will require escalated privileges:
- Provide the `with_escalated_permissions` parameter with the boolean value true
- Include a short, 1 sentence explanation for why you need to enable `with_escalated_permissions` in the justification parameter
- Provide the `sandbox_permissions` parameter with the value `"require_escalated"`
- Include a short, 1 sentence explanation for why you need escalated permissions in the justification parameter
## Validating your work

View File

@@ -48,7 +48,7 @@ When you are running with `approval_policy == on-request`, and sandboxing enable
- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /var)
- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files.
- You are running sandboxed and need to run a command that requires network access (e.g. installing packages)
- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. ALWAYS proceed to use the `with_escalated_permissions` and `justification` parameters - do not message the user before requesting approval for the command.
- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. ALWAYS proceed to use the `sandbox_permissions` and `justification` parameters - do not message the user before requesting approval for the command.
- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for
- (for all of these, you should weigh alternative paths that do not require approval)
@@ -59,8 +59,8 @@ You will be told what filesystem sandboxing, network sandboxing, and approval mo
Although they introduce friction to the user because your work is paused until the user responds, you should leverage them when necessary to accomplish important work. If completing the task requires escalated permissions, do not let these settings or the sandbox deter you from attempting to accomplish the user's task unless the approval policy is set to "never", in which case never ask for approvals.
When requesting approval to execute a command that will require escalated privileges:
- Provide the `with_escalated_permissions` parameter with the boolean value true
- Include a short, 1 sentence explanation for why you need to enable `with_escalated_permissions` in the justification parameter
- Provide the `sandbox_permissions` parameter with the value `"require_escalated"`
- Include a short, 1 sentence explanation for why you need escalated permissions in the justification parameter
## Special user requests

View File
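The prompt changes above replace the boolean `with_escalated_permissions` parameter with a `sandbox_permissions` string parameter. A minimal sketch of what the new parameter shape might look like on the wire, assuming the `"require_escalated"` spelling shown in the test hunks below; the `"use_default"` string and the `approval_payload` helper are illustrative assumptions, not the actual implementation:

```rust
// Hypothetical sketch of the `sandbox_permissions` tool parameter.
#[derive(Debug, Clone, Copy, PartialEq)]
enum SandboxPermissions {
    UseDefault,
    RequireEscalated,
}

impl SandboxPermissions {
    // Wire representation; "use_default" is an assumed spelling.
    fn as_str(self) -> &'static str {
        match self {
            SandboxPermissions::UseDefault => "use_default",
            SandboxPermissions::RequireEscalated => "require_escalated",
        }
    }
}

// Builds a JSON payload by hand to keep the sketch dependency-free.
fn approval_payload(cmd: &str, perms: SandboxPermissions, justification: &str) -> String {
    format!(
        "{{\"cmd\":\"{cmd}\",\"sandbox_permissions\":\"{}\",\"justification\":\"{justification}\"}}",
        perms.as_str()
    )
}

fn main() {
    let payload = approval_payload(
        "cargo test",
        SandboxPermissions::RequireEscalated,
        "tests need network access to fetch crates",
    );
    println!("{payload}");
}
```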

@@ -23,7 +23,6 @@ pub use crate::auth::storage::AuthDotJson;
use crate::auth::storage::AuthStorageBackend;
use crate::auth::storage::create_auth_storage;
use crate::config::Config;
use crate::default_client::CodexHttpClient;
use crate::error::RefreshTokenFailedError;
use crate::error::RefreshTokenFailedReason;
use crate::token_data::KnownPlan as InternalKnownPlan;
@@ -31,6 +30,7 @@ use crate::token_data::PlanType as InternalPlanType;
use crate::token_data::TokenData;
use crate::token_data::parse_id_token;
use crate::util::try_parse_error_message;
use codex_client::CodexHttpClient;
use codex_protocol::account::PlanType as AccountPlanType;
use once_cell::sync::Lazy;
use serde_json::Value;

View File

@@ -166,7 +166,7 @@ impl ModelClient {
let stream_result = client
.stream_prompt(
&self.config.model,
&self.get_model(),
&api_prompt,
Some(conversation_id.clone()),
Some(session_source.clone()),
@@ -260,7 +260,7 @@ impl ModelClient {
};
let stream_result = client
.stream_prompt(&self.config.model, &api_prompt, options)
.stream_prompt(&self.get_model(), &api_prompt, options)
.await;
match stream_result {
@@ -292,7 +292,7 @@ impl ModelClient {
/// Returns the currently configured model slug.
pub fn get_model(&self) -> String {
self.config.model.clone()
self.get_model_family().get_model_slug().to_string()
}
/// Returns the currently configured model family.
@@ -337,7 +337,7 @@ impl ModelClient {
.get_full_instructions(&self.get_model_family())
.into_owned();
let payload = ApiCompactionInput {
model: &self.config.model,
model: &self.get_model(),
input: &prompt.input,
instructions: &instructions,
};

View File

@@ -95,9 +95,11 @@ use crate::protocol::RateLimitSnapshot;
use crate::protocol::ReasoningContentDeltaEvent;
use crate::protocol::ReasoningRawContentDeltaEvent;
use crate::protocol::ReviewDecision;
use crate::protocol::SandboxCommandAssessment;
use crate::protocol::SandboxPolicy;
use crate::protocol::SessionConfiguredEvent;
use crate::protocol::SkillErrorInfo;
use crate::protocol::SkillInfo;
use crate::protocol::SkillLoadOutcomeInfo;
use crate::protocol::StreamErrorEvent;
use crate::protocol::Submission;
use crate::protocol::TokenCountEvent;
@@ -109,6 +111,11 @@ use crate::rollout::RolloutRecorder;
use crate::rollout::RolloutRecorderParams;
use crate::rollout::map_session_init_error;
use crate::shell;
use crate::shell_snapshot::ShellSnapshot;
use crate::skills::SkillInjections;
use crate::skills::SkillLoadOutcome;
use crate::skills::build_skill_injections;
use crate::skills::load_skills;
use crate::state::ActiveTurn;
use crate::state::SessionServices;
use crate::state::SessionState;
@@ -173,7 +180,31 @@ impl Codex {
let (tx_sub, rx_sub) = async_channel::bounded(SUBMISSION_CHANNEL_CAPACITY);
let (tx_event, rx_event) = async_channel::unbounded();
let user_instructions = get_user_instructions(&config).await;
let loaded_skills = if config.features.enabled(Feature::Skills) {
Some(load_skills(&config))
} else {
None
};
if let Some(outcome) = &loaded_skills {
for err in &outcome.errors {
error!(
"failed to load skill {}: {}",
err.path.display(),
err.message
);
}
}
let skills_outcome = loaded_skills.clone();
let user_instructions = get_user_instructions(
&config,
skills_outcome
.as_ref()
.map(|outcome| outcome.skills.as_slice()),
)
.await;
let exec_policy = load_exec_policy_for_features(&config.features, &config.codex_home)
.await
@@ -181,10 +212,15 @@ impl Codex {
let exec_policy = Arc::new(RwLock::new(exec_policy));
let config = Arc::new(config);
if config.features.enabled(Feature::RemoteModels)
&& let Err(err) = models_manager.refresh_available_models(&config).await
{
error!("failed to refresh available models: {err:?}");
}
let model = models_manager.get_model(&config.model, &config).await;
let session_configuration = SessionConfiguration {
provider: config.model_provider.clone(),
model: config.model.clone(),
model: model.clone(),
model_reasoning_effort: config.model_reasoning_effort,
model_reasoning_summary: config.model_reasoning_summary,
developer_instructions: config.developer_instructions.clone(),
@@ -201,6 +237,7 @@ impl Codex {
// Generate a unique ID for the lifetime of this Codex session.
let session_source_clone = session_configuration.session_source.clone();
let session = Session::new(
session_configuration,
config.clone(),
@@ -209,6 +246,7 @@ impl Codex {
tx_event.clone(),
conversation_history,
session_source_clone,
skills_outcome.clone(),
)
.await
.map_err(|e| {
@@ -398,10 +436,11 @@ pub(crate) struct SessionSettingsUpdate {
}
impl Session {
/// Don't expand the number of mutated arguments on config. We are in the process of getting rid of it.
fn build_per_turn_config(session_configuration: &SessionConfiguration) -> Config {
// todo(aibrahim): store this state somewhere else so we don't need to mut config
let config = session_configuration.original_config_do_not_use.clone();
let mut per_turn_config = (*config).clone();
per_turn_config.model = session_configuration.model.clone();
per_turn_config.model_reasoning_effort = session_configuration.model_reasoning_effort;
per_turn_config.model_reasoning_summary = session_configuration.model_reasoning_summary;
per_turn_config.features = config.features.clone();
@@ -421,7 +460,7 @@ impl Session {
) -> TurnContext {
let otel_event_manager = otel_event_manager.clone().with_model(
session_configuration.model.as_str(),
model_family.slug.as_str(),
model_family.get_model_slug(),
);
let per_turn_config = Arc::new(per_turn_config);
@@ -465,6 +504,7 @@ impl Session {
}
}
#[allow(clippy::too_many_arguments)]
async fn new(
session_configuration: SessionConfiguration,
config: Arc<Config>,
@@ -473,6 +513,7 @@ impl Session {
tx_event: Sender<Event>,
initial_history: InitialHistory,
session_source: SessionSource,
skills: Option<SkillLoadOutcome>,
) -> anyhow::Result<Arc<Self>> {
debug!(
"Configuring session: model={}; provider={:?}",
@@ -510,7 +551,6 @@ impl Session {
// - load history metadata
let rollout_fut = RolloutRecorder::new(&config, rollout_params);
let default_shell = shell::default_user_shell();
let history_meta_fut = crate::message_history::history_metadata(&config);
let auth_statuses_fut = compute_auth_statuses(
config.mcp_servers.iter(),
@@ -545,14 +585,11 @@ impl Session {
});
}
let model_family = models_manager
.construct_model_family(&config.model, &config)
.await;
// todo(aibrahim): why are we passing model here while it can change?
let otel_event_manager = OtelEventManager::new(
conversation_id,
config.model.as_str(),
model_family.slug.as_str(),
session_configuration.model.as_str(),
session_configuration.model.as_str(),
auth_manager.auth().and_then(|a| a.get_account_id()),
auth_manager.auth().and_then(|a| a.get_account_email()),
auth_manager.auth().map(|a| a.mode),
@@ -572,7 +609,14 @@ impl Session {
config.active_profile.clone(),
);
let mut default_shell = shell::default_user_shell();
// Create the mutable state for the Session.
if config.features.enabled(Feature::ShellSnapshot) {
default_shell.shell_snapshot =
ShellSnapshot::try_new(&config.codex_home, &default_shell)
.await
.map(Arc::new);
}
let state = SessionState::new(session_configuration.clone());
let services = SessionServices {
@@ -581,12 +625,13 @@ impl Session {
unified_exec_manager: UnifiedExecSessionManager::default(),
notifier: UserNotifier::new(config.notify.clone()),
rollout: Mutex::new(Some(rollout_recorder)),
user_shell: default_shell,
user_shell: Arc::new(default_shell),
show_raw_agent_reasoning: config.show_raw_agent_reasoning,
auth_manager: Arc::clone(&auth_manager),
otel_event_manager,
models_manager: Arc::clone(&models_manager),
tool_approvals: Mutex::new(ApprovalStore::default()),
skills: skills.clone(),
};
let sess = Arc::new(Session {
@@ -602,6 +647,7 @@ impl Session {
// Dispatch the SessionConfiguredEvent first and then report any errors.
// If resuming, include converted initial messages in the payload so UIs can render them immediately.
let initial_messages = initial_history.get_event_msgs();
let skill_load_outcome = skill_load_outcome_for_client(skills.as_ref());
let events = std::iter::once(Event {
id: INITIAL_SUBMIT_ID.to_owned(),
@@ -616,6 +662,7 @@ impl Session {
history_log_id,
history_entry_count,
initial_messages,
skill_load_outcome,
rollout_path,
}),
})
@@ -774,7 +821,7 @@ impl Session {
let model_family = self
.services
.models_manager
.construct_model_family(&per_turn_config.model, &per_turn_config)
.construct_model_family(session_configuration.model.as_str(), &per_turn_config)
.await;
let mut turn_context: TurnContext = Self::make_turn_context(
Some(Arc::clone(&self.services.auth_manager)),
@@ -799,14 +846,16 @@ impl Session {
) -> Option<ResponseItem> {
let prev = previous?;
let prev_context = EnvironmentContext::from(prev.as_ref());
let next_context = EnvironmentContext::from(next);
let shell = self.user_shell();
let prev_context = EnvironmentContext::from_turn_context(prev.as_ref(), shell.as_ref());
let next_context = EnvironmentContext::from_turn_context(next, shell.as_ref());
if prev_context.equals_except_shell(&next_context) {
return None;
}
Some(ResponseItem::from(EnvironmentContext::diff(
prev.as_ref(),
next,
shell.as_ref(),
)))
}
@@ -866,34 +915,6 @@ impl Session {
.await;
}
pub(crate) async fn assess_sandbox_command(
&self,
turn_context: &TurnContext,
call_id: &str,
command: &[String],
failure_message: Option<&str>,
) -> Option<SandboxCommandAssessment> {
let config = turn_context.client.config();
let provider = turn_context.client.provider().clone();
let auth_manager = Arc::clone(&self.services.auth_manager);
let otel = self.services.otel_event_manager.clone();
crate::sandboxing::assessment::assess_command(
config,
provider,
auth_manager,
&otel,
self.conversation_id,
self.services.models_manager.clone(),
turn_context.client.get_session_source(),
call_id,
command,
&turn_context.sandbox_policy,
&turn_context.cwd,
failure_message,
)
.await
}
/// Adds an execpolicy amendment to both the in-memory and on-disk policies so future
/// commands can use the newly approved prefix.
pub(crate) async fn persist_execpolicy_amendment(
@@ -941,7 +962,6 @@ impl Session {
command: Vec<String>,
cwd: PathBuf,
reason: Option<String>,
risk: Option<SandboxCommandAssessment>,
proposed_execpolicy_amendment: Option<ExecPolicyAmendment>,
) -> ReviewDecision {
let sub_id = turn_context.sub_id.clone();
@@ -969,7 +989,6 @@ impl Session {
command,
cwd,
reason,
risk,
proposed_execpolicy_amendment,
parsed_cmd,
});
@@ -1156,6 +1175,7 @@ impl Session {
pub(crate) fn build_initial_context(&self, turn_context: &TurnContext) -> Vec<ResponseItem> {
let mut items = Vec::<ResponseItem>::with_capacity(3);
let shell = self.user_shell();
if let Some(developer_instructions) = turn_context.developer_instructions.as_deref() {
items.push(DeveloperInstructions::new(developer_instructions.to_string()).into());
}
@@ -1172,7 +1192,7 @@ impl Session {
Some(turn_context.cwd.clone()),
Some(turn_context.approval_policy),
Some(turn_context.sandbox_policy.clone()),
self.user_shell().clone(),
shell.as_ref().clone(),
)));
items
}
@@ -1447,8 +1467,8 @@ impl Session {
&self.services.notifier
}
pub(crate) fn user_shell(&self) -> &shell::Shell {
&self.services.user_shell
pub(crate) fn user_shell(&self) -> Arc<shell::Shell> {
Arc::clone(&self.services.user_shell)
}
fn show_raw_agent_reasoning(&self) -> bool {
@@ -1465,16 +1485,6 @@ async fn submission_loop(sess: Arc<Session>, config: Arc<Config>, rx_sub: Receiv
let mut previous_context: Option<Arc<TurnContext>> =
Some(sess.new_turn(SessionSettingsUpdate::default()).await);
if config.features.enabled(Feature::RemoteModels)
&& let Err(err) = sess
.services
.models_manager
.refresh_available_models(&config.model_provider)
.await
{
error!("failed to refresh available models: {err}");
}
// To break out of this loop, send Op::Shutdown.
while let Ok(sub) = rx_sub.recv().await {
debug!(?sub, "Submission");
@@ -1946,7 +1956,6 @@ async fn spawn_review_thread(
// Build per-turn client with the requested model/family.
let mut per_turn_config = (*config).clone();
per_turn_config.model = model.clone();
per_turn_config.model_reasoning_effort = Some(ReasoningEffortConfig::Low);
per_turn_config.model_reasoning_summary = ReasoningSummaryConfig::Detailed;
per_turn_config.features = review_features.clone();
@@ -1955,7 +1964,7 @@ async fn spawn_review_thread(
.client
.get_otel_event_manager()
.with_model(
per_turn_config.model.as_str(),
config.review_model.as_str(),
review_model_family.slug.as_str(),
);
@@ -2007,6 +2016,30 @@ async fn spawn_review_thread(
.await;
}
fn skill_load_outcome_for_client(
outcome: Option<&SkillLoadOutcome>,
) -> Option<SkillLoadOutcomeInfo> {
outcome.map(|outcome| SkillLoadOutcomeInfo {
skills: outcome
.skills
.iter()
.map(|skill| SkillInfo {
name: skill.name.clone(),
description: skill.description.clone(),
path: skill.path.clone(),
})
.collect(),
errors: outcome
.errors
.iter()
.map(|err| SkillErrorInfo {
path: err.path.clone(),
message: err.message.clone(),
})
.collect(),
})
}
/// Takes a user message as input and runs a loop where, at each turn, the model
/// replies with either:
///
@@ -2035,11 +2068,26 @@ pub(crate) async fn run_task(
});
sess.send_event(&turn_context, event).await;
let SkillInjections {
items: skill_items,
warnings: skill_warnings,
} = build_skill_injections(&input, sess.services.skills.as_ref()).await;
for message in skill_warnings {
sess.send_event(&turn_context, EventMsg::Warning(WarningEvent { message }))
.await;
}
let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input);
let response_item: ResponseItem = initial_input_for_turn.clone().into();
sess.record_response_item_and_emit_turn_item(turn_context.as_ref(), response_item)
.await;
if !skill_items.is_empty() {
sess.record_conversation_items(&turn_context, &skill_items)
.await;
}
sess.maybe_start_ghost_snapshot(Arc::clone(&turn_context), cancellation_token.child_token())
.await;
let mut last_agent_message: Option<String> = None;
@@ -2174,21 +2222,11 @@ async fn run_turn(
.get_model_family()
.supports_parallel_tool_calls;
// TODO(jif) revert once testing phase is done.
let parallel_tool_calls = model_supports_parallel && sess.enabled(Feature::ParallelToolCalls);
let mut base_instructions = turn_context.base_instructions.clone();
if parallel_tool_calls {
static INSTRUCTIONS: &str = include_str!("../templates/parallel/instructions.md");
let family = turn_context.client.get_model_family();
let mut new_instructions = base_instructions.unwrap_or(family.base_instructions);
new_instructions.push_str(INSTRUCTIONS);
base_instructions = Some(new_instructions);
}
let prompt = Prompt {
input,
tools: router.specs(),
parallel_tool_calls,
base_instructions_override: base_instructions,
parallel_tool_calls: model_supports_parallel && sess.enabled(Feature::ParallelToolCalls),
base_instructions_override: turn_context.base_instructions.clone(),
output_schema: turn_context.final_output_json_schema.clone(),
};
@@ -2586,9 +2624,10 @@ mod tests {
)
.expect("load default test config");
let config = Arc::new(config);
let model = ModelsManager::get_model_offline(config.model.as_deref());
let session_configuration = SessionConfiguration {
provider: config.model_provider.clone(),
model: config.model.clone(),
model,
model_reasoning_effort: config.model_reasoning_effort,
model_reasoning_summary: config.model_reasoning_summary,
developer_instructions: config.developer_instructions.clone(),
@@ -2657,9 +2696,10 @@ mod tests {
)
.expect("load default test config");
let config = Arc::new(config);
let model = ModelsManager::get_model_offline(config.model.as_deref());
let session_configuration = SessionConfiguration {
provider: config.model_provider.clone(),
model: config.model.clone(),
model,
model_reasoning_effort: config.model_reasoning_effort,
model_reasoning_summary: config.model_reasoning_summary,
developer_instructions: config.developer_instructions.clone(),
@@ -2834,7 +2874,7 @@ mod tests {
) -> OtelEventManager {
OtelEventManager::new(
conversation_id,
config.model.as_str(),
ModelsManager::get_model_offline(config.model.as_deref()).as_str(),
model_family.slug.as_str(),
None,
Some("test@test.com".to_string()),
@@ -2858,9 +2898,10 @@ mod tests {
let auth_manager =
AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key"));
let models_manager = Arc::new(ModelsManager::new(auth_manager.clone()));
let model = ModelsManager::get_model_offline(config.model.as_deref());
let session_configuration = SessionConfiguration {
provider: config.model_provider.clone(),
model: config.model.clone(),
model,
model_reasoning_effort: config.model_reasoning_effort,
model_reasoning_summary: config.model_reasoning_summary,
developer_instructions: config.developer_instructions.clone(),
@@ -2875,8 +2916,10 @@ mod tests {
session_source: SessionSource::Exec,
};
let per_turn_config = Session::build_per_turn_config(&session_configuration);
let model_family =
ModelsManager::construct_model_family_offline(&per_turn_config.model, &per_turn_config);
let model_family = ModelsManager::construct_model_family_offline(
session_configuration.model.as_str(),
&per_turn_config,
);
let otel_event_manager =
otel_event_manager(conversation_id, config.as_ref(), &model_family);
@@ -2888,12 +2931,13 @@ mod tests {
unified_exec_manager: UnifiedExecSessionManager::default(),
notifier: UserNotifier::new(None),
rollout: Mutex::new(None),
user_shell: default_user_shell(),
user_shell: Arc::new(default_user_shell()),
show_raw_agent_reasoning: config.show_raw_agent_reasoning,
auth_manager: auth_manager.clone(),
otel_event_manager: otel_event_manager.clone(),
models_manager,
tool_approvals: Mutex::new(ApprovalStore::default()),
skills: None,
};
let turn_context = Session::make_turn_context(
@@ -2940,9 +2984,10 @@ mod tests {
let auth_manager =
AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key"));
let models_manager = Arc::new(ModelsManager::new(auth_manager.clone()));
let model = ModelsManager::get_model_offline(config.model.as_deref());
let session_configuration = SessionConfiguration {
provider: config.model_provider.clone(),
model: config.model.clone(),
model,
model_reasoning_effort: config.model_reasoning_effort,
model_reasoning_summary: config.model_reasoning_summary,
developer_instructions: config.developer_instructions.clone(),
@@ -2957,8 +3002,10 @@ mod tests {
session_source: SessionSource::Exec,
};
let per_turn_config = Session::build_per_turn_config(&session_configuration);
let model_family =
ModelsManager::construct_model_family_offline(&per_turn_config.model, &per_turn_config);
let model_family = ModelsManager::construct_model_family_offline(
session_configuration.model.as_str(),
&per_turn_config,
);
let otel_event_manager =
otel_event_manager(conversation_id, config.as_ref(), &model_family);
@@ -2970,12 +3017,13 @@ mod tests {
unified_exec_manager: UnifiedExecSessionManager::default(),
notifier: UserNotifier::new(None),
rollout: Mutex::new(None),
user_shell: default_user_shell(),
user_shell: Arc::new(default_user_shell()),
show_raw_agent_reasoning: config.show_raw_agent_reasoning,
auth_manager: Arc::clone(&auth_manager),
otel_event_manager: otel_event_manager.clone(),
models_manager,
tool_approvals: Mutex::new(ApprovalStore::default()),
skills: None,
};
let turn_context = Arc::new(Session::make_turn_context(
@@ -3325,6 +3373,7 @@ mod tests {
use crate::exec::ExecParams;
use crate::protocol::AskForApproval;
use crate::protocol::SandboxPolicy;
use crate::sandboxing::SandboxPermissions;
use crate::turn_diff_tracker::TurnDiffTracker;
use std::collections::HashMap;
@@ -3335,6 +3384,7 @@ mod tests {
let mut turn_context = Arc::new(turn_context_raw);
let timeout_ms = 1000;
let sandbox_permissions = SandboxPermissions::RequireEscalated;
let params = ExecParams {
command: if cfg!(windows) {
vec![
@@ -3352,13 +3402,13 @@ mod tests {
cwd: turn_context.cwd.clone(),
expiration: timeout_ms.into(),
env: HashMap::new(),
with_escalated_permissions: Some(true),
sandbox_permissions,
justification: Some("test".to_string()),
arg0: None,
};
let params2 = ExecParams {
with_escalated_permissions: Some(false),
sandbox_permissions: SandboxPermissions::UseDefault,
command: params.command.clone(),
cwd: params.cwd.clone(),
expiration: timeout_ms.into(),
@@ -3385,7 +3435,7 @@ mod tests {
"command": params.command.clone(),
"workdir": Some(turn_context.cwd.to_string_lossy().to_string()),
"timeout_ms": params.expiration.timeout_ms(),
"with_escalated_permissions": params.with_escalated_permissions,
"sandbox_permissions": params.sandbox_permissions,
"justification": params.justification.clone(),
})
.to_string(),
@@ -3422,7 +3472,7 @@ mod tests {
"command": params2.command.clone(),
"workdir": Some(turn_context.cwd.to_string_lossy().to_string()),
"timeout_ms": params2.expiration.timeout_ms(),
"with_escalated_permissions": params2.with_escalated_permissions,
"sandbox_permissions": params2.sandbox_permissions,
"justification": params2.justification.clone(),
})
.to_string(),
@@ -3455,6 +3505,7 @@ mod tests {
#[tokio::test]
async fn unified_exec_rejects_escalated_permissions_when_policy_not_on_request() {
use crate::protocol::AskForApproval;
use crate::sandboxing::SandboxPermissions;
use crate::turn_diff_tracker::TurnDiffTracker;
let (session, mut turn_context_raw) = make_session_and_context();
@@ -3474,7 +3525,7 @@ mod tests {
payload: ToolPayload::Function {
arguments: serde_json::json!({
"cmd": "echo hi",
"with_escalated_permissions": true,
"sandbox_permissions": SandboxPermissions::RequireEscalated,
"justification": "need unsandboxed execution",
})
.to_string(),

View File

@@ -280,7 +280,6 @@ async fn handle_exec_approval(
event.command,
event.cwd,
event.reason,
event.risk,
event.proposed_execpolicy_amendment,
);
let decision = await_approval_with_cancel(

View File

@@ -555,6 +555,14 @@ impl ConfigEditsBuilder {
self
}
pub fn with_edits<I>(mut self, edits: I) -> Self
where
I: IntoIterator<Item = ConfigEdit>,
{
self.edits.extend(edits);
self
}
/// Apply edits on a blocking thread.
pub fn apply_blocking(self) -> anyhow::Result<()> {
apply_blocking(&self.codex_home, self.profile.as_deref(), &self.edits)
@@ -603,6 +611,24 @@ model_reasoning_effort = "high"
assert_eq!(contents, expected);
}
#[test]
fn builder_with_edits_applies_custom_paths() {
let tmp = tempdir().expect("tmpdir");
let codex_home = tmp.path();
ConfigEditsBuilder::new(codex_home)
.with_edits(vec![ConfigEdit::SetPath {
segments: vec!["enabled".to_string()],
value: value(true),
}])
.apply_blocking()
.expect("persist");
let contents =
std::fs::read_to_string(codex_home.join(CONFIG_TOML_FILE)).expect("read config");
assert_eq!(contents, "enabled = true\n");
}
#[test]
fn blocking_set_model_preserves_inline_table_contents() {
let tmp = tempdir().expect("tmpdir");

View File
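The `with_edits` method added above follows the common Rust builder idiom of accepting any `IntoIterator`, so callers can pass a `Vec`, an array, or an iterator chain. A standalone sketch of the pattern (the `ConfigEdit` variants here are simplified placeholders, not the real type):

```rust
// Simplified stand-in for the real ConfigEdit enum.
#[derive(Debug, Clone, PartialEq)]
enum ConfigEdit {
    SetModel(String),
    SetPath { segments: Vec<String>, value: String },
}

#[derive(Default)]
struct ConfigEditsBuilder {
    edits: Vec<ConfigEdit>,
}

impl ConfigEditsBuilder {
    fn with_edit(mut self, edit: ConfigEdit) -> Self {
        self.edits.push(edit);
        self
    }

    // Accepting IntoIterator keeps the API flexible about the collection type.
    fn with_edits<I>(mut self, edits: I) -> Self
    where
        I: IntoIterator<Item = ConfigEdit>,
    {
        self.edits.extend(edits);
        self
    }
}

fn main() {
    let builder = ConfigEditsBuilder::default()
        .with_edit(ConfigEdit::SetModel("o3".to_string()))
        .with_edits([ConfigEdit::SetPath {
            segments: vec!["enabled".to_string()],
            value: "true".to_string(),
        }]);
    println!("{}", builder.edits.len());
}
```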

@@ -40,6 +40,7 @@ use codex_protocol::config_types::TrustLevel;
use codex_protocol::config_types::Verbosity;
use codex_protocol::openai_models::ReasoningEffort;
use codex_rmcp_client::OAuthCredentialsStoreMode;
use codex_utils_absolute_path::AbsolutePathBufGuard;
use dirs::home_dir;
use dunce::canonicalize;
use serde::Deserialize;
@@ -58,7 +59,6 @@ pub mod edit;
pub mod profile;
pub mod types;
pub const OPENAI_DEFAULT_MODEL: &str = "gpt-5.1-codex-max";
const OPENAI_DEFAULT_REVIEW_MODEL: &str = "gpt-5.1-codex-max";
/// Maximum number of bytes of the documentation that will be embedded. Larger
@@ -72,7 +72,7 @@ pub const CONFIG_TOML_FILE: &str = "config.toml";
#[derive(Debug, Clone, PartialEq)]
pub struct Config {
/// Optional override of model selection.
pub model: String,
pub model: Option<String>,
/// Model used specifically for review sessions. Defaults to "gpt-5.1-codex-max".
pub review_model: String,
@@ -245,9 +245,6 @@ pub struct Config {
pub tools_web_search_request: bool,
/// When `true`, run a model-based assessment for commands denied by the sandbox.
pub experimental_sandbox_command_assessment: bool,
/// If set to `true`, use only the experimental unified exec tool.
pub use_experimental_unified_exec_tool: bool,
@@ -299,9 +296,9 @@ impl Config {
)
.await?;
let cfg: ConfigToml = root_value.try_into().map_err(|e| {
let cfg = deserialize_config_toml_with_base(root_value, &codex_home).map_err(|e| {
tracing::error!("Failed to deserialize overridden config: {e}");
std::io::Error::new(std::io::ErrorKind::InvalidData, e)
e
})?;
Self::load_from_base_config_with_overrides(cfg, overrides, codex_home)
@@ -319,9 +316,9 @@ pub async fn load_config_as_toml_with_cli_overrides(
)
.await?;
let cfg: ConfigToml = root_value.try_into().map_err(|e| {
let cfg = deserialize_config_toml_with_base(root_value, codex_home).map_err(|e| {
tracing::error!("Failed to deserialize overridden config: {e}");
std::io::Error::new(std::io::ErrorKind::InvalidData, e)
e
})?;
Ok(cfg)
@@ -357,6 +354,18 @@ fn apply_overlays(
base
}
fn deserialize_config_toml_with_base(
root_value: TomlValue,
config_base_dir: &Path,
) -> std::io::Result<ConfigToml> {
// This guard ensures that any relative path that is deserialized into an
// [AbsolutePathBuf] is resolved against `config_base_dir`.
let _guard = AbsolutePathBufGuard::new(config_base_dir);
root_value
.try_into()
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
}
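The guard used above scopes a base directory around deserialization so that relative paths resolve against `config_base_dir`. One way such an RAII guard can be built is with a thread-local; this is a minimal sketch under the assumption that `AbsolutePathBufGuard` works roughly this way (its real internals live in `codex_utils_absolute_path` and are not shown in this diff):

```rust
use std::cell::RefCell;
use std::path::{Path, PathBuf};

thread_local! {
    // Base directory that relative paths resolve against while a guard is alive.
    static BASE_DIR: RefCell<Option<PathBuf>> = RefCell::new(None);
}

/// RAII guard: installs a base dir on construction, restores the old one on drop.
struct BaseDirGuard {
    previous: Option<PathBuf>,
}

impl BaseDirGuard {
    fn new(base: &Path) -> Self {
        let previous = BASE_DIR.with(|b| b.replace(Some(base.to_path_buf())));
        BaseDirGuard { previous }
    }
}

impl Drop for BaseDirGuard {
    fn drop(&mut self) {
        let previous = self.previous.take();
        BASE_DIR.with(|b| *b.borrow_mut() = previous);
    }
}

/// What a deserializer for an absolute-path type might do with its input.
fn resolve(input: &Path) -> PathBuf {
    if input.is_absolute() {
        return input.to_path_buf();
    }
    BASE_DIR.with(|b| match &*b.borrow() {
        Some(base) => base.join(input),
        None => input.to_path_buf(),
    })
}

fn main() {
    let _guard = BaseDirGuard::new(Path::new("/home/user/.codex"));
    println!("{}", resolve(Path::new("skills")).display());
}
```

The guard restores the previous value on drop, so nested guards behave correctly even when deserialization recurses.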
pub async fn load_global_mcp_servers(
codex_home: &Path,
) -> std::io::Result<BTreeMap<String, McpServerConfig>> {
@@ -720,7 +729,6 @@ pub struct ConfigToml {
pub experimental_use_unified_exec_tool: Option<bool>,
pub experimental_use_rmcp_client: Option<bool>,
pub experimental_use_freeform_apply_patch: Option<bool>,
pub experimental_sandbox_command_assessment: Option<bool>,
/// Preferred OSS provider for local models, e.g. "lmstudio" or "ollama".
pub oss_provider: Option<String>,
}
@@ -906,7 +914,6 @@ pub struct ConfigOverrides {
pub include_apply_patch_tool: Option<bool>,
pub show_raw_agent_reasoning: Option<bool>,
pub tools_web_search_request: Option<bool>,
pub experimental_sandbox_command_assessment: Option<bool>,
/// Additional directories that should be treated as writable roots for this session.
pub additional_writable_roots: Vec<PathBuf>,
}
@@ -965,7 +972,6 @@ impl Config {
include_apply_patch_tool: include_apply_patch_tool_override,
show_raw_agent_reasoning,
tools_web_search_request: override_tools_web_search_request,
experimental_sandbox_command_assessment: sandbox_command_assessment_override,
additional_writable_roots,
} = overrides;
@@ -990,7 +996,6 @@ impl Config {
let feature_overrides = FeatureOverrides {
include_apply_patch_tool: include_apply_patch_tool_override,
web_search_request: override_tools_web_search_request,
experimental_sandbox_command_assessment: sandbox_command_assessment_override,
};
let features = Features::from_config(&cfg, &config_profile, feature_overrides);
@@ -1089,8 +1094,6 @@ impl Config {
let tools_web_search_request = features.enabled(Feature::WebSearchRequest);
let use_experimental_unified_exec_tool = features.enabled(Feature::UnifiedExec);
let use_experimental_use_rmcp_client = features.enabled(Feature::RmcpClient);
let experimental_sandbox_command_assessment =
features.enabled(Feature::SandboxCommandAssessment);
let forced_chatgpt_workspace_id =
cfg.forced_chatgpt_workspace_id.as_ref().and_then(|value| {
@@ -1104,11 +1107,7 @@ impl Config {
let forced_login_method = cfg.forced_login_method;
// todo(aibrahim): make model optional
let model = model
.or(config_profile.model)
.or(cfg.model)
.unwrap_or_else(default_model);
let model = model.or(config_profile.model).or(cfg.model);
let compact_prompt = compact_prompt.or(cfg.compact_prompt).and_then(|value| {
let trimmed = value.trim();
@@ -1221,7 +1220,6 @@ impl Config {
forced_login_method,
include_apply_patch_tool: include_apply_patch_tool_flag,
tools_web_search_request,
experimental_sandbox_command_assessment,
use_experimental_unified_exec_tool,
use_experimental_use_rmcp_client,
features,
@@ -1310,10 +1308,6 @@ impl Config {
}
}
fn default_model() -> String {
OPENAI_DEFAULT_MODEL.to_string()
}
fn default_review_model() -> String {
OPENAI_DEFAULT_REVIEW_MODEL.to_string()
}
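With the hard-coded `default_model` fallback removed above, model selection becomes a pure precedence chain over `Option`s: CLI override, then profile, then config file, with `None` only when every source is unset. `Option::or` evaluates left to right and short-circuits on the first `Some`; a minimal illustration (function name hypothetical):

```rust
// Highest-precedence source wins; None only if every source is unset.
fn pick_model(
    cli: Option<String>,
    profile: Option<String>,
    config_file: Option<String>,
) -> Option<String> {
    cli.or(profile).or(config_file)
}

fn main() {
    let picked = pick_model(None, Some("gpt-5.1".into()), Some("o3".into()));
    println!("{picked:?}");
}
```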
@@ -1852,10 +1846,11 @@ trust_level = "trusted"
};
let root_value = load_resolved_config(codex_home.path(), Vec::new(), overrides).await?;
let cfg: ConfigToml = root_value.try_into().map_err(|e| {
tracing::error!("Failed to deserialize overridden config: {e}");
std::io::Error::new(std::io::ErrorKind::InvalidData, e)
})?;
let cfg =
deserialize_config_toml_with_base(root_value, codex_home.path()).map_err(|e| {
tracing::error!("Failed to deserialize overridden config: {e}");
e
})?;
assert_eq!(
cfg.mcp_oauth_credentials_store,
Some(OAuthCredentialsStoreMode::Keyring),
@@ -1972,10 +1967,11 @@ trust_level = "trusted"
)
.await?;
let cfg: ConfigToml = root_value.try_into().map_err(|e| {
tracing::error!("Failed to deserialize overridden config: {e}");
std::io::Error::new(std::io::ErrorKind::InvalidData, e)
})?;
let cfg =
deserialize_config_toml_with_base(root_value, codex_home.path()).map_err(|e| {
tracing::error!("Failed to deserialize overridden config: {e}");
e
})?;
assert_eq!(cfg.model.as_deref(), Some("managed_config"));
Ok(())
@@ -2935,7 +2931,7 @@ model_verbosity = "high"
)?;
assert_eq!(
Config {
model: "o3".to_string(),
model: Some("o3".to_string()),
review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
model_context_window: None,
model_auto_compact_token_limit: None,
@@ -2975,7 +2971,6 @@ model_verbosity = "high"
forced_login_method: None,
include_apply_patch_tool: false,
tools_web_search_request: false,
experimental_sandbox_command_assessment: false,
use_experimental_unified_exec_tool: false,
use_experimental_use_rmcp_client: false,
features: Features::with_defaults(),
@@ -3010,7 +3005,7 @@ model_verbosity = "high"
fixture.codex_home(),
)?;
let expected_gpt3_profile_config = Config {
model: "gpt-3.5-turbo".to_string(),
model: Some("gpt-3.5-turbo".to_string()),
review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
model_context_window: None,
model_auto_compact_token_limit: None,
@@ -3050,7 +3045,6 @@ model_verbosity = "high"
forced_login_method: None,
include_apply_patch_tool: false,
tools_web_search_request: false,
experimental_sandbox_command_assessment: false,
use_experimental_unified_exec_tool: false,
use_experimental_use_rmcp_client: false,
features: Features::with_defaults(),
@@ -3100,7 +3094,7 @@ model_verbosity = "high"
fixture.codex_home(),
)?;
let expected_zdr_profile_config = Config {
model: "o3".to_string(),
model: Some("o3".to_string()),
review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
model_context_window: None,
model_auto_compact_token_limit: None,
@@ -3140,7 +3134,6 @@ model_verbosity = "high"
forced_login_method: None,
include_apply_patch_tool: false,
tools_web_search_request: false,
experimental_sandbox_command_assessment: false,
use_experimental_unified_exec_tool: false,
use_experimental_use_rmcp_client: false,
features: Features::with_defaults(),
@@ -3176,7 +3169,7 @@ model_verbosity = "high"
fixture.codex_home(),
)?;
let expected_gpt5_profile_config = Config {
model: "gpt-5.1".to_string(),
model: Some("gpt-5.1".to_string()),
review_model: OPENAI_DEFAULT_REVIEW_MODEL.to_string(),
model_context_window: None,
model_auto_compact_token_limit: None,
@@ -3216,7 +3209,6 @@ model_verbosity = "high"
forced_login_method: None,
include_apply_patch_tool: false,
tools_web_search_request: false,
experimental_sandbox_command_assessment: false,
use_experimental_unified_exec_tool: false,
use_experimental_use_rmcp_client: false,
features: Features::with_defaults(),


@@ -27,7 +27,6 @@ pub struct ConfigProfile {
pub experimental_use_unified_exec_tool: Option<bool>,
pub experimental_use_rmcp_client: Option<bool>,
pub experimental_use_freeform_apply_patch: Option<bool>,
pub experimental_sandbox_command_assessment: Option<bool>,
pub tools_web_search: Option<bool>,
pub tools_view_image: Option<bool>,
/// Optional feature toggles scoped to this profile.


@@ -3,13 +3,14 @@
// Note this file should generally be restricted to simple struct/enum
// definitions that do not contain business logic.
use serde::Deserializer;
use codex_utils_absolute_path::AbsolutePathBuf;
use std::collections::HashMap;
use std::path::PathBuf;
use std::time::Duration;
use wildmatch::WildMatchPattern;
use serde::Deserialize;
use serde::Deserializer;
use serde::Serialize;
use serde::de::Error as SerdeError;
@@ -285,9 +286,9 @@ pub enum OtelHttpProtocol {
#[derive(Deserialize, Debug, Clone, PartialEq, Default)]
#[serde(rename_all = "kebab-case")]
pub struct OtelTlsConfig {
pub ca_certificate: Option<PathBuf>,
pub client_certificate: Option<PathBuf>,
pub client_private_key: Option<PathBuf>,
pub ca_certificate: Option<AbsolutePathBuf>,
pub client_certificate: Option<AbsolutePathBuf>,
pub client_private_key: Option<AbsolutePathBuf>,
}
/// Which OTEL exporter to use.


@@ -1,5 +1,7 @@
use crate::AuthManager;
use crate::CodexAuth;
#[cfg(any(test, feature = "test-support"))]
use crate::ModelProviderInfo;
use crate::codex::Codex;
use crate::codex::CodexSpawnOk;
use crate::codex::INITIAL_SUBMIT_ID;
@@ -54,11 +56,14 @@ impl ConversationManager {
#[cfg(any(test, feature = "test-support"))]
/// Construct with a dummy AuthManager containing the provided CodexAuth.
/// Used for integration tests: should not be used by ordinary business logic.
pub fn with_auth(auth: CodexAuth) -> Self {
Self::new(
crate::AuthManager::from_auth_for_testing(auth),
SessionSource::Exec,
)
pub fn with_models_provider(auth: CodexAuth, provider: ModelProviderInfo) -> Self {
let auth_manager = crate::AuthManager::from_auth_for_testing(auth);
Self {
conversations: Arc::new(RwLock::new(HashMap::new())),
auth_manager: auth_manager.clone(),
session_source: SessionSource::Exec,
models_manager: Arc::new(ModelsManager::with_provider(auth_manager, provider)),
}
}
pub fn session_source(&self) -> SessionSource {
@@ -213,8 +218,8 @@ impl ConversationManager {
self.finalize_spawn(codex, conversation_id).await
}
pub async fn list_models(&self) -> Vec<ModelPreset> {
self.models_manager.list_models().await
pub async fn list_models(&self, config: &Config) -> Vec<ModelPreset> {
self.models_manager.list_models(config).await
}
pub fn get_models_manager(&self) -> Arc<ModelsManager> {


@@ -1,17 +1,12 @@
use crate::spawn::CODEX_SANDBOX_ENV_VAR;
use http::Error as HttpError;
use reqwest::IntoUrl;
use reqwest::Method;
use reqwest::Response;
use reqwest::header::HeaderName;
use reqwest::header::HeaderValue;
use serde::Serialize;
use std::collections::HashMap;
use std::fmt::Display;
use std::sync::LazyLock;
use std::sync::Mutex;
use std::sync::OnceLock;
use codex_client::CodexHttpClient;
pub use codex_client::CodexRequestBuilder;
/// Set this to add a suffix to the User-Agent string.
///
/// It is not ideal that we're using a global singleton for this.
@@ -31,129 +26,6 @@ pub static USER_AGENT_SUFFIX: LazyLock<Mutex<Option<String>>> = LazyLock::new(||
pub const DEFAULT_ORIGINATOR: &str = "codex_cli_rs";
pub const CODEX_INTERNAL_ORIGINATOR_OVERRIDE_ENV_VAR: &str = "CODEX_INTERNAL_ORIGINATOR_OVERRIDE";
#[derive(Clone, Debug)]
pub struct CodexHttpClient {
inner: reqwest::Client,
}
impl CodexHttpClient {
fn new(inner: reqwest::Client) -> Self {
Self { inner }
}
pub fn get<U>(&self, url: U) -> CodexRequestBuilder
where
U: IntoUrl,
{
self.request(Method::GET, url)
}
pub fn post<U>(&self, url: U) -> CodexRequestBuilder
where
U: IntoUrl,
{
self.request(Method::POST, url)
}
pub fn request<U>(&self, method: Method, url: U) -> CodexRequestBuilder
where
U: IntoUrl,
{
let url_str = url.as_str().to_string();
CodexRequestBuilder::new(self.inner.request(method.clone(), url), method, url_str)
}
}
#[must_use = "requests are not sent unless `send` is awaited"]
#[derive(Debug)]
pub struct CodexRequestBuilder {
builder: reqwest::RequestBuilder,
method: Method,
url: String,
}
impl CodexRequestBuilder {
fn new(builder: reqwest::RequestBuilder, method: Method, url: String) -> Self {
Self {
builder,
method,
url,
}
}
fn map(self, f: impl FnOnce(reqwest::RequestBuilder) -> reqwest::RequestBuilder) -> Self {
Self {
builder: f(self.builder),
method: self.method,
url: self.url,
}
}
pub fn header<K, V>(self, key: K, value: V) -> Self
where
HeaderName: TryFrom<K>,
<HeaderName as TryFrom<K>>::Error: Into<HttpError>,
HeaderValue: TryFrom<V>,
<HeaderValue as TryFrom<V>>::Error: Into<HttpError>,
{
self.map(|builder| builder.header(key, value))
}
pub fn bearer_auth<T>(self, token: T) -> Self
where
T: Display,
{
self.map(|builder| builder.bearer_auth(token))
}
pub fn json<T>(self, value: &T) -> Self
where
T: ?Sized + Serialize,
{
self.map(|builder| builder.json(value))
}
pub async fn send(self) -> Result<Response, reqwest::Error> {
match self.builder.send().await {
Ok(response) => {
let request_ids = Self::extract_request_ids(&response);
tracing::debug!(
method = %self.method,
url = %self.url,
status = %response.status(),
request_ids = ?request_ids,
version = ?response.version(),
"Request completed"
);
Ok(response)
}
Err(error) => {
let status = error.status();
tracing::debug!(
method = %self.method,
url = %self.url,
status = status.map(|s| s.as_u16()),
error = %error,
"Request failed"
);
Err(error)
}
}
}
fn extract_request_ids(response: &Response) -> HashMap<String, String> {
["cf-ray", "x-request-id", "x-oai-request-id"]
.iter()
.filter_map(|&name| {
let header_name = HeaderName::from_static(name);
let value = response.headers().get(header_name)?;
let value = value.to_str().ok()?.to_owned();
Some((name.to_owned(), value))
})
.collect()
}
}
#[derive(Debug, Clone)]
pub struct Originator {
pub value: String,


@@ -6,7 +6,6 @@ use crate::codex::TurnContext;
use crate::protocol::AskForApproval;
use crate::protocol::SandboxPolicy;
use crate::shell::Shell;
use crate::shell::default_user_shell;
use codex_protocol::config_types::SandboxMode;
use codex_protocol::models::ContentItem;
use codex_protocol::models::ResponseItem;
@@ -95,7 +94,7 @@ impl EnvironmentContext {
&& self.writable_roots == *writable_roots
}
pub fn diff(before: &TurnContext, after: &TurnContext) -> Self {
pub fn diff(before: &TurnContext, after: &TurnContext, shell: &Shell) -> Self {
let cwd = if before.cwd != after.cwd {
Some(after.cwd.clone())
} else {
@@ -111,18 +110,15 @@ impl EnvironmentContext {
} else {
None
};
EnvironmentContext::new(cwd, approval_policy, sandbox_policy, default_user_shell())
EnvironmentContext::new(cwd, approval_policy, sandbox_policy, shell.clone())
}
}
impl From<&TurnContext> for EnvironmentContext {
fn from(turn_context: &TurnContext) -> Self {
pub fn from_turn_context(turn_context: &TurnContext, shell: &Shell) -> Self {
Self::new(
Some(turn_context.cwd.clone()),
Some(turn_context.approval_policy),
Some(turn_context.sandbox_policy.clone()),
// Shell is not configurable from turn to turn
default_user_shell(),
shell.clone(),
)
}
}
@@ -201,6 +197,7 @@ mod tests {
Shell {
shell_type: ShellType::Bash,
shell_path: PathBuf::from("/bin/bash"),
shell_snapshot: None,
}
}
@@ -338,6 +335,7 @@ mod tests {
Shell {
shell_type: ShellType::Bash,
shell_path: "/bin/bash".into(),
shell_snapshot: None,
},
);
let context2 = EnvironmentContext::new(
@@ -347,6 +345,7 @@ mod tests {
Shell {
shell_type: ShellType::Zsh,
shell_path: "/bin/zsh".into(),
shell_snapshot: None,
},
);


@@ -13,6 +13,7 @@ use codex_protocol::user_input::UserInput;
use tracing::warn;
use uuid::Uuid;
use crate::user_instructions::SkillInstructions;
use crate::user_instructions::UserInstructions;
use crate::user_shell_command::is_user_shell_command_text;
@@ -23,7 +24,9 @@ fn is_session_prefix(text: &str) -> bool {
}
fn parse_user_message(message: &[ContentItem]) -> Option<UserMessageItem> {
if UserInstructions::is_user_instructions(message) {
if UserInstructions::is_user_instructions(message)
|| SkillInstructions::is_skill_instructions(message)
{
return None;
}
@@ -198,14 +201,22 @@ mod tests {
text: "# AGENTS.md instructions for test_directory\n\n<INSTRUCTIONS>\ntest_text\n</INSTRUCTIONS>".to_string(),
}],
},
ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText {
text: "<user_shell_command>echo 42</user_shell_command>".to_string(),
}],
},
];
ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText {
text: "<skill>\n<name>demo</name>\n<path>skills/demo/SKILL.md</path>\nbody\n</skill>"
.to_string(),
}],
},
ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText {
text: "<user_shell_command>echo 42</user_shell_command>".to_string(),
}],
},
];
for item in items {
let turn_item = parse_turn_item(&item);


@@ -28,6 +28,7 @@ use crate::protocol::SandboxPolicy;
use crate::sandboxing::CommandSpec;
use crate::sandboxing::ExecEnv;
use crate::sandboxing::SandboxManager;
use crate::sandboxing::SandboxPermissions;
use crate::spawn::StdioPolicy;
use crate::spawn::spawn_child_async;
use crate::text_encoding::bytes_to_string_smart;
@@ -55,7 +56,7 @@ pub struct ExecParams {
pub cwd: PathBuf,
pub expiration: ExecExpiration,
pub env: HashMap<String, String>,
pub with_escalated_permissions: Option<bool>,
pub sandbox_permissions: SandboxPermissions,
pub justification: Option<String>,
pub arg0: Option<String>,
}
@@ -144,7 +145,7 @@ pub async fn process_exec_tool_call(
cwd,
expiration,
env,
with_escalated_permissions,
sandbox_permissions,
justification,
arg0: _,
} = params;
@@ -162,7 +163,7 @@ pub async fn process_exec_tool_call(
cwd,
env,
expiration,
with_escalated_permissions,
sandbox_permissions,
justification,
};
@@ -192,7 +193,7 @@ pub(crate) async fn execute_exec_env(
env,
expiration,
sandbox,
with_escalated_permissions,
sandbox_permissions,
justification,
arg0,
} = env;
@@ -202,7 +203,7 @@ pub(crate) async fn execute_exec_env(
cwd,
expiration,
env,
with_escalated_permissions,
sandbox_permissions,
justification,
arg0,
};
@@ -851,15 +852,13 @@ mod tests {
"-c".to_string(),
"sleep 60 & echo $!; sleep 60".to_string(),
];
let env: HashMap<String, String> = std::env::vars_os()
.filter_map(|(key, value)| Some((key.into_string().ok()?, value.into_string().ok()?)))
.collect();
let env: HashMap<String, String> = std::env::vars().collect();
let params = ExecParams {
command,
cwd: std::env::current_dir()?,
expiration: 500.into(),
env,
with_escalated_permissions: None,
sandbox_permissions: SandboxPermissions::UseDefault,
justification: None,
arg0: None,
};
@@ -896,9 +895,7 @@ mod tests {
async fn process_exec_tool_call_respects_cancellation_token() -> Result<()> {
let command = long_running_command();
let cwd = std::env::current_dir()?;
let env: HashMap<String, String> = std::env::vars_os()
.filter_map(|(key, value)| Some((key.into_string().ok()?, value.into_string().ok()?)))
.collect();
let env: HashMap<String, String> = std::env::vars().collect();
let cancel_token = CancellationToken::new();
let cancel_tx = cancel_token.clone();
let params = ExecParams {
@@ -906,7 +903,7 @@ mod tests {
cwd: cwd.clone(),
expiration: ExecExpiration::Cancellation(cancel_token),
env,
with_escalated_permissions: None,
sandbox_permissions: SandboxPermissions::UseDefault,
justification: None,
arg0: None,
};


@@ -12,11 +12,7 @@ use std::collections::HashSet;
/// The derivation follows the algorithm documented in the struct-level comment
/// for [`ShellEnvironmentPolicy`].
pub fn create_env(policy: &ShellEnvironmentPolicy) -> HashMap<String, String> {
populate_env(
std::env::vars_os()
.filter_map(|(key, value)| Some((key.into_string().ok()?, value.into_string().ok()?))),
policy,
)
populate_env(std::env::vars(), policy)
}
fn populate_env<I>(vars: I, policy: &ShellEnvironmentPolicy) -> HashMap<String, String>


@@ -48,8 +48,6 @@ pub enum Feature {
WebSearchRequest,
/// Gate the execpolicy enforcement for shell/unified exec.
ExecPolicy,
/// Enable the model-based risk assessments for sandboxed commands.
SandboxCommandAssessment,
/// Enable Windows sandbox (restricted token) on Windows.
WindowsSandbox,
/// Remote compaction enabled (only for ChatGPT auth)
@@ -60,6 +58,10 @@ pub enum Feature {
ParallelToolCalls,
/// Experimental skills injection (CLI flag-driven).
Skills,
/// Experimental shell snapshotting.
ShellSnapshot,
/// Experimental TUI v2 (viewport) implementation.
Tui2,
}
impl Feature {
@@ -100,7 +102,6 @@ pub struct Features {
pub struct FeatureOverrides {
pub include_apply_patch_tool: Option<bool>,
pub web_search_request: Option<bool>,
pub experimental_sandbox_command_assessment: Option<bool>,
}
impl FeatureOverrides {
@@ -192,7 +193,6 @@ impl Features {
let mut features = Features::with_defaults();
let base_legacy = LegacyFeatureToggles {
experimental_sandbox_command_assessment: cfg.experimental_sandbox_command_assessment,
experimental_use_freeform_apply_patch: cfg.experimental_use_freeform_apply_patch,
experimental_use_unified_exec_tool: cfg.experimental_use_unified_exec_tool,
experimental_use_rmcp_client: cfg.experimental_use_rmcp_client,
@@ -208,8 +208,6 @@ impl Features {
let profile_legacy = LegacyFeatureToggles {
include_apply_patch_tool: config_profile.include_apply_patch_tool,
experimental_sandbox_command_assessment: config_profile
.experimental_sandbox_command_assessment,
experimental_use_freeform_apply_patch: config_profile
.experimental_use_freeform_apply_patch,
@@ -268,6 +266,12 @@ pub const FEATURES: &[FeatureSpec] = &[
stage: Stage::Stable,
default_enabled: true,
},
FeatureSpec {
id: Feature::ParallelToolCalls,
key: "parallel",
stage: Stage::Stable,
default_enabled: true,
},
FeatureSpec {
id: Feature::ViewImageTool,
key: "view_image_tool",
@@ -317,12 +321,6 @@ pub const FEATURES: &[FeatureSpec] = &[
stage: Stage::Experimental,
default_enabled: true,
},
FeatureSpec {
id: Feature::SandboxCommandAssessment,
key: "experimental_sandbox_command_assessment",
stage: Stage::Experimental,
default_enabled: false,
},
FeatureSpec {
id: Feature::WindowsSandbox,
key: "enable_experimental_windows_sandbox",
@@ -341,16 +339,22 @@ pub const FEATURES: &[FeatureSpec] = &[
stage: Stage::Experimental,
default_enabled: false,
},
FeatureSpec {
id: Feature::ParallelToolCalls,
key: "parallel",
stage: Stage::Experimental,
default_enabled: false,
},
FeatureSpec {
id: Feature::Skills,
key: "skills",
stage: Stage::Experimental,
default_enabled: false,
},
FeatureSpec {
id: Feature::ShellSnapshot,
key: "shell_snapshot",
stage: Stage::Experimental,
default_enabled: false,
},
FeatureSpec {
id: Feature::Tui2,
key: "tui2",
stage: Stage::Experimental,
default_enabled: false,
},
];


@@ -9,10 +9,6 @@ struct Alias {
}
const ALIASES: &[Alias] = &[
Alias {
legacy_key: "experimental_sandbox_command_assessment",
feature: Feature::SandboxCommandAssessment,
},
Alias {
legacy_key: "experimental_use_unified_exec_tool",
feature: Feature::UnifiedExec,
@@ -48,7 +44,6 @@ pub(crate) fn feature_for_key(key: &str) -> Option<Feature> {
#[derive(Debug, Default)]
pub struct LegacyFeatureToggles {
pub include_apply_patch_tool: Option<bool>,
pub experimental_sandbox_command_assessment: Option<bool>,
pub experimental_use_freeform_apply_patch: Option<bool>,
pub experimental_use_unified_exec_tool: Option<bool>,
pub experimental_use_rmcp_client: Option<bool>,
@@ -64,12 +59,6 @@ impl LegacyFeatureToggles {
self.include_apply_patch_tool,
"include_apply_patch_tool",
);
set_if_some(
features,
Feature::SandboxCommandAssessment,
self.experimental_sandbox_command_assessment,
"experimental_sandbox_command_assessment",
);
set_if_some(
features,
Feature::ApplyPatchFreeform,


@@ -72,6 +72,7 @@ mod rollout;
pub(crate) mod safety;
pub mod seatbelt;
pub mod shell;
pub mod shell_snapshot;
pub mod skills;
pub mod spawn;
pub mod terminal;


@@ -99,7 +99,6 @@ pub struct ModelProviderInfo {
}
impl ModelProviderInfo {
#[allow(dead_code)]
fn build_header_map(&self) -> crate::error::Result<HeaderMap> {
let mut headers = HeaderMap::new();
if let Some(extra) = &self.http_headers {
@@ -208,6 +207,45 @@ impl ModelProviderInfo {
.map(Duration::from_millis)
.unwrap_or(Duration::from_millis(DEFAULT_STREAM_IDLE_TIMEOUT_MS))
}
pub fn create_openai_provider() -> ModelProviderInfo {
ModelProviderInfo {
name: "OpenAI".into(),
// Allow users to override the default OpenAI endpoint by
// exporting `OPENAI_BASE_URL`. This is useful when pointing
// Codex at a proxy, mock server, or Azure-style deployment
// without requiring a full TOML override for the built-in
// OpenAI provider.
base_url: std::env::var("OPENAI_BASE_URL")
.ok()
.filter(|v| !v.trim().is_empty()),
env_key: None,
env_key_instructions: None,
experimental_bearer_token: None,
wire_api: WireApi::Responses,
query_params: None,
http_headers: Some(
[("version".to_string(), env!("CARGO_PKG_VERSION").to_string())]
.into_iter()
.collect(),
),
env_http_headers: Some(
[
(
"OpenAI-Organization".to_string(),
"OPENAI_ORGANIZATION".to_string(),
),
("OpenAI-Project".to_string(), "OPENAI_PROJECT".to_string()),
]
.into_iter()
.collect(),
),
// Use global defaults for retry/timeout unless overridden in config.toml.
request_max_retries: None,
stream_max_retries: None,
stream_idle_timeout_ms: None,
requires_openai_auth: true,
}
}
}
pub const DEFAULT_LMSTUDIO_PORT: u16 = 1234;
@@ -225,46 +263,7 @@ pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
// open source ("oss") providers by default. Users are encouraged to add to
// `model_providers` in config.toml to add their own providers.
[
(
"openai",
P {
name: "OpenAI".into(),
// Allow users to override the default OpenAI endpoint by
// exporting `OPENAI_BASE_URL`. This is useful when pointing
// Codex at a proxy, mock server, or Azure-style deployment
// without requiring a full TOML override for the built-in
// OpenAI provider.
base_url: std::env::var("OPENAI_BASE_URL")
.ok()
.filter(|v| !v.trim().is_empty()),
env_key: None,
env_key_instructions: None,
experimental_bearer_token: None,
wire_api: WireApi::Responses,
query_params: None,
http_headers: Some(
[("version".to_string(), env!("CARGO_PKG_VERSION").to_string())]
.into_iter()
.collect(),
),
env_http_headers: Some(
[
(
"OpenAI-Organization".to_string(),
"OPENAI_ORGANIZATION".to_string(),
),
("OpenAI-Project".to_string(), "OPENAI_PROJECT".to_string()),
]
.into_iter()
.collect(),
),
// Use global defaults for retry/timeout unless overridden in config.toml.
request_max_retries: None,
stream_max_retries: None,
stream_idle_timeout_ms: None,
requires_openai_auth: true,
},
),
("openai", P::create_openai_provider()),
(
OLLAMA_OSS_PROVIDER_ID,
create_oss_provider(DEFAULT_OLLAMA_PORT, WireApi::Chat),


@@ -116,6 +116,10 @@ impl ModelFamily {
const fn default_auto_compact_limit(context_window: i64) -> i64 {
(context_window * 9) / 10
}
pub fn get_model_slug(&self) -> &str {
&self.slug
}
}
macro_rules! model_family {
@@ -220,22 +224,18 @@ pub fn find_family_for_model(slug: &str) -> ModelFamily {
truncation_policy: TruncationPolicy::Tokens(10_000),
)
// Internal models.
} else if slug.starts_with("codex-exp-") {
// Experimental models.
} else if slug.starts_with("exp-codex") {
// Same as gpt-5.1-codex-max.
model_family!(
slug, slug,
supports_reasoning_summaries: true,
reasoning_summary_format: ReasoningSummaryFormat::Experimental,
base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(),
base_instructions: GPT_5_1_CODEX_MAX_INSTRUCTIONS.to_string(),
apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
experimental_supported_tools: vec![
"grep_files".to_string(),
"list_dir".to_string(),
"read_file".to_string(),
],
shell_type: ConfigShellToolType::ShellCommand,
supports_parallel_tool_calls: true,
support_verbosity: true,
support_verbosity: false,
truncation_policy: TruncationPolicy::Tokens(10_000),
context_window: Some(CONTEXT_WINDOW_272K),
)
@@ -263,7 +263,7 @@ pub fn find_family_for_model(slug: &str) -> ModelFamily {
base_instructions: GPT_5_1_CODEX_MAX_INSTRUCTIONS.to_string(),
apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
shell_type: ConfigShellToolType::ShellCommand,
supports_parallel_tool_calls: true,
supports_parallel_tool_calls: false,
support_verbosity: false,
truncation_policy: TruncationPolicy::Tokens(10_000),
context_window: Some(CONTEXT_WINDOW_272K),
@@ -279,11 +279,25 @@ pub fn find_family_for_model(slug: &str) -> ModelFamily {
base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(),
apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
shell_type: ConfigShellToolType::ShellCommand,
supports_parallel_tool_calls: true,
supports_parallel_tool_calls: false,
support_verbosity: false,
truncation_policy: TruncationPolicy::Tokens(10_000),
context_window: Some(CONTEXT_WINDOW_272K),
)
} else if slug.starts_with("robin") {
model_family!(
slug, slug,
supports_reasoning_summaries: true,
apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
support_verbosity: true,
default_verbosity: Some(Verbosity::Low),
base_instructions: GPT_5_1_INSTRUCTIONS.to_string(),
default_reasoning_effort: Some(ReasoningEffort::Medium),
truncation_policy: TruncationPolicy::Bytes(10_000),
shell_type: ConfigShellToolType::ShellCommand,
supports_parallel_tool_calls: true,
context_window: Some(CONTEXT_WINDOW_272K),
)
} else if slug.starts_with("gpt-5.1") {
model_family!(
slug, "gpt-5.1",


@@ -93,6 +93,34 @@ static PRESETS: Lazy<Vec<ModelPreset>> = Lazy::new(|| {
}),
show_in_picker: true,
},
ModelPreset {
id: "robin".to_string(),
model: "robin".to_string(),
display_name: "robin".to_string(),
description: "Robin".to_string(),
default_reasoning_effort: ReasoningEffort::Medium,
supported_reasoning_efforts: vec![
ReasoningEffortPreset {
effort: ReasoningEffort::Low,
description: "Balances speed with some reasoning; useful for straightforward queries and short explanations".to_string(),
},
ReasoningEffortPreset {
effort: ReasoningEffort::Medium,
description: "Provides a solid balance of reasoning depth and latency for general-purpose tasks".to_string(),
},
ReasoningEffortPreset {
effort: ReasoningEffort::High,
description: "Maximizes reasoning depth for complex or ambiguous problems".to_string(),
},
ReasoningEffortPreset {
effort: ReasoningEffort::XHigh,
description: "Extra high reasoning for complex problems".to_string(),
},
],
is_default: false,
upgrade: None,
show_in_picker: true,
},
ModelPreset {
id: "gpt-5.1".to_string(),
model: "gpt-5.1".to_string(),


@@ -1,6 +1,7 @@
use chrono::Utc;
use codex_api::ModelsClient;
use codex_api::ReqwestTransport;
use codex_app_server_protocol::AuthMode;
use codex_protocol::openai_models::ModelInfo;
use codex_protocol::openai_models::ModelPreset;
use codex_protocol::openai_models::ModelsResponse;
@@ -20,6 +21,7 @@ use crate::auth::AuthManager;
use crate::config::Config;
use crate::default_client::build_reqwest_client;
use crate::error::Result as CoreResult;
use crate::features::Feature;
use crate::model_provider_info::ModelProviderInfo;
use crate::openai_models::model_family::ModelFamily;
use crate::openai_models::model_family::find_family_for_model;
@@ -27,6 +29,8 @@ use crate::openai_models::model_presets::builtin_model_presets;
const MODEL_CACHE_FILE: &str = "models_cache.json";
const DEFAULT_MODEL_CACHE_TTL: Duration = Duration::from_secs(300);
const OPENAI_DEFAULT_MODEL: &str = "gpt-5.1-codex-max";
const CODEX_AUTO_BALANCED_MODEL: &str = "codex-auto-balanced";
/// Coordinates remote model discovery plus cached metadata on disk.
#[derive(Debug)]
@@ -38,6 +42,7 @@ pub struct ModelsManager {
etag: RwLock<Option<String>>,
codex_home: PathBuf,
cache_ttl: Duration,
provider: ModelProviderInfo,
}
impl ModelsManager {
@@ -51,18 +56,37 @@ impl ModelsManager {
etag: RwLock::new(None),
codex_home,
cache_ttl: DEFAULT_MODEL_CACHE_TTL,
provider: ModelProviderInfo::create_openai_provider(),
}
}
#[cfg(any(test, feature = "test-support"))]
/// Construct a manager scoped to the provided `AuthManager` with a specific provider. Used for integration tests.
pub fn with_provider(auth_manager: Arc<AuthManager>, provider: ModelProviderInfo) -> Self {
let codex_home = auth_manager.codex_home().to_path_buf();
Self {
available_models: RwLock::new(builtin_model_presets(auth_manager.get_auth_mode())),
remote_models: RwLock::new(Vec::new()),
auth_manager,
etag: RwLock::new(None),
codex_home,
cache_ttl: DEFAULT_MODEL_CACHE_TTL,
provider,
}
}
/// Fetch the latest remote models, using the on-disk cache when still fresh.
pub async fn refresh_available_models(&self, provider: &ModelProviderInfo) -> CoreResult<()> {
pub async fn refresh_available_models(&self, config: &Config) -> CoreResult<()> {
if !config.features.enabled(Feature::RemoteModels) {
return Ok(());
}
if self.try_load_cache().await {
return Ok(());
}
let auth = self.auth_manager.auth();
let api_provider = provider.to_api_provider(auth.as_ref().map(|auth| auth.mode))?;
let api_auth = auth_provider_from_auth(auth.clone(), provider).await?;
let api_provider = self.provider.to_api_provider(Some(AuthMode::ChatGPT))?;
let api_auth = auth_provider_from_auth(auth.clone(), &self.provider).await?;
let transport = ReqwestTransport::new(build_reqwest_client());
let client = ModelsClient::new(transport, api_provider, api_auth);
@@ -80,7 +104,10 @@ impl ModelsManager {
Ok(())
}
pub async fn list_models(&self) -> Vec<ModelPreset> {
pub async fn list_models(&self, config: &Config) -> Vec<ModelPreset> {
if let Err(err) = self.refresh_available_models(config).await {
error!("failed to refresh available models: {err}");
}
self.available_models.read().await.clone()
}
@@ -97,6 +124,33 @@ impl ModelsManager {
.with_remote_overrides(self.remote_models.read().await.clone())
}
pub async fn get_model(&self, model: &Option<String>, config: &Config) -> String {
if let Some(model) = model.as_ref() {
return model.to_string();
}
if let Err(err) = self.refresh_available_models(config).await {
error!("failed to refresh available models: {err}");
}
// if codex-auto-balanced exists & signed in with chatgpt mode, return it, otherwise return the default model
let auth_mode = self.auth_manager.get_auth_mode();
if auth_mode == Some(AuthMode::ChatGPT)
&& self
.available_models
.read()
.await
.iter()
.any(|m| m.model == CODEX_AUTO_BALANCED_MODEL)
{
return CODEX_AUTO_BALANCED_MODEL.to_string();
}
OPENAI_DEFAULT_MODEL.to_string()
}
#[cfg(any(test, feature = "test-support"))]
pub fn get_model_offline(model: Option<&str>) -> String {
model.unwrap_or(OPENAI_DEFAULT_MODEL).to_string()
}
#[cfg(any(test, feature = "test-support"))]
/// Offline helper that builds a `ModelFamily` without consulting remote state.
pub fn construct_model_family_offline(model: &str, config: &Config) -> ModelFamily {
@@ -111,6 +165,7 @@ impl ModelsManager {
/// Attempt to satisfy the refresh from the cache when it matches the provider and TTL.
async fn try_load_cache(&self) -> bool {
// todo(aibrahim): think if we should store fetched_at in ModelsManager so we don't always need to read the disk
let cache_path = self.cache_path();
let cache = match cache::load_cache(&cache_path).await {
Ok(cache) => cache,
@@ -196,6 +251,10 @@ mod tests {
use super::*;
use crate::CodexAuth;
use crate::auth::AuthCredentialsStoreMode;
use crate::config::Config;
use crate::config::ConfigOverrides;
use crate::config::ConfigToml;
use crate::features::Feature;
use crate::model_provider_info::WireApi;
use codex_protocol::openai_models::ModelsResponse;
use core_test_support::responses::mount_models_once;
@@ -255,19 +314,27 @@ mod tests {
)
.await;
let codex_home = tempdir().expect("temp dir");
let mut config = Config::load_from_base_config_with_overrides(
ConfigToml::default(),
ConfigOverrides::default(),
codex_home.path().to_path_buf(),
)
.expect("load default test config");
config.features.enable(Feature::RemoteModels);
let auth_manager =
AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key"));
let manager = ModelsManager::new(auth_manager);
let provider = provider_for(server.uri());
let manager = ModelsManager::with_provider(auth_manager, provider);
manager
.refresh_available_models(&provider)
.refresh_available_models(&config)
.await
.expect("refresh succeeds");
let cached_remote = manager.remote_models.read().await.clone();
assert_eq!(cached_remote, remote_models);
let available = manager.list_models().await;
let available = manager.list_models(&config).await;
assert_eq!(available.len(), 2);
assert_eq!(available[0].model, "priority-high");
assert!(
@@ -297,16 +364,23 @@ mod tests {
.await;
let codex_home = tempdir().expect("temp dir");
let mut config = Config::load_from_base_config_with_overrides(
ConfigToml::default(),
ConfigOverrides::default(),
codex_home.path().to_path_buf(),
)
.expect("load default test config");
config.features.enable(Feature::RemoteModels);
let auth_manager = Arc::new(AuthManager::new(
codex_home.path().to_path_buf(),
false,
AuthCredentialsStoreMode::File,
));
let manager = ModelsManager::new(auth_manager);
let provider = provider_for(server.uri());
let manager = ModelsManager::with_provider(auth_manager, provider);
manager
.refresh_available_models(&provider)
.refresh_available_models(&config)
.await
.expect("first refresh succeeds");
assert_eq!(
@@ -317,7 +391,7 @@ mod tests {
// Second call should read from cache and avoid the network.
manager
.refresh_available_models(&provider)
.refresh_available_models(&config)
.await
.expect("cached refresh succeeds");
assert_eq!(
@@ -346,16 +420,23 @@ mod tests {
.await;
let codex_home = tempdir().expect("temp dir");
let mut config = Config::load_from_base_config_with_overrides(
ConfigToml::default(),
ConfigOverrides::default(),
codex_home.path().to_path_buf(),
)
.expect("load default test config");
config.features.enable(Feature::RemoteModels);
let auth_manager = Arc::new(AuthManager::new(
codex_home.path().to_path_buf(),
false,
AuthCredentialsStoreMode::File,
));
let manager = ModelsManager::new(auth_manager);
let provider = provider_for(server.uri());
let manager = ModelsManager::with_provider(auth_manager, provider);
manager
.refresh_available_models(&provider)
.refresh_available_models(&config)
.await
.expect("initial refresh succeeds");
@@ -381,7 +462,7 @@ mod tests {
.await;
manager
.refresh_available_models(&provider)
.refresh_available_models(&config)
.await
.expect("second refresh succeeds");
assert_eq!(

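The cached-refresh behavior these tests exercise can be sketched standalone (hypothetical names and synchronous code; the real `ModelsManager::refresh_available_models` is async and takes a `Config`):

```rust
// Sketch: the first refresh hits the "network", later refreshes serve the cache.
struct CachedModels {
    cache: Option<Vec<String>>,
    fetches: usize, // counts simulated network round-trips
}

impl CachedModels {
    fn new() -> Self {
        Self { cache: None, fetches: 0 }
    }

    fn refresh_available_models(&mut self) -> &[String] {
        if self.cache.is_none() {
            self.fetches += 1; // only the first call fetches
            self.cache = Some(vec![
                "priority-high".to_string(),
                "priority-low".to_string(),
            ]);
        }
        self.cache.as_deref().expect("cache populated above")
    }
}
```

A second call returns the same slice without incrementing `fetches`, which is the property the "Second call should read from cache and avoid the network" assertion checks.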
View File

@@ -117,9 +117,6 @@ mod tests {
query: None,
path: None,
},
ParsedCommand::Unknown {
cmd: "head -n 40".to_string(),
},
],
);
}
@@ -143,16 +140,11 @@ mod tests {
let inner = "rg -n \"BUG|FIXME|TODO|XXX|HACK\" -S | head -n 200";
assert_parsed(
&vec_str(&["bash", "-lc", inner]),
vec![
ParsedCommand::Search {
cmd: "rg -n 'BUG|FIXME|TODO|XXX|HACK' -S".to_string(),
query: Some("BUG|FIXME|TODO|XXX|HACK".to_string()),
path: None,
},
ParsedCommand::Unknown {
cmd: "head -n 200".to_string(),
},
],
vec![ParsedCommand::Search {
cmd: "rg -n 'BUG|FIXME|TODO|XXX|HACK' -S".to_string(),
query: Some("BUG|FIXME|TODO|XXX|HACK".to_string()),
path: None,
}],
);
}
@@ -174,16 +166,11 @@ mod tests {
let inner = "rg --files | head -n 50";
assert_parsed(
&vec_str(&["bash", "-lc", inner]),
vec![
ParsedCommand::Search {
cmd: "rg --files".to_string(),
query: None,
path: None,
},
ParsedCommand::Unknown {
cmd: "head -n 50".to_string(),
},
],
vec![ParsedCommand::Search {
cmd: "rg --files".to_string(),
query: None,
path: None,
}],
);
}
@@ -273,6 +260,19 @@ mod tests {
);
}
#[test]
fn supports_head_file_only() {
let inner = "head Cargo.toml";
assert_parsed(
&vec_str(&["bash", "-lc", inner]),
vec![ParsedCommand::Read {
cmd: inner.to_string(),
name: "Cargo.toml".to_string(),
path: PathBuf::from("Cargo.toml"),
}],
);
}
#[test]
fn supports_cat_sed_n() {
let inner = "cat tui/Cargo.toml | sed -n '1,200p'";
@@ -313,6 +313,19 @@ mod tests {
);
}
#[test]
fn supports_tail_file_only() {
let inner = "tail README.md";
assert_parsed(
&vec_str(&["bash", "-lc", inner]),
vec![ParsedCommand::Read {
cmd: inner.to_string(),
name: "README.md".to_string(),
path: PathBuf::from("README.md"),
}],
);
}
#[test]
fn supports_npm_run_build_is_unknown() {
assert_parsed(
@@ -391,6 +404,19 @@ mod tests {
);
}
#[test]
fn supports_single_string_script_with_cd_and_pipe() {
let inner = r#"cd /Users/pakrym/code/codex && rg -n "codex_api" codex-rs -S | head -n 50"#;
assert_parsed(
&vec_str(&["bash", "-lc", inner]),
vec![ParsedCommand::Search {
cmd: "rg -n codex_api codex-rs -S".to_string(),
query: Some("codex_api".to_string()),
path: Some("codex-rs".to_string()),
}],
);
}
// ---- is_small_formatting_command unit tests ----
#[test]
fn small_formatting_always_true_commands() {
@@ -408,38 +434,43 @@ mod tests {
fn head_behavior() {
// No args -> small formatting
assert!(is_small_formatting_command(&vec_str(&["head"])));
// Numeric count only -> not considered small formatting by implementation
assert!(!is_small_formatting_command(&shlex_split_safe(
"head -n 40"
)));
// Numeric count only -> formatting
assert!(is_small_formatting_command(&shlex_split_safe("head -n 40")));
// With explicit file -> not small formatting
assert!(!is_small_formatting_command(&shlex_split_safe(
"head -n 40 file.txt"
)));
// File only (no count) -> treated as small formatting by implementation
assert!(is_small_formatting_command(&vec_str(&["head", "file.txt"])));
// File only (no count) -> not formatting
assert!(!is_small_formatting_command(&vec_str(&[
"head", "file.txt"
])));
}
#[test]
fn tail_behavior() {
// No args -> small formatting
assert!(is_small_formatting_command(&vec_str(&["tail"])));
// Numeric with plus offset -> not small formatting
assert!(!is_small_formatting_command(&shlex_split_safe(
// Numeric with plus offset -> formatting
assert!(is_small_formatting_command(&shlex_split_safe(
"tail -n +10"
)));
assert!(!is_small_formatting_command(&shlex_split_safe(
"tail -n +10 file.txt"
)));
// Numeric count
assert!(!is_small_formatting_command(&shlex_split_safe(
"tail -n 30"
)));
// Numeric count -> formatting
assert!(is_small_formatting_command(&shlex_split_safe("tail -n 30")));
assert!(!is_small_formatting_command(&shlex_split_safe(
"tail -n 30 file.txt"
)));
// File only -> small formatting by implementation
assert!(is_small_formatting_command(&vec_str(&["tail", "file.txt"])));
// Byte count -> formatting
assert!(is_small_formatting_command(&shlex_split_safe("tail -c 30")));
assert!(is_small_formatting_command(&shlex_split_safe(
"tail -c +10"
)));
// File only (no count) -> not formatting
assert!(!is_small_formatting_command(&vec_str(&[
"tail", "file.txt"
])));
}
#[test]
@@ -714,20 +745,15 @@ mod tests {
#[test]
fn bash_dash_c_pipeline_parsing() {
// Ensure -c is handled similarly to -lc by normalization
// Ensure -c is handled similarly to -lc by shell parsing
let inner = "rg --files | head -n 1";
assert_parsed(
&shlex_split_safe(inner),
vec![
ParsedCommand::Search {
cmd: "rg --files".to_string(),
query: None,
path: None,
},
ParsedCommand::Unknown {
cmd: "head -n 1".to_string(),
},
],
&vec_str(&["bash", "-c", inner]),
vec![ParsedCommand::Search {
cmd: "rg --files".to_string(),
query: None,
path: None,
}],
);
}
@@ -1384,13 +1410,50 @@ fn is_small_formatting_command(tokens: &[String]) -> bool {
// Treat as formatting when no explicit file operand is present.
// Common forms: `head -n 40`, `head -c 100`.
// Keep cases like `head -n 40 file`.
tokens.len() < 3
match tokens {
// `head`
[_] => true,
// `head <file>` or `head -n50`/`head -c100`
[_, arg] => arg.starts_with('-'),
// `head -n 40` / `head -c 100` (no file operand)
[_, flag, count]
if (flag == "-n" || flag == "-c")
&& count.chars().all(|c| c.is_ascii_digit()) =>
{
true
}
_ => false,
}
}
"tail" => {
// Treat as formatting when no explicit file operand is present.
// Common forms: `tail -n +10`, `tail -n 30`.
// Common forms: `tail -n +10`, `tail -n 30`, `tail -c 100`.
// Keep cases like `tail -n 30 file`.
tokens.len() < 3
match tokens {
// `tail`
[_] => true,
// `tail <file>` or `tail -n30`/`tail -n+10`
[_, arg] => arg.starts_with('-'),
// `tail -n 30` / `tail -n +10` (no file operand)
[_, flag, count]
if flag == "-n"
&& (count.chars().all(|c| c.is_ascii_digit())
|| (count.starts_with('+')
&& count[1..].chars().all(|c| c.is_ascii_digit()))) =>
{
true
}
// `tail -c 100` / `tail -c +10` (no file operand)
[_, flag, count]
if flag == "-c"
&& (count.chars().all(|c| c.is_ascii_digit())
|| (count.starts_with('+')
&& count[1..].chars().all(|c| c.is_ascii_digit()))) =>
{
true
}
_ => false,
}
}
"sed" => {
// Keep `sed -n <range> file` (treated as a file read elsewhere);
@@ -1543,6 +1606,16 @@ fn summarize_main_tokens(main_cmd: &[String]) -> ParsedCommand {
};
}
}
if let [path] = tail
&& !path.starts_with('-')
{
let name = short_display_path(path);
return ParsedCommand::Read {
cmd: shlex_join(main_cmd),
name,
path: PathBuf::from(path),
};
}
ParsedCommand::Unknown {
cmd: shlex_join(main_cmd),
}
@@ -1587,6 +1660,16 @@ fn summarize_main_tokens(main_cmd: &[String]) -> ParsedCommand {
};
}
}
if let [path] = tail
&& !path.starts_with('-')
{
let name = short_display_path(path);
return ParsedCommand::Read {
cmd: shlex_join(main_cmd),
name,
path: PathBuf::from(path),
};
}
ParsedCommand::Unknown {
cmd: shlex_join(main_cmd),
}
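A minimal standalone sketch of the `head`/`tail` classification the match arms above implement (simplified and merged for illustration; the real helper distinguishes the flags each command accepts and also handles `sed`, `wc`, and friends):

```rust
fn is_count(s: &str) -> bool {
    // Accept `40` and `+10` style counts.
    let digits = s.strip_prefix('+').unwrap_or(s);
    !digits.is_empty() && digits.chars().all(|c| c.is_ascii_digit())
}

// Classify a `head`/`tail` invocation: "small formatting" commands are
// folded out of pipelines, while a file operand means a real read.
fn is_small_formatting(tokens: &[&str]) -> bool {
    match tokens {
        // bare `head` / `tail`
        [_] => true,
        // `head -n40` style flags stay formatting; `head file.txt` is a read
        [_, arg] => arg.starts_with('-'),
        // `head -n 40` / `tail -c +10` with no file operand
        [_, flag, count] => (*flag == "-n" || *flag == "-c") && is_count(count),
        _ => false,
    }
}
```

This is why `rg --files | head -n 50` now collapses to a single `Search` entry while `head Cargo.toml` becomes a `Read`.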

View File

@@ -15,7 +15,7 @@
use crate::config::Config;
use crate::features::Feature;
use crate::skills::load_skills;
use crate::skills::SkillMetadata;
use crate::skills::render_skills_section;
use dunce::canonicalize as normalize_path;
use std::path::PathBuf;
@@ -33,17 +33,12 @@ const PROJECT_DOC_SEPARATOR: &str = "\n\n--- project-doc ---\n\n";
/// Combines `Config::instructions` and `AGENTS.md` (if present) into a single
/// string of instructions.
pub(crate) async fn get_user_instructions(config: &Config) -> Option<String> {
pub(crate) async fn get_user_instructions(
config: &Config,
skills: Option<&[SkillMetadata]>,
) -> Option<String> {
let skills_section = if config.features.enabled(Feature::Skills) {
let skills_outcome = load_skills(config);
for err in &skills_outcome.errors {
error!(
"failed to load skill {}: {}",
err.path.display(),
err.message
);
}
render_skills_section(&skills_outcome.skills)
skills.and_then(render_skills_section)
} else {
None
};
@@ -244,6 +239,7 @@ mod tests {
use super::*;
use crate::config::ConfigOverrides;
use crate::config::ConfigToml;
use crate::skills::load_skills;
use std::fs;
use std::path::PathBuf;
use tempfile::TempDir;
@@ -289,7 +285,7 @@ mod tests {
async fn no_doc_file_returns_none() {
let tmp = tempfile::tempdir().expect("tempdir");
let res = get_user_instructions(&make_config(&tmp, 4096, None)).await;
let res = get_user_instructions(&make_config(&tmp, 4096, None), None).await;
assert!(
res.is_none(),
"Expected None when AGENTS.md is absent and no system instructions provided"
@@ -303,7 +299,7 @@ mod tests {
let tmp = tempfile::tempdir().expect("tempdir");
fs::write(tmp.path().join("AGENTS.md"), "hello world").unwrap();
let res = get_user_instructions(&make_config(&tmp, 4096, None))
let res = get_user_instructions(&make_config(&tmp, 4096, None), None)
.await
.expect("doc expected");
@@ -322,7 +318,7 @@ mod tests {
let huge = "A".repeat(LIMIT * 2); // 2 KiB
fs::write(tmp.path().join("AGENTS.md"), &huge).unwrap();
let res = get_user_instructions(&make_config(&tmp, LIMIT, None))
let res = get_user_instructions(&make_config(&tmp, LIMIT, None), None)
.await
.expect("doc expected");
@@ -354,7 +350,9 @@ mod tests {
let mut cfg = make_config(&repo, 4096, None);
cfg.cwd = nested;
let res = get_user_instructions(&cfg).await.expect("doc expected");
let res = get_user_instructions(&cfg, None)
.await
.expect("doc expected");
assert_eq!(res, "root level doc");
}
@@ -364,7 +362,7 @@ mod tests {
let tmp = tempfile::tempdir().expect("tempdir");
fs::write(tmp.path().join("AGENTS.md"), "something").unwrap();
let res = get_user_instructions(&make_config(&tmp, 0, None)).await;
let res = get_user_instructions(&make_config(&tmp, 0, None), None).await;
assert!(
res.is_none(),
"With limit 0 the function should return None"
@@ -380,7 +378,7 @@ mod tests {
const INSTRUCTIONS: &str = "base instructions";
let res = get_user_instructions(&make_config(&tmp, 4096, Some(INSTRUCTIONS)))
let res = get_user_instructions(&make_config(&tmp, 4096, Some(INSTRUCTIONS)), None)
.await
.expect("should produce a combined instruction string");
@@ -397,7 +395,7 @@ mod tests {
const INSTRUCTIONS: &str = "some instructions";
let res = get_user_instructions(&make_config(&tmp, 4096, Some(INSTRUCTIONS))).await;
let res = get_user_instructions(&make_config(&tmp, 4096, Some(INSTRUCTIONS)), None).await;
assert_eq!(res, Some(INSTRUCTIONS.to_string()));
}
@@ -426,7 +424,9 @@ mod tests {
let mut cfg = make_config(&repo, 4096, None);
cfg.cwd = nested;
let res = get_user_instructions(&cfg).await.expect("doc expected");
let res = get_user_instructions(&cfg, None)
.await
.expect("doc expected");
assert_eq!(res, "root doc\n\ncrate doc");
}
@@ -439,7 +439,7 @@ mod tests {
let cfg = make_config(&tmp, 4096, None);
let res = get_user_instructions(&cfg)
let res = get_user_instructions(&cfg, None)
.await
.expect("local doc expected");
@@ -461,7 +461,7 @@ mod tests {
let cfg = make_config_with_fallback(&tmp, 4096, None, &["EXAMPLE.md"]);
let res = get_user_instructions(&cfg)
let res = get_user_instructions(&cfg, None)
.await
.expect("fallback doc expected");
@@ -477,7 +477,7 @@ mod tests {
let cfg = make_config_with_fallback(&tmp, 4096, None, &["EXAMPLE.md", ".example.md"]);
let res = get_user_instructions(&cfg)
let res = get_user_instructions(&cfg, None)
.await
.expect("AGENTS.md should win");
@@ -506,9 +506,13 @@ mod tests {
"extract from pdfs",
);
let res = get_user_instructions(&cfg)
.await
.expect("instructions expected");
let skills = load_skills(&cfg);
let res = get_user_instructions(
&cfg,
skills.errors.is_empty().then_some(skills.skills.as_slice()),
)
.await
.expect("instructions expected");
let expected_path = dunce::canonicalize(
cfg.codex_home
.join("skills/pdf-processing/SKILL.md")
@@ -529,9 +533,13 @@ mod tests {
let cfg = make_config(&tmp, 4096, None);
create_skill(cfg.codex_home.clone(), "linting", "run clippy");
let res = get_user_instructions(&cfg)
.await
.expect("instructions expected");
let skills = load_skills(&cfg);
let res = get_user_instructions(
&cfg,
skills.errors.is_empty().then_some(skills.skills.as_slice()),
)
.await
.expect("instructions expected");
let expected_path =
dunce::canonicalize(cfg.codex_home.join("skills/linting/SKILL.md").as_path())
.unwrap_or_else(|_| cfg.codex_home.join("skills/linting/SKILL.md"));

View File

@@ -62,6 +62,7 @@ pub(crate) fn should_persist_event_msg(ev: &EventMsg) -> bool {
| EventMsg::WebSearchBegin(_)
| EventMsg::WebSearchEnd(_)
| EventMsg::ExecCommandBegin(_)
| EventMsg::TerminalInteraction(_)
| EventMsg::ExecCommandOutputDelta(_)
| EventMsg::ExecCommandEnd(_)
| EventMsg::ExecApprovalRequest(_)

View File

@@ -1,268 +0,0 @@
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use std::time::Instant;
use crate::AuthManager;
use crate::ModelProviderInfo;
use crate::client::ModelClient;
use crate::client_common::Prompt;
use crate::client_common::ResponseEvent;
use crate::config::Config;
use crate::openai_models::models_manager::ModelsManager;
use crate::protocol::SandboxPolicy;
use askama::Template;
use codex_otel::otel_event_manager::OtelEventManager;
use codex_protocol::ConversationId;
use codex_protocol::models::ContentItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig;
use codex_protocol::protocol::SandboxCommandAssessment;
use codex_protocol::protocol::SessionSource;
use futures::StreamExt;
use serde_json::json;
use tokio::time::timeout;
use tracing::warn;
const SANDBOX_ASSESSMENT_TIMEOUT: Duration = Duration::from_secs(15);
const SANDBOX_ASSESSMENT_REASONING_EFFORT: ReasoningEffortConfig = ReasoningEffortConfig::Medium;
#[derive(Template)]
#[template(path = "sandboxing/assessment_prompt.md", escape = "none")]
struct SandboxAssessmentPromptTemplate<'a> {
platform: &'a str,
sandbox_policy: &'a str,
filesystem_roots: Option<&'a str>,
working_directory: &'a str,
command_argv: &'a str,
command_joined: &'a str,
sandbox_failure_message: Option<&'a str>,
}
#[allow(clippy::too_many_arguments)]
pub(crate) async fn assess_command(
config: Arc<Config>,
provider: ModelProviderInfo,
auth_manager: Arc<AuthManager>,
parent_otel: &OtelEventManager,
conversation_id: ConversationId,
models_manager: Arc<ModelsManager>,
session_source: SessionSource,
call_id: &str,
command: &[String],
sandbox_policy: &SandboxPolicy,
cwd: &Path,
failure_message: Option<&str>,
) -> Option<SandboxCommandAssessment> {
if !config.experimental_sandbox_command_assessment || command.is_empty() {
return None;
}
let command_json = serde_json::to_string(command).unwrap_or_else(|_| "[]".to_string());
let command_joined =
shlex::try_join(command.iter().map(String::as_str)).unwrap_or_else(|_| command.join(" "));
let failure = failure_message
.map(str::trim)
.filter(|msg| !msg.is_empty())
.map(str::to_string);
let cwd_str = cwd.to_string_lossy().to_string();
let sandbox_summary = summarize_sandbox_policy(sandbox_policy);
let mut roots = sandbox_roots_for_prompt(sandbox_policy, cwd);
roots.sort();
roots.dedup();
let platform = std::env::consts::OS;
let roots_formatted = roots.iter().map(|root| root.to_string_lossy().to_string());
let filesystem_roots = match roots_formatted.collect::<Vec<_>>() {
collected if collected.is_empty() => None,
collected => Some(collected.join(", ")),
};
let prompt_template = SandboxAssessmentPromptTemplate {
platform,
sandbox_policy: sandbox_summary.as_str(),
filesystem_roots: filesystem_roots.as_deref(),
working_directory: cwd_str.as_str(),
command_argv: command_json.as_str(),
command_joined: command_joined.as_str(),
sandbox_failure_message: failure.as_deref(),
};
let rendered_prompt = match prompt_template.render() {
Ok(rendered) => rendered,
Err(err) => {
warn!("failed to render sandbox assessment prompt: {err}");
return None;
}
};
let (system_prompt_section, user_prompt_section) = match rendered_prompt.split_once("\n---\n") {
Some(split) => split,
None => {
warn!("rendered sandbox assessment prompt missing separator");
return None;
}
};
let system_prompt = system_prompt_section
.strip_prefix("System Prompt:\n")
.unwrap_or(system_prompt_section)
.trim()
.to_string();
let user_prompt = user_prompt_section
.strip_prefix("User Prompt:\n")
.unwrap_or(user_prompt_section)
.trim()
.to_string();
let prompt = Prompt {
input: vec![ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText { text: user_prompt }],
}],
tools: Vec::new(),
parallel_tool_calls: false,
base_instructions_override: Some(system_prompt),
output_schema: Some(sandbox_assessment_schema()),
};
let model_family = models_manager
.construct_model_family(&config.model, &config)
.await;
let child_otel = parent_otel.with_model(config.model.as_str(), model_family.slug.as_str());
let client = ModelClient::new(
Arc::clone(&config),
Some(auth_manager),
model_family,
child_otel,
provider,
Some(SANDBOX_ASSESSMENT_REASONING_EFFORT),
config.model_reasoning_summary,
conversation_id,
session_source,
);
let start = Instant::now();
let assessment_result = timeout(SANDBOX_ASSESSMENT_TIMEOUT, async move {
let mut stream = client.stream(&prompt).await?;
let mut last_json: Option<String> = None;
while let Some(event) = stream.next().await {
match event {
Ok(ResponseEvent::OutputItemDone(item)) => {
if let Some(text) = response_item_text(&item) {
last_json = Some(text);
}
}
Ok(ResponseEvent::RateLimits(_)) => {}
Ok(ResponseEvent::Completed { .. }) => break,
Ok(_) => continue,
Err(err) => return Err(err),
}
}
Ok(last_json)
})
.await;
let duration = start.elapsed();
parent_otel.sandbox_assessment_latency(call_id, duration);
match assessment_result {
Ok(Ok(Some(raw))) => match serde_json::from_str::<SandboxCommandAssessment>(raw.trim()) {
Ok(assessment) => {
parent_otel.sandbox_assessment(
call_id,
"success",
Some(assessment.risk_level),
duration,
);
return Some(assessment);
}
Err(err) => {
warn!("failed to parse sandbox assessment JSON: {err}");
parent_otel.sandbox_assessment(call_id, "parse_error", None, duration);
}
},
Ok(Ok(None)) => {
warn!("sandbox assessment response did not include any message");
parent_otel.sandbox_assessment(call_id, "no_output", None, duration);
}
Ok(Err(err)) => {
warn!("sandbox assessment failed: {err}");
parent_otel.sandbox_assessment(call_id, "model_error", None, duration);
}
Err(_) => {
warn!("sandbox assessment timed out");
parent_otel.sandbox_assessment(call_id, "timeout", None, duration);
}
}
None
}
fn summarize_sandbox_policy(policy: &SandboxPolicy) -> String {
match policy {
SandboxPolicy::DangerFullAccess => "danger-full-access".to_string(),
SandboxPolicy::ReadOnly => "read-only".to_string(),
SandboxPolicy::WorkspaceWrite { network_access, .. } => {
let network = if *network_access {
"network"
} else {
"no-network"
};
format!("workspace-write (network_access={network})")
}
}
}
fn sandbox_roots_for_prompt(policy: &SandboxPolicy, cwd: &Path) -> Vec<PathBuf> {
let mut roots = vec![cwd.to_path_buf()];
if let SandboxPolicy::WorkspaceWrite { writable_roots, .. } = policy {
roots.extend(writable_roots.iter().cloned());
}
roots
}
fn sandbox_assessment_schema() -> serde_json::Value {
json!({
"type": "object",
"required": ["description", "risk_level"],
"properties": {
"description": {
"type": "string",
"minLength": 1,
"maxLength": 500
},
"risk_level": {
"type": "string",
"enum": ["low", "medium", "high"]
},
},
"additionalProperties": false
})
}
fn response_item_text(item: &ResponseItem) -> Option<String> {
match item {
ResponseItem::Message { content, .. } => {
let mut buffers: Vec<&str> = Vec::new();
for segment in content {
match segment {
ContentItem::InputText { text } | ContentItem::OutputText { text } => {
if !text.is_empty() {
buffers.push(text);
}
}
ContentItem::InputImage { .. } => {}
}
}
if buffers.is_empty() {
None
} else {
Some(buffers.join("\n"))
}
}
ResponseItem::FunctionCallOutput { output, .. } => Some(output.content.clone()),
_ => None,
}
}
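The text-joining rule in `response_item_text` can be sketched in isolation (hypothetical helper name; the real function walks `ContentItem` variants rather than plain strings):

```rust
// Keep non-empty text segments and join them with newlines;
// return None when nothing textual was produced.
fn join_non_empty(segments: &[&str]) -> Option<String> {
    let kept: Vec<&str> = segments
        .iter()
        .copied()
        .filter(|s| !s.is_empty())
        .collect();
    if kept.is_empty() {
        None
    } else {
        Some(kept.join("\n"))
    }
}
```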

View File

@@ -6,8 +6,6 @@ sandbox placement and transformation of portable CommandSpec into a
ready-to-spawn environment.
*/
pub mod assessment;
use crate::exec::ExecExpiration;
use crate::exec::ExecToolCallOutput;
use crate::exec::SandboxType;
@@ -23,32 +21,11 @@ use crate::seatbelt::create_seatbelt_command_args;
use crate::spawn::CODEX_SANDBOX_ENV_VAR;
use crate::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
use crate::tools::sandboxing::SandboxablePreference;
pub use codex_protocol::models::SandboxPermissions;
use std::collections::HashMap;
use std::path::Path;
use std::path::PathBuf;
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SandboxPermissions {
UseDefault,
RequireEscalated,
}
impl SandboxPermissions {
pub fn requires_escalated_permissions(self) -> bool {
matches!(self, SandboxPermissions::RequireEscalated)
}
}
impl From<bool> for SandboxPermissions {
fn from(with_escalated_permissions: bool) -> Self {
if with_escalated_permissions {
SandboxPermissions::RequireEscalated
} else {
SandboxPermissions::UseDefault
}
}
}
#[derive(Debug)]
pub struct CommandSpec {
pub program: String,
@@ -56,7 +33,7 @@ pub struct CommandSpec {
pub cwd: PathBuf,
pub env: HashMap<String, String>,
pub expiration: ExecExpiration,
pub with_escalated_permissions: Option<bool>,
pub sandbox_permissions: SandboxPermissions,
pub justification: Option<String>,
}
@@ -67,7 +44,7 @@ pub struct ExecEnv {
pub env: HashMap<String, String>,
pub expiration: ExecExpiration,
pub sandbox: SandboxType,
pub with_escalated_permissions: Option<bool>,
pub sandbox_permissions: SandboxPermissions,
pub justification: Option<String>,
pub arg0: Option<String>,
}
@@ -181,7 +158,7 @@ impl SandboxManager {
env,
expiration: spec.expiration,
sandbox,
with_escalated_permissions: spec.with_escalated_permissions,
sandbox_permissions: spec.sandbox_permissions,
justification: spec.justification,
arg0: arg0_override,
})
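The `bool` → permissions mapping that this diff moves into `codex_protocol` behaves like the following sketch (names mirror the removed local enum; the re-exported type is assumed to keep the same semantics):

```rust
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum SandboxPermissions {
    UseDefault,
    RequireEscalated,
}

impl From<bool> for SandboxPermissions {
    // Call sites that previously passed `with_escalated_permissions: bool`
    // collapse to this conversion.
    fn from(with_escalated_permissions: bool) -> Self {
        if with_escalated_permissions {
            SandboxPermissions::RequireEscalated
        } else {
            SandboxPermissions::UseDefault
        }
    }
}
```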

View File

@@ -1,6 +1,9 @@
use serde::Deserialize;
use serde::Serialize;
use std::path::PathBuf;
use std::sync::Arc;
use crate::shell_snapshot::ShellSnapshot;
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
pub enum ShellType {
@@ -15,6 +18,8 @@ pub enum ShellType {
pub struct Shell {
pub(crate) shell_type: ShellType,
pub(crate) shell_path: PathBuf,
#[serde(skip_serializing, skip_deserializing, default)]
pub(crate) shell_snapshot: Option<Arc<ShellSnapshot>>,
}
impl Shell {
@@ -134,6 +139,7 @@ fn get_zsh_shell(path: Option<&PathBuf>) -> Option<Shell> {
shell_path.map(|shell_path| Shell {
shell_type: ShellType::Zsh,
shell_path,
shell_snapshot: None,
})
}
@@ -143,6 +149,7 @@ fn get_bash_shell(path: Option<&PathBuf>) -> Option<Shell> {
shell_path.map(|shell_path| Shell {
shell_type: ShellType::Bash,
shell_path,
shell_snapshot: None,
})
}
@@ -152,6 +159,7 @@ fn get_sh_shell(path: Option<&PathBuf>) -> Option<Shell> {
shell_path.map(|shell_path| Shell {
shell_type: ShellType::Sh,
shell_path,
shell_snapshot: None,
})
}
@@ -167,6 +175,7 @@ fn get_powershell_shell(path: Option<&PathBuf>) -> Option<Shell> {
shell_path.map(|shell_path| Shell {
shell_type: ShellType::PowerShell,
shell_path,
shell_snapshot: None,
})
}
@@ -176,6 +185,7 @@ fn get_cmd_shell(path: Option<&PathBuf>) -> Option<Shell> {
shell_path.map(|shell_path| Shell {
shell_type: ShellType::Cmd,
shell_path,
shell_snapshot: None,
})
}
@@ -184,11 +194,13 @@ fn ultimate_fallback_shell() -> Shell {
Shell {
shell_type: ShellType::Cmd,
shell_path: PathBuf::from("cmd.exe"),
shell_snapshot: None,
}
} else {
Shell {
shell_type: ShellType::Sh,
shell_path: PathBuf::from("/bin/sh"),
shell_snapshot: None,
}
}
}
@@ -413,6 +425,7 @@ mod tests {
let test_bash_shell = Shell {
shell_type: ShellType::Bash,
shell_path: PathBuf::from("/bin/bash"),
shell_snapshot: None,
};
assert_eq!(
test_bash_shell.derive_exec_args("echo hello", false),
@@ -426,6 +439,7 @@ mod tests {
let test_zsh_shell = Shell {
shell_type: ShellType::Zsh,
shell_path: PathBuf::from("/bin/zsh"),
shell_snapshot: None,
};
assert_eq!(
test_zsh_shell.derive_exec_args("echo hello", false),
@@ -439,6 +453,7 @@ mod tests {
let test_powershell_shell = Shell {
shell_type: ShellType::PowerShell,
shell_path: PathBuf::from("pwsh.exe"),
shell_snapshot: None,
};
assert_eq!(
test_powershell_shell.derive_exec_args("echo hello", false),
@@ -465,6 +480,7 @@ mod tests {
Shell {
shell_type: ShellType::Zsh,
shell_path: PathBuf::from(shell_path),
shell_snapshot: None,
}
);
}

View File

@@ -0,0 +1,416 @@
use std::path::Path;
use std::path::PathBuf;
use std::time::Duration;
use crate::shell::Shell;
use crate::shell::ShellType;
use crate::shell::get_shell;
use anyhow::Context;
use anyhow::Result;
use anyhow::anyhow;
use anyhow::bail;
use tokio::fs;
use tokio::process::Command;
use tokio::time::timeout;
use uuid::Uuid;
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ShellSnapshot {
pub path: PathBuf,
}
const SNAPSHOT_TIMEOUT: Duration = Duration::from_secs(10);
impl ShellSnapshot {
pub async fn try_new(codex_home: &Path, shell: &Shell) -> Option<Self> {
let extension = match shell.shell_type {
ShellType::PowerShell => "ps1",
_ => "sh",
};
let path =
codex_home
.join("shell_snapshots")
.join(format!("{}.{}", Uuid::new_v4(), extension));
match write_shell_snapshot(shell.shell_type.clone(), &path).await {
Ok(path) => {
tracing::info!("Shell snapshot successfully created: {}", path.display());
Some(Self { path })
}
Err(err) => {
tracing::warn!(
"Failed to create shell snapshot for {}: {err:?}",
shell.name()
);
None
}
}
}
}
impl Drop for ShellSnapshot {
fn drop(&mut self) {
if let Err(err) = std::fs::remove_file(&self.path) {
tracing::warn!(
"Failed to delete shell snapshot at {:?}: {err:?}",
self.path
);
}
}
}
pub async fn write_shell_snapshot(shell_type: ShellType, output_path: &Path) -> Result<PathBuf> {
if shell_type == ShellType::PowerShell || shell_type == ShellType::Cmd {
bail!("Shell snapshot not supported yet for {shell_type:?}");
}
let shell = get_shell(shell_type.clone(), None)
.with_context(|| format!("No available shell for {shell_type:?}"))?;
let raw_snapshot = capture_snapshot(&shell).await?;
let snapshot = strip_snapshot_preamble(&raw_snapshot)?;
if let Some(parent) = output_path.parent() {
let parent_display = parent.display();
fs::create_dir_all(parent)
.await
.with_context(|| format!("Failed to create snapshot parent {parent_display}"))?;
}
let snapshot_path = output_path.display();
fs::write(output_path, snapshot)
.await
.with_context(|| format!("Failed to write snapshot to {snapshot_path}"))?;
Ok(output_path.to_path_buf())
}
async fn capture_snapshot(shell: &Shell) -> Result<String> {
let shell_type = shell.shell_type.clone();
match shell_type {
ShellType::Zsh => run_shell_script(shell, zsh_snapshot_script()).await,
ShellType::Bash => run_shell_script(shell, bash_snapshot_script()).await,
ShellType::Sh => run_shell_script(shell, sh_snapshot_script()).await,
ShellType::PowerShell => run_shell_script(shell, powershell_snapshot_script()).await,
ShellType::Cmd => bail!("Shell snapshotting is not yet supported for {shell_type:?}"),
}
}
fn strip_snapshot_preamble(snapshot: &str) -> Result<String> {
let marker = "# Snapshot file";
let Some(start) = snapshot.find(marker) else {
bail!("Snapshot output missing marker {marker}");
};
Ok(snapshot[start..].to_string())
}
async fn run_shell_script(shell: &Shell, script: &str) -> Result<String> {
run_shell_script_with_timeout(shell, script, SNAPSHOT_TIMEOUT).await
}
async fn run_shell_script_with_timeout(
shell: &Shell,
script: &str,
snapshot_timeout: Duration,
) -> Result<String> {
let args = shell.derive_exec_args(script, true);
let shell_name = shell.name();
// The command handle is kept as a guard so its drop (and `kill_on_drop`) is
// controlled. It must be `mut` because `.args()` takes `&mut self` and
// returns a mutable reference to the handle.
let mut handler = Command::new(&args[0]);
handler.args(&args[1..]);
handler.kill_on_drop(true);
let output = timeout(snapshot_timeout, handler.output())
.await
.map_err(|_| anyhow!("Snapshot command timed out for {shell_name}"))?
.with_context(|| format!("Failed to execute {shell_name}"))?;
if !output.status.success() {
let status = output.status;
let stderr = String::from_utf8_lossy(&output.stderr);
bail!("Snapshot command exited with status {status}: {stderr}");
}
Ok(String::from_utf8_lossy(&output.stdout).into_owned())
}
fn zsh_snapshot_script() -> &'static str {
r##"print '# Snapshot file'
print '# Unset all aliases to avoid conflicts with functions'
print 'unalias -a 2>/dev/null || true'
print '# Functions'
functions
print ''
setopt_count=$(setopt | wc -l | tr -d ' ')
print "# setopts $setopt_count"
setopt | sed 's/^/setopt /'
print ''
alias_count=$(alias -L | wc -l | tr -d ' ')
print "# aliases $alias_count"
alias -L
print ''
export_count=$(export -p | wc -l | tr -d ' ')
print "# exports $export_count"
export -p
"##
}
fn bash_snapshot_script() -> &'static str {
r##"echo '# Snapshot file'
echo '# Unset all aliases to avoid conflicts with functions'
unalias -a 2>/dev/null || true
echo '# Functions'
declare -f
echo ''
bash_opts=$(set -o | awk '$2=="on"{print $1}')
bash_opt_count=$(printf '%s\n' "$bash_opts" | sed '/^$/d' | wc -l | tr -d ' ')
echo "# setopts $bash_opt_count"
if [ -n "$bash_opts" ]; then
printf 'set -o %s\n' $bash_opts
fi
echo ''
alias_count=$(alias -p | wc -l | tr -d ' ')
echo "# aliases $alias_count"
alias -p
echo ''
export_count=$(export -p | wc -l | tr -d ' ')
echo "# exports $export_count"
export -p
"##
}
fn sh_snapshot_script() -> &'static str {
r##"echo '# Snapshot file'
echo '# Unset all aliases to avoid conflicts with functions'
unalias -a 2>/dev/null || true
echo '# Functions'
if command -v typeset >/dev/null 2>&1; then
typeset -f
elif command -v declare >/dev/null 2>&1; then
declare -f
fi
echo ''
if set -o >/dev/null 2>&1; then
sh_opts=$(set -o | awk '$2=="on"{print $1}')
sh_opt_count=$(printf '%s\n' "$sh_opts" | sed '/^$/d' | wc -l | tr -d ' ')
echo "# setopts $sh_opt_count"
if [ -n "$sh_opts" ]; then
printf 'set -o %s\n' $sh_opts
fi
else
echo '# setopts 0'
fi
echo ''
if alias >/dev/null 2>&1; then
alias_count=$(alias | wc -l | tr -d ' ')
echo "# aliases $alias_count"
alias
echo ''
else
echo '# aliases 0'
fi
if export -p >/dev/null 2>&1; then
export_count=$(export -p | wc -l | tr -d ' ')
echo "# exports $export_count"
export -p
else
export_count=$(env | wc -l | tr -d ' ')
echo "# exports $export_count"
env | sort | while IFS='=' read -r key value; do
escaped=$(printf "%s" "$value" | sed "s/'/'\"'\"'/g")
printf "export %s='%s'\n" "$key" "$escaped"
done
fi
"##
}
fn powershell_snapshot_script() -> &'static str {
r##"$ErrorActionPreference = 'Stop'
Write-Output '# Snapshot file'
Write-Output '# Unset all aliases to avoid conflicts with functions'
Write-Output 'Remove-Item Alias:* -ErrorAction SilentlyContinue'
Write-Output '# Functions'
Get-ChildItem Function: | ForEach-Object {
"function {0} {{`n{1}`n}}" -f $_.Name, $_.Definition
}
Write-Output ''
$aliases = Get-Alias
Write-Output ("# aliases " + $aliases.Count)
$aliases | ForEach-Object {
"Set-Alias -Name {0} -Value {1}" -f $_.Name, $_.Definition
}
Write-Output ''
$envVars = Get-ChildItem Env:
Write-Output ("# exports " + $envVars.Count)
$envVars | ForEach-Object {
$escaped = $_.Value -replace "'", "''"
"`$env:{0}='{1}'" -f $_.Name, $escaped
}
"##
}
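The export sections of these snapshot scripts depend on shell-specific single-quote escaping; a sketch of the POSIX rule used by the `sh` fallback (close the quote, emit a double-quoted quote, reopen):

```rust
// Wrap a value in single quotes, escaping embedded quotes the POSIX way:
// `a'b` becomes `'a'"'"'b'`. (PowerShell instead doubles the quote: '' .)
fn posix_single_quote(value: &str) -> String {
    format!("'{}'", value.replace('\'', r#"'"'"'"#))
}
```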
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
#[cfg(target_os = "linux")]
use std::os::unix::fs::PermissionsExt;
#[cfg(target_os = "linux")]
use std::process::Command as StdCommand;
use tempfile::tempdir;
#[cfg(not(target_os = "windows"))]
fn assert_posix_snapshot_sections(snapshot: &str) {
assert!(snapshot.contains("# Snapshot file"));
assert!(snapshot.contains("aliases "));
assert!(snapshot.contains("exports "));
assert!(
snapshot.contains("PATH"),
"snapshot should capture a PATH export"
);
assert!(snapshot.contains("setopts "));
}
async fn get_snapshot(shell_type: ShellType) -> Result<String> {
let dir = tempdir()?;
let path = dir.path().join("snapshot.sh");
write_shell_snapshot(shell_type, &path).await?;
let content = fs::read_to_string(&path).await?;
Ok(content)
}
#[test]
fn strip_snapshot_preamble_removes_leading_output() {
let snapshot = "noise\n# Snapshot file\nexport PATH=/bin\n";
let cleaned = strip_snapshot_preamble(snapshot).expect("snapshot marker exists");
assert_eq!(cleaned, "# Snapshot file\nexport PATH=/bin\n");
}
#[test]
fn strip_snapshot_preamble_requires_marker() {
let result = strip_snapshot_preamble("missing header");
assert!(result.is_err());
}
#[cfg(unix)]
#[tokio::test]
async fn try_new_creates_and_deletes_snapshot_file() -> Result<()> {
let dir = tempdir()?;
let shell = Shell {
shell_type: ShellType::Bash,
shell_path: PathBuf::from("/bin/bash"),
shell_snapshot: None,
};
let snapshot = ShellSnapshot::try_new(dir.path(), &shell)
.await
.expect("snapshot should be created");
let path = snapshot.path.clone();
assert!(path.exists());
drop(snapshot);
assert!(!path.exists());
Ok(())
}
#[cfg(target_os = "linux")]
#[tokio::test]
async fn timed_out_snapshot_shell_is_terminated() -> Result<()> {
use std::process::Stdio;
use tokio::time::Duration as TokioDuration;
use tokio::time::Instant;
use tokio::time::sleep;
let dir = tempdir()?;
let shell_path = dir.path().join("hanging-shell.sh");
let pid_path = dir.path().join("pid");
let script = format!(
"#!/bin/sh\n\
echo $$ > {}\n\
sleep 30\n",
pid_path.display()
);
fs::write(&shell_path, script).await?;
let mut permissions = std::fs::metadata(&shell_path)?.permissions();
permissions.set_mode(0o755);
std::fs::set_permissions(&shell_path, permissions)?;
let shell = Shell {
shell_type: ShellType::Sh,
shell_path,
shell_snapshot: None,
};
let err = run_shell_script_with_timeout(&shell, "ignored", Duration::from_millis(500))
.await
.expect_err("snapshot shell should time out");
assert!(
err.to_string().contains("timed out"),
"expected timeout error, got {err:?}"
);
let pid = fs::read_to_string(&pid_path)
.await
.expect("snapshot shell writes its pid before timing out")
.trim()
.parse::<i32>()?;
let deadline = Instant::now() + TokioDuration::from_secs(1);
loop {
let kill_status = StdCommand::new("kill")
.arg("-0")
.arg(pid.to_string())
.stderr(Stdio::null())
.stdout(Stdio::null())
.status()?;
if !kill_status.success() {
break;
}
if Instant::now() >= deadline {
panic!("timed out snapshot shell is still alive after grace period");
}
sleep(TokioDuration::from_millis(50)).await;
}
Ok(())
}
#[cfg(target_os = "macos")]
#[tokio::test]
async fn macos_zsh_snapshot_includes_sections() -> Result<()> {
let snapshot = get_snapshot(ShellType::Zsh).await?;
assert_posix_snapshot_sections(&snapshot);
Ok(())
}
#[cfg(target_os = "linux")]
#[tokio::test]
async fn linux_bash_snapshot_includes_sections() -> Result<()> {
let snapshot = get_snapshot(ShellType::Bash).await?;
assert_posix_snapshot_sections(&snapshot);
Ok(())
}
#[cfg(target_os = "linux")]
#[tokio::test]
async fn linux_sh_snapshot_includes_sections() -> Result<()> {
let snapshot = get_snapshot(ShellType::Sh).await?;
assert_posix_snapshot_sections(&snapshot);
Ok(())
}
#[cfg(target_os = "windows")]
#[ignore]
#[tokio::test]
async fn windows_powershell_snapshot_includes_sections() -> Result<()> {
let snapshot = get_snapshot(ShellType::PowerShell).await?;
assert!(snapshot.contains("# Snapshot file"));
assert!(snapshot.contains("aliases "));
assert!(snapshot.contains("exports "));
Ok(())
}
}

View File

@@ -0,0 +1,78 @@
use std::collections::HashSet;
use crate::skills::SkillLoadOutcome;
use crate::skills::SkillMetadata;
use crate::user_instructions::SkillInstructions;
use codex_protocol::models::ResponseItem;
use codex_protocol::user_input::UserInput;
use tokio::fs;
#[derive(Debug, Default)]
pub(crate) struct SkillInjections {
pub(crate) items: Vec<ResponseItem>,
pub(crate) warnings: Vec<String>,
}
pub(crate) async fn build_skill_injections(
inputs: &[UserInput],
skills: Option<&SkillLoadOutcome>,
) -> SkillInjections {
if inputs.is_empty() {
return SkillInjections::default();
}
let Some(outcome) = skills else {
return SkillInjections::default();
};
let mentioned_skills = collect_explicit_skill_mentions(inputs, &outcome.skills);
if mentioned_skills.is_empty() {
return SkillInjections::default();
}
let mut result = SkillInjections {
items: Vec::with_capacity(mentioned_skills.len()),
warnings: Vec::new(),
};
for skill in mentioned_skills {
match fs::read_to_string(&skill.path).await {
Ok(contents) => {
result.items.push(ResponseItem::from(SkillInstructions {
name: skill.name,
path: skill.path.to_string_lossy().into_owned(),
contents,
}));
}
Err(err) => {
let message = format!(
"Failed to load skill {} at {}: {err:#}",
skill.name,
skill.path.display()
);
result.warnings.push(message);
}
}
}
result
}
fn collect_explicit_skill_mentions(
inputs: &[UserInput],
skills: &[SkillMetadata],
) -> Vec<SkillMetadata> {
let mut selected: Vec<SkillMetadata> = Vec::new();
let mut seen: HashSet<String> = HashSet::new();
for input in inputs {
if let UserInput::Skill { name, path } = input
&& seen.insert(name.clone())
&& let Some(skill) = skills.iter().find(|s| s.name == *name && s.path == *path)
{
selected.push(skill.clone());
}
}
selected
}
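`collect_explicit_skill_mentions` deduplicates by skill name while preserving the order of first mention, using the fact that `HashSet::insert` returns `false` for repeats. A standalone sketch of just that pattern, with plain strings in place of `SkillMetadata`:

```rust
use std::collections::HashSet;

// First mention of a name wins; order of first appearance is preserved.
fn dedup_first_mentions(names: &[&str]) -> Vec<String> {
    let mut seen = HashSet::new();
    names
        .iter()
        .filter(|n| seen.insert(n.to_string())) // false (skipped) on repeats
        .map(|n| n.to_string())
        .collect()
}

fn main() {
    let picked = dedup_first_mentions(&["docs", "tests", "docs", "lint"]);
    assert_eq!(picked, vec!["docs", "tests", "lint"]);
}
```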

View File

@@ -1,7 +1,10 @@
pub mod injection;
pub mod loader;
pub mod model;
pub mod render;
pub(crate) use injection::SkillInjections;
pub(crate) use injection::build_skill_injections;
pub use loader::load_skills;
pub use model::SkillError;
pub use model::SkillLoadOutcome;

View File

@@ -4,6 +4,7 @@ use crate::AuthManager;
use crate::RolloutRecorder;
use crate::mcp_connection_manager::McpConnectionManager;
use crate::openai_models::models_manager::ModelsManager;
use crate::skills::SkillLoadOutcome;
use crate::tools::sandboxing::ApprovalStore;
use crate::unified_exec::UnifiedExecSessionManager;
use crate::user_notification::UserNotifier;
@@ -18,10 +19,11 @@ pub(crate) struct SessionServices {
pub(crate) unified_exec_manager: UnifiedExecSessionManager,
pub(crate) notifier: UserNotifier,
pub(crate) rollout: Mutex<Option<RolloutRecorder>>,
pub(crate) user_shell: crate::shell::Shell,
pub(crate) user_shell: Arc<crate::shell::Shell>,
pub(crate) show_raw_agent_reasoning: bool,
pub(crate) auth_manager: Arc<AuthManager>,
pub(crate) models_manager: Arc<ModelsManager>,
pub(crate) otel_event_manager: OtelEventManager,
pub(crate) tool_approvals: Mutex<ApprovalStore>,
pub(crate) skills: Option<SkillLoadOutcome>,
}

View File

@@ -15,6 +15,8 @@ use codex_protocol::user_input::UserInput;
use codex_utils_readiness::Readiness;
use codex_utils_readiness::Token;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::oneshot;
use tokio_util::sync::CancellationToken;
use tracing::info;
use tracing::warn;
@@ -23,6 +25,8 @@ pub(crate) struct GhostSnapshotTask {
token: Token,
}
const SNAPSHOT_WARNING_THRESHOLD: Duration = Duration::from_secs(240);
#[async_trait]
impl SessionTask for GhostSnapshotTask {
fn kind(&self) -> TaskKind {
@@ -38,7 +42,33 @@ impl SessionTask for GhostSnapshotTask {
) -> Option<String> {
tokio::task::spawn(async move {
let token = self.token;
let ctx_for_task = Arc::clone(&ctx);
// Channel used to signal when the snapshot work has finished so the
// timeout warning task can exit early without sending a warning.
let (snapshot_done_tx, snapshot_done_rx) = oneshot::channel::<()>();
let ctx_for_warning = ctx.clone();
let cancellation_token_for_warning = cancellation_token.clone();
let session_for_warning = session.clone();
// Fire a generic warning if the snapshot is still running after
// four minutes (SNAPSHOT_WARNING_THRESHOLD); this helps users discover
// large untracked files that might need to be added to .gitignore.
tokio::task::spawn(async move {
tokio::select! {
_ = tokio::time::sleep(SNAPSHOT_WARNING_THRESHOLD) => {
session_for_warning.session
.send_event(
&ctx_for_warning,
EventMsg::Warning(WarningEvent {
message: "Repository snapshot is taking longer than expected. Large untracked or ignored files can slow snapshots; consider adding large files or directories to .gitignore or disabling `undo` in your config.".to_string()
}),
)
.await;
}
_ = snapshot_done_rx => {}
_ = cancellation_token_for_warning.cancelled() => {}
}
});
let ctx_for_task = ctx.clone();
let cancelled = tokio::select! {
_ = cancellation_token.cancelled() => true,
_ = async {
@@ -109,6 +139,8 @@ impl SessionTask for GhostSnapshotTask {
} => false,
};
let _ = snapshot_done_tx.send(());
if cancelled {
info!("ghost snapshot task cancelled");
}

View File

@@ -92,6 +92,8 @@ async fn start_review_conversation(
// Set explicit review rubric for the sub-agent
sub_agent_config.base_instructions = Some(crate::REVIEW_PROMPT.to_string());
sub_agent_config.model = Some(config.review_model.clone());
(run_codex_conversation_one_shot(
sub_agent_config,
session.auth_manager(),

View File

@@ -24,6 +24,7 @@ use crate::protocol::ExecCommandSource;
use crate::protocol::SandboxPolicy;
use crate::protocol::TaskStartedEvent;
use crate::sandboxing::ExecEnv;
use crate::sandboxing::SandboxPermissions;
use crate::state::TaskKind;
use crate::tools::format_exec_output_str;
use crate::user_shell_command::user_shell_command_record_item;
@@ -100,7 +101,7 @@ impl SessionTask for UserShellCommandTask {
// should use that instead of an "arbitrarily large" timeout here.
expiration: USER_SHELL_TIMEOUT_MS.into(),
sandbox: SandboxType::None,
with_escalated_permissions: None,
sandbox_permissions: SandboxPermissions::UseDefault,
justification: None,
arg0: None,
};

View File

@@ -134,7 +134,6 @@ impl ToolEmitter {
command: &[String],
cwd: PathBuf,
source: ExecCommandSource,
interaction_input: Option<String>,
process_id: Option<String>,
) -> Self {
let parsed_cmd = parse_command(command);
@@ -142,7 +141,7 @@ impl ToolEmitter {
command: command.to_vec(),
cwd,
source,
interaction_input,
interaction_input: None, // TODO(jif) drop this field in the protocol.
parsed_cmd,
process_id,
}

View File

@@ -46,7 +46,7 @@ impl ToolHandler for ApplyPatchHandler {
)
}
fn is_mutating(&self, _invocation: &ToolInvocation) -> bool {
async fn is_mutating(&self, _invocation: &ToolInvocation) -> bool {
true
}

View File

@@ -10,7 +10,7 @@ use crate::exec_policy::create_exec_approval_requirement_for_command;
use crate::function_tool::FunctionCallError;
use crate::is_safe_command::is_known_safe_command;
use crate::protocol::ExecCommandSource;
use crate::sandboxing::SandboxPermissions;
use crate::shell::Shell;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolOutput;
use crate::tools::context::ToolPayload;
@@ -35,7 +35,7 @@ impl ShellHandler {
cwd: turn_context.resolve_path(params.workdir.clone()),
expiration: params.timeout_ms.into(),
env: create_env(&turn_context.shell_environment_policy),
with_escalated_permissions: params.with_escalated_permissions,
sandbox_permissions: params.sandbox_permissions.unwrap_or_default(),
justification: params.justification,
arg0: None,
}
@@ -43,20 +43,25 @@ impl ShellHandler {
}
impl ShellCommandHandler {
fn base_command(shell: &Shell, command: &str, login: Option<bool>) -> Vec<String> {
let use_login_shell = login.unwrap_or(true);
shell.derive_exec_args(command, use_login_shell)
}
fn to_exec_params(
params: ShellCommandToolCallParams,
session: &crate::codex::Session,
turn_context: &TurnContext,
) -> ExecParams {
let shell = session.user_shell();
let command = shell.derive_exec_args(&params.command, params.login.unwrap_or(true));
let command = Self::base_command(shell.as_ref(), &params.command, params.login);
ExecParams {
command,
cwd: turn_context.resolve_path(params.workdir.clone()),
expiration: params.timeout_ms.into(),
env: create_env(&turn_context.shell_environment_policy),
with_escalated_permissions: params.with_escalated_permissions,
sandbox_permissions: params.sandbox_permissions.unwrap_or_default(),
justification: params.justification,
arg0: None,
}
@@ -76,7 +81,7 @@ impl ToolHandler for ShellHandler {
)
}
fn is_mutating(&self, invocation: &ToolInvocation) -> bool {
async fn is_mutating(&self, invocation: &ToolInvocation) -> bool {
match &invocation.payload {
ToolPayload::Function { arguments } => {
serde_json::from_str::<ShellToolCallParams>(arguments)
@@ -148,7 +153,7 @@ impl ToolHandler for ShellCommandHandler {
matches!(payload, ToolPayload::Function { .. })
}
fn is_mutating(&self, invocation: &ToolInvocation) -> bool {
async fn is_mutating(&self, invocation: &ToolInvocation) -> bool {
let ToolPayload::Function { arguments } = &invocation.payload else {
return true;
};
@@ -156,7 +161,7 @@ impl ToolHandler for ShellCommandHandler {
serde_json::from_str::<ShellCommandToolCallParams>(arguments)
.map(|params| {
let shell = invocation.session.user_shell();
let command = shell.derive_exec_args(&params.command, params.login.unwrap_or(true));
let command = Self::base_command(shell.as_ref(), &params.command, params.login);
!is_known_safe_command(&command)
})
.unwrap_or(true)
@@ -206,7 +211,9 @@ impl ShellHandler {
freeform: bool,
) -> Result<ToolOutput, FunctionCallError> {
// Approval policy guard for explicit escalation in non-OnRequest modes.
if exec_params.with_escalated_permissions.unwrap_or(false)
if exec_params
.sandbox_permissions
.requires_escalated_permissions()
&& !matches!(
turn.approval_policy,
codex_protocol::protocol::AskForApproval::OnRequest
@@ -251,7 +258,7 @@ impl ShellHandler {
&exec_params.command,
turn.approval_policy,
&turn.sandbox_policy,
SandboxPermissions::from(exec_params.with_escalated_permissions.unwrap_or(false)),
exec_params.sandbox_permissions,
)
.await;
@@ -260,7 +267,7 @@ impl ShellHandler {
cwd: exec_params.cwd.clone(),
timeout_ms: exec_params.expiration.timeout_ms(),
env: exec_params.env.clone(),
with_escalated_permissions: exec_params.with_escalated_permissions,
sandbox_permissions: exec_params.sandbox_permissions,
justification: exec_params.justification.clone(),
exec_approval_requirement,
};
@@ -288,6 +295,7 @@ impl ShellHandler {
#[cfg(test)]
mod tests {
use std::path::PathBuf;
use std::sync::Arc;
use codex_protocol::models::ShellCommandToolCallParams;
use pretty_assertions::assert_eq;
@@ -295,8 +303,10 @@ mod tests {
use crate::codex::make_session_and_context;
use crate::exec_env::create_env;
use crate::is_safe_command::is_known_safe_command;
use crate::sandboxing::SandboxPermissions;
use crate::shell::Shell;
use crate::shell::ShellType;
use crate::shell_snapshot::ShellSnapshot;
use crate::tools::handlers::ShellCommandHandler;
/// The logic for is_known_safe_command() has heuristics for known shells,
@@ -307,18 +317,21 @@ mod tests {
let bash_shell = Shell {
shell_type: ShellType::Bash,
shell_path: PathBuf::from("/bin/bash"),
shell_snapshot: None,
};
assert_safe(&bash_shell, "ls -la");
let zsh_shell = Shell {
shell_type: ShellType::Zsh,
shell_path: PathBuf::from("/bin/zsh"),
shell_snapshot: None,
};
assert_safe(&zsh_shell, "ls -la");
let powershell = Shell {
shell_type: ShellType::PowerShell,
shell_path: PathBuf::from("pwsh.exe"),
shell_snapshot: None,
};
assert_safe(&powershell, "ls -Name");
}
@@ -340,7 +353,7 @@ mod tests {
let workdir = Some("subdir".to_string());
let login = None;
let timeout_ms = Some(1234);
let with_escalated_permissions = Some(true);
let sandbox_permissions = SandboxPermissions::RequireEscalated;
let justification = Some("because tests".to_string());
let expected_command = session.user_shell().derive_exec_args(&command, true);
@@ -352,7 +365,7 @@ mod tests {
workdir,
login,
timeout_ms,
with_escalated_permissions,
sandbox_permissions: Some(sandbox_permissions),
justification: justification.clone(),
};
@@ -363,11 +376,33 @@ mod tests {
assert_eq!(exec_params.cwd, expected_cwd);
assert_eq!(exec_params.env, expected_env);
assert_eq!(exec_params.expiration.timeout_ms(), timeout_ms);
assert_eq!(
exec_params.with_escalated_permissions,
with_escalated_permissions
);
assert_eq!(exec_params.sandbox_permissions, sandbox_permissions);
assert_eq!(exec_params.justification, justification);
assert_eq!(exec_params.arg0, None);
}
#[test]
fn shell_command_handler_respects_explicit_login_flag() {
let shell = Shell {
shell_type: ShellType::Bash,
shell_path: PathBuf::from("/bin/bash"),
shell_snapshot: Some(Arc::new(ShellSnapshot {
path: PathBuf::from("/tmp/snapshot.sh"),
})),
};
let login_command =
ShellCommandHandler::base_command(&shell, "echo login shell", Some(true));
assert_eq!(
login_command,
shell.derive_exec_args("echo login shell", true)
);
let non_login_command =
ShellCommandHandler::base_command(&shell, "echo non login shell", Some(false));
assert_eq!(
non_login_command,
shell.derive_exec_args("echo non login shell", false)
);
}
}
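This diff replaces the boolean `with_escalated_permissions` with a `SandboxPermissions` value whose `requires_escalated_permissions()` method gates the approval-policy check. The enum's definition is in `crate::sandboxing` and is not part of this listing; the sketch below is a hypothetical minimal shape consistent with its use here (`UseDefault` as the default, a `RequireEscalated` variant) and may differ from the real type.

```rust
// Hypothetical sketch of the SandboxPermissions shape implied by this diff;
// the real definition lives in crate::sandboxing.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)]
enum SandboxPermissions {
    #[default]
    UseDefault,
    RequireEscalated,
}

impl SandboxPermissions {
    fn requires_escalated_permissions(self) -> bool {
        matches!(self, SandboxPermissions::RequireEscalated)
    }
}

fn main() {
    // `unwrap_or_default()` on an optional param yields UseDefault,
    // matching `sandbox_permissions: params.sandbox_permissions.unwrap_or_default()`.
    assert!(!SandboxPermissions::default().requires_escalated_permissions());
    assert!(SandboxPermissions::RequireEscalated.requires_escalated_permissions());
}
```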

View File

@@ -1,12 +1,10 @@
use std::path::PathBuf;
use crate::function_tool::FunctionCallError;
use crate::is_safe_command::is_known_safe_command;
use crate::protocol::EventMsg;
use crate::protocol::ExecCommandOutputDeltaEvent;
use crate::protocol::ExecCommandSource;
use crate::protocol::ExecOutputStream;
use crate::shell::default_user_shell;
use crate::protocol::TerminalInteractionEvent;
use crate::sandboxing::SandboxPermissions;
use crate::shell::Shell;
use crate::shell::get_shell_by_model_provided_path;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolOutput;
@@ -24,6 +22,8 @@ use crate::unified_exec::UnifiedExecSessionManager;
use crate::unified_exec::WriteStdinRequest;
use async_trait::async_trait;
use serde::Deserialize;
use std::path::PathBuf;
use std::sync::Arc;
pub struct UnifiedExecHandler;
@@ -41,7 +41,7 @@ struct ExecCommandArgs {
#[serde(default)]
max_output_tokens: Option<usize>,
#[serde(default)]
with_escalated_permissions: Option<bool>,
sandbox_permissions: SandboxPermissions,
#[serde(default)]
justification: Option<String>,
}
@@ -83,7 +83,7 @@ impl ToolHandler for UnifiedExecHandler {
)
}
fn is_mutating(&self, invocation: &ToolInvocation) -> bool {
async fn is_mutating(&self, invocation: &ToolInvocation) -> bool {
let (ToolPayload::Function { arguments } | ToolPayload::UnifiedExec { arguments }) =
&invocation.payload
else {
@@ -93,7 +93,7 @@ impl ToolHandler for UnifiedExecHandler {
let Ok(params) = serde_json::from_str::<ExecCommandArgs>(arguments) else {
return true;
};
let command = get_command(&params);
let command = get_command(&params, invocation.session.user_shell());
!is_known_safe_command(&command)
}
@@ -129,18 +129,18 @@ impl ToolHandler for UnifiedExecHandler {
))
})?;
let process_id = manager.allocate_process_id().await;
let command = get_command(&args);
let command = get_command(&args, session.user_shell());
let ExecCommandArgs {
workdir,
yield_time_ms,
max_output_tokens,
with_escalated_permissions,
sandbox_permissions,
justification,
..
} = args;
if with_escalated_permissions.unwrap_or(false)
if sandbox_permissions.requires_escalated_permissions()
&& !matches!(
context.turn.approval_policy,
codex_protocol::protocol::AskForApproval::OnRequest
@@ -184,7 +184,6 @@ impl ToolHandler for UnifiedExecHandler {
&command,
cwd.clone(),
ExecCommandSource::UnifiedExecStartup,
None,
Some(process_id.clone()),
);
emitter.emit(event_ctx, ToolEventStage::Begin).await;
@@ -197,7 +196,7 @@ impl ToolHandler for UnifiedExecHandler {
yield_time_ms,
max_output_tokens,
workdir,
with_escalated_permissions,
sandbox_permissions,
justification,
},
&context,
@@ -213,7 +212,7 @@ impl ToolHandler for UnifiedExecHandler {
"failed to parse write_stdin arguments: {err:?}"
))
})?;
manager
let response = manager
.write_stdin(WriteStdinRequest {
process_id: &args.session_id.to_string(),
input: &args.chars,
@@ -223,7 +222,18 @@ impl ToolHandler for UnifiedExecHandler {
.await
.map_err(|err| {
FunctionCallError::RespondToModel(format!("write_stdin failed: {err:?}"))
})?
})?;
let interaction = TerminalInteractionEvent {
call_id: response.event_call_id.clone(),
process_id: args.session_id.to_string(),
stdin: args.chars.clone(),
};
session
.send_event(turn.as_ref(), EventMsg::TerminalInteraction(interaction))
.await;
response
}
other => {
return Err(FunctionCallError::RespondToModel(format!(
@@ -232,18 +242,6 @@ impl ToolHandler for UnifiedExecHandler {
}
};
// Emit a delta event with the chunk of output we just produced, if any.
if !response.output.is_empty() {
let delta = ExecCommandOutputDeltaEvent {
call_id: response.event_call_id.clone(),
stream: ExecOutputStream::Stdout,
chunk: response.output.as_bytes().to_vec(),
};
session
.send_event(turn.as_ref(), EventMsg::ExecCommandOutputDelta(delta))
.await;
}
let content = format_response(&response);
Ok(ToolOutput::Function {
@@ -254,12 +252,14 @@ impl ToolHandler for UnifiedExecHandler {
}
}
fn get_command(args: &ExecCommandArgs) -> Vec<String> {
let shell = if let Some(shell_str) = &args.shell {
get_shell_by_model_provided_path(&PathBuf::from(shell_str))
} else {
default_user_shell()
};
fn get_command(args: &ExecCommandArgs, session_shell: Arc<Shell>) -> Vec<String> {
let model_shell = args.shell.as_ref().map(|shell_str| {
let mut shell = get_shell_by_model_provided_path(&PathBuf::from(shell_str));
shell.shell_snapshot = None;
shell
});
let shell = model_shell.as_ref().unwrap_or(session_shell.as_ref());
shell.derive_exec_args(&args.cmd, args.login)
}
@@ -296,6 +296,8 @@ fn format_response(response: &UnifiedExecResponse) -> String {
#[cfg(test)]
mod tests {
use super::*;
use crate::shell::default_user_shell;
use std::sync::Arc;
#[test]
fn test_get_command_uses_default_shell_when_unspecified() {
@@ -306,7 +308,7 @@ mod tests {
assert!(args.shell.is_none());
let command = get_command(&args);
let command = get_command(&args, Arc::new(default_user_shell()));
assert_eq!(command.len(), 3);
assert_eq!(command[2], "echo hello");
@@ -321,9 +323,15 @@ mod tests {
assert_eq!(args.shell.as_deref(), Some("/bin/bash"));
let command = get_command(&args);
let command = get_command(&args, Arc::new(default_user_shell()));
assert_eq!(command[2], "echo hello");
assert_eq!(command.last(), Some(&"echo hello".to_string()));
if command
.iter()
.any(|arg| arg.eq_ignore_ascii_case("-Command"))
{
assert!(command.contains(&"-NoProfile".to_string()));
}
}
#[test]
@@ -335,7 +343,7 @@ mod tests {
assert_eq!(args.shell.as_deref(), Some("powershell"));
let command = get_command(&args);
let command = get_command(&args, Arc::new(default_user_shell()));
assert_eq!(command[2], "echo hello");
}
@@ -349,7 +357,7 @@ mod tests {
assert_eq!(args.shell.as_deref(), Some("cmd"));
let command = get_command(&args);
let command = get_command(&args, Arc::new(default_user_shell()));
assert_eq!(command[2], "echo hello");
}

View File

@@ -7,12 +7,10 @@ retry without sandbox on denial (no reapproval thanks to caching).
*/
use crate::error::CodexErr;
use crate::error::SandboxErr;
use crate::error::get_error_message_ui;
use crate::exec::ExecToolCallOutput;
use crate::sandboxing::SandboxManager;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::ExecApprovalRequirement;
use crate::tools::sandboxing::ProvidesSandboxRetryData;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::SandboxOverride;
use crate::tools::sandboxing::ToolCtx;
@@ -43,7 +41,6 @@ impl ToolOrchestrator {
) -> Result<Out, ToolError>
where
T: ToolRuntime<Rq, Out>,
Rq: ProvidesSandboxRetryData,
{
let otel = turn_ctx.client.get_otel_event_manager();
let otel_tn = &tool_ctx.tool_name;
@@ -65,26 +62,11 @@ impl ToolOrchestrator {
return Err(ToolError::Rejected(reason));
}
ExecApprovalRequirement::NeedsApproval { reason, .. } => {
let mut risk = None;
if let Some(metadata) = req.sandbox_retry_data() {
risk = tool_ctx
.session
.assess_sandbox_command(
turn_ctx,
&tool_ctx.call_id,
&metadata.command,
None,
)
.await;
}
let approval_ctx = ApprovalCtx {
session: tool_ctx.session,
turn: turn_ctx,
call_id: &tool_ctx.call_id,
retry_reason: reason,
risk,
};
let decision = tool.start_approval_async(req, approval_ctx).await;
@@ -141,33 +123,12 @@ impl ToolOrchestrator {
// Ask for approval before retrying without sandbox.
if !tool.should_bypass_approval(approval_policy, already_approved) {
let mut risk = None;
if let Some(metadata) = req.sandbox_retry_data() {
let err = SandboxErr::Denied {
output: output.clone(),
};
let friendly = get_error_message_ui(&CodexErr::Sandbox(err));
let failure_summary = format!("failed in sandbox: {friendly}");
risk = tool_ctx
.session
.assess_sandbox_command(
turn_ctx,
&tool_ctx.call_id,
&metadata.command,
Some(failure_summary.as_str()),
)
.await;
}
let reason_msg = build_denial_reason_from_output(output.as_ref());
let approval_ctx = ApprovalCtx {
session: tool_ctx.session,
turn: turn_ctx,
call_id: &tool_ctx.call_id,
retry_reason: Some(reason_msg),
risk,
};
let decision = tool.start_approval_async(req, approval_ctx).await;

View File

@@ -30,7 +30,7 @@ pub trait ToolHandler: Send + Sync {
)
}
fn is_mutating(&self, _invocation: &ToolInvocation) -> bool {
async fn is_mutating(&self, _invocation: &ToolInvocation) -> bool {
false
}
@@ -110,7 +110,7 @@ impl ToolRegistry {
let output_cell = &output_cell;
let invocation = invocation;
async move {
if handler.is_mutating(&invocation) {
if handler.is_mutating(&invocation).await {
tracing::trace!("waiting for tool gate");
invocation.turn.tool_call_gate.wait_ready().await;
tracing::trace!("tool gate released");

View File

@@ -5,6 +5,7 @@ use crate::client_common::tools::ToolSpec;
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::function_tool::FunctionCallError;
use crate::sandboxing::SandboxPermissions;
use crate::tools::context::SharedTurnDiffTracker;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolPayload;
@@ -114,7 +115,7 @@ impl ToolRouter {
command: exec.command,
workdir: exec.working_directory,
timeout_ms: exec.timeout_ms,
with_escalated_permissions: None,
sandbox_permissions: Some(SandboxPermissions::UseDefault),
justification: None,
};
Ok(Some(ToolCall {

View File

@@ -7,12 +7,11 @@
use crate::CODEX_APPLY_PATCH_ARG1;
use crate::exec::ExecToolCallOutput;
use crate::sandboxing::CommandSpec;
use crate::sandboxing::SandboxPermissions;
use crate::sandboxing::execute_env;
use crate::tools::sandboxing::Approvable;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::ProvidesSandboxRetryData;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::SandboxRetryData;
use crate::tools::sandboxing::Sandboxable;
use crate::tools::sandboxing::SandboxablePreference;
use crate::tools::sandboxing::ToolCtx;
@@ -34,12 +33,6 @@ pub struct ApplyPatchRequest {
pub codex_exe: Option<PathBuf>,
}
impl ProvidesSandboxRetryData for ApplyPatchRequest {
fn sandbox_retry_data(&self) -> Option<SandboxRetryData> {
None
}
}
#[derive(Default)]
pub struct ApplyPatchRuntime;
@@ -70,7 +63,7 @@ impl ApplyPatchRuntime {
expiration: req.timeout_ms.into(),
// Run apply_patch with a minimal environment for determinism and to avoid leaks.
env: HashMap::new(),
with_escalated_permissions: None,
sandbox_permissions: SandboxPermissions::UseDefault,
justification: None,
})
}
@@ -114,7 +107,6 @@ impl Approvable<ApplyPatchRequest> for ApplyPatchRuntime {
let call_id = ctx.call_id.to_string();
let cwd = req.cwd.clone();
let retry_reason = ctx.retry_reason.clone();
let risk = ctx.risk.clone();
let user_explicitly_approved = req.user_explicitly_approved;
Box::pin(async move {
with_cached_approval(&session.services, key, move || async move {
@@ -126,7 +118,6 @@ impl Approvable<ApplyPatchRequest> for ApplyPatchRuntime {
vec!["apply_patch".to_string()],
cwd,
Some(reason),
risk,
None,
)
.await

View File

@@ -6,6 +6,8 @@ small and focused and reuses the orchestrator for approvals + sandbox + retry.
*/
use crate::exec::ExecExpiration;
use crate::sandboxing::CommandSpec;
use crate::sandboxing::SandboxPermissions;
use crate::shell::Shell;
use crate::tools::sandboxing::ToolError;
use std::collections::HashMap;
use std::path::Path;
@@ -21,7 +23,7 @@ pub(crate) fn build_command_spec(
cwd: &Path,
env: &HashMap<String, String>,
expiration: ExecExpiration,
with_escalated_permissions: Option<bool>,
sandbox_permissions: SandboxPermissions,
justification: Option<String>,
) -> Result<CommandSpec, ToolError> {
let (program, args) = command
@@ -33,7 +35,43 @@ pub(crate) fn build_command_spec(
cwd: cwd.to_path_buf(),
env: env.clone(),
expiration,
with_escalated_permissions,
sandbox_permissions,
justification,
})
}
/// POSIX-only helper: for commands produced by `Shell::derive_exec_args`
/// for Bash/Zsh/sh of the form `[shell_path, "-lc", "<script>"]`, and
/// when a snapshot is configured on the session shell, rewrite the argv
/// to a single non-login shell that sources the snapshot before running
/// the original script:
///
/// shell -lc "<script>"
/// => shell -c ". SNAPSHOT && <script>"
///
/// On non-POSIX shells or non-matching commands this is a no-op.
pub(crate) fn maybe_wrap_shell_lc_with_snapshot(
command: &[String],
session_shell: &Shell,
) -> Vec<String> {
let Some(snapshot) = &session_shell.shell_snapshot else {
return command.to_vec();
};
if command.len() < 3 {
return command.to_vec();
}
let flag = command[1].as_str();
if flag != "-lc" {
return command.to_vec();
}
let snapshot_path = snapshot.path.to_string_lossy();
let rewritten_script = format!(". \"{snapshot_path}\" && {}", command[2]);
let mut rewritten = command.to_vec();
rewritten[1] = "-c".to_string();
rewritten[2] = rewritten_script;
rewritten
}
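The argv rewrite described in the doc comment above can be shown in isolation. This is a simplified standalone sketch that takes the snapshot path directly rather than reading it from a `Shell`; it is not the function in this diff, just the same transformation on a bare argv:

```rust
// Rewrite `[shell, "-lc", script]` into `[shell, "-c", ". SNAPSHOT && script"]`;
// anything else passes through untouched.
fn wrap_lc_with_snapshot(command: &[String], snapshot_path: &str) -> Vec<String> {
    if command.len() < 3 || command[1] != "-lc" {
        return command.to_vec();
    }
    let mut rewritten = command.to_vec();
    rewritten[1] = "-c".to_string();
    rewritten[2] = format!(". \"{snapshot_path}\" && {}", command[2]);
    rewritten
}

fn main() {
    let argv: Vec<String> = ["/bin/bash", "-lc", "echo hi"]
        .iter()
        .map(|s| s.to_string())
        .collect();
    let wrapped = wrap_lc_with_snapshot(&argv, "/tmp/snap.sh");
    assert_eq!(wrapped, vec!["/bin/bash", "-c", ". \"/tmp/snap.sh\" && echo hi"]);

    // A non-login invocation is not rewritten.
    let plain: Vec<String> = ["/bin/bash", "-c", "echo hi"]
        .iter()
        .map(|s| s.to_string())
        .collect();
    assert_eq!(wrap_lc_with_snapshot(&plain, "/tmp/snap.sh"), plain);
}
```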

View File

@@ -5,15 +5,15 @@ Executes shell requests under the orchestrator: asks for approval when needed,
builds a CommandSpec, and runs it under the current SandboxAttempt.
*/
use crate::exec::ExecToolCallOutput;
use crate::sandboxing::SandboxPermissions;
use crate::sandboxing::execute_env;
use crate::tools::runtimes::build_command_spec;
use crate::tools::runtimes::maybe_wrap_shell_lc_with_snapshot;
use crate::tools::sandboxing::Approvable;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::ExecApprovalRequirement;
use crate::tools::sandboxing::ProvidesSandboxRetryData;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::SandboxOverride;
use crate::tools::sandboxing::SandboxRetryData;
use crate::tools::sandboxing::Sandboxable;
use crate::tools::sandboxing::SandboxablePreference;
use crate::tools::sandboxing::ToolCtx;
@@ -30,20 +30,11 @@ pub struct ShellRequest {
pub cwd: PathBuf,
pub timeout_ms: Option<u64>,
pub env: std::collections::HashMap<String, String>,
pub with_escalated_permissions: Option<bool>,
pub sandbox_permissions: SandboxPermissions,
pub justification: Option<String>,
pub exec_approval_requirement: ExecApprovalRequirement,
}
impl ProvidesSandboxRetryData for ShellRequest {
fn sandbox_retry_data(&self) -> Option<SandboxRetryData> {
Some(SandboxRetryData {
command: self.command.clone(),
cwd: self.cwd.clone(),
})
}
}
#[derive(Default)]
pub struct ShellRuntime;
@@ -51,7 +42,7 @@ pub struct ShellRuntime;
pub(crate) struct ApprovalKey {
command: Vec<String>,
cwd: PathBuf,
escalated: bool,
sandbox_permissions: SandboxPermissions,
}
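With this change the approval cache is keyed on the full `SandboxPermissions` value instead of a collapsed `escalated` boolean, so an escalated retry of the same command never reuses a non-escalated approval. A standalone sketch of that caching idea, using a plain `HashMap` and a boolean stand-in for the enum:

```rust
use std::collections::HashMap;

// Decisions are cached per (command, cwd, permissions) key; changing any
// component (e.g. requesting escalation) misses the cache and forces a
// fresh approval. `escalated` stands in for the richer SandboxPermissions.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
struct ApprovalKey {
    command: Vec<String>,
    cwd: String,
    escalated: bool,
}

fn main() {
    let mut cache: HashMap<ApprovalKey, bool> = HashMap::new();
    let key = ApprovalKey {
        command: vec!["ls".into(), "-la".into()],
        cwd: "/tmp".into(),
        escalated: false,
    };
    cache.insert(key.clone(), true);
    assert_eq!(cache.get(&key), Some(&true));

    // Same command with escalation is a different key: no cached decision.
    let escalated = ApprovalKey { escalated: true, ..key };
    assert!(cache.get(&escalated).is_none());
}
```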
impl ShellRuntime {
@@ -84,7 +75,7 @@ impl Approvable<ShellRequest> for ShellRuntime {
ApprovalKey {
command: req.command.clone(),
cwd: req.cwd.clone(),
escalated: req.with_escalated_permissions.unwrap_or(false),
sandbox_permissions: req.sandbox_permissions,
}
}
@@ -100,7 +91,6 @@ impl Approvable<ShellRequest> for ShellRuntime {
.retry_reason
.clone()
.or_else(|| req.justification.clone());
let risk = ctx.risk.clone();
let session = ctx.session;
let turn = ctx.turn;
let call_id = ctx.call_id.to_string();
@@ -113,7 +103,6 @@ impl Approvable<ShellRequest> for ShellRuntime {
command,
cwd,
reason,
risk,
req.exec_approval_requirement
.proposed_execpolicy_amendment()
.cloned(),
@@ -129,7 +118,7 @@ impl Approvable<ShellRequest> for ShellRuntime {
}
fn sandbox_mode_for_first_attempt(&self, req: &ShellRequest) -> SandboxOverride {
if req.with_escalated_permissions.unwrap_or(false)
if req.sandbox_permissions.requires_escalated_permissions()
|| matches!(
req.exec_approval_requirement,
ExecApprovalRequirement::Skip {
@@ -152,12 +141,16 @@ impl ToolRuntime<ShellRequest, ExecToolCallOutput> for ShellRuntime {
attempt: &SandboxAttempt<'_>,
ctx: &ToolCtx<'_>,
) -> Result<ExecToolCallOutput, ToolError> {
let base_command = &req.command;
let session_shell = ctx.session.user_shell();
let command = maybe_wrap_shell_lc_with_snapshot(base_command, session_shell.as_ref());
let spec = build_command_spec(
&req.command,
&command,
&req.cwd,
&req.env,
req.timeout_ms.into(),
req.with_escalated_permissions,
req.sandbox_permissions,
req.justification.clone(),
)?;
let env = attempt

View File

@@ -7,14 +7,14 @@ the session manager to spawn PTYs once an ExecEnv is prepared.
use crate::error::CodexErr;
use crate::error::SandboxErr;
use crate::exec::ExecExpiration;
use crate::sandboxing::SandboxPermissions;
use crate::tools::runtimes::build_command_spec;
use crate::tools::runtimes::maybe_wrap_shell_lc_with_snapshot;
use crate::tools::sandboxing::Approvable;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::ExecApprovalRequirement;
use crate::tools::sandboxing::ProvidesSandboxRetryData;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::SandboxOverride;
use crate::tools::sandboxing::SandboxRetryData;
use crate::tools::sandboxing::Sandboxable;
use crate::tools::sandboxing::SandboxablePreference;
use crate::tools::sandboxing::ToolCtx;
@@ -34,25 +34,16 @@ pub struct UnifiedExecRequest {
pub command: Vec<String>,
pub cwd: PathBuf,
pub env: HashMap<String, String>,
pub with_escalated_permissions: Option<bool>,
pub sandbox_permissions: SandboxPermissions,
pub justification: Option<String>,
pub exec_approval_requirement: ExecApprovalRequirement,
}
impl ProvidesSandboxRetryData for UnifiedExecRequest {
fn sandbox_retry_data(&self) -> Option<SandboxRetryData> {
Some(SandboxRetryData {
command: self.command.clone(),
cwd: self.cwd.clone(),
})
}
}
#[derive(serde::Serialize, Clone, Debug, Eq, PartialEq, Hash)]
pub struct UnifiedExecApprovalKey {
pub command: Vec<String>,
pub cwd: PathBuf,
pub escalated: bool,
pub sandbox_permissions: SandboxPermissions,
}
pub struct UnifiedExecRuntime<'a> {
@@ -64,7 +55,7 @@ impl UnifiedExecRequest {
command: Vec<String>,
cwd: PathBuf,
env: HashMap<String, String>,
with_escalated_permissions: Option<bool>,
sandbox_permissions: SandboxPermissions,
justification: Option<String>,
exec_approval_requirement: ExecApprovalRequirement,
) -> Self {
@@ -72,7 +63,7 @@ impl UnifiedExecRequest {
command,
cwd,
env,
with_escalated_permissions,
sandbox_permissions,
justification,
exec_approval_requirement,
}
@@ -102,7 +93,7 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
UnifiedExecApprovalKey {
command: req.command.clone(),
cwd: req.cwd.clone(),
escalated: req.with_escalated_permissions.unwrap_or(false),
sandbox_permissions: req.sandbox_permissions,
}
}
@@ -121,7 +112,6 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
.retry_reason
.clone()
.or_else(|| req.justification.clone());
let risk = ctx.risk.clone();
Box::pin(async move {
with_cached_approval(&session.services, key, || async move {
session
@@ -131,7 +121,6 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
command,
cwd,
reason,
risk,
req.exec_approval_requirement
.proposed_execpolicy_amendment()
.cloned(),
@@ -150,7 +139,7 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
}
fn sandbox_mode_for_first_attempt(&self, req: &UnifiedExecRequest) -> SandboxOverride {
if req.with_escalated_permissions.unwrap_or(false)
if req.sandbox_permissions.requires_escalated_permissions()
|| matches!(
req.exec_approval_requirement,
ExecApprovalRequirement::Skip {
@@ -171,14 +160,18 @@ impl<'a> ToolRuntime<UnifiedExecRequest, UnifiedExecSession> for UnifiedExecRunt
&mut self,
req: &UnifiedExecRequest,
attempt: &SandboxAttempt<'_>,
_ctx: &ToolCtx<'_>,
ctx: &ToolCtx<'_>,
) -> Result<UnifiedExecSession, ToolError> {
let base_command = &req.command;
let session_shell = ctx.session.user_shell();
let command = maybe_wrap_shell_lc_with_snapshot(base_command, session_shell.as_ref());
let spec = build_command_spec(
&req.command,
&command,
&req.cwd,
&req.env,
ExecExpiration::DefaultTimeout,
req.with_escalated_permissions,
req.sandbox_permissions,
req.justification.clone(),
)
.map_err(|_| ToolError::Rejected("missing command line for PTY".to_string()))?;

View File

@@ -7,7 +7,6 @@
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::error::CodexErr;
use crate::protocol::SandboxCommandAssessment;
use crate::protocol::SandboxPolicy;
use crate::sandboxing::CommandSpec;
use crate::sandboxing::SandboxManager;
@@ -20,7 +19,6 @@ use std::collections::HashMap;
use std::fmt::Debug;
use std::hash::Hash;
use std::path::Path;
use std::path::PathBuf;
use futures::Future;
use futures::future::BoxFuture;
@@ -84,7 +82,6 @@ pub(crate) struct ApprovalCtx<'a> {
pub turn: &'a TurnContext,
pub call_id: &'a str,
pub retry_reason: Option<String>,
pub risk: Option<SandboxCommandAssessment>,
}
// Specifies what the tool orchestrator should do with a given tool call.
@@ -219,17 +216,6 @@ pub(crate) struct ToolCtx<'a> {
pub tool_name: String,
}
/// Captures the command metadata needed to re-run a tool request without sandboxing.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct SandboxRetryData {
pub command: Vec<String>,
pub cwd: PathBuf,
}
pub(crate) trait ProvidesSandboxRetryData {
fn sandbox_retry_data(&self) -> Option<SandboxRetryData>;
}
#[derive(Debug)]
pub(crate) enum ToolError {
Rejected(String),

View File

@@ -153,7 +153,8 @@ fn create_exec_command_tool() -> ToolSpec {
"login".to_string(),
JsonSchema::Boolean {
description: Some(
"Whether to run the shell with -l/-i semantics. Defaults to true.".to_string(),
"Whether to run the shell with -l/-i semantics. Defaults to false unless a shell snapshot is available."
.to_string(),
),
},
);
@@ -174,10 +175,10 @@ fn create_exec_command_tool() -> ToolSpec {
},
);
properties.insert(
"with_escalated_permissions".to_string(),
JsonSchema::Boolean {
"sandbox_permissions".to_string(),
JsonSchema::String {
description: Some(
"Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions"
"Sandbox permissions for the command. Set to \"require_escalated\" to request running without sandbox restrictions; defaults to \"use_default\"."
.to_string(),
),
},
@@ -186,7 +187,7 @@ fn create_exec_command_tool() -> ToolSpec {
"justification".to_string(),
JsonSchema::String {
description: Some(
"Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command."
"Only set if sandbox_permissions is \"require_escalated\". 1-sentence explanation of why we want to run this command."
.to_string(),
),
},
@@ -274,15 +275,15 @@ fn create_shell_tool() -> ToolSpec {
);
properties.insert(
"with_escalated_permissions".to_string(),
JsonSchema::Boolean {
description: Some("Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions".to_string()),
"sandbox_permissions".to_string(),
JsonSchema::String {
description: Some("Sandbox permissions for the command. Set to \"require_escalated\" to request running without sandbox restrictions; defaults to \"use_default\".".to_string()),
},
);
properties.insert(
"justification".to_string(),
JsonSchema::String {
description: Some("Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command.".to_string()),
description: Some("Only set if sandbox_permissions is \"require_escalated\". 1-sentence explanation of why we want to run this command.".to_string()),
},
);
@@ -335,7 +336,7 @@ fn create_shell_command_tool() -> ToolSpec {
"login".to_string(),
JsonSchema::Boolean {
description: Some(
"Whether to run the shell with login shell semantics. Defaults to true."
"Whether to run the shell with login shell semantics. Defaults to false unless a shell snapshot is available."
.to_string(),
),
},
@@ -347,15 +348,15 @@ fn create_shell_command_tool() -> ToolSpec {
},
);
properties.insert(
"with_escalated_permissions".to_string(),
JsonSchema::Boolean {
description: Some("Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions".to_string()),
"sandbox_permissions".to_string(),
JsonSchema::String {
description: Some("Sandbox permissions for the command. Set to \"require_escalated\" to request running without sandbox restrictions; defaults to \"use_default\".".to_string()),
},
);
properties.insert(
"justification".to_string(),
JsonSchema::String {
description: Some("Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command.".to_string()),
description: Some("Only set if sandbox_permissions is \"require_escalated\". 1-sentence explanation of why we want to run this command.".to_string()),
},
);

View File

@@ -0,0 +1,231 @@
use std::path::PathBuf;
use std::pin::Pin;
use std::sync::Arc;
use tokio::sync::Mutex;
use tokio::time::Duration;
use tokio::time::Instant;
use tokio::time::Sleep;
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::exec::ExecToolCallOutput;
use crate::exec::StreamOutput;
use crate::protocol::EventMsg;
use crate::protocol::ExecCommandOutputDeltaEvent;
use crate::protocol::ExecCommandSource;
use crate::protocol::ExecOutputStream;
use crate::tools::events::ToolEmitter;
use crate::tools::events::ToolEventCtx;
use crate::tools::events::ToolEventStage;
use super::CommandTranscript;
use super::UnifiedExecContext;
use super::session::UnifiedExecSession;
pub(crate) const TRAILING_OUTPUT_GRACE: Duration = Duration::from_millis(100);
/// Spawn a background task that continuously reads from the PTY, appends to the
/// shared transcript, and emits ExecCommandOutputDelta events on UTF-8
/// boundaries.
pub(crate) fn start_streaming_output(
session: &UnifiedExecSession,
context: &UnifiedExecContext,
transcript: Arc<Mutex<CommandTranscript>>,
) {
let mut receiver = session.output_receiver();
let output_drained = session.output_drained_notify();
let exit_token = session.cancellation_token();
let session_ref = Arc::clone(&context.session);
let turn_ref = Arc::clone(&context.turn);
let call_id = context.call_id.clone();
tokio::spawn(async move {
use tokio::sync::broadcast::error::RecvError;
let mut pending = Vec::<u8>::new();
let mut grace_sleep: Option<Pin<Box<Sleep>>> = None;
loop {
tokio::select! {
_ = exit_token.cancelled(), if grace_sleep.is_none() => {
let deadline = Instant::now() + TRAILING_OUTPUT_GRACE;
grace_sleep.replace(Box::pin(tokio::time::sleep_until(deadline)));
}
_ = async {
if let Some(sleep) = grace_sleep.as_mut() {
sleep.as_mut().await;
}
}, if grace_sleep.is_some() => {
output_drained.notify_one();
break;
}
received = receiver.recv() => {
let chunk = match received {
Ok(chunk) => chunk,
Err(RecvError::Lagged(_)) => {
continue;
},
Err(RecvError::Closed) => {
output_drained.notify_one();
break;
}
};
process_chunk(
&mut pending,
&transcript,
&call_id,
&session_ref,
&turn_ref,
chunk,
).await;
}
}
}
});
}
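The `select!` loop above keeps draining output for `TRAILING_OUTPUT_GRACE` after the exit token fires, so trailing bytes already in flight still reach the transcript. The same grace-window pattern can be sketched with std-only channels (no tokio; `drain_with_grace` is an illustrative stand-in, not the crate's API):

```rust
use std::sync::mpsc;
use std::time::{Duration, Instant};

// Once the exit signal is seen, keep reading for a short grace window
// so output that is already queued is not lost; stop at the deadline or
// when the sender side disconnects, whichever comes first.
fn drain_with_grace(rx: &mpsc::Receiver<Vec<u8>>, grace: Duration) -> Vec<u8> {
    let deadline = Instant::now() + grace;
    let mut out = Vec::new();
    while let Some(left) = deadline.checked_duration_since(Instant::now()) {
        match rx.recv_timeout(left) {
            Ok(chunk) => out.extend_from_slice(&chunk),
            Err(_) => break, // timeout or disconnect ends the grace window
        }
    }
    out
}

fn main() {
    let (tx, rx) = mpsc::channel();
    tx.send(b"tail".to_vec()).unwrap();
    drop(tx); // simulate the PTY closing right after the exit signal
    assert_eq!(drain_with_grace(&rx, Duration::from_millis(100)), b"tail");
}
```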
/// Spawn a background watcher that waits for the PTY to exit and then emits a
/// single ExecCommandEnd event with the aggregated transcript.
#[allow(clippy::too_many_arguments)]
pub(crate) fn spawn_exit_watcher(
session: Arc<UnifiedExecSession>,
session_ref: Arc<Session>,
turn_ref: Arc<TurnContext>,
call_id: String,
command: Vec<String>,
cwd: PathBuf,
process_id: String,
transcript: Arc<Mutex<CommandTranscript>>,
started_at: Instant,
) {
let exit_token = session.cancellation_token();
let output_drained = session.output_drained_notify();
tokio::spawn(async move {
exit_token.cancelled().await;
output_drained.notified().await;
let exit_code = session.exit_code().unwrap_or(-1);
let duration = Instant::now().saturating_duration_since(started_at);
emit_exec_end_for_unified_exec(
session_ref,
turn_ref,
call_id,
command,
cwd,
Some(process_id),
transcript,
String::new(),
exit_code,
duration,
)
.await;
});
}
async fn process_chunk(
pending: &mut Vec<u8>,
transcript: &Arc<Mutex<CommandTranscript>>,
call_id: &str,
session_ref: &Arc<Session>,
turn_ref: &Arc<TurnContext>,
chunk: Vec<u8>,
) {
pending.extend_from_slice(&chunk);
while let Some(prefix) = split_valid_utf8_prefix(pending) {
{
let mut guard = transcript.lock().await;
guard.append(&prefix);
}
let event = ExecCommandOutputDeltaEvent {
call_id: call_id.to_string(),
stream: ExecOutputStream::Stdout,
chunk: prefix,
};
session_ref
.send_event(turn_ref.as_ref(), EventMsg::ExecCommandOutputDelta(event))
.await;
}
}
/// Emit an ExecCommandEnd event for a unified exec session, using the transcript
/// as the primary source of aggregated_output and falling back to the provided
/// text when the transcript is empty.
#[allow(clippy::too_many_arguments)]
pub(crate) async fn emit_exec_end_for_unified_exec(
session_ref: Arc<Session>,
turn_ref: Arc<TurnContext>,
call_id: String,
command: Vec<String>,
cwd: PathBuf,
process_id: Option<String>,
transcript: Arc<Mutex<CommandTranscript>>,
fallback_output: String,
exit_code: i32,
duration: Duration,
) {
let aggregated_output = resolve_aggregated_output(&transcript, fallback_output).await;
let output = ExecToolCallOutput {
exit_code,
stdout: StreamOutput::new(aggregated_output.clone()),
stderr: StreamOutput::new(String::new()),
aggregated_output: StreamOutput::new(aggregated_output),
duration,
timed_out: false,
};
let event_ctx = ToolEventCtx::new(session_ref.as_ref(), turn_ref.as_ref(), &call_id, None);
let emitter = ToolEmitter::unified_exec(
&command,
cwd,
ExecCommandSource::UnifiedExecStartup,
process_id,
);
emitter
.emit(event_ctx, ToolEventStage::Success(output))
.await;
}
fn split_valid_utf8_prefix(buffer: &mut Vec<u8>) -> Option<Vec<u8>> {
if buffer.is_empty() {
return None;
}
let len = buffer.len();
let mut split = len;
while split > 0 {
if std::str::from_utf8(&buffer[..split]).is_ok() {
let prefix = buffer[..split].to_vec();
buffer.drain(..split);
return Some(prefix);
}
if len - split > 4 {
break;
}
split -= 1;
}
// If no valid UTF-8 prefix was found, emit the first byte so the stream
// keeps making progress and the transcript reflects all bytes.
let byte = buffer.drain(..1).collect();
Some(byte)
}
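The prefix-splitting helper above can be exercised in isolation: it always either emits the longest valid UTF-8 prefix or, failing that, a single raw byte, so the stream never stalls on a multi-byte scalar split across chunks. A self-contained copy with a worked example:

```rust
/// Std-only copy of split_valid_utf8_prefix: return the longest valid
/// UTF-8 prefix of `buffer` (draining it), falling back to one raw byte
/// so a permanently invalid stream still makes progress.
fn split_valid_utf8_prefix(buffer: &mut Vec<u8>) -> Option<Vec<u8>> {
    if buffer.is_empty() {
        return None;
    }
    let len = buffer.len();
    let mut split = len;
    while split > 0 {
        if std::str::from_utf8(&buffer[..split]).is_ok() {
            let prefix = buffer[..split].to_vec();
            buffer.drain(..split);
            return Some(prefix);
        }
        // A UTF-8 scalar is at most 4 bytes, so only back up that far.
        if len - split > 4 {
            break;
        }
        split -= 1;
    }
    let byte = buffer.drain(..1).collect();
    Some(byte)
}

fn main() {
    // "é" is 0xC3 0xA9; deliver it split across two chunks.
    let mut buf = vec![b'a', 0xC3];
    assert_eq!(split_valid_utf8_prefix(&mut buf), Some(b"a".to_vec()));
    assert_eq!(buf, vec![0xC3]); // the lone lead byte is held back
    buf.push(0xA9);
    assert_eq!(
        split_valid_utf8_prefix(&mut buf),
        Some("é".as_bytes().to_vec())
    );
}
```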
async fn resolve_aggregated_output(
transcript: &Arc<Mutex<CommandTranscript>>,
fallback: String,
) -> String {
let guard = transcript.lock().await;
if guard.data.is_empty() {
return fallback;
}
String::from_utf8_lossy(&guard.data).to_string()
}

View File

@@ -33,7 +33,9 @@ use tokio::sync::Mutex;
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::sandboxing::SandboxPermissions;
mod async_watcher;
mod errors;
mod session;
mod session_manager;
@@ -51,6 +53,24 @@ pub(crate) const MAX_UNIFIED_EXEC_SESSIONS: usize = 64;
// Send a warning message to the model when the session count reaches this number.
pub(crate) const WARNING_UNIFIED_EXEC_SESSIONS: usize = 60;
#[derive(Debug, Default)]
pub(crate) struct CommandTranscript {
pub data: Vec<u8>,
}
impl CommandTranscript {
pub fn append(&mut self, bytes: &[u8]) {
self.data.extend_from_slice(bytes);
if self.data.len() > UNIFIED_EXEC_OUTPUT_MAX_BYTES {
let excess = self
.data
.len()
.saturating_sub(UNIFIED_EXEC_OUTPUT_MAX_BYTES);
self.data.drain(..excess);
}
}
}
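`CommandTranscript::append` above implements a bounded tail buffer: bytes accumulate until the cap is exceeded, then the oldest bytes are evicted from the front. A minimal sketch, with a small `MAX_BYTES` standing in for `UNIFIED_EXEC_OUTPUT_MAX_BYTES`:

```rust
// Illustrative cap; the real constant is UNIFIED_EXEC_OUTPUT_MAX_BYTES.
const MAX_BYTES: usize = 8;

#[derive(Default)]
struct Transcript {
    data: Vec<u8>,
}

impl Transcript {
    // Append, then evict the oldest bytes so only the most recent
    // MAX_BYTES remain.
    fn append(&mut self, bytes: &[u8]) {
        self.data.extend_from_slice(bytes);
        if self.data.len() > MAX_BYTES {
            let excess = self.data.len().saturating_sub(MAX_BYTES);
            self.data.drain(..excess);
        }
    }
}

fn main() {
    let mut t = Transcript::default();
    t.append(b"0123456789"); // ten bytes against an eight-byte cap
    assert_eq!(t.data, b"23456789"); // the two oldest bytes were dropped
}
```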
pub(crate) struct UnifiedExecContext {
pub session: Arc<Session>,
pub turn: Arc<TurnContext>,
@@ -74,7 +94,7 @@ pub(crate) struct ExecCommandRequest {
pub yield_time_ms: u64,
pub max_output_tokens: Option<usize>,
pub workdir: Option<PathBuf>,
pub with_escalated_permissions: Option<bool>,
pub sandbox_permissions: SandboxPermissions,
pub justification: Option<String>,
}
@@ -92,18 +112,14 @@ pub(crate) struct UnifiedExecResponse {
pub chunk_id: String,
pub wall_time: Duration,
pub output: String,
/// Raw bytes returned for this unified exec call before any truncation.
pub raw_output: Vec<u8>,
pub process_id: Option<String>,
pub exit_code: Option<i32>,
pub original_token_count: Option<usize>,
pub session_command: Option<Vec<String>>,
}
#[derive(Default)]
pub(crate) struct UnifiedExecSessionManager {
session_store: Mutex<SessionStore>,
}
// Required for mutex sharing.
#[derive(Default)]
pub(crate) struct SessionStore {
sessions: HashMap<String, SessionEntry>,
@@ -115,22 +131,27 @@ impl SessionStore {
self.reserved_sessions_id.remove(session_id);
self.sessions.remove(session_id)
}
}
pub(crate) fn clear(&mut self) {
self.reserved_sessions_id.clear();
self.sessions.clear();
pub(crate) struct UnifiedExecSessionManager {
session_store: Mutex<SessionStore>,
}
impl Default for UnifiedExecSessionManager {
fn default() -> Self {
Self {
session_store: Mutex::new(SessionStore::default()),
}
}
}
struct SessionEntry {
session: UnifiedExecSession,
session: Arc<UnifiedExecSession>,
session_ref: Arc<Session>,
turn_ref: Arc<TurnContext>,
call_id: String,
process_id: String,
command: Vec<String>,
cwd: PathBuf,
started_at: tokio::time::Instant,
last_used: tokio::time::Instant,
}
@@ -197,7 +218,7 @@ mod tests {
yield_time_ms,
max_output_tokens: None,
workdir: None,
with_escalated_permissions: None,
sandbox_permissions: SandboxPermissions::UseDefault,
justification: None,
},
&context,

View File

@@ -79,6 +79,7 @@ pub(crate) struct UnifiedExecSession {
output_buffer: OutputBuffer,
output_notify: Arc<Notify>,
cancellation_token: CancellationToken,
output_drained: Arc<Notify>,
output_task: JoinHandle<()>,
sandbox_type: SandboxType,
}
@@ -92,10 +93,10 @@ impl UnifiedExecSession {
let output_buffer = Arc::new(Mutex::new(OutputBufferState::default()));
let output_notify = Arc::new(Notify::new());
let cancellation_token = CancellationToken::new();
let output_drained = Arc::new(Notify::new());
let mut receiver = initial_output_rx;
let buffer_clone = Arc::clone(&output_buffer);
let notify_clone = Arc::clone(&output_notify);
let cancellation_token_clone = cancellation_token.clone();
let output_task = tokio::spawn(async move {
loop {
match receiver.recv().await {
@@ -106,11 +107,8 @@ impl UnifiedExecSession {
notify_clone.notify_waiters();
}
Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => continue,
Err(tokio::sync::broadcast::error::RecvError::Closed) => {
cancellation_token_clone.cancel();
break;
}
}
Err(tokio::sync::broadcast::error::RecvError::Closed) => break,
};
}
});
@@ -119,6 +117,7 @@ impl UnifiedExecSession {
output_buffer,
output_notify,
cancellation_token,
output_drained,
output_task,
sandbox_type,
}
@@ -136,6 +135,18 @@ impl UnifiedExecSession {
}
}
pub(super) fn output_receiver(&self) -> tokio::sync::broadcast::Receiver<Vec<u8>> {
self.session.output_receiver()
}
pub(super) fn cancellation_token(&self) -> CancellationToken {
self.cancellation_token.clone()
}
pub(super) fn output_drained_notify(&self) -> Arc<Notify> {
Arc::clone(&self.output_drained)
}
pub(super) fn has_exited(&self) -> bool {
self.session.has_exited()
}
@@ -144,6 +155,12 @@ impl UnifiedExecSession {
self.session.exit_code()
}
pub(super) fn terminate(&self) {
self.session.terminate();
self.cancellation_token.cancel();
self.output_task.abort();
}
async fn snapshot_output(&self) -> Vec<Vec<u8>> {
let guard = self.output_buffer.lock().await;
guard.snapshot()
@@ -246,6 +263,6 @@ impl UnifiedExecSession {
impl Drop for UnifiedExecSession {
fn drop(&mut self) {
self.output_task.abort();
self.terminate();
}
}

View File

@@ -13,18 +13,12 @@ use tokio_util::sync::CancellationToken;
use crate::bash::extract_bash_command;
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::exec::ExecToolCallOutput;
use crate::exec::StreamOutput;
use crate::exec_env::create_env;
use crate::exec_policy::create_exec_approval_requirement_for_command;
use crate::protocol::BackgroundEventEvent;
use crate::protocol::EventMsg;
use crate::protocol::ExecCommandSource;
use crate::sandboxing::ExecEnv;
use crate::sandboxing::SandboxPermissions;
use crate::tools::events::ToolEmitter;
use crate::tools::events::ToolEventCtx;
use crate::tools::events::ToolEventStage;
use crate::tools::orchestrator::ToolOrchestrator;
use crate::tools::runtimes::unified_exec::UnifiedExecRequest as UnifiedExecToolRequest;
use crate::tools::runtimes::unified_exec::UnifiedExecRuntime;
@@ -33,6 +27,7 @@ use crate::truncate::TruncationPolicy;
use crate::truncate::approx_token_count;
use crate::truncate::formatted_truncate_text;
use super::CommandTranscript;
use super::ExecCommandRequest;
use super::MAX_UNIFIED_EXEC_SESSIONS;
use super::SessionEntry;
@@ -43,6 +38,9 @@ use super::UnifiedExecResponse;
use super::UnifiedExecSessionManager;
use super::WARNING_UNIFIED_EXEC_SESSIONS;
use super::WriteStdinRequest;
use super::async_watcher::emit_exec_end_for_unified_exec;
use super::async_watcher::spawn_exit_watcher;
use super::async_watcher::start_streaming_output;
use super::clamp_yield_time;
use super::generate_chunk_id;
use super::resolve_max_tokens;
@@ -128,24 +126,30 @@ impl UnifiedExecSessionManager {
.open_session_with_sandbox(
&request.command,
cwd.clone(),
request.with_escalated_permissions,
request.sandbox_permissions,
request.justification,
context,
)
.await;
let session = match session {
Ok(session) => session,
Ok(session) => Arc::new(session),
Err(err) => {
self.release_process_id(&request.process_id).await;
return Err(err);
}
};
let transcript = Arc::new(tokio::sync::Mutex::new(CommandTranscript::default()));
start_streaming_output(&session, context, Arc::clone(&transcript));
let max_tokens = resolve_max_tokens(request.max_output_tokens);
let yield_time_ms = clamp_yield_time(request.yield_time_ms);
let start = Instant::now();
// For the initial exec_command call, we both stream output to events
// (via start_streaming_output above) and collect a snapshot here for
// the tool response body.
let OutputHandles {
output_buffer,
output_notify,
@@ -163,36 +167,44 @@ impl UnifiedExecSessionManager {
let text = String::from_utf8_lossy(&collected).to_string();
let output = formatted_truncate_text(&text, TruncationPolicy::Tokens(max_tokens));
let has_exited = session.has_exited();
let exit_code = session.exit_code();
let has_exited = session.has_exited() || exit_code.is_some();
let chunk_id = generate_chunk_id();
let process_id = request.process_id.clone();
if has_exited {
// Short-lived command: emit ExecCommandEnd immediately using the
// same helper as the background watcher, so all end events share
// one implementation.
self.release_process_id(&request.process_id).await;
let exit = exit_code.unwrap_or(-1);
Self::emit_exec_end_from_context(
context,
&request.command,
emit_exec_end_for_unified_exec(
Arc::clone(&context.session),
Arc::clone(&context.turn),
context.call_id.clone(),
request.command.clone(),
cwd,
Some(process_id),
Arc::clone(&transcript),
output.clone(),
exit,
wall_time,
// We always emit the process ID in order to keep consistency between the Begin
// event and the End event.
Some(process_id),
)
.await;
session.check_for_sandbox_denial_with_text(&text).await?;
} else {
// Only store session if not exited.
// Long-lived command: persist the session so write_stdin can reuse
// it, and register a background watcher that will emit
// ExecCommandEnd when the PTY eventually exits (even if no further
// tool calls are made).
self.store_session(
session,
Arc::clone(&session),
context,
&request.command,
cwd.clone(),
start,
process_id,
Arc::clone(&transcript),
)
.await;
@@ -205,6 +217,7 @@ impl UnifiedExecSessionManager {
chunk_id,
wall_time,
output,
raw_output: collected,
process_id: if has_exited {
None
} else {
@@ -238,6 +251,8 @@ impl UnifiedExecSessionManager {
if !request.input.is_empty() {
Self::send_input(&writer_tx, request.input.as_bytes()).await?;
// Give the remote process a brief window to react so that we are
// more likely to capture its output in the poll below.
tokio::time::sleep(Duration::from_millis(100)).await;
}
@@ -259,16 +274,20 @@ impl UnifiedExecSessionManager {
let original_token_count = approx_token_count(&text);
let chunk_id = generate_chunk_id();
// After polling, refresh_session_state tells us whether the PTY is
// still alive or has exited and been removed from the store; we thread
// that through so the handler can tag TerminalInteraction with an
// appropriate process_id and exit_code.
let status = self.refresh_session_state(process_id.as_str()).await;
let (process_id, exit_code, completion_entry, event_call_id) = match status {
let (process_id, exit_code, event_call_id) = match status {
SessionStatus::Alive {
exit_code,
call_id,
process_id,
} => (Some(process_id), exit_code, None, call_id),
} => (Some(process_id), exit_code, call_id),
SessionStatus::Exited { exit_code, entry } => {
let call_id = entry.call_id.clone();
(None, exit_code, Some(*entry), call_id)
(None, exit_code, call_id)
}
SessionStatus::Unknown => {
return Err(UnifiedExecError::UnknownSessionId {
@@ -282,6 +301,7 @@ impl UnifiedExecSessionManager {
chunk_id,
wall_time,
output,
raw_output: collected,
process_id,
exit_code,
original_token_count: Some(original_token_count),
@@ -292,12 +312,6 @@ impl UnifiedExecSessionManager {
Self::emit_waiting_status(&session_ref, &turn_ref, &session_command).await;
}
if let (Some(exit), Some(entry)) = (response.exit_code, completion_entry) {
let total_duration = Instant::now().saturating_duration_since(entry.started_at);
Self::emit_exec_end_from_entry(entry, response.output.clone(), exit, total_duration)
.await;
}
Ok(response)
}
@@ -371,28 +385,27 @@ impl UnifiedExecSessionManager {
#[allow(clippy::too_many_arguments)]
async fn store_session(
&self,
session: UnifiedExecSession,
session: Arc<UnifiedExecSession>,
context: &UnifiedExecContext,
command: &[String],
cwd: PathBuf,
started_at: Instant,
process_id: String,
transcript: Arc<tokio::sync::Mutex<CommandTranscript>>,
) {
let entry = SessionEntry {
session,
session: Arc::clone(&session),
session_ref: Arc::clone(&context.session),
turn_ref: Arc::clone(&context.turn),
call_id: context.call_id.clone(),
process_id: process_id.clone(),
command: command.to_vec(),
cwd,
started_at,
last_used: started_at,
};
let number_sessions = {
let mut store = self.session_store.lock().await;
Self::prune_sessions_if_needed(&mut store);
store.sessions.insert(process_id, entry);
store.sessions.insert(process_id.clone(), entry);
store.sessions.len()
};
@@ -405,73 +418,18 @@ impl UnifiedExecSessionManager {
)
.await;
};
}
async fn emit_exec_end_from_entry(
entry: SessionEntry,
aggregated_output: String,
exit_code: i32,
duration: Duration,
) {
let output = ExecToolCallOutput {
exit_code,
stdout: StreamOutput::new(aggregated_output.clone()),
stderr: StreamOutput::new(String::new()),
aggregated_output: StreamOutput::new(aggregated_output),
duration,
timed_out: false,
};
let event_ctx = ToolEventCtx::new(
entry.session_ref.as_ref(),
entry.turn_ref.as_ref(),
&entry.call_id,
None,
);
let emitter = ToolEmitter::unified_exec(
&entry.command,
entry.cwd,
ExecCommandSource::UnifiedExecStartup,
None,
Some(entry.process_id.clone()),
);
emitter
.emit(event_ctx, ToolEventStage::Success(output))
.await;
}
async fn emit_exec_end_from_context(
context: &UnifiedExecContext,
command: &[String],
cwd: PathBuf,
aggregated_output: String,
exit_code: i32,
duration: Duration,
process_id: Option<String>,
) {
let output = ExecToolCallOutput {
exit_code,
stdout: StreamOutput::new(aggregated_output.clone()),
stderr: StreamOutput::new(String::new()),
aggregated_output: StreamOutput::new(aggregated_output),
duration,
timed_out: false,
};
let event_ctx = ToolEventCtx::new(
context.session.as_ref(),
context.turn.as_ref(),
&context.call_id,
None,
);
let emitter = ToolEmitter::unified_exec(
command,
spawn_exit_watcher(
Arc::clone(&session),
Arc::clone(&context.session),
Arc::clone(&context.turn),
context.call_id.clone(),
command.to_vec(),
cwd,
ExecCommandSource::UnifiedExecStartup,
None,
process_id,
transcript,
started_at,
);
emitter
.emit(event_ctx, ToolEventStage::Success(output))
.await;
}
async fn emit_waiting_status(
@@ -518,7 +476,7 @@ impl UnifiedExecSessionManager {
&self,
command: &[String],
cwd: PathBuf,
with_escalated_permissions: Option<bool>,
sandbox_permissions: SandboxPermissions,
justification: Option<String>,
context: &UnifiedExecContext,
) -> Result<UnifiedExecSession, UnifiedExecError> {
@@ -532,14 +490,14 @@ impl UnifiedExecSessionManager {
command,
context.turn.approval_policy,
&context.turn.sandbox_policy,
SandboxPermissions::from(with_escalated_permissions.unwrap_or(false)),
sandbox_permissions,
)
.await;
let req = UnifiedExecToolRequest::new(
command.to_vec(),
cwd,
env,
with_escalated_permissions,
sandbox_permissions,
justification,
exec_approval_requirement,
);
@@ -567,7 +525,7 @@ impl UnifiedExecSessionManager {
cancellation_token: &CancellationToken,
deadline: Instant,
) -> Vec<u8> {
const POST_EXIT_OUTPUT_GRACE: Duration = Duration::from_millis(25);
const POST_EXIT_OUTPUT_GRACE: Duration = Duration::from_millis(50);
let mut collected: Vec<u8> = Vec::with_capacity(4096);
let mut exit_signal_received = cancellation_token.is_cancelled();
@@ -634,7 +592,9 @@ impl UnifiedExecSessionManager {
.collect();
if let Some(session_id) = Self::session_id_to_prune_from_meta(&meta) {
store.remove(&session_id);
if let Some(entry) = store.remove(&session_id) {
entry.session.terminate();
}
return true;
}
@@ -671,8 +631,17 @@ impl UnifiedExecSessionManager {
}
pub(crate) async fn terminate_all_sessions(&self) {
let mut sessions = self.session_store.lock().await;
sessions.clear();
let entries: Vec<SessionEntry> = {
let mut sessions = self.session_store.lock().await;
let entries: Vec<SessionEntry> =
sessions.sessions.drain().map(|(_, entry)| entry).collect();
sessions.reserved_sessions_id.clear();
entries
};
for entry in entries {
entry.session.terminate();
}
}
}
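The reworked `terminate_all_sessions` drains the store while holding the lock, then calls `terminate()` on each entry after the guard is dropped, so PTY teardown never runs under the store mutex. A std-only sketch of that lock-scoping pattern (`Entry` and `terminate_all` are illustrative stand-ins):

```rust
use std::collections::HashMap;
use std::sync::Mutex;

struct Entry {
    name: &'static str,
}

impl Entry {
    fn terminate(&self) {
        // Stand-in for UnifiedExecSession::terminate(), which kills the
        // PTY, cancels the token, and aborts the output task.
        let _ = self.name;
    }
}

// Drain under the lock, act outside it: the inner block scopes the
// MutexGuard so terminate() runs with the store already unlocked.
fn terminate_all(store: &Mutex<HashMap<String, Entry>>) -> usize {
    let entries: Vec<Entry> = {
        let mut guard = store.lock().unwrap();
        guard.drain().map(|(_, entry)| entry).collect()
    };
    for entry in &entries {
        entry.terminate();
    }
    entries.len()
}

fn main() {
    let store = Mutex::new(HashMap::from([
        ("p1".to_string(), Entry { name: "p1" }),
        ("p2".to_string(), Entry { name: "p2" }),
    ]));
    assert_eq!(terminate_all(&store), 2);
    assert!(store.lock().unwrap().is_empty());
}
```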

View File

@@ -6,6 +6,7 @@ use codex_protocol::models::ResponseItem;
pub const USER_INSTRUCTIONS_OPEN_TAG_LEGACY: &str = "<user_instructions>";
pub const USER_INSTRUCTIONS_PREFIX: &str = "# AGENTS.md instructions for ";
pub const SKILL_INSTRUCTIONS_PREFIX: &str = "<skill";
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename = "user_instructions", rename_all = "snake_case")]
@@ -41,6 +42,39 @@ impl From<UserInstructions> for ResponseItem {
}
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename = "skill_instructions", rename_all = "snake_case")]
pub(crate) struct SkillInstructions {
pub name: String,
pub path: String,
pub contents: String,
}
impl SkillInstructions {
pub fn is_skill_instructions(message: &[ContentItem]) -> bool {
if let [ContentItem::InputText { text }] = message {
text.starts_with(SKILL_INSTRUCTIONS_PREFIX)
} else {
false
}
}
}
impl From<SkillInstructions> for ResponseItem {
fn from(si: SkillInstructions) -> Self {
ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText {
text: format!(
"<skill>\n<name>{}</name>\n<path>{}</path>\n{}\n</skill>",
si.name, si.path, si.contents
),
}],
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename = "developer_instructions", rename_all = "snake_case")]
pub(crate) struct DeveloperInstructions {
@@ -72,6 +106,7 @@ impl From<DeveloperInstructions> for ResponseItem {
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
#[test]
fn test_user_instructions() {
@@ -115,4 +150,44 @@ mod tests {
}
]));
}
#[test]
fn test_skill_instructions() {
let skill_instructions = SkillInstructions {
name: "demo-skill".to_string(),
path: "skills/demo/SKILL.md".to_string(),
contents: "body".to_string(),
};
let response_item: ResponseItem = skill_instructions.into();
let ResponseItem::Message { role, content, .. } = response_item else {
panic!("expected ResponseItem::Message");
};
assert_eq!(role, "user");
let [ContentItem::InputText { text }] = content.as_slice() else {
panic!("expected one InputText content item");
};
assert_eq!(
text,
"<skill>\n<name>demo-skill</name>\n<path>skills/demo/SKILL.md</path>\nbody\n</skill>",
);
}
#[test]
fn test_is_skill_instructions() {
assert!(SkillInstructions::is_skill_instructions(&[
ContentItem::InputText {
text: "<skill>\n<name>demo-skill</name>\n<path>skills/demo/SKILL.md</path>\nbody\n</skill>"
.to_string(),
}
]));
assert!(!SkillInstructions::is_skill_instructions(&[
ContentItem::InputText {
text: "regular text".to_string(),
}
]));
}
}

View File

@@ -1,13 +0,0 @@
## Exploration and reading files
- **Think first.** Before any tool call, decide ALL files/resources you will need.
- **Batch everything.** If you need multiple files (even from different places), read them together.
- **multi_tool_use.parallel.** Use `multi_tool_use.parallel` to parallelize tool calls; it is the only supported mechanism.
- **Only make sequential calls if you truly cannot know the next file without seeing a result first.**
- **Workflow:** (a) plan all needed reads → (b) issue one parallel batch → (c) analyze results → (d) repeat if new, unpredictable reads arise.
**Additional notes**:
* Always maximize parallelism. Never read files one-by-one unless logically unavoidable.
* This concerns every read/list/search operation, including but not limited to `cat`, `rg`, `sed`, `ls`, `git show`, `nl`, `wc`, ...
* Do not try to parallelize using scripting or anything other than `multi_tool_use.parallel`.
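The batching rule above can be illustrated with a hypothetical parallel batch. The payload shape below is an assumption for illustration only; field names such as `tool_uses` and `recipient_name` are not defined in this document:

```json
{
  "tool_uses": [
    { "recipient_name": "functions.shell", "parameters": { "command": "cat src/main.rs" } },
    { "recipient_name": "functions.shell", "parameters": { "command": "rg TODO src/" } },
    { "recipient_name": "functions.shell", "parameters": { "command": "git show HEAD --stat" } }
  ]
}
```

One batch like this issues all three reads at once instead of three sequential tool calls.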


@@ -1,24 +0,0 @@
You are a security analyst evaluating shell commands that were blocked by a sandbox. Given the provided metadata, summarize the command's likely intent and assess the risk to help the user decide whether to approve command execution. Return strictly valid JSON with the keys:
- description (concise summary of command intent and potential effects, no more than one sentence, use present tense)
- risk_level ("low", "medium", or "high")
Risk level examples:
- low: read-only inspections, listing files, printing configuration, fetching artifacts from trusted sources
- medium: modifying project files, installing dependencies
- high: deleting or overwriting data, exfiltrating secrets, escalating privileges, or disabling security controls
If information is insufficient, choose the most cautious risk level supported by the evidence.
Respond with JSON only, without markdown code fences or extra commentary.
---
Command metadata:
Platform: {{ platform }}
Sandbox policy: {{ sandbox_policy }}
{% if let Some(roots) = filesystem_roots %}
Filesystem roots: {{ roots }}
{% endif %}
Working directory: {{ working_directory }}
Command argv: {{ command_argv }}
Command (joined): {{ command_joined }}
{% if let Some(message) = sandbox_failure_message %}
Sandbox failure message: {{ message }}
{% endif %}
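For illustration, a hypothetical blocked command such as `rm -rf ~/.config` might yield a response like the following (fenced here for readability only; per the instructions above, the model must emit bare JSON). Per the rubric, deleting data maps to `high`:

```json
{
  "description": "Recursively deletes the user's configuration directory and all of its contents.",
  "risk_level": "high"
}
```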


@@ -1,3 +1,5 @@
#![allow(clippy::expect_used)]
use std::sync::Arc;
use codex_app_server_protocol::AuthMode;
@@ -71,10 +73,11 @@ async fn run_request(input: Vec<ResponseItem>) -> Value {
let config = Arc::new(config);
let conversation_id = ConversationId::new();
let model_family = ModelsManager::construct_model_family_offline(&config.model, &config);
let model = ModelsManager::get_model_offline(config.model.as_deref());
let model_family = ModelsManager::construct_model_family_offline(model.as_str(), &config);
let otel_event_manager = OtelEventManager::new(
conversation_id,
config.model.as_str(),
model.as_str(),
model_family.slug.as_str(),
None,
Some("test@test.com".to_string()),
@@ -108,11 +111,15 @@ async fn run_request(input: Vec<ResponseItem>) -> Value {
}
}
let requests = match server.received_requests().await {
Some(reqs) => reqs,
None => panic!("request not made"),
};
match requests[0].body_json() {
let all_requests = server.received_requests().await.expect("received requests");
let requests: Vec<_> = all_requests
.iter()
.filter(|req| req.method == "POST" && req.url.path().ends_with("/chat/completions"))
.collect();
let request = requests
.first()
.unwrap_or_else(|| panic!("expected POST request to /chat/completions"));
match request.body_json() {
Ok(v) => v,
Err(e) => panic!("invalid json body: {e}"),
}


@@ -74,10 +74,11 @@ async fn run_stream_with_bytes(sse_body: &[u8]) -> Vec<ResponseEvent> {
let conversation_id = ConversationId::new();
let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key"));
let auth_mode = auth_manager.get_auth_mode();
let model_family = ModelsManager::construct_model_family_offline(&config.model, &config);
let model = ModelsManager::get_model_offline(config.model.as_deref());
let model_family = ModelsManager::construct_model_family_offline(model.as_str(), &config);
let otel_event_manager = OtelEventManager::new(
conversation_id,
config.model.as_str(),
model.as_str(),
model_family.slug.as_str(),
None,
Some("test@test.com".to_string()),


@@ -181,6 +181,16 @@ pub fn format_with_current_shell_display(command: &str) -> String {
shlex::try_join(args.iter().map(String::as_str)).expect("serialize current shell command")
}
pub fn format_with_current_shell_non_login(command: &str) -> Vec<String> {
codex_core::shell::default_user_shell().derive_exec_args(command, false)
}
pub fn format_with_current_shell_display_non_login(command: &str) -> String {
let args = format_with_current_shell_non_login(command);
shlex::try_join(args.iter().map(String::as_str))
.expect("serialize current shell command without login")
}
pub mod fs_wait {
use anyhow::Result;
use anyhow::anyhow;


@@ -689,6 +689,33 @@ pub async fn start_mock_server() -> MockServer {
server
}
// todo(aibrahim): remove this and use our search matching patterns directly
/// Get all POST requests to `/responses` endpoints from the mock server.
/// Filters out GET requests (e.g., `/models`).
pub async fn get_responses_requests(server: &MockServer) -> Vec<wiremock::Request> {
server
.received_requests()
.await
.expect("mock server should not fail")
.into_iter()
.filter(|req| req.method == "POST" && req.url.path().ends_with("/responses"))
.collect()
}
// todo(aibrahim): remove this and use our search matching patterns directly
/// Get request bodies as JSON values from POST requests to `/responses` endpoints.
/// Filters out GET requests (e.g., `/models`).
pub async fn get_responses_request_bodies(server: &MockServer) -> Vec<Value> {
get_responses_requests(server)
.await
.into_iter()
.map(|req| {
req.body_json::<Value>()
.expect("request body to be valid JSON")
})
.collect()
}
#[derive(Clone)]
pub struct FunctionCallResponseMocks {
pub function_call: ResponseMock,
@@ -769,6 +796,10 @@ pub async fn mount_sse_sequence(server: &MockServer, bodies: Vec<String>) -> Res
/// - Additionally, enforce symmetry: every `function_call`/`custom_tool_call`
/// in the `input` must have a matching output entry.
fn validate_request_body_invariants(request: &wiremock::Request) {
// Skip anything that is not a POST to /responses (e.g., GET /models)
if request.method != "POST" || !request.url.path().ends_with("/responses") {
return;
}
let Ok(body): Result<Value, _> = request.body_json() else {
return;
};


@@ -23,10 +23,12 @@ use tempfile::TempDir;
use wiremock::MockServer;
use crate::load_default_config_for_test;
use crate::responses::get_responses_request_bodies;
use crate::responses::start_mock_server;
use crate::wait_for_event;
type ConfigMutator = dyn FnOnce(&mut Config) + Send;
type PreBuildHook = dyn FnOnce(&Path) + Send + 'static;
/// A collection of different ways the model can output an apply_patch call
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
@@ -50,6 +52,7 @@ pub enum ShellModelOutput {
pub struct TestCodexBuilder {
config_mutators: Vec<Box<ConfigMutator>>,
auth: CodexAuth,
pre_build_hooks: Vec<Box<PreBuildHook>>,
}
impl TestCodexBuilder {
@@ -69,10 +72,18 @@ impl TestCodexBuilder {
pub fn with_model(self, model: &str) -> Self {
let new_model = model.to_string();
self.with_config(move |config| {
config.model = new_model.clone();
config.model = Some(new_model.clone());
})
}
pub fn with_pre_build_hook<F>(mut self, hook: F) -> Self
where
F: FnOnce(&Path) + Send + 'static,
{
self.pre_build_hooks.push(Box::new(hook));
self
}
pub async fn build(&mut self, server: &wiremock::MockServer) -> anyhow::Result<TestCodex> {
let home = Arc::new(TempDir::new()?);
self.build_with_home(server, home, None).await
@@ -96,7 +107,8 @@ impl TestCodexBuilder {
let (config, cwd) = self.prepare_config(server, &home).await?;
let auth = self.auth.clone();
let conversation_manager = ConversationManager::with_auth(auth.clone());
let conversation_manager =
ConversationManager::with_models_provider(auth.clone(), config.model_provider.clone());
let new_conversation = match resume_from {
Some(path) => {
@@ -135,6 +147,9 @@ impl TestCodexBuilder {
let mut config = load_default_config_for_test(home);
config.cwd = cwd.path().to_path_buf();
config.model_provider = model_provider;
for hook in self.pre_build_hooks.drain(..) {
hook(home.path());
}
if let Ok(cmd) = assert_cmd::Command::cargo_bin("codex") {
config.codex_linux_sandbox_exe = Some(PathBuf::from(cmd.get_program().to_os_string()));
}
@@ -169,6 +184,10 @@ impl TestCodex {
self.cwd.path()
}
pub fn codex_home_path(&self) -> &Path {
self.config.codex_home.as_path()
}
pub fn workspace_path(&self, rel: impl AsRef<Path>) -> PathBuf {
self.cwd_path().join(rel)
}
@@ -272,13 +291,7 @@ impl TestCodexHarness {
}
pub async fn request_bodies(&self) -> Vec<Value> {
self.server
.received_requests()
.await
.expect("requests")
.into_iter()
.map(|req| serde_json::from_slice(&req.body).expect("request body json"))
.collect()
get_responses_request_bodies(&self.server).await
}
pub async fn function_call_output_value(&self, call_id: &str) -> Value {
@@ -355,5 +368,6 @@ pub fn test_codex() -> TestCodexBuilder {
TestCodexBuilder {
config_mutators: vec![],
auth: CodexAuth::from_api_key("dummy"),
pre_build_hooks: vec![],
}
}


@@ -61,14 +61,16 @@ async fn responses_stream_includes_subagent_header_on_review() {
config.model_provider = provider.clone();
let effort = config.model_reasoning_effort;
let summary = config.model_reasoning_summary;
let model = ModelsManager::get_model_offline(config.model.as_deref());
config.model = Some(model.clone());
let config = Arc::new(config);
let conversation_id = ConversationId::new();
let auth_mode = AuthMode::ChatGPT;
let model_family = ModelsManager::construct_model_family_offline(&config.model, &config);
let model_family = ModelsManager::construct_model_family_offline(model.as_str(), &config);
let otel_event_manager = OtelEventManager::new(
conversation_id,
config.model.as_str(),
model.as_str(),
model_family.slug.as_str(),
None,
Some("test@test.com".to_string()),
@@ -151,15 +153,17 @@ async fn responses_stream_includes_subagent_header_on_other() {
config.model_provider = provider.clone();
let effort = config.model_reasoning_effort;
let summary = config.model_reasoning_summary;
let model = ModelsManager::get_model_offline(config.model.as_deref());
config.model = Some(model.clone());
let config = Arc::new(config);
let conversation_id = ConversationId::new();
let auth_mode = AuthMode::ChatGPT;
let model_family = ModelsManager::construct_model_family_offline(&config.model, &config);
let model_family = ModelsManager::construct_model_family_offline(model.as_str(), &config);
let otel_event_manager = OtelEventManager::new(
conversation_id,
config.model.as_str(),
model.as_str(),
model_family.slug.as_str(),
None,
Some("test@test.com".to_string()),
@@ -235,7 +239,7 @@ async fn responses_respects_model_family_overrides_from_config() {
let codex_home = TempDir::new().expect("failed to create TempDir");
let mut config = load_default_config_for_test(&codex_home);
config.model = "gpt-3.5-turbo".to_string();
config.model = Some("gpt-3.5-turbo".to_string());
config.model_provider_id = provider.name.clone();
config.model_provider = provider.clone();
config.model_supports_reasoning_summaries = Some(true);
@@ -243,15 +247,16 @@ async fn responses_respects_model_family_overrides_from_config() {
config.model_reasoning_summary = ReasoningSummary::Detailed;
let effort = config.model_reasoning_effort;
let summary = config.model_reasoning_summary;
let model = config.model.clone().expect("model configured");
let config = Arc::new(config);
let conversation_id = ConversationId::new();
let auth_mode =
AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key")).get_auth_mode();
let model_family = ModelsManager::construct_model_family_offline(&config.model, &config);
let model_family = ModelsManager::construct_model_family_offline(model.as_str(), &config);
let otel_event_manager = OtelEventManager::new(
conversation_id,
config.model.as_str(),
model.as_str(),
model_family.slug.as_str(),
None,
Some("test@test.com".to_string()),


@@ -9,6 +9,7 @@ use codex_core::protocol::ExecApprovalRequestEvent;
use codex_core::protocol::ExecPolicyAmendment;
use codex_core::protocol::Op;
use codex_core::protocol::SandboxPolicy;
use codex_core::sandboxing::SandboxPermissions;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::protocol::ReviewDecision;
use codex_protocol::user_input::UserInput;
@@ -96,14 +97,14 @@ impl ActionKind {
test: &TestCodex,
server: &MockServer,
call_id: &str,
with_escalated_permissions: bool,
sandbox_permissions: SandboxPermissions,
) -> Result<(Value, Option<String>)> {
match self {
ActionKind::WriteFile { target, content } => {
let (path, _) = target.resolve_for_patch(test);
let _ = fs::remove_file(&path);
let command = format!("printf {content:?} > {path:?} && cat {path:?}");
let event = shell_event(call_id, &command, 1_000, with_escalated_permissions)?;
let event = shell_event(call_id, &command, 1_000, sandbox_permissions)?;
Ok((event, Some(command)))
}
ActionKind::FetchUrl {
@@ -125,11 +126,11 @@ impl ActionKind {
);
let command = format!("python3 -c \"{script}\"");
let event = shell_event(call_id, &command, 1_000, with_escalated_permissions)?;
let event = shell_event(call_id, &command, 3_000, sandbox_permissions)?;
Ok((event, Some(command)))
}
ActionKind::RunCommand { command } => {
let event = shell_event(call_id, command, 1_000, with_escalated_permissions)?;
let event = shell_event(call_id, command, 1_000, sandbox_permissions)?;
Ok((event, Some(command.to_string())))
}
ActionKind::RunUnifiedExecCommand {
@@ -140,7 +141,7 @@ impl ActionKind {
call_id,
command,
Some(1000),
with_escalated_permissions,
sandbox_permissions,
*justification,
)?;
Ok((event, Some(command.to_string())))
@@ -156,7 +157,7 @@ impl ActionKind {
let _ = fs::remove_file(&path);
let patch = build_add_file_patch(&patch_path, content);
let command = shell_apply_patch_command(&patch);
let event = shell_event(call_id, &command, 5_000, with_escalated_permissions)?;
let event = shell_event(call_id, &command, 5_000, sandbox_permissions)?;
Ok((event, Some(command)))
}
}
@@ -181,14 +182,14 @@ fn shell_event(
call_id: &str,
command: &str,
timeout_ms: u64,
with_escalated_permissions: bool,
sandbox_permissions: SandboxPermissions,
) -> Result<Value> {
let mut args = json!({
"command": command,
"timeout_ms": timeout_ms,
});
if with_escalated_permissions {
args["with_escalated_permissions"] = json!(true);
if sandbox_permissions.requires_escalated_permissions() {
args["sandbox_permissions"] = json!(sandbox_permissions);
}
let args_str = serde_json::to_string(&args)?;
Ok(ev_function_call(call_id, "shell_command", &args_str))
@@ -198,7 +199,7 @@ fn exec_command_event(
call_id: &str,
cmd: &str,
yield_time_ms: Option<u64>,
with_escalated_permissions: bool,
sandbox_permissions: SandboxPermissions,
justification: Option<&str>,
) -> Result<Value> {
let mut args = json!({
@@ -207,8 +208,8 @@ fn exec_command_event(
if let Some(yield_time_ms) = yield_time_ms {
args["yield_time_ms"] = json!(yield_time_ms);
}
if with_escalated_permissions {
args["with_escalated_permissions"] = json!(true);
if sandbox_permissions.requires_escalated_permissions() {
args["sandbox_permissions"] = json!(sandbox_permissions);
let reason = justification.unwrap_or(DEFAULT_UNIFIED_EXEC_JUSTIFICATION);
args["justification"] = json!(reason);
}
@@ -466,7 +467,7 @@ struct ScenarioSpec {
approval_policy: AskForApproval,
sandbox_policy: SandboxPolicy,
action: ActionKind,
with_escalated_permissions: bool,
sandbox_permissions: SandboxPermissions,
features: Vec<Feature>,
model_override: Option<&'static str>,
outcome: Outcome,
@@ -637,7 +638,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("dfa_on_request.txt"),
content: "danger-on-request",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
@@ -654,7 +655,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("dfa_on_request_5_1.txt"),
content: "danger-on-request",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::Auto,
@@ -671,7 +672,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
endpoint: "/dfa/network",
response_body: "danger-network-ok",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
@@ -687,7 +688,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
endpoint: "/dfa/network",
response_body: "danger-network-ok",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::Auto,
@@ -702,7 +703,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
action: ActionKind::RunCommand {
command: "echo trusted-unless",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
@@ -717,7 +718,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
action: ActionKind::RunCommand {
command: "echo trusted-unless",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::Auto,
@@ -733,7 +734,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("dfa_on_failure.txt"),
content: "danger-on-failure",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
@@ -750,7 +751,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("dfa_on_failure_5_1.txt"),
content: "danger-on-failure",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::Auto,
@@ -767,7 +768,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("dfa_unless_trusted.txt"),
content: "danger-unless-trusted",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
@@ -787,7 +788,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("dfa_unless_trusted_5_1.txt"),
content: "danger-unless-trusted",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::ExecApproval {
@@ -807,7 +808,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("dfa_never.txt"),
content: "danger-never",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
@@ -824,7 +825,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("dfa_never_5_1.txt"),
content: "danger-never",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::Auto,
@@ -841,7 +842,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::Workspace("ro_on_request.txt"),
content: "read-only-approval",
},
with_escalated_permissions: true,
sandbox_permissions: SandboxPermissions::RequireEscalated,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
@@ -861,7 +862,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::Workspace("ro_on_request_5_1.txt"),
content: "read-only-approval",
},
with_escalated_permissions: true,
sandbox_permissions: SandboxPermissions::RequireEscalated,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::ExecApproval {
@@ -880,7 +881,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
action: ActionKind::RunCommand {
command: "echo trusted-read-only",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
@@ -895,7 +896,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
action: ActionKind::RunCommand {
command: "echo trusted-read-only",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::Auto,
@@ -911,7 +912,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
endpoint: "/ro/network-blocked",
response_body: "should-not-see",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: None,
outcome: Outcome::Auto,
@@ -925,7 +926,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::Workspace("ro_on_request_denied.txt"),
content: "should-not-write",
},
with_escalated_permissions: true,
sandbox_permissions: SandboxPermissions::RequireEscalated,
features: vec![],
model_override: None,
outcome: Outcome::ExecApproval {
@@ -946,7 +947,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::Workspace("ro_on_failure.txt"),
content: "read-only-on-failure",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
@@ -967,7 +968,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::Workspace("ro_on_failure_5_1.txt"),
content: "read-only-on-failure",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::ExecApproval {
@@ -987,7 +988,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
endpoint: "/ro/network-approved",
response_body: "read-only-network-ok",
},
with_escalated_permissions: true,
sandbox_permissions: SandboxPermissions::RequireEscalated,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
@@ -1006,7 +1007,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
endpoint: "/ro/network-approved",
response_body: "read-only-network-ok",
},
with_escalated_permissions: true,
sandbox_permissions: SandboxPermissions::RequireEscalated,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::ExecApproval {
@@ -1025,7 +1026,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::Workspace("apply_patch_shell.txt"),
content: "shell-apply-patch",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: None,
outcome: Outcome::PatchApproval {
@@ -1045,7 +1046,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::Workspace("apply_patch_function.txt"),
content: "function-apply-patch",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1-codex"),
outcome: Outcome::Auto,
@@ -1062,7 +1063,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("apply_patch_function_danger.txt"),
content: "function-patch-danger",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![Feature::ApplyPatchFreeform],
model_override: Some("gpt-5.1-codex"),
outcome: Outcome::Auto,
@@ -1079,7 +1080,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("apply_patch_function_outside.txt"),
content: "function-patch-outside",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1-codex"),
outcome: Outcome::PatchApproval {
@@ -1099,7 +1100,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("apply_patch_function_outside_denied.txt"),
content: "function-patch-outside-denied",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1-codex"),
outcome: Outcome::PatchApproval {
@@ -1119,7 +1120,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("apply_patch_shell_outside.txt"),
content: "shell-patch-outside",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: None,
outcome: Outcome::PatchApproval {
@@ -1139,7 +1140,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::Workspace("apply_patch_function_unless_trusted.txt"),
content: "function-patch-unless-trusted",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1-codex"),
outcome: Outcome::PatchApproval {
@@ -1159,7 +1160,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("apply_patch_function_never.txt"),
content: "function-patch-never",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1-codex"),
outcome: Outcome::Auto,
@@ -1178,7 +1179,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::Workspace("ro_unless_trusted.txt"),
content: "read-only-unless-trusted",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
@@ -1198,7 +1199,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::Workspace("ro_unless_trusted_5_1.txt"),
content: "read-only-unless-trusted",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5.1"),
outcome: Outcome::ExecApproval {
@@ -1218,7 +1219,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::Workspace("ro_never.txt"),
content: "read-only-never",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: None,
outcome: Outcome::Auto,
@@ -1241,7 +1242,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
action: ActionKind::RunCommand {
command: "echo trusted-never",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
@@ -1257,7 +1258,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::Workspace("ww_on_request.txt"),
content: "workspace-on-request",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
@@ -1274,7 +1275,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
endpoint: "/ww/network-blocked",
response_body: "workspace-network-blocked",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: None,
outcome: Outcome::Auto,
@@ -1288,7 +1289,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("ww_on_request_outside.txt"),
content: "workspace-on-request-outside",
},
with_escalated_permissions: true,
sandbox_permissions: SandboxPermissions::RequireEscalated,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
@@ -1308,7 +1309,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
endpoint: "/ww/network-ok",
response_body: "workspace-network-ok",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
@@ -1325,7 +1326,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("ww_on_failure.txt"),
content: "workspace-on-failure",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
@@ -1345,7 +1346,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("ww_unless_trusted.txt"),
content: "workspace-unless-trusted",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
@@ -1365,7 +1366,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
target: TargetPath::OutsideWorkspace("ww_never.txt"),
content: "workspace-never",
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![],
model_override: None,
outcome: Outcome::Auto,
@@ -1389,7 +1390,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
command: "echo \"hello unified exec\"",
justification: None,
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![Feature::UnifiedExec],
model_override: Some("gpt-5"),
outcome: Outcome::Auto,
@@ -1407,7 +1408,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
command: "python3 -c 'print('\"'\"'escalated unified exec'\"'\"')'",
justification: Some(DEFAULT_UNIFIED_EXEC_JUSTIFICATION),
},
with_escalated_permissions: true,
sandbox_permissions: SandboxPermissions::RequireEscalated,
features: vec![Feature::UnifiedExec],
model_override: Some("gpt-5"),
outcome: Outcome::ExecApproval {
@@ -1426,7 +1427,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
command: "git reset --hard",
justification: None,
},
with_escalated_permissions: false,
sandbox_permissions: SandboxPermissions::UseDefault,
features: vec![Feature::UnifiedExec],
model_override: None,
outcome: Outcome::ExecApproval {
@@ -1472,7 +1473,7 @@ async fn run_scenario(scenario: &ScenarioSpec) -> Result<()> {
let call_id = scenario.name;
let (event, expected_command) = scenario
.action
.prepare(&test, &server, call_id, scenario.with_escalated_permissions)
.prepare(&test, &server, call_id, scenario.sandbox_permissions)
.await?;
let _ = mount_sse_once(
@@ -1578,7 +1579,12 @@ async fn approving_execpolicy_amendment_persists_policy_and_skips_future_prompts
let (first_event, expected_command) = ActionKind::RunCommand {
command: "touch allow-prefix.txt",
}
.prepare(&test, &server, call_id_first, false)
.prepare(
&test,
&server,
call_id_first,
SandboxPermissions::UseDefault,
)
.await?;
let expected_command =
expected_command.expect("execpolicy amendment scenario should produce a shell command");
@@ -1656,7 +1662,12 @@ async fn approving_execpolicy_amendment_persists_policy_and_skips_future_prompts
let (second_event, second_command) = ActionKind::RunCommand {
command: "touch allow-prefix.txt",
}
.prepare(&test, &server, call_id_second, false)
.prepare(
&test,
&server,
call_id_second,
SandboxPermissions::UseDefault,
)
.await?;
assert_eq!(second_command.as_deref(), Some(expected_command.as_str()));


@@ -30,7 +30,12 @@ use codex_protocol::openai_models::ReasoningEffort;
use codex_protocol::user_input::UserInput;
use core_test_support::load_default_config_for_test;
use core_test_support::load_sse_fixture_with_id;
use core_test_support::responses;
use core_test_support::responses::ev_completed_with_tokens;
use core_test_support::responses::get_responses_requests;
use core_test_support::responses::mount_sse_once;
use core_test_support::responses::mount_sse_once_match;
use core_test_support::responses::sse;
use core_test_support::responses::sse_failed;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex::TestCodex;
use core_test_support::test_codex::test_codex;
@@ -240,7 +245,7 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
// Mock server that will receive the resumed request
let server = MockServer::start().await;
let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
let resp_mock = mount_sse_once(&server, sse_completed("resp1")).await;
// Configure Codex to resume from our file
let model_provider = ModelProviderInfo {
@@ -253,8 +258,10 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
// Also configure user instructions to ensure they are NOT delivered on resume.
config.user_instructions = Some("be nice".to_string());
-let conversation_manager =
-    ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
+let conversation_manager = ConversationManager::with_models_provider(
+    CodexAuth::from_api_key("Test API Key"),
+    config.model_provider.clone(),
+);
let auth_manager =
codex_core::AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key"));
let NewConversation {
@@ -337,8 +344,10 @@ async fn includes_conversation_id_and_model_headers_in_request() {
let mut config = load_default_config_for_test(&codex_home);
config.model_provider = model_provider;
-let conversation_manager =
-    ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
+let conversation_manager = ConversationManager::with_models_provider(
+    CodexAuth::from_api_key("Test API Key"),
+    config.model_provider.clone(),
+);
let NewConversation {
conversation: codex,
conversation_id,
@@ -360,7 +369,10 @@ async fn includes_conversation_id_and_model_headers_in_request() {
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
// get request from the server
-let request = &server.received_requests().await.unwrap()[0];
+let requests = get_responses_requests(&server).await;
+let request = requests
+    .first()
+    .expect("expected POST request to /responses");
let request_conversation_id = request.headers.get("conversation_id").unwrap();
let request_authorization = request.headers.get("authorization").unwrap();
let request_originator = request.headers.get("originator").unwrap();
@@ -381,7 +393,7 @@ async fn includes_base_instructions_override_in_request() {
skip_if_no_network!();
// Mock server
let server = MockServer::start().await;
-let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
+let resp_mock = mount_sse_once(&server, sse_completed("resp1")).await;
let model_provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
@@ -393,8 +405,10 @@ async fn includes_base_instructions_override_in_request() {
config.base_instructions = Some("test instructions".to_string());
config.model_provider = model_provider;
-let conversation_manager =
-    ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
+let conversation_manager = ConversationManager::with_models_provider(
+    CodexAuth::from_api_key("Test API Key"),
+    config.model_provider.clone(),
+);
let codex = conversation_manager
.new_conversation(config)
.await
@@ -451,7 +465,10 @@ async fn chatgpt_auth_sends_correct_request() {
let codex_home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&codex_home);
config.model_provider = model_provider;
-let conversation_manager = ConversationManager::with_auth(create_dummy_codex_auth());
+let conversation_manager = ConversationManager::with_models_provider(
+    create_dummy_codex_auth(),
+    config.model_provider.clone(),
+);
let NewConversation {
conversation: codex,
conversation_id,
@@ -473,7 +490,10 @@ async fn chatgpt_auth_sends_correct_request() {
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
// get request from the server
-let request = &server.received_requests().await.unwrap()[0];
+let requests = get_responses_requests(&server).await;
+let request = requests
+    .first()
+    .expect("expected POST request to /responses");
let request_conversation_id = request.headers.get("conversation_id").unwrap();
let request_authorization = request.headers.get("authorization").unwrap();
let request_originator = request.headers.get("originator").unwrap();
@@ -569,7 +589,7 @@ async fn includes_user_instructions_message_in_request() {
skip_if_no_network!();
let server = MockServer::start().await;
-let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
+let resp_mock = mount_sse_once(&server, sse_completed("resp1")).await;
let model_provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
@@ -581,8 +601,10 @@ async fn includes_user_instructions_message_in_request() {
config.model_provider = model_provider;
config.user_instructions = Some("be nice".to_string());
-let conversation_manager =
-    ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
+let conversation_manager = ConversationManager::with_models_provider(
+    CodexAuth::from_api_key("Test API Key"),
+    config.model_provider.clone(),
+);
let codex = conversation_manager
.new_conversation(config)
.await
@@ -627,7 +649,7 @@ async fn skills_append_to_instructions_when_feature_enabled() {
skip_if_no_network!();
let server = MockServer::start().await;
-let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
+let resp_mock = mount_sse_once(&server, sse_completed("resp1")).await;
let model_provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
@@ -648,8 +670,10 @@ async fn skills_append_to_instructions_when_feature_enabled() {
config.features.enable(Feature::Skills);
config.cwd = codex_home.path().to_path_buf();
-let conversation_manager =
-    ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
+let conversation_manager = ConversationManager::with_models_provider(
+    CodexAuth::from_api_key("Test API Key"),
+    config.model_provider.clone(),
+);
let codex = conversation_manager
.new_conversation(config)
.await
@@ -695,7 +719,7 @@ async fn includes_configured_effort_in_request() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = MockServer::start().await;
-let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
+let resp_mock = mount_sse_once(&server, sse_completed("resp1")).await;
let TestCodex { codex, .. } = test_codex()
.with_model("gpt-5.1-codex")
.with_config(|config| {
@@ -734,7 +758,7 @@ async fn includes_no_effort_in_request() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = MockServer::start().await;
-let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
+let resp_mock = mount_sse_once(&server, sse_completed("resp1")).await;
let TestCodex { codex, .. } = test_codex()
.with_model("gpt-5.1-codex")
.build(&server)
@@ -771,7 +795,7 @@ async fn includes_default_reasoning_effort_in_request_when_defined_by_model_fami
skip_if_no_network!(Ok(()));
let server = MockServer::start().await;
-let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
+let resp_mock = mount_sse_once(&server, sse_completed("resp1")).await;
let TestCodex { codex, .. } = test_codex().with_model("gpt-5.1").build(&server).await?;
codex
@@ -804,7 +828,7 @@ async fn includes_default_verbosity_in_request() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = MockServer::start().await;
-let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
+let resp_mock = mount_sse_once(&server, sse_completed("resp1")).await;
let TestCodex { codex, .. } = test_codex().with_model("gpt-5.1").build(&server).await?;
codex
@@ -837,7 +861,7 @@ async fn configured_verbosity_not_sent_for_models_without_support() -> anyhow::R
skip_if_no_network!(Ok(()));
let server = MockServer::start().await;
-let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
+let resp_mock = mount_sse_once(&server, sse_completed("resp1")).await;
let TestCodex { codex, .. } = test_codex()
.with_model("gpt-5.1-codex")
.with_config(|config| {
@@ -875,7 +899,7 @@ async fn configured_verbosity_is_sent() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = MockServer::start().await;
-let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
+let resp_mock = mount_sse_once(&server, sse_completed("resp1")).await;
let TestCodex { codex, .. } = test_codex()
.with_model("gpt-5.1")
.with_config(|config| {
@@ -914,7 +938,7 @@ async fn includes_developer_instructions_message_in_request() {
skip_if_no_network!();
let server = MockServer::start().await;
-let resp_mock = responses::mount_sse_once(&server, sse_completed("resp1")).await;
+let resp_mock = mount_sse_once(&server, sse_completed("resp1")).await;
let model_provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
@@ -927,8 +951,10 @@ async fn includes_developer_instructions_message_in_request() {
config.user_instructions = Some("be nice".to_string());
config.developer_instructions = Some("be useful".to_string());
-let conversation_manager =
-    ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
+let conversation_manager = ConversationManager::with_models_provider(
+    CodexAuth::from_api_key("Test API Key"),
+    config.model_provider.clone(),
+);
let codex = conversation_manager
.new_conversation(config)
.await
@@ -1014,13 +1040,15 @@ async fn azure_responses_request_includes_store_and_reasoning_ids() {
config.model_provider = provider.clone();
let effort = config.model_reasoning_effort;
let summary = config.model_reasoning_summary;
+let model = ModelsManager::get_model_offline(config.model.as_deref());
+config.model = Some(model.clone());
let config = Arc::new(config);
-let model_family = ModelsManager::construct_model_family_offline(&config.model, &config);
+let model_family = ModelsManager::construct_model_family_offline(model.as_str(), &config);
let conversation_id = ConversationId::new();
let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key"));
let otel_event_manager = OtelEventManager::new(
conversation_id,
-config.model.as_str(),
+model.as_str(),
model_family.slug.as_str(),
None,
Some("test@test.com".to_string()),
@@ -1103,11 +1131,8 @@ async fn azure_responses_request_includes_store_and_reasoning_ids() {
}
}
-let requests = server
-    .received_requests()
-    .await
-    .expect("mock server collected requests");
-assert_eq!(requests.len(), 1, "expected a single request");
+let requests = get_responses_requests(&server).await;
+assert_eq!(requests.len(), 1, "expected a single POST request");
let body: serde_json::Value = requests[0]
.body_json()
.expect("request body to be valid JSON");
@@ -1128,7 +1153,7 @@ async fn token_count_includes_rate_limits_snapshot() {
skip_if_no_network!();
let server = MockServer::start().await;
-let sse_body = responses::sse(vec![responses::ev_completed_with_tokens("resp_rate", 123)]);
+let sse_body = sse(vec![ev_completed_with_tokens("resp_rate", 123)]);
let response = ResponseTemplate::new(200)
.insert_header("content-type", "text/event-stream")
@@ -1154,7 +1179,10 @@ async fn token_count_includes_rate_limits_snapshot() {
let mut config = load_default_config_for_test(&home);
config.model_provider = provider;
-let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("test"));
+let conversation_manager = ConversationManager::with_models_provider(
+    CodexAuth::from_api_key("test"),
+    config.model_provider.clone(),
+);
let codex = conversation_manager
.new_conversation(config)
.await
@@ -1361,10 +1389,10 @@ async fn context_window_error_sets_total_tokens_to_model_window() -> anyhow::Res
const EFFECTIVE_CONTEXT_WINDOW: i64 = (272_000 * 95) / 100;
-responses::mount_sse_once_match(
+mount_sse_once_match(
&server,
body_string_contains("trigger context window"),
-responses::sse_failed(
+sse_failed(
"resp_context_window",
"context_length_exceeded",
"Your input exceeds the context window of this model. Please adjust your input and try again.",
@@ -1372,7 +1400,7 @@ async fn context_window_error_sets_total_tokens_to_model_window() -> anyhow::Res
)
.await;
-responses::mount_sse_once_match(
+mount_sse_once_match(
&server,
body_string_contains("seed turn"),
sse_completed("resp_seed"),
@@ -1381,7 +1409,7 @@ async fn context_window_error_sets_total_tokens_to_model_window() -> anyhow::Res
let TestCodex { codex, .. } = test_codex()
.with_config(|config| {
-config.model = "gpt-5.1".to_string();
+config.model = Some("gpt-5.1".to_string());
config.model_context_window = Some(272_000);
})
.build(&server)
@@ -1505,7 +1533,10 @@ async fn azure_overrides_assign_properties_used_for_responses_url() {
let mut config = load_default_config_for_test(&codex_home);
config.model_provider = provider;
-let conversation_manager = ConversationManager::with_auth(create_dummy_codex_auth());
+let conversation_manager = ConversationManager::with_models_provider(
+    create_dummy_codex_auth(),
+    config.model_provider.clone(),
+);
let codex = conversation_manager
.new_conversation(config)
.await
@@ -1583,7 +1614,10 @@ async fn env_var_overrides_loaded_auth() {
let mut config = load_default_config_for_test(&codex_home);
config.model_provider = provider;
-let conversation_manager = ConversationManager::with_auth(create_dummy_codex_auth());
+let conversation_manager = ConversationManager::with_models_provider(
+    create_dummy_codex_auth(),
+    config.model_provider.clone(),
+);
let codex = conversation_manager
.new_conversation(config)
.await
@@ -1661,8 +1695,10 @@ async fn history_dedupes_streamed_and_final_messages_across_turns() {
let mut config = load_default_config_for_test(&codex_home);
config.model_provider = model_provider;
-let conversation_manager =
-    ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
+let conversation_manager = ConversationManager::with_models_provider(
+    CodexAuth::from_api_key("Test API Key"),
+    config.model_provider.clone(),
+);
let NewConversation {
conversation: codex,
..
@@ -1699,7 +1735,7 @@ async fn history_dedupes_streamed_and_final_messages_across_turns() {
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
// Inspect the three captured requests.
-let requests = server.received_requests().await.unwrap();
+let requests = get_responses_requests(&server).await;
assert_eq!(requests.len(), 3, "expected 3 requests (one per turn)");
// Replace full-array compare with tail-only raw JSON compare using a single hard-coded value.


@@ -5,6 +5,7 @@ use codex_core::protocol::ReviewDecision;
use codex_core::protocol::ReviewRequest;
use codex_core::protocol::ReviewTarget;
use codex_core::protocol::SandboxPolicy;
use codex_core::sandboxing::SandboxPermissions;
use core_test_support::responses::ev_apply_patch_function_call;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
@@ -31,7 +32,7 @@ async fn codex_delegate_forwards_exec_approval_and_proceeds_on_approval() {
let args = serde_json::json!({
"command": "rm -rf delegated",
"timeout_ms": 1000,
-"with_escalated_permissions": true,
+"sandbox_permissions": SandboxPermissions::RequireEscalated,
})
.to_string();
let sse1 = sse(vec![
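The hunks above migrate call sites from a bare `bool` flag (`false` / `"with_escalated_permissions": true`) to a `SandboxPermissions` enum with `UseDefault` and `RequireEscalated` variants. As a standalone sketch of why that migration helps (the enum and `describe` function below are illustrative, not the actual codex-rs types; only the two variant names are taken from this diff):

```rust
// Hypothetical mirror of the enum introduced in this diff: a named
// variant at the call site documents intent where `true`/`false` did not.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SandboxPermissions {
    UseDefault,
    RequireEscalated,
}

// Illustrative consumer: matching on the enum is exhaustive, so adding a
// third permission level later becomes a compile error at every call site,
// unlike a silently reinterpreted bool.
fn describe(perms: SandboxPermissions) -> &'static str {
    match perms {
        SandboxPermissions::UseDefault => "run under the default sandbox",
        SandboxPermissions::RequireEscalated => "request escalated permissions",
    }
}

fn main() {
    // Old style: prepare(..., false) — unreadable without checking the signature.
    // New style: prepare(..., SandboxPermissions::UseDefault) — self-describing.
    assert_eq!(describe(SandboxPermissions::UseDefault), "run under the default sandbox");
    println!("{}", describe(SandboxPermissions::RequireEscalated));
}
```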

Some files were not shown because too many files have changed in this diff.