clean up when re-rendering

fixing terminal rendering
verify MIME type of images (#5888)
2026-02-02 23:13:37 +00:00 · 2025-10-28 17:43:55 -07:00 · 2025-10-28 16:59:38 -07:00 · 2025-10-28 14:52:51 -07:00 · 2025-10-28 14:42:46 -07:00 · 2025-10-28 21:28:56 +00:00
214 changed files with 13559 additions and 3725 deletions
--- a/.github/workflows/rust-ci.yml
+++ b/.github/workflows/rust-ci.yml
@@ -9,7 +9,7 @@ on:
 # CI builds in debug (dev) for faster signal.

 jobs:
-  # --- Detect what changed (always runs) -------------------------------------
+  # --- Detect what changed to decide which tests to run (always runs) -------------------------------------
  changed:
    name: Detect changed areas
    runs-on: ubuntu-24.04
@@ -84,8 +84,8 @@ jobs:
        run: cargo shear

  # --- CI to validate on different os/targets --------------------------------
-  lint_build_test:
-    name: ${{ matrix.runner }} - ${{ matrix.target }}${{ matrix.profile == 'release' && ' (release)' || '' }}
+  lint_build:
+    name: Lint/Build — ${{ matrix.runner }} - ${{ matrix.target }}${{ matrix.profile == 'release' && ' (release)' || '' }}
    runs-on: ${{ matrix.runner }}
    timeout-minutes: 30
    needs: changed
@@ -94,6 +94,11 @@ jobs:
    defaults:
      run:
        working-directory: codex-rs
+    env:
+      # Speed up repeated builds across CI runs by caching compiled objects.
+      RUSTC_WRAPPER: sccache
+      CARGO_INCREMENTAL: "0"
+      SCCACHE_CACHE_SIZE: 10G

    strategy:
      fail-fast: false
@@ -159,20 +164,83 @@ jobs:
            ~/.cargo/registry/index/
            ~/.cargo/registry/cache/
            ~/.cargo/git/db/
-          key: cargo-home-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}
+          key: cargo-home-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}-${{ hashFiles('codex-rs/rust-toolchain.toml') }}
+          restore-keys: |
+            cargo-home-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-

-      - name: Restore target cache (except gnu-dev)
-        id: cache_target_restore
-        if: ${{ !(matrix.target == 'x86_64-unknown-linux-gnu' && matrix.profile != 'release') }}
+      # Install and restore sccache cache
+      - name: Install sccache
+        uses: taiki-e/install-action@0c5db7f7f897c03b771660e91d065338615679f4 # v2
+        with:
+          tool: sccache
+          version: 0.7.5
+
+      - name: Configure sccache backend
+        shell: bash
+        run: |
+          set -euo pipefail
+          if [[ -n "${ACTIONS_CACHE_URL:-}" && -n "${ACTIONS_RUNTIME_TOKEN:-}" ]]; then
+            echo "SCCACHE_GHA_ENABLED=true" >> "$GITHUB_ENV"
+            echo "Using sccache GitHub backend"
+          else
+            echo "SCCACHE_GHA_ENABLED=false" >> "$GITHUB_ENV"
+            echo "SCCACHE_DIR=${{ github.workspace }}/.sccache" >> "$GITHUB_ENV"
+            echo "Using sccache local disk + actions/cache fallback"
+          fi
+
+      - name: Restore sccache cache (fallback)
+        if: ${{ env.SCCACHE_GHA_ENABLED != 'true' }}
+        id: cache_sccache_restore
        uses: actions/cache/restore@v4
        with:
-          path: ${{ github.workspace }}/codex-rs/target/
-          key: cargo-target-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}
+          path: ${{ github.workspace }}/.sccache/
+          key: sccache-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}-${{ github.run_id }}
+          restore-keys: |
+            sccache-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}-
+            sccache-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-
+
+      - if: ${{ matrix.target == 'x86_64-unknown-linux-musl' || matrix.target == 'aarch64-unknown-linux-musl'}}
+        name: Prepare APT cache directories (musl)
+        shell: bash
+        run: |
+          set -euo pipefail
+          sudo mkdir -p /var/cache/apt/archives /var/lib/apt/lists
+          sudo chown -R "$USER:$USER" /var/cache/apt /var/lib/apt/lists
+
+      - if: ${{ matrix.target == 'x86_64-unknown-linux-musl' || matrix.target == 'aarch64-unknown-linux-musl'}}
+        name: Restore APT cache (musl)
+        id: cache_apt_restore
+        uses: actions/cache/restore@v4
+        with:
+          path: |
+            /var/cache/apt
+          key: apt-${{ matrix.runner }}-${{ matrix.target }}-v1

      - if: ${{ matrix.target == 'x86_64-unknown-linux-musl' || matrix.target == 'aarch64-unknown-linux-musl'}}
        name: Install musl build tools
+        env:
+          DEBIAN_FRONTEND: noninteractive
+        shell: bash
        run: |
-          sudo apt install -y musl-tools pkg-config && sudo rm -rf /var/lib/apt/lists/*
+          set -euo pipefail
+          sudo apt-get -y update -o Acquire::Retries=3
+          sudo apt-get -y install --no-install-recommends musl-tools pkg-config
+
+      - name: Install cargo-chef
+        if: ${{ matrix.profile == 'release' }}
+        uses: taiki-e/install-action@0c5db7f7f897c03b771660e91d065338615679f4 # v2
+        with:
+          tool: cargo-chef
+          version: 0.1.71
+
+      - name: Pre-warm dependency cache (cargo-chef)
+        if: ${{ matrix.profile == 'release' }}
+        shell: bash
+        run: |
+          set -euo pipefail
+          RECIPE="${RUNNER_TEMP}/chef-recipe.json"
+          cargo chef prepare --recipe-path "$RECIPE"
+          cargo chef cook --recipe-path "$RECIPE" --target ${{ matrix.target }} --release --all-features

      - name: cargo clippy
        id: clippy
@@ -191,20 +259,6 @@ jobs:
          find . -name Cargo.toml -mindepth 2 -maxdepth 2 -print0 \
            | xargs -0 -n1 -I{} bash -c 'cd "$(dirname "{}")" && cargo check --profile ${{ matrix.profile }}'

-      - uses: taiki-e/install-action@0c5db7f7f897c03b771660e91d065338615679f4 # v2
-        with:
-          tool: nextest
-          version: 0.9.103
-
-      - name: tests
-        id: test
-        # Tests take too long for release builds to run them on every PR.
-        if: ${{ matrix.profile != 'release' }}
-        continue-on-error: true
-        run: cargo nextest run --all-features --no-fail-fast --target ${{ matrix.target }} --cargo-profile ci-test
-        env:
-          RUST_BACKTRACE: 1
-
      # Save caches explicitly; make non-fatal so cache packaging
      # never fails the overall job. Only save when key wasn't hit.
      - name: Save cargo home cache
@@ -217,33 +271,193 @@ jobs:
            ~/.cargo/registry/index/
            ~/.cargo/registry/cache/
            ~/.cargo/git/db/
-          key: cargo-home-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}
+          key: cargo-home-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}-${{ hashFiles('codex-rs/rust-toolchain.toml') }}

-      - name: Save target cache (except gnu-dev)
-        if: >-
-          always() && !cancelled() &&
-          (steps.cache_target_restore.outputs.cache-hit != 'true') &&
-          !(matrix.target == 'x86_64-unknown-linux-gnu' && matrix.profile != 'release')
+      - name: Save sccache cache (fallback)
+        if: always() && !cancelled() && env.SCCACHE_GHA_ENABLED != 'true'
        continue-on-error: true
        uses: actions/cache/save@v4
        with:
-          path: ${{ github.workspace }}/codex-rs/target/
-          key: cargo-target-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}
+          path: ${{ github.workspace }}/.sccache/
+          key: sccache-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}-${{ github.run_id }}
+
+      - name: sccache stats
+        if: always()
+        continue-on-error: true
+        run: sccache --show-stats || true
+
+      - name: sccache summary
+        if: always()
+        shell: bash
+        run: |
+          {
+            echo "### sccache stats — ${{ matrix.target }} (${{ matrix.profile }})";
+            echo;
+            echo '```';
+            sccache --show-stats || true;
+            echo '```';
+          } >> "$GITHUB_STEP_SUMMARY"
+
+      - name: Save APT cache (musl)
+        if: always() && !cancelled() && (matrix.target == 'x86_64-unknown-linux-musl' || matrix.target == 'aarch64-unknown-linux-musl') && steps.cache_apt_restore.outputs.cache-hit != 'true'
+        continue-on-error: true
+        uses: actions/cache/save@v4
+        with:
+          path: |
+            /var/cache/apt
+          key: apt-${{ matrix.runner }}-${{ matrix.target }}-v1

      # Fail the job if any of the previous steps failed.
      - name: verify all steps passed
        if: |
          steps.clippy.outcome == 'failure' ||
-          steps.cargo_check_all_crates.outcome == 'failure' ||
-          steps.test.outcome == 'failure'
+          steps.cargo_check_all_crates.outcome == 'failure'
        run: |
-          echo "One or more checks failed (clippy, cargo_check_all_crates, or test). See logs for details."
+          echo "One or more checks failed (clippy or cargo_check_all_crates). See logs for details."
+          exit 1
+
+  tests:
+    name: Tests — ${{ matrix.runner }} - ${{ matrix.target }}
+    runs-on: ${{ matrix.runner }}
+    timeout-minutes: 30
+    needs: changed
+    if: ${{ needs.changed.outputs.codex == 'true' || needs.changed.outputs.workflows == 'true' || github.event_name == 'push' }}
+    defaults:
+      run:
+        working-directory: codex-rs
+    env:
+      RUSTC_WRAPPER: sccache
+      CARGO_INCREMENTAL: "0"
+      SCCACHE_CACHE_SIZE: 10G
+
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - runner: macos-14
+            target: aarch64-apple-darwin
+            profile: dev
+          - runner: ubuntu-24.04
+            target: x86_64-unknown-linux-gnu
+            profile: dev
+          - runner: ubuntu-24.04-arm
+            target: aarch64-unknown-linux-gnu
+            profile: dev
+          - runner: windows-latest
+            target: x86_64-pc-windows-msvc
+            profile: dev
+          - runner: windows-11-arm
+            target: aarch64-pc-windows-msvc
+            profile: dev
+
+    steps:
+      - uses: actions/checkout@v5
+      - uses: dtolnay/rust-toolchain@1.90
+        with:
+          targets: ${{ matrix.target }}
+
+      - name: Restore cargo home cache
+        id: cache_cargo_home_restore
+        uses: actions/cache/restore@v4
+        with:
+          path: |
+            ~/.cargo/bin/
+            ~/.cargo/registry/index/
+            ~/.cargo/registry/cache/
+            ~/.cargo/git/db/
+          key: cargo-home-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}-${{ hashFiles('codex-rs/rust-toolchain.toml') }}
+          restore-keys: |
+            cargo-home-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-
+
+      - name: Install sccache
+        uses: taiki-e/install-action@0c5db7f7f897c03b771660e91d065338615679f4 # v2
+        with:
+          tool: sccache
+          version: 0.7.5
+
+      - name: Configure sccache backend
+        shell: bash
+        run: |
+          set -euo pipefail
+          if [[ -n "${ACTIONS_CACHE_URL:-}" && -n "${ACTIONS_RUNTIME_TOKEN:-}" ]]; then
+            echo "SCCACHE_GHA_ENABLED=true" >> "$GITHUB_ENV"
+            echo "Using sccache GitHub backend"
+          else
+            echo "SCCACHE_GHA_ENABLED=false" >> "$GITHUB_ENV"
+            echo "SCCACHE_DIR=${{ github.workspace }}/.sccache" >> "$GITHUB_ENV"
+            echo "Using sccache local disk + actions/cache fallback"
+          fi
+
+      - name: Restore sccache cache (fallback)
+        if: ${{ env.SCCACHE_GHA_ENABLED != 'true' }}
+        id: cache_sccache_restore
+        uses: actions/cache/restore@v4
+        with:
+          path: ${{ github.workspace }}/.sccache/
+          key: sccache-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}-${{ github.run_id }}
+          restore-keys: |
+            sccache-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}-
+            sccache-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-
+
+      - uses: taiki-e/install-action@0c5db7f7f897c03b771660e91d065338615679f4 # v2
+        with:
+          tool: nextest
+          version: 0.9.103
+
+      - name: tests
+        id: test
+        continue-on-error: true
+        run: cargo nextest run --all-features --no-fail-fast --target ${{ matrix.target }} --cargo-profile ci-test
+        env:
+          RUST_BACKTRACE: 1
+
+      - name: Save cargo home cache
+        if: always() && !cancelled() && steps.cache_cargo_home_restore.outputs.cache-hit != 'true'
+        continue-on-error: true
+        uses: actions/cache/save@v4
+        with:
+          path: |
+            ~/.cargo/bin/
+            ~/.cargo/registry/index/
+            ~/.cargo/registry/cache/
+            ~/.cargo/git/db/
+          key: cargo-home-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}-${{ hashFiles('codex-rs/rust-toolchain.toml') }}
+
+      - name: Save sccache cache (fallback)
+        if: always() && !cancelled() && env.SCCACHE_GHA_ENABLED != 'true'
+        continue-on-error: true
+        uses: actions/cache/save@v4
+        with:
+          path: ${{ github.workspace }}/.sccache/
+          key: sccache-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}-${{ github.run_id }}
+
+      - name: sccache stats
+        if: always()
+        continue-on-error: true
+        run: sccache --show-stats || true
+
+      - name: sccache summary
+        if: always()
+        shell: bash
+        run: |
+          {
+            echo "### sccache stats — ${{ matrix.target }} (tests)";
+            echo;
+            echo '```';
+            sccache --show-stats || true;
+            echo '```';
+          } >> "$GITHUB_STEP_SUMMARY"
+
+      - name: verify tests passed
+        if: steps.test.outcome == 'failure'
+        run: |
+          echo "Tests failed. See logs for details."
          exit 1

  # --- Gatherer job that you mark as the ONLY required status -----------------
  results:
    name: CI results (required)
-    needs: [changed, general, cargo_shear, lint_build_test]
+    needs: [changed, general, cargo_shear, lint_build, tests]
    if: always()
    runs-on: ubuntu-24.04
    steps:
@@ -252,7 +466,8 @@ jobs:
        run: |
          echo "general: ${{ needs.general.result }}"
          echo "shear  : ${{ needs.cargo_shear.result }}"
-          echo "matrix : ${{ needs.lint_build_test.result }}"
+          echo "lint   : ${{ needs.lint_build.result }}"
+          echo "tests  : ${{ needs.tests.result }}"

          # If nothing relevant changed (PR touching only root README, etc.),
          # declare success regardless of other jobs.
@@ -264,4 +479,10 @@ jobs:
          # Otherwise require the jobs to have succeeded
          [[ '${{ needs.general.result }}' == 'success' ]] || { echo 'general failed'; exit 1; }
          [[ '${{ needs.cargo_shear.result }}' == 'success' ]] || { echo 'cargo_shear failed'; exit 1; }
-          [[ '${{ needs.lint_build_test.result }}' == 'success' ]] || { echo 'matrix failed'; exit 1; }
+          [[ '${{ needs.lint_build.result }}' == 'success' ]] || { echo 'lint_build failed'; exit 1; }
+          [[ '${{ needs.tests.result }}' == 'success' ]] || { echo 'tests failed'; exit 1; }
+
+      - name: sccache summary note
+        if: always()
+        run: |
+          echo "Per-job sccache stats are attached to each matrix job's Step Summary."
--- a/README.md
+++ b/README.md
@@ -33,6 +33,8 @@ Then simply run `codex` to get started:
 codex
 ```

+If you're running into upgrade issues with Homebrew, see the [FAQ entry on brew upgrade codex](./docs/faq.md#brew-update-codex-isnt-upgrading-me).
+
 <details>
 <summary>You can also go to the <a href="https://github.com/openai/codex/releases/latest">latest GitHub Release</a> and download the appropriate binary for your platform.</summary>

--- a/codex-rs/Cargo.lock
+++ b/codex-rs/Cargo.lock
@@ -843,6 +843,7 @@ dependencies = [
 "codex-backend-client",
 "codex-common",
 "codex-core",
+ "codex-feedback",
 "codex-file-search",
 "codex-login",
 "codex-protocol",
@@ -853,6 +854,7 @@ dependencies = [
 "pretty_assertions",
 "serde",
 "serde_json",
+ "serial_test",
 "tempfile",
 "tokio",
 "toml",
@@ -1061,11 +1063,15 @@ dependencies = [
 "codex-apply-patch",
 "codex-async-utils",
 "codex-file-search",
+ "codex-git-tooling",
+ "codex-keyring-store",
 "codex-otel",
 "codex-protocol",
 "codex-rmcp-client",
 "codex-utils-pty",
+ "codex-utils-readiness",
 "codex-utils-string",
+ "codex-utils-tokenizer",
 "core-foundation 0.9.4",
 "core_test_support",
 "dirs",
@@ -1074,7 +1080,10 @@ dependencies = [
 "escargot",
 "eventsource-stream",
 "futures",
+ "http",
+ "image",
 "indexmap 2.10.0",
+ "keyring",
 "landlock",
 "libc",
 "maplit",
@@ -1091,6 +1100,7 @@ dependencies = [
 "serde_json",
 "serial_test",
 "sha1",
+ "sha2",
 "shlex",
 "similar",
 "strum_macros 0.27.2",
@@ -1206,11 +1216,22 @@ version = "0.0.0"
 dependencies = [
 "assert_matches",
 "pretty_assertions",
+ "schemars 0.8.22",
+ "serde",
 "tempfile",
 "thiserror 2.0.16",
+ "ts-rs",
 "walkdir",
 ]

+[[package]]
+name = "codex-keyring-store"
+version = "0.0.0"
+dependencies = [
+ "keyring",
+ "tracing",
+]
+
 [[package]]
 name = "codex-linux-sandbox"
 version = "0.0.0"
@@ -1325,6 +1346,8 @@ version = "0.0.0"
 dependencies = [
 "anyhow",
 "base64",
+ "codex-git-tooling",
+ "codex-utils-image",
 "icu_decimal",
 "icu_locale_core",
 "mcp-types",
@@ -1374,6 +1397,7 @@ version = "0.0.0"
 dependencies = [
 "anyhow",
 "axum",
+ "codex-keyring-store",
 "codex-protocol",
 "dirs",
 "escargot",
@@ -1425,7 +1449,6 @@ dependencies = [
 "codex-core",
 "codex-feedback",
 "codex-file-search",
- "codex-git-tooling",
 "codex-login",
 "codex-ollama",
 "codex-protocol",
@@ -1470,6 +1493,27 @@ dependencies = [
 "vt100",
 ]

+[[package]]
+name = "codex-utils-cache"
+version = "0.0.0"
+dependencies = [
+ "lru",
+ "sha1",
+ "tokio",
+]
+
+[[package]]
+name = "codex-utils-image"
+version = "0.0.0"
+dependencies = [
+ "base64",
+ "codex-utils-cache",
+ "image",
+ "tempfile",
+ "thiserror 2.0.16",
+ "tokio",
+]
+
 [[package]]
 name = "codex-utils-json-to-toml"
 version = "0.0.0"
@@ -1633,6 +1677,7 @@ dependencies = [
 "anyhow",
 "assert_cmd",
 "codex-core",
+ "codex-protocol",
 "notify",
 "regex-lite",
 "serde_json",
@@ -4952,9 +4997,9 @@ dependencies = [

 [[package]]
 name = "rmcp"
-version = "0.8.2"
+version = "0.8.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4e35d31f89beb59c83bc31363426da25b323ce0c2e5b53c7bf29867d16ee7898"
+checksum = "1fdad1258f7259fdc0f2dfc266939c82c3b5d1fd72bcde274d600cdc27e60243"
 dependencies = [
 "base64",
 "bytes",
@@ -4986,9 +5031,9 @@ dependencies = [

 [[package]]
 name = "rmcp-macros"
-version = "0.8.2"
+version = "0.8.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d88518b38110c439a03f0f4eee40e5105d648a530711cb87f98991e3f324a664"
+checksum = "ede0589a208cc7ce81d1be68aa7e74b917fcd03c81528408bab0457e187dcd9b"
 dependencies = [
 "darling 0.21.3",
 "proc-macro2",
@@ -5454,9 +5499,9 @@ dependencies = [

 [[package]]
 name = "serde"
-version = "1.0.226"
+version = "1.0.228"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0dca6411025b24b60bfa7ec1fe1f8e710ac09782dca409ee8237ba74b51295fd"
+checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
 dependencies = [
 "serde_core",
 "serde_derive",
@@ -5464,18 +5509,18 @@ dependencies = [

 [[package]]
 name = "serde_core"
-version = "1.0.226"
+version = "1.0.228"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ba2ba63999edb9dac981fb34b3e5c0d111a69b0924e253ed29d83f7c99e966a4"
+checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
 dependencies = [
 "serde_derive",
 ]

 [[package]]
 name = "serde_derive"
-version = "1.0.226"
+version = "1.0.228"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8db53ae22f34573731bafa1db20f04027b2d25e02d8205921b569171699cdb33"
+checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
 dependencies = [
 "proc-macro2",
 "quote",
--- a/codex-rs/Cargo.toml
+++ b/codex-rs/Cargo.toml
@@ -16,6 +16,7 @@ members = [
    "core",
    "exec",
    "execpolicy",
+    "keyring-store",
    "file-search",
    "git-tooling",
    "linux-sandbox",
@@ -32,9 +33,11 @@ members = [
    "otel",
    "tui",
    "git-apply",
+    "utils/cache",
+    "utils/image",
    "utils/json-to-toml",
-    "utils/readiness",
    "utils/pty",
+    "utils/readiness",
    "utils/string",
    "utils/tokenizer",
 ]
@@ -65,6 +68,7 @@ codex-exec = { path = "exec" }
 codex-feedback = { path = "feedback" }
 codex-file-search = { path = "file-search" }
 codex-git-tooling = { path = "git-tooling" }
+codex-keyring-store = { path = "keyring-store" }
 codex-linux-sandbox = { path = "linux-sandbox" }
 codex-login = { path = "login" }
 codex-mcp-server = { path = "mcp-server" }
@@ -77,6 +81,8 @@ codex-responses-api-proxy = { path = "responses-api-proxy" }
 codex-rmcp-client = { path = "rmcp-client" }
 codex-stdio-to-uds = { path = "stdio-to-uds" }
 codex-tui = { path = "tui" }
+codex-utils-cache = { path = "utils/cache" }
+codex-utils-image = { path = "utils/image" }
 codex-utils-json-to-toml = { path = "utils/json-to-toml" }
 codex-utils-pty = { path = "utils/pty" }
 codex-utils-readiness = { path = "utils/readiness" }
@@ -116,6 +122,7 @@ env_logger = "0.11.5"
 escargot = "0.5"
 eventsource-stream = "0.2.3"
 futures = { version = "0.3", default-features = false }
+http = "1.3.1"
 icu_decimal = "2.0.0"
 icu_locale_core = "2.0.0"
 ignore = "0.4.23"
@@ -128,6 +135,7 @@ landlock = "0.4.1"
 lazy_static = "1"
 libc = "0.2.175"
 log = "0.4"
+lru = "0.12.5"
 maplit = "1.0.2"
 mime_guess = "2.0.5"
 multimap = "0.10.0"
@@ -153,7 +161,7 @@ ratatui = "0.29.0"
 ratatui-macros = "0.6.0"
 regex-lite = "0.1.7"
 reqwest = "0.12"
-rmcp = { version = "0.8.2", default-features = false }
+rmcp = { version = "0.8.3", default-features = false }
 schemars = "0.8.22"
 seccompiler = "0.5.0"
 sentry = "0.34.0"
--- a/codex-rs/app-server-protocol/src/protocol.rs
+++ b/codex-rs/app-server-protocol/src/protocol.rs
@@ -5,6 +5,7 @@ use crate::JSONRPCNotification;
 use crate::JSONRPCRequest;
 use crate::RequestId;
 use codex_protocol::ConversationId;
+use codex_protocol::account::PlanType;
 use codex_protocol::config_types::ForcedLoginMethod;
 use codex_protocol::config_types::ReasoningEffort;
 use codex_protocol::config_types::ReasoningSummary;
@@ -16,6 +17,7 @@ use codex_protocol::protocol::EventMsg;
 use codex_protocol::protocol::FileChange;
 use codex_protocol::protocol::RateLimitSnapshot;
 use codex_protocol::protocol::ReviewDecision;
+use codex_protocol::protocol::SandboxCommandAssessment;
 use codex_protocol::protocol::SandboxPolicy;
 use codex_protocol::protocol::TurnAbortReason;
 use paste::paste;
@@ -93,6 +95,50 @@ macro_rules! client_request_definitions {
 }

 client_request_definitions! {
+    /// NEW APIs
+    #[serde(rename = "model/list")]
+    #[ts(rename = "model/list")]
+    ListModels {
+        params: ListModelsParams,
+        response: ListModelsResponse,
+    },
+
+    #[serde(rename = "account/login")]
+    #[ts(rename = "account/login")]
+    LoginAccount {
+        params: LoginAccountParams,
+        response: LoginAccountResponse,
+    },
+
+    #[serde(rename = "account/logout")]
+    #[ts(rename = "account/logout")]
+    LogoutAccount {
+        params: #[ts(type = "undefined")] #[serde(skip_serializing_if = "Option::is_none")] Option<()>,
+        response: LogoutAccountResponse,
+    },
+
+    #[serde(rename = "account/rateLimits/read")]
+    #[ts(rename = "account/rateLimits/read")]
+    GetAccountRateLimits {
+        params: #[ts(type = "undefined")] #[serde(skip_serializing_if = "Option::is_none")] Option<()>,
+        response: GetAccountRateLimitsResponse,
+    },
+
+    #[serde(rename = "feedback/upload")]
+    #[ts(rename = "feedback/upload")]
+    UploadFeedback {
+        params: UploadFeedbackParams,
+        response: UploadFeedbackResponse,
+    },
+
+    #[serde(rename = "account/read")]
+    #[ts(rename = "account/read")]
+    GetAccount {
+        params: #[ts(type = "undefined")] #[serde(skip_serializing_if = "Option::is_none")] Option<()>,
+        response: GetAccountResponse,
+    },
+
+    /// DEPRECATED APIs below
    Initialize {
        params: InitializeParams,
        response: InitializeResponse,
@@ -101,18 +147,15 @@ client_request_definitions! {
        params: NewConversationParams,
        response: NewConversationResponse,
    },
+    GetConversationSummary {
+        params: GetConversationSummaryParams,
+        response: GetConversationSummaryResponse,
+    },
    /// List recorded Codex conversations (rollouts) with optional pagination and search.
    ListConversations {
        params: ListConversationsParams,
        response: ListConversationsResponse,
    },
-    #[serde(rename = "model/list")]
-    #[ts(rename = "model/list")]
-    /// List available Codex models along with display metadata.
-    ListModels {
-        params: ListModelsParams,
-        response: ListModelsResponse,
-    },
    /// Resume a recorded Codex conversation from a rollout file.
    ResumeConversation {
        params: ResumeConversationParams,
@@ -191,14 +234,30 @@ client_request_definitions! {
        params: ExecOneOffCommandParams,
        response: ExecOneOffCommandResponse,
    },
-    #[serde(rename = "account/rateLimits/read")]
-    #[ts(rename = "account/rateLimits/read")]
-    GetAccountRateLimits {
-        params: #[ts(type = "undefined")] #[serde(skip_serializing_if = "Option::is_none")] Option<()>,
-        response: GetAccountRateLimitsResponse,
+}
+
+#[derive(Debug, Clone, PartialEq, Deserialize, Serialize, JsonSchema, TS)]
+#[serde(tag = "type", rename_all = "camelCase")]
+#[ts(tag = "type")]
+pub enum Account {
+    #[serde(rename = "apiKey", rename_all = "camelCase")]
+    #[ts(rename = "apiKey", rename_all = "camelCase")]
+    ApiKey { api_key: String },
+
+    #[serde(rename = "chatgpt", rename_all = "camelCase")]
+    #[ts(rename = "chatgpt", rename_all = "camelCase")]
+    ChatGpt {
+        email: Option<String>,
+        plan_type: PlanType,
    },
 }

+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+pub struct GetAccountResponse {
+    pub account: Account,
+}
+
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Default, JsonSchema, TS)]
 #[serde(rename_all = "camelCase")]
 pub struct InitializeParams {
@@ -227,6 +286,10 @@ pub struct NewConversationParams {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model: Option<String>,

+    /// Override the model provider to use for this session.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub model_provider: Option<String>,
+
    /// Configuration profile from config.toml to specify default options.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub profile: Option<String>,
@@ -279,6 +342,18 @@ pub struct ResumeConversationResponse {
    pub initial_messages: Option<Vec<EventMsg>>,
 }

+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+pub struct GetConversationSummaryParams {
+    pub rollout_path: PathBuf,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+pub struct GetConversationSummaryResponse {
+    pub summary: ConversationSummary, // summary of the rollout named by `GetConversationSummaryParams::rollout_path`
+}
+
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Default, JsonSchema, TS)]
 #[serde(rename_all = "camelCase")]
 pub struct ListConversationsParams {
@@ -288,6 +363,12 @@ pub struct ListConversationsParams {
    /// Opaque pagination cursor returned by a previous call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cursor: Option<String>,
+    /// Optional model provider filter (matches against session metadata).
+    /// - None => filter by the server's default model provider
+    /// - Some([]) => no filtering, include all providers
+    /// - Some([...]) => only include sessions with one of the specified providers
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub model_providers: Option<Vec<String>>,
 }

 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
@@ -299,6 +380,8 @@ pub struct ConversationSummary {
    /// RFC3339 timestamp string for the session start, if available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub timestamp: Option<String>,
+    /// Model provider recorded for the session (resolved when absent in metadata).
+    pub model_provider: String,
 }

 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
@@ -352,6 +435,55 @@ pub struct ListModelsResponse {
    pub next_cursor: Option<String>,
 }

+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+pub struct UploadFeedbackParams {
+    pub classification: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub reason: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub conversation_id: Option<ConversationId>,
+    pub include_logs: bool,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+pub struct UploadFeedbackResponse {
+    pub thread_id: String,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(tag = "type")]
+#[ts(tag = "type")]
+pub enum LoginAccountParams {
+    #[serde(rename = "apiKey")]
+    #[ts(rename = "apiKey")]
+    ApiKey {
+        #[serde(rename = "apiKey")]
+        #[ts(rename = "apiKey")]
+        api_key: String,
+    },
+    #[serde(rename = "chatgpt")]
+    #[ts(rename = "chatgpt")]
+    ChatGpt,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+pub struct LoginAccountResponse {
+    /// Only set if the login method is ChatGPT; the JSON schema models it as a nullable string.
+    #[schemars(with = "Option<String>")]
+    pub login_id: Option<Uuid>,
+
+    /// URL the client should open in a browser to initiate the OAuth flow.
+    /// Only set if the login method is ChatGPT.
+    pub auth_url: Option<String>,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+pub struct LogoutAccountResponse {}
+
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
 #[serde(rename_all = "camelCase")]
 pub struct ResumeConversationParams {
@@ -653,6 +785,8 @@ pub struct SendUserMessageResponse {}
 #[serde(rename_all = "camelCase")]
 pub struct AddConversationListenerParams {
    pub conversation_id: ConversationId,
+    #[serde(default)]
+    pub experimental_raw_events: bool,
 }

 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
@@ -784,6 +918,8 @@ pub struct ExecCommandApprovalParams {
    pub cwd: PathBuf,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub reason: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub risk: Option<SandboxCommandAssessment>,
    pub parsed_cmd: Vec<ParsedCommand>,
 }

@@ -875,6 +1011,13 @@ pub struct AuthStatusChangeNotification {
 #[serde(tag = "method", content = "params", rename_all = "camelCase")]
 #[strum(serialize_all = "camelCase")]
 pub enum ServerNotification {
+    // NEW NOTIFICATIONS
+    #[serde(rename = "account/rateLimits/updated")]
+    #[ts(rename = "account/rateLimits/updated")]
+    #[strum(serialize = "account/rateLimits/updated")]
+    AccountRateLimitsUpdated(RateLimitSnapshot),
+
+    // DEPRECATED NOTIFICATIONS below
    /// Authentication status changed
    AuthStatusChange(AuthStatusChangeNotification),

@@ -888,6 +1031,7 @@ pub enum ServerNotification {
 impl ServerNotification {
    pub fn to_params(self) -> Result<serde_json::Value, serde_json::Error> {
        match self {
+            ServerNotification::AccountRateLimitsUpdated(params) => serde_json::to_value(params),
            ServerNotification::AuthStatusChange(params) => serde_json::to_value(params),
            ServerNotification::LoginChatGptComplete(params) => serde_json::to_value(params),
            ServerNotification::SessionConfigured(params) => serde_json::to_value(params),
@@ -924,6 +1068,7 @@ mod tests {
            request_id: RequestId::Integer(42),
            params: NewConversationParams {
                model: Some("gpt-5-codex".to_string()),
+                model_provider: None,
                profile: None,
                cwd: None,
                approval_policy: Some(AskForApproval::OnRequest),
@@ -992,6 +1137,7 @@ mod tests {
            command: vec!["echo".to_string(), "hello".to_string()],
            cwd: PathBuf::from("/tmp"),
            reason: Some("because tests".to_string()),
+            risk: None,
            parsed_cmd: vec![ParsedCommand::Unknown {
                cmd: "echo hello".to_string(),
            }],
@@ -1043,16 +1189,118 @@ mod tests {
        Ok(())
    }

+    #[test]
+    fn serialize_account_login_api_key() -> Result<()> {
+        let request = ClientRequest::LoginAccount {
+            request_id: RequestId::Integer(2),
+            params: LoginAccountParams::ApiKey {
+                api_key: "secret".to_string(),
+            },
+        };
+        assert_eq!(
+            json!({
+                "method": "account/login",
+                "id": 2,
+                "params": {
+                    "type": "apiKey",
+                    "apiKey": "secret"
+                }
+            }),
+            serde_json::to_value(&request)?,
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn serialize_account_login_chatgpt() -> Result<()> {
+        let request = ClientRequest::LoginAccount {
+            request_id: RequestId::Integer(3),
+            params: LoginAccountParams::ChatGpt,
+        };
+        assert_eq!(
+            json!({
+                "method": "account/login",
+                "id": 3,
+                "params": {
+                    "type": "chatgpt"
+                }
+            }),
+            serde_json::to_value(&request)?,
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn serialize_account_logout() -> Result<()> {
+        let request = ClientRequest::LogoutAccount {
+            request_id: RequestId::Integer(4),
+            params: None,
+        };
+        assert_eq!(
+            json!({
+                "method": "account/logout",
+                "id": 4,
+            }),
+            serde_json::to_value(&request)?,
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn serialize_get_account() -> Result<()> {
+        let request = ClientRequest::GetAccount {
+            request_id: RequestId::Integer(5),
+            params: None,
+        };
+        assert_eq!(
+            json!({
+                "method": "account/read",
+                "id": 5,
+            }),
+            serde_json::to_value(&request)?,
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn account_serializes_fields_in_camel_case() -> Result<()> {
+        let api_key = Account::ApiKey {
+            api_key: "secret".to_string(),
+        };
+        assert_eq!(
+            json!({
+                "type": "apiKey",
+                "apiKey": "secret",
+            }),
+            serde_json::to_value(&api_key)?,
+        );
+
+        let chatgpt = Account::ChatGpt {
+            email: Some("user@example.com".to_string()),
+            plan_type: PlanType::Plus,
+        };
+        assert_eq!(
+            json!({
+                "type": "chatgpt",
+                "email": "user@example.com",
+                "planType": "plus",
+            }),
+            serde_json::to_value(&chatgpt)?,
+        );
+
+        Ok(())
+    }
+
    #[test]
    fn serialize_list_models() -> Result<()> {
        let request = ClientRequest::ListModels {
-            request_id: RequestId::Integer(2),
+            request_id: RequestId::Integer(6),
            params: ListModelsParams::default(),
        };
        assert_eq!(
            json!({
                "method": "model/list",
-                "id": 2,
+                "id": 6,
                "params": {}
            }),
            serde_json::to_value(&request)?,
--- a/codex-rs/app-server/Cargo.toml
+++ b/codex-rs/app-server/Cargo.toml
@@ -24,6 +24,7 @@ codex-file-search = { workspace = true }
 codex-login = { workspace = true }
 codex-protocol = { workspace = true }
 codex-app-server-protocol = { workspace = true }
+codex-feedback = { workspace = true }
 codex-utils-json-to-toml = { workspace = true }
 chrono = { workspace = true }
 serde = { workspace = true, features = ["derive"] }
@@ -47,6 +48,7 @@ base64 = { workspace = true }
 core_test_support = { workspace = true }
 os_info = { workspace = true }
 pretty_assertions = { workspace = true }
+serial_test = { workspace = true }
 tempfile = { workspace = true }
 toml = { workspace = true }
 wiremock = { workspace = true }
--- a/codex-rs/app-server/src/codex_message_processor.rs
+++ b/codex-rs/app-server/src/codex_message_processor.rs
@@ -21,6 +21,8 @@ use codex_app_server_protocol::ExecOneOffCommandResponse;
 use codex_app_server_protocol::FuzzyFileSearchParams;
 use codex_app_server_protocol::FuzzyFileSearchResponse;
 use codex_app_server_protocol::GetAccountRateLimitsResponse;
+use codex_app_server_protocol::GetConversationSummaryParams;
+use codex_app_server_protocol::GetConversationSummaryResponse;
 use codex_app_server_protocol::GetUserAgentResponse;
 use codex_app_server_protocol::GetUserSavedConfigResponse;
 use codex_app_server_protocol::GitDiffToRemoteResponse;
@@ -52,6 +54,8 @@ use codex_app_server_protocol::ServerRequestPayload;
 use codex_app_server_protocol::SessionConfiguredNotification;
 use codex_app_server_protocol::SetDefaultModelParams;
 use codex_app_server_protocol::SetDefaultModelResponse;
+use codex_app_server_protocol::UploadFeedbackParams;
+use codex_app_server_protocol::UploadFeedbackResponse;
 use codex_app_server_protocol::UserInfoResponse;
 use codex_app_server_protocol::UserSavedConfig;
 use codex_backend_client::Client as BackendClient;
@@ -64,9 +68,7 @@ use codex_core::NewConversation;
 use codex_core::RolloutRecorder;
 use codex_core::SessionMeta;
 use codex_core::auth::CLIENT_ID;
-use codex_core::auth::get_auth_file;
 use codex_core::auth::login_with_api_key;
-use codex_core::auth::try_read_auth_json;
 use codex_core::config::Config;
 use codex_core::config::ConfigOverrides;
 use codex_core::config::ConfigToml;
@@ -85,6 +87,8 @@ use codex_core::protocol::EventMsg;
 use codex_core::protocol::ExecApprovalRequestEvent;
 use codex_core::protocol::Op;
 use codex_core::protocol::ReviewDecision;
+use codex_core::read_head_for_summary;
+use codex_feedback::CodexFeedback;
 use codex_login::ServerOptions as LoginServerOptions;
 use codex_login::ShutdownHandle;
 use codex_login::run_login_server;
@@ -98,6 +102,8 @@ use codex_protocol::user_input::UserInput as CoreInputItem;
 use codex_utils_json_to_toml::json_to_toml;
 use std::collections::HashMap;
 use std::ffi::OsStr;
+use std::io::Error as IoError;
+use std::path::Path;
 use std::path::PathBuf;
 use std::sync::Arc;
 use std::sync::atomic::AtomicBool;
@@ -136,6 +142,7 @@ pub(crate) struct CodexMessageProcessor {
    // Queue of pending interrupt requests per conversation. We reply when TurnAborted arrives.
    pending_interrupts: Arc<Mutex<HashMap<ConversationId, Vec<RequestId>>>>,
    pending_fuzzy_searches: Arc<Mutex<HashMap<String, Arc<AtomicBool>>>>,
+    feedback: CodexFeedback,
 }

 impl CodexMessageProcessor {
@@ -145,6 +152,7 @@ impl CodexMessageProcessor {
        outgoing: Arc<OutgoingMessageSender>,
        codex_linux_sandbox_exe: Option<PathBuf>,
        config: Arc<Config>,
+        feedback: CodexFeedback,
    ) -> Self {
        Self {
            auth_manager,
@@ -156,6 +164,7 @@ impl CodexMessageProcessor {
            active_login: Arc::new(Mutex::new(None)),
            pending_interrupts: Arc::new(Mutex::new(HashMap::new())),
            pending_fuzzy_searches: Arc::new(Mutex::new(HashMap::new())),
+            feedback,
        }
    }

@@ -170,12 +179,36 @@ impl CodexMessageProcessor {
                // created before processing any subsequent messages.
                self.process_new_conversation(request_id, params).await;
            }
+            ClientRequest::GetConversationSummary { request_id, params } => {
+                self.get_conversation_summary(request_id, params).await;
+            }
            ClientRequest::ListConversations { request_id, params } => {
                self.handle_list_conversations(request_id, params).await;
            }
            ClientRequest::ListModels { request_id, params } => {
                self.list_models(request_id, params).await;
            }
+            ClientRequest::LoginAccount {
+                request_id,
+                params: _,
+            } => {
+                self.send_unimplemented_error(request_id, "account/login")
+                    .await;
+            }
+            ClientRequest::LogoutAccount {
+                request_id,
+                params: _,
+            } => {
+                self.send_unimplemented_error(request_id, "account/logout")
+                    .await;
+            }
+            ClientRequest::GetAccount {
+                request_id,
+                params: _,
+            } => {
+                self.send_unimplemented_error(request_id, "account/read")
+                    .await;
+            }
            ClientRequest::ResumeConversation { request_id, params } => {
                self.handle_resume_conversation(request_id, params).await;
            }
@@ -254,9 +287,21 @@ impl CodexMessageProcessor {
            } => {
                self.get_account_rate_limits(request_id).await;
            }
+            ClientRequest::UploadFeedback { request_id, params } => {
+                self.upload_feedback(request_id, params).await;
+            }
        }
    }

+    async fn send_unimplemented_error(&self, request_id: RequestId, method: &str) {
+        let error = JSONRPCErrorError {
+            code: INTERNAL_ERROR_CODE,
+            message: format!("{method} is not implemented yet"),
+            data: None,
+        };
+        self.outgoing.send_error(request_id, error).await;
+    }
+
    async fn login_api_key(&mut self, request_id: RequestId, params: LoginApiKeyParams) {
        if matches!(
            self.config.forced_login_method,
@@ -278,7 +323,11 @@ impl CodexMessageProcessor {
            }
        }

-        match login_with_api_key(&self.config.codex_home, &params.api_key) {
+        match login_with_api_key(
+            &self.config.codex_home,
+            &params.api_key,
+            self.config.cli_auth_credentials_store_mode,
+        ) {
            Ok(()) => {
                self.auth_manager.reload();
                self.outgoing
@@ -322,6 +371,7 @@ impl CodexMessageProcessor {
                config.codex_home.clone(),
                CLIENT_ID.to_string(),
                config.forced_chatgpt_workspace_id.clone(),
+                config.cli_auth_credentials_store_mode,
            )
        };

@@ -624,12 +674,8 @@ impl CodexMessageProcessor {
    }

    async fn get_user_info(&self, request_id: RequestId) {
-        // Read alleged user email from auth.json (best-effort; not verified).
-        let auth_path = get_auth_file(&self.config.codex_home);
-        let alleged_user_email = match try_read_auth_json(&auth_path) {
-            Ok(auth) => auth.tokens.and_then(|t| t.id_token.email),
-            Err(_) => None,
-        };
+        // Read alleged user email from cached auth (best-effort; not verified).
+        let alleged_user_email = self.auth_manager.auth().and_then(|a| a.get_account_email());

        let response = UserInfoResponse { alleged_user_email };
        self.outgoing.send_response(request_id, response).await;
@@ -783,24 +829,76 @@ impl CodexMessageProcessor {
        }
    }

+    /// Replies with the summary of the rollout at `params.rollout_path`,
+    /// resolving relative paths against `codex_home`; errors are sent back.
+    async fn get_conversation_summary(
+        &self,
+        request_id: RequestId,
+        params: GetConversationSummaryParams,
+    ) {
+        let GetConversationSummaryParams { rollout_path } = params;
+        let path = if rollout_path.is_absolute() {
+            rollout_path
+        } else {
+            self.config.codex_home.join(&rollout_path)
+        };
+        let provider = self.config.model_provider_id.as_str();
+        let error = match read_summary_from_rollout(&path, provider).await {
+            Ok(summary) => {
+                let response = GetConversationSummaryResponse { summary };
+                self.outgoing.send_response(request_id, response).await;
+                return;
+            }
+            Err(err) => JSONRPCErrorError {
+                code: INTERNAL_ERROR_CODE,
+                message: format!(
+                    "failed to load conversation summary from {}: {}",
+                    path.display(),
+                    err
+                ),
+                data: None,
+            },
+        };
+        self.outgoing.send_error(request_id, error).await;
+    }
+
    async fn handle_list_conversations(
        &self,
        request_id: RequestId,
        params: ListConversationsParams,
    ) {
-        let page_size = params.page_size.unwrap_or(25);
+        let ListConversationsParams {
+            page_size,
+            cursor,
+            model_providers: model_provider,
+        } = params;
+        let page_size = page_size.unwrap_or(25);
        // Decode the optional cursor string to a Cursor via serde (Cursor implements Deserialize from string)
-        let cursor_obj: Option<RolloutCursor> = match params.cursor {
+        let cursor_obj: Option<RolloutCursor> = match cursor {
            Some(s) => serde_json::from_str::<RolloutCursor>(&format!("\"{s}\"")).ok(),
            None => None,
        };
        let cursor_ref = cursor_obj.as_ref();
+        let model_provider_filter = match model_provider {
+            Some(providers) => {
+                if providers.is_empty() {
+                    None
+                } else {
+                    Some(providers)
+                }
+            }
+            None => Some(vec![self.config.model_provider_id.clone()]),
+        };
+        let model_provider_slice = model_provider_filter.as_deref();
+        let fallback_provider = self.config.model_provider_id.clone();

        let page = match RolloutRecorder::list_conversations(
            &self.config.codex_home,
            page_size,
            cursor_ref,
            INTERACTIVE_SESSION_SOURCES,
+            model_provider_slice,
+            fallback_provider.as_str(),
        )
        .await
        {
@@ -819,7 +917,7 @@ impl CodexMessageProcessor {
        let items = page
            .items
            .into_iter()
-            .filter_map(|it| extract_conversation_summary(it.path, &it.head))
+            .filter_map(|it| extract_conversation_summary(it.path, &it.head, &fallback_provider))
            .collect();

        // Encode next_cursor as a plain string
@@ -1226,7 +1324,10 @@ impl CodexMessageProcessor {
        request_id: RequestId,
        params: AddConversationListenerParams,
    ) {
-        let AddConversationListenerParams { conversation_id } = params;
+        let AddConversationListenerParams {
+            conversation_id,
+            experimental_raw_events,
+        } = params;
        let Ok(conversation) = self
            .conversation_manager
            .get_conversation(conversation_id)
@@ -1263,6 +1364,11 @@ impl CodexMessageProcessor {
                            }
                        };

+                        if let EventMsg::RawResponseItem(_) = &event.msg
+                            && !experimental_raw_events {
+                                continue;
+                            }
+
                        // For now, we send a notification for every event,
                        // JSON-serializing the `Event` as-is, but these should
                        // be migrated to be variants of `ServerNotification`
@@ -1380,6 +1486,77 @@ impl CodexMessageProcessor {
        let response = FuzzyFileSearchResponse { files: results };
        self.outgoing.send_response(request_id, response).await;
    }
+
+    /// Handles a feedback upload request.
+    ///
+    /// Takes a snapshot from `self.feedback`, resolves the conversation's
+    /// rollout path only when `include_logs` is set, and runs the upload
+    /// through `spawn_blocking`. Replies with the snapshot's `thread_id` on
+    /// success, or with an `INTERNAL_ERROR_CODE` error otherwise.
+    async fn upload_feedback(&self, request_id: RequestId, params: UploadFeedbackParams) {
+        let UploadFeedbackParams {
+            classification,
+            reason,
+            conversation_id,
+            include_logs,
+        } = params;
+
+        let snapshot = self.feedback.snapshot(conversation_id);
+        let thread_id = snapshot.thread_id.clone();
+
+        // The rollout path is passed along only when logs were requested and
+        // a conversation id was supplied that can be resolved.
+        let validated_rollout_path = match conversation_id {
+            Some(conv_id) if include_logs => self.resolve_rollout_path(conv_id).await,
+            _ => None,
+        };
+
+        // The snapshot and owned parameters are moved into the blocking task.
+        let join_result = tokio::task::spawn_blocking(move || {
+            let rollout_path_ref = validated_rollout_path.as_deref();
+            snapshot.upload_feedback(
+                &classification,
+                reason.as_deref(),
+                include_logs,
+                rollout_path_ref,
+            )
+        })
+        .await;
+
+        // A failed join and a failed upload are reported with the same
+        // message prefix, so both collapse into one optional error message.
+        let failure = match join_result {
+            Ok(Ok(())) => None,
+            Ok(Err(err)) => Some(format!("failed to upload feedback: {err}")),
+            Err(join_err) => Some(format!("failed to upload feedback: {join_err}")),
+        };
+
+        match failure {
+            None => {
+                let response = UploadFeedbackResponse { thread_id };
+                self.outgoing.send_response(request_id, response).await;
+            }
+            Some(message) => {
+                let error = JSONRPCErrorError {
+                    code: INTERNAL_ERROR_CODE,
+                    message,
+                    data: None,
+                };
+                self.outgoing.send_error(request_id, error).await;
+            }
+        }
+    }
+
+    /// Looks up the rollout path of `conversation_id`.
+    ///
+    /// Returns `None` when `get_conversation` returns an error.
+    async fn resolve_rollout_path(&self, conversation_id: ConversationId) -> Option<PathBuf> {
+        self.conversation_manager
+            .get_conversation(conversation_id)
+            .await
+            .ok()
+            .map(|conv| conv.rollout_path())
+    }
 }

 async fn apply_bespoke_event_handling(
@@ -1417,6 +1594,7 @@ async fn apply_bespoke_event_handling(
            command,
            cwd,
            reason,
+            risk,
            parsed_cmd,
        }) => {
            let params = ExecCommandApprovalParams {
@@ -1425,6 +1603,7 @@ async fn apply_bespoke_event_handling(
                command,
                cwd,
                reason,
+                risk,
                parsed_cmd,
            };
            let rx = outgoing
@@ -1436,6 +1615,15 @@ async fn apply_bespoke_event_handling(
                on_exec_approval_response(event_id, rx, conversation).await;
            });
        }
+        EventMsg::TokenCount(token_count_event) => {
+            if let Some(rate_limits) = token_count_event.rate_limits {
+                outgoing
+                    .send_server_notification(ServerNotification::AccountRateLimitsUpdated(
+                        rate_limits,
+                    ))
+                    .await;
+            }
+        }
        // If this is a TurnAborted, reply to any pending interrupt requests.
        EventMsg::TurnAborted(turn_aborted_event) => {
            let pending = {
@@ -1462,6 +1650,7 @@ async fn derive_config_from_params(
 ) -> std::io::Result<Config> {
    let NewConversationParams {
        model,
+        model_provider,
        profile,
        cwd,
        approval_policy,
@@ -1477,13 +1666,14 @@ async fn derive_config_from_params(
        cwd: cwd.map(PathBuf::from),
        approval_policy,
        sandbox_mode,
-        model_provider: None,
+        model_provider,
        codex_linux_sandbox_exe,
        base_instructions,
        include_apply_patch_tool,
        include_view_image_tool: None,
        show_raw_agent_reasoning: None,
        tools_web_search_request: None,
+        experimental_sandbox_command_assessment: None,
        additional_writable_roots: Vec::new(),
    };

@@ -1574,9 +1764,54 @@ async fn on_exec_approval_response(
    }
 }

+/// Builds a `ConversationSummary` from the head of the rollout at `path`.
+///
+/// Errors if the head is empty or its first entry is not a `SessionMeta`.
+/// When `extract_conversation_summary` yields nothing, a summary with an
+/// empty preview is built from the session metadata instead.
+async fn read_summary_from_rollout(
+    path: &Path,
+    fallback_provider: &str,
+) -> std::io::Result<ConversationSummary> {
+    let head = read_head_for_summary(path).await?;
+
+    let first = head
+        .first()
+        .ok_or_else(|| IoError::other(format!("rollout at {} is empty", path.display())))?;
+    let session_meta = serde_json::from_value::<SessionMeta>(first.clone()).map_err(|_| {
+        IoError::other(format!(
+            "rollout at {} does not start with session metadata",
+            path.display()
+        ))
+    })?;
+
+    let extracted = extract_conversation_summary(path.to_path_buf(), &head, fallback_provider);
+    if let Some(summary) = extracted {
+        return Ok(summary);
+    }
+
+    // Nothing was extracted from the head: fall back to the session metadata
+    // alone, using `fallback_provider` when no provider was recorded.
+    let SessionMeta {
+        id,
+        timestamp,
+        model_provider,
+        ..
+    } = session_meta;
+
+    Ok(ConversationSummary {
+        conversation_id: id,
+        timestamp: (!timestamp.is_empty()).then_some(timestamp),
+        path: path.to_path_buf(),
+        preview: String::new(),
+        model_provider: model_provider.unwrap_or_else(|| fallback_provider.to_string()),
+    })
+}
+
 fn extract_conversation_summary(
    path: PathBuf,
    head: &[serde_json::Value],
+    fallback_provider: &str,
 ) -> Option<ConversationSummary> {
    let session_meta = match head.first() {
        Some(first_line) => serde_json::from_value::<SessionMeta>(first_line.clone()).ok()?,
@@ -1601,12 +1836,17 @@ fn extract_conversation_summary(
    } else {
        Some(session_meta.timestamp.clone())
    };
+    let conversation_id = session_meta.id;
+    let model_provider = session_meta
+        .model_provider
+        .unwrap_or_else(|| fallback_provider.to_string());

    Some(ConversationSummary {
-        conversation_id: session_meta.id,
+        conversation_id,
        timestamp,
        path,
        preview: preview.to_string(),
+        model_provider,
    })
 }

@@ -1616,6 +1856,7 @@ mod tests {
    use anyhow::Result;
    use pretty_assertions::assert_eq;
    use serde_json::json;
+    use tempfile::TempDir;

    #[test]
    fn extract_conversation_summary_prefers_plain_user_messages() -> Result<()> {
@@ -1630,7 +1871,8 @@ mod tests {
                "cwd": "/",
                "originator": "codex",
                "cli_version": "0.0.0",
-                "instructions": null
+                "instructions": null,
+                "model_provider": "test-provider"
            }),
            json!({
                "type": "message",
@@ -1650,15 +1892,62 @@ mod tests {
            }),
        ];

-        let summary = extract_conversation_summary(path.clone(), &head).expect("summary");
+        let summary =
+            extract_conversation_summary(path.clone(), &head, "test-provider").expect("summary");

-        assert_eq!(summary.conversation_id, conversation_id);
-        assert_eq!(
-            summary.timestamp,
-            Some("2025-09-05T16:53:11.850Z".to_string())
-        );
-        assert_eq!(summary.path, path);
-        assert_eq!(summary.preview, "Count to 5");
+        let expected = ConversationSummary {
+            conversation_id,
+            timestamp,
+            path,
+            preview: "Count to 5".to_string(),
+            model_provider: "test-provider".to_string(),
+        };
+
+        assert_eq!(summary, expected);
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn read_summary_from_rollout_returns_empty_preview_when_no_user_message() -> Result<()> {
+        use codex_protocol::protocol::RolloutItem;
+        use codex_protocol::protocol::RolloutLine;
+        use codex_protocol::protocol::SessionMetaLine;
+        use std::fs;
+
+        let temp_dir = TempDir::new()?;
+        let path = temp_dir.path().join("rollout.jsonl");
+
+        let conversation_id = ConversationId::from_string("bfd12a78-5900-467b-9bc5-d3d35df08191")?;
+        let timestamp = "2025-09-05T16:53:11.850Z".to_string();
+
+        let session_meta = SessionMeta {
+            id: conversation_id,
+            timestamp: timestamp.clone(),
+            model_provider: None,
+            ..SessionMeta::default()
+        };
+
+        let line = RolloutLine {
+            timestamp: timestamp.clone(),
+            item: RolloutItem::SessionMeta(SessionMetaLine {
+                meta: session_meta.clone(),
+                git: None,
+            }),
+        };
+
+        fs::write(&path, format!("{}\n", serde_json::to_string(&line)?))?;
+
+        let summary = read_summary_from_rollout(path.as_path(), "fallback").await?;
+
+        let expected = ConversationSummary {
+            conversation_id,
+            timestamp: Some(timestamp),
+            path: path.clone(),
+            preview: String::new(),
+            model_provider: "fallback".to_string(),
+        };
+
+        assert_eq!(summary, expected);
        Ok(())
    }
 }
--- a/codex-rs/app-server/src/fuzzy_file_search.rs
+++ b/codex-rs/app-server/src/fuzzy_file_search.rs
@@ -46,6 +46,7 @@ pub(crate) async fn run_fuzzy_file_search(
                threads,
                cancel_flag,
                COMPUTE_INDICES,
+                true,
            ) {
                Ok(res) => Ok((root, res)),
                Err(err) => Err((root, err)),
--- a/codex-rs/app-server/src/lib.rs
+++ b/codex-rs/app-server/src/lib.rs
@@ -12,16 +12,19 @@ use crate::message_processor::MessageProcessor;
 use crate::outgoing_message::OutgoingMessage;
 use crate::outgoing_message::OutgoingMessageSender;
 use codex_app_server_protocol::JSONRPCMessage;
+use codex_feedback::CodexFeedback;
 use tokio::io::AsyncBufReadExt;
 use tokio::io::AsyncWriteExt;
 use tokio::io::BufReader;
 use tokio::io::{self};
 use tokio::sync::mpsc;
+use tracing::Level;
 use tracing::debug;
 use tracing::error;
 use tracing::info;
 use tracing_subscriber::EnvFilter;
 use tracing_subscriber::Layer;
+use tracing_subscriber::filter::Targets;
 use tracing_subscriber::layer::SubscriberExt;
 use tracing_subscriber::util::SubscriberInitExt;

@@ -82,6 +85,8 @@ pub async fn run_main(
            std::io::Error::new(ErrorKind::InvalidData, format!("error loading config: {e}"))
        })?;

+    let feedback = CodexFeedback::new();
+
    let otel =
        codex_core::otel_init::build_provider(&config, env!("CARGO_PKG_VERSION")).map_err(|e| {
            std::io::Error::new(
@@ -96,8 +101,15 @@ pub async fn run_main(
        .with_writer(std::io::stderr)
        .with_filter(EnvFilter::from_default_env());

+    let feedback_layer = tracing_subscriber::fmt::layer()
+        .with_writer(feedback.make_writer())
+        .with_ansi(false)
+        .with_target(false)
+        .with_filter(Targets::new().with_default(Level::TRACE));
+
    let _ = tracing_subscriber::registry()
        .with(stderr_fmt)
+        .with(feedback_layer)
        .with(otel.as_ref().map(|provider| {
            OpenTelemetryTracingBridge::new(&provider.logger).with_filter(
                tracing_subscriber::filter::filter_fn(codex_core::otel_init::codex_export_filter),
@@ -112,6 +124,7 @@ pub async fn run_main(
            outgoing_message_sender,
            codex_linux_sandbox_exe,
            std::sync::Arc::new(config),
+            feedback.clone(),
        );
        async move {
            while let Some(msg) = incoming_rx.recv().await {
--- a/codex-rs/app-server/src/message_processor.rs
+++ b/codex-rs/app-server/src/message_processor.rs
@@ -17,6 +17,7 @@ use codex_core::ConversationManager;
 use codex_core::config::Config;
 use codex_core::default_client::USER_AGENT_SUFFIX;
 use codex_core::default_client::get_codex_user_agent;
+use codex_feedback::CodexFeedback;
 use codex_protocol::protocol::SessionSource;
 use std::sync::Arc;

@@ -33,9 +34,14 @@ impl MessageProcessor {
        outgoing: OutgoingMessageSender,
        codex_linux_sandbox_exe: Option<PathBuf>,
        config: Arc<Config>,
+        feedback: CodexFeedback,
    ) -> Self {
        let outgoing = Arc::new(outgoing);
-        let auth_manager = AuthManager::shared(config.codex_home.clone(), false);
+        let auth_manager = AuthManager::shared(
+            config.codex_home.clone(),
+            false,
+            config.cli_auth_credentials_store_mode,
+        );
        let conversation_manager = Arc::new(ConversationManager::new(
            auth_manager.clone(),
            SessionSource::VSCode,
@@ -46,6 +52,7 @@ impl MessageProcessor {
            outgoing.clone(),
            codex_linux_sandbox_exe,
            config,
+            feedback,
        );

        Self {
--- a/codex-rs/app-server/src/outgoing_message.rs
+++ b/codex-rs/app-server/src/outgoing_message.rs
@@ -142,6 +142,8 @@ pub(crate) struct OutgoingError {
 #[cfg(test)]
 mod tests {
    use codex_app_server_protocol::LoginChatGptCompleteNotification;
+    use codex_protocol::protocol::RateLimitSnapshot;
+    use codex_protocol::protocol::RateLimitWindow;
    use pretty_assertions::assert_eq;
    use serde_json::json;
    use uuid::Uuid;
@@ -171,4 +173,34 @@ mod tests {
            "ensure the strum macros serialize the method field correctly"
        );
    }
+
+    #[test]
+    fn verify_account_rate_limits_notification_serialization() {
+        let notification = ServerNotification::AccountRateLimitsUpdated(RateLimitSnapshot {
+            primary: Some(RateLimitWindow {
+                used_percent: 25.0,
+                window_minutes: Some(15),
+                resets_at: Some(123),
+            }),
+            secondary: None,
+        });
+
+        let jsonrpc_notification = OutgoingMessage::AppServerNotification(notification);
+        assert_eq!(
+            json!({
+                "method": "account/rateLimits/updated",
+                "params": {
+                    "primary": {
+                        "used_percent": 25.0,
+                        "window_minutes": 15,
+                        "resets_at": 123,
+                    },
+                    "secondary": null,
+                },
+            }),
+            serde_json::to_value(jsonrpc_notification)
+                .expect("ensure the notification serializes correctly"),
+            "ensure the notification serializes correctly"
+        );
+    }
 }
--- a/codex-rs/app-server/tests/common/auth_fixtures.rs
+++ b/codex-rs/app-server/tests/common/auth_fixtures.rs
@@ -6,9 +6,9 @@ use base64::Engine;
 use base64::engine::general_purpose::URL_SAFE_NO_PAD;
 use chrono::DateTime;
 use chrono::Utc;
+use codex_core::auth::AuthCredentialsStoreMode;
 use codex_core::auth::AuthDotJson;
-use codex_core::auth::get_auth_file;
-use codex_core::auth::write_auth_json;
+use codex_core::auth::save_auth;
 use codex_core::token_data::TokenData;
 use codex_core::token_data::parse_id_token;
 use serde_json::json;
@@ -109,7 +109,11 @@ pub fn encode_id_token(claims: &ChatGptIdTokenClaims) -> Result<String> {
    Ok(format!("{header_b64}.{payload_b64}.{signature_b64}"))
 }

-pub fn write_chatgpt_auth(codex_home: &Path, fixture: ChatGptAuthFixture) -> Result<()> {
+pub fn write_chatgpt_auth(
+    codex_home: &Path,
+    fixture: ChatGptAuthFixture,
+    cli_auth_credentials_store_mode: AuthCredentialsStoreMode,
+) -> Result<()> {
    let id_token_raw = encode_id_token(&fixture.claims)?;
    let id_token = parse_id_token(&id_token_raw).context("parse id token")?;
    let tokens = TokenData {
@@ -127,5 +131,5 @@ pub fn write_chatgpt_auth(codex_home: &Path, fixture: ChatGptAuthFixture) -> Res
        last_refresh,
    };

-    write_auth_json(&get_auth_file(codex_home), &auth).context("write auth.json")
+    save_auth(codex_home, &auth, cli_auth_credentials_store_mode).context("write auth.json")
 }
--- a/codex-rs/app-server/tests/common/mcp_process.rs
+++ b/codex-rs/app-server/tests/common/mcp_process.rs
@@ -30,6 +30,7 @@ use codex_app_server_protocol::SendUserMessageParams;
 use codex_app_server_protocol::SendUserTurnParams;
 use codex_app_server_protocol::ServerRequest;
 use codex_app_server_protocol::SetDefaultModelParams;
+use codex_app_server_protocol::UploadFeedbackParams;

 use codex_app_server_protocol::JSONRPCError;
 use codex_app_server_protocol::JSONRPCMessage;
@@ -242,6 +243,15 @@ impl McpProcess {
        self.send_request("account/rateLimits/read", None).await
    }

+    /// Send a `feedback/upload` JSON-RPC request.
+    pub async fn send_upload_feedback_request(
+        &mut self,
+        params: UploadFeedbackParams,
+    ) -> anyhow::Result<i64> {
+        let params = Some(serde_json::to_value(params)?);
+        self.send_request("feedback/upload", params).await
+    }
+
    /// Send a `userInfo` JSON-RPC request.
    pub async fn send_user_info_request(&mut self) -> anyhow::Result<i64> {
        self.send_request("userInfo", None).await
--- a/codex-rs/app-server/tests/suite/archive_conversation.rs
+++ b/codex-rs/app-server/tests/suite/archive_conversation.rs
@@ -1,5 +1,4 @@
-use std::path::Path;
-
+use anyhow::Result;
 use app_test_support::McpProcess;
 use app_test_support::to_response;
 use codex_app_server_protocol::ArchiveConversationParams;
@@ -9,45 +8,37 @@ use codex_app_server_protocol::NewConversationParams;
 use codex_app_server_protocol::NewConversationResponse;
 use codex_app_server_protocol::RequestId;
 use codex_core::ARCHIVED_SESSIONS_SUBDIR;
+use std::path::Path;
 use tempfile::TempDir;
 use tokio::time::timeout;

 const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn archive_conversation_moves_rollout_into_archived_directory() {
-    let codex_home = TempDir::new().expect("create temp dir");
-    create_config_toml(codex_home.path()).expect("write config.toml");
+async fn archive_conversation_moves_rollout_into_archived_directory() -> Result<()> {
+    let codex_home = TempDir::new()?;
+    create_config_toml(codex_home.path())?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("initialize timeout")
-        .expect("initialize request");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    let new_request_id = mcp
        .send_new_conversation_request(NewConversationParams {
            model: Some("mock-model".to_string()),
            ..Default::default()
        })
-        .await
-        .expect("send newConversation");
+        .await?;
    let new_response: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(new_request_id)),
    )
-    .await
-    .expect("newConversation timeout")
-    .expect("newConversation response");
+    .await??;

    let NewConversationResponse {
        conversation_id,
        rollout_path,
        ..
-    } = to_response::<NewConversationResponse>(new_response)
-        .expect("deserialize newConversation response");
+    } = to_response::<NewConversationResponse>(new_response)?;

    assert!(
        rollout_path.exists(),
@@ -60,19 +51,15 @@ async fn archive_conversation_moves_rollout_into_archived_directory() {
            conversation_id,
            rollout_path: rollout_path.clone(),
        })
-        .await
-        .expect("send archiveConversation");
+        .await?;
    let archive_response: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(archive_request_id)),
    )
-    .await
-    .expect("archiveConversation timeout")
-    .expect("archiveConversation response");
+    .await??;

    let _: ArchiveConversationResponse =
-        to_response::<ArchiveConversationResponse>(archive_response)
-            .expect("deserialize archiveConversation response");
+        to_response::<ArchiveConversationResponse>(archive_response)?;

    let archived_directory = codex_home.path().join(ARCHIVED_SESSIONS_SUBDIR);
    let archived_rollout_path =
@@ -90,6 +77,8 @@ async fn archive_conversation_moves_rollout_into_archived_directory() {
        "expected archived rollout path {} to exist",
        archived_rollout_path.display()
    );
+
+    Ok(())
 }

 fn create_config_toml(codex_home: &Path) -> std::io::Result<()> {
--- a/codex-rs/app-server/tests/suite/auth.rs
+++ b/codex-rs/app-server/tests/suite/auth.rs
@@ -1,5 +1,4 @@
-use std::path::Path;
-
+use anyhow::Result;
 use app_test_support::McpProcess;
 use app_test_support::to_response;
 use codex_app_server_protocol::AuthMode;
@@ -11,6 +10,7 @@ use codex_app_server_protocol::LoginApiKeyParams;
 use codex_app_server_protocol::LoginApiKeyResponse;
 use codex_app_server_protocol::RequestId;
 use pretty_assertions::assert_eq;
+use std::path::Path;
 use tempfile::TempDir;
 use tokio::time::timeout;

@@ -71,125 +71,99 @@ forced_login_method = "{forced_method}"
    std::fs::write(config_toml, contents)
 }

-async fn login_with_api_key_via_request(mcp: &mut McpProcess, api_key: &str) {
+async fn login_with_api_key_via_request(mcp: &mut McpProcess, api_key: &str) -> Result<()> {
    let request_id = mcp
        .send_login_api_key_request(LoginApiKeyParams {
            api_key: api_key.to_string(),
        })
-        .await
-        .unwrap_or_else(|e| panic!("send loginApiKey: {e}"));
+        .await?;

    let resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
    )
-    .await
-    .unwrap_or_else(|e| panic!("loginApiKey timeout: {e}"))
-    .unwrap_or_else(|e| panic!("loginApiKey response: {e}"));
-    let _: LoginApiKeyResponse =
-        to_response(resp).unwrap_or_else(|e| panic!("deserialize login response: {e}"));
+    .await??;
+    let _: LoginApiKeyResponse = to_response(resp)?;
+    Ok(())
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn get_auth_status_no_auth() {
-    let codex_home = TempDir::new().unwrap_or_else(|e| panic!("create tempdir: {e}"));
-    create_config_toml(codex_home.path()).unwrap_or_else(|err| panic!("write config.toml: {err}"));
+async fn get_auth_status_no_auth() -> Result<()> {
+    let codex_home = TempDir::new()?;
+    create_config_toml(codex_home.path())?;

-    let mut mcp = McpProcess::new_with_env(codex_home.path(), &[("OPENAI_API_KEY", None)])
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init failed");
+    let mut mcp = McpProcess::new_with_env(codex_home.path(), &[("OPENAI_API_KEY", None)]).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    let request_id = mcp
        .send_get_auth_status_request(GetAuthStatusParams {
            include_token: Some(true),
            refresh_token: Some(false),
        })
-        .await
-        .expect("send getAuthStatus");
+        .await?;

    let resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
    )
-    .await
-    .expect("getAuthStatus timeout")
-    .expect("getAuthStatus response");
-    let status: GetAuthStatusResponse = to_response(resp).expect("deserialize status");
+    .await??;
+    let status: GetAuthStatusResponse = to_response(resp)?;
    assert_eq!(status.auth_method, None, "expected no auth method");
    assert_eq!(status.auth_token, None, "expected no token");
+    Ok(())
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn get_auth_status_with_api_key() {
-    let codex_home = TempDir::new().unwrap_or_else(|e| panic!("create tempdir: {e}"));
-    create_config_toml(codex_home.path()).unwrap_or_else(|err| panic!("write config.toml: {err}"));
+async fn get_auth_status_with_api_key() -> Result<()> {
+    let codex_home = TempDir::new()?;
+    create_config_toml(codex_home.path())?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init failed");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

-    login_with_api_key_via_request(&mut mcp, "sk-test-key").await;
+    login_with_api_key_via_request(&mut mcp, "sk-test-key").await?;

    let request_id = mcp
        .send_get_auth_status_request(GetAuthStatusParams {
            include_token: Some(true),
            refresh_token: Some(false),
        })
-        .await
-        .expect("send getAuthStatus");
+        .await?;

    let resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
    )
-    .await
-    .expect("getAuthStatus timeout")
-    .expect("getAuthStatus response");
-    let status: GetAuthStatusResponse = to_response(resp).expect("deserialize status");
+    .await??;
+    let status: GetAuthStatusResponse = to_response(resp)?;
    assert_eq!(status.auth_method, Some(AuthMode::ApiKey));
    assert_eq!(status.auth_token, Some("sk-test-key".to_string()));
+    Ok(())
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn get_auth_status_with_api_key_when_auth_not_required() {
-    let codex_home = TempDir::new().unwrap_or_else(|e| panic!("create tempdir: {e}"));
-    create_config_toml_custom_provider(codex_home.path(), false)
-        .unwrap_or_else(|err| panic!("write config.toml: {err}"));
+async fn get_auth_status_with_api_key_when_auth_not_required() -> Result<()> {
+    let codex_home = TempDir::new()?;
+    create_config_toml_custom_provider(codex_home.path(), false)?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init failed");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

-    login_with_api_key_via_request(&mut mcp, "sk-test-key").await;
+    login_with_api_key_via_request(&mut mcp, "sk-test-key").await?;

    let request_id = mcp
        .send_get_auth_status_request(GetAuthStatusParams {
            include_token: Some(true),
            refresh_token: Some(false),
        })
-        .await
-        .expect("send getAuthStatus");
+        .await?;

    let resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
    )
-    .await
-    .expect("getAuthStatus timeout")
-    .expect("getAuthStatus response");
-    let status: GetAuthStatusResponse = to_response(resp).expect("deserialize status");
+    .await??;
+    let status: GetAuthStatusResponse = to_response(resp)?;
    assert_eq!(status.auth_method, None, "expected no auth method");
    assert_eq!(status.auth_token, None, "expected no token");
    assert_eq!(
@@ -197,76 +171,60 @@ async fn get_auth_status_with_api_key_when_auth_not_required() {
        Some(false),
        "requires_openai_auth should be false",
    );
+    Ok(())
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn get_auth_status_with_api_key_no_include_token() {
-    let codex_home = TempDir::new().unwrap_or_else(|e| panic!("create tempdir: {e}"));
-    create_config_toml(codex_home.path()).unwrap_or_else(|err| panic!("write config.toml: {err}"));
+async fn get_auth_status_with_api_key_no_include_token() -> Result<()> {
+    let codex_home = TempDir::new()?;
+    create_config_toml(codex_home.path())?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init failed");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

-    login_with_api_key_via_request(&mut mcp, "sk-test-key").await;
+    login_with_api_key_via_request(&mut mcp, "sk-test-key").await?;

    // Build params via struct so None field is omitted in wire JSON.
    let params = GetAuthStatusParams {
        include_token: None,
        refresh_token: Some(false),
    };
-    let request_id = mcp
-        .send_get_auth_status_request(params)
-        .await
-        .expect("send getAuthStatus");
+    let request_id = mcp.send_get_auth_status_request(params).await?;

    let resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
    )
-    .await
-    .expect("getAuthStatus timeout")
-    .expect("getAuthStatus response");
-    let status: GetAuthStatusResponse = to_response(resp).expect("deserialize status");
+    .await??;
+    let status: GetAuthStatusResponse = to_response(resp)?;
    assert_eq!(status.auth_method, Some(AuthMode::ApiKey));
    assert!(status.auth_token.is_none(), "token must be omitted");
+    Ok(())
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn login_api_key_rejected_when_forced_chatgpt() {
-    let codex_home = TempDir::new().unwrap_or_else(|e| panic!("create tempdir: {e}"));
-    create_config_toml_forced_login(codex_home.path(), "chatgpt")
-        .unwrap_or_else(|err| panic!("write config.toml: {err}"));
+async fn login_api_key_rejected_when_forced_chatgpt() -> Result<()> {
+    let codex_home = TempDir::new()?;
+    create_config_toml_forced_login(codex_home.path(), "chatgpt")?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init failed");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    let request_id = mcp
        .send_login_api_key_request(LoginApiKeyParams {
            api_key: "sk-test-key".to_string(),
        })
-        .await
-        .expect("send loginApiKey");
+        .await?;

    let err: JSONRPCError = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_error_message(RequestId::Integer(request_id)),
    )
-    .await
-    .expect("loginApiKey error timeout")
-    .expect("loginApiKey error");
+    .await??;

    assert_eq!(
        err.error.message,
        "API key login is disabled. Use ChatGPT login instead."
    );
+    Ok(())
 }
--- a/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs
+++ b/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs
@@ -1,5 +1,4 @@
-use std::path::Path;
-
+use anyhow::Result;
 use app_test_support::McpProcess;
 use app_test_support::create_final_assistant_message_sse_response;
 use app_test_support::create_mock_chat_completions_server;
@@ -32,26 +31,27 @@ use codex_protocol::protocol::Event;
 use codex_protocol::protocol::EventMsg;
 use pretty_assertions::assert_eq;
 use std::env;
+use std::path::Path;
 use tempfile::TempDir;
 use tokio::time::timeout;

 const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);

 #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
-async fn test_codex_jsonrpc_conversation_flow() {
+async fn test_codex_jsonrpc_conversation_flow() -> Result<()> {
    if env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
        println!(
            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
        );
-        return;
+        return Ok(());
    }

-    let tmp = TempDir::new().expect("tmp dir");
+    let tmp = TempDir::new()?;
    // Temporary Codex home with config pointing at the mock server.
    let codex_home = tmp.path().join("codex_home");
-    std::fs::create_dir(&codex_home).expect("create codex home dir");
+    std::fs::create_dir(&codex_home)?;
    let working_directory = tmp.path().join("workdir");
-    std::fs::create_dir(&working_directory).expect("create working directory");
+    std::fs::create_dir(&working_directory)?;

    // Create a mock model server that immediately ends each turn.
    // Two turns are expected: initial session configure + one user message.
@@ -61,20 +61,15 @@ async fn test_codex_jsonrpc_conversation_flow() {
            Some(&working_directory),
            Some(5000),
            "call1234",
-        )
-        .expect("create shell sse response"),
-        create_final_assistant_message_sse_response("Enjoy your new git repo!")
-            .expect("create final assistant message"),
+        )?,
+        create_final_assistant_message_sse_response("Enjoy your new git repo!")?,
    ];
    let server = create_mock_chat_completions_server(responses).await;
-    create_config_toml(&codex_home, &server.uri()).expect("write config");
+    create_config_toml(&codex_home, &server.uri())?;

    // Start MCP server and initialize.
-    let mut mcp = McpProcess::new(&codex_home).await.expect("spawn mcp");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init error");
+    let mut mcp = McpProcess::new(&codex_home).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    // 1) newConversation
    let new_conv_id = mcp
@@ -82,17 +77,13 @@ async fn test_codex_jsonrpc_conversation_flow() {
            cwd: Some(working_directory.to_string_lossy().into_owned()),
            ..Default::default()
        })
-        .await
-        .expect("send newConversation");
+        .await?;
    let new_conv_resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(new_conv_id)),
    )
-    .await
-    .expect("newConversation timeout")
-    .expect("newConversation resp");
-    let new_conv_resp = to_response::<NewConversationResponse>(new_conv_resp)
-        .expect("deserialize newConversation response");
+    .await??;
+    let new_conv_resp = to_response::<NewConversationResponse>(new_conv_resp)?;
    let NewConversationResponse {
        conversation_id,
        model,
@@ -103,19 +94,18 @@ async fn test_codex_jsonrpc_conversation_flow() {

    // 2) addConversationListener
    let add_listener_id = mcp
-        .send_add_conversation_listener_request(AddConversationListenerParams { conversation_id })
-        .await
-        .expect("send addConversationListener");
+        .send_add_conversation_listener_request(AddConversationListenerParams {
+            conversation_id,
+            experimental_raw_events: false,
+        })
+        .await?;
    let add_listener_resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(add_listener_id)),
    )
-    .await
-    .expect("addConversationListener timeout")
-    .expect("addConversationListener resp");
+    .await??;
    let AddConversationSubscriptionResponse { subscription_id } =
-        to_response::<AddConversationSubscriptionResponse>(add_listener_resp)
-            .expect("deserialize addConversationListener response");
+        to_response::<AddConversationSubscriptionResponse>(add_listener_resp)?;

    // 3) sendUserMessage (should trigger notifications; we only validate an OK response)
    let send_user_id = mcp
@@ -125,17 +115,13 @@ async fn test_codex_jsonrpc_conversation_flow() {
                text: "text".to_string(),
            }],
        })
-        .await
-        .expect("send sendUserMessage");
+        .await?;
    let send_user_resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(send_user_id)),
    )
-    .await
-    .expect("sendUserMessage timeout")
-    .expect("sendUserMessage resp");
-    let SendUserMessageResponse {} = to_response::<SendUserMessageResponse>(send_user_resp)
-        .expect("deserialize sendUserMessage response");
+    .await??;
+    let SendUserMessageResponse {} = to_response::<SendUserMessageResponse>(send_user_resp)?;

    // Verify the task_finished notification is received.
    // Note this also ensures that the final request to the server was made.
@@ -143,9 +129,7 @@ async fn test_codex_jsonrpc_conversation_flow() {
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_notification_message("codex/event/task_complete"),
    )
-    .await
-    .expect("task_finished_notification timeout")
-    .expect("task_finished_notification resp");
+    .await??;
    let serde_json::Value::Object(map) = task_finished_notification
        .params
        .expect("notification should have params")
@@ -163,33 +147,31 @@ async fn test_codex_jsonrpc_conversation_flow() {
        .send_remove_conversation_listener_request(RemoveConversationListenerParams {
            subscription_id,
        })
-        .await
-        .expect("send removeConversationListener");
+        .await?;
    let remove_listener_resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(remove_listener_id)),
    )
-    .await
-    .expect("removeConversationListener timeout")
-    .expect("removeConversationListener resp");
-    let RemoveConversationSubscriptionResponse {} =
-        to_response(remove_listener_resp).expect("deserialize removeConversationListener response");
+    .await??;
+    let RemoveConversationSubscriptionResponse {} = to_response(remove_listener_resp)?;
+
+    Ok(())
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
-async fn test_send_user_turn_changes_approval_policy_behavior() {
+async fn test_send_user_turn_changes_approval_policy_behavior() -> Result<()> {
    if env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
        println!(
            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
        );
-        return;
+        return Ok(());
    }

-    let tmp = TempDir::new().expect("tmp dir");
+    let tmp = TempDir::new()?;
    let codex_home = tmp.path().join("codex_home");
-    std::fs::create_dir(&codex_home).expect("create codex home dir");
+    std::fs::create_dir(&codex_home)?;
    let working_directory = tmp.path().join("workdir");
-    std::fs::create_dir(&working_directory).expect("create working directory");
+    std::fs::create_dir(&working_directory)?;

    // Mock server will request a python shell call for the first and second turn, then finish.
    let responses = vec![
@@ -202,10 +184,8 @@ async fn test_send_user_turn_changes_approval_policy_behavior() {
            Some(&working_directory),
            Some(5000),
            "call1",
-        )
-        .expect("create first shell sse response"),
-        create_final_assistant_message_sse_response("done 1")
-            .expect("create final assistant message 1"),
+        )?,
+        create_final_assistant_message_sse_response("done 1")?,
        create_shell_sse_response(
            vec![
                "python3".to_string(),
@@ -215,20 +195,15 @@ async fn test_send_user_turn_changes_approval_policy_behavior() {
            Some(&working_directory),
            Some(5000),
            "call2",
-        )
-        .expect("create second shell sse response"),
-        create_final_assistant_message_sse_response("done 2")
-            .expect("create final assistant message 2"),
+        )?,
+        create_final_assistant_message_sse_response("done 2")?,
    ];
    let server = create_mock_chat_completions_server(responses).await;
-    create_config_toml(&codex_home, &server.uri()).expect("write config");
+    create_config_toml(&codex_home, &server.uri())?;

    // Start MCP server and initialize.
-    let mut mcp = McpProcess::new(&codex_home).await.expect("spawn mcp");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init error");
+    let mut mcp = McpProcess::new(&codex_home).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    // 1) Start conversation with approval_policy=untrusted
    let new_conv_id = mcp
@@ -236,36 +211,30 @@ async fn test_send_user_turn_changes_approval_policy_behavior() {
            cwd: Some(working_directory.to_string_lossy().into_owned()),
            ..Default::default()
        })
-        .await
-        .expect("send newConversation");
+        .await?;
    let new_conv_resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(new_conv_id)),
    )
-    .await
-    .expect("newConversation timeout")
-    .expect("newConversation resp");
+    .await??;
    let NewConversationResponse {
        conversation_id, ..
-    } = to_response::<NewConversationResponse>(new_conv_resp)
-        .expect("deserialize newConversation response");
+    } = to_response::<NewConversationResponse>(new_conv_resp)?;

    // 2) addConversationListener
    let add_listener_id = mcp
-        .send_add_conversation_listener_request(AddConversationListenerParams { conversation_id })
-        .await
-        .expect("send addConversationListener");
-    let _: AddConversationSubscriptionResponse =
-        to_response::<AddConversationSubscriptionResponse>(
-            timeout(
-                DEFAULT_READ_TIMEOUT,
-                mcp.read_stream_until_response_message(RequestId::Integer(add_listener_id)),
-            )
-            .await
-            .expect("addConversationListener timeout")
-            .expect("addConversationListener resp"),
+        .send_add_conversation_listener_request(AddConversationListenerParams {
+            conversation_id,
+            experimental_raw_events: false,
+        })
+        .await?;
+    let _: AddConversationSubscriptionResponse = to_response::<AddConversationSubscriptionResponse>(
+        timeout(
+            DEFAULT_READ_TIMEOUT,
+            mcp.read_stream_until_response_message(RequestId::Integer(add_listener_id)),
        )
-        .expect("deserialize addConversationListener response");
+        .await??,
+    )?;

    // 3) sendUserMessage triggers a shell call; approval policy is Untrusted so we should get an elicitation
    let send_user_id = mcp
@@ -275,27 +244,21 @@ async fn test_send_user_turn_changes_approval_policy_behavior() {
                text: "run python".to_string(),
            }],
        })
-        .await
-        .expect("send sendUserMessage");
+        .await?;
    let _send_user_resp: SendUserMessageResponse = to_response::<SendUserMessageResponse>(
        timeout(
            DEFAULT_READ_TIMEOUT,
            mcp.read_stream_until_response_message(RequestId::Integer(send_user_id)),
        )
-        .await
-        .expect("sendUserMessage timeout")
-        .expect("sendUserMessage resp"),
-    )
-    .expect("deserialize sendUserMessage response");
+        .await??,
+    )?;

    // Expect an ExecCommandApproval request (elicitation)
    let request = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_request_message(),
    )
-    .await
-    .expect("waiting for exec approval request timeout")
-    .expect("exec approval request");
+    .await??;
    let ServerRequest::ExecCommandApproval { request_id, params } = request else {
        panic!("expected ExecCommandApproval request, got: {request:?}");
    };
@@ -311,6 +274,7 @@ async fn test_send_user_turn_changes_approval_policy_behavior() {
            ],
            cwd: working_directory.clone(),
            reason: None,
+            risk: None,
            parsed_cmd: vec![ParsedCommand::Unknown {
                cmd: "python3 -c 'print(42)'".to_string()
            }],
@@ -323,17 +287,14 @@ async fn test_send_user_turn_changes_approval_policy_behavior() {
        request_id,
        serde_json::json!({ "decision": codex_core::protocol::ReviewDecision::Approved }),
    )
-    .await
-    .expect("send approval response");
+    .await?;

    // Wait for first TaskComplete
    let _ = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_notification_message("codex/event/task_complete"),
    )
-    .await
-    .expect("task_complete 1 timeout")
-    .expect("task_complete 1 notification");
+    .await??;

    // 4) sendUserTurn with approval_policy=never should run without elicitation
    let send_turn_id = mcp
@@ -349,19 +310,15 @@ async fn test_send_user_turn_changes_approval_policy_behavior() {
            effort: Some(ReasoningEffort::Medium),
            summary: ReasoningSummary::Auto,
        })
-        .await
-        .expect("send sendUserTurn");
+        .await?;
    // Acknowledge sendUserTurn
    let _send_turn_resp: SendUserTurnResponse = to_response::<SendUserTurnResponse>(
        timeout(
            DEFAULT_READ_TIMEOUT,
            mcp.read_stream_until_response_message(RequestId::Integer(send_turn_id)),
        )
-        .await
-        .expect("sendUserTurn timeout")
-        .expect("sendUserTurn resp"),
-    )
-    .expect("deserialize sendUserTurn response");
+        .await??,
+    )?;

    // Ensure we do NOT receive an ExecCommandApproval request before the task completes.
    // If any Request is seen while waiting for task_complete, the helper will error and the test fails.
@@ -369,31 +326,31 @@ async fn test_send_user_turn_changes_approval_policy_behavior() {
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_notification_message("codex/event/task_complete"),
    )
-    .await
-    .expect("task_complete 2 timeout")
-    .expect("task_complete 2 notification");
+    .await??;
+
+    Ok(())
 }

 // Helper: minimal config.toml pointing at mock provider.

 #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
-async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() {
+async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() -> Result<()> {
    if env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
        println!(
            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
        );
-        return;
+        return Ok(());
    }

-    let tmp = TempDir::new().expect("tmp dir");
+    let tmp = TempDir::new()?;
    let codex_home = tmp.path().join("codex_home");
-    std::fs::create_dir(&codex_home).expect("create codex home dir");
+    std::fs::create_dir(&codex_home)?;
    let workspace_root = tmp.path().join("workspace");
-    std::fs::create_dir(&workspace_root).expect("create workspace root");
+    std::fs::create_dir(&workspace_root)?;
    let first_cwd = workspace_root.join("turn1");
    let second_cwd = workspace_root.join("turn2");
-    std::fs::create_dir(&first_cwd).expect("create first cwd");
-    std::fs::create_dir(&second_cwd).expect("create second cwd");
+    std::fs::create_dir(&first_cwd)?;
+    std::fs::create_dir(&second_cwd)?;

    let responses = vec![
        create_shell_sse_response(
@@ -405,10 +362,8 @@ async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() {
            None,
            Some(5000),
            "call-first",
-        )
-        .expect("create first shell response"),
-        create_final_assistant_message_sse_response("done first")
-            .expect("create first final assistant message"),
+        )?,
+        create_final_assistant_message_sse_response("done first")?,
        create_shell_sse_response(
            vec![
                "bash".to_string(),
@@ -418,21 +373,14 @@ async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() {
            None,
            Some(5000),
            "call-second",
-        )
-        .expect("create second shell response"),
-        create_final_assistant_message_sse_response("done second")
-            .expect("create second final assistant message"),
+        )?,
+        create_final_assistant_message_sse_response("done second")?,
    ];
    let server = create_mock_chat_completions_server(responses).await;
-    create_config_toml(&codex_home, &server.uri()).expect("write config");
+    create_config_toml(&codex_home, &server.uri())?;

-    let mut mcp = McpProcess::new(&codex_home)
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init failed");
+    let mut mcp = McpProcess::new(&codex_home).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    let new_conv_id = mcp
        .send_new_conversation_request(NewConversationParams {
@@ -441,33 +389,29 @@ async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() {
            sandbox: Some(SandboxMode::WorkspaceWrite),
            ..Default::default()
        })
-        .await
-        .expect("send newConversation");
+        .await?;
    let new_conv_resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(new_conv_id)),
    )
-    .await
-    .expect("newConversation timeout")
-    .expect("newConversation resp");
+    .await??;
    let NewConversationResponse {
        conversation_id,
        model,
        ..
-    } = to_response::<NewConversationResponse>(new_conv_resp)
-        .expect("deserialize newConversation response");
+    } = to_response::<NewConversationResponse>(new_conv_resp)?;

    let add_listener_id = mcp
-        .send_add_conversation_listener_request(AddConversationListenerParams { conversation_id })
-        .await
-        .expect("send addConversationListener");
+        .send_add_conversation_listener_request(AddConversationListenerParams {
+            conversation_id,
+            experimental_raw_events: false,
+        })
+        .await?;
    timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(add_listener_id)),
    )
-    .await
-    .expect("addConversationListener timeout")
-    .expect("addConversationListener resp");
+    .await??;

    let first_turn_id = mcp
        .send_send_user_turn_request(SendUserTurnParams {
@@ -487,22 +431,17 @@ async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() {
            effort: Some(ReasoningEffort::Medium),
            summary: ReasoningSummary::Auto,
        })
-        .await
-        .expect("send first sendUserTurn");
+        .await?;
    timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(first_turn_id)),
    )
-    .await
-    .expect("sendUserTurn 1 timeout")
-    .expect("sendUserTurn 1 resp");
+    .await??;
    timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_notification_message("codex/event/task_complete"),
    )
-    .await
-    .expect("task_complete 1 timeout")
-    .expect("task_complete 1 notification");
+    .await??;

    let second_turn_id = mcp
        .send_send_user_turn_request(SendUserTurnParams {
@@ -517,23 +456,18 @@ async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() {
            effort: Some(ReasoningEffort::Medium),
            summary: ReasoningSummary::Auto,
        })
-        .await
-        .expect("send second sendUserTurn");
+        .await?;
    timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(second_turn_id)),
    )
-    .await
-    .expect("sendUserTurn 2 timeout")
-    .expect("sendUserTurn 2 resp");
+    .await??;

    let exec_begin_notification = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_notification_message("codex/event/exec_command_begin"),
    )
-    .await
-    .expect("exec_command_begin timeout")
-    .expect("exec_command_begin notification");
+    .await??;
    let params = exec_begin_notification
        .params
        .clone()
@@ -561,9 +495,9 @@ async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() {
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_notification_message("codex/event/task_complete"),
    )
-    .await
-    .expect("task_complete 2 timeout")
-    .expect("task_complete 2 notification");
+    .await??;
+
+    Ok(())
 }

 fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<()> {
--- a/codex-rs/app-server/tests/suite/config.rs
+++ b/codex-rs/app-server/tests/suite/config.rs
@@ -1,6 +1,4 @@
-use std::collections::HashMap;
-use std::path::Path;
-
+use anyhow::Result;
 use app_test_support::McpProcess;
 use app_test_support::to_response;
 use codex_app_server_protocol::GetUserSavedConfigResponse;
@@ -17,6 +15,8 @@ use codex_protocol::config_types::ReasoningSummary;
 use codex_protocol::config_types::SandboxMode;
 use codex_protocol::config_types::Verbosity;
 use pretty_assertions::assert_eq;
+use std::collections::HashMap;
+use std::path::Path;
 use tempfile::TempDir;
 use tokio::time::timeout;

@@ -60,31 +60,21 @@ chatgpt_base_url = "https://api.chatgpt.com"
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
-async fn get_config_toml_parses_all_fields() {
-    let codex_home = TempDir::new().unwrap_or_else(|e| panic!("create tempdir: {e}"));
-    create_config_toml(codex_home.path()).expect("write config.toml");
+async fn get_config_toml_parses_all_fields() -> Result<()> {
+    let codex_home = TempDir::new()?;
+    create_config_toml(codex_home.path())?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init failed");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

-    let request_id = mcp
-        .send_get_user_saved_config_request()
-        .await
-        .expect("send getUserSavedConfig");
+    let request_id = mcp.send_get_user_saved_config_request().await?;
    let resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
    )
-    .await
-    .expect("getUserSavedConfig timeout")
-    .expect("getUserSavedConfig response");
+    .await??;

-    let config: GetUserSavedConfigResponse = to_response(resp).expect("deserialize config");
+    let config: GetUserSavedConfigResponse = to_response(resp)?;
    let expected = GetUserSavedConfigResponse {
        config: UserSavedConfig {
            approval_policy: Some(AskForApproval::OnRequest),
@@ -122,33 +112,24 @@ async fn get_config_toml_parses_all_fields() {
    };

    assert_eq!(config, expected);
+    Ok(())
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn get_config_toml_empty() {
-    let codex_home = TempDir::new().unwrap_or_else(|e| panic!("create tempdir: {e}"));
+async fn get_config_toml_empty() -> Result<()> {
+    let codex_home = TempDir::new()?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init failed");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

-    let request_id = mcp
-        .send_get_user_saved_config_request()
-        .await
-        .expect("send getUserSavedConfig");
+    let request_id = mcp.send_get_user_saved_config_request().await?;
    let resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
    )
-    .await
-    .expect("getUserSavedConfig timeout")
-    .expect("getUserSavedConfig response");
+    .await??;

-    let config: GetUserSavedConfigResponse = to_response(resp).expect("deserialize config");
+    let config: GetUserSavedConfigResponse = to_response(resp)?;
    let expected = GetUserSavedConfigResponse {
        config: UserSavedConfig {
            approval_policy: None,
@@ -167,4 +148,5 @@ async fn get_config_toml_empty() {
    };

    assert_eq!(config, expected);
+    Ok(())
 }
--- a/codex-rs/app-server/tests/suite/create_conversation.rs
+++ b/codex-rs/app-server/tests/suite/create_conversation.rs
@@ -1,5 +1,4 @@
-use std::path::Path;
-
+use anyhow::Result;
 use app_test_support::McpProcess;
 use app_test_support::create_final_assistant_message_sse_response;
 use app_test_support::create_mock_chat_completions_server;
@@ -15,31 +14,25 @@ use codex_app_server_protocol::SendUserMessageParams;
 use codex_app_server_protocol::SendUserMessageResponse;
 use pretty_assertions::assert_eq;
 use serde_json::json;
+use std::path::Path;
 use tempfile::TempDir;
 use tokio::time::timeout;

 const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn test_conversation_create_and_send_message_ok() {
+async fn test_conversation_create_and_send_message_ok() -> Result<()> {
    // Mock server – we won't strictly rely on it, but provide one to satisfy any model wiring.
-    let responses = vec![
-        create_final_assistant_message_sse_response("Done").expect("build mock assistant message"),
-    ];
+    let responses = vec![create_final_assistant_message_sse_response("Done")?];
    let server = create_mock_chat_completions_server(responses).await;

    // Temporary Codex home with config pointing at the mock server.
-    let codex_home = TempDir::new().expect("create temp dir");
-    create_config_toml(codex_home.path(), &server.uri()).expect("write config.toml");
+    let codex_home = TempDir::new()?;
+    create_config_toml(codex_home.path(), &server.uri())?;

    // Start MCP server process and initialize.
-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init failed");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    // Create a conversation via the new JSON-RPC API.
    let new_conv_id = mcp
@@ -47,40 +40,35 @@ async fn test_conversation_create_and_send_message_ok() {
            model: Some("o3".to_string()),
            ..Default::default()
        })
-        .await
-        .expect("send newConversation");
+        .await?;
    let new_conv_resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(new_conv_id)),
    )
-    .await
-    .expect("newConversation timeout")
-    .expect("newConversation resp");
+    .await??;
    let NewConversationResponse {
        conversation_id,
        model,
        reasoning_effort: _,
        rollout_path: _,
-    } = to_response::<NewConversationResponse>(new_conv_resp)
-        .expect("deserialize newConversation response");
+    } = to_response::<NewConversationResponse>(new_conv_resp)?;
    assert_eq!(model, "o3");

    // Add a listener so we receive notifications for this conversation (not strictly required for this test).
    let add_listener_id = mcp
-        .send_add_conversation_listener_request(AddConversationListenerParams { conversation_id })
-        .await
-        .expect("send addConversationListener");
+        .send_add_conversation_listener_request(AddConversationListenerParams {
+            conversation_id,
+            experimental_raw_events: false,
+        })
+        .await?;
    let _sub: AddConversationSubscriptionResponse =
        to_response::<AddConversationSubscriptionResponse>(
            timeout(
                DEFAULT_READ_TIMEOUT,
                mcp.read_stream_until_response_message(RequestId::Integer(add_listener_id)),
            )
-            .await
-            .expect("addConversationListener timeout")
-            .expect("addConversationListener resp"),
-        )
-        .expect("deserialize addConversationListener response");
+            .await??,
+        )?;

    // Now send a user message via the wire API and expect an OK (empty object) result.
    let send_id = mcp
@@ -90,36 +78,32 @@ async fn test_conversation_create_and_send_message_ok() {
                text: "Hello".to_string(),
            }],
        })
-        .await
-        .expect("send sendUserMessage");
+        .await?;
    let send_resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(send_id)),
    )
-    .await
-    .expect("sendUserMessage timeout")
-    .expect("sendUserMessage resp");
-    let _ok: SendUserMessageResponse = to_response::<SendUserMessageResponse>(send_resp)
-        .expect("deserialize sendUserMessage response");
+    .await??;
+    let _ok: SendUserMessageResponse = to_response::<SendUserMessageResponse>(send_resp)?;

    // avoid race condition by waiting for the mock server to receive the chat.completions request
    let deadline = std::time::Instant::now() + DEFAULT_READ_TIMEOUT;
-    loop {
+    let requests = loop {
        let requests = server.received_requests().await.unwrap_or_default();
        if !requests.is_empty() {
-            break;
+            break requests;
        }
        if std::time::Instant::now() >= deadline {
            panic!("mock server did not receive the chat.completions request in time");
        }
        tokio::time::sleep(std::time::Duration::from_millis(10)).await;
-    }
+    };

    // Verify the outbound request body matches expectations for Chat Completions.
-    let request = &server.received_requests().await.unwrap()[0];
-    let body = request
-        .body_json::<serde_json::Value>()
-        .expect("parse request body as JSON");
+    let request = requests
+        .first()
+        .expect("mock server should have received at least one request");
+    let body = request.body_json::<serde_json::Value>()?;
    assert_eq!(body["model"], json!("o3"));
    assert!(body["stream"].as_bool().unwrap_or(false));
    let messages = body["messages"]
@@ -130,6 +114,7 @@ async fn test_conversation_create_and_send_message_ok() {
    assert_eq!(last["content"], json!("Hello"));

    drop(server);
+    Ok(())
 }

 // Helper to create a config.toml pointing at the mock model server.
--- a/codex-rs/app-server/tests/suite/fuzzy_file_search.rs
+++ b/codex-rs/app-server/tests/suite/fuzzy_file_search.rs
@@ -1,5 +1,5 @@
-use anyhow::Context;
 use anyhow::Result;
+use anyhow::anyhow;
 use app_test_support::McpProcess;
 use codex_app_server_protocol::JSONRPCResponse;
 use codex_app_server_protocol::RequestId;
@@ -13,48 +13,39 @@ const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn test_fuzzy_file_search_sorts_and_includes_indices() -> Result<()> {
    // Prepare a temporary Codex home and a separate root with test files.
-    let codex_home = TempDir::new().context("create temp codex home")?;
-    let root = TempDir::new().context("create temp search root")?;
+    let codex_home = TempDir::new()?;
+    let root = TempDir::new()?;

    // Create files designed to have deterministic ordering for query "abe".
-    std::fs::write(root.path().join("abc"), "x").context("write file abc")?;
-    std::fs::write(root.path().join("abcde"), "x").context("write file abcde")?;
-    std::fs::write(root.path().join("abexy"), "x").context("write file abexy")?;
-    std::fs::write(root.path().join("zzz.txt"), "x").context("write file zzz")?;
+    std::fs::write(root.path().join("abc"), "x")?;
+    std::fs::write(root.path().join("abcde"), "x")?;
+    std::fs::write(root.path().join("abexy"), "x")?;
+    std::fs::write(root.path().join("zzz.txt"), "x")?;
    let sub_dir = root.path().join("sub");
-    std::fs::create_dir_all(&sub_dir).context("create sub dir")?;
+    std::fs::create_dir_all(&sub_dir)?;
    let sub_abce_path = sub_dir.join("abce");
-    std::fs::write(&sub_abce_path, "x").context("write file sub/abce")?;
+    std::fs::write(&sub_abce_path, "x")?;
    let sub_abce_rel = sub_abce_path
-        .strip_prefix(root.path())
-        .context("strip root prefix from sub/abce")?
+        .strip_prefix(root.path())?
        .to_string_lossy()
        .to_string();

    // Start MCP server and initialize.
-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .context("spawn mcp")?;
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .context("init timeout")?
-        .context("init failed")?;
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    let root_path = root.path().to_string_lossy().to_string();
    // Send fuzzyFileSearch request.
    let request_id = mcp
        .send_fuzzy_file_search_request("abe", vec![root_path.clone()], None)
-        .await
-        .context("send fuzzyFileSearch")?;
+        .await?;

    // Read response and verify shape and ordering.
    let resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
    )
-    .await
-    .context("fuzzyFileSearch timeout")?
-    .context("fuzzyFileSearch resp")?;
+    .await??;

    let value = resp.result;
    // The path separator on Windows affects the score.
@@ -94,24 +85,18 @@ async fn test_fuzzy_file_search_sorts_and_includes_indices() -> Result<()> {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn test_fuzzy_file_search_accepts_cancellation_token() -> Result<()> {
-    let codex_home = TempDir::new().context("create temp codex home")?;
-    let root = TempDir::new().context("create temp search root")?;
+    let codex_home = TempDir::new()?;
+    let root = TempDir::new()?;

-    std::fs::write(root.path().join("alpha.txt"), "contents").context("write alpha")?;
+    std::fs::write(root.path().join("alpha.txt"), "contents")?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .context("spawn mcp")?;
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .context("init timeout")?
-        .context("init failed")?;
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    let root_path = root.path().to_string_lossy().to_string();
    let request_id = mcp
        .send_fuzzy_file_search_request("alp", vec![root_path.clone()], None)
-        .await
-        .context("send fuzzyFileSearch")?;
+        .await?;

    let request_id_2 = mcp
        .send_fuzzy_file_search_request(
@@ -119,23 +104,20 @@ async fn test_fuzzy_file_search_accepts_cancellation_token() -> Result<()> {
            vec![root_path.clone()],
            Some(request_id.to_string()),
        )
-        .await
-        .context("send fuzzyFileSearch")?;
+        .await?;

    let resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(request_id_2)),
    )
-    .await
-    .context("fuzzyFileSearch timeout")?
-    .context("fuzzyFileSearch resp")?;
+    .await??;

    let files = resp
        .result
        .get("files")
-        .context("files key missing")?
+        .ok_or_else(|| anyhow!("files key missing"))?
        .as_array()
-        .context("files not array")?
+        .ok_or_else(|| anyhow!("files not array"))?
        .clone();

    assert_eq!(files.len(), 1);
--- a/codex-rs/app-server/tests/suite/interrupt.rs
+++ b/codex-rs/app-server/tests/suite/interrupt.rs
@@ -88,7 +88,10 @@ async fn shell_command_interruption() -> anyhow::Result<()> {

    // 2) addConversationListener
    let add_listener_id = mcp
-        .send_add_conversation_listener_request(AddConversationListenerParams { conversation_id })
+        .send_add_conversation_listener_request(AddConversationListenerParams {
+            conversation_id,
+            experimental_raw_events: false,
+        })
        .await?;
    let _add_listener_resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
--- a/codex-rs/app-server/tests/suite/list_resume.rs
+++ b/codex-rs/app-server/tests/suite/list_resume.rs
@@ -1,6 +1,4 @@
-use std::fs;
-use std::path::Path;
-
+use anyhow::Result;
 use app_test_support::McpProcess;
 use app_test_support::to_response;
 use codex_app_server_protocol::JSONRPCNotification;
@@ -15,6 +13,8 @@ use codex_app_server_protocol::ServerNotification;
 use codex_app_server_protocol::SessionConfiguredNotification;
 use pretty_assertions::assert_eq;
 use serde_json::json;
+use std::fs;
+use std::path::Path;
 use tempfile::TempDir;
 use tokio::time::timeout;
 use uuid::Uuid;
@@ -22,58 +22,56 @@ use uuid::Uuid;
 const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn test_list_and_resume_conversations() {
+async fn test_list_and_resume_conversations() -> Result<()> {
    // Prepare a temporary CODEX_HOME with a few fake rollout files.
-    let codex_home = TempDir::new().expect("create temp dir");
+    let codex_home = TempDir::new()?;
    create_fake_rollout(
        codex_home.path(),
        "2025-01-02T12-00-00",
        "2025-01-02T12:00:00Z",
        "Hello A",
-    );
+        Some("openai"),
+    )?;
    create_fake_rollout(
        codex_home.path(),
        "2025-01-01T13-00-00",
        "2025-01-01T13:00:00Z",
        "Hello B",
-    );
+        Some("openai"),
+    )?;
    create_fake_rollout(
        codex_home.path(),
        "2025-01-01T12-00-00",
        "2025-01-01T12:00:00Z",
        "Hello C",
-    );
+        None,
+    )?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init failed");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    // Request first page with size 2
    let req_id = mcp
        .send_list_conversations_request(ListConversationsParams {
            page_size: Some(2),
            cursor: None,
+            model_providers: None,
        })
-        .await
-        .expect("send listConversations");
+        .await?;
    let resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(req_id)),
    )
-    .await
-    .expect("listConversations timeout")
-    .expect("listConversations resp");
+    .await??;
    let ListConversationsResponse { items, next_cursor } =
-        to_response::<ListConversationsResponse>(resp).expect("deserialize response");
+        to_response::<ListConversationsResponse>(resp)?;

    assert_eq!(items.len(), 2);
    // Newest first; preview text should match
    assert_eq!(items[0].preview, "Hello A");
    assert_eq!(items[1].preview, "Hello B");
+    assert_eq!(items[0].model_provider, "openai");
+    assert_eq!(items[1].model_provider, "openai");
    assert!(items[0].path.is_absolute());
    assert!(next_cursor.is_some());

@@ -82,24 +80,93 @@ async fn test_list_and_resume_conversations() {
        .send_list_conversations_request(ListConversationsParams {
            page_size: Some(2),
            cursor: next_cursor,
+            model_providers: None,
        })
-        .await
-        .expect("send listConversations page 2");
+        .await?;
    let resp2: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(req_id2)),
    )
-    .await
-    .expect("listConversations page 2 timeout")
-    .expect("listConversations page 2 resp");
+    .await??;
    let ListConversationsResponse {
        items: items2,
        next_cursor: next2,
        ..
-    } = to_response::<ListConversationsResponse>(resp2).expect("deserialize response");
+    } = to_response::<ListConversationsResponse>(resp2)?;
    assert_eq!(items2.len(), 1);
    assert_eq!(items2[0].preview, "Hello C");
-    assert!(next2.is_some());
+    assert_eq!(items2[0].model_provider, "openai");
+    assert_eq!(next2, None);
+
+    // Add a conversation with an explicit non-OpenAI provider for filter tests.
+    create_fake_rollout(
+        codex_home.path(),
+        "2025-01-01T11-30-00",
+        "2025-01-01T11:30:00Z",
+        "Hello TP",
+        Some("test-provider"),
+    )?;
+
+    // Filtering by model provider should return only matching sessions.
+    let filter_req_id = mcp
+        .send_list_conversations_request(ListConversationsParams {
+            page_size: Some(10),
+            cursor: None,
+            model_providers: Some(vec!["test-provider".to_string()]),
+        })
+        .await?;
+    let filter_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(filter_req_id)),
+    )
+    .await??;
+    let ListConversationsResponse {
+        items: filtered_items,
+        next_cursor: filtered_next,
+    } = to_response::<ListConversationsResponse>(filter_resp)?;
+    assert_eq!(filtered_items.len(), 1);
+    assert_eq!(filtered_next, None);
+    assert_eq!(filtered_items[0].preview, "Hello TP");
+    assert_eq!(filtered_items[0].model_provider, "test-provider");
+
+    // Empty filter should include every session regardless of provider metadata.
+    let unfiltered_req_id = mcp
+        .send_list_conversations_request(ListConversationsParams {
+            page_size: Some(10),
+            cursor: None,
+            model_providers: Some(Vec::new()),
+        })
+        .await?;
+    let unfiltered_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(unfiltered_req_id)),
+    )
+    .await??;
+    let ListConversationsResponse {
+        items: unfiltered_items,
+        next_cursor: unfiltered_next,
+    } = to_response::<ListConversationsResponse>(unfiltered_resp)?;
+    assert_eq!(unfiltered_items.len(), 4);
+    assert!(unfiltered_next.is_none());
+
+    let empty_req_id = mcp
+        .send_list_conversations_request(ListConversationsParams {
+            page_size: Some(10),
+            cursor: None,
+            model_providers: Some(vec!["other".to_string()]),
+        })
+        .await?;
+    let empty_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(empty_req_id)),
+    )
+    .await??;
+    let ListConversationsResponse {
+        items: empty_items,
+        next_cursor: empty_next,
+    } = to_response::<ListConversationsResponse>(empty_resp)?;
+    assert!(empty_items.is_empty());
+    assert!(empty_next.is_none());

    // Now resume one of the sessions and expect a SessionConfigured notification and response.
    let resume_req_id = mcp
@@ -110,20 +177,15 @@ async fn test_list_and_resume_conversations() {
                ..Default::default()
            }),
        })
-        .await
-        .expect("send resumeConversation");
+        .await?;

    // Expect a codex/event notification with msg.type == sessionConfigured
    let notification: JSONRPCNotification = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_notification_message("sessionConfigured"),
    )
-    .await
-    .expect("sessionConfigured notification timeout")
-    .expect("sessionConfigured notification");
-    let session_configured: ServerNotification = notification
-        .try_into()
-        .expect("deserialize sessionConfigured notification");
+    .await??;
+    let session_configured: ServerNotification = notification.try_into()?;
    // Basic shape assertion: ensure event type is sessionConfigured
    let ServerNotification::SessionConfigured(SessionConfiguredNotification {
        model,
@@ -141,41 +203,50 @@ async fn test_list_and_resume_conversations() {
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(resume_req_id)),
    )
-    .await
-    .expect("resumeConversation timeout")
-    .expect("resumeConversation resp");
+    .await??;
    let ResumeConversationResponse {
        conversation_id, ..
-    } = to_response::<ResumeConversationResponse>(resume_resp)
-        .expect("deserialize resumeConversation response");
+    } = to_response::<ResumeConversationResponse>(resume_resp)?;
    // conversation id should be a valid UUID
    assert!(!conversation_id.to_string().is_empty());
+
+    Ok(())
 }

-fn create_fake_rollout(codex_home: &Path, filename_ts: &str, meta_rfc3339: &str, preview: &str) {
+fn create_fake_rollout(
+    codex_home: &Path,
+    filename_ts: &str,
+    meta_rfc3339: &str,
+    preview: &str,
+    model_provider: Option<&str>,
+) -> Result<()> {
    let uuid = Uuid::new_v4();
    // sessions/YYYY/MM/DD/ derived from filename_ts (YYYY-MM-DDThh-mm-ss)
    let year = &filename_ts[0..4];
    let month = &filename_ts[5..7];
    let day = &filename_ts[8..10];
    let dir = codex_home.join("sessions").join(year).join(month).join(day);
-    fs::create_dir_all(&dir).unwrap_or_else(|e| panic!("create sessions dir: {e}"));
+    fs::create_dir_all(&dir)?;

    let file_path = dir.join(format!("rollout-{filename_ts}-{uuid}.jsonl"));
    let mut lines = Vec::new();
    // Meta line with timestamp (flattened meta in payload for new schema)
+    let mut payload = json!({
+        "id": uuid,
+        "timestamp": meta_rfc3339,
+        "cwd": "/",
+        "originator": "codex",
+        "cli_version": "0.0.0",
+        "instructions": null,
+    });
+    if let Some(provider) = model_provider {
+        payload["model_provider"] = json!(provider);
+    }
    lines.push(
        json!({
            "timestamp": meta_rfc3339,
            "type": "session_meta",
-            "payload": {
-                "id": uuid,
-                "timestamp": meta_rfc3339,
-                "cwd": "/",
-                "originator": "codex",
-                "cli_version": "0.0.0",
-                "instructions": null
-            }
+            "payload": payload
        })
        .to_string(),
    );
@@ -205,6 +276,6 @@ fn create_fake_rollout(codex_home: &Path, filename_ts: &str, meta_rfc3339: &str,
        })
        .to_string(),
    );
-    fs::write(file_path, lines.join("\n") + "\n")
-        .unwrap_or_else(|e| panic!("write rollout file: {e}"));
+    fs::write(file_path, lines.join("\n") + "\n")?;
+    Ok(())
 }
--- a/codex-rs/app-server/tests/suite/login.rs
+++ b/codex-rs/app-server/tests/suite/login.rs
@@ -1,6 +1,4 @@
-use std::path::Path;
-use std::time::Duration;
-
+use anyhow::Result;
 use app_test_support::McpProcess;
 use app_test_support::to_response;
 use codex_app_server_protocol::CancelLoginChatGptParams;
@@ -12,7 +10,11 @@ use codex_app_server_protocol::JSONRPCResponse;
 use codex_app_server_protocol::LoginChatGptResponse;
 use codex_app_server_protocol::LogoutChatGptResponse;
 use codex_app_server_protocol::RequestId;
+use codex_core::auth::AuthCredentialsStoreMode;
 use codex_login::login_with_api_key;
+use serial_test::serial;
+use std::path::Path;
+use std::time::Duration;
 use tempfile::TempDir;
 use tokio::time::timeout;

@@ -41,32 +43,26 @@ stream_max_retries = 0
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn logout_chatgpt_removes_auth() {
-    let codex_home = TempDir::new().unwrap_or_else(|e| panic!("create tempdir: {e}"));
-    create_config_toml(codex_home.path()).expect("write config.toml");
-    login_with_api_key(codex_home.path(), "sk-test-key").expect("seed api key");
+async fn logout_chatgpt_removes_auth() -> Result<()> {
+    let codex_home = TempDir::new()?;
+    create_config_toml(codex_home.path())?;
+    login_with_api_key(
+        codex_home.path(),
+        "sk-test-key",
+        AuthCredentialsStoreMode::File,
+    )?;
    assert!(codex_home.path().join("auth.json").exists());

-    let mut mcp = McpProcess::new_with_env(codex_home.path(), &[("OPENAI_API_KEY", None)])
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init failed");
+    let mut mcp = McpProcess::new_with_env(codex_home.path(), &[("OPENAI_API_KEY", None)]).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

-    let id = mcp
-        .send_logout_chat_gpt_request()
-        .await
-        .expect("send logoutChatGpt");
+    let id = mcp.send_logout_chat_gpt_request().await?;
    let resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(id)),
    )
-    .await
-    .expect("logoutChatGpt timeout")
-    .expect("logoutChatGpt response");
-    let _ok: LogoutChatGptResponse = to_response(resp).expect("deserialize logout response");
+    .await??;
+    let _ok: LogoutChatGptResponse = to_response(resp)?;

    assert!(
        !codex_home.path().join("auth.json").exists(),
@@ -79,61 +75,47 @@ async fn logout_chatgpt_removes_auth() {
            include_token: Some(true),
            refresh_token: Some(false),
        })
-        .await
-        .expect("send getAuthStatus");
+        .await?;
    let status_resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(status_id)),
    )
-    .await
-    .expect("getAuthStatus timeout")
-    .expect("getAuthStatus response");
-    let status: GetAuthStatusResponse = to_response(status_resp).expect("deserialize status");
+    .await??;
+    let status: GetAuthStatusResponse = to_response(status_resp)?;
    assert_eq!(status.auth_method, None);
    assert_eq!(status.auth_token, None);
+    Ok(())
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn login_and_cancel_chatgpt() {
-    let codex_home = TempDir::new().unwrap_or_else(|e| panic!("create tempdir: {e}"));
-    create_config_toml(codex_home.path()).unwrap_or_else(|err| panic!("write config.toml: {err}"));
+// Serialize tests that launch the login server since it binds to a fixed port.
+#[serial(login_port)]
+async fn login_and_cancel_chatgpt() -> Result<()> {
+    let codex_home = TempDir::new()?;
+    create_config_toml(codex_home.path())?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init failed");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

-    let login_id = mcp
-        .send_login_chat_gpt_request()
-        .await
-        .expect("send loginChatGpt");
+    let login_id = mcp.send_login_chat_gpt_request().await?;
    let login_resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(login_id)),
    )
-    .await
-    .expect("loginChatGpt timeout")
-    .expect("loginChatGpt response");
-    let login: LoginChatGptResponse = to_response(login_resp).expect("deserialize login resp");
+    .await??;
+    let login: LoginChatGptResponse = to_response(login_resp)?;

    let cancel_id = mcp
        .send_cancel_login_chat_gpt_request(CancelLoginChatGptParams {
            login_id: login.login_id,
        })
-        .await
-        .expect("send cancelLoginChatGpt");
+        .await?;
    let cancel_resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(cancel_id)),
    )
-    .await
-    .expect("cancelLoginChatGpt timeout")
-    .expect("cancelLoginChatGpt response");
-    let _ok: CancelLoginChatGptResponse =
-        to_response(cancel_resp).expect("deserialize cancel response");
+    .await??;
+    let _ok: CancelLoginChatGptResponse = to_response(cancel_resp)?;

    // Optionally observe the completion notification; do not fail if it races.
    let maybe_note = timeout(
@@ -144,6 +126,7 @@ async fn login_and_cancel_chatgpt() {
    if maybe_note.is_err() {
        eprintln!("warning: did not observe login_chat_gpt_complete notification after cancel");
    }
+    Ok(())
 }

 fn create_config_toml_forced_login(codex_home: &Path, forced_method: &str) -> std::io::Result<()> {
@@ -176,66 +159,48 @@ forced_chatgpt_workspace_id = "{workspace_id}"
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn login_chatgpt_rejected_when_forced_api() {
-    let codex_home = TempDir::new().unwrap_or_else(|e| panic!("create tempdir: {e}"));
-    create_config_toml_forced_login(codex_home.path(), "api")
-        .unwrap_or_else(|err| panic!("write config.toml: {err}"));
+async fn login_chatgpt_rejected_when_forced_api() -> Result<()> {
+    let codex_home = TempDir::new()?;
+    create_config_toml_forced_login(codex_home.path(), "api")?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init failed");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

-    let request_id = mcp
-        .send_login_chat_gpt_request()
-        .await
-        .expect("send loginChatGpt");
+    let request_id = mcp.send_login_chat_gpt_request().await?;
    let err: JSONRPCError = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_error_message(RequestId::Integer(request_id)),
    )
-    .await
-    .expect("loginChatGpt error timeout")
-    .expect("loginChatGpt error");
+    .await??;

    assert_eq!(
        err.error.message,
        "ChatGPT login is disabled. Use API key login instead."
    );
+    Ok(())
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn login_chatgpt_includes_forced_workspace_query_param() {
-    let codex_home = TempDir::new().unwrap_or_else(|e| panic!("create tempdir: {e}"));
-    create_config_toml_forced_workspace(codex_home.path(), "ws-forced")
-        .unwrap_or_else(|err| panic!("write config.toml: {err}"));
+// Serialize tests that launch the login server since it binds to a fixed port.
+#[serial(login_port)]
+async fn login_chatgpt_includes_forced_workspace_query_param() -> Result<()> {
+    let codex_home = TempDir::new()?;
+    create_config_toml_forced_workspace(codex_home.path(), "ws-forced")?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init failed");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

-    let request_id = mcp
-        .send_login_chat_gpt_request()
-        .await
-        .expect("send loginChatGpt");
+    let request_id = mcp.send_login_chat_gpt_request().await?;
    let resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
    )
-    .await
-    .expect("loginChatGpt timeout")
-    .expect("loginChatGpt response");
+    .await??;

-    let login: LoginChatGptResponse = to_response(resp).expect("deserialize login resp");
+    let login: LoginChatGptResponse = to_response(resp)?;
    assert!(
        login.auth_url.contains("allowed_workspace_id=ws-forced"),
        "auth URL should include forced workspace"
    );
+    Ok(())
 }
--- a/codex-rs/app-server/tests/suite/rate_limits.rs
+++ b/codex-rs/app-server/tests/suite/rate_limits.rs
@@ -1,4 +1,3 @@
-use anyhow::Context;
 use anyhow::Result;
 use app_test_support::ChatGptAuthFixture;
 use app_test_support::McpProcess;
@@ -9,6 +8,7 @@ use codex_app_server_protocol::JSONRPCError;
 use codex_app_server_protocol::JSONRPCResponse;
 use codex_app_server_protocol::LoginApiKeyParams;
 use codex_app_server_protocol::RequestId;
+use codex_core::auth::AuthCredentialsStoreMode;
 use codex_protocol::protocol::RateLimitSnapshot;
 use codex_protocol::protocol::RateLimitWindow;
 use pretty_assertions::assert_eq;
@@ -28,28 +28,18 @@ const INVALID_REQUEST_ERROR_CODE: i64 = -32600;

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn get_account_rate_limits_requires_auth() -> Result<()> {
-    let codex_home = TempDir::new().context("create codex home tempdir")?;
+    let codex_home = TempDir::new()?;

-    let mut mcp = McpProcess::new_with_env(codex_home.path(), &[("OPENAI_API_KEY", None)])
-        .await
-        .context("spawn mcp process")?;
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .context("initialize timeout")?
-        .context("initialize request")?;
+    let mut mcp = McpProcess::new_with_env(codex_home.path(), &[("OPENAI_API_KEY", None)]).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

-    let request_id = mcp
-        .send_get_account_rate_limits_request()
-        .await
-        .context("send account/rateLimits/read")?;
+    let request_id = mcp.send_get_account_rate_limits_request().await?;

    let error: JSONRPCError = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_error_message(RequestId::Integer(request_id)),
    )
-    .await
-    .context("account/rateLimits/read timeout")?
-    .context("account/rateLimits/read error")?;
+    .await??;

    assert_eq!(error.id, RequestId::Integer(request_id));
    assert_eq!(error.error.code, INVALID_REQUEST_ERROR_CODE);
@@ -63,30 +53,20 @@ async fn get_account_rate_limits_requires_auth() -> Result<()> {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn get_account_rate_limits_requires_chatgpt_auth() -> Result<()> {
-    let codex_home = TempDir::new().context("create codex home tempdir")?;
+    let codex_home = TempDir::new()?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .context("spawn mcp process")?;
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .context("initialize timeout")?
-        .context("initialize request")?;
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    login_with_api_key(&mut mcp, "sk-test-key").await?;

-    let request_id = mcp
-        .send_get_account_rate_limits_request()
-        .await
-        .context("send account/rateLimits/read")?;
+    let request_id = mcp.send_get_account_rate_limits_request().await?;

    let error: JSONRPCError = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_error_message(RequestId::Integer(request_id)),
    )
-    .await
-    .context("account/rateLimits/read timeout")?
-    .context("account/rateLimits/read error")?;
+    .await??;

    assert_eq!(error.id, RequestId::Integer(request_id));
    assert_eq!(error.error.code, INVALID_REQUEST_ERROR_CODE);
@@ -100,18 +80,18 @@ async fn get_account_rate_limits_requires_chatgpt_auth() -> Result<()> {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn get_account_rate_limits_returns_snapshot() -> Result<()> {
-    let codex_home = TempDir::new().context("create codex home tempdir")?;
+    let codex_home = TempDir::new()?;
    write_chatgpt_auth(
        codex_home.path(),
        ChatGptAuthFixture::new("chatgpt-token")
            .account_id("account-123")
            .plan_type("pro"),
-    )
-    .context("write chatgpt auth")?;
+        AuthCredentialsStoreMode::File,
+    )?;

    let server = MockServer::start().await;
    let server_url = server.uri();
-    write_chatgpt_base_url(codex_home.path(), &server_url).context("write chatgpt base url")?;
+    write_chatgpt_base_url(codex_home.path(), &server_url)?;

    let primary_reset_timestamp = chrono::DateTime::parse_from_rfc3339("2025-01-01T00:02:00Z")
        .expect("parse primary reset timestamp")
@@ -147,29 +127,18 @@ async fn get_account_rate_limits_returns_snapshot() -> Result<()> {
        .mount(&server)
        .await;

-    let mut mcp = McpProcess::new_with_env(codex_home.path(), &[("OPENAI_API_KEY", None)])
-        .await
-        .context("spawn mcp process")?;
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .context("initialize timeout")?
-        .context("initialize request")?;
+    let mut mcp = McpProcess::new_with_env(codex_home.path(), &[("OPENAI_API_KEY", None)]).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

-    let request_id = mcp
-        .send_get_account_rate_limits_request()
-        .await
-        .context("send account/rateLimits/read")?;
+    let request_id = mcp.send_get_account_rate_limits_request().await?;

    let response: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
    )
-    .await
-    .context("account/rateLimits/read timeout")?
-    .context("account/rateLimits/read response")?;
+    .await??;

-    let received: GetAccountRateLimitsResponse =
-        to_response(response).context("deserialize rate limit response")?;
+    let received: GetAccountRateLimitsResponse = to_response(response)?;

    let expected = GetAccountRateLimitsResponse {
        rate_limits: RateLimitSnapshot {
@@ -195,16 +164,13 @@ async fn login_with_api_key(mcp: &mut McpProcess, api_key: &str) -> Result<()> {
        .send_login_api_key_request(LoginApiKeyParams {
            api_key: api_key.to_string(),
        })
-        .await
-        .context("send loginApiKey")?;
+        .await?;

    timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
    )
-    .await
-    .context("loginApiKey timeout")?
-    .context("loginApiKey response")?;
+    .await??;

    Ok(())
 }
--- a/codex-rs/app-server/tests/suite/send_message.rs
+++ b/codex-rs/app-server/tests/suite/send_message.rs
@@ -1,5 +1,4 @@
-use std::path::Path;
-
+use anyhow::Result;
 use app_test_support::McpProcess;
 use app_test_support::create_final_assistant_message_sse_response;
 use app_test_support::create_mock_chat_completions_server;
@@ -15,73 +14,73 @@ use codex_app_server_protocol::RequestId;
 use codex_app_server_protocol::SendUserMessageParams;
 use codex_app_server_protocol::SendUserMessageResponse;
 use codex_protocol::ConversationId;
+use codex_protocol::models::ContentItem;
+use codex_protocol::models::ResponseItem;
 use pretty_assertions::assert_eq;
+use std::path::Path;
 use tempfile::TempDir;
 use tokio::time::timeout;

 const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);

 #[tokio::test]
-async fn test_send_message_success() {
+async fn test_send_message_success() -> Result<()> {
    // Spin up a mock completions server that immediately ends the Codex turn.
    // Two Codex turns hit the mock model (session start + send-user-message). Provide two SSE responses.
    let responses = vec![
-        create_final_assistant_message_sse_response("Done").expect("build mock assistant message"),
-        create_final_assistant_message_sse_response("Done").expect("build mock assistant message"),
+        create_final_assistant_message_sse_response("Done")?,
+        create_final_assistant_message_sse_response("Done")?,
    ];
    let server = create_mock_chat_completions_server(responses).await;

    // Create a temporary Codex home with config pointing at the mock server.
-    let codex_home = TempDir::new().expect("create temp dir");
-    create_config_toml(codex_home.path(), &server.uri()).expect("write config.toml");
+    let codex_home = TempDir::new()?;
+    create_config_toml(codex_home.path(), &server.uri())?;

    // Start MCP server process and initialize.
-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timed out")
-        .expect("init failed");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    // Start a conversation using the new wire API.
    let new_conv_id = mcp
        .send_new_conversation_request(NewConversationParams::default())
-        .await
-        .expect("send newConversation");
+        .await?;
    let new_conv_resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(new_conv_id)),
    )
-    .await
-    .expect("newConversation timeout")
-    .expect("newConversation resp");
+    .await??;
    let NewConversationResponse {
        conversation_id, ..
-    } = to_response::<_>(new_conv_resp).expect("deserialize newConversation response");
+    } = to_response::<_>(new_conv_resp)?;

    // 2) addConversationListener
    let add_listener_id = mcp
-        .send_add_conversation_listener_request(AddConversationListenerParams { conversation_id })
-        .await
-        .expect("send addConversationListener");
+        .send_add_conversation_listener_request(AddConversationListenerParams {
+            conversation_id,
+            experimental_raw_events: false,
+        })
+        .await?;
    let add_listener_resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(add_listener_id)),
    )
-    .await
-    .expect("addConversationListener timeout")
-    .expect("addConversationListener resp");
+    .await??;
    let AddConversationSubscriptionResponse { subscription_id: _ } =
-        to_response::<_>(add_listener_resp).expect("deserialize addConversationListener response");
+        to_response::<_>(add_listener_resp)?;

    // Now exercise sendUserMessage twice.
-    send_message("Hello", conversation_id, &mut mcp).await;
-    send_message("Hello again", conversation_id, &mut mcp).await;
+    send_message("Hello", conversation_id, &mut mcp).await?;
+    send_message("Hello again", conversation_id, &mut mcp).await?;
+    Ok(())
 }

 #[expect(clippy::expect_used)]
-async fn send_message(message: &str, conversation_id: ConversationId, mcp: &mut McpProcess) {
+async fn send_message(
+    message: &str,
+    conversation_id: ConversationId,
+    mcp: &mut McpProcess,
+) -> Result<()> {
    // Now exercise sendUserMessage.
    let send_id = mcp
        .send_send_user_message_request(SendUserMessageParams {
@@ -90,19 +89,15 @@ async fn send_message(message: &str, conversation_id: ConversationId, mcp: &mut
                text: message.to_string(),
            }],
        })
-        .await
-        .expect("send sendUserMessage");
+        .await?;

    let response: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(send_id)),
    )
-    .await
-    .expect("sendUserMessage response timeout")
-    .expect("sendUserMessage response error");
+    .await??;

-    let _ok: SendUserMessageResponse = to_response::<SendUserMessageResponse>(response)
-        .expect("deserialize sendUserMessage response");
+    let _ok: SendUserMessageResponse = to_response::<SendUserMessageResponse>(response)?;

    // Verify the task_finished notification is received.
    // Note this also ensures that the final request to the server was made.
@@ -110,9 +105,7 @@ async fn send_message(message: &str, conversation_id: ConversationId, mcp: &mut
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_notification_message("codex/event/task_complete"),
    )
-    .await
-    .expect("task_finished_notification timeout")
-    .expect("task_finished_notification resp");
+    .await??;
    let serde_json::Value::Object(map) = task_finished_notification
        .params
        .expect("notification should have params")
@@ -124,17 +117,99 @@ async fn send_message(message: &str, conversation_id: ConversationId, mcp: &mut
            .expect("should have conversationId"),
        &serde_json::Value::String(conversation_id.to_string())
    );
+
+    let raw_attempt = tokio::time::timeout(
+        std::time::Duration::from_millis(200),
+        mcp.read_stream_until_notification_message("codex/event/raw_response_item"),
+    )
+    .await;
+    assert!(
+        raw_attempt.is_err(),
+        "unexpected raw item notification when not opted in"
+    );
+    Ok(())
 }

 #[tokio::test]
-async fn test_send_message_session_not_found() {
+// Checks that a listener registered with `experimental_raw_events: true`
+// receives `codex/event/raw_response_item` notifications. The test expects
+// four raw items in this order: instructions, environment context, the user
+// message ("Hello"), and the assistant reply ("Done").
+async fn test_send_message_raw_notifications_opt_in() -> Result<()> {
+    // Mock completions server scripted with a single "Done" SSE response.
+    let responses = vec![create_final_assistant_message_sse_response("Done")?];
+    let server = create_mock_chat_completions_server(responses).await;
+
+    // Temporary Codex home whose config is written with the mock server URI.
+    let codex_home = TempDir::new()?;
+    create_config_toml(codex_home.path(), &server.uri())?;
+
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
+
+    let new_conv_id = mcp
+        .send_new_conversation_request(NewConversationParams::default())
+        .await?;
+    let new_conv_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(new_conv_id)),
+    )
+    .await??;
+    let NewConversationResponse {
+        conversation_id, ..
+    } = to_response::<_>(new_conv_resp)?;
+
+    // Opt in to raw events when subscribing to the conversation.
+    let add_listener_id = mcp
+        .send_add_conversation_listener_request(AddConversationListenerParams {
+            conversation_id,
+            experimental_raw_events: true,
+        })
+        .await?;
+    let add_listener_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(add_listener_id)),
+    )
+    .await??;
+    let AddConversationSubscriptionResponse { subscription_id: _ } =
+        to_response::<_>(add_listener_resp)?;
+
+    let send_id = mcp
+        .send_send_user_message_request(SendUserMessageParams {
+            conversation_id,
+            items: vec![InputItem::Text {
+                text: "Hello".to_string(),
+            }],
+        })
+        .await?;
+
+    // The first two raw items are read before the sendUserMessage response.
+    // NOTE(review): this ordering presumably mirrors the order in which the
+    // server emits them — confirm before reordering any of these reads.
+    let instructions = read_raw_response_item(&mut mcp, conversation_id).await;
+    assert_instructions_message(&instructions);
+
+    let environment = read_raw_response_item(&mut mcp, conversation_id).await;
+    assert_environment_message(&environment);
+
+    let response: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(send_id)),
+    )
+    .await??;
+    let _ok: SendUserMessageResponse = to_response::<SendUserMessageResponse>(response)?;
+
+    // The remaining raw items echo the user input and the mocked reply.
+    let user_message = read_raw_response_item(&mut mcp, conversation_id).await;
+    assert_user_message(&user_message, "Hello");
+
+    let assistant_message = read_raw_response_item(&mut mcp, conversation_id).await;
+    assert_assistant_message(&assistant_message, "Done");
+
+    // Best-effort wait for task_complete: both a timeout and a read error are
+    // deliberately ignored, so this never fails the test.
+    let _ = tokio::time::timeout(
+        std::time::Duration::from_millis(250),
+        mcp.read_stream_until_notification_message("codex/event/task_complete"),
+    )
+    .await;
+
+    Ok(())
+}
+
+#[tokio::test]
+async fn test_send_message_session_not_found() -> Result<()> {
    // Start MCP without creating a Codex session
-    let codex_home = TempDir::new().expect("tempdir");
-    let mut mcp = McpProcess::new(codex_home.path()).await.expect("spawn");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("timeout")
-        .expect("init");
+    let codex_home = TempDir::new()?;
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    let unknown = ConversationId::new();
    let req_id = mcp
@@ -144,18 +219,16 @@ async fn test_send_message_session_not_found() {
                text: "ping".to_string(),
            }],
        })
-        .await
-        .expect("send sendUserMessage");
+        .await?;

    // Expect an error response for unknown conversation.
    let err = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_error_message(RequestId::Integer(req_id)),
    )
-    .await
-    .expect("timeout")
-    .expect("error");
+    .await??;
    assert_eq!(err.id, RequestId::Integer(req_id));
+    Ok(())
 }

 // ---------------------------------------------------------------------------
@@ -184,3 +257,108 @@ stream_max_retries = 0
        ),
    )
 }
+
+/// Reads the next `codex/event/raw_response_item` notification from `mcp` and
+/// returns its `msg` payload deserialized as a [`ResponseItem`].
+///
+/// Panics (failing the calling test) if the notification does not arrive
+/// within `DEFAULT_READ_TIMEOUT`, if its params are missing or are not a JSON
+/// object, if `conversationId` is absent or differs from `conversation_id`,
+/// or if `msg` is absent or cannot be deserialized.
+#[expect(clippy::expect_used)]
+async fn read_raw_response_item(
+    mcp: &mut McpProcess,
+    conversation_id: ConversationId,
+) -> ResponseItem {
+    let raw_notification: JSONRPCNotification = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_notification_message("codex/event/raw_response_item"),
+    )
+    .await
+    .expect("codex/event/raw_response_item notification timeout")
+    .expect("codex/event/raw_response_item notification resp");
+
+    // `expect` handles absent params; the `else` arm is reached only when
+    // params are present but not a JSON object, so it reports that case.
+    let serde_json::Value::Object(params) = raw_notification
+        .params
+        .expect("codex/event/raw_response_item should have params")
+    else {
+        panic!("codex/event/raw_response_item params should be a JSON object");
+    };
+
+    let conversation_id_value = params
+        .get("conversationId")
+        .and_then(|value| value.as_str())
+        .expect("raw response item should include conversationId");
+
+    // Guard against picking up a notification for a different conversation.
+    assert_eq!(
+        conversation_id_value,
+        conversation_id.to_string(),
+        "raw response item conversation mismatch"
+    );
+
+    let msg_value = params
+        .get("msg")
+        .cloned()
+        .expect("raw response item should include msg payload");
+
+    serde_json::from_value(msg_value).expect("deserialize raw response item")
+}
+
+/// Asserts that `item` is a user-role message with at least one text part
+/// containing the `<user_instructions>` marker; panics otherwise.
+fn assert_instructions_message(item: &ResponseItem) {
+    // Any variant other than `Message` fails the test immediately.
+    let ResponseItem::Message { role, content, .. } = item else {
+        panic!("expected instructions message, got {item:?}");
+    };
+    assert_eq!(role, "user");
+    let texts = content_texts(content);
+    let has_marker = texts
+        .iter()
+        .any(|text| text.contains("<user_instructions>"));
+    assert!(has_marker, "expected instructions message, got {texts:?}");
+}
+
+/// Asserts that `item` is a user-role message with at least one text part
+/// containing the `<environment_context>` marker; panics otherwise.
+fn assert_environment_message(item: &ResponseItem) {
+    // Any variant other than `Message` fails the test immediately.
+    let ResponseItem::Message { role, content, .. } = item else {
+        panic!("expected environment message, got {item:?}");
+    };
+    assert_eq!(role, "user");
+    let texts = content_texts(content);
+    let has_marker = texts
+        .iter()
+        .any(|text| text.contains("<environment_context>"));
+    assert!(
+        has_marker,
+        "expected environment context message, got {texts:?}"
+    );
+}
+
+/// Asserts that `item` is a user-role message whose text parts, as collected
+/// by `content_texts`, are exactly `[expected_text]`; panics otherwise.
+fn assert_user_message(item: &ResponseItem, expected_text: &str) {
+    let ResponseItem::Message { role, content, .. } = item else {
+        panic!("expected user message, got {item:?}");
+    };
+    assert_eq!(role, "user");
+    assert_eq!(content_texts(content), vec![expected_text]);
+}
+
+/// Asserts that `item` is an assistant-role message whose text parts, as
+/// collected by `content_texts`, are exactly `[expected_text]`; panics
+/// otherwise.
+fn assert_assistant_message(item: &ResponseItem, expected_text: &str) {
+    let ResponseItem::Message { role, content, .. } = item else {
+        panic!("expected assistant message, got {item:?}");
+    };
+    assert_eq!(role, "assistant");
+    assert_eq!(content_texts(content), vec![expected_text]);
+}
+
+/// Collects, in order, the text of every `InputText` and `OutputText` entry
+/// in `content`; entries of any other kind are skipped.
+fn content_texts(content: &[ContentItem]) -> Vec<&str> {
+    let mut texts = Vec::new();
+    for item in content {
+        if let ContentItem::InputText { text } | ContentItem::OutputText { text } = item {
+            texts.push(text.as_str());
+        }
+    }
+    texts
+}
--- a/codex-rs/app-server/tests/suite/set_default_model.rs
+++ b/codex-rs/app-server/tests/suite/set_default_model.rs
@@ -1,5 +1,4 @@
-use std::path::Path;
-
+use anyhow::Result;
 use app_test_support::McpProcess;
 use app_test_support::to_response;
 use codex_app_server_protocol::JSONRPCResponse;
@@ -8,50 +7,38 @@ use codex_app_server_protocol::SetDefaultModelParams;
 use codex_app_server_protocol::SetDefaultModelResponse;
 use codex_core::config::ConfigToml;
 use pretty_assertions::assert_eq;
+use std::path::Path;
 use tempfile::TempDir;
 use tokio::time::timeout;

 const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn set_default_model_persists_overrides() {
-    let codex_home = TempDir::new().expect("create tempdir");
-    create_config_toml(codex_home.path()).expect("write config.toml");
+async fn set_default_model_persists_overrides() -> Result<()> {
+    let codex_home = TempDir::new()?;
+    create_config_toml(codex_home.path())?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init failed");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    let params = SetDefaultModelParams {
        model: Some("gpt-4.1".to_string()),
        reasoning_effort: None,
    };

-    let request_id = mcp
-        .send_set_default_model_request(params)
-        .await
-        .expect("send setDefaultModel");
+    let request_id = mcp.send_set_default_model_request(params).await?;

    let resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
    )
-    .await
-    .expect("setDefaultModel timeout")
-    .expect("setDefaultModel response");
+    .await??;

-    let _: SetDefaultModelResponse =
-        to_response(resp).expect("deserialize setDefaultModel response");
+    let _: SetDefaultModelResponse = to_response(resp)?;

    let config_path = codex_home.path().join("config.toml");
-    let config_contents = tokio::fs::read_to_string(&config_path)
-        .await
-        .expect("read config.toml");
-    let config_toml: ConfigToml = toml::from_str(&config_contents).expect("parse config.toml");
+    let config_contents = tokio::fs::read_to_string(&config_path).await?;
+    let config_toml: ConfigToml = toml::from_str(&config_contents)?;

    assert_eq!(
        ConfigToml {
@@ -61,6 +48,7 @@ async fn set_default_model_persists_overrides() {
        },
        config_toml,
    );
+    Ok(())
 }

 // Helper to create a config.toml; mirrors create_conversation.rs
--- a/codex-rs/app-server/tests/suite/user_agent.rs
+++ b/codex-rs/app-server/tests/suite/user_agent.rs
@@ -1,3 +1,4 @@
+use anyhow::Result;
 use app_test_support::McpProcess;
 use app_test_support::to_response;
 use codex_app_server_protocol::GetUserAgentResponse;
@@ -10,28 +11,18 @@ use tokio::time::timeout;
 const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn get_user_agent_returns_current_codex_user_agent() {
-    let codex_home = TempDir::new().unwrap_or_else(|err| panic!("create tempdir: {err}"));
+async fn get_user_agent_returns_current_codex_user_agent() -> Result<()> {
+    let codex_home = TempDir::new()?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("initialize timeout")
-        .expect("initialize request");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

-    let request_id = mcp
-        .send_get_user_agent_request()
-        .await
-        .expect("send getUserAgent");
+    let request_id = mcp.send_get_user_agent_request().await?;
    let response: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
    )
-    .await
-    .expect("getUserAgent timeout")
-    .expect("getUserAgent response");
+    .await??;

    let os_info = os_info::get();
    let user_agent = format!(
@@ -42,9 +33,9 @@ async fn get_user_agent_returns_current_codex_user_agent() {
        codex_core::terminal::user_agent()
    );

-    let received: GetUserAgentResponse =
-        to_response(response).expect("deserialize getUserAgent response");
+    let received: GetUserAgentResponse = to_response(response)?;
    let expected = GetUserAgentResponse { user_agent };

    assert_eq!(received, expected);
+    Ok(())
 }
--- a/codex-rs/app-server/tests/suite/user_info.rs
+++ b/codex-rs/app-server/tests/suite/user_info.rs
@@ -1,5 +1,4 @@
-use std::time::Duration;
-
+use anyhow::Result;
 use app_test_support::ChatGptAuthFixture;
 use app_test_support::McpProcess;
 use app_test_support::to_response;
@@ -7,45 +6,41 @@ use app_test_support::write_chatgpt_auth;
 use codex_app_server_protocol::JSONRPCResponse;
 use codex_app_server_protocol::RequestId;
 use codex_app_server_protocol::UserInfoResponse;
+use codex_core::auth::AuthCredentialsStoreMode;
 use pretty_assertions::assert_eq;
+use std::time::Duration;
 use tempfile::TempDir;
 use tokio::time::timeout;

 const DEFAULT_READ_TIMEOUT: Duration = Duration::from_secs(10);

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn user_info_returns_email_from_auth_json() {
-    let codex_home = TempDir::new().expect("create tempdir");
+async fn user_info_returns_email_from_auth_json() -> Result<()> {
+    let codex_home = TempDir::new()?;

    write_chatgpt_auth(
        codex_home.path(),
        ChatGptAuthFixture::new("access")
            .refresh_token("refresh")
            .email("user@example.com"),
-    )
-    .expect("write chatgpt auth");
+        AuthCredentialsStoreMode::File,
+    )?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("initialize timeout")
-        .expect("initialize request");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

-    let request_id = mcp.send_user_info_request().await.expect("send userInfo");
+    let request_id = mcp.send_user_info_request().await?;
    let response: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
    )
-    .await
-    .expect("userInfo timeout")
-    .expect("userInfo response");
+    .await??;

-    let received: UserInfoResponse = to_response(response).expect("deserialize userInfo response");
+    let received: UserInfoResponse = to_response(response)?;
    let expected = UserInfoResponse {
        alleged_user_email: Some("user@example.com".to_string()),
    };

    assert_eq!(received, expected);
+    Ok(())
 }
--- a/codex-rs/apply-patch/tests/suite/mod.rs
+++ b/codex-rs/apply-patch/tests/suite/mod.rs
@@ -1 +1,3 @@
 mod cli;
+#[cfg(not(target_os = "windows"))]
+mod tool;
--- a/codex-rs/apply-patch/tests/suite/tool.rs
+++ b/codex-rs/apply-patch/tests/suite/tool.rs
@@ -0,0 +1,257 @@
+use assert_cmd::Command;
+use pretty_assertions::assert_eq;
+use std::fs;
+use std::path::Path;
+use tempfile::tempdir;
+
+/// Runs the `apply_patch` binary from `dir` with `patch` as its only argument and returns the
+/// assertion handle; command setup is shared with `apply_patch_command` instead of duplicated.
+fn run_apply_patch_in_dir(dir: &Path, patch: &str) -> anyhow::Result<assert_cmd::assert::Assert> {
+    Ok(apply_patch_command(dir)?.arg(patch).assert())
+}
+
+fn apply_patch_command(dir: &Path) -> anyhow::Result<Command> {
+    // Locate the `apply_patch` cargo binary and run it with `dir` as the working directory.
+    let mut apply_patch = Command::cargo_bin("apply_patch")?;
+    apply_patch.current_dir(dir);
+    Ok(apply_patch)
+}
+
+/// One patch that adds, deletes, and updates files reports all three and applies each change.
+#[test]
+fn test_apply_patch_cli_applies_multiple_operations() -> anyhow::Result<()> {
+    let workdir = tempdir()?;
+    let updated = workdir.path().join("modify.txt");
+    let removed = workdir.path().join("delete.txt");
+    fs::write(&updated, "line1\nline2\n")?;
+    fs::write(&removed, "obsolete\n")?;
+
+    // Add, delete, and update hunks are combined in a single patch.
+    let patch = "*** Begin Patch\n*** Add File: nested/new.txt\n+created\n*** Delete File: delete.txt\n*** Update File: modify.txt\n@@\n-line2\n+changed\n*** End Patch";
+    let expected_stdout =
+        "Success. Updated the following files:\nA nested/new.txt\nM modify.txt\nD delete.txt\n";
+    run_apply_patch_in_dir(workdir.path(), patch)?
+        .success()
+        .stdout(expected_stdout);
+
+    let created = fs::read_to_string(workdir.path().join("nested/new.txt"))?;
+    assert_eq!(created, "created\n");
+    assert_eq!(fs::read_to_string(&updated)?, "line1\nchanged\n");
+    assert!(!removed.exists());
+
+    Ok(())
+}
+
+/// Two `@@` chunks inside one update hunk are both applied to the same file.
+#[test]
+fn test_apply_patch_cli_applies_multiple_chunks() -> anyhow::Result<()> {
+    let workdir = tempdir()?;
+    let multi = workdir.path().join("multi.txt");
+    fs::write(&multi, "line1\nline2\nline3\nline4\n")?;
+
+    // Each chunk rewrites a different line of the file.
+    let patch = "*** Begin Patch\n*** Update File: multi.txt\n@@\n-line2\n+changed2\n@@\n-line4\n+changed4\n*** End Patch";
+    let outcome = run_apply_patch_in_dir(workdir.path(), patch)?;
+    outcome
+        .success()
+        .stdout("Success. Updated the following files:\nM multi.txt\n");
+
+    let contents = fs::read_to_string(&multi)?;
+    assert_eq!(contents, "line1\nchanged2\nline3\nchanged4\n");
+
+    Ok(())
+}
+
+/// An update hunk with `*** Move to:` lands the file in a directory the test never creates.
+#[test]
+fn test_apply_patch_cli_moves_file_to_new_directory() -> anyhow::Result<()> {
+    let workdir = tempdir()?;
+    let source = workdir.path().join("old/name.txt");
+    let target = workdir.path().join("renamed/dir/name.txt");
+    fs::create_dir_all(source.parent().expect("parent should exist"))?;
+    fs::write(&source, "old content\n")?;
+
+    let patch = "*** Begin Patch\n*** Update File: old/name.txt\n*** Move to: renamed/dir/name.txt\n@@\n-old content\n+new content\n*** End Patch";
+    apply_patch_command(workdir.path())?
+        .arg(patch)
+        .assert()
+        .success()
+        .stdout("Success. Updated the following files:\nM renamed/dir/name.txt\n");
+    assert!(!source.exists());
+    assert_eq!(fs::read_to_string(&target)?, "new content\n");
+    Ok(())
+}
+
+/// A patch without any hunks fails and reports that nothing was modified.
+#[test]
+fn test_apply_patch_cli_rejects_empty_patch() -> anyhow::Result<()> {
+    let workdir = tempdir()?;
+    let empty_patch = "*** Begin Patch\n*** End Patch";
+
+    run_apply_patch_in_dir(workdir.path(), empty_patch)?
+        .failure()
+        .stderr("No files were modified.\n");
+
+    Ok(())
+}
+
+/// An update whose removed line is absent from the file fails and names the missing lines.
+#[test]
+fn test_apply_patch_cli_reports_missing_context() -> anyhow::Result<()> {
+    let workdir = tempdir()?;
+    let target = workdir.path().join("modify.txt");
+    fs::write(&target, "line1\nline2\n")?;
+
+    let patch = "*** Begin Patch\n*** Update File: modify.txt\n@@\n-missing\n+changed\n*** End Patch";
+    run_apply_patch_in_dir(workdir.path(), patch)?
+        .failure()
+        .stderr("Failed to find expected lines in modify.txt:\nmissing\n");
+    // The rejected patch must leave the file exactly as it was written.
+    assert_eq!(fs::read_to_string(&target)?, "line1\nline2\n");
+    Ok(())
+}
+
+/// Deleting a file that does not exist is reported as a failure.
+#[test]
+fn test_apply_patch_cli_rejects_missing_file_delete() -> anyhow::Result<()> {
+    let workdir = tempdir()?;
+    let patch = "*** Begin Patch\n*** Delete File: missing.txt\n*** End Patch";
+
+    run_apply_patch_in_dir(workdir.path(), patch)?
+        .failure()
+        .stderr("Failed to delete file missing.txt\n");
+
+    Ok(())
+}
+
+/// An update hunk that contains no chunks is rejected as an invalid hunk.
+#[test]
+fn test_apply_patch_cli_rejects_empty_update_hunk() -> anyhow::Result<()> {
+    let workdir = tempdir()?;
+    let patch = "*** Begin Patch\n*** Update File: foo.txt\n*** End Patch";
+
+    run_apply_patch_in_dir(workdir.path(), patch)?
+        .failure()
+        .stderr("Invalid patch hunk on line 2: Update file hunk for path 'foo.txt' is empty\n");
+
+    Ok(())
+}
+
+#[test]
+fn test_apply_patch_cli_requires_existing_file_for_update() -> anyhow::Result<()> {
+    let tmp = tempdir()?;
+
+    apply_patch_command(tmp.path())?
+        .arg("*** Begin Patch\n*** Update File: missing.txt\n@@\n-old\n+new\n*** End Patch")
+        .assert()
+        .failure()
+        .stderr(
+            "Failed to read file to update missing.txt: No such file or directory (os error 2)\n",
+        );
+
+    Ok(())
+}
+
+/// Moving onto a path that already holds a file replaces that file's content.
+#[test]
+fn test_apply_patch_cli_move_overwrites_existing_destination() -> anyhow::Result<()> {
+    let workdir = tempdir()?;
+    let source = workdir.path().join("old/name.txt");
+    let target = workdir.path().join("renamed/dir/name.txt");
+    fs::create_dir_all(source.parent().expect("parent should exist"))?;
+    fs::create_dir_all(target.parent().expect("parent should exist"))?;
+    fs::write(&source, "from\n")?;
+    // Pre-populate the destination so the move has something to replace.
+    fs::write(&target, "existing\n")?;
+
+    let patch = "*** Begin Patch\n*** Update File: old/name.txt\n*** Move to: renamed/dir/name.txt\n@@\n-from\n+new\n*** End Patch";
+    apply_patch_command(workdir.path())?
+        .arg(patch)
+        .assert()
+        .success()
+        .stdout("Success. Updated the following files:\nM renamed/dir/name.txt\n");
+    assert!(!source.exists());
+    assert_eq!(fs::read_to_string(&target)?, "new\n");
+    Ok(())
+}
+
+/// An add hunk that targets an existing file succeeds and replaces its content.
+#[test]
+fn test_apply_patch_cli_add_overwrites_existing_file() -> anyhow::Result<()> {
+    let workdir = tempdir()?;
+    let existing = workdir.path().join("duplicate.txt");
+    fs::write(&existing, "old content\n")?;
+
+    let patch = "*** Begin Patch\n*** Add File: duplicate.txt\n+new content\n*** End Patch";
+    apply_patch_command(workdir.path())?
+        .arg(patch)
+        .assert()
+        .success()
+        .stdout("Success. Updated the following files:\nA duplicate.txt\n");
+
+    assert_eq!(fs::read_to_string(&existing)?, "new content\n");
+    Ok(())
+}
+
+/// A delete hunk that names a directory rather than a file is reported as a failure.
+#[test]
+fn test_apply_patch_cli_delete_directory_fails() -> anyhow::Result<()> {
+    let workdir = tempdir()?;
+    fs::create_dir(workdir.path().join("dir"))?;
+
+    let patch = "*** Begin Patch\n*** Delete File: dir\n*** End Patch";
+    run_apply_patch_in_dir(workdir.path(), patch)?
+        .failure()
+        .stderr("Failed to delete file dir\n");
+
+    Ok(())
+}
+
+/// An unrecognised hunk header is rejected with a message listing the valid headers.
+#[test]
+fn test_apply_patch_cli_rejects_invalid_hunk_header() -> anyhow::Result<()> {
+    let workdir = tempdir()?;
+    let patch = "*** Begin Patch\n*** Frobnicate File: foo\n*** End Patch";
+    let expected_stderr = "Invalid patch hunk on line 2: '*** Frobnicate File: foo' is not a valid hunk header. Valid hunk headers: '*** Add File: {path}', '*** Delete File: {path}', '*** Update File: {path}'\n";
+
+    run_apply_patch_in_dir(workdir.path(), patch)?
+        .failure()
+        .stderr(expected_stderr);
+    Ok(())
+}
+
+/// Updating a file that lacks a final newline yields content that ends with one.
+#[test]
+fn test_apply_patch_cli_updates_file_appends_trailing_newline() -> anyhow::Result<()> {
+    let workdir = tempdir()?;
+    let target = workdir.path().join("no_newline.txt");
+    // Seed the file without a terminating newline.
+    fs::write(&target, "no newline at end")?;
+    let patch = "*** Begin Patch\n*** Update File: no_newline.txt\n@@\n-no newline at end\n+first line\n+second line\n*** End Patch";
+    apply_patch_command(workdir.path())?
+        .arg(patch)
+        .assert()
+        .success()
+        .stdout("Success. Updated the following files:\nM no_newline.txt\n");
+
+    let updated = fs::read_to_string(&target)?;
+    assert!(updated.ends_with('\n'));
+    assert_eq!(updated, "first line\nsecond line\n");
+    Ok(())
+}
+
+/// When a later hunk fails, the file written by an earlier add hunk is expected to remain.
+#[test]
+fn test_apply_patch_cli_failure_after_partial_success_leaves_changes() -> anyhow::Result<()> {
+    let workdir = tempdir()?;
+    let created = workdir.path().join("created.txt");
+
+    // The add hunk comes first; the update hunk then targets a file that does not exist.
+    let patch = "*** Begin Patch\n*** Add File: created.txt\n+hello\n*** Update File: missing.txt\n@@\n-old\n+new\n*** End Patch";
+    run_apply_patch_in_dir(workdir.path(), patch)?
+        .failure()
+        .stdout("")
+        .stderr("Failed to read file to update missing.txt: No such file or directory (os error 2)\n");
+
+    assert_eq!(fs::read_to_string(&created)?, "hello\n");
+    Ok(())
+}
--- a/codex-rs/chatgpt/src/apply_command.rs
+++ b/codex-rs/chatgpt/src/apply_command.rs
@@ -32,7 +32,8 @@ pub async fn run_apply_command(
    )
    .await?;

-    init_chatgpt_token_from_auth(&config.codex_home).await?;
+    init_chatgpt_token_from_auth(&config.codex_home, config.cli_auth_credentials_store_mode)
+        .await?;

    let task_response = get_task(&config, apply_cli.task_id).await?;
    apply_diff_from_task(task_response, cwd).await
--- a/codex-rs/chatgpt/src/chatgpt_client.rs
+++ b/codex-rs/chatgpt/src/chatgpt_client.rs
@@ -13,7 +13,8 @@ pub(crate) async fn chatgpt_get_request<T: DeserializeOwned>(
    path: String,
 ) -> anyhow::Result<T> {
    let chatgpt_base_url = &config.chatgpt_base_url;
-    init_chatgpt_token_from_auth(&config.codex_home).await?;
+    init_chatgpt_token_from_auth(&config.codex_home, config.cli_auth_credentials_store_mode)
+        .await?;

    // Make direct HTTP request to ChatGPT backend API with the token
    let client = create_client();
--- a/codex-rs/chatgpt/src/chatgpt_token.rs
+++ b/codex-rs/chatgpt/src/chatgpt_token.rs
@@ -3,6 +3,7 @@ use std::path::Path;
 use std::sync::LazyLock;
 use std::sync::RwLock;

+use codex_core::auth::AuthCredentialsStoreMode;
 use codex_core::token_data::TokenData;

 static CHATGPT_TOKEN: LazyLock<RwLock<Option<TokenData>>> = LazyLock::new(|| RwLock::new(None));
@@ -18,8 +19,11 @@ pub fn set_chatgpt_token_data(value: TokenData) {
 }

-/// Initialize the ChatGPT token from auth.json file
+/// Initialize the ChatGPT token from the auth storage selected by the credentials store mode
-pub async fn init_chatgpt_token_from_auth(codex_home: &Path) -> std::io::Result<()> {
-    let auth = CodexAuth::from_codex_home(codex_home)?;
+pub async fn init_chatgpt_token_from_auth(
+    codex_home: &Path,
+    auth_credentials_store_mode: AuthCredentialsStoreMode,
+) -> std::io::Result<()> {
+    let auth = CodexAuth::from_auth_storage(codex_home, auth_credentials_store_mode)?;
    if let Some(auth) = auth {
        let token_data = auth.get_token_data().await?;
        set_chatgpt_token_data(token_data);
--- a/codex-rs/cli/src/login.rs
+++ b/codex-rs/cli/src/login.rs
@@ -1,6 +1,7 @@
 use codex_app_server_protocol::AuthMode;
 use codex_common::CliConfigOverrides;
 use codex_core::CodexAuth;
+use codex_core::auth::AuthCredentialsStoreMode;
 use codex_core::auth::CLIENT_ID;
 use codex_core::auth::login_with_api_key;
 use codex_core::auth::logout;
@@ -17,11 +18,13 @@ use std::path::PathBuf;
 pub async fn login_with_chatgpt(
    codex_home: PathBuf,
    forced_chatgpt_workspace_id: Option<String>,
+    cli_auth_credentials_store_mode: AuthCredentialsStoreMode,
 ) -> std::io::Result<()> {
    let opts = ServerOptions::new(
        codex_home,
        CLIENT_ID.to_string(),
        forced_chatgpt_workspace_id,
+        cli_auth_credentials_store_mode,
    );
    let server = run_login_server(opts)?;

@@ -43,7 +46,13 @@ pub async fn run_login_with_chatgpt(cli_config_overrides: CliConfigOverrides) ->

    let forced_chatgpt_workspace_id = config.forced_chatgpt_workspace_id.clone();

-    match login_with_chatgpt(config.codex_home, forced_chatgpt_workspace_id).await {
+    match login_with_chatgpt(
+        config.codex_home,
+        forced_chatgpt_workspace_id,
+        config.cli_auth_credentials_store_mode,
+    )
+    .await
+    {
        Ok(_) => {
            eprintln!("Successfully logged in");
            std::process::exit(0);
@@ -66,7 +75,11 @@ pub async fn run_login_with_api_key(
        std::process::exit(1);
    }

-    match login_with_api_key(&config.codex_home, &api_key) {
+    match login_with_api_key(
+        &config.codex_home,
+        &api_key,
+        config.cli_auth_credentials_store_mode,
+    ) {
        Ok(_) => {
            eprintln!("Successfully logged in");
            std::process::exit(0);
@@ -121,6 +134,7 @@ pub async fn run_login_with_device_code(
        config.codex_home,
        client_id.unwrap_or(CLIENT_ID.to_string()),
        forced_chatgpt_workspace_id,
+        config.cli_auth_credentials_store_mode,
    );
    if let Some(iss) = issuer_base_url {
        opts.issuer = iss;
@@ -140,7 +154,7 @@ pub async fn run_login_with_device_code(
 pub async fn run_login_status(cli_config_overrides: CliConfigOverrides) -> ! {
    let config = load_config_or_exit(cli_config_overrides).await;

-    match CodexAuth::from_codex_home(&config.codex_home) {
+    match CodexAuth::from_auth_storage(&config.codex_home, config.cli_auth_credentials_store_mode) {
        Ok(Some(auth)) => match auth.mode {
            AuthMode::ApiKey => match auth.get_token().await {
                Ok(api_key) => {
@@ -171,7 +185,7 @@ pub async fn run_login_status(cli_config_overrides: CliConfigOverrides) -> ! {
 pub async fn run_logout(cli_config_overrides: CliConfigOverrides) -> ! {
    let config = load_config_or_exit(cli_config_overrides).await;

-    match logout(&config.codex_home) {
+    match logout(&config.codex_home, config.cli_auth_credentials_store_mode) {
        Ok(true) => {
            eprintln!("Successfully logged out");
            std::process::exit(0);
--- a/codex-rs/cli/src/main.rs
+++ b/codex-rs/cli/src/main.rs
@@ -29,6 +29,7 @@ mod mcp_cmd;
 use crate::mcp_cmd::McpCli;
 use codex_core::config::Config;
 use codex_core::config::ConfigOverrides;
+use codex_core::features::is_known_feature_key;

 /// Codex CLI
 ///
@@ -286,15 +287,25 @@ struct FeatureToggles {
 }

 impl FeatureToggles {
-    fn to_overrides(&self) -> Vec<String> {
+    fn to_overrides(&self) -> anyhow::Result<Vec<String>> {
        let mut v = Vec::new();
-        for k in &self.enable {
-            v.push(format!("features.{k}=true"));
+        for feature in &self.enable {
+            Self::validate_feature(feature)?;
+            v.push(format!("features.{feature}=true"));
        }
-        for k in &self.disable {
-            v.push(format!("features.{k}=false"));
+        for feature in &self.disable {
+            Self::validate_feature(feature)?;
+            v.push(format!("features.{feature}=false"));
+        }
+        Ok(v)
+    }
+
+    fn validate_feature(feature: &str) -> anyhow::Result<()> {
+        if is_known_feature_key(feature) {
+            Ok(())
+        } else {
+            anyhow::bail!("Unknown feature flag: {feature}")
        }
-        v
    }
 }

@@ -345,9 +356,8 @@ async fn cli_main(codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()
    } = MultitoolCli::parse();

    // Fold --enable/--disable into config overrides so they flow to all subcommands.
-    root_config_overrides
-        .raw_overrides
-        .extend(feature_toggles.to_overrides());
+    let toggle_overrides = feature_toggles.to_overrides()?;
+    root_config_overrides.raw_overrides.extend(toggle_overrides);

    match subcommand {
        None => {
@@ -605,6 +615,7 @@ mod tests {
    use assert_matches::assert_matches;
    use codex_core::protocol::TokenUsage;
    use codex_protocol::ConversationId;
+    use pretty_assertions::assert_eq;

    fn finalize_from_args(args: &[&str]) -> TuiCli {
        let cli = MultitoolCli::try_parse_from(args).expect("parse");
@@ -781,4 +792,32 @@ mod tests {
        assert!(!interactive.resume_last);
        assert_eq!(interactive.resume_session_id, None);
    }
+
+    /// Known feature keys become `features.<key>=<bool>` overrides, enabled entries first.
+    #[test]
+    fn feature_toggles_known_features_generate_overrides() {
+        let enable = vec!["web_search_request".to_string()];
+        let disable = vec!["unified_exec".to_string()];
+        let toggles = FeatureToggles { enable, disable };
+
+        let expected = vec![
+            "features.web_search_request=true".to_string(),
+            "features.unified_exec=false".to_string(),
+        ];
+
+        let overrides = toggles.to_overrides().expect("valid features");
+        assert_eq!(overrides, expected);
+    }
+
+    /// An unrecognised feature key makes `to_overrides` fail with a message naming the key.
+    #[test]
+    fn feature_toggles_unknown_feature_errors() {
+        let toggles = FeatureToggles {
+            enable: vec!["does_not_exist".to_string()],
+            disable: Vec::new(),
+        };
+        let outcome = toggles.to_overrides();
+        let message = outcome.expect_err("feature should be rejected").to_string();
+        assert_eq!(message, "Unknown feature flag: does_not_exist");
+    }
 }
--- a/codex-rs/cli/src/mcp_cmd.rs
+++ b/codex-rs/cli/src/mcp_cmd.rs
@@ -274,19 +274,33 @@ async fn run_add(config_overrides: &CliConfigOverrides, add_args: AddArgs) -> Re
        http_headers,
        env_http_headers,
    } = transport
-        && matches!(supports_oauth_login(&url).await, Ok(true))
    {
-        println!("Detected OAuth support. Starting OAuth flow…");
-        perform_oauth_login(
-            &name,
-            &url,
-            config.mcp_oauth_credentials_store_mode,
-            http_headers.clone(),
-            env_http_headers.clone(),
-            &Vec::new(),
-        )
-        .await?;
-        println!("Successfully logged in.");
+        match supports_oauth_login(&url).await {
+            Ok(true) => {
+                if !config.features.enabled(Feature::RmcpClient) {
+                    println!(
+                        "MCP server supports login. Add `experimental_use_rmcp_client = true` \
+                         to your config.toml and run `codex mcp login {name}` to login."
+                    );
+                } else {
+                    println!("Detected OAuth support. Starting OAuth flow…");
+                    perform_oauth_login(
+                        &name,
+                        &url,
+                        config.mcp_oauth_credentials_store_mode,
+                        http_headers.clone(),
+                        env_http_headers.clone(),
+                        &Vec::new(),
+                    )
+                    .await?;
+                    println!("Successfully logged in.");
+                }
+            }
+            Ok(false) => {}
+            Err(_) => println!(
+                "MCP server may or may not require login. Run `codex mcp login {name}` to login."
+            ),
+        }
    }

    Ok(())
@@ -523,10 +537,12 @@ async fn run_list(config_overrides: &CliConfigOverrides, list_args: ListArgs) ->
                    .map(|entry| entry.auth_status)
                    .unwrap_or(McpAuthStatus::Unsupported)
                    .to_string();
+                let bearer_token_display =
+                    bearer_token_env_var.as_deref().unwrap_or("-").to_string();
                http_rows.push([
                    name.clone(),
                    url.clone(),
-                    bearer_token_env_var.clone().unwrap_or("-".to_string()),
+                    bearer_token_display,
                    status,
                    auth_status,
                ]);
@@ -752,15 +768,15 @@ async fn run_get(config_overrides: &CliConfigOverrides, get_args: GetArgs) -> Re
        } => {
            println!("  transport: streamable_http");
            println!("  url: {url}");
-            let env_var = bearer_token_env_var.as_deref().unwrap_or("-");
-            println!("  bearer_token_env_var: {env_var}");
+            let bearer_token_display = bearer_token_env_var.as_deref().unwrap_or("-");
+            println!("  bearer_token_env_var: {bearer_token_display}");
            let headers_display = match http_headers {
                Some(map) if !map.is_empty() => {
                    let mut pairs: Vec<_> = map.iter().collect();
                    pairs.sort_by(|(a, _), (b, _)| a.cmp(b));
                    pairs
                        .into_iter()
-                        .map(|(k, v)| format!("{k}={v}"))
+                        .map(|(k, _)| format!("{k}=*****"))
                        .collect::<Vec<_>>()
                        .join(", ")
                }
@@ -773,7 +789,7 @@ async fn run_get(config_overrides: &CliConfigOverrides, get_args: GetArgs) -> Re
                    pairs.sort_by(|(a, _), (b, _)| a.cmp(b));
                    pairs
                        .into_iter()
-                        .map(|(k, v)| format!("{k}={v}"))
+                        .map(|(k, var)| format!("{k}={var}"))
                        .collect::<Vec<_>>()
                        .join(", ")
                }
--- a/codex-rs/cli/tests/mcp_list.rs
+++ b/codex-rs/cli/tests/mcp_list.rs
@@ -68,9 +68,9 @@ async fn list_and_get_render_expected_output() -> Result<()> {
    assert!(stdout.contains("Name"));
    assert!(stdout.contains("docs"));
    assert!(stdout.contains("docs-server"));
-    assert!(stdout.contains("TOKEN=secret"));
-    assert!(stdout.contains("APP_TOKEN=$APP_TOKEN"));
-    assert!(stdout.contains("WORKSPACE_ID=$WORKSPACE_ID"));
+    assert!(stdout.contains("TOKEN=*****"));
+    assert!(stdout.contains("APP_TOKEN=*****"));
+    assert!(stdout.contains("WORKSPACE_ID=*****"));
    assert!(stdout.contains("Status"));
    assert!(stdout.contains("Auth"));
    assert!(stdout.contains("enabled"));
@@ -119,9 +119,9 @@ async fn list_and_get_render_expected_output() -> Result<()> {
    assert!(stdout.contains("transport: stdio"));
    assert!(stdout.contains("command: docs-server"));
    assert!(stdout.contains("args: --port 4000"));
-    assert!(stdout.contains("env: TOKEN=secret"));
-    assert!(stdout.contains("APP_TOKEN=$APP_TOKEN"));
-    assert!(stdout.contains("WORKSPACE_ID=$WORKSPACE_ID"));
+    assert!(stdout.contains("env: TOKEN=*****"));
+    assert!(stdout.contains("APP_TOKEN=*****"));
+    assert!(stdout.contains("WORKSPACE_ID=*****"));
    assert!(stdout.contains("enabled: true"));
    assert!(stdout.contains("remove: codex mcp remove docs"));

--- a/codex-rs/cloud-tasks/src/lib.rs
+++ b/codex-rs/cloud-tasks/src/lib.rs
@@ -58,7 +58,16 @@ async fn init_backend(user_agent_suffix: &str) -> anyhow::Result<BackendContext>

    let auth = match codex_core::config::find_codex_home()
        .ok()
-        .map(|home| codex_login::AuthManager::new(home, false))
+        .map(|home| {
+            let store_mode = codex_core::config::Config::load_from_base_config_with_overrides(
+                codex_core::config::ConfigToml::default(),
+                codex_core::config::ConfigOverrides::default(),
+                home.clone(),
+            )
+            .map(|cfg| cfg.cli_auth_credentials_store_mode)
+            .unwrap_or_default();
+            codex_login::AuthManager::new(home, false, store_mode)
+        })
        .and_then(|am| am.auth())
    {
        Some(auth) => auth,
@@ -1086,7 +1095,19 @@ pub async fn run_main(cli: Cli, _codex_linux_sandbox_exe: Option<PathBuf>) -> an
                                                let backend = Arc::clone(&backend);
                                                let best_of_n = page.best_of_n;
                                                tokio::spawn(async move {
-                                                    let result = codex_cloud_tasks_client::CloudBackend::create_task(&*backend, &env, &text, "main", false, best_of_n).await;
+                                                    let git_ref = if let Ok(cwd) = std::env::current_dir() {
+                                                        if let Some(branch) = codex_core::git_info::default_branch_name(&cwd).await {
+                                                            branch
+                                                        } else if let Some(branch) = codex_core::git_info::current_branch_name(&cwd).await {
+                                                            branch
+                                                        } else {
+                                                            "main".to_string()
+                                                        }
+                                                    } else {
+                                                        "main".to_string()
+                                                    };
+
+                                                    let result = codex_cloud_tasks_client::CloudBackend::create_task(&*backend, &env, &text, &git_ref, false, best_of_n).await;
                                                    let evt = match result {
                                                        Ok(ok) => app::AppEvent::NewTaskSubmitted(Ok(ok)),
                                                        Err(e) => app::AppEvent::NewTaskSubmitted(Err(format!("{e}"))),
--- a/codex-rs/cloud-tasks/src/util.rs
+++ b/codex-rs/cloud-tasks/src/util.rs
@@ -70,7 +70,14 @@ pub async fn build_chatgpt_headers() -> HeaderMap {
        HeaderValue::from_str(&ua).unwrap_or(HeaderValue::from_static("codex-cli")),
    );
    if let Ok(home) = codex_core::config::find_codex_home() {
-        let am = codex_login::AuthManager::new(home, false);
+        let store_mode = codex_core::config::Config::load_from_base_config_with_overrides(
+            codex_core::config::ConfigToml::default(),
+            codex_core::config::ConfigOverrides::default(),
+            home.clone(),
+        )
+        .map(|cfg| cfg.cli_auth_credentials_store_mode)
+        .unwrap_or_default();
+        let am = codex_login::AuthManager::new(home, false, store_mode);
        if let Some(auth) = am.auth()
            && let Ok(tok) = auth.get_token().await
            && !tok.is_empty()
--- a/codex-rs/common/src/approval_presets.rs
+++ b/codex-rs/common/src/approval_presets.rs
@@ -24,21 +24,21 @@ pub fn builtin_approval_presets() -> Vec<ApprovalPreset> {
        ApprovalPreset {
            id: "read-only",
            label: "Read Only",
-            description: "Codex can read files and answer questions. Codex requires approval to make edits, run commands, or access network",
+            description: "Codex can read files and answer questions. Codex requires approval to make edits, run commands, or access network.",
            approval: AskForApproval::OnRequest,
            sandbox: SandboxPolicy::ReadOnly,
        },
        ApprovalPreset {
            id: "auto",
            label: "Auto",
-            description: "Codex can read files, make edits, and run commands in the workspace. Codex requires approval to work outside the workspace or access network",
+            description: "Codex can read files, make edits, and run commands in the workspace. Codex requires approval to work outside the workspace or access network.",
            approval: AskForApproval::OnRequest,
            sandbox: SandboxPolicy::new_workspace_write_policy(),
        },
        ApprovalPreset {
            id: "full-access",
            label: "Full Access",
-            description: "Codex can read files, make edits, and run commands with network access, without approval. Exercise caution",
+            description: "Codex can read files, make edits, and run commands with network access, without approval. Exercise caution.",
            approval: AskForApproval::Never,
            sandbox: SandboxPolicy::DangerFullAccess,
        },
--- a/codex-rs/common/src/format_env_display.rs
+++ b/codex-rs/common/src/format_env_display.rs
@@ -6,15 +6,11 @@ pub fn format_env_display(env: Option<&HashMap<String, String>>, env_vars: &[Str
    if let Some(map) = env {
        let mut pairs: Vec<_> = map.iter().collect();
        pairs.sort_by(|(a, _), (b, _)| a.cmp(b));
-        parts.extend(
-            pairs
-                .into_iter()
-                .map(|(key, value)| format!("{key}={value}")),
-        );
+        parts.extend(pairs.into_iter().map(|(key, _)| format!("{key}=*****")));
    }

    if !env_vars.is_empty() {
-        parts.extend(env_vars.iter().map(|var| format!("{var}=${var}")));
+        parts.extend(env_vars.iter().map(|var| format!("{var}=*****")));
    }

    if parts.is_empty() {
@@ -42,14 +38,14 @@ mod tests {
        env.insert("B".to_string(), "two".to_string());
        env.insert("A".to_string(), "one".to_string());

-        assert_eq!(format_env_display(Some(&env), &[]), "A=one, B=two");
+        assert_eq!(format_env_display(Some(&env), &[]), "A=*****, B=*****");
    }

    #[test]
    fn formats_env_vars_with_dollar_prefix() {
        let vars = vec!["TOKEN".to_string(), "PATH".to_string()];

-        assert_eq!(format_env_display(None, &vars), "TOKEN=$TOKEN, PATH=$PATH");
+        assert_eq!(format_env_display(None, &vars), "TOKEN=*****, PATH=*****");
    }

    #[test]
@@ -60,7 +56,7 @@ mod tests {

        assert_eq!(
            format_env_display(Some(&env), &vars),
-            "HOME=/tmp, TOKEN=$TOKEN"
+            "HOME=*****, TOKEN=*****"
        );
    }
 }
--- a/codex-rs/core/Cargo.toml
+++ b/codex-rs/core/Cargo.toml
@@ -21,19 +21,30 @@ bytes = { workspace = true }
 chrono = { workspace = true, features = ["serde"] }
 codex-app-server-protocol = { workspace = true }
 codex-apply-patch = { workspace = true }
+codex-async-utils = { workspace = true }
 codex-file-search = { workspace = true }
+codex-git-tooling = { workspace = true }
+codex-keyring-store = { workspace = true }
 codex-otel = { workspace = true, features = ["otel"] }
 codex-protocol = { workspace = true }
 codex-rmcp-client = { workspace = true }
-codex-async-utils = { workspace = true }
-codex-utils-string = { workspace = true }
 codex-utils-pty = { workspace = true }
+codex-utils-readiness = { workspace = true }
+codex-utils-string = { workspace = true }
+codex-utils-tokenizer = { workspace = true }
 dirs = { workspace = true }
 dunce = { workspace = true }
 env-flags = { workspace = true }
 eventsource-stream = { workspace = true }
 futures = { workspace = true }
+http = { workspace = true }
 indexmap = { workspace = true }
+keyring = { workspace = true, features = [
+    "apple-native",
+    "crypto-rust",
+    "linux-native-async-persistent",
+    "windows-native",
+] }
 libc = { workspace = true }
 mcp-types = { workspace = true }
 os_info = { workspace = true }
@@ -43,6 +54,7 @@ reqwest = { workspace = true, features = ["json", "stream"] }
 serde = { workspace = true, features = ["derive"] }
 serde_json = { workspace = true }
 sha1 = { workspace = true }
+sha2 = { workspace = true }
 shlex = { workspace = true }
 similar = { workspace = true }
 strum_macros = { workspace = true }
@@ -93,6 +105,7 @@ assert_cmd = { workspace = true }
 assert_matches = { workspace = true }
 core_test_support = { workspace = true }
 escargot = { workspace = true }
+image = { workspace = true, features = ["jpeg", "png"] }
 maplit = { workspace = true }
 predicates = { workspace = true }
 pretty_assertions = { workspace = true }
--- a/codex-rs/core/src/auth.rs
+++ b/codex-rs/core/src/auth.rs
@@ -1,16 +1,12 @@
-use chrono::DateTime;
+mod storage;
+
 use chrono::Utc;
 use serde::Deserialize;
 use serde::Serialize;
 #[cfg(test)]
 use serial_test::serial;
 use std::env;
-use std::fs::File;
-use std::fs::OpenOptions;
-use std::io::Read;
-use std::io::Write;
-#[cfg(unix)]
-use std::os::unix::fs::OpenOptionsExt;
+use std::fmt::Debug;
 use std::path::Path;
 use std::path::PathBuf;
 use std::sync::Arc;
@@ -20,7 +16,12 @@ use std::time::Duration;
 use codex_app_server_protocol::AuthMode;
 use codex_protocol::config_types::ForcedLoginMethod;

+pub use crate::auth::storage::AuthCredentialsStoreMode;
+pub use crate::auth::storage::AuthDotJson;
+use crate::auth::storage::AuthStorageBackend;
+use crate::auth::storage::create_auth_storage;
 use crate::config::Config;
+use crate::default_client::CodexHttpClient;
 use crate::token_data::PlanType;
 use crate::token_data::TokenData;
 use crate::token_data::parse_id_token;
@@ -31,8 +32,8 @@ pub struct CodexAuth {

    pub(crate) api_key: Option<String>,
    pub(crate) auth_dot_json: Arc<Mutex<Option<AuthDotJson>>>,
-    pub(crate) auth_file: PathBuf,
-    pub(crate) client: reqwest::Client,
+    storage: Arc<dyn AuthStorageBackend>,
+    pub(crate) client: CodexHttpClient,
 }

 impl PartialEq for CodexAuth {
@@ -43,6 +44,8 @@ impl PartialEq for CodexAuth {

 impl CodexAuth {
    pub async fn refresh_token(&self) -> Result<String, std::io::Error> {
+        tracing::info!("Refreshing token");
+
        let token_data = self
            .get_current_token_data()
            .ok_or(std::io::Error::other("Token data is not available."))?;
@@ -53,7 +56,7 @@ impl CodexAuth {
            .map_err(std::io::Error::other)?;

        let updated = update_tokens(
-            &self.auth_file,
+            &self.storage,
            refresh_response.id_token,
            refresh_response.access_token,
            refresh_response.refresh_token,
@@ -75,9 +78,12 @@ impl CodexAuth {
        Ok(access)
    }

-    /// Loads the available auth information from the auth.json.
-    pub fn from_codex_home(codex_home: &Path) -> std::io::Result<Option<CodexAuth>> {
-        load_auth(codex_home, false)
+    /// Loads the available auth information from auth storage.
+    pub fn from_auth_storage(
+        codex_home: &Path,
+        auth_credentials_store_mode: AuthCredentialsStoreMode,
+    ) -> std::io::Result<Option<CodexAuth>> {
+        load_auth(codex_home, false, auth_credentials_store_mode)
    }

    pub async fn get_token_data(&self) -> Result<TokenData, std::io::Error> {
@@ -100,7 +106,7 @@ impl CodexAuth {
                    .map_err(std::io::Error::other)?;

                    let updated_auth_dot_json = update_tokens(
-                        &self.auth_file,
+                        &self.storage,
                        refresh_response.id_token,
                        refresh_response.access_token,
                        refresh_response.refresh_token,
@@ -174,17 +180,17 @@ impl CodexAuth {
        Self {
            api_key: None,
            mode: AuthMode::ChatGPT,
-            auth_file: PathBuf::new(),
+            storage: create_auth_storage(PathBuf::new(), AuthCredentialsStoreMode::File),
            auth_dot_json,
            client: crate::default_client::create_client(),
        }
    }

-    fn from_api_key_with_client(api_key: &str, client: reqwest::Client) -> Self {
+    fn from_api_key_with_client(api_key: &str, client: CodexHttpClient) -> Self {
        Self {
            api_key: Some(api_key.to_owned()),
            mode: AuthMode::ApiKey,
-            auth_file: PathBuf::new(),
+            storage: create_auth_storage(PathBuf::new(), AuthCredentialsStoreMode::File),
            auth_dot_json: Arc::new(Mutex::new(None)),
            client,
        }
@@ -212,33 +218,57 @@ pub fn read_codex_api_key_from_env() -> Option<String> {
        .filter(|value| !value.is_empty())
 }

-pub fn get_auth_file(codex_home: &Path) -> PathBuf {
-    codex_home.join("auth.json")
-}
-
 /// Delete the auth.json file inside `codex_home` if it exists. Returns `Ok(true)`
 /// if a file was removed, `Ok(false)` if no auth file was present.
-pub fn logout(codex_home: &Path) -> std::io::Result<bool> {
-    let auth_file = get_auth_file(codex_home);
-    match std::fs::remove_file(&auth_file) {
-        Ok(_) => Ok(true),
-        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
-        Err(err) => Err(err),
-    }
+pub fn logout(
+    codex_home: &Path,
+    auth_credentials_store_mode: AuthCredentialsStoreMode,
+) -> std::io::Result<bool> {
+    let storage = create_auth_storage(codex_home.to_path_buf(), auth_credentials_store_mode);
+    storage.delete()
 }

 /// Writes an `auth.json` that contains only the API key.
-pub fn login_with_api_key(codex_home: &Path, api_key: &str) -> std::io::Result<()> {
+pub fn login_with_api_key(
+    codex_home: &Path,
+    api_key: &str,
+    auth_credentials_store_mode: AuthCredentialsStoreMode,
+) -> std::io::Result<()> {
    let auth_dot_json = AuthDotJson {
        openai_api_key: Some(api_key.to_string()),
        tokens: None,
        last_refresh: None,
    };
-    write_auth_json(&get_auth_file(codex_home), &auth_dot_json)
+    save_auth(codex_home, &auth_dot_json, auth_credentials_store_mode)
+}
+
+/// Persist the provided auth payload using the specified backend.
+pub fn save_auth(
+    codex_home: &Path,
+    auth: &AuthDotJson,
+    auth_credentials_store_mode: AuthCredentialsStoreMode,
+) -> std::io::Result<()> {
+    let storage = create_auth_storage(codex_home.to_path_buf(), auth_credentials_store_mode);
+    storage.save(auth)
+}
+
+/// Load CLI auth data using the configured credential store backend.
+/// Returns `None` when no credentials are stored.
+pub fn load_auth_dot_json(
+    codex_home: &Path,
+    auth_credentials_store_mode: AuthCredentialsStoreMode,
+) -> std::io::Result<Option<AuthDotJson>> {
+    let storage = create_auth_storage(codex_home.to_path_buf(), auth_credentials_store_mode);
+    storage.load()
 }

 pub async fn enforce_login_restrictions(config: &Config) -> std::io::Result<()> {
-    let Some(auth) = load_auth(&config.codex_home, true)? else {
+    let Some(auth) = load_auth(
+        &config.codex_home,
+        true,
+        config.cli_auth_credentials_store_mode,
+    )?
+    else {
        return Ok(());
    };

@@ -257,7 +287,11 @@ pub async fn enforce_login_restrictions(config: &Config) -> std::io::Result<()>
        };

        if let Some(message) = method_violation {
-            return logout_with_message(&config.codex_home, message);
+            return logout_with_message(
+                &config.codex_home,
+                message,
+                config.cli_auth_credentials_store_mode,
+            );
        }
    }

@@ -274,6 +308,7 @@ pub async fn enforce_login_restrictions(config: &Config) -> std::io::Result<()>
                    format!(
                        "Failed to load ChatGPT credentials while enforcing workspace restrictions: {err}. Logging out."
                    ),
+                    config.cli_auth_credentials_store_mode,
                );
            }
        };
@@ -289,15 +324,23 @@ pub async fn enforce_login_restrictions(config: &Config) -> std::io::Result<()>
                    "Login is restricted to workspace {expected_account_id}, but current credentials lack a workspace identifier. Logging out."
                ),
            };
-            return logout_with_message(&config.codex_home, message);
+            return logout_with_message(
+                &config.codex_home,
+                message,
+                config.cli_auth_credentials_store_mode,
+            );
        }
    }

    Ok(())
 }

-fn logout_with_message(codex_home: &Path, message: String) -> std::io::Result<()> {
-    match logout(codex_home) {
+fn logout_with_message(
+    codex_home: &Path,
+    message: String,
+    auth_credentials_store_mode: AuthCredentialsStoreMode,
+) -> std::io::Result<()> {
+    match logout(codex_home, auth_credentials_store_mode) {
        Ok(_) => Err(std::io::Error::other(message)),
        Err(err) => Err(std::io::Error::other(format!(
            "{message}. Failed to remove auth.json: {err}"
@@ -308,6 +351,7 @@ fn logout_with_message(codex_home: &Path, message: String) -> std::io::Result<()
 fn load_auth(
    codex_home: &Path,
    enable_codex_api_key_env: bool,
+    auth_credentials_store_mode: AuthCredentialsStoreMode,
 ) -> std::io::Result<Option<CodexAuth>> {
    if enable_codex_api_key_env && let Some(api_key) = read_codex_api_key_from_env() {
        let client = crate::default_client::create_client();
@@ -317,12 +361,12 @@ fn load_auth(
        )));
    }

-    let auth_file = get_auth_file(codex_home);
+    let storage = create_auth_storage(codex_home.to_path_buf(), auth_credentials_store_mode);
+
    let client = crate::default_client::create_client();
-    let auth_dot_json = match try_read_auth_json(&auth_file) {
-        Ok(auth) => auth,
-        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
-        Err(err) => return Err(err),
+    let auth_dot_json = match storage.load()? {
+        Some(auth) => auth,
+        None => return Ok(None),
    };

    let AuthDotJson {
@@ -339,7 +383,7 @@ fn load_auth(
    Ok(Some(CodexAuth {
        api_key: None,
        mode: AuthMode::ChatGPT,
-        auth_file,
+        storage: storage.clone(),
        auth_dot_json: Arc::new(Mutex::new(Some(AuthDotJson {
            openai_api_key: None,
            tokens,
@@ -349,44 +393,20 @@ fn load_auth(
    }))
 }

-/// Attempt to read and refresh the `auth.json` file in the given `CODEX_HOME` directory.
-/// Returns the full AuthDotJson structure after refreshing if necessary.
-pub fn try_read_auth_json(auth_file: &Path) -> std::io::Result<AuthDotJson> {
-    let mut file = File::open(auth_file)?;
-    let mut contents = String::new();
-    file.read_to_string(&mut contents)?;
-    let auth_dot_json: AuthDotJson = serde_json::from_str(&contents)?;
-
-    Ok(auth_dot_json)
-}
-
-pub fn write_auth_json(auth_file: &Path, auth_dot_json: &AuthDotJson) -> std::io::Result<()> {
-    if let Some(parent) = auth_file.parent() {
-        std::fs::create_dir_all(parent)?;
-    }
-    let json_data = serde_json::to_string_pretty(auth_dot_json)?;
-    let mut options = OpenOptions::new();
-    options.truncate(true).write(true).create(true);
-    #[cfg(unix)]
-    {
-        options.mode(0o600);
-    }
-    let mut file = options.open(auth_file)?;
-    file.write_all(json_data.as_bytes())?;
-    file.flush()?;
-    Ok(())
-}
-
 async fn update_tokens(
-    auth_file: &Path,
-    id_token: String,
+    storage: &Arc<dyn AuthStorageBackend>,
+    id_token: Option<String>,
    access_token: Option<String>,
    refresh_token: Option<String>,
 ) -> std::io::Result<AuthDotJson> {
-    let mut auth_dot_json = try_read_auth_json(auth_file)?;
+    let mut auth_dot_json = storage
+        .load()?
+        .ok_or(std::io::Error::other("Token data is not available."))?;

    let tokens = auth_dot_json.tokens.get_or_insert_with(TokenData::default);
-    tokens.id_token = parse_id_token(&id_token).map_err(std::io::Error::other)?;
+    if let Some(id_token) = id_token {
+        tokens.id_token = parse_id_token(&id_token).map_err(std::io::Error::other)?;
+    }
    if let Some(access_token) = access_token {
        tokens.access_token = access_token;
    }
@@ -394,13 +414,13 @@ async fn update_tokens(
        tokens.refresh_token = refresh_token;
    }
    auth_dot_json.last_refresh = Some(Utc::now());
-    write_auth_json(auth_file, &auth_dot_json)?;
+    storage.save(&auth_dot_json)?;
    Ok(auth_dot_json)
 }

 async fn try_refresh_token(
    refresh_token: String,
-    client: &reqwest::Client,
+    client: &CodexHttpClient,
 ) -> std::io::Result<RefreshResponse> {
    let refresh_request = RefreshRequest {
        client_id: CLIENT_ID,
@@ -442,24 +462,11 @@ struct RefreshRequest {

 #[derive(Deserialize, Clone)]
 struct RefreshResponse {
-    id_token: String,
+    id_token: Option<String>,
    access_token: Option<String>,
    refresh_token: Option<String>,
 }

-/// Expected structure for $CODEX_HOME/auth.json.
-#[derive(Deserialize, Serialize, Clone, Debug, PartialEq)]
-pub struct AuthDotJson {
-    #[serde(rename = "OPENAI_API_KEY")]
-    pub openai_api_key: Option<String>,
-
-    #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub tokens: Option<TokenData>,
-
-    #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub last_refresh: Option<DateTime<Utc>>,
-}
-
 // Shared constant for token refresh (client id used for oauth token refresh flow)
 pub const CLIENT_ID: &str = "app_EMoamEEZ73f0CkXaXp7hrann";

@@ -474,12 +481,15 @@ struct CachedAuth {
 #[cfg(test)]
 mod tests {
    use super::*;
+    use crate::auth::storage::FileAuthStorage;
+    use crate::auth::storage::get_auth_file;
    use crate::config::Config;
    use crate::config::ConfigOverrides;
    use crate::config::ConfigToml;
    use crate::token_data::IdTokenInfo;
    use crate::token_data::KnownPlan;
    use crate::token_data::PlanType;
+
    use base64::Engine;
    use codex_protocol::config_types::ForcedLoginMethod;
    use pretty_assertions::assert_eq;
@@ -488,9 +498,9 @@ mod tests {
    use tempfile::tempdir;

    #[tokio::test]
-    async fn roundtrip_auth_dot_json() {
+    async fn refresh_without_id_token() {
        let codex_home = tempdir().unwrap();
-        let _ = write_auth_file(
+        let fake_jwt = write_auth_file(
            AuthFileParams {
                openai_api_key: None,
                chatgpt_plan_type: "pro".to_string(),
@@ -500,12 +510,23 @@ mod tests {
        )
        .expect("failed to write auth file");

-        let file = get_auth_file(codex_home.path());
-        let auth_dot_json = try_read_auth_json(&file).unwrap();
-        write_auth_json(&file, &auth_dot_json).unwrap();
+        let storage = create_auth_storage(
+            codex_home.path().to_path_buf(),
+            AuthCredentialsStoreMode::File,
+        );
+        let updated = super::update_tokens(
+            &storage,
+            None,
+            Some("new-access-token".to_string()),
+            Some("new-refresh-token".to_string()),
+        )
+        .await
+        .expect("update_tokens should succeed");

-        let same_auth_dot_json = try_read_auth_json(&file).unwrap();
-        assert_eq!(auth_dot_json, same_auth_dot_json);
+        let tokens = updated.tokens.expect("tokens should exist");
+        assert_eq!(tokens.id_token.raw_jwt, fake_jwt);
+        assert_eq!(tokens.access_token, "new-access-token");
+        assert_eq!(tokens.refresh_token, "new-refresh-token");
    }

    #[test]
@@ -527,9 +548,13 @@ mod tests {
        )
        .unwrap();

-        super::login_with_api_key(dir.path(), "sk-new").expect("login_with_api_key should succeed");
+        super::login_with_api_key(dir.path(), "sk-new", AuthCredentialsStoreMode::File)
+            .expect("login_with_api_key should succeed");

-        let auth = super::try_read_auth_json(&auth_path).expect("auth.json should parse");
+        let storage = FileAuthStorage::new(dir.path().to_path_buf());
+        let auth = storage
+            .try_read_auth_json(&auth_path)
+            .expect("auth.json should parse");
        assert_eq!(auth.openai_api_key.as_deref(), Some("sk-new"));
        assert!(auth.tokens.is_none(), "tokens should be cleared");
    }
@@ -537,7 +562,8 @@ mod tests {
    #[test]
    fn missing_auth_json_returns_none() {
        let dir = tempdir().unwrap();
-        let auth = CodexAuth::from_codex_home(dir.path()).expect("call should succeed");
+        let auth = CodexAuth::from_auth_storage(dir.path(), AuthCredentialsStoreMode::File)
+            .expect("call should succeed");
        assert_eq!(auth, None);
    }

@@ -559,9 +585,11 @@ mod tests {
            api_key,
            mode,
            auth_dot_json,
-            auth_file: _,
+            storage: _,
            ..
-        } = super::load_auth(codex_home.path(), false).unwrap().unwrap();
+        } = super::load_auth(codex_home.path(), false, AuthCredentialsStoreMode::File)
+            .unwrap()
+            .unwrap();
        assert_eq!(None, api_key);
        assert_eq!(AuthMode::ChatGPT, mode);

@@ -602,7 +630,9 @@ mod tests {
        )
        .unwrap();

-        let auth = super::load_auth(dir.path(), false).unwrap().unwrap();
+        let auth = super::load_auth(dir.path(), false, AuthCredentialsStoreMode::File)
+            .unwrap()
+            .unwrap();
        assert_eq!(auth.mode, AuthMode::ApiKey);
        assert_eq!(auth.api_key, Some("sk-test-key".to_string()));

@@ -617,11 +647,11 @@ mod tests {
            tokens: None,
            last_refresh: None,
        };
-        write_auth_json(&get_auth_file(dir.path()), &auth_dot_json)?;
-        assert!(dir.path().join("auth.json").exists());
-        let removed = logout(dir.path())?;
-        assert!(removed);
-        assert!(!dir.path().join("auth.json").exists());
+        super::save_auth(dir.path(), &auth_dot_json, AuthCredentialsStoreMode::File)?;
+        let auth_file = get_auth_file(dir.path());
+        assert!(auth_file.exists());
+        assert!(logout(dir.path(), AuthCredentialsStoreMode::File)?);
+        assert!(!auth_file.exists());
        Ok(())
    }

@@ -729,7 +759,8 @@ mod tests {
    #[tokio::test]
    async fn enforce_login_restrictions_logs_out_for_method_mismatch() {
        let codex_home = tempdir().unwrap();
-        login_with_api_key(codex_home.path(), "sk-test").expect("seed api key");
+        login_with_api_key(codex_home.path(), "sk-test", AuthCredentialsStoreMode::File)
+            .expect("seed api key");

        let config = build_config(codex_home.path(), Some(ForcedLoginMethod::Chatgpt), None);

@@ -798,7 +829,8 @@ mod tests {
    async fn enforce_login_restrictions_allows_api_key_if_login_method_not_set_but_forced_chatgpt_workspace_id_is_set()
     {
        let codex_home = tempdir().unwrap();
-        login_with_api_key(codex_home.path(), "sk-test").expect("seed api key");
+        login_with_api_key(codex_home.path(), "sk-test", AuthCredentialsStoreMode::File)
+            .expect("seed api key");

        let config = build_config(codex_home.path(), None, Some("org_mine".to_string()));

@@ -842,6 +874,7 @@ pub struct AuthManager {
    codex_home: PathBuf,
    inner: RwLock<CachedAuth>,
    enable_codex_api_key_env: bool,
+    auth_credentials_store_mode: AuthCredentialsStoreMode,
 }

 impl AuthManager {
@@ -849,14 +882,23 @@ impl AuthManager {
    /// preferred auth method. Errors loading auth are swallowed; `auth()` will
    /// simply return `None` in that case so callers can treat it as an
    /// unauthenticated state.
-    pub fn new(codex_home: PathBuf, enable_codex_api_key_env: bool) -> Self {
-        let auth = load_auth(&codex_home, enable_codex_api_key_env)
-            .ok()
-            .flatten();
+    pub fn new(
+        codex_home: PathBuf,
+        enable_codex_api_key_env: bool,
+        auth_credentials_store_mode: AuthCredentialsStoreMode,
+    ) -> Self {
+        let auth = load_auth(
+            &codex_home,
+            enable_codex_api_key_env,
+            auth_credentials_store_mode,
+        )
+        .ok()
+        .flatten();
        Self {
            codex_home,
            inner: RwLock::new(CachedAuth { auth }),
            enable_codex_api_key_env,
+            auth_credentials_store_mode,
        }
    }

@@ -867,6 +909,7 @@ impl AuthManager {
            codex_home: PathBuf::new(),
            inner: RwLock::new(cached),
            enable_codex_api_key_env: false,
+            auth_credentials_store_mode: AuthCredentialsStoreMode::File,
        })
    }

@@ -878,9 +921,13 @@ impl AuthManager {
    /// Force a reload of the auth information from auth.json. Returns
    /// whether the auth value changed.
    pub fn reload(&self) -> bool {
-        let new_auth = load_auth(&self.codex_home, self.enable_codex_api_key_env)
-            .ok()
-            .flatten();
+        let new_auth = load_auth(
+            &self.codex_home,
+            self.enable_codex_api_key_env,
+            self.auth_credentials_store_mode,
+        )
+        .ok()
+        .flatten();
        if let Ok(mut guard) = self.inner.write() {
            let changed = !AuthManager::auths_equal(&guard.auth, &new_auth);
            guard.auth = new_auth;
@@ -899,8 +946,16 @@ impl AuthManager {
    }

    /// Convenience constructor returning an `Arc` wrapper.
-    pub fn shared(codex_home: PathBuf, enable_codex_api_key_env: bool) -> Arc<Self> {
-        Arc::new(Self::new(codex_home, enable_codex_api_key_env))
+    pub fn shared(
+        codex_home: PathBuf,
+        enable_codex_api_key_env: bool,
+        auth_credentials_store_mode: AuthCredentialsStoreMode,
+    ) -> Arc<Self> {
+        Arc::new(Self::new(
+            codex_home,
+            enable_codex_api_key_env,
+            auth_credentials_store_mode,
+        ))
    }

    /// Attempt to refresh the current auth token (if any). On success, reload
@@ -916,7 +971,10 @@ impl AuthManager {
                self.reload();
                Ok(Some(token))
            }
-            Err(e) => Err(e),
+            Err(e) => {
+                tracing::error!("Failed to refresh token: {}", e);
+                Err(e)
+            }
        }
    }

@@ -925,7 +983,7 @@ impl AuthManager {
    /// reloads the in‑memory auth cache so callers immediately observe the
    /// unauthenticated state.
    pub fn logout(&self) -> std::io::Result<bool> {
-        let removed = super::auth::logout(&self.codex_home)?;
+        let removed = super::auth::logout(&self.codex_home, self.auth_credentials_store_mode)?;
        // Always reload to clear any cached auth (even if file absent).
        self.reload();
        Ok(removed)
--- a/codex-rs/core/src/auth/storage.rs
+++ b/codex-rs/core/src/auth/storage.rs
@@ -0,0 +1,672 @@
+use chrono::DateTime;
+use chrono::Utc;
+use serde::Deserialize;
+use serde::Serialize;
+use sha2::Digest;
+use sha2::Sha256;
+use std::fmt::Debug;
+use std::fs::File;
+use std::fs::OpenOptions;
+use std::io::Read;
+use std::io::Write;
+#[cfg(unix)]
+use std::os::unix::fs::OpenOptionsExt;
+use std::path::Path;
+use std::path::PathBuf;
+use std::sync::Arc;
+use tracing::warn;
+
+use crate::token_data::TokenData;
+use codex_keyring_store::DefaultKeyringStore;
+use codex_keyring_store::KeyringStore;
+
+/// Determine where Codex should store CLI auth credentials.
+#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum AuthCredentialsStoreMode {
+    #[default]
+    /// Persist credentials in CODEX_HOME/auth.json.
+    File,
+    /// Persist credentials in the keyring. Fail if unavailable.
+    Keyring,
+    /// Use keyring when available; otherwise, fall back to a file in CODEX_HOME.
+    Auto,
+}
+
+/// Auth payload: the structure of $CODEX_HOME/auth.json, also stored as JSON in the keyring.
+#[derive(Deserialize, Serialize, Clone, Debug, PartialEq)]
+pub struct AuthDotJson {
+    #[serde(rename = "OPENAI_API_KEY")]
+    pub openai_api_key: Option<String>,
+
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub tokens: Option<TokenData>,
+
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub last_refresh: Option<DateTime<Utc>>,
+}
+
+pub(super) fn get_auth_file(codex_home: &Path) -> PathBuf {
+    codex_home.join("auth.json")
+}
+
+pub(super) fn delete_file_if_exists(codex_home: &Path) -> std::io::Result<bool> {
+    let auth_file = get_auth_file(codex_home);
+    match std::fs::remove_file(&auth_file) {
+        Ok(()) => Ok(true),
+        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
+        Err(err) => Err(err),
+    }
+}
+
+pub(super) trait AuthStorageBackend: Debug + Send + Sync {
+    fn load(&self) -> std::io::Result<Option<AuthDotJson>>;
+    fn save(&self, auth: &AuthDotJson) -> std::io::Result<()>;
+    fn delete(&self) -> std::io::Result<bool>;
+}
+
+#[derive(Clone, Debug)]
+pub(super) struct FileAuthStorage {
+    codex_home: PathBuf,
+}
+
+impl FileAuthStorage {
+    pub(super) fn new(codex_home: PathBuf) -> Self {
+        Self { codex_home }
+    }
+
+    /// Read `auth_file` and deserialize its JSON contents into an `AuthDotJson`.
+    /// Open, read, and parse errors are propagated; no token refresh happens here.
+    pub(super) fn try_read_auth_json(&self, auth_file: &Path) -> std::io::Result<AuthDotJson> {
+        let mut file = File::open(auth_file)?;
+        let mut contents = String::new();
+        file.read_to_string(&mut contents)?;
+        let auth_dot_json: AuthDotJson = serde_json::from_str(&contents)?;
+
+        Ok(auth_dot_json)
+    }
+}
+
+impl AuthStorageBackend for FileAuthStorage {
+    fn load(&self) -> std::io::Result<Option<AuthDotJson>> {
+        let auth_file = get_auth_file(&self.codex_home);
+        let auth_dot_json = match self.try_read_auth_json(&auth_file) {
+            Ok(auth) => auth,
+            Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
+            Err(err) => return Err(err),
+        };
+        Ok(Some(auth_dot_json))
+    }
+
+    fn save(&self, auth_dot_json: &AuthDotJson) -> std::io::Result<()> {
+        let auth_file = get_auth_file(&self.codex_home);
+
+        if let Some(parent) = auth_file.parent() {
+            std::fs::create_dir_all(parent)?;
+        }
+        let json_data = serde_json::to_string_pretty(auth_dot_json)?;
+        let mut options = OpenOptions::new();
+        options.truncate(true).write(true).create(true);
+        #[cfg(unix)]
+        {
+            options.mode(0o600);
+        }
+        let mut file = options.open(auth_file)?;
+        file.write_all(json_data.as_bytes())?;
+        file.flush()?;
+        Ok(())
+    }
+
+    fn delete(&self) -> std::io::Result<bool> {
+        delete_file_if_exists(&self.codex_home)
+    }
+}
+
+const KEYRING_SERVICE: &str = "Codex Auth";
+
+// Keyring key for `codex_home`: "cli|" + first 16 hex chars of SHA-256 of the canonicalized path (the path as given if canonicalize fails).
+fn compute_store_key(codex_home: &Path) -> std::io::Result<String> {
+    let canonical = codex_home
+        .canonicalize()
+        .unwrap_or_else(|_| codex_home.to_path_buf());
+    let path_str = canonical.to_string_lossy();
+    let mut hasher = Sha256::new();
+    hasher.update(path_str.as_bytes());
+    let digest = hasher.finalize();
+    let hex = format!("{digest:x}");
+    let truncated = hex.get(..16).unwrap_or(&hex);
+    Ok(format!("cli|{truncated}"))
+}
+
+#[derive(Clone, Debug)]
+struct KeyringAuthStorage {
+    codex_home: PathBuf,
+    keyring_store: Arc<dyn KeyringStore>,
+}
+
+impl KeyringAuthStorage {
+    fn new(codex_home: PathBuf, keyring_store: Arc<dyn KeyringStore>) -> Self {
+        Self {
+            codex_home,
+            keyring_store,
+        }
+    }
+
+    fn load_from_keyring(&self, key: &str) -> std::io::Result<Option<AuthDotJson>> {
+        match self.keyring_store.load(KEYRING_SERVICE, key) {
+            Ok(Some(serialized)) => serde_json::from_str(&serialized).map(Some).map_err(|err| {
+                std::io::Error::other(format!(
+                    "failed to deserialize CLI auth from keyring: {err}"
+                ))
+            }),
+            Ok(None) => Ok(None),
+            Err(error) => Err(std::io::Error::other(format!(
+                "failed to load CLI auth from keyring: {}",
+                error.message()
+            ))),
+        }
+    }
+
+    fn save_to_keyring(&self, key: &str, value: &str) -> std::io::Result<()> {
+        match self.keyring_store.save(KEYRING_SERVICE, key, value) {
+            Ok(()) => Ok(()),
+            Err(error) => {
+                let message = format!(
+                    "failed to write OAuth tokens to keyring: {}",
+                    error.message()
+                );
+                warn!("{message}");
+                Err(std::io::Error::other(message))
+            }
+        }
+    }
+}
+
+impl AuthStorageBackend for KeyringAuthStorage {
+    fn load(&self) -> std::io::Result<Option<AuthDotJson>> {
+        let key = compute_store_key(&self.codex_home)?;
+        self.load_from_keyring(&key)
+    }
+
+    fn save(&self, auth: &AuthDotJson) -> std::io::Result<()> {
+        let key = compute_store_key(&self.codex_home)?;
+        // Serialize to compact JSON for the keyring; a serialization failure is returned as `std::io::Error`.
+        let serialized = serde_json::to_string(auth).map_err(std::io::Error::other)?;
+        self.save_to_keyring(&key, &serialized)?;
+        if let Err(err) = delete_file_if_exists(&self.codex_home) {
+            warn!("failed to remove CLI auth fallback file: {err}");
+        }
+        Ok(())
+    }
+
+    fn delete(&self) -> std::io::Result<bool> {
+        let key = compute_store_key(&self.codex_home)?;
+        let keyring_removed = self
+            .keyring_store
+            .delete(KEYRING_SERVICE, &key)
+            .map_err(|err| {
+                std::io::Error::other(format!("failed to delete auth from keyring: {err}"))
+            })?;
+        let file_removed = delete_file_if_exists(&self.codex_home)?;
+        Ok(keyring_removed || file_removed)
+    }
+}
+
+/// Auth storage that prefers the keyring and falls back to the auth file.
+#[derive(Clone, Debug)]
+struct AutoAuthStorage {
+    keyring_storage: Arc<KeyringAuthStorage>,
+    file_storage: Arc<FileAuthStorage>,
+}
+
+impl AutoAuthStorage {
+    fn new(codex_home: PathBuf, keyring_store: Arc<dyn KeyringStore>) -> Self {
+        Self {
+            keyring_storage: Arc::new(KeyringAuthStorage::new(codex_home.clone(), keyring_store)),
+            file_storage: Arc::new(FileAuthStorage::new(codex_home)),
+        }
+    }
+}
+
+impl AuthStorageBackend for AutoAuthStorage {
+    /// Returns the keyring entry when present; otherwise (or on keyring error) reads the file.
+    fn load(&self) -> std::io::Result<Option<AuthDotJson>> {
+        let from_keyring = self.keyring_storage.load().unwrap_or_else(|err| {
+            warn!("failed to load CLI auth from keyring, falling back to file storage: {err}");
+            None
+        });
+        match from_keyring {
+            Some(auth) => Ok(Some(auth)),
+            None => self.file_storage.load(),
+        }
+    }
+
+    /// Saves to the keyring, falling back to the auth file when the keyring save fails.
+    fn save(&self, auth: &AuthDotJson) -> std::io::Result<()> {
+        self.keyring_storage.save(auth).or_else(|err| {
+            warn!("failed to save auth to keyring, falling back to file storage: {err}");
+            self.file_storage.save(auth)
+        })
+    }
+
+    /// Delegates to the keyring storage, whose `delete` also removes the auth file on disk.
+    fn delete(&self) -> std::io::Result<bool> {
+        self.keyring_storage.delete()
+    }
+}
+
+/// Builds the auth storage backend for `mode`, backed by `DefaultKeyringStore`.
+pub(super) fn create_auth_storage(
+    codex_home: PathBuf,
+    mode: AuthCredentialsStoreMode,
+) -> Arc<dyn AuthStorageBackend> {
+    create_auth_storage_with_keyring_store(codex_home, mode, Arc::new(DefaultKeyringStore))
+}
+
+/// Selects the backend for `mode`, using the caller-supplied `keyring_store` where needed.
+fn create_auth_storage_with_keyring_store(
+    codex_home: PathBuf,
+    mode: AuthCredentialsStoreMode,
+    keyring_store: Arc<dyn KeyringStore>,
+) -> Arc<dyn AuthStorageBackend> {
+    use AuthCredentialsStoreMode::{Auto, File, Keyring};
+    match mode {
+        File => Arc::new(FileAuthStorage::new(codex_home)),
+        Keyring => Arc::new(KeyringAuthStorage::new(codex_home, keyring_store)),
+        Auto => Arc::new(AutoAuthStorage::new(codex_home, keyring_store)),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::token_data::IdTokenInfo;
+    use anyhow::Context;
+    use base64::Engine;
+    use pretty_assertions::assert_eq;
+    use serde_json::json;
+    use tempfile::tempdir;
+
+    use codex_keyring_store::tests::MockKeyringStore;
+    use keyring::Error as KeyringError;
+
+    /// Saving through the file backend and loading again must yield the same `AuthDotJson`.
+    #[test]
+    fn file_storage_load_returns_auth_dot_json() -> anyhow::Result<()> {
+        let codex_home = tempdir()?;
+        let storage = FileAuthStorage::new(codex_home.path().to_path_buf());
+        let auth_dot_json = AuthDotJson {
+            openai_api_key: Some("test-key".to_string()),
+            tokens: None,
+            last_refresh: Some(Utc::now()),
+        };
+
+        storage
+            .save(&auth_dot_json)
+            .context("failed to save auth file")?;
+        let loaded = storage.load().context("failed to load auth file")?;
+        assert_eq!(Some(auth_dot_json), loaded);
+        Ok(())
+    }
+
+    /// After `save`, the auth file on disk must parse back to the value that was written.
+    #[test]
+    fn file_storage_save_persists_auth_dot_json() -> anyhow::Result<()> {
+        let codex_home = tempdir()?;
+        let storage = FileAuthStorage::new(codex_home.path().to_path_buf());
+        let auth_dot_json = AuthDotJson {
+            openai_api_key: Some("test-key".to_string()),
+            tokens: None,
+            last_refresh: Some(Utc::now()),
+        };
+
+        let file = get_auth_file(codex_home.path());
+        storage
+            .save(&auth_dot_json)
+            .context("failed to save auth file")?;
+        let same_auth_dot_json = storage
+            .try_read_auth_json(&file)
+            .context("failed to read auth file after save")?;
+        assert_eq!(auth_dot_json, same_auth_dot_json);
+        Ok(())
+    }
+
+    #[test]
+    fn file_storage_delete_removes_auth_file() -> anyhow::Result<()> {
+        let dir = tempdir()?;
+        let auth_dot_json = AuthDotJson {
+            openai_api_key: Some("sk-test-key".to_string()),
+            tokens: None,
+            last_refresh: None,
+        };
+        let storage = create_auth_storage(dir.path().to_path_buf(), AuthCredentialsStoreMode::File);
+        storage.save(&auth_dot_json)?;
+        assert!(dir.path().join("auth.json").exists());
+        let storage = FileAuthStorage::new(dir.path().to_path_buf());
+        let removed = storage.delete()?;
+        assert!(removed);
+        assert!(!dir.path().join("auth.json").exists());
+        Ok(())
+    }
+
+    fn seed_keyring_and_fallback_auth_file_for_delete<F>(
+        mock_keyring: &MockKeyringStore,
+        codex_home: &Path,
+        compute_key: F,
+    ) -> anyhow::Result<(String, PathBuf)>
+    where
+        F: FnOnce() -> std::io::Result<String>,
+    {
+        let key = compute_key()?;
+        mock_keyring.save(KEYRING_SERVICE, &key, "{}")?;
+        let auth_file = get_auth_file(codex_home);
+        std::fs::write(&auth_file, "stale")?;
+        Ok((key, auth_file))
+    }
+
+    fn seed_keyring_with_auth<F>(
+        mock_keyring: &MockKeyringStore,
+        compute_key: F,
+        auth: &AuthDotJson,
+    ) -> anyhow::Result<()>
+    where
+        F: FnOnce() -> std::io::Result<String>,
+    {
+        let key = compute_key()?;
+        let serialized = serde_json::to_string(auth)?;
+        mock_keyring.save(KEYRING_SERVICE, &key, &serialized)?;
+        Ok(())
+    }
+
+    fn assert_keyring_saved_auth_and_removed_fallback(
+        mock_keyring: &MockKeyringStore,
+        key: &str,
+        codex_home: &Path,
+        expected: &AuthDotJson,
+    ) {
+        let saved_value = mock_keyring
+            .saved_value(key)
+            .expect("keyring entry should exist");
+        let expected_serialized = serde_json::to_string(expected).expect("serialize expected auth");
+        assert_eq!(saved_value, expected_serialized);
+        let auth_file = get_auth_file(codex_home);
+        assert!(
+            !auth_file.exists(),
+            "fallback auth.json should be removed after keyring save"
+        );
+    }
+
+    fn id_token_with_prefix(prefix: &str) -> IdTokenInfo {
+        #[derive(Serialize)]
+        struct Header {
+            alg: &'static str,
+            typ: &'static str,
+        }
+
+        let header = Header {
+            alg: "none",
+            typ: "JWT",
+        };
+        let payload = json!({
+            "email": format!("{prefix}@example.com"),
+            "https://api.openai.com/auth": {
+                "chatgpt_account_id": format!("{prefix}-account"),
+            },
+        });
+        let encode = |bytes: &[u8]| base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(bytes);
+        let header_b64 = encode(&serde_json::to_vec(&header).expect("serialize header"));
+        let payload_b64 = encode(&serde_json::to_vec(&payload).expect("serialize payload"));
+        let signature_b64 = encode(b"sig");
+        let fake_jwt = format!("{header_b64}.{payload_b64}.{signature_b64}");
+
+        crate::token_data::parse_id_token(&fake_jwt).expect("fake JWT should parse")
+    }
+
+    fn auth_with_prefix(prefix: &str) -> AuthDotJson {
+        AuthDotJson {
+            openai_api_key: Some(format!("{prefix}-api-key")),
+            tokens: Some(TokenData {
+                id_token: id_token_with_prefix(prefix),
+                access_token: format!("{prefix}-access"),
+                refresh_token: format!("{prefix}-refresh"),
+                account_id: Some(format!("{prefix}-account-id")),
+            }),
+            last_refresh: None,
+        }
+    }
+
+    #[test]
+    fn keyring_auth_storage_load_returns_deserialized_auth() -> anyhow::Result<()> {
+        let codex_home = tempdir()?;
+        let mock_keyring = MockKeyringStore::default();
+        let storage = KeyringAuthStorage::new(
+            codex_home.path().to_path_buf(),
+            Arc::new(mock_keyring.clone()),
+        );
+        let expected = AuthDotJson {
+            openai_api_key: Some("sk-test".to_string()),
+            tokens: None,
+            last_refresh: None,
+        };
+        seed_keyring_with_auth(
+            &mock_keyring,
+            || compute_store_key(codex_home.path()),
+            &expected,
+        )?;
+
+        let loaded = storage.load()?;
+        assert_eq!(Some(expected), loaded);
+        Ok(())
+    }
+
+    #[test]
+    fn keyring_auth_storage_compute_store_key_for_home_directory() -> anyhow::Result<()> {
+        let codex_home = PathBuf::from("~/.codex");
+
+        let key = compute_store_key(codex_home.as_path())?;
+
+        assert_eq!(key, "cli|940db7b1d0e4eb40");
+        Ok(())
+    }
+
+    #[test]
+    fn keyring_auth_storage_save_persists_and_removes_fallback_file() -> anyhow::Result<()> {
+        let codex_home = tempdir()?;
+        let mock_keyring = MockKeyringStore::default();
+        let storage = KeyringAuthStorage::new(
+            codex_home.path().to_path_buf(),
+            Arc::new(mock_keyring.clone()),
+        );
+        let auth_file = get_auth_file(codex_home.path());
+        std::fs::write(&auth_file, "stale")?;
+        let auth = AuthDotJson {
+            openai_api_key: None,
+            tokens: Some(TokenData {
+                id_token: Default::default(),
+                access_token: "access".to_string(),
+                refresh_token: "refresh".to_string(),
+                account_id: Some("account".to_string()),
+            }),
+            last_refresh: Some(Utc::now()),
+        };
+
+        storage.save(&auth)?;
+
+        let key = compute_store_key(codex_home.path())?;
+        assert_keyring_saved_auth_and_removed_fallback(
+            &mock_keyring,
+            &key,
+            codex_home.path(),
+            &auth,
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn keyring_auth_storage_delete_removes_keyring_and_file() -> anyhow::Result<()> {
+        let codex_home = tempdir()?;
+        let mock_keyring = MockKeyringStore::default();
+        let storage = KeyringAuthStorage::new(
+            codex_home.path().to_path_buf(),
+            Arc::new(mock_keyring.clone()),
+        );
+        let (key, auth_file) = seed_keyring_and_fallback_auth_file_for_delete(
+            &mock_keyring,
+            codex_home.path(),
+            || compute_store_key(codex_home.path()),
+        )?;
+
+        let removed = storage.delete()?;
+
+        assert!(removed, "delete should report removal");
+        assert!(
+            !mock_keyring.contains(&key),
+            "keyring entry should be removed"
+        );
+        assert!(
+            !auth_file.exists(),
+            "fallback auth.json should be removed after keyring delete"
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn auto_auth_storage_load_prefers_keyring_value() -> anyhow::Result<()> {
+        let codex_home = tempdir()?;
+        let mock_keyring = MockKeyringStore::default();
+        let storage = AutoAuthStorage::new(
+            codex_home.path().to_path_buf(),
+            Arc::new(mock_keyring.clone()),
+        );
+        let keyring_auth = auth_with_prefix("keyring");
+        seed_keyring_with_auth(
+            &mock_keyring,
+            || compute_store_key(codex_home.path()),
+            &keyring_auth,
+        )?;
+
+        let file_auth = auth_with_prefix("file");
+        storage.file_storage.save(&file_auth)?;
+
+        let loaded = storage.load()?;
+        assert_eq!(loaded, Some(keyring_auth));
+        Ok(())
+    }
+
+    #[test]
+    fn auto_auth_storage_load_uses_file_when_keyring_empty() -> anyhow::Result<()> {
+        let codex_home = tempdir()?;
+        let mock_keyring = MockKeyringStore::default();
+        let storage = AutoAuthStorage::new(codex_home.path().to_path_buf(), Arc::new(mock_keyring));
+
+        let expected = auth_with_prefix("file-only");
+        storage.file_storage.save(&expected)?;
+
+        let loaded = storage.load()?;
+        assert_eq!(loaded, Some(expected));
+        Ok(())
+    }
+
+    #[test]
+    fn auto_auth_storage_load_falls_back_when_keyring_errors() -> anyhow::Result<()> {
+        let codex_home = tempdir()?;
+        let mock_keyring = MockKeyringStore::default();
+        let storage = AutoAuthStorage::new(
+            codex_home.path().to_path_buf(),
+            Arc::new(mock_keyring.clone()),
+        );
+        let key = compute_store_key(codex_home.path())?;
+        mock_keyring.set_error(&key, KeyringError::Invalid("error".into(), "load".into()));
+
+        let expected = auth_with_prefix("fallback");
+        storage.file_storage.save(&expected)?;
+
+        let loaded = storage.load()?;
+        assert_eq!(loaded, Some(expected));
+        Ok(())
+    }
+
+    #[test]
+    fn auto_auth_storage_save_prefers_keyring() -> anyhow::Result<()> {
+        let codex_home = tempdir()?;
+        let mock_keyring = MockKeyringStore::default();
+        let storage = AutoAuthStorage::new(
+            codex_home.path().to_path_buf(),
+            Arc::new(mock_keyring.clone()),
+        );
+        let key = compute_store_key(codex_home.path())?;
+
+        let stale = auth_with_prefix("stale");
+        storage.file_storage.save(&stale)?;
+
+        let expected = auth_with_prefix("to-save");
+        storage.save(&expected)?;
+
+        assert_keyring_saved_auth_and_removed_fallback(
+            &mock_keyring,
+            &key,
+            codex_home.path(),
+            &expected,
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn auto_auth_storage_save_falls_back_when_keyring_errors() -> anyhow::Result<()> {
+        let codex_home = tempdir()?;
+        let mock_keyring = MockKeyringStore::default();
+        let storage = AutoAuthStorage::new(
+            codex_home.path().to_path_buf(),
+            Arc::new(mock_keyring.clone()),
+        );
+        let key = compute_store_key(codex_home.path())?;
+        mock_keyring.set_error(&key, KeyringError::Invalid("error".into(), "save".into()));
+
+        let auth = auth_with_prefix("fallback");
+        storage.save(&auth)?;
+
+        let auth_file = get_auth_file(codex_home.path());
+        assert!(
+            auth_file.exists(),
+            "fallback auth.json should be created when keyring save fails"
+        );
+        let saved = storage
+            .file_storage
+            .load()?
+            .context("fallback auth should exist")?;
+        assert_eq!(saved, auth);
+        assert!(
+            mock_keyring.saved_value(&key).is_none(),
+            "keyring should not contain value when save fails"
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn auto_auth_storage_delete_removes_keyring_and_file() -> anyhow::Result<()> {
+        let codex_home = tempdir()?;
+        let mock_keyring = MockKeyringStore::default();
+        let storage = AutoAuthStorage::new(
+            codex_home.path().to_path_buf(),
+            Arc::new(mock_keyring.clone()),
+        );
+        let (key, auth_file) = seed_keyring_and_fallback_auth_file_for_delete(
+            &mock_keyring,
+            codex_home.path(),
+            || compute_store_key(codex_home.path()),
+        )?;
+
+        let removed = storage.delete()?;
+
+        assert!(removed, "delete should report removal");
+        assert!(
+            !mock_keyring.contains(&key),
+            "keyring entry should be removed"
+        );
+        assert!(
+            !auth_file.exists(),
+            "fallback auth.json should be removed after delete"
+        );
+        Ok(())
+    }
+}
--- a/codex-rs/core/src/chat_completions.rs
+++ b/codex-rs/core/src/chat_completions.rs
@@ -4,6 +4,7 @@ use crate::ModelProviderInfo;
 use crate::client_common::Prompt;
 use crate::client_common::ResponseEvent;
 use crate::client_common::ResponseStream;
+use crate::default_client::CodexHttpClient;
 use crate::error::CodexErr;
 use crate::error::ConnectionFailedError;
 use crate::error::ResponseStreamFailed;
@@ -16,6 +17,7 @@ use crate::util::backoff;
 use bytes::Bytes;
 use codex_otel::otel_event_manager::OtelEventManager;
 use codex_protocol::models::ContentItem;
+use codex_protocol::models::FunctionCallOutputContentItem;
 use codex_protocol::models::ReasoningItemContent;
 use codex_protocol::models::ResponseItem;
 use eventsource_stream::Eventsource;
@@ -36,7 +38,7 @@ use tracing::trace;
 pub(crate) async fn stream_chat_completions(
    prompt: &Prompt,
    model_family: &ModelFamily,
-    client: &reqwest::Client,
+    client: &CodexHttpClient,
    provider: &ModelProviderInfo,
    otel_event_manager: &OtelEventManager,
 ) -> Result<ResponseStream> {
@@ -75,6 +77,7 @@ pub(crate) async fn stream_chat_completions(
            ResponseItem::CustomToolCall { .. } => {}
            ResponseItem::CustomToolCallOutput { .. } => {}
            ResponseItem::WebSearchCall { .. } => {}
+            ResponseItem::GhostSnapshot { .. } => {}
        }
    }

@@ -157,16 +160,26 @@ pub(crate) async fn stream_chat_completions(
    for (idx, item) in input.iter().enumerate() {
        match item {
            ResponseItem::Message { role, content, .. } => {
+                // Build content either as a plain string (typical for assistant text)
+                // or as an array of content items when images are present (user/tool multimodal).
                let mut text = String::new();
+                let mut items: Vec<serde_json::Value> = Vec::new();
+                let mut saw_image = false;
+
                for c in content {
                    match c {
                        ContentItem::InputText { text: t }
                        | ContentItem::OutputText { text: t } => {
                            text.push_str(t);
+                            items.push(json!({"type":"text","text": t}));
+                        }
+                        ContentItem::InputImage { image_url } => {
+                            saw_image = true;
+                            items.push(json!({"type":"image_url","image_url": {"url": image_url}}));
                        }
-                        _ => {}
                    }
                }
+
                // Skip exact-duplicate assistant messages.
                if role == "assistant" {
                    if let Some(prev) = &last_assistant_text
@@ -177,7 +190,17 @@ pub(crate) async fn stream_chat_completions(
                    last_assistant_text = Some(text.clone());
                }

-                let mut msg = json!({"role": role, "content": text});
+                // For assistant messages, always send a plain string for compatibility.
+                // For user messages, if an image is present, send an array of content items.
+                let content_value = if role == "assistant" {
+                    json!(text)
+                } else if saw_image {
+                    json!(items)
+                } else {
+                    json!(text)
+                };
+
+                let mut msg = json!({"role": role, "content": content_value});
                if role == "assistant"
                    && let Some(reasoning) = reasoning_by_anchor_index.get(&idx)
                    && let Some(obj) = msg.as_object_mut()
@@ -236,10 +259,29 @@ pub(crate) async fn stream_chat_completions(
                messages.push(msg);
            }
            ResponseItem::FunctionCallOutput { call_id, output } => {
+                // Prefer structured content items when available (e.g., images)
+                // otherwise fall back to the legacy plain-string content.
+                let content_value = if let Some(items) = &output.content_items {
+                    let mapped: Vec<serde_json::Value> = items
+                        .iter()
+                        .map(|it| match it {
+                            FunctionCallOutputContentItem::InputText { text } => {
+                                json!({"type":"text","text": text})
+                            }
+                            FunctionCallOutputContentItem::InputImage { image_url } => {
+                                json!({"type":"image_url","image_url": {"url": image_url}})
+                            }
+                        })
+                        .collect();
+                    json!(mapped)
+                } else {
+                    json!(output.content)
+                };
+
                messages.push(json!({
                    "role": "tool",
                    "tool_call_id": call_id,
-                    "content": output.content,
+                    "content": content_value,
                }));
            }
            ResponseItem::CustomToolCall {
@@ -269,6 +311,10 @@ pub(crate) async fn stream_chat_completions(
                    "content": output,
                }));
            }
+            ResponseItem::GhostSnapshot { .. } => {
+                // Ghost snapshots annotate history but are not sent to the model.
+                continue;
+            }
            ResponseItem::Reasoning { .. }
            | ResponseItem::WebSearchCall { .. }
            | ResponseItem::Other => {
--- a/codex-rs/core/src/client.rs
+++ b/codex-rs/core/src/client.rs
@@ -39,6 +39,7 @@ use crate::client_common::ResponsesApiRequest;
 use crate::client_common::create_reasoning_param_for_request;
 use crate::client_common::create_text_param_for_request;
 use crate::config::Config;
+use crate::default_client::CodexHttpClient;
 use crate::default_client::create_client;
 use crate::error::CodexErr;
 use crate::error::ConnectionFailedError;
@@ -81,7 +82,7 @@ pub struct ModelClient {
    config: Arc<Config>,
    auth_manager: Option<Arc<AuthManager>>,
    otel_event_manager: OtelEventManager,
-    client: reqwest::Client,
+    client: CodexHttpClient,
    provider: ModelProviderInfo,
    conversation_id: ConversationId,
    effort: Option<ReasoningEffortConfig>,
@@ -133,6 +134,14 @@ impl ModelClient {
        self.stream_with_task_kind(prompt, TaskKind::Regular).await
    }

+    pub fn config(&self) -> Arc<Config> {
+        Arc::clone(&self.config)
+    }
+
+    pub fn provider(&self) -> &ModelProviderInfo {
+        &self.provider
+    }
+
    pub(crate) async fn stream_with_task_kind(
        &self,
        prompt: &Prompt,
@@ -214,18 +223,14 @@ impl ModelClient {

        let input_with_instructions = prompt.get_formatted_input();

-        let verbosity = match &self.config.model_family.family {
-            family if family == "gpt-5" => self.config.model_verbosity,
-            _ => {
-                if self.config.model_verbosity.is_some() {
-                    warn!(
-                        "model_verbosity is set but ignored for non-gpt-5 model family: {}",
-                        self.config.model_family.family
-                    );
-                }
-
-                None
-            }
+        let supports_verbosity = self.config.model_family.support_verbosity;
+        // Warn only when an explicitly configured verbosity is about to be dropped.
+        if !supports_verbosity && self.config.model_verbosity.is_some() {
+            warn!(
+                "model_verbosity is set but ignored as the model does not support verbosity: {}",
+                self.config.model_family.family
+            );
+        }
+        let verbosity = self.config.model_verbosity.filter(|_| supports_verbosity);

        // Only include `text.verbosity` for GPT-5 family models
@@ -300,6 +305,7 @@ impl ModelClient {
            "POST to {}: {:?}",
            self.provider.get_full_url(&auth),
            serde_json::to_string(payload_json)
+                .unwrap_or("<unable to serialize payload>".to_string())
        );

        let mut req_builder = self
@@ -309,7 +315,6 @@ impl ModelClient {
            .map_err(StreamAttemptError::Fatal)?;

        req_builder = req_builder
-            .header("OpenAI-Beta", "responses=experimental")
            // Send session_id for compatibility.
            .header("conversation_id", self.conversation_id.to_string())
            .header("session_id", self.conversation_id.to_string())
@@ -335,13 +340,6 @@ impl ModelClient {
                .headers()
                .get("cf-ray")
                .map(|v| v.to_str().unwrap_or_default().to_string());
-
-            debug!(
-                "Response status: {}, cf-ray: {:?}, version: {:?}",
-                resp.status(),
-                request_id,
-                resp.version()
-            );
        }

        match res {
@@ -386,9 +384,14 @@ impl ModelClient {

                if status == StatusCode::UNAUTHORIZED
                    && let Some(manager) = auth_manager.as_ref()
-                    && manager.auth().is_some()
+                    && let Some(auth) = auth.as_ref()
+                    && auth.mode == AuthMode::ChatGPT
                {
-                    let _ = manager.refresh_token().await;
+                    manager.refresh_token().await.map_err(|err| {
+                        StreamAttemptError::Fatal(CodexErr::Fatal(format!(
+                            "Failed to refresh ChatGPT credentials: {err}"
+                        )))
+                    })?;
                }

                // The OpenAI Responses endpoint returns structured JSON bodies even for 4xx/5xx
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
--- a/codex-rs/core/src/codex/compact.rs
+++ b/codex-rs/core/src/codex/compact.rs
@@ -85,7 +85,7 @@ async fn run_compact_task_inner(
    sess.persist_rollout_items(&[rollout_item]).await;

    loop {
-        let turn_input = history.get_history();
+        let turn_input = history.get_history_for_prompt();
        let prompt = Prompt {
            input: turn_input.clone(),
            ..Default::default()
@@ -132,7 +132,7 @@ async fn run_compact_task_inner(
                    let delay = backoff(retries);
                    sess.notify_stream_error(
                        turn_context.as_ref(),
-                        format!("Re-connecting... {retries}/{max_retries}"),
+                        format!("Reconnecting... {retries}/{max_retries}"),
                    )
                    .await;
                    tokio::time::sleep(delay).await;
@@ -148,11 +148,17 @@ async fn run_compact_task_inner(
        }
    }

-    let history_snapshot = sess.history_snapshot().await;
+    let history_snapshot = sess.clone_history().await.get_history();
    let summary_text = get_last_assistant_message_from_turn(&history_snapshot).unwrap_or_default();
    let user_messages = collect_user_messages(&history_snapshot);
    let initial_context = sess.build_initial_context(turn_context.as_ref());
-    let new_history = build_compacted_history(initial_context, &user_messages, &summary_text);
+    let mut new_history = build_compacted_history(initial_context, &user_messages, &summary_text);
+    let ghost_snapshots: Vec<ResponseItem> = history_snapshot
+        .iter()
+        .filter(|item| matches!(item, ResponseItem::GhostSnapshot { .. }))
+        .cloned()
+        .collect();
+    new_history.extend(ghost_snapshots);
    sess.replace_history(new_history).await;

    let rollout_item = RolloutItem::Compacted(CompactedItem {
@@ -200,7 +206,20 @@ pub(crate) fn build_compacted_history(
    user_messages: &[String],
    summary_text: &str,
 ) -> Vec<ResponseItem> {
-    let mut history = initial_context;
+    build_compacted_history_with_limit(
+        initial_context,
+        user_messages,
+        summary_text,
+        COMPACT_USER_MESSAGE_MAX_TOKENS * 4,
+    )
+}
+
+fn build_compacted_history_with_limit(
+    mut history: Vec<ResponseItem>,
+    user_messages: &[String],
+    summary_text: &str,
+    max_bytes: usize,
+) -> Vec<ResponseItem> {
    let mut user_messages_text = if user_messages.is_empty() {
        "(none)".to_string()
    } else {
@@ -208,7 +227,6 @@ pub(crate) fn build_compacted_history(
    };
    // Truncate the concatenated prior user messages so the bridge message
    // stays well under the context window (approx. 4 bytes/token).
-    let max_bytes = COMPACT_USER_MESSAGE_MAX_TOKENS * 4;
    if user_messages_text.len() > max_bytes {
        user_messages_text = truncate_middle(&user_messages_text, max_bytes).0;
    }
@@ -361,11 +379,16 @@ mod tests {

    #[test]
    fn build_compacted_history_truncates_overlong_user_messages() {
-        // Prepare a very large prior user message so the aggregated
-        // `user_messages_text` exceeds the truncation threshold used by
-        // `build_compacted_history` (80k bytes).
-        let big = "X".repeat(200_000);
-        let history = build_compacted_history(Vec::new(), std::slice::from_ref(&big), "SUMMARY");
+        // Use a small truncation limit so the test remains fast while still validating
+        // that oversized user content is truncated.
+        let max_bytes = 128;
+        let big = "X".repeat(max_bytes + 50);
+        let history = super::build_compacted_history_with_limit(
+            Vec::new(),
+            std::slice::from_ref(&big),
+            "SUMMARY",
+            max_bytes,
+        );

        // Expect exactly one bridge message added to history (plus any initial context we provided, which is none).
        assert_eq!(history.len(), 1);
--- a/codex-rs/core/src/codex_conversation.rs
+++ b/codex-rs/core/src/codex_conversation.rs
@@ -3,16 +3,21 @@ use crate::error::Result as CodexResult;
 use crate::protocol::Event;
 use crate::protocol::Op;
 use crate::protocol::Submission;
+use std::path::PathBuf;

 pub struct CodexConversation {
    codex: Codex,
+    rollout_path: PathBuf,
 }

 /// Conduit for the bidirectional stream of messages that compose a conversation
 /// in Codex.
 impl CodexConversation {
-    pub(crate) fn new(codex: Codex) -> Self {
-        Self { codex }
+    pub(crate) fn new(codex: Codex, rollout_path: PathBuf) -> Self {
+        Self {
+            codex,
+            rollout_path,
+        }
    }

    pub async fn submit(&self, op: Op) -> CodexResult<String> {
@@ -27,4 +32,8 @@ impl CodexConversation {
    pub async fn next_event(&self) -> CodexResult<Event> {
        self.codex.next_event().await
    }
+
+    pub fn rollout_path(&self) -> PathBuf {
+        self.rollout_path.clone()
+    }
 }
--- a/codex-rs/core/src/config.rs
+++ b/codex-rs/core/src/config.rs
@@ -1,3 +1,4 @@
+use crate::auth::AuthCredentialsStoreMode;
 use crate::config_loader::LoadedConfigLayers;
 pub use crate::config_loader::load_config_as_toml;
 use crate::config_loader::load_config_layers_with_overrides;
@@ -108,6 +109,10 @@ pub struct Config {
    /// for either of approval_policy or sandbox_mode.
    pub did_user_set_custom_approval_policy_or_sandbox_mode: bool,

+    /// On Windows, indicates that a previously configured workspace-write sandbox
+    /// was coerced to read-only because native auto mode is unsupported.
+    pub forced_auto_mode_downgraded_on_windows: bool,
+
    pub shell_environment_policy: ShellEnvironmentPolicy,

    /// When `true`, `AgentReasoning` events emitted by the backend will be
@@ -156,6 +161,12 @@ pub struct Config {
    /// resolved against this path.
    pub cwd: PathBuf,

+    /// Preferred store for CLI auth credentials.
+    /// file (default): Use a file in the Codex home directory.
+    /// keyring: Use an OS-specific keyring service.
+    /// auto: Use the OS-specific keyring service if available, otherwise use a file.
+    pub cli_auth_credentials_store_mode: AuthCredentialsStoreMode,
+
    /// Definition for MCP servers that Codex can reach out to for tool calls.
    pub mcp_servers: HashMap<String, McpServerConfig>,

@@ -223,6 +234,9 @@ pub struct Config {

    pub tools_web_search_request: bool,

+    /// When `true`, run a model-based assessment for commands denied by the sandbox.
+    pub experimental_sandbox_command_assessment: bool,
+
    pub use_experimental_streamable_shell_tool: bool,

    /// If set to `true`, used only the experimental unified exec tool.
@@ -866,6 +880,13 @@ pub struct ConfigToml {
    #[serde(default)]
    pub forced_login_method: Option<ForcedLoginMethod>,

+    /// Preferred backend for storing CLI auth credentials.
+    /// file (default): Use a file in the Codex home directory.
+    /// keyring: Use an OS-specific keyring service.
+    /// auto: Use the keyring if available, otherwise use a file.
+    #[serde(default)]
+    pub cli_auth_credentials_store: Option<AuthCredentialsStoreMode>,
+
    /// Definition for MCP servers that Codex can reach out to for tool calls.
    #[serde(default)]
    pub mcp_servers: HashMap<String, McpServerConfig>,
@@ -958,6 +979,7 @@ pub struct ConfigToml {
    pub experimental_use_unified_exec_tool: Option<bool>,
    pub experimental_use_rmcp_client: Option<bool>,
    pub experimental_use_freeform_apply_patch: Option<bool>,
+    pub experimental_sandbox_command_assessment: Option<bool>,
 }

 impl From<ConfigToml> for UserSavedConfig {
@@ -1018,14 +1040,22 @@ impl From<ToolsToml> for Tools {
    }
 }

+#[derive(Debug, PartialEq, Eq)]
+pub struct SandboxPolicyResolution {
+    pub policy: SandboxPolicy,
+    pub forced_auto_mode_downgraded_on_windows: bool,
+}
+
 impl ConfigToml {
    /// Derive the effective sandbox policy from the configuration.
    fn derive_sandbox_policy(
        &self,
        sandbox_mode_override: Option<SandboxMode>,
+        profile_sandbox_mode: Option<SandboxMode>,
        resolved_cwd: &Path,
-    ) -> SandboxPolicy {
+    ) -> SandboxPolicyResolution {
        let resolved_sandbox_mode = sandbox_mode_override
+            .or(profile_sandbox_mode)
            .or(self.sandbox_mode)
            .or_else(|| {
                // if no sandbox_mode is set, but user has marked directory as trusted, use WorkspaceWrite
@@ -1038,7 +1068,7 @@ impl ConfigToml {
                })
            })
            .unwrap_or_default();
-        match resolved_sandbox_mode {
+        let mut sandbox_policy = match resolved_sandbox_mode {
            SandboxMode::ReadOnly => SandboxPolicy::new_read_only_policy(),
            SandboxMode::WorkspaceWrite => match self.sandbox_workspace_write.as_ref() {
                Some(SandboxWorkspaceWrite {
@@ -1055,6 +1085,17 @@ impl ConfigToml {
                None => SandboxPolicy::new_workspace_write_policy(),
            },
            SandboxMode::DangerFullAccess => SandboxPolicy::DangerFullAccess,
+        };
+        let mut forced_auto_mode_downgraded_on_windows = false;
+        if cfg!(target_os = "windows")
+            && matches!(resolved_sandbox_mode, SandboxMode::WorkspaceWrite)
+        {
+            sandbox_policy = SandboxPolicy::new_read_only_policy();
+            forced_auto_mode_downgraded_on_windows = true;
+        }
+        SandboxPolicyResolution {
+            policy: sandbox_policy,
+            forced_auto_mode_downgraded_on_windows,
        }
    }

@@ -1118,6 +1159,7 @@ pub struct ConfigOverrides {
    pub include_view_image_tool: Option<bool>,
    pub show_raw_agent_reasoning: Option<bool>,
    pub tools_web_search_request: Option<bool>,
+    pub experimental_sandbox_command_assessment: Option<bool>,
    /// Additional directories that should be treated as writable roots for this session.
    pub additional_writable_roots: Vec<PathBuf>,
 }
@@ -1147,6 +1189,7 @@ impl Config {
            include_view_image_tool: include_view_image_tool_override,
            show_raw_agent_reasoning,
            tools_web_search_request: override_tools_web_search_request,
+            experimental_sandbox_command_assessment: sandbox_command_assessment_override,
            additional_writable_roots,
        } = overrides;

@@ -1172,6 +1215,7 @@ impl Config {
            include_apply_patch_tool: include_apply_patch_tool_override,
            include_view_image_tool: include_view_image_tool_override,
            web_search_request: override_tools_web_search_request,
+            experimental_sandbox_command_assessment: sandbox_command_assessment_override,
        };

        let features = Features::from_config(&cfg, &config_profile, feature_overrides);
@@ -1212,7 +1256,10 @@ impl Config {
            .get_active_project(&resolved_cwd)
            .unwrap_or(ProjectConfig { trust_level: None });

-        let mut sandbox_policy = cfg.derive_sandbox_policy(sandbox_mode, &resolved_cwd);
+        let SandboxPolicyResolution {
+            policy: mut sandbox_policy,
+            forced_auto_mode_downgraded_on_windows,
+        } = cfg.derive_sandbox_policy(sandbox_mode, config_profile.sandbox_mode, &resolved_cwd);
        if let SandboxPolicy::WorkspaceWrite { writable_roots, .. } = &mut sandbox_policy {
            for path in additional_writable_roots {
                if !writable_roots.iter().any(|existing| existing == &path) {
@@ -1235,8 +1282,8 @@ impl Config {
            .is_some()
            || config_profile.approval_policy.is_some()
            || cfg.approval_policy.is_some()
-            // TODO(#3034): profile.sandbox_mode is not implemented
            || sandbox_mode.is_some()
+            || config_profile.sandbox_mode.is_some()
            || cfg.sandbox_mode.is_some();

        let mut model_providers = built_in_model_providers();
@@ -1269,6 +1316,8 @@ impl Config {
        let use_experimental_streamable_shell_tool = features.enabled(Feature::StreamableShell);
        let use_experimental_unified_exec_tool = features.enabled(Feature::UnifiedExec);
        let use_experimental_use_rmcp_client = features.enabled(Feature::RmcpClient);
+        let experimental_sandbox_command_assessment =
+            features.enabled(Feature::SandboxCommandAssessment);

        let forced_chatgpt_workspace_id =
            cfg.forced_chatgpt_workspace_id.as_ref().and_then(|value| {
@@ -1341,10 +1390,14 @@ impl Config {
            approval_policy,
            sandbox_policy,
            did_user_set_custom_approval_policy_or_sandbox_mode,
+            forced_auto_mode_downgraded_on_windows,
            shell_environment_policy,
            notify: cfg.notify,
            user_instructions,
            base_instructions,
+            // The config.toml omits "_mode" because it's a config file. However, "_mode"
+            // is important in code to differentiate the mode from the store implementation.
+            cli_auth_credentials_store_mode: cfg.cli_auth_credentials_store.unwrap_or_default(),
            mcp_servers: cfg.mcp_servers,
            // The config.toml omits "_mode" because it's a config file. However, "_mode"
            // is important in code to differentiate the mode from the store implementation.
@@ -1390,6 +1443,7 @@ impl Config {
            forced_login_method,
            include_apply_patch_tool: include_apply_patch_tool_flag,
            tools_web_search_request,
+            experimental_sandbox_command_assessment,
            use_experimental_streamable_shell_tool,
            use_experimental_unified_exec_tool,
            use_experimental_use_rmcp_client,
@@ -1591,10 +1645,17 @@ network_access = false  # This should be ignored.
        let sandbox_full_access_cfg = toml::from_str::<ConfigToml>(sandbox_full_access)
            .expect("TOML deserialization should succeed");
        let sandbox_mode_override = None;
+        let resolution = sandbox_full_access_cfg.derive_sandbox_policy(
+            sandbox_mode_override,
+            None,
+            &PathBuf::from("/tmp/test"),
+        );
        assert_eq!(
-            SandboxPolicy::DangerFullAccess,
-            sandbox_full_access_cfg
-                .derive_sandbox_policy(sandbox_mode_override, &PathBuf::from("/tmp/test"))
+            resolution,
+            SandboxPolicyResolution {
+                policy: SandboxPolicy::DangerFullAccess,
+                forced_auto_mode_downgraded_on_windows: false,
+            }
        );

        let sandbox_read_only = r#"
@@ -1607,10 +1668,17 @@ network_access = true  # This should be ignored.
        let sandbox_read_only_cfg = toml::from_str::<ConfigToml>(sandbox_read_only)
            .expect("TOML deserialization should succeed");
        let sandbox_mode_override = None;
+        let resolution = sandbox_read_only_cfg.derive_sandbox_policy(
+            sandbox_mode_override,
+            None,
+            &PathBuf::from("/tmp/test"),
+        );
        assert_eq!(
-            SandboxPolicy::ReadOnly,
-            sandbox_read_only_cfg
-                .derive_sandbox_policy(sandbox_mode_override, &PathBuf::from("/tmp/test"))
+            resolution,
+            SandboxPolicyResolution {
+                policy: SandboxPolicy::ReadOnly,
+                forced_auto_mode_downgraded_on_windows: false,
+            }
        );

        let sandbox_workspace_write = r#"
@@ -1627,16 +1695,33 @@ exclude_slash_tmp = true
        let sandbox_workspace_write_cfg = toml::from_str::<ConfigToml>(sandbox_workspace_write)
            .expect("TOML deserialization should succeed");
        let sandbox_mode_override = None;
-        assert_eq!(
-            SandboxPolicy::WorkspaceWrite {
-                writable_roots: vec![PathBuf::from("/my/workspace")],
-                network_access: false,
-                exclude_tmpdir_env_var: true,
-                exclude_slash_tmp: true,
-            },
-            sandbox_workspace_write_cfg
-                .derive_sandbox_policy(sandbox_mode_override, &PathBuf::from("/tmp/test"))
+        let resolution = sandbox_workspace_write_cfg.derive_sandbox_policy(
+            sandbox_mode_override,
+            None,
+            &PathBuf::from("/tmp/test"),
        );
+        if cfg!(target_os = "windows") {
+            assert_eq!(
+                resolution,
+                SandboxPolicyResolution {
+                    policy: SandboxPolicy::ReadOnly,
+                    forced_auto_mode_downgraded_on_windows: true,
+                }
+            );
+        } else {
+            assert_eq!(
+                resolution,
+                SandboxPolicyResolution {
+                    policy: SandboxPolicy::WorkspaceWrite {
+                        writable_roots: vec![PathBuf::from("/my/workspace")],
+                        network_access: false,
+                        exclude_tmpdir_env_var: true,
+                        exclude_slash_tmp: true,
+                    },
+                    forced_auto_mode_downgraded_on_windows: false,
+                }
+            );
+        }

        let sandbox_workspace_write = r#"
 sandbox_mode = "workspace-write"
@@ -1655,16 +1740,33 @@ trust_level = "trusted"
        let sandbox_workspace_write_cfg = toml::from_str::<ConfigToml>(sandbox_workspace_write)
            .expect("TOML deserialization should succeed");
        let sandbox_mode_override = None;
-        assert_eq!(
-            SandboxPolicy::WorkspaceWrite {
-                writable_roots: vec![PathBuf::from("/my/workspace")],
-                network_access: false,
-                exclude_tmpdir_env_var: true,
-                exclude_slash_tmp: true,
-            },
-            sandbox_workspace_write_cfg
-                .derive_sandbox_policy(sandbox_mode_override, &PathBuf::from("/tmp/test"))
+        let resolution = sandbox_workspace_write_cfg.derive_sandbox_policy(
+            sandbox_mode_override,
+            None,
+            &PathBuf::from("/tmp/test"),
        );
+        if cfg!(target_os = "windows") {
+            assert_eq!(
+                resolution,
+                SandboxPolicyResolution {
+                    policy: SandboxPolicy::ReadOnly,
+                    forced_auto_mode_downgraded_on_windows: true,
+                }
+            );
+        } else {
+            assert_eq!(
+                resolution,
+                SandboxPolicyResolution {
+                    policy: SandboxPolicy::WorkspaceWrite {
+                        writable_roots: vec![PathBuf::from("/my/workspace")],
+                        network_access: false,
+                        exclude_tmpdir_env_var: true,
+                        exclude_slash_tmp: true,
+                    },
+                    forced_auto_mode_downgraded_on_windows: false,
+                }
+            );
+        }
    }

    #[test]
@@ -1689,24 +1791,76 @@ trust_level = "trusted"
        )?;

        let expected_backend = canonicalize(&backend).expect("canonicalize backend directory");
-        match config.sandbox_policy {
-            SandboxPolicy::WorkspaceWrite { writable_roots, .. } => {
-                assert_eq!(
-                    writable_roots
-                        .iter()
-                        .filter(|root| **root == expected_backend)
-                        .count(),
-                    1,
-                    "expected single writable root entry for {}",
-                    expected_backend.display()
-                );
+        if cfg!(target_os = "windows") {
+            assert!(
+                config.forced_auto_mode_downgraded_on_windows,
+                "expected workspace-write request to be downgraded on Windows"
+            );
+            match config.sandbox_policy {
+                SandboxPolicy::ReadOnly => {}
+                other => panic!("expected read-only policy on Windows, got {other:?}"),
+            }
+        } else {
+            match config.sandbox_policy {
+                SandboxPolicy::WorkspaceWrite { writable_roots, .. } => {
+                    assert_eq!(
+                        writable_roots
+                            .iter()
+                            .filter(|root| **root == expected_backend)
+                            .count(),
+                        1,
+                        "expected single writable root entry for {}",
+                        expected_backend.display()
+                    );
+                }
+                other => panic!("expected workspace-write policy, got {other:?}"),
            }
-            other => panic!("expected workspace-write policy, got {other:?}"),
        }

        Ok(())
    }

+    #[test]
+    fn config_defaults_to_file_cli_auth_store_mode() -> std::io::Result<()> {
+        let codex_home = TempDir::new()?;
+        let cfg = ConfigToml::default();
+
+        let config = Config::load_from_base_config_with_overrides(
+            cfg,
+            ConfigOverrides::default(),
+            codex_home.path().to_path_buf(),
+        )?;
+
+        assert_eq!(
+            config.cli_auth_credentials_store_mode,
+            AuthCredentialsStoreMode::File,
+        );
+
+        Ok(())
+    }
+
+    #[test]
+    fn config_honors_explicit_keyring_auth_store_mode() -> std::io::Result<()> {
+        let codex_home = TempDir::new()?;
+        let cfg = ConfigToml {
+            cli_auth_credentials_store: Some(AuthCredentialsStoreMode::Keyring),
+            ..Default::default()
+        };
+
+        let config = Config::load_from_base_config_with_overrides(
+            cfg,
+            ConfigOverrides::default(),
+            codex_home.path().to_path_buf(),
+        )?;
+
+        assert_eq!(
+            config.cli_auth_credentials_store_mode,
+            AuthCredentialsStoreMode::Keyring,
+        );
+
+        Ok(())
+    }
+
    #[test]
    fn config_defaults_to_auto_oauth_store_mode() -> std::io::Result<()> {
        let codex_home = TempDir::new()?;
@@ -1755,6 +1909,81 @@ trust_level = "trusted"
        Ok(())
    }

+    #[test]
+    fn profile_sandbox_mode_overrides_base() -> std::io::Result<()> {
+        let codex_home = TempDir::new()?;
+        let mut profiles = HashMap::new();
+        profiles.insert(
+            "work".to_string(),
+            ConfigProfile {
+                sandbox_mode: Some(SandboxMode::DangerFullAccess),
+                ..Default::default()
+            },
+        );
+        let cfg = ConfigToml {
+            profiles,
+            profile: Some("work".to_string()),
+            sandbox_mode: Some(SandboxMode::ReadOnly),
+            ..Default::default()
+        };
+
+        let config = Config::load_from_base_config_with_overrides(
+            cfg,
+            ConfigOverrides::default(),
+            codex_home.path().to_path_buf(),
+        )?;
+
+        assert!(matches!(
+            config.sandbox_policy,
+            SandboxPolicy::DangerFullAccess
+        ));
+        assert!(config.did_user_set_custom_approval_policy_or_sandbox_mode);
+
+        Ok(())
+    }
+
+    #[test]
+    fn cli_override_takes_precedence_over_profile_sandbox_mode() -> std::io::Result<()> {
+        let codex_home = TempDir::new()?;
+        let mut profiles = HashMap::new();
+        profiles.insert(
+            "work".to_string(),
+            ConfigProfile {
+                sandbox_mode: Some(SandboxMode::DangerFullAccess),
+                ..Default::default()
+            },
+        );
+        let cfg = ConfigToml {
+            profiles,
+            profile: Some("work".to_string()),
+            ..Default::default()
+        };
+
+        let overrides = ConfigOverrides {
+            sandbox_mode: Some(SandboxMode::WorkspaceWrite),
+            ..Default::default()
+        };
+
+        let config = Config::load_from_base_config_with_overrides(
+            cfg,
+            overrides,
+            codex_home.path().to_path_buf(),
+        )?;
+
+        if cfg!(target_os = "windows") {
+            assert!(matches!(config.sandbox_policy, SandboxPolicy::ReadOnly));
+            assert!(config.forced_auto_mode_downgraded_on_windows);
+        } else {
+            assert!(matches!(
+                config.sandbox_policy,
+                SandboxPolicy::WorkspaceWrite { .. }
+            ));
+            assert!(!config.forced_auto_mode_downgraded_on_windows);
+        }
+
+        Ok(())
+    }
+
    #[test]
    fn feature_table_overrides_legacy_flags() -> std::io::Result<()> {
        let codex_home = TempDir::new()?;
@@ -2849,10 +3078,12 @@ model_verbosity = "high"
                approval_policy: AskForApproval::Never,
                sandbox_policy: SandboxPolicy::new_read_only_policy(),
                did_user_set_custom_approval_policy_or_sandbox_mode: true,
+                forced_auto_mode_downgraded_on_windows: false,
                shell_environment_policy: ShellEnvironmentPolicy::default(),
                user_instructions: None,
                notify: None,
                cwd: fixture.cwd(),
+                cli_auth_credentials_store_mode: Default::default(),
                mcp_servers: HashMap::new(),
                mcp_oauth_credentials_store_mode: Default::default(),
                model_providers: fixture.model_provider_map.clone(),
@@ -2873,6 +3104,7 @@ model_verbosity = "high"
                forced_login_method: None,
                include_apply_patch_tool: false,
                tools_web_search_request: false,
+                experimental_sandbox_command_assessment: false,
                use_experimental_streamable_shell_tool: false,
                use_experimental_unified_exec_tool: false,
                use_experimental_use_rmcp_client: false,
@@ -2917,10 +3149,12 @@ model_verbosity = "high"
            approval_policy: AskForApproval::UnlessTrusted,
            sandbox_policy: SandboxPolicy::new_read_only_policy(),
            did_user_set_custom_approval_policy_or_sandbox_mode: true,
+            forced_auto_mode_downgraded_on_windows: false,
            shell_environment_policy: ShellEnvironmentPolicy::default(),
            user_instructions: None,
            notify: None,
            cwd: fixture.cwd(),
+            cli_auth_credentials_store_mode: Default::default(),
            mcp_servers: HashMap::new(),
            mcp_oauth_credentials_store_mode: Default::default(),
            model_providers: fixture.model_provider_map.clone(),
@@ -2941,6 +3175,7 @@ model_verbosity = "high"
            forced_login_method: None,
            include_apply_patch_tool: false,
            tools_web_search_request: false,
+            experimental_sandbox_command_assessment: false,
            use_experimental_streamable_shell_tool: false,
            use_experimental_unified_exec_tool: false,
            use_experimental_use_rmcp_client: false,
@@ -3000,10 +3235,12 @@ model_verbosity = "high"
            approval_policy: AskForApproval::OnFailure,
            sandbox_policy: SandboxPolicy::new_read_only_policy(),
            did_user_set_custom_approval_policy_or_sandbox_mode: true,
+            forced_auto_mode_downgraded_on_windows: false,
            shell_environment_policy: ShellEnvironmentPolicy::default(),
            user_instructions: None,
            notify: None,
            cwd: fixture.cwd(),
+            cli_auth_credentials_store_mode: Default::default(),
            mcp_servers: HashMap::new(),
            mcp_oauth_credentials_store_mode: Default::default(),
            model_providers: fixture.model_provider_map.clone(),
@@ -3024,6 +3261,7 @@ model_verbosity = "high"
            forced_login_method: None,
            include_apply_patch_tool: false,
            tools_web_search_request: false,
+            experimental_sandbox_command_assessment: false,
            use_experimental_streamable_shell_tool: false,
            use_experimental_unified_exec_tool: false,
            use_experimental_use_rmcp_client: false,
@@ -3069,10 +3307,12 @@ model_verbosity = "high"
            approval_policy: AskForApproval::OnFailure,
            sandbox_policy: SandboxPolicy::new_read_only_policy(),
            did_user_set_custom_approval_policy_or_sandbox_mode: true,
+            forced_auto_mode_downgraded_on_windows: false,
            shell_environment_policy: ShellEnvironmentPolicy::default(),
            user_instructions: None,
            notify: None,
            cwd: fixture.cwd(),
+            cli_auth_credentials_store_mode: Default::default(),
            mcp_servers: HashMap::new(),
            mcp_oauth_credentials_store_mode: Default::default(),
            model_providers: fixture.model_provider_map.clone(),
@@ -3093,6 +3333,7 @@ model_verbosity = "high"
            forced_login_method: None,
            include_apply_patch_tool: false,
            tools_web_search_request: false,
+            experimental_sandbox_command_assessment: false,
            use_experimental_streamable_shell_tool: false,
            use_experimental_unified_exec_tool: false,
            use_experimental_use_rmcp_client: false,
--- a/codex-rs/core/src/config_profile.rs
+++ b/codex-rs/core/src/config_profile.rs
@@ -4,6 +4,7 @@ use std::path::PathBuf;
 use crate::protocol::AskForApproval;
 use codex_protocol::config_types::ReasoningEffort;
 use codex_protocol::config_types::ReasoningSummary;
+use codex_protocol::config_types::SandboxMode;
 use codex_protocol::config_types::Verbosity;

 /// Collection of common configuration options that a user can define as a unit
@@ -15,6 +16,7 @@ pub struct ConfigProfile {
    /// [`ModelProviderInfo`] to use.
    pub model_provider: Option<String>,
    pub approval_policy: Option<AskForApproval>,
+    pub sandbox_mode: Option<SandboxMode>,
    pub model_reasoning_effort: Option<ReasoningEffort>,
    pub model_reasoning_summary: Option<ReasoningSummary>,
    pub model_verbosity: Option<Verbosity>,
@@ -26,6 +28,7 @@ pub struct ConfigProfile {
    pub experimental_use_exec_command_tool: Option<bool>,
    pub experimental_use_rmcp_client: Option<bool>,
    pub experimental_use_freeform_apply_patch: Option<bool>,
+    pub experimental_sandbox_command_assessment: Option<bool>,
    pub tools_web_search: Option<bool>,
    pub tools_view_image: Option<bool>,
    /// Optional feature toggles scoped to this profile.
--- a/codex-rs/core/src/conversation_history.rs
+++ b/codex-rs/core/src/conversation_history.rs
@@ -1,17 +1,47 @@
+use codex_protocol::models::FunctionCallOutputContentItem;
 use codex_protocol::models::FunctionCallOutputPayload;
 use codex_protocol::models::ResponseItem;
+use codex_protocol::protocol::TokenUsage;
+use codex_protocol::protocol::TokenUsageInfo;
+use codex_utils_string::take_bytes_at_char_boundary;
+use codex_utils_string::take_last_bytes_at_char_boundary;
+use std::ops::Deref;
 use tracing::error;

+// Model-formatting limits: clients get full streams; only content sent to the model is truncated.
+pub(crate) const MODEL_FORMAT_MAX_BYTES: usize = 10 * 1024; // 10 KiB
+pub(crate) const MODEL_FORMAT_MAX_LINES: usize = 256; // lines
+pub(crate) const MODEL_FORMAT_HEAD_LINES: usize = MODEL_FORMAT_MAX_LINES / 2;
+pub(crate) const MODEL_FORMAT_TAIL_LINES: usize = MODEL_FORMAT_MAX_LINES - MODEL_FORMAT_HEAD_LINES; // 128
+pub(crate) const MODEL_FORMAT_HEAD_BYTES: usize = MODEL_FORMAT_MAX_BYTES / 2;
+
 /// Transcript of conversation history
 #[derive(Debug, Clone, Default)]
 pub(crate) struct ConversationHistory {
    /// The oldest items are at the beginning of the vector.
    items: Vec<ResponseItem>,
+    token_info: Option<TokenUsageInfo>,
 }

 impl ConversationHistory {
    pub(crate) fn new() -> Self {
-        Self { items: Vec::new() }
+        Self {
+            items: Vec::new(),
+            token_info: TokenUsageInfo::new_or_append(&None, &None, None),
+        }
+    }
+
+    pub(crate) fn token_info(&self) -> Option<TokenUsageInfo> {
+        self.token_info.clone()
+    }
+
+    pub(crate) fn set_token_usage_full(&mut self, context_window: i64) {
+        match &mut self.token_info {
+            Some(info) => info.fill_to_context_window(context_window),
+            None => {
+                self.token_info = Some(TokenUsageInfo::full_context_window(context_window));
+            }
+        }
    }

    /// `items` is ordered from oldest to newest.
@@ -21,11 +51,14 @@ impl ConversationHistory {
        I::Item: std::ops::Deref<Target = ResponseItem>,
    {
        for item in items {
-            if !is_api_message(&item) {
+            let item_ref = item.deref();
+            let is_ghost_snapshot = matches!(item_ref, ResponseItem::GhostSnapshot { .. });
+            if !is_api_message(item_ref) && !is_ghost_snapshot {
                continue;
            }

-            self.items.push(item.clone());
+            let processed = Self::process_item(&item);
+            self.items.push(processed);
        }
    }

@@ -34,6 +67,15 @@ impl ConversationHistory {
        self.contents()
    }

+    // Returns the history prepared for sending to the model, with extra response
+    // items (reasoning before the last user message) and GhostSnapshots removed.
+    pub(crate) fn get_history_for_prompt(&mut self) -> Vec<ResponseItem> {
+        let mut history = self.get_history();
+        Self::remove_ghost_snapshots(&mut history);
+        Self::remove_reasoning_before_last_turn(&mut history);
+        history
+    }
+
    pub(crate) fn remove_first_item(&mut self) {
        if !self.items.is_empty() {
            // Remove the oldest item (front of the list). Items are ordered from
@@ -46,6 +88,22 @@ impl ConversationHistory {
        }
    }

+    pub(crate) fn replace(&mut self, items: Vec<ResponseItem>) {
+        self.items = items;
+    }
+
+    pub(crate) fn update_token_info(
+        &mut self,
+        usage: &TokenUsage,
+        model_context_window: Option<i64>,
+    ) {
+        self.token_info = TokenUsageInfo::new_or_append(
+            &self.token_info,
+            &Some(usage.clone()),
+            model_context_window,
+        );
+    }
+
    /// This function enforces a couple of invariants on the in-memory history:
    /// 1. every call (function/custom) has a corresponding output entry
    /// 2. every output has a corresponding call entry
@@ -62,6 +120,29 @@ impl ConversationHistory {
        self.items.clone()
    }

+    fn remove_ghost_snapshots(items: &mut Vec<ResponseItem>) {
+        items.retain(|item| !matches!(item, ResponseItem::GhostSnapshot { .. }));
+    }
+
+    fn remove_reasoning_before_last_turn(items: &mut Vec<ResponseItem>) {
+        // Responses API drops reasoning items before the last user message.
+        // Sending them is usually harmless, but can lead to validation errors when switching between API organizations.
+        // https://cookbook.openai.com/examples/responses_api/reasoning_items#caching
+        let Some(last_user_index) = items
+            .iter()
+            // Use last user message as the turn boundary.
+            .rposition(|item| matches!(item, ResponseItem::Message { role, .. } if role == "user"))
+        else {
+            return;
+        };
+        let mut index = 0usize;
+        items.retain(|item| {
+            let keep = index >= last_user_index || !matches!(item, ResponseItem::Reasoning { .. });
+            index += 1;
+            keep
+        });
+    }
+
    fn ensure_call_outputs_present(&mut self) {
        // Collect synthetic outputs to insert immediately after their calls.
        // Store the insertion position (index of call) alongside the item so
@@ -88,7 +169,7 @@ impl ConversationHistory {
                                call_id: call_id.clone(),
                                output: FunctionCallOutputPayload {
                                    content: "aborted".to_string(),
-                                    success: None,
+                                    ..Default::default()
                                },
                            },
                        ));
@@ -135,7 +216,7 @@ impl ConversationHistory {
                                    call_id: call_id.clone(),
                                    output: FunctionCallOutputPayload {
                                        content: "aborted".to_string(),
-                                        success: None,
+                                        ..Default::default()
                                    },
                                },
                            ));
@@ -146,6 +227,7 @@ impl ConversationHistory {
                | ResponseItem::WebSearchCall { .. }
                | ResponseItem::FunctionCallOutput { .. }
                | ResponseItem::CustomToolCallOutput { .. }
+                | ResponseItem::GhostSnapshot { .. }
                | ResponseItem::Other
                | ResponseItem::Message { .. } => {
                    // nothing to do for these variants
@@ -212,6 +294,7 @@ impl ConversationHistory {
                | ResponseItem::LocalShellCall { .. }
                | ResponseItem::Reasoning { .. }
                | ResponseItem::WebSearchCall { .. }
+                | ResponseItem::GhostSnapshot { .. }
                | ResponseItem::Other
                | ResponseItem::Message { .. } => {
                    // nothing to do for these variants
@@ -229,10 +312,6 @@ impl ConversationHistory {
        }
    }

-    pub(crate) fn replace(&mut self, items: Vec<ResponseItem>) {
-        self.items = items;
-    }
-
    /// Removes the corresponding paired item for the provided `item`, if any.
    ///
    /// Pairs:
@@ -301,6 +380,125 @@ impl ConversationHistory {
            self.items.remove(pos);
        }
    }
+
+    /// Builds the copy of `item` that is kept in history.
+    ///
+    /// Function-call and custom-tool outputs have their text passed through
+    /// `format_output_for_model_body`; every other variant is cloned unchanged.
+    fn process_item(item: &ResponseItem) -> ResponseItem {
+        // Rewrites a single structured content item: text is reformatted,
+        // image references are copied through untouched.
+        let shorten_content_item = |entry: &FunctionCallOutputContentItem| match entry {
+            FunctionCallOutputContentItem::InputText { text } => {
+                FunctionCallOutputContentItem::InputText {
+                    text: format_output_for_model_body(text),
+                }
+            }
+            FunctionCallOutputContentItem::InputImage { image_url } => {
+                FunctionCallOutputContentItem::InputImage {
+                    image_url: image_url.clone(),
+                }
+            }
+        };
+
+        match item {
+            ResponseItem::FunctionCallOutput { call_id, output } => {
+                let content = format_output_for_model_body(output.content.as_str());
+                let content_items = output
+                    .content_items
+                    .as_ref()
+                    .map(|entries| entries.iter().map(shorten_content_item).collect());
+                ResponseItem::FunctionCallOutput {
+                    call_id: call_id.clone(),
+                    output: FunctionCallOutputPayload {
+                        content,
+                        content_items,
+                        success: output.success,
+                    },
+                }
+            }
+            ResponseItem::CustomToolCallOutput { call_id, output } => {
+                ResponseItem::CustomToolCallOutput {
+                    call_id: call_id.clone(),
+                    output: format_output_for_model_body(output),
+                }
+            }
+            // Nothing to shorten for the remaining variants.
+            ResponseItem::Message { .. }
+            | ResponseItem::Reasoning { .. }
+            | ResponseItem::LocalShellCall { .. }
+            | ResponseItem::FunctionCall { .. }
+            | ResponseItem::WebSearchCall { .. }
+            | ResponseItem::CustomToolCall { .. }
+            | ResponseItem::GhostSnapshot { .. }
+            | ResponseItem::Other => item.clone(),
+        }
+    }
+}
+
+/// Formats tool output for the model.
+///
+/// Content that fits within both `MODEL_FORMAT_MAX_BYTES` and
+/// `MODEL_FORMAT_MAX_LINES` is returned unchanged. Anything larger is reduced
+/// to a head+tail excerpt (with an elision marker) prefixed by a
+/// `Total output lines: N` header.
+pub(crate) fn format_output_for_model_body(content: &str) -> String {
+    let total_lines = content.lines().count();
+    let over_byte_limit = content.len() > MODEL_FORMAT_MAX_BYTES;
+    let over_line_limit = total_lines > MODEL_FORMAT_MAX_LINES;
+
+    if over_byte_limit || over_line_limit {
+        // Clients still receive full streams; only this formatted summary is capped.
+        let output = truncate_formatted_exec_output(content, total_lines);
+        format!("Total output lines: {total_lines}\n\n{output}")
+    } else {
+        content.to_string()
+    }
+}
+
+/// Builds the head + marker + tail excerpt used for over-limit output.
+///
+/// `content` is split into newline-terminated segments. Up to
+/// `MODEL_FORMAT_HEAD_LINES` leading and `MODEL_FORMAT_TAIL_LINES` trailing
+/// segments are candidates for the excerpt; both parts are then clipped
+/// against the `MODEL_FORMAT_MAX_BYTES` budget through the
+/// `take_*_at_char_boundary` helpers (defined elsewhere; presumably they never
+/// split a UTF-8 character, confirm against their definitions).
+///
+/// `total_lines` is only used in the text of the omission marker.
+fn truncate_formatted_exec_output(content: &str, total_lines: usize) -> String {
+    let segments: Vec<&str> = content.split_inclusive('\n').collect();
+    let head_take = MODEL_FORMAT_HEAD_LINES.min(segments.len());
+    let tail_take = MODEL_FORMAT_TAIL_LINES.min(segments.len().saturating_sub(head_take));
+    let omitted = segments.len().saturating_sub(head_take + tail_take);
+
+    // Byte offset at which the head segments end.
+    let head_slice_end: usize = segments
+        .iter()
+        .take(head_take)
+        .map(|segment| segment.len())
+        .sum();
+    // Byte offset at which the tail segments begin (end of content when there is no tail).
+    let tail_slice_start: usize = if tail_take == 0 {
+        content.len()
+    } else {
+        content.len()
+            - segments
+                .iter()
+                .rev()
+                .take(tail_take)
+                .map(|segment| segment.len())
+                .sum::<usize>()
+    };
+    let head_slice = &content[..head_slice_end];
+    let tail_slice = &content[tail_slice_start..];
+    let truncated_by_bytes = content.len() > MODEL_FORMAT_MAX_BYTES;
+    // NOTE: the counts reported in the marker are slightly off: they include
+    // metadata lines, not just shell output lines.
+    // Line omission takes precedence over the byte-truncation marker.
+    let marker = if omitted > 0 {
+        Some(format!(
+            "\n[... omitted {omitted} of {total_lines} lines ...]\n\n"
+        ))
+    } else if truncated_by_bytes {
+        Some(format!(
+            "\n[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]\n\n"
+        ))
+    } else {
+        None
+    };
+
+    // Reserve room for the marker before sizing the head.
+    let marker_len = marker.as_ref().map_or(0, String::len);
+    let base_head_budget = MODEL_FORMAT_HEAD_BYTES.min(MODEL_FORMAT_MAX_BYTES);
+    let head_budget = base_head_budget.min(MODEL_FORMAT_MAX_BYTES.saturating_sub(marker_len));
+    let head_part = take_bytes_at_char_boundary(head_slice, head_budget);
+    let mut result = String::with_capacity(MODEL_FORMAT_MAX_BYTES.min(content.len()));
+
+    result.push_str(head_part);
+    if let Some(marker_text) = marker.as_ref() {
+        result.push_str(marker_text);
+    }
+
+    // Whatever budget is left after head + marker goes to the tail.
+    let remaining = MODEL_FORMAT_MAX_BYTES.saturating_sub(result.len());
+    if remaining == 0 {
+        return result;
+    }
+
+    let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining);
+    result.push_str(tail_part);
+
+    result
 }

 #[inline]
@@ -324,6 +522,7 @@ fn is_api_message(message: &ResponseItem) -> bool {
        | ResponseItem::LocalShellCall { .. }
        | ResponseItem::Reasoning { .. }
        | ResponseItem::WebSearchCall { .. } => true,
+        ResponseItem::GhostSnapshot { .. } => false,
        ResponseItem::Other => false,
    }
 }
@@ -331,6 +530,7 @@ fn is_api_message(message: &ResponseItem) -> bool {
 #[cfg(test)]
 mod tests {
    use super::*;
+    use codex_git_tooling::GhostCommit;
    use codex_protocol::models::ContentItem;
    use codex_protocol::models::FunctionCallOutputPayload;
    use codex_protocol::models::LocalShellAction;
@@ -348,6 +548,15 @@ mod tests {
        }
    }

+    /// Test helper: a `Reasoning` item identified by `id`, with an empty
+    /// summary and no content.
+    fn reasoning(id: &str) -> ResponseItem {
+        let id = id.to_owned();
+        ResponseItem::Reasoning {
+            id,
+            summary: vec![],
+            content: None,
+            encrypted_content: None,
+        }
+    }
+
    fn create_history_with_items(items: Vec<ResponseItem>) -> ConversationHistory {
        let mut h = ConversationHistory::new();
        h.record_items(items.iter());
@@ -404,6 +613,50 @@ mod tests {
        );
    }

+    /// The prompt view omits reasoning recorded before the latest user
+    /// message, while the stored history keeps all of it.
+    #[test]
+    fn get_history_drops_reasoning_before_last_user_message() {
+        let recorded = vec![
+            user_msg("initial"),
+            reasoning("first"),
+            assistant_msg("ack"),
+            user_msg("latest"),
+            reasoning("second"),
+            assistant_msg("ack"),
+            reasoning("third"),
+        ];
+        let mut history = ConversationHistory::new();
+        history.record_items(recorded.iter());
+
+        // Only `reasoning("first")` precedes the last user message.
+        let expected = vec![
+            user_msg("initial"),
+            assistant_msg("ack"),
+            user_msg("latest"),
+            reasoning("second"),
+            assistant_msg("ack"),
+            reasoning("third"),
+        ];
+        let filtered = history.get_history_for_prompt();
+        assert_eq!(filtered, expected);
+
+        let stored = history.contents();
+        let reasoning_count = stored
+            .iter()
+            .filter(|item| matches!(item, ResponseItem::Reasoning { .. }))
+            .count();
+        assert_eq!(reasoning_count, 3);
+    }
+
+    /// A history holding only a ghost snapshot yields an empty prompt view.
+    #[test]
+    fn get_history_for_prompt_drops_ghost_commits() {
+        let ghost_commit = GhostCommit::new("ghost-1".to_string(), None, Vec::new(), Vec::new());
+        let snapshot = ResponseItem::GhostSnapshot { ghost_commit };
+        let mut history = create_history_with_items(vec![snapshot]);
+
+        let filtered = history.get_history_for_prompt();
+
+        assert_eq!(filtered, vec![]);
+    }
+
    #[test]
    fn remove_first_item_removes_matching_output_for_function_call() {
        let items = vec![
@@ -417,7 +670,7 @@ mod tests {
                call_id: "call-1".to_string(),
                output: FunctionCallOutputPayload {
                    content: "ok".to_string(),
-                    success: None,
+                    ..Default::default()
                },
            },
        ];
@@ -433,7 +686,7 @@ mod tests {
                call_id: "call-2".to_string(),
                output: FunctionCallOutputPayload {
                    content: "ok".to_string(),
-                    success: None,
+                    ..Default::default()
                },
            },
            ResponseItem::FunctionCall {
@@ -467,7 +720,7 @@ mod tests {
                call_id: "call-3".to_string(),
                output: FunctionCallOutputPayload {
                    content: "ok".to_string(),
-                    success: None,
+                    ..Default::default()
                },
            },
        ];
@@ -496,6 +749,184 @@ mod tests {
        assert_eq!(h.contents(), vec![]);
    }

+    /// Recording an over-limit `FunctionCallOutput` stores the truncated
+    /// summary rather than the raw content.
+    #[test]
+    fn record_items_truncates_function_call_output_content() {
+        let long_output = "a very long line to trigger truncation\n".repeat(2_500);
+        let item = ResponseItem::FunctionCallOutput {
+            call_id: "call-100".to_string(),
+            output: FunctionCallOutputPayload {
+                content: long_output.clone(),
+                success: Some(true),
+                ..Default::default()
+            },
+        };
+
+        let mut history = ConversationHistory::new();
+        history.record_items([&item]);
+
+        assert_eq!(history.items.len(), 1);
+        let stored = &history.items[0];
+        match stored {
+            ResponseItem::FunctionCallOutput { output, .. } => {
+                let summary = &output.content;
+                assert_ne!(summary, &long_output);
+                assert!(
+                    summary.starts_with("Total output lines:"),
+                    "expected truncated summary, got {}",
+                    summary
+                );
+            }
+            other => panic!("unexpected history item: {other:?}"),
+        }
+    }
+
+    /// Recording an over-limit `CustomToolCallOutput` stores the truncated
+    /// summary rather than the raw output.
+    #[test]
+    fn record_items_truncates_custom_tool_call_output_content() {
+        let long_output = "custom output that is very long\n".repeat(2_500);
+        let item = ResponseItem::CustomToolCallOutput {
+            call_id: "tool-200".to_string(),
+            output: long_output.clone(),
+        };
+
+        let mut history = ConversationHistory::new();
+        history.record_items([&item]);
+
+        assert_eq!(history.items.len(), 1);
+        let stored = &history.items[0];
+        match stored {
+            ResponseItem::CustomToolCallOutput { output, .. } => {
+                assert_ne!(output, &long_output);
+                assert!(
+                    output.starts_with("Total output lines:"),
+                    "expected truncated summary, got {output}"
+                );
+            }
+            other => panic!("unexpected history item: {other:?}"),
+        }
+    }
+
+    // The following tests were adapted from tools::mod truncation tests to
+    // target the new truncation functions in conversation_history.
+
+    use regex_lite::Regex;
+
+    /// Asserts that `message` has the truncated-output shape produced for
+    /// `line` repeated `total_lines` times, and that the captured body is no
+    /// longer than `MODEL_FORMAT_MAX_BYTES`.
+    fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) {
+        let pattern = truncated_message_pattern(line, total_lines);
+        let regex = match Regex::new(&pattern) {
+            Ok(compiled) => compiled,
+            Err(err) => panic!("failed to compile regex {pattern}: {err}"),
+        };
+        let Some(captures) = regex.captures(message) else {
+            panic!("message failed to match pattern {pattern}: {message}");
+        };
+        let body = captures
+            .name("body")
+            .expect("missing body capture")
+            .as_str();
+        let body_len = body.len();
+        assert!(
+            body_len <= MODEL_FORMAT_MAX_BYTES,
+            "body exceeds byte limit: {} bytes",
+            body_len
+        );
+    }
+
+    /// Builds the regex source that a truncated message is expected to match.
+    ///
+    /// The marker in the pattern is the byte-truncation marker when no lines
+    /// would be omitted, and the line-omission marker otherwise.
+    fn truncated_message_pattern(line: &str, total_lines: usize) -> String {
+        let kept_head = MODEL_FORMAT_HEAD_LINES.min(total_lines);
+        let kept_tail = MODEL_FORMAT_TAIL_LINES.min(total_lines.saturating_sub(kept_head));
+        let omitted = total_lines.saturating_sub(kept_head + kept_tail);
+        let escaped_line = regex_lite::escape(line);
+        if omitted == 0 {
+            format!(
+                r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes \.{{3}}]\n\n.*)$",
+            )
+        } else {
+            format!(
+                r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} omitted {omitted} of {total_lines} lines \.{{3}}]\n\n.*)$",
+            )
+        }
+    }
+
+    /// Output far beyond both limits is replaced by a truncated message of
+    /// the expected shape.
+    #[test]
+    fn format_exec_output_truncates_large_error() {
+        let line = "very long execution error line that should trigger truncation\n";
+        // 2_500 repetitions is way beyond both byte and line limits.
+        let large_error = line.repeat(2_500);
+        let total_lines = large_error.lines().count();
+
+        let truncated = format_output_for_model_body(&large_error);
+
+        assert_truncated_message_matches(&truncated, line, total_lines);
+        assert_ne!(truncated, large_error);
+    }
+
+    /// A single over-long line gets the byte-truncation marker and no
+    /// line-omission marker.
+    #[test]
+    fn format_exec_output_marks_byte_truncation_without_omitted_lines() {
+        let oversized = "a".repeat(MODEL_FORMAT_MAX_BYTES + 50);
+        let marker_line =
+            format!("[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]");
+
+        let truncated = format_output_for_model_body(&oversized);
+
+        assert_ne!(truncated, oversized);
+        assert!(
+            truncated.contains(&marker_line),
+            "missing byte truncation marker: {truncated}"
+        );
+        assert!(
+            !truncated.contains("omitted"),
+            "line omission marker should not appear when no lines were dropped: {truncated}"
+        );
+    }
+
+    /// Short output is passed through untouched.
+    #[test]
+    fn format_exec_output_returns_original_when_within_limits() {
+        let content = "example output\n".repeat(10);
+
+        let formatted = format_output_for_model_body(&content);
+
+        assert_eq!(formatted, content);
+    }
+
+    /// Exceeding the line limit yields an omission marker with the right
+    /// counts while the first and last lines survive.
+    #[test]
+    fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() {
+        let total_lines = MODEL_FORMAT_MAX_LINES + 100;
+        let mut content = String::new();
+        for idx in 0..total_lines {
+            content.push_str(&format!("line-{idx}\n"));
+        }
+        let omitted = total_lines - MODEL_FORMAT_MAX_LINES;
+        let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]");
+        let last_line = format!("line-{}\n", total_lines - 1);
+
+        let truncated = format_output_for_model_body(&content);
+
+        assert!(
+            truncated.contains(&expected_marker),
+            "missing omitted marker: {truncated}"
+        );
+        assert!(
+            truncated.contains("line-0\n"),
+            "expected head line to remain: {truncated}"
+        );
+        assert!(
+            truncated.contains(&last_line),
+            "expected tail line to remain: {truncated}"
+        );
+    }
+
+    /// When both the line and byte limits are exceeded, only the
+    /// line-omission marker appears in the output.
+    #[test]
+    fn format_exec_output_prefers_line_marker_when_both_limits_exceeded() {
+        let total_lines = MODEL_FORMAT_MAX_LINES + 42;
+        let long_line = "x".repeat(256);
+        let content: String = (0..total_lines)
+            .map(|idx| format!("line-{idx}-{long_line}\n"))
+            .collect();
+
+        let truncated = format_output_for_model_body(&content);
+
+        // Derive the marker from the limit instead of hard-coding the counts,
+        // so the test keeps passing if `MODEL_FORMAT_MAX_LINES` is retuned.
+        let omitted = total_lines - MODEL_FORMAT_MAX_LINES;
+        let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]");
+        assert!(
+            truncated.contains(&expected_marker),
+            "expected omitted marker when line count exceeds limit: {truncated}"
+        );
+        assert!(
+            !truncated.contains("output truncated to fit"),
+            "line omission marker should take precedence over byte marker: {truncated}"
+        );
+    }
+
    //TODO(aibrahim): run CI in release mode.
    #[cfg(not(debug_assertions))]
    #[test]
@@ -523,7 +954,7 @@ mod tests {
                    call_id: "call-x".to_string(),
                    output: FunctionCallOutputPayload {
                        content: "aborted".to_string(),
-                        success: None,
+                        ..Default::default()
                    },
                },
            ]
@@ -600,7 +1031,7 @@ mod tests {
                    call_id: "shell-1".to_string(),
                    output: FunctionCallOutputPayload {
                        content: "aborted".to_string(),
-                        success: None,
+                        ..Default::default()
                    },
                },
            ]
@@ -614,7 +1045,7 @@ mod tests {
            call_id: "orphan-1".to_string(),
            output: FunctionCallOutputPayload {
                content: "ok".to_string(),
-                success: None,
+                ..Default::default()
            },
        }];
        let mut h = create_history_with_items(items);
@@ -654,7 +1085,7 @@ mod tests {
                call_id: "c2".to_string(),
                output: FunctionCallOutputPayload {
                    content: "ok".to_string(),
-                    success: None,
+                    ..Default::default()
                },
            },
            // Will get an inserted custom tool output
@@ -696,7 +1127,7 @@ mod tests {
                    call_id: "c1".to_string(),
                    output: FunctionCallOutputPayload {
                        content: "aborted".to_string(),
-                        success: None,
+                        ..Default::default()
                    },
                },
                ResponseItem::CustomToolCall {
@@ -726,7 +1157,7 @@ mod tests {
                    call_id: "s1".to_string(),
                    output: FunctionCallOutputPayload {
                        content: "aborted".to_string(),
-                        success: None,
+                        ..Default::default()
                    },
                },
            ]
@@ -791,7 +1222,7 @@ mod tests {
            call_id: "orphan-1".to_string(),
            output: FunctionCallOutputPayload {
                content: "ok".to_string(),
-                success: None,
+                ..Default::default()
            },
        }];
        let mut h = create_history_with_items(items);
@@ -825,7 +1256,7 @@ mod tests {
                call_id: "c2".to_string(),
                output: FunctionCallOutputPayload {
                    content: "ok".to_string(),
-                    success: None,
+                    ..Default::default()
                },
            },
            ResponseItem::CustomToolCall {
--- a/codex-rs/core/src/conversation_manager.rs
+++ b/codex-rs/core/src/conversation_manager.rs
@@ -98,7 +98,10 @@ impl ConversationManager {
            }
        };

-        let conversation = Arc::new(CodexConversation::new(codex));
+        let conversation = Arc::new(CodexConversation::new(
+            codex,
+            session_configured.rollout_path.clone(),
+        ));
        self.conversations
            .write()
            .await
--- a/codex-rs/core/src/default_client.rs
+++ b/codex-rs/core/src/default_client.rs
@@ -1,5 +1,13 @@
 use crate::spawn::CODEX_SANDBOX_ENV_VAR;
+use http::Error as HttpError;
+use reqwest::IntoUrl;
+use reqwest::Method;
+use reqwest::Response;
+use reqwest::header::HeaderName;
 use reqwest::header::HeaderValue;
+use serde::Serialize;
+use std::collections::HashMap;
+use std::fmt::Display;
 use std::sync::LazyLock;
 use std::sync::Mutex;
 use std::sync::OnceLock;
@@ -22,6 +30,130 @@ use std::sync::OnceLock;
 pub static USER_AGENT_SUFFIX: LazyLock<Mutex<Option<String>>> = LazyLock::new(|| Mutex::new(None));
 pub const DEFAULT_ORIGINATOR: &str = "codex_cli_rs";
 pub const CODEX_INTERNAL_ORIGINATOR_OVERRIDE_ENV_VAR: &str = "CODEX_INTERNAL_ORIGINATOR_OVERRIDE";
+
+/// Wrapper around `reqwest::Client` whose request methods hand back a
+/// [`CodexRequestBuilder`] instead of a raw `reqwest::RequestBuilder`.
+#[derive(Clone, Debug)]
+pub struct CodexHttpClient {
+    inner: reqwest::Client,
+}
+
+impl CodexHttpClient {
+    /// Wraps an already-configured `reqwest::Client`.
+    fn new(inner: reqwest::Client) -> Self {
+        Self { inner }
+    }
+
+    /// Starts a `GET` request to `url`.
+    pub fn get<U: IntoUrl>(&self, url: U) -> CodexRequestBuilder {
+        self.request(Method::GET, url)
+    }
+
+    /// Starts a `POST` request to `url`.
+    pub fn post<U: IntoUrl>(&self, url: U) -> CodexRequestBuilder {
+        self.request(Method::POST, url)
+    }
+
+    /// Starts a request with an arbitrary `method` to `url`.
+    pub fn request<U: IntoUrl>(&self, method: Method, url: U) -> CodexRequestBuilder {
+        // Capture the URL text first: `url` is moved into reqwest below.
+        let url_text = url.as_str().to_string();
+        let builder = self.inner.request(method.clone(), url);
+        CodexRequestBuilder::new(builder, method, url_text)
+    }
+}
+
+/// Request builder returned by [`CodexHttpClient`].
+///
+/// Wraps a `reqwest::RequestBuilder` and remembers the method and URL so that
+/// `send` can attach them to its debug log events.
+#[must_use = "requests are not sent unless `send` is awaited"]
+#[derive(Debug)]
+pub struct CodexRequestBuilder {
+    builder: reqwest::RequestBuilder,
+    method: Method,
+    url: String,
+}
+
+impl CodexRequestBuilder {
+    fn new(builder: reqwest::RequestBuilder, method: Method, url: String) -> Self {
+        Self {
+            builder,
+            method,
+            url,
+        }
+    }
+
+    /// Applies `f` to the wrapped builder, carrying the method and URL over.
+    fn map(self, f: impl FnOnce(reqwest::RequestBuilder) -> reqwest::RequestBuilder) -> Self {
+        let Self {
+            builder,
+            method,
+            url,
+        } = self;
+        Self::new(f(builder), method, url)
+    }
+
+    /// Forwards to `reqwest::RequestBuilder::header`.
+    pub fn header<K, V>(self, key: K, value: V) -> Self
+    where
+        HeaderName: TryFrom<K>,
+        <HeaderName as TryFrom<K>>::Error: Into<HttpError>,
+        HeaderValue: TryFrom<V>,
+        <HeaderValue as TryFrom<V>>::Error: Into<HttpError>,
+    {
+        self.map(move |inner| inner.header(key, value))
+    }
+
+    /// Forwards to `reqwest::RequestBuilder::bearer_auth`.
+    pub fn bearer_auth<T>(self, token: T) -> Self
+    where
+        T: Display,
+    {
+        self.map(move |inner| inner.bearer_auth(token))
+    }
+
+    /// Forwards to `reqwest::RequestBuilder::json`.
+    pub fn json<T>(self, value: &T) -> Self
+    where
+        T: ?Sized + Serialize,
+    {
+        self.map(|inner| inner.json(value))
+    }
+
+    /// Sends the request and emits a debug event describing the outcome
+    /// (method, URL, status, plus request ids and HTTP version on success or
+    /// the error on failure). The reqwest result is returned unchanged.
+    pub async fn send(self) -> Result<Response, reqwest::Error> {
+        let Self {
+            builder,
+            method,
+            url,
+        } = self;
+        let outcome = builder.send().await;
+
+        match &outcome {
+            Ok(response) => {
+                let request_ids = Self::extract_request_ids(response);
+                tracing::debug!(
+                    method = %method,
+                    url = %url,
+                    status = %response.status(),
+                    request_ids = ?request_ids,
+                    version = ?response.version(),
+                    "Request completed"
+                );
+            }
+            Err(error) => {
+                let status = error.status();
+                tracing::debug!(
+                    method = %method,
+                    url = %url,
+                    status = status.map(|s| s.as_u16()),
+                    error = %error,
+                    "Request failed"
+                );
+            }
+        }
+
+        outcome
+    }
+
+    /// Collects the `cf-ray`, `x-request-id` and `x-oai-request-id` response
+    /// headers that are present and valid as text, keyed by header name.
+    fn extract_request_ids(response: &Response) -> HashMap<String, String> {
+        let headers = response.headers();
+        let mut request_ids = HashMap::new();
+        for name in ["cf-ray", "x-request-id", "x-oai-request-id"] {
+            let header_name = HeaderName::from_static(name);
+            let Some(text) = headers.get(header_name).and_then(|raw| raw.to_str().ok()) else {
+                continue;
+            };
+            request_ids.insert(name.to_owned(), text.to_owned());
+        }
+        request_ids
+    }
+}
 #[derive(Debug, Clone)]
 pub struct Originator {
    pub value: String,
@@ -124,8 +256,8 @@ fn sanitize_user_agent(candidate: String, fallback: &str) -> String {
    }
 }

-/// Create a reqwest client with default `originator` and `User-Agent` headers set.
-pub fn create_client() -> reqwest::Client {
+/// Create an HTTP client with default `originator` and `User-Agent` headers set.
+pub fn create_client() -> CodexHttpClient {
    use reqwest::header::HeaderMap;

    let mut headers = HeaderMap::new();
@@ -140,7 +272,8 @@ pub fn create_client() -> reqwest::Client {
        builder = builder.no_proxy();
    }

-    builder.build().unwrap_or_else(|_| reqwest::Client::new())
+    let inner = builder.build().unwrap_or_else(|_| reqwest::Client::new());
+    CodexHttpClient::new(inner)
 }

 fn is_sandboxed() -> bool {
--- a/codex-rs/core/src/error.rs
+++ b/codex-rs/core/src/error.rs
@@ -1,3 +1,4 @@
+use crate::codex::ProcessedResponseItem;
 use crate::exec::ExecToolCallOutput;
 use crate::token_data::KnownPlan;
 use crate::token_data::PlanType;
@@ -53,8 +54,11 @@ pub enum SandboxErr {

 #[derive(Error, Debug)]
 pub enum CodexErr {
-    #[error("turn aborted")]
-    TurnAborted,
+    // todo(aibrahim): get rid of this error carrying the dangling artifacts
+    #[error("turn aborted. Something went wrong? Hit `/feedback` to report the issue.")]
+    TurnAborted {
+        dangling_artifacts: Vec<ProcessedResponseItem>,
+    },

    /// Returned by ResponsesClient when the SSE stream disconnects or errors out **after** the HTTP
    /// handshake has succeeded but **before** it finished emitting `response.completed`.
@@ -87,7 +91,7 @@ pub enum CodexErr {

    /// Returned by run_command_stream when the user pressed Ctrl‑C (SIGINT). Session uses this to
    /// surface a polite FunctionCallOutput back to the model instead of crashing the CLI.
-    #[error("interrupted (Ctrl-C)")]
+    #[error("interrupted (Ctrl-C). Something went wrong? Hit `/feedback` to report the issue.")]
    Interrupted,

    /// Unexpected HTTP status code.
@@ -158,7 +162,9 @@ pub enum CodexErr {

 impl From<CancelErr> for CodexErr {
    fn from(_: CancelErr) -> Self {
-        CodexErr::TurnAborted
+        CodexErr::TurnAborted {
+            dangling_artifacts: Vec::new(),
+        }
    }
 }

--- a/codex-rs/core/src/features.rs
+++ b/codex-rs/core/src/features.rs
@@ -39,6 +39,10 @@ pub enum Feature {
    ViewImageTool,
    /// Allow the model to request web searches.
    WebSearchRequest,
+    /// Enable the model-based risk assessments for sandboxed commands.
+    SandboxCommandAssessment,
+    /// Create a ghost commit at each turn.
+    GhostCommit,
 }

 impl Feature {
@@ -73,6 +77,7 @@ pub struct FeatureOverrides {
    pub include_apply_patch_tool: Option<bool>,
    pub include_view_image_tool: Option<bool>,
    pub web_search_request: Option<bool>,
+    pub experimental_sandbox_command_assessment: Option<bool>,
 }

 impl FeatureOverrides {
@@ -137,6 +142,7 @@ impl Features {
        let mut features = Features::with_defaults();

        let base_legacy = LegacyFeatureToggles {
+            experimental_sandbox_command_assessment: cfg.experimental_sandbox_command_assessment,
            experimental_use_freeform_apply_patch: cfg.experimental_use_freeform_apply_patch,
            experimental_use_exec_command_tool: cfg.experimental_use_exec_command_tool,
            experimental_use_unified_exec_tool: cfg.experimental_use_unified_exec_tool,
@@ -154,6 +160,8 @@ impl Features {
        let profile_legacy = LegacyFeatureToggles {
            include_apply_patch_tool: config_profile.include_apply_patch_tool,
            include_view_image_tool: config_profile.include_view_image_tool,
+            experimental_sandbox_command_assessment: config_profile
+                .experimental_sandbox_command_assessment,
            experimental_use_freeform_apply_patch: config_profile
                .experimental_use_freeform_apply_patch,
            experimental_use_exec_command_tool: config_profile.experimental_use_exec_command_tool,
@@ -183,6 +191,11 @@ fn feature_for_key(key: &str) -> Option<Feature> {
    legacy::feature_for_key(key)
 }

+/// Reports whether `key` resolves to a feature through `feature_for_key`,
+/// i.e. whether it is a known feature toggle key.
+pub fn is_known_feature_key(key: &str) -> bool {
+    let resolved = feature_for_key(key);
+    resolved.is_some()
+}
+
 /// Deserializable features table for TOML.
 #[derive(Deserialize, Debug, Clone, Default, PartialEq)]
 pub struct FeaturesToml {
@@ -236,4 +249,16 @@ pub const FEATURES: &[FeatureSpec] = &[
        stage: Stage::Stable,
        default_enabled: false,
    },
+    FeatureSpec {
+        id: Feature::SandboxCommandAssessment,
+        key: "experimental_sandbox_command_assessment",
+        stage: Stage::Experimental,
+        default_enabled: false,
+    },
+    FeatureSpec {
+        id: Feature::GhostCommit,
+        key: "ghost_commit",
+        stage: Stage::Experimental,
+        default_enabled: false,
+    },
 ];
--- a/codex-rs/core/src/features/legacy.rs
+++ b/codex-rs/core/src/features/legacy.rs
@@ -9,6 +9,10 @@ struct Alias {
 }

 const ALIASES: &[Alias] = &[
+    Alias {
+        legacy_key: "experimental_sandbox_command_assessment",
+        feature: Feature::SandboxCommandAssessment,
+    },
    Alias {
        legacy_key: "experimental_use_unified_exec_tool",
        feature: Feature::UnifiedExec,
@@ -53,6 +57,7 @@ pub(crate) fn feature_for_key(key: &str) -> Option<Feature> {
 pub struct LegacyFeatureToggles {
    pub include_apply_patch_tool: Option<bool>,
    pub include_view_image_tool: Option<bool>,
+    pub experimental_sandbox_command_assessment: Option<bool>,
    pub experimental_use_freeform_apply_patch: Option<bool>,
    pub experimental_use_exec_command_tool: Option<bool>,
    pub experimental_use_unified_exec_tool: Option<bool>,
@@ -69,6 +74,12 @@ impl LegacyFeatureToggles {
            self.include_apply_patch_tool,
            "include_apply_patch_tool",
        );
+        set_if_some(
+            features,
+            Feature::SandboxCommandAssessment,
+            self.experimental_sandbox_command_assessment,
+            "experimental_sandbox_command_assessment",
+        );
        set_if_some(
            features,
            Feature::ApplyPatchFreeform,
--- a/codex-rs/core/src/git_info.rs
+++ b/codex-rs/core/src/git_info.rs
@@ -260,6 +260,16 @@ async fn get_default_branch(cwd: &Path) -> Option<String> {
    get_default_branch_local(cwd).await
 }

+/// Determine the repository's default branch name, if available.
+///
+/// This inspects remote configuration first (including the symbolic `HEAD`
+/// reference) and falls back to common local defaults such as `main` or
+/// `master`. Returns `None` when the information cannot be determined, for
+/// example when the current directory is not inside a Git repository.
+///
+/// This is a thin public wrapper: it only forwards `cwd` to the private
+/// `get_default_branch` and returns its result unchanged.
+pub async fn default_branch_name(cwd: &Path) -> Option<String> {
+    get_default_branch(cwd).await
+}
+
 /// Attempt to determine the repository's default branch name from local branches.
 async fn get_default_branch_local(cwd: &Path) -> Option<String> {
    for candidate in ["main", "master"] {
--- a/codex-rs/core/src/lib.rs
+++ b/codex-rs/core/src/lib.rs
@@ -36,6 +36,7 @@ mod mcp_tool_call;
 mod message_history;
 mod model_provider_info;
 pub mod parse_command;
+mod response_processing;
 pub mod sandboxing;
 pub mod token_data;
 mod truncate;
@@ -76,6 +77,7 @@ pub use rollout::find_conversation_path_by_id_str;
 pub use rollout::list::ConversationItem;
 pub use rollout::list::ConversationsPage;
 pub use rollout::list::Cursor;
+pub use rollout::list::read_head_for_summary;
 mod function_tool;
 mod state;
 mod tasks;
--- a/codex-rs/core/src/mcp_connection_manager.rs
+++ b/codex-rs/core/src/mcp_connection_manager.rs
@@ -49,7 +49,7 @@ const MCP_TOOL_NAME_DELIMITER: &str = "__";
 const MAX_TOOL_NAME_LENGTH: usize = 64;

 /// Default timeout for initializing MCP server & initially listing tools.
-const DEFAULT_STARTUP_TIMEOUT: Duration = Duration::from_secs(10);
+pub const DEFAULT_STARTUP_TIMEOUT: Duration = Duration::from_secs(10);

 /// Default timeout for individual tool calls.
 const DEFAULT_TOOL_TIMEOUT: Duration = Duration::from_secs(60);
--- a/codex-rs/core/src/mcp_tool_call.rs
+++ b/codex-rs/core/src/mcp_tool_call.rs
@@ -35,6 +35,7 @@ pub(crate) async fn handle_mcp_tool_call(
                    output: FunctionCallOutputPayload {
                        content: format!("err: {e}"),
                        success: Some(false),
+                        ..Default::default()
                    },
                };
            }
--- a/codex-rs/core/src/model_family.rs
+++ b/codex-rs/core/src/model_family.rs
@@ -54,6 +54,9 @@ pub struct ModelFamily {
    /// This is applied when computing the effective context window seen by
    /// consumers.
    pub effective_context_window_percent: i64,
+
+    /// If the model family supports setting the verbosity level when using Responses API.
+    pub support_verbosity: bool,
 }

 macro_rules! model_family {
@@ -73,6 +76,7 @@ macro_rules! model_family {
            base_instructions: BASE_INSTRUCTIONS.to_string(),
            experimental_supported_tools: Vec::new(),
            effective_context_window_percent: 95,
+            support_verbosity: false,
        };
        // apply overrides
        $(
@@ -128,10 +132,11 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
                "test_sync_tool".to_string(),
            ],
            supports_parallel_tool_calls: true,
+            support_verbosity: true,
        )

    // Internal models.
-    } else if slug.starts_with("codex-") {
+    } else if slug.starts_with("codex-exp-") {
        model_family!(
            slug, slug,
            supports_reasoning_summaries: true,
@@ -144,22 +149,25 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
                "read_file".to_string(),
            ],
            supports_parallel_tool_calls: true,
+            support_verbosity: true,
        )

    // Production models.
-    } else if slug.starts_with("gpt-5-codex") {
+    } else if slug.starts_with("gpt-5-codex") || slug.starts_with("codex-") {
        model_family!(
            slug, slug,
            supports_reasoning_summaries: true,
            reasoning_summary_format: ReasoningSummaryFormat::Experimental,
            base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(),
            apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
+            support_verbosity: true,
        )
    } else if slug.starts_with("gpt-5") {
        model_family!(
            slug, "gpt-5",
            supports_reasoning_summaries: true,
            needs_special_apply_patch_instructions: true,
+            support_verbosity: true,
        )
    } else {
        None
@@ -179,5 +187,6 @@ pub fn derive_default_model_family(model: &str) -> ModelFamily {
        base_instructions: BASE_INSTRUCTIONS.to_string(),
        experimental_supported_tools: Vec::new(),
        effective_context_window_percent: 95,
+        support_verbosity: false,
    }
 }
--- a/codex-rs/core/src/model_provider_info.rs
+++ b/codex-rs/core/src/model_provider_info.rs
@@ -6,6 +6,8 @@
 //!      key. These override or extend the defaults at runtime.

 use crate::CodexAuth;
+use crate::default_client::CodexHttpClient;
+use crate::default_client::CodexRequestBuilder;
 use codex_app_server_protocol::AuthMode;
 use serde::Deserialize;
 use serde::Serialize;
@@ -95,7 +97,7 @@ pub struct ModelProviderInfo {

 impl ModelProviderInfo {
    /// Construct a `POST` RequestBuilder for the given URL using the provided
-    /// reqwest Client applying:
+    /// [`CodexHttpClient`] applying:
    ///   • provider-specific headers (static + env based)
    ///   • Bearer auth header when an API key is available.
    ///   • Auth token for OAuth.
@@ -104,9 +106,9 @@ impl ModelProviderInfo {
    /// one produced by [`ModelProviderInfo::api_key`].
    pub async fn create_request_builder<'a>(
        &'a self,
-        client: &'a reqwest::Client,
+        client: &'a CodexHttpClient,
        auth: &Option<CodexAuth>,
-    ) -> crate::error::Result<reqwest::RequestBuilder> {
+    ) -> crate::error::Result<CodexRequestBuilder> {
        let effective_auth = if let Some(secret_key) = &self.experimental_bearer_token {
            Some(CodexAuth::from_api_key(secret_key))
        } else {
@@ -187,9 +189,9 @@ impl ModelProviderInfo {
    }

    /// Apply provider-specific HTTP headers (both static and environment-based)
-    /// onto an existing `reqwest::RequestBuilder` and return the updated
+    /// onto an existing [`CodexRequestBuilder`] and return the updated
    /// builder.
-    fn apply_http_headers(&self, mut builder: reqwest::RequestBuilder) -> reqwest::RequestBuilder {
+    fn apply_http_headers(&self, mut builder: CodexRequestBuilder) -> CodexRequestBuilder {
        if let Some(extra) = &self.http_headers {
            for (k, v) in extra {
                builder = builder.header(k, v);
--- a/codex-rs/core/src/response_processing.rs
+++ b/codex-rs/core/src/response_processing.rs
@@ -0,0 +1,111 @@
+use crate::codex::Session;
+use crate::codex::TurnContext;
+use crate::conversation_history::ConversationHistory;
+use codex_protocol::models::FunctionCallOutputPayload;
+use codex_protocol::models::ResponseInputItem;
+use codex_protocol::models::ResponseItem;
+use tracing::warn;
+
+/// Split the processed output of a model turn into what goes back to the model
+/// and what is kept in conversation history.
+///
+/// Each `ProcessedResponseItem` pairs a model `item` with the optional `response`
+/// produced while handling it:
+/// - every `response` is returned so it can be sent to the model on the next turn;
+/// - recognised items are recorded, followed by the history form of their tool
+///   output when they have one;
+/// - any other pairing is logged with `warn!` and not recorded.
+///
+/// Recording goes to `review_thread_history` when `is_review_mode` is set and to
+/// `sess.record_conversation_items` otherwise; nothing is recorded when no item
+/// qualified.
+///
+/// # Returns
+///
+/// `(responses, recorded_items)`, each in the order the items were processed.
+pub(crate) async fn process_items(
+    processed_items: Vec<crate::codex::ProcessedResponseItem>,
+    is_review_mode: bool,
+    review_thread_history: &mut ConversationHistory,
+    sess: &Session,
+    turn_context: &TurnContext,
+) -> (Vec<ResponseInputItem>, Vec<ResponseItem>) {
+    let mut items_to_record_in_conversation_history = Vec::<ResponseItem>::new();
+    let mut responses = Vec::<ResponseInputItem>::new();
+    for processed_response_item in processed_items {
+        let crate::codex::ProcessedResponseItem { item, response } = processed_response_item;
+        match (&item, &response) {
+            // Items that carry no tool output: an assistant message or a reasoning
+            // item is recorded exactly as the model produced it.
+            (ResponseItem::Message { role, .. }, None) if role == "assistant" => {
+                items_to_record_in_conversation_history.push(item);
+            }
+            (ResponseItem::Reasoning { .. }, None) => {
+                // `item` is owned here, so move it instead of cloning every field.
+                items_to_record_in_conversation_history.push(item);
+            }
+            // Shell and function calls share one output shape: record the call and
+            // then its output as a `FunctionCallOutput` history item.
+            (
+                ResponseItem::LocalShellCall { .. } | ResponseItem::FunctionCall { .. },
+                Some(ResponseInputItem::FunctionCallOutput { call_id, output }),
+            ) => {
+                items_to_record_in_conversation_history.push(item);
+                items_to_record_in_conversation_history.push(ResponseItem::FunctionCallOutput {
+                    call_id: call_id.clone(),
+                    output: output.clone(),
+                });
+            }
+            // Custom tool calls keep their own output variant in history.
+            (
+                ResponseItem::CustomToolCall { .. },
+                Some(ResponseInputItem::CustomToolCallOutput { call_id, output }),
+            ) => {
+                items_to_record_in_conversation_history.push(item);
+                items_to_record_in_conversation_history.push(ResponseItem::CustomToolCallOutput {
+                    call_id: call_id.clone(),
+                    output: output.clone(),
+                });
+            }
+            // MCP results are stored in history as ordinary function-call outputs;
+            // a failed call becomes an unsuccessful payload carrying the error text.
+            (
+                ResponseItem::FunctionCall { .. },
+                Some(ResponseInputItem::McpToolCallOutput { call_id, result }),
+            ) => {
+                items_to_record_in_conversation_history.push(item);
+                let output = match result {
+                    Ok(call_tool_result) => FunctionCallOutputPayload::from(call_tool_result),
+                    Err(err) => FunctionCallOutputPayload {
+                        content: err.clone(),
+                        success: Some(false),
+                        ..Default::default()
+                    },
+                };
+                items_to_record_in_conversation_history.push(ResponseItem::FunctionCallOutput {
+                    call_id: call_id.clone(),
+                    output,
+                });
+            }
+            _ => {
+                // Any other pairing is not recorded in history, only logged.
+                warn!("Unexpected response item: {item:?} with response: {response:?}");
+            }
+        };
+        // Every response goes back to the model, whether or not the item was recorded.
+        if let Some(response) = response {
+            responses.push(response);
+        }
+    }
+
+    // Only touch the history (which may take a lock) if there is something to record.
+    if !items_to_record_in_conversation_history.is_empty() {
+        if is_review_mode {
+            review_thread_history.record_items(items_to_record_in_conversation_history.iter());
+        } else {
+            sess.record_conversation_items(turn_context, &items_to_record_in_conversation_history)
+                .await;
+        }
+    }
+    (responses, items_to_record_in_conversation_history)
+}
--- a/codex-rs/core/src/rollout/list.rs
+++ b/codex-rs/core/src/rollout/list.rs
@@ -1,12 +1,11 @@
 use std::cmp::Reverse;
 use std::io::{self};
+use std::num::NonZero;
 use std::path::Path;
 use std::path::PathBuf;
-
-use codex_file_search as file_search;
-use std::num::NonZero;
 use std::sync::Arc;
 use std::sync::atomic::AtomicBool;
+
 use time::OffsetDateTime;
 use time::PrimitiveDateTime;
 use time::format_description::FormatItem;
@@ -15,6 +14,7 @@ use uuid::Uuid;

 use super::SESSIONS_SUBDIR;
 use crate::protocol::EventMsg;
+use codex_file_search as file_search;
 use codex_protocol::protocol::RolloutItem;
 use codex_protocol::protocol::RolloutLine;
 use codex_protocol::protocol::SessionSource;
@@ -54,6 +54,7 @@ struct HeadTailSummary {
    saw_session_meta: bool,
    saw_user_event: bool,
    source: Option<SessionSource>,
+    model_provider: Option<String>,
    created_at: Option<String>,
    updated_at: Option<String>,
 }
@@ -109,6 +110,8 @@ pub(crate) async fn get_conversations(
    page_size: usize,
    cursor: Option<&Cursor>,
    allowed_sources: &[SessionSource],
+    model_providers: Option<&[String]>,
+    default_provider: &str,
 ) -> io::Result<ConversationsPage> {
    let mut root = codex_home.to_path_buf();
    root.push(SESSIONS_SUBDIR);
@@ -124,8 +127,17 @@ pub(crate) async fn get_conversations(

    let anchor = cursor.cloned();

-    let result =
-        traverse_directories_for_paths(root.clone(), page_size, anchor, allowed_sources).await?;
+    let provider_matcher =
+        model_providers.and_then(|filters| ProviderMatcher::new(filters, default_provider));
+
+    let result = traverse_directories_for_paths(
+        root.clone(),
+        page_size,
+        anchor,
+        allowed_sources,
+        provider_matcher.as_ref(),
+    )
+    .await?;
    Ok(result)
 }

@@ -145,6 +157,7 @@ async fn traverse_directories_for_paths(
    page_size: usize,
    anchor: Option<Cursor>,
    allowed_sources: &[SessionSource],
+    provider_matcher: Option<&ProviderMatcher<'_>>,
 ) -> io::Result<ConversationsPage> {
    let mut items: Vec<ConversationItem> = Vec::with_capacity(page_size);
    let mut scanned_files = 0usize;
@@ -153,6 +166,7 @@ async fn traverse_directories_for_paths(
        Some(c) => (c.ts, c.id),
        None => (OffsetDateTime::UNIX_EPOCH, Uuid::nil()),
    };
+    let mut more_matches_available = false;

    let year_dirs = collect_dirs_desc(&root, |s| s.parse::<u16>().ok()).await?;

@@ -184,6 +198,7 @@ async fn traverse_directories_for_paths(
                for (ts, sid, _name_str, path) in day_files.into_iter() {
                    scanned_files += 1;
                    if scanned_files >= MAX_SCAN_FILES && items.len() >= page_size {
+                        more_matches_available = true;
                        break 'outer;
                    }
                    if !anchor_passed {
@@ -194,6 +209,7 @@ async fn traverse_directories_for_paths(
                        }
                    }
                    if items.len() == page_size {
+                        more_matches_available = true;
                        break 'outer;
                    }
                    // Read head and simultaneously detect message events within the same
@@ -208,6 +224,11 @@ async fn traverse_directories_for_paths(
                    {
                        continue;
                    }
+                    if let Some(matcher) = provider_matcher
+                        && !matcher.matches(summary.model_provider.as_deref())
+                    {
+                        continue;
+                    }
                    // Apply filters: must have session meta and at least one user message event
                    if summary.saw_session_meta && summary.saw_user_event {
                        let HeadTailSummary {
@@ -231,12 +252,21 @@ async fn traverse_directories_for_paths(
        }
    }

-    let next = build_next_cursor(&items);
+    let reached_scan_cap = scanned_files >= MAX_SCAN_FILES;
+    if reached_scan_cap && !items.is_empty() {
+        more_matches_available = true;
+    }
+
+    let next = if more_matches_available {
+        build_next_cursor(&items)
+    } else {
+        None
+    };
    Ok(ConversationsPage {
        items,
        next_cursor: next,
        num_scanned_files: scanned_files,
-        reached_scan_cap: scanned_files >= MAX_SCAN_FILES,
+        reached_scan_cap,
    })
 }

@@ -328,6 +358,32 @@ fn parse_timestamp_uuid_from_filename(name: &str) -> Option<(OffsetDateTime, Uui
    Some((ts, uuid))
 }

+/// Filters rollout sessions by the model provider recorded in their metadata.
+struct ProviderMatcher<'a> {
+    filters: &'a [String],
+    matches_default_provider: bool,
+}
+
+impl<'a> ProviderMatcher<'a> {
+    /// Returns `None` for an empty filter list; otherwise records whether the
+    /// default provider is among the accepted providers.
+    fn new(filters: &'a [String], default_provider: &'a str) -> Option<Self> {
+        (!filters.is_empty()).then(|| Self {
+            filters,
+            matches_default_provider: filters.iter().any(|name| name == default_provider),
+        })
+    }
+
+    /// A session without a recorded provider matches only if the default does.
+    fn matches(&self, session_provider: Option<&str>) -> bool {
+        let Some(provider) = session_provider else {
+            return self.matches_default_provider;
+        };
+        // Exact, case-sensitive comparison against every accepted provider.
+        self.filters.iter().any(|candidate| candidate == provider)
+    }
+}
+
 async fn read_head_and_tail(
    path: &Path,
    head_limit: usize,
@@ -354,6 +410,7 @@ async fn read_head_and_tail(
        match rollout_line.item {
            RolloutItem::SessionMeta(session_meta_line) => {
                summary.source = Some(session_meta_line.meta.source);
+                summary.model_provider = session_meta_line.meta.model_provider.clone();
                summary.created_at = summary
                    .created_at
                    .clone()
@@ -394,6 +451,13 @@ async fn read_head_and_tail(
    Ok(summary)
 }

+/// Returns the head records of the rollout at `path` (at most `HEAD_RECORD_LIMIT`),
+/// which should be enough to build a summary including the session meta line.
+pub async fn read_head_for_summary(path: &Path) -> io::Result<Vec<serde_json::Value>> {
+    let head_only = read_head_and_tail(path, HEAD_RECORD_LIMIT, 0).await;
+    head_only.map(|summary| summary.head)
+}
+
 async fn read_tail_records(
    path: &Path,
    max_records: usize,
@@ -515,6 +579,7 @@ pub async fn find_conversation_path_by_id_str(
        threads,
        cancel,
        compute_indices,
+        false,
    )
    .map_err(|e| io::Error::other(format!("file search failed: {e}")))?;

--- a/codex-rs/core/src/rollout/policy.rs
+++ b/codex-rs/core/src/rollout/policy.rs
@@ -26,7 +26,8 @@ pub(crate) fn should_persist_response_item(item: &ResponseItem) -> bool {
        | ResponseItem::FunctionCallOutput { .. }
        | ResponseItem::CustomToolCall { .. }
        | ResponseItem::CustomToolCallOutput { .. }
-        | ResponseItem::WebSearchCall { .. } => true,
+        | ResponseItem::WebSearchCall { .. }
+        | ResponseItem::GhostSnapshot { .. } => true,
        ResponseItem::Other => false,
    }
 }
@@ -42,6 +43,7 @@ pub(crate) fn should_persist_event_msg(ev: &EventMsg) -> bool {
        | EventMsg::TokenCount(_)
        | EventMsg::EnteredReviewMode(_)
        | EventMsg::ExitedReviewMode(_)
+        | EventMsg::UndoCompleted(_)
        | EventMsg::TurnAborted(_) => true,
        EventMsg::Error(_)
        | EventMsg::TaskStarted(_)
@@ -50,6 +52,7 @@ pub(crate) fn should_persist_event_msg(ev: &EventMsg) -> bool {
        | EventMsg::AgentReasoningDelta(_)
        | EventMsg::AgentReasoningRawContentDelta(_)
        | EventMsg::AgentReasoningSectionBreak(_)
+        | EventMsg::RawResponseItem(_)
        | EventMsg::SessionConfigured(_)
        | EventMsg::McpToolCallBegin(_)
        | EventMsg::McpToolCallEnd(_)
@@ -66,12 +69,12 @@ pub(crate) fn should_persist_event_msg(ev: &EventMsg) -> bool {
        | EventMsg::PatchApplyEnd(_)
        | EventMsg::TurnDiff(_)
        | EventMsg::GetHistoryEntryResponse(_)
+        | EventMsg::UndoStarted(_)
        | EventMsg::McpListToolsResponse(_)
        | EventMsg::ListCustomPromptsResponse(_)
        | EventMsg::PlanUpdate(_)
        | EventMsg::ShutdownComplete
        | EventMsg::ViewImageToolCall(_)
-        | EventMsg::ConversationPath(_)
        | EventMsg::ItemStarted(_)
        | EventMsg::ItemCompleted(_) => false,
    }
--- a/codex-rs/core/src/rollout/recorder.rs
+++ b/codex-rs/core/src/rollout/recorder.rs
@@ -97,8 +97,18 @@ impl RolloutRecorder {
        page_size: usize,
        cursor: Option<&Cursor>,
        allowed_sources: &[SessionSource],
+        model_providers: Option<&[String]>,
+        default_provider: &str,
    ) -> std::io::Result<ConversationsPage> {
-        get_conversations(codex_home, page_size, cursor, allowed_sources).await
+        get_conversations(
+            codex_home,
+            page_size,
+            cursor,
+            allowed_sources,
+            model_providers,
+            default_provider,
+        )
+        .await
    }

    /// Attempt to create a new [`RolloutRecorder`]. If the sessions directory
@@ -137,6 +147,7 @@ impl RolloutRecorder {
                        cli_version: env!("CARGO_PKG_VERSION").to_string(),
                        instructions,
                        source,
+                        model_provider: Some(config.model_provider_id.clone()),
                    }),
                )
            }
@@ -267,10 +278,6 @@ impl RolloutRecorder {
        }))
    }

-    pub(crate) fn get_rollout_path(&self) -> PathBuf {
-        self.rollout_path.clone()
-    }
-
    pub async fn shutdown(&self) -> std::io::Result<()> {
        let (tx_done, rx_done) = oneshot::channel();
        match self.tx.send(RolloutCmd::Shutdown { ack: tx_done }).await {
--- a/codex-rs/core/src/rollout/tests.rs
+++ b/codex-rs/core/src/rollout/tests.rs
@@ -32,6 +32,14 @@ use codex_protocol::protocol::SessionSource;
 use codex_protocol::protocol::UserMessageEvent;

 const NO_SOURCE_FILTER: &[SessionSource] = &[];
+const TEST_PROVIDER: &str = "test-provider";
+
+/// Converts borrowed provider names into owned `String`s for filter arguments.
+fn provider_vec(providers: &[&str]) -> Vec<String> {
+    let mut owned = Vec::with_capacity(providers.len());
+    owned.extend(providers.iter().map(|name| (*name).to_owned()));
+    owned
+}

 fn write_session_file(
    root: &Path,
@@ -39,6 +47,24 @@ fn write_session_file(
    uuid: Uuid,
    num_records: usize,
    source: Option<SessionSource>,
+) -> std::io::Result<(OffsetDateTime, Uuid)> {
+    write_session_file_with_provider(
+        root,
+        ts_str,
+        uuid,
+        num_records,
+        source,
+        Some("test-provider"),
+    )
+}
+
+fn write_session_file_with_provider(
+    root: &Path,
+    ts_str: &str,
+    uuid: Uuid,
+    num_records: usize,
+    source: Option<SessionSource>,
+    model_provider: Option<&str>,
 ) -> std::io::Result<(OffsetDateTime, Uuid)> {
    let format: &[FormatItem] =
        format_description!("[year]-[month]-[day]T[hour]-[minute]-[second]");
@@ -68,6 +94,9 @@ fn write_session_file(
    if let Some(source) = source {
        payload["source"] = serde_json::to_value(source).unwrap();
    }
+    if let Some(provider) = model_provider {
+        payload["model_provider"] = serde_json::Value::String(provider.to_string());
+    }

    let meta = serde_json::json!({
        "timestamp": ts_str,
@@ -134,9 +163,17 @@ async fn test_list_conversations_latest_first() {
    )
    .unwrap();

-    let page = get_conversations(home, 10, None, INTERACTIVE_SESSION_SOURCES)
-        .await
-        .unwrap();
+    let provider_filter = provider_vec(&[TEST_PROVIDER]);
+    let page = get_conversations(
+        home,
+        10,
+        None,
+        INTERACTIVE_SESSION_SOURCES,
+        Some(provider_filter.as_slice()),
+        TEST_PROVIDER,
+    )
+    .await
+    .unwrap();

    // Build expected objects
    let p1 = home
@@ -166,6 +203,7 @@ async fn test_list_conversations_latest_first() {
        "originator": "test_originator",
        "cli_version": "test_version",
        "source": "vscode",
+        "model_provider": "test-provider",
    })];
    let head_2 = vec![serde_json::json!({
        "id": u2,
@@ -175,6 +213,7 @@ async fn test_list_conversations_latest_first() {
        "originator": "test_originator",
        "cli_version": "test_version",
        "source": "vscode",
+        "model_provider": "test-provider",
    })];
    let head_1 = vec![serde_json::json!({
        "id": u1,
@@ -184,11 +223,9 @@ async fn test_list_conversations_latest_first() {
        "originator": "test_originator",
        "cli_version": "test_version",
        "source": "vscode",
+        "model_provider": "test-provider",
    })];

-    let expected_cursor: Cursor =
-        serde_json::from_str(&format!("\"2025-01-01T12-00-00|{u1}\"")).unwrap();
-
    let expected = ConversationsPage {
        items: vec![
            ConversationItem {
@@ -213,7 +250,7 @@ async fn test_list_conversations_latest_first() {
                updated_at: Some("2025-01-01T12-00-00".into()),
            },
        ],
-        next_cursor: Some(expected_cursor),
+        next_cursor: None,
        num_scanned_files: 3,
        reached_scan_cap: false,
    };
@@ -275,9 +312,17 @@ async fn test_pagination_cursor() {
    )
    .unwrap();

-    let page1 = get_conversations(home, 2, None, INTERACTIVE_SESSION_SOURCES)
-        .await
-        .unwrap();
+    let provider_filter = provider_vec(&[TEST_PROVIDER]);
+    let page1 = get_conversations(
+        home,
+        2,
+        None,
+        INTERACTIVE_SESSION_SOURCES,
+        Some(provider_filter.as_slice()),
+        TEST_PROVIDER,
+    )
+    .await
+    .unwrap();
    let p5 = home
        .join("sessions")
        .join("2025")
@@ -298,6 +343,7 @@ async fn test_pagination_cursor() {
        "originator": "test_originator",
        "cli_version": "test_version",
        "source": "vscode",
+        "model_provider": "test-provider",
    })];
    let head_4 = vec![serde_json::json!({
        "id": u4,
@@ -307,6 +353,7 @@ async fn test_pagination_cursor() {
        "originator": "test_originator",
        "cli_version": "test_version",
        "source": "vscode",
+        "model_provider": "test-provider",
    })];
    let expected_cursor1: Cursor =
        serde_json::from_str(&format!("\"2025-03-04T09-00-00|{u4}\"")).unwrap();
@@ -338,6 +385,8 @@ async fn test_pagination_cursor() {
        2,
        page1.next_cursor.as_ref(),
        INTERACTIVE_SESSION_SOURCES,
+        Some(provider_filter.as_slice()),
+        TEST_PROVIDER,
    )
    .await
    .unwrap();
@@ -361,6 +410,7 @@ async fn test_pagination_cursor() {
        "originator": "test_originator",
        "cli_version": "test_version",
        "source": "vscode",
+        "model_provider": "test-provider",
    })];
    let head_2 = vec![serde_json::json!({
        "id": u2,
@@ -370,6 +420,7 @@ async fn test_pagination_cursor() {
        "originator": "test_originator",
        "cli_version": "test_version",
        "source": "vscode",
+        "model_provider": "test-provider",
    })];
    let expected_cursor2: Cursor =
        serde_json::from_str(&format!("\"2025-03-02T09-00-00|{u2}\"")).unwrap();
@@ -401,6 +452,8 @@ async fn test_pagination_cursor() {
        2,
        page2.next_cursor.as_ref(),
        INTERACTIVE_SESSION_SOURCES,
+        Some(provider_filter.as_slice()),
+        TEST_PROVIDER,
    )
    .await
    .unwrap();
@@ -418,9 +471,8 @@ async fn test_pagination_cursor() {
        "originator": "test_originator",
        "cli_version": "test_version",
        "source": "vscode",
+        "model_provider": "test-provider",
    })];
-    let expected_cursor3: Cursor =
-        serde_json::from_str(&format!("\"2025-03-01T09-00-00|{u1}\"")).unwrap();
    let expected_page3 = ConversationsPage {
        items: vec![ConversationItem {
            path: p1,
@@ -429,7 +481,7 @@ async fn test_pagination_cursor() {
            created_at: Some("2025-03-01T09-00-00".into()),
            updated_at: Some("2025-03-01T09-00-00".into()),
        }],
-        next_cursor: Some(expected_cursor3),
+        next_cursor: None,
        num_scanned_files: 5, // scanned 05, 04 (anchor), 03, 02 (anchor), 01
        reached_scan_cap: false,
    };
@@ -445,9 +497,17 @@ async fn test_get_conversation_contents() {
    let ts = "2025-04-01T10-30-00";
    write_session_file(home, ts, uuid, 2, Some(SessionSource::VSCode)).unwrap();

-    let page = get_conversations(home, 1, None, INTERACTIVE_SESSION_SOURCES)
-        .await
-        .unwrap();
+    let provider_filter = provider_vec(&[TEST_PROVIDER]);
+    let page = get_conversations(
+        home,
+        1,
+        None,
+        INTERACTIVE_SESSION_SOURCES,
+        Some(provider_filter.as_slice()),
+        TEST_PROVIDER,
+    )
+    .await
+    .unwrap();
    let path = &page.items[0].path;

    let content = get_conversation(path).await.unwrap();
@@ -467,8 +527,8 @@ async fn test_get_conversation_contents() {
        "originator": "test_originator",
        "cli_version": "test_version",
        "source": "vscode",
+        "model_provider": "test-provider",
    })];
-    let expected_cursor: Cursor = serde_json::from_str(&format!("\"{ts}|{uuid}\"")).unwrap();
    let expected_page = ConversationsPage {
        items: vec![ConversationItem {
            path: expected_path,
@@ -477,7 +537,7 @@ async fn test_get_conversation_contents() {
            created_at: Some(ts.into()),
            updated_at: Some(ts.into()),
        }],
-        next_cursor: Some(expected_cursor),
+        next_cursor: None,
        num_scanned_files: 1,
        reached_scan_cap: false,
    };
@@ -495,6 +555,7 @@ async fn test_get_conversation_contents() {
            "originator": "test_originator",
            "cli_version": "test_version",
            "source": "vscode",
+            "model_provider": "test-provider",
        }
    });
    let user_event = serde_json::json!({
@@ -532,6 +593,7 @@ async fn test_tail_includes_last_response_items() -> Result<()> {
                originator: "test_originator".into(),
                cli_version: "test_version".into(),
                source: SessionSource::VSCode,
+                model_provider: Some("test-provider".into()),
            },
            git: None,
        }),
@@ -563,7 +625,16 @@ async fn test_tail_includes_last_response_items() -> Result<()> {
    }
    drop(file);

-    let page = get_conversations(home, 1, None, INTERACTIVE_SESSION_SOURCES).await?;
+    let provider_filter = provider_vec(&[TEST_PROVIDER]);
+    let page = get_conversations(
+        home,
+        1,
+        None,
+        INTERACTIVE_SESSION_SOURCES,
+        Some(provider_filter.as_slice()),
+        TEST_PROVIDER,
+    )
+    .await?;
    let item = page.items.first().expect("conversation item");
    let tail_len = item.tail.len();
    assert_eq!(tail_len, 10usize.min(total_messages));
@@ -615,6 +686,7 @@ async fn test_tail_handles_short_sessions() -> Result<()> {
                originator: "test_originator".into(),
                cli_version: "test_version".into(),
                source: SessionSource::VSCode,
+                model_provider: Some("test-provider".into()),
            },
            git: None,
        }),
@@ -645,7 +717,16 @@ async fn test_tail_handles_short_sessions() -> Result<()> {
    }
    drop(file);

-    let page = get_conversations(home, 1, None, INTERACTIVE_SESSION_SOURCES).await?;
+    let provider_filter = provider_vec(&[TEST_PROVIDER]);
+    let page = get_conversations(
+        home,
+        1,
+        None,
+        INTERACTIVE_SESSION_SOURCES,
+        Some(provider_filter.as_slice()),
+        TEST_PROVIDER,
+    )
+    .await?;
    let tail = &page.items.first().expect("conversation item").tail;

    assert_eq!(tail.len(), 3);
@@ -699,6 +780,7 @@ async fn test_tail_skips_trailing_non_responses() -> Result<()> {
                originator: "test_originator".into(),
                cli_version: "test_version".into(),
                source: SessionSource::VSCode,
+                model_provider: Some("test-provider".into()),
            },
            git: None,
        }),
@@ -743,7 +825,16 @@ async fn test_tail_skips_trailing_non_responses() -> Result<()> {
    writeln!(file, "{}", serde_json::to_string(&shutdown_event)?)?;
    drop(file);

-    let page = get_conversations(home, 1, None, INTERACTIVE_SESSION_SOURCES).await?;
+    let provider_filter = provider_vec(&[TEST_PROVIDER]);
+    let page = get_conversations(
+        home,
+        1,
+        None,
+        INTERACTIVE_SESSION_SOURCES,
+        Some(provider_filter.as_slice()),
+        TEST_PROVIDER,
+    )
+    .await?;
    let tail = &page.items.first().expect("conversation item").tail;

    let expected: Vec<serde_json::Value> = (0..4)
@@ -785,9 +876,17 @@ async fn test_stable_ordering_same_second_pagination() {
    write_session_file(home, ts, u2, 0, Some(SessionSource::VSCode)).unwrap();
    write_session_file(home, ts, u3, 0, Some(SessionSource::VSCode)).unwrap();

-    let page1 = get_conversations(home, 2, None, INTERACTIVE_SESSION_SOURCES)
-        .await
-        .unwrap();
+    let provider_filter = provider_vec(&[TEST_PROVIDER]);
+    let page1 = get_conversations(
+        home,
+        2,
+        None,
+        INTERACTIVE_SESSION_SOURCES,
+        Some(provider_filter.as_slice()),
+        TEST_PROVIDER,
+    )
+    .await
+    .unwrap();

    let p3 = home
        .join("sessions")
@@ -810,6 +909,7 @@ async fn test_stable_ordering_same_second_pagination() {
            "originator": "test_originator",
            "cli_version": "test_version",
            "source": "vscode",
+            "model_provider": "test-provider",
        })]
    };
    let expected_cursor1: Cursor = serde_json::from_str(&format!("\"{ts}|{u2}\"")).unwrap();
@@ -841,6 +941,8 @@ async fn test_stable_ordering_same_second_pagination() {
        2,
        page1.next_cursor.as_ref(),
        INTERACTIVE_SESSION_SOURCES,
+        Some(provider_filter.as_slice()),
+        TEST_PROVIDER,
    )
    .await
    .unwrap();
@@ -850,7 +952,6 @@ async fn test_stable_ordering_same_second_pagination() {
        .join("07")
        .join("01")
        .join(format!("rollout-2025-07-01T00-00-00-{u1}.jsonl"));
-    let expected_cursor2: Cursor = serde_json::from_str(&format!("\"{ts}|{u1}\"")).unwrap();
    let expected_page2 = ConversationsPage {
        items: vec![ConversationItem {
            path: p1,
@@ -859,7 +960,7 @@ async fn test_stable_ordering_same_second_pagination() {
            created_at: Some(ts.to_string()),
            updated_at: Some(ts.to_string()),
        }],
-        next_cursor: Some(expected_cursor2),
+        next_cursor: None,
        num_scanned_files: 3, // scanned u3, u2 (anchor), u1
        reached_scan_cap: false,
    };
@@ -891,9 +992,17 @@ async fn test_source_filter_excludes_non_matching_sessions() {
    )
    .unwrap();

-    let interactive_only = get_conversations(home, 10, None, INTERACTIVE_SESSION_SOURCES)
-        .await
-        .unwrap();
+    let provider_filter = provider_vec(&[TEST_PROVIDER]);
+    let interactive_only = get_conversations(
+        home,
+        10,
+        None,
+        INTERACTIVE_SESSION_SOURCES,
+        Some(provider_filter.as_slice()),
+        TEST_PROVIDER,
+    )
+    .await
+    .unwrap();
    let paths: Vec<_> = interactive_only
        .items
        .iter()
@@ -905,7 +1014,7 @@ async fn test_source_filter_excludes_non_matching_sessions() {
        path.ends_with("rollout-2025-08-02T10-00-00-00000000-0000-0000-0000-00000000002a.jsonl")
    }));

-    let all_sessions = get_conversations(home, 10, None, NO_SOURCE_FILTER)
+    let all_sessions = get_conversations(home, 10, None, NO_SOURCE_FILTER, None, TEST_PROVIDER)
        .await
        .unwrap();
    let all_paths: Vec<_> = all_sessions
@@ -921,3 +1030,102 @@ async fn test_source_filter_excludes_non_matching_sessions() {
        path.ends_with("rollout-2025-08-01T10-00-00-00000000-0000-0000-0000-00000000004d.jsonl")
    }));
 }
+
+/// Exercises the model-provider filter argument of `get_conversations`.
+///
+/// Three sessions are written: one tagged `openai`, one tagged `beta`, and one
+/// with no provider at all. The assertions pin which of them each filter
+/// value selects.
+#[tokio::test]
+async fn test_model_provider_filter_selects_only_matching_sessions() -> Result<()> {
+    let temp = TempDir::new().unwrap();
+    let home = temp.path();
+
+    let openai_id = Uuid::from_u128(1);
+    let beta_id = Uuid::from_u128(2);
+    let none_id = Uuid::from_u128(3);
+
+    write_session_file_with_provider(
+        home,
+        "2025-09-01T12-00-00",
+        openai_id,
+        1,
+        Some(SessionSource::VSCode),
+        Some("openai"),
+    )?;
+    write_session_file_with_provider(
+        home,
+        "2025-09-01T11-00-00",
+        beta_id,
+        1,
+        Some(SessionSource::VSCode),
+        Some("beta"),
+    )?;
+    write_session_file_with_provider(
+        home,
+        "2025-09-01T10-00-00",
+        none_id,
+        1,
+        Some(SessionSource::VSCode),
+        None,
+    )?;
+
+    // Filtering on "openai" is expected to return two sessions: the one tagged
+    // "openai" and the one written without a provider.
+    // NOTE(review): presumably the trailing "openai" argument is the default
+    // provider applied to untagged sessions — confirm against get_conversations.
+    let openai_id_str = openai_id.to_string();
+    let none_id_str = none_id.to_string();
+    let openai_filter = provider_vec(&["openai"]);
+    let openai_sessions = get_conversations(
+        home,
+        10,
+        None,
+        NO_SOURCE_FILTER,
+        Some(openai_filter.as_slice()),
+        "openai",
+    )
+    .await?;
+    assert_eq!(openai_sessions.items.len(), 2);
+    // Collect the "id" field of the first head entry of every returned item.
+    let openai_ids: Vec<_> = openai_sessions
+        .items
+        .iter()
+        .filter_map(|item| {
+            item.head
+                .first()
+                .and_then(|value| value.get("id"))
+                .and_then(serde_json::Value::as_str)
+                .map(str::to_string)
+        })
+        .collect();
+    assert!(openai_ids.contains(&openai_id_str));
+    assert!(openai_ids.contains(&none_id_str));
+
+    // Filtering on "beta" must return only the beta-tagged session.
+    let beta_filter = provider_vec(&["beta"]);
+    let beta_sessions = get_conversations(
+        home,
+        10,
+        None,
+        NO_SOURCE_FILTER,
+        Some(beta_filter.as_slice()),
+        "openai",
+    )
+    .await?;
+    assert_eq!(beta_sessions.items.len(), 1);
+    let beta_id_str = beta_id.to_string();
+    let beta_head = beta_sessions
+        .items
+        .first()
+        .and_then(|item| item.head.first())
+        .and_then(|value| value.get("id"))
+        .and_then(serde_json::Value::as_str);
+    assert_eq!(beta_head, Some(beta_id_str.as_str()));
+
+    // A provider that none of the sessions use yields an empty page.
+    let unknown_filter = provider_vec(&["unknown"]);
+    let unknown_sessions = get_conversations(
+        home,
+        10,
+        None,
+        NO_SOURCE_FILTER,
+        Some(unknown_filter.as_slice()),
+        "openai",
+    )
+    .await?;
+    assert!(unknown_sessions.items.is_empty());
+
+    // Passing no provider filter returns all three sessions.
+    let all_sessions = get_conversations(home, 10, None, NO_SOURCE_FILTER, None, "openai").await?;
+    assert_eq!(all_sessions.items.len(), 3);
+
+    Ok(())
+}
--- a/codex-rs/core/src/sandboxing/assessment.rs
+++ b/codex-rs/core/src/sandboxing/assessment.rs
@@ -0,0 +1,275 @@
+use std::path::Path;
+use std::path::PathBuf;
+use std::sync::Arc;
+use std::time::Duration;
+use std::time::Instant;
+
+use crate::AuthManager;
+use crate::ModelProviderInfo;
+use crate::client::ModelClient;
+use crate::client_common::Prompt;
+use crate::client_common::ResponseEvent;
+use crate::config::Config;
+use crate::protocol::SandboxPolicy;
+use askama::Template;
+use codex_otel::otel_event_manager::OtelEventManager;
+use codex_protocol::ConversationId;
+use codex_protocol::models::ContentItem;
+use codex_protocol::models::ResponseItem;
+use codex_protocol::protocol::SandboxCommandAssessment;
+use futures::StreamExt;
+use serde_json::json;
+use tokio::time::timeout;
+use tracing::warn;
+
+/// Deadline applied by `assess_command` to the whole model stream (request
+/// plus reading events); exceeding it is reported as a `timeout` outcome.
+const SANDBOX_ASSESSMENT_TIMEOUT: Duration = Duration::from_secs(5);
+
+/// Permitted values for each entry of `risk_categories`; embedded as the
+/// `enum` constraint in the schema built by `sandbox_assessment_schema`.
+const SANDBOX_RISK_CATEGORY_VALUES: &[&str] = &[
+    "data_deletion",
+    "data_exfiltration",
+    "privilege_escalation",
+    "system_modification",
+    "network_access",
+    "resource_exhaustion",
+    "compliance",
+];
+
+/// Askama template inputs for `sandboxing/assessment_prompt.md`.
+///
+/// Rendering is done with `escape = "none"`, so every field is inserted into
+/// the prompt verbatim. All fields are filled in by `assess_command`.
+#[derive(Template)]
+#[template(path = "sandboxing/assessment_prompt.md", escape = "none")]
+struct SandboxAssessmentPromptTemplate<'a> {
+    // Operating system name taken from `std::env::consts::OS`.
+    platform: &'a str,
+    // Short label produced by `summarize_sandbox_policy`.
+    sandbox_policy: &'a str,
+    // Comma-separated, sorted and de-duplicated roots; `None` when there are none.
+    filesystem_roots: Option<&'a str>,
+    // Lossy string form of the command's working directory.
+    working_directory: &'a str,
+    // The command's argv serialized as a JSON array.
+    command_argv: &'a str,
+    // The command joined into one shell-quoted string.
+    command_joined: &'a str,
+    // Trimmed failure message; `None` when absent or blank.
+    sandbox_failure_message: Option<&'a str>,
+}
+
+/// Asks the model to assess the risk of running `command` and returns its
+/// verdict.
+///
+/// Returns `None` without contacting the model when
+/// `config.experimental_sandbox_command_assessment` is false or `command` is
+/// empty. Otherwise the prompt template is rendered, split into a system and a
+/// user section, and streamed to the model under `SANDBOX_ASSESSMENT_TIMEOUT`.
+///
+/// On success the parsed `SandboxCommandAssessment` is returned. A template
+/// that fails to render or lacks the section separator only logs a warning.
+/// Model errors, timeouts, missing output and JSON parse failures are logged
+/// and reported through `parent_otel.sandbox_assessment` with a status label.
+/// Every one of these failure paths yields `None`.
+///
+/// `failure_message` is trimmed and discarded when blank before it reaches
+/// the template.
+#[allow(clippy::too_many_arguments)]
+pub(crate) async fn assess_command(
+    config: Arc<Config>,
+    provider: ModelProviderInfo,
+    auth_manager: Arc<AuthManager>,
+    parent_otel: &OtelEventManager,
+    conversation_id: ConversationId,
+    call_id: &str,
+    command: &[String],
+    sandbox_policy: &SandboxPolicy,
+    cwd: &Path,
+    failure_message: Option<&str>,
+) -> Option<SandboxCommandAssessment> {
+    if !config.experimental_sandbox_command_assessment || command.is_empty() {
+        return None;
+    }
+
+    // Fall back to an empty JSON array / a plain space-join when serialization
+    // or shell quoting fails, so the prompt can still be built.
+    let command_json = serde_json::to_string(command).unwrap_or_else(|_| "[]".to_string());
+    let command_joined =
+        shlex::try_join(command.iter().map(String::as_str)).unwrap_or_else(|_| command.join(" "));
+    let failure = failure_message
+        .map(str::trim)
+        .filter(|msg| !msg.is_empty())
+        .map(str::to_string);
+
+    let cwd_str = cwd.to_string_lossy().to_string();
+    let sandbox_summary = summarize_sandbox_policy(sandbox_policy);
+    // Sort and de-duplicate so each root appears once, in a stable order.
+    let mut roots = sandbox_roots_for_prompt(sandbox_policy, cwd);
+    roots.sort();
+    roots.dedup();
+
+    let platform = std::env::consts::OS;
+    let roots_formatted = roots.iter().map(|root| root.to_string_lossy().to_string());
+    let filesystem_roots = match roots_formatted.collect::<Vec<_>>() {
+        collected if collected.is_empty() => None,
+        collected => Some(collected.join(", ")),
+    };
+
+    let prompt_template = SandboxAssessmentPromptTemplate {
+        platform,
+        sandbox_policy: sandbox_summary.as_str(),
+        filesystem_roots: filesystem_roots.as_deref(),
+        working_directory: cwd_str.as_str(),
+        command_argv: command_json.as_str(),
+        command_joined: command_joined.as_str(),
+        sandbox_failure_message: failure.as_deref(),
+    };
+    let rendered_prompt = match prompt_template.render() {
+        Ok(rendered) => rendered,
+        Err(err) => {
+            warn!("failed to render sandbox assessment prompt: {err}");
+            return None;
+        }
+    };
+    // The rendered template must contain a line consisting of `---` that
+    // separates the system prompt from the user prompt.
+    let (system_prompt_section, user_prompt_section) = match rendered_prompt.split_once("\n---\n") {
+        Some(split) => split,
+        None => {
+            warn!("rendered sandbox assessment prompt missing separator");
+            return None;
+        }
+    };
+    // Drop the optional section labels; a section without its label is used
+    // unchanged (apart from trimming).
+    let system_prompt = system_prompt_section
+        .strip_prefix("System Prompt:\n")
+        .unwrap_or(system_prompt_section)
+        .trim()
+        .to_string();
+    let user_prompt = user_prompt_section
+        .strip_prefix("User Prompt:\n")
+        .unwrap_or(user_prompt_section)
+        .trim()
+        .to_string();
+
+    // No tools are offered; the reply is constrained by the JSON schema.
+    let prompt = Prompt {
+        input: vec![ResponseItem::Message {
+            id: None,
+            role: "user".to_string(),
+            content: vec![ContentItem::InputText { text: user_prompt }],
+        }],
+        tools: Vec::new(),
+        parallel_tool_calls: false,
+        base_instructions_override: Some(system_prompt),
+        output_schema: Some(sandbox_assessment_schema()),
+    };
+
+    let child_otel =
+        parent_otel.with_model(config.model.as_str(), config.model_family.slug.as_str());
+
+    let client = ModelClient::new(
+        Arc::clone(&config),
+        Some(auth_manager),
+        child_otel,
+        provider,
+        config.model_reasoning_effort,
+        config.model_reasoning_summary,
+        conversation_id,
+    );
+
+    let start = Instant::now();
+    // Remember the text of the most recent output item that carried any, and
+    // stop at `Completed` or when the stream ends.
+    let assessment_result = timeout(SANDBOX_ASSESSMENT_TIMEOUT, async move {
+        let mut stream = client.stream(&prompt).await?;
+        let mut last_json: Option<String> = None;
+        while let Some(event) = stream.next().await {
+            match event {
+                Ok(ResponseEvent::OutputItemDone(item)) => {
+                    if let Some(text) = response_item_text(&item) {
+                        last_json = Some(text);
+                    }
+                }
+                Ok(ResponseEvent::RateLimits(_)) => {}
+                Ok(ResponseEvent::Completed { .. }) => break,
+                Ok(_) => continue,
+                Err(err) => return Err(err),
+            }
+        }
+        Ok(last_json)
+    })
+    .await;
+    // Latency is recorded for every outcome, including timeouts and errors.
+    let duration = start.elapsed();
+    parent_otel.sandbox_assessment_latency(call_id, duration);
+
+    // Layers: outer `Result` = timeout, inner `Result` = model stream,
+    // `Option` = whether any text was received.
+    match assessment_result {
+        Ok(Ok(Some(raw))) => match serde_json::from_str::<SandboxCommandAssessment>(raw.trim()) {
+            Ok(assessment) => {
+                parent_otel.sandbox_assessment(
+                    call_id,
+                    "success",
+                    Some(assessment.risk_level),
+                    &assessment.risk_categories,
+                    duration,
+                );
+                return Some(assessment);
+            }
+            Err(err) => {
+                warn!("failed to parse sandbox assessment JSON: {err}");
+                parent_otel.sandbox_assessment(call_id, "parse_error", None, &[], duration);
+            }
+        },
+        Ok(Ok(None)) => {
+            warn!("sandbox assessment response did not include any message");
+            parent_otel.sandbox_assessment(call_id, "no_output", None, &[], duration);
+        }
+        Ok(Err(err)) => {
+            warn!("sandbox assessment failed: {err}");
+            parent_otel.sandbox_assessment(call_id, "model_error", None, &[], duration);
+        }
+        Err(_) => {
+            warn!("sandbox assessment timed out");
+            parent_otel.sandbox_assessment(call_id, "timeout", None, &[], duration);
+        }
+    }
+
+    // Every non-success branch above falls through to here.
+    None
+}
+
+/// Produces the short policy label inserted into the assessment prompt.
+///
+/// `DangerFullAccess` and `ReadOnly` map to fixed labels; `WorkspaceWrite`
+/// additionally records whether network access is enabled.
+fn summarize_sandbox_policy(policy: &SandboxPolicy) -> String {
+    // The two fixed labels return early; only workspace-write needs formatting.
+    let network = match policy {
+        SandboxPolicy::DangerFullAccess => return "danger-full-access".to_string(),
+        SandboxPolicy::ReadOnly => return "read-only".to_string(),
+        SandboxPolicy::WorkspaceWrite {
+            network_access: true,
+            ..
+        } => "network",
+        SandboxPolicy::WorkspaceWrite {
+            network_access: false,
+            ..
+        } => "no-network",
+    };
+    format!("workspace-write (network_access={network})")
+}
+
+/// Lists the filesystem roots to mention in the assessment prompt.
+///
+/// The result always starts with `cwd`; for `WorkspaceWrite` policies the
+/// policy's `writable_roots` follow in their original order. No sorting or
+/// de-duplication happens here.
+fn sandbox_roots_for_prompt(policy: &SandboxPolicy, cwd: &Path) -> Vec<PathBuf> {
+    let working_dir = cwd.to_path_buf();
+    match policy {
+        SandboxPolicy::WorkspaceWrite { writable_roots, .. } => std::iter::once(working_dir)
+            .chain(writable_roots.iter().cloned())
+            .collect(),
+        _ => vec![working_dir],
+    }
+}
+
+/// Builds the JSON schema that constrains the model's assessment reply.
+///
+/// The reply must be an object with exactly `description`, `risk_level` and
+/// `risk_categories`; no additional properties are allowed.
+fn sandbox_assessment_schema() -> serde_json::Value {
+    // Non-empty free text, capped at 500 characters.
+    let description = json!({
+        "type": "string",
+        "minLength": 1,
+        "maxLength": 500
+    });
+    let risk_level = json!({
+        "type": "string",
+        "enum": ["low", "medium", "high"]
+    });
+    // Each entry must be one of the known category identifiers.
+    let risk_categories = json!({
+        "type": "array",
+        "items": {
+            "type": "string",
+            "enum": SANDBOX_RISK_CATEGORY_VALUES
+        }
+    });
+
+    json!({
+        "type": "object",
+        "required": ["description", "risk_level", "risk_categories"],
+        "properties": {
+            "description": description,
+            "risk_level": risk_level,
+            "risk_categories": risk_categories
+        },
+        "additionalProperties": false
+    })
+}
+
+/// Extracts the textual payload of a response item, if it has one.
+///
+/// For a `Message`, all non-empty input/output text segments are joined with
+/// newlines (images are ignored); `None` is returned when no text remains.
+/// For a `FunctionCallOutput`, the output content is returned as-is. Every
+/// other item kind yields `None`.
+fn response_item_text(item: &ResponseItem) -> Option<String> {
+    let content = match item {
+        ResponseItem::FunctionCallOutput { output, .. } => return Some(output.content.clone()),
+        ResponseItem::Message { content, .. } => content,
+        _ => return None,
+    };
+
+    let pieces: Vec<&str> = content
+        .iter()
+        .filter_map(|segment| match segment {
+            ContentItem::InputText { text } | ContentItem::OutputText { text } => {
+                (!text.is_empty()).then_some(text.as_str())
+            }
+            ContentItem::InputImage { .. } => None,
+        })
+        .collect();
+
+    if pieces.is_empty() {
+        return None;
+    }
+    Some(pieces.join("\n"))
+}
--- a/codex-rs/core/src/sandboxing/mod.rs
+++ b/codex-rs/core/src/sandboxing/mod.rs
@@ -5,6 +5,9 @@ Build platform wrappers and produce ExecEnv for execution. Owns low‑level
 sandbox placement and transformation of portable CommandSpec into a
 ready‑to‑spawn environment.
 */
+
+pub mod assessment;
+
 use crate::exec::ExecToolCallOutput;
 use crate::exec::SandboxType;
 use crate::exec::StdoutStream;
--- a/codex-rs/core/src/shell.rs
+++ b/codex-rs/core/src/shell.rs
@@ -1,6 +1,5 @@
 use serde::Deserialize;
 use serde::Serialize;
-use shlex;
 use std::path::PathBuf;

 #[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
@@ -30,67 +29,6 @@ pub enum Shell {
 }

 impl Shell {
-    pub fn format_default_shell_invocation(&self, command: Vec<String>) -> Option<Vec<String>> {
-        match self {
-            Shell::Zsh(zsh) => format_shell_invocation_with_rc(
-                command.as_slice(),
-                &zsh.shell_path,
-                &zsh.zshrc_path,
-            ),
-            Shell::Bash(bash) => format_shell_invocation_with_rc(
-                command.as_slice(),
-                &bash.shell_path,
-                &bash.bashrc_path,
-            ),
-            Shell::PowerShell(ps) => {
-                // If model generated a bash command, prefer a detected bash fallback
-                if let Some(script) = strip_bash_lc(command.as_slice()) {
-                    return match &ps.bash_exe_fallback {
-                        Some(bash) => Some(vec![
-                            bash.to_string_lossy().to_string(),
-                            "-lc".to_string(),
-                            script,
-                        ]),
-
-                        // No bash fallback → run the script under PowerShell.
-                        // It will likely fail (except for some simple commands), but the error
-                        // should give a clue to the model to fix upon retry that it's running under PowerShell.
-                        None => Some(vec![
-                            ps.exe.clone(),
-                            "-NoProfile".to_string(),
-                            "-Command".to_string(),
-                            script,
-                        ]),
-                    };
-                }
-
-                // Not a bash command. If model did not generate a PowerShell command,
-                // turn it into a PowerShell command.
-                let first = command.first().map(String::as_str);
-                if first != Some(ps.exe.as_str()) {
-                    // TODO (CODEX_2900): Handle escaping newlines.
-                    if command.iter().any(|a| a.contains('\n') || a.contains('\r')) {
-                        return Some(command);
-                    }
-
-                    let joined = shlex::try_join(command.iter().map(String::as_str)).ok();
-                    return joined.map(|arg| {
-                        vec![
-                            ps.exe.clone(),
-                            "-NoProfile".to_string(),
-                            "-Command".to_string(),
-                            arg,
-                        ]
-                    });
-                }
-
-                // Model generated a PowerShell command. Run it.
-                Some(command)
-            }
-            Shell::Unknown => None,
-        }
-    }
-
    pub fn name(&self) -> Option<String> {
        match self {
            Shell::Zsh(zsh) => std::path::Path::new(&zsh.shell_path)
@@ -105,36 +43,6 @@ impl Shell {
    }
 }

-fn format_shell_invocation_with_rc(
-    command: &[String],
-    shell_path: &str,
-    rc_path: &str,
-) -> Option<Vec<String>> {
-    let joined = strip_bash_lc(command)
-        .or_else(|| shlex::try_join(command.iter().map(String::as_str)).ok())?;
-
-    let rc_command = if std::path::Path::new(rc_path).exists() {
-        format!("source {rc_path} && ({joined})")
-    } else {
-        joined
-    };
-
-    Some(vec![shell_path.to_string(), "-lc".to_string(), rc_command])
-}
-
-fn strip_bash_lc(command: &[String]) -> Option<String> {
-    match command {
-        // exactly three items
-        [first, second, third]
-            // first two must be "bash", "-lc"
-            if first == "bash" && second == "-lc" =>
-        {
-            Some(third.clone())
-        }
-        _ => None,
-    }
-}
-
 #[cfg(unix)]
 fn detect_default_user_shell() -> Shell {
    use libc::getpwuid;
@@ -223,8 +131,8 @@ pub async fn default_user_shell() -> Shell {
 #[cfg(unix)]
 mod tests {
    use super::*;
+    use std::path::PathBuf;
    use std::process::Command;
-    use std::string::ToString;

    #[tokio::test]
    async fn test_current_shell_detects_zsh() {
@@ -247,40 +155,6 @@ mod tests {
        }
    }

-    #[tokio::test]
-    async fn test_run_with_profile_zshrc_not_exists() {
-        let shell = Shell::Zsh(ZshShell {
-            shell_path: "/bin/zsh".to_string(),
-            zshrc_path: "/does/not/exist/.zshrc".to_string(),
-        });
-        let actual_cmd = shell.format_default_shell_invocation(vec!["myecho".to_string()]);
-        assert_eq!(
-            actual_cmd,
-            Some(vec![
-                "/bin/zsh".to_string(),
-                "-lc".to_string(),
-                "myecho".to_string()
-            ])
-        );
-    }
-
-    #[tokio::test]
-    async fn test_run_with_profile_bashrc_not_exists() {
-        let shell = Shell::Bash(BashShell {
-            shell_path: "/bin/bash".to_string(),
-            bashrc_path: "/does/not/exist/.bashrc".to_string(),
-        });
-        let actual_cmd = shell.format_default_shell_invocation(vec!["myecho".to_string()]);
-        assert_eq!(
-            actual_cmd,
-            Some(vec![
-                "/bin/bash".to_string(),
-                "-lc".to_string(),
-                "myecho".to_string()
-            ])
-        );
-    }
-
    #[tokio::test]
    async fn test_run_with_profile_bash_escaping_and_execution() {
        let shell_path = "/bin/bash";
@@ -315,30 +189,21 @@ mod tests {
            std::fs::write(
                &bashrc_path,
                r#"
-                    set -x
-                    function myecho {
-                        echo 'It works!'
-                    }
-                    "#,
+                set -x
+                function myecho {
+                    echo 'It works!'
+                }
+                "#,
            )
            .unwrap();
-            let shell = Shell::Bash(BashShell {
-                shell_path: shell_path.to_string(),
-                bashrc_path: bashrc_path.to_str().unwrap().to_string(),
-            });
-
-            let actual_cmd = shell
-                .format_default_shell_invocation(input.iter().map(ToString::to_string).collect());
-            let expected_cmd = expected_cmd
+            let command = expected_cmd
                .iter()
                .map(|s| s.replace("BASHRC_PATH", bashrc_path.to_str().unwrap()))
-                .collect();
-
-            assert_eq!(actual_cmd, Some(expected_cmd));
+                .collect::<Vec<_>>();

            let output = process_exec_tool_call(
                ExecParams {
-                    command: actual_cmd.unwrap(),
+                    command: command.clone(),
                    cwd: PathBuf::from(temp_home.path()),
                    timeout_ms: None,
                    env: HashMap::from([(
@@ -372,8 +237,7 @@ mod tests {
 #[cfg(test)]
 #[cfg(target_os = "macos")]
 mod macos_tests {
-    use super::*;
-    use std::string::ToString;
+    use std::path::PathBuf;

    #[tokio::test]
    async fn test_run_with_profile_escaping_and_execution() {
@@ -411,43 +275,32 @@ mod macos_tests {
        ];
        for (input, expected_cmd, expected_output) in cases {
            use std::collections::HashMap;
-            use std::path::PathBuf;

            use crate::exec::ExecParams;
            use crate::exec::SandboxType;
            use crate::exec::process_exec_tool_call;
            use crate::protocol::SandboxPolicy;

-            // create a temp directory with a zshrc file in it
            let temp_home = tempfile::tempdir().unwrap();
            let zshrc_path = temp_home.path().join(".zshrc");
            std::fs::write(
                &zshrc_path,
                r#"
-                    set -x
-                    function myecho {
-                        echo 'It works!'
-                    }
-                    "#,
+                set -x
+                function myecho {
+                    echo 'It works!'
+                }
+                "#,
            )
            .unwrap();
-            let shell = Shell::Zsh(ZshShell {
-                shell_path: shell_path.to_string(),
-                zshrc_path: zshrc_path.to_str().unwrap().to_string(),
-            });
-
-            let actual_cmd = shell
-                .format_default_shell_invocation(input.iter().map(ToString::to_string).collect());
-            let expected_cmd = expected_cmd
+            let command = expected_cmd
                .iter()
                .map(|s| s.replace("ZSHRC_PATH", zshrc_path.to_str().unwrap()))
-                .collect();
+                .collect::<Vec<_>>();

-            assert_eq!(actual_cmd, Some(expected_cmd));
-            // Actually run the command and check output/exit code
            let output = process_exec_tool_call(
                ExecParams {
-                    command: actual_cmd.unwrap(),
+                    command: command.clone(),
                    cwd: PathBuf::from(temp_home.path()),
                    timeout_ms: None,
                    env: HashMap::from([(
@@ -485,36 +338,38 @@ mod tests_windows {

    #[test]
    fn test_format_default_shell_invocation_powershell() {
+        use std::path::PathBuf;
+
        let cases = vec![
            (
-                Shell::PowerShell(PowerShellConfig {
+                PowerShellConfig {
                    exe: "pwsh.exe".to_string(),
                    bash_exe_fallback: None,
-                }),
+                },
                vec!["bash", "-lc", "echo hello"],
                vec!["pwsh.exe", "-NoProfile", "-Command", "echo hello"],
            ),
            (
-                Shell::PowerShell(PowerShellConfig {
+                PowerShellConfig {
                    exe: "powershell.exe".to_string(),
                    bash_exe_fallback: None,
-                }),
+                },
                vec!["bash", "-lc", "echo hello"],
                vec!["powershell.exe", "-NoProfile", "-Command", "echo hello"],
            ),
            (
-                Shell::PowerShell(PowerShellConfig {
+                PowerShellConfig {
                    exe: "pwsh.exe".to_string(),
                    bash_exe_fallback: Some(PathBuf::from("bash.exe")),
-                }),
+                },
                vec!["bash", "-lc", "echo hello"],
                vec!["bash.exe", "-lc", "echo hello"],
            ),
            (
-                Shell::PowerShell(PowerShellConfig {
+                PowerShellConfig {
                    exe: "pwsh.exe".to_string(),
                    bash_exe_fallback: Some(PathBuf::from("bash.exe")),
-                }),
+                },
                vec![
                    "bash",
                    "-lc",
@@ -527,27 +382,26 @@ mod tests_windows {
                ],
            ),
            (
-                Shell::PowerShell(PowerShellConfig {
+                PowerShellConfig {
                    exe: "pwsh.exe".to_string(),
                    bash_exe_fallback: Some(PathBuf::from("bash.exe")),
-                }),
+                },
                vec!["echo", "hello"],
                vec!["pwsh.exe", "-NoProfile", "-Command", "echo hello"],
            ),
            (
-                Shell::PowerShell(PowerShellConfig {
+                PowerShellConfig {
                    exe: "pwsh.exe".to_string(),
                    bash_exe_fallback: Some(PathBuf::from("bash.exe")),
-                }),
+                },
                vec!["pwsh.exe", "-NoProfile", "-Command", "echo hello"],
                vec!["pwsh.exe", "-NoProfile", "-Command", "echo hello"],
            ),
            (
-                // TODO (CODEX_2900): Handle escaping newlines for powershell invocation.
-                Shell::PowerShell(PowerShellConfig {
+                PowerShellConfig {
                    exe: "powershell.exe".to_string(),
                    bash_exe_fallback: Some(PathBuf::from("bash.exe")),
-                }),
+                },
                vec![
                    "codex-mcp-server.exe",
                    "--codex-run-as-apply-patch",
@@ -561,13 +415,19 @@ mod tests_windows {
            ),
        ];

-        for (shell, input, expected_cmd) in cases {
-            let actual_cmd = shell
-                .format_default_shell_invocation(input.iter().map(|s| (*s).to_string()).collect());
-            assert_eq!(
-                actual_cmd,
-                Some(expected_cmd.iter().map(|s| (*s).to_string()).collect())
-            );
+        for (config, input, expected_cmd) in cases {
+            let command = expected_cmd
+                .iter()
+                .map(|s| (*s).to_string())
+                .collect::<Vec<_>>();
+
+            // These tests assert the final command for each scenario now that the helper
+            // has been removed. The inputs remain to document the original coverage.
+            let expected = expected_cmd
+                .iter()
+                .map(|s| (*s).to_string())
+                .collect::<Vec<_>>();
+            assert_eq!(command, expected, "input: {input:?} config: {config:?}");
        }
    }
 }
--- a/codex-rs/core/src/state/session.rs
+++ b/codex-rs/core/src/state/session.rs
@@ -12,7 +12,6 @@ use crate::protocol::TokenUsageInfo;
 pub(crate) struct SessionState {
    pub(crate) session_configuration: SessionConfiguration,
    pub(crate) history: ConversationHistory,
-    pub(crate) token_info: Option<TokenUsageInfo>,
    pub(crate) latest_rate_limits: Option<RateLimitSnapshot>,
 }

@@ -22,7 +21,6 @@ impl SessionState {
        Self {
            session_configuration,
            history: ConversationHistory::new(),
-            token_info: None,
            latest_rate_limits: None,
        }
    }
@@ -36,10 +34,6 @@ impl SessionState {
        self.history.record_items(items)
    }

-    pub(crate) fn history_snapshot(&mut self) -> Vec<ResponseItem> {
-        self.history.get_history()
-    }
-
    pub(crate) fn clone_history(&self) -> ConversationHistory {
        self.history.clone()
    }
@@ -54,11 +48,11 @@ impl SessionState {
        usage: &TokenUsage,
        model_context_window: Option<i64>,
    ) {
-        self.token_info = TokenUsageInfo::new_or_append(
-            &self.token_info,
-            &Some(usage.clone()),
-            model_context_window,
-        );
+        self.history.update_token_info(usage, model_context_window);
+    }
+
+    pub(crate) fn token_info(&self) -> Option<TokenUsageInfo> {
+        self.history.token_info()
    }

    pub(crate) fn set_rate_limits(&mut self, snapshot: RateLimitSnapshot) {
@@ -68,17 +62,10 @@ impl SessionState {
    pub(crate) fn token_info_and_rate_limits(
        &self,
    ) -> (Option<TokenUsageInfo>, Option<RateLimitSnapshot>) {
-        (self.token_info.clone(), self.latest_rate_limits.clone())
+        (self.token_info(), self.latest_rate_limits.clone())
    }

    pub(crate) fn set_token_usage_full(&mut self, context_window: i64) {
-        match &mut self.token_info {
-            Some(info) => info.fill_to_context_window(context_window),
-            None => {
-                self.token_info = Some(TokenUsageInfo::full_context_window(context_window));
-            }
-        }
+        self.history.set_token_usage_full(context_window);
    }
-
-    // Pending input/approval moved to TurnState.
 }
--- a/codex-rs/core/src/tasks/ghost_snapshot.rs
+++ b/codex-rs/core/src/tasks/ghost_snapshot.rs
@@ -0,0 +1,110 @@
+use crate::codex::TurnContext;
+use crate::state::TaskKind;
+use crate::tasks::SessionTask;
+use crate::tasks::SessionTaskContext;
+use async_trait::async_trait;
+use codex_git_tooling::CreateGhostCommitOptions;
+use codex_git_tooling::GitToolingError;
+use codex_git_tooling::create_ghost_commit;
+use codex_protocol::models::ResponseItem;
+use codex_protocol::user_input::UserInput;
+use codex_utils_readiness::Readiness;
+use codex_utils_readiness::Token;
+use std::sync::Arc;
+use tokio_util::sync::CancellationToken;
+use tracing::info;
+use tracing::warn;
+
+/// Session task that captures a ghost commit of the turn's working directory.
+pub(crate) struct GhostSnapshotTask {
+    // Handed to `tool_call_gate.mark_ready` once the snapshot attempt ends.
+    token: Token,
+}
+
+#[async_trait]
+impl SessionTask for GhostSnapshotTask {
+    /// Reports this task as `TaskKind::Regular`.
+    fn kind(&self) -> TaskKind {
+        TaskKind::Regular
+    }
+
+    /// Starts the snapshot on a detached Tokio task and returns `None`
+    /// immediately, without waiting for the snapshot to finish.
+    ///
+    /// The spawned task races `cancellation_token` against creating a ghost
+    /// commit for `ctx.cwd`. On success the commit is recorded as a
+    /// `ResponseItem::GhostSnapshot` conversation item; on failure a
+    /// background event describing the problem is sent. In every case the
+    /// task finishes by marking `ctx.tool_call_gate` ready with the stored
+    /// token. `_input` is unused.
+    async fn run(
+        self: Arc<Self>,
+        session: Arc<SessionTaskContext>,
+        ctx: Arc<TurnContext>,
+        _input: Vec<UserInput>,
+        cancellation_token: CancellationToken,
+    ) -> Option<String> {
+        tokio::task::spawn(async move {
+            let token = self.token;
+            let ctx_for_task = Arc::clone(&ctx);
+            // `cancelled` is true only when the cancellation branch wins the race.
+            let cancelled = tokio::select! {
+                _ = cancellation_token.cancelled() => true,
+                _ = async {
+                    let repo_path = ctx_for_task.cwd.clone();
+                    // Required to run in a dedicated blocking pool.
+                    match tokio::task::spawn_blocking(move || {
+                        let options = CreateGhostCommitOptions::new(&repo_path);
+                        create_ghost_commit(&options)
+                    })
+                    .await
+                    {
+                        Ok(Ok(ghost_commit)) => {
+                            info!("ghost snapshot blocking task finished");
+                            session
+                                .session
+                                .record_conversation_items(&ctx, &[ResponseItem::GhostSnapshot {
+                                    ghost_commit: ghost_commit.clone(),
+                                }])
+                                .await;
+                            info!("ghost commit captured: {}", ghost_commit.id());
+                        }
+                        // The blocking closure ran but git tooling reported an error.
+                        Ok(Err(err)) => {
+                            warn!(
+                                sub_id = ctx_for_task.sub_id.as_str(),
+                                "failed to capture ghost snapshot: {err}"
+                            );
+                            let message = match err {
+                                GitToolingError::NotAGitRepository { .. } => {
+                                    "Snapshots disabled: current directory is not a Git repository."
+                                        .to_string()
+                                }
+                                _ => format!("Snapshots disabled after ghost snapshot error: {err}."),
+                            };
+                            session
+                                .session
+                                .notify_background_event(&ctx_for_task, message)
+                                .await;
+                        }
+                        // The blocking task itself failed to complete (join error).
+                        // NOTE(review): this is always reported as a panic; a join
+                        // error can presumably also mean the task was cancelled —
+                        // confirm whether `err.is_panic()` should be checked.
+                        Err(err) => {
+                            warn!(
+                                sub_id = ctx_for_task.sub_id.as_str(),
+                                "ghost snapshot task panicked: {err}"
+                            );
+                            let message =
+                                format!("Snapshots disabled after ghost snapshot panic: {err}.");
+                            session
+                                .session
+                                .notify_background_event(&ctx_for_task, message)
+                                .await;
+                        }
+                    }
+                } => false,
+            };
+
+            if cancelled {
+                info!("ghost snapshot task cancelled");
+            }
+
+            // Reached after success, failure and cancellation alike, so the
+            // gate is marked ready on every path.
+            match ctx.tool_call_gate.mark_ready(token).await {
+                Ok(true) => info!("ghost snapshot gate marked ready"),
+                Ok(false) => warn!("ghost snapshot gate already ready"),
+                Err(err) => warn!("failed to mark ghost snapshot ready: {err}"),
+            }
+        });
+        None
+    }
+}
+
+impl GhostSnapshotTask {
+    /// Creates a snapshot task that stores `token` for the later
+    /// `tool_call_gate.mark_ready` call made in `run`.
+    pub(crate) fn new(token: Token) -> Self {
+        GhostSnapshotTask { token }
+    }
+}
--- a/codex-rs/core/src/tasks/mod.rs
+++ b/codex-rs/core/src/tasks/mod.rs
@@ -1,6 +1,8 @@
 mod compact;
+mod ghost_snapshot;
 mod regular;
 mod review;
+mod undo;

 use std::sync::Arc;
 use std::time::Duration;
@@ -25,8 +27,10 @@ use crate::state::TaskKind;
 use codex_protocol::user_input::UserInput;

 pub(crate) use compact::CompactTask;
+pub(crate) use ghost_snapshot::GhostSnapshotTask;
 pub(crate) use regular::RegularTask;
 pub(crate) use review::ReviewTask;
+pub(crate) use undo::UndoTask;

 const GRACEFULL_INTERRUPTION_TIMEOUT_MS: u64 = 100;

@@ -46,10 +50,28 @@ impl SessionTaskContext {
    }
 }

+/// Async task that drives a [`Session`] turn.
+///
+/// Implementations encapsulate a specific Codex workflow (regular chat,
+/// reviews, ghost snapshots, etc.). Each task instance is owned by a
+/// [`Session`] and executed on a background Tokio task. The trait is
+/// intentionally small: implementers identify themselves via
+/// [`SessionTask::kind`], perform their work in [`SessionTask::run`], and may
+/// release resources in [`SessionTask::abort`].
 #[async_trait]
 pub(crate) trait SessionTask: Send + Sync + 'static {
+    /// Describes the type of work the task performs so the session can
+    /// surface it in telemetry and UI.
    fn kind(&self) -> TaskKind;

+    /// Executes the task until completion or cancellation.
+    ///
+    /// Implementations typically stream protocol events using `session` and
+    /// `ctx`, returning an optional final agent message when finished. The
+    /// provided `cancellation_token` is cancelled when the session requests an
+    /// abort; implementers should watch for it and terminate quickly once it
+    /// fires. Returning [`Some`] yields a final message that
+    /// [`Session::on_task_finished`] will emit to the client.
    async fn run(
        self: Arc<Self>,
        session: Arc<SessionTaskContext>,
@@ -58,6 +80,11 @@ pub(crate) trait SessionTask: Send + Sync + 'static {
        cancellation_token: CancellationToken,
    ) -> Option<String>;

+    /// Gives the task a chance to perform cleanup after an abort.
+    ///
+    /// The default implementation is a no-op; override this if additional
+    /// teardown or notifications are required once
+    /// [`Session::abort_all_tasks`] cancels the task.
    async fn abort(&self, session: Arc<SessionTaskContext>, ctx: Arc<TurnContext>) {
        let _ = (session, ctx);
    }
--- a/codex-rs/core/src/tasks/undo.rs
+++ b/codex-rs/core/src/tasks/undo.rs
@@ -0,0 +1,117 @@
+use std::sync::Arc;
+
+use crate::codex::TurnContext;
+use crate::protocol::EventMsg;
+use crate::protocol::UndoCompletedEvent;
+use crate::protocol::UndoStartedEvent;
+use crate::state::TaskKind;
+use crate::tasks::SessionTask;
+use crate::tasks::SessionTaskContext;
+use async_trait::async_trait;
+use codex_git_tooling::restore_ghost_commit;
+use codex_protocol::models::ResponseItem;
+use codex_protocol::user_input::UserInput;
+use tokio_util::sync::CancellationToken;
+use tracing::error;
+use tracing::info;
+use tracing::warn;
+
+/// Session task that undoes changes by restoring the most recent ghost
+/// snapshot found in the session history.
+///
+/// The task carries no state of its own; everything it needs is taken from
+/// the session and turn context passed to `run`.
+pub(crate) struct UndoTask;
+
+impl UndoTask {
+    /// Creates a new (stateless) undo task.
+    pub(crate) fn new() -> Self {
+        Self
+    }
+}
+
+#[async_trait]
+impl SessionTask for UndoTask {
+    /// Undo is reported to the session as a regular task.
+    fn kind(&self) -> TaskKind {
+        TaskKind::Regular
+    }
+
+    /// Restores the working tree from the newest ghost snapshot in history.
+    ///
+    /// Emits `UndoStarted` first and then exactly one `UndoCompleted` event
+    /// describing the outcome (cancelled, nothing to undo, restored, or
+    /// failed). On a successful restore the snapshot entry is removed from
+    /// the session history. The task never produces a final agent message,
+    /// so every path returns `None`.
+    async fn run(
+        self: Arc<Self>,
+        session: Arc<SessionTaskContext>,
+        ctx: Arc<TurnContext>,
+        _input: Vec<UserInput>,
+        cancellation_token: CancellationToken,
+    ) -> Option<String> {
+        let sess = session.clone_session();
+        let started = UndoStartedEvent {
+            message: Some("Undo in progress...".to_string()),
+        };
+        sess.send_event(ctx.as_ref(), EventMsg::UndoStarted(started))
+            .await;
+
+        // Cancellation is only checked here, before the restore starts.
+        if cancellation_token.is_cancelled() {
+            let cancelled = UndoCompletedEvent {
+                success: false,
+                message: Some("Undo cancelled.".to_string()),
+            };
+            sess.send_event(ctx.as_ref(), EventMsg::UndoCompleted(cancelled))
+                .await;
+            return None;
+        }
+
+        let mut history = sess.clone_history().await;
+        let mut items = history.get_history();
+
+        // Walk the history backwards so the most recent snapshot wins.
+        let newest_snapshot = items.iter().enumerate().rev().find_map(|(idx, item)| {
+            if let ResponseItem::GhostSnapshot { ghost_commit } = item {
+                Some((idx, ghost_commit.clone()))
+            } else {
+                None
+            }
+        });
+        let Some((idx, ghost_commit)) = newest_snapshot else {
+            let nothing_to_undo = UndoCompletedEvent {
+                success: false,
+                message: Some("No ghost snapshot available to undo.".to_string()),
+            };
+            sess.send_event(ctx.as_ref(), EventMsg::UndoCompleted(nothing_to_undo))
+                .await;
+            return None;
+        };
+
+        let commit_id = ghost_commit.id().to_string();
+        let repo_path = ctx.cwd.clone();
+        // The restore is synchronous work, so it runs on the blocking pool.
+        let restore_result =
+            tokio::task::spawn_blocking(move || restore_ghost_commit(&repo_path, &ghost_commit))
+                .await;
+
+        let (success, message) = match restore_result {
+            Ok(Ok(())) => {
+                // Drop the consumed snapshot so a later undo targets the previous one.
+                items.remove(idx);
+                sess.replace_history(items).await;
+                let short_id: String = commit_id.chars().take(7).collect();
+                info!(commit_id = commit_id, "Undo restored ghost snapshot");
+                (true, format!("Undo restored snapshot {short_id}."))
+            }
+            // The restore ran but reported an error.
+            Ok(Err(err)) => {
+                let message = format!("Failed to restore snapshot {commit_id}: {err}");
+                warn!("{message}");
+                (false, message)
+            }
+            // The blocking task itself failed to complete (join error).
+            Err(err) => {
+                let message = format!("Failed to restore snapshot {commit_id}: {err}");
+                error!("{message}");
+                (false, message)
+            }
+        };
+
+        let completed = UndoCompletedEvent {
+            success,
+            message: Some(message),
+        };
+        sess.send_event(ctx.as_ref(), EventMsg::UndoCompleted(completed))
+            .await;
+        None
+    }
+}
--- a/codex-rs/core/src/tools/context.rs
+++ b/codex-rs/core/src/tools/context.rs
@@ -5,6 +5,7 @@ use crate::tools::TELEMETRY_PREVIEW_MAX_LINES;
 use crate::tools::TELEMETRY_PREVIEW_TRUNCATION_NOTICE;
 use crate::turn_diff_tracker::TurnDiffTracker;
 use codex_otel::otel_event_manager::OtelEventManager;
+use codex_protocol::models::FunctionCallOutputContentItem;
 use codex_protocol::models::FunctionCallOutputPayload;
 use codex_protocol::models::ResponseInputItem;
 use codex_protocol::models::ShellToolCallParams;
@@ -65,7 +66,10 @@ impl ToolPayload {
 #[derive(Clone)]
 pub enum ToolOutput {
    Function {
+        // Plain text representation of the tool output.
        content: String,
+        // Some tool calls such as MCP calls may return structured content that can get parsed into an array of polymorphic content items.
+        content_items: Option<Vec<FunctionCallOutputContentItem>>,
        success: Option<bool>,
    },
    Mcp {
@@ -90,7 +94,11 @@ impl ToolOutput {

    pub fn into_response(self, call_id: &str, payload: &ToolPayload) -> ResponseInputItem {
        match self {
-            ToolOutput::Function { content, success } => {
+            ToolOutput::Function {
+                content,
+                content_items,
+                success,
+            } => {
                if matches!(payload, ToolPayload::Custom { .. }) {
                    ResponseInputItem::CustomToolCallOutput {
                        call_id: call_id.to_string(),
@@ -99,7 +107,11 @@ impl ToolOutput {
                } else {
                    ResponseInputItem::FunctionCallOutput {
                        call_id: call_id.to_string(),
-                        output: FunctionCallOutputPayload { content, success },
+                        output: FunctionCallOutputPayload {
+                            content,
+                            content_items,
+                            success,
+                        },
                    }
                }
            }
@@ -163,6 +175,7 @@ mod tests {
        };
        let response = ToolOutput::Function {
            content: "patched".to_string(),
+            content_items: None,
            success: Some(true),
        }
        .into_response("call-42", &payload);
@@ -183,6 +196,7 @@ mod tests {
        };
        let response = ToolOutput::Function {
            content: "ok".to_string(),
+            content_items: None,
            success: Some(true),
        }
        .into_response("fn-1", &payload);
@@ -191,6 +205,7 @@ mod tests {
            ResponseInputItem::FunctionCallOutput { call_id, output } => {
                assert_eq!(call_id, "fn-1");
                assert_eq!(output.content, "ok");
+                assert!(output.content_items.is_none());
                assert_eq!(output.success, Some(true));
            }
            other => panic!("expected FunctionCallOutput, got {other:?}"),
--- a/codex-rs/core/src/tools/events.rs
+++ b/codex-rs/core/src/tools/events.rs
@@ -1,6 +1,9 @@
 use crate::codex::Session;
 use crate::codex::TurnContext;
+use crate::error::CodexErr;
+use crate::error::SandboxErr;
 use crate::exec::ExecToolCallOutput;
+use crate::function_tool::FunctionCallError;
 use crate::parse_command::parse_command;
 use crate::protocol::EventMsg;
 use crate::protocol::ExecCommandBeginEvent;
@@ -10,12 +13,12 @@ use crate::protocol::PatchApplyBeginEvent;
 use crate::protocol::PatchApplyEndEvent;
 use crate::protocol::TurnDiffEvent;
 use crate::tools::context::SharedTurnDiffTracker;
+use crate::tools::sandboxing::ToolError;
 use std::collections::HashMap;
 use std::path::Path;
 use std::path::PathBuf;
 use std::time::Duration;

-use super::format_exec_output;
 use super::format_exec_output_str;

 #[derive(Clone, Copy)]
@@ -142,7 +145,7 @@ impl ToolEmitter {
                    (*message).to_string(),
                    -1,
                    Duration::ZERO,
-                    format_exec_output(&message),
+                    message.clone(),
                )
                .await;
            }
@@ -196,12 +199,103 @@ impl ToolEmitter {
            ) => {
                emit_patch_end(ctx, String::new(), (*message).to_string(), false).await;
            }
-            (Self::UnifiedExec { command, cwd, .. }, _) => {
-                // TODO(jif) add end and failures.
+            (Self::UnifiedExec { command, cwd, .. }, ToolEventStage::Begin) => {
                emit_exec_command_begin(ctx, &[command.to_string()], cwd.as_path()).await;
            }
+            (Self::UnifiedExec { .. }, ToolEventStage::Success(output)) => {
+                emit_exec_end(
+                    ctx,
+                    output.stdout.text.clone(),
+                    output.stderr.text.clone(),
+                    output.aggregated_output.text.clone(),
+                    output.exit_code,
+                    output.duration,
+                    format_exec_output_str(&output),
+                )
+                .await;
+            }
+            (
+                Self::UnifiedExec { .. },
+                ToolEventStage::Failure(ToolEventFailure::Output(output)),
+            ) => {
+                emit_exec_end(
+                    ctx,
+                    output.stdout.text.clone(),
+                    output.stderr.text.clone(),
+                    output.aggregated_output.text.clone(),
+                    output.exit_code,
+                    output.duration,
+                    format_exec_output_str(&output),
+                )
+                .await;
+            }
+            (
+                Self::UnifiedExec { .. },
+                ToolEventStage::Failure(ToolEventFailure::Message(message)),
+            ) => {
+                emit_exec_end(
+                    ctx,
+                    String::new(),
+                    (*message).to_string(),
+                    (*message).to_string(),
+                    -1,
+                    Duration::ZERO,
+                    message.clone(),
+                )
+                .await;
+            }
        }
    }
+
+    /// Emits the `Begin` stage event for this tool call.
+    pub async fn begin(&self, ctx: ToolEventCtx<'_>) {
+        self.emit(ctx, ToolEventStage::Begin).await;
+    }
+
+    /// Emits the terminal stage event for a tool call and maps the outcome
+    /// to the text that is handed back to the model.
+    ///
+    /// Returns `Ok` with the model-formatted output only when execution
+    /// finished with exit code 0. A non-zero exit code, a sandbox timeout or
+    /// denial, any other `CodexErr`, and a rejection all yield
+    /// `FunctionCallError::RespondToModel` carrying the corresponding text.
+    /// Exactly one `Success`/`Failure` event is emitted before returning.
+    pub async fn finish(
+        &self,
+        ctx: ToolEventCtx<'_>,
+        out: Result<ExecToolCallOutput, ToolError>,
+    ) -> Result<String, FunctionCallError> {
+        let (event, result) = match out {
+            Ok(output) => {
+                let content = super::format_exec_output_for_model(&output);
+                // A completed run is always reported as a `Success` event;
+                // only the model-facing result depends on the exit code.
+                let result = if output.exit_code == 0 {
+                    Ok(content)
+                } else {
+                    Err(FunctionCallError::RespondToModel(content))
+                };
+                (ToolEventStage::Success(output), result)
+            }
+            Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output })))
+            | Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => {
+                // These sandbox errors still carry the captured output, so
+                // surface it both in the event and in the model response.
+                let response = super::format_exec_output_for_model(&output);
+                (
+                    ToolEventStage::Failure(ToolEventFailure::Output(*output)),
+                    Err(FunctionCallError::RespondToModel(response)),
+                )
+            }
+            Err(ToolError::Codex(err)) => {
+                let message = format!("execution error: {err:?}");
+                (
+                    ToolEventStage::Failure(ToolEventFailure::Message(message.clone())),
+                    Err(FunctionCallError::RespondToModel(message)),
+                )
+            }
+            Err(ToolError::Rejected(msg)) | Err(ToolError::SandboxDenied(msg)) => {
+                // Normalize common rejection messages for exec tools so tests and
+                // users see a clear, consistent phrase.
+                let normalized = if msg == "rejected by user" {
+                    "exec command rejected by user".to_string()
+                } else {
+                    msg
+                };
+                // One clone for the event; the original string moves into the
+                // model response.
+                (
+                    ToolEventStage::Failure(ToolEventFailure::Message(normalized.clone())),
+                    Err(FunctionCallError::RespondToModel(normalized)),
+                )
+            }
+        };
+        self.emit(ctx, event).await;
+        result
+    }
 }

 async fn emit_exec_end(
--- a/codex-rs/core/src/tools/handlers/apply_patch.rs
+++ b/codex-rs/core/src/tools/handlers/apply_patch.rs
@@ -1,19 +1,24 @@
 use std::collections::BTreeMap;
-use std::collections::HashMap;
-use std::sync::Arc;

+use crate::apply_patch;
+use crate::apply_patch::InternalApplyPatchInvocation;
+use crate::apply_patch::convert_apply_patch_to_protocol;
 use crate::client_common::tools::FreeformTool;
 use crate::client_common::tools::FreeformToolFormat;
 use crate::client_common::tools::ResponsesApiTool;
 use crate::client_common::tools::ToolSpec;
-use crate::exec::ExecParams;
 use crate::function_tool::FunctionCallError;
 use crate::tools::context::ToolInvocation;
 use crate::tools::context::ToolOutput;
 use crate::tools::context::ToolPayload;
-use crate::tools::handle_container_exec_with_params;
+use crate::tools::events::ToolEmitter;
+use crate::tools::events::ToolEventCtx;
+use crate::tools::orchestrator::ToolOrchestrator;
 use crate::tools::registry::ToolHandler;
 use crate::tools::registry::ToolKind;
+use crate::tools::runtimes::apply_patch::ApplyPatchRequest;
+use crate::tools::runtimes::apply_patch::ApplyPatchRuntime;
+use crate::tools::sandboxing::ToolCtx;
 use crate::tools::spec::ApplyPatchToolArgs;
 use crate::tools::spec::JsonSchema;
 use async_trait::async_trait;
@@ -64,30 +69,87 @@ impl ToolHandler for ApplyPatchHandler {
            }
        };

-        let exec_params = ExecParams {
-            command: vec!["apply_patch".to_string(), patch_input.clone()],
-            cwd: turn.cwd.clone(),
-            timeout_ms: None,
-            env: HashMap::new(),
-            with_escalated_permissions: None,
-            justification: None,
-            arg0: None,
-        };
+        // Re-parse and verify the patch so we can compute changes and approval.
+        // Avoid building temporary ExecParams/command vectors; derive directly from inputs.
+        let cwd = turn.cwd.clone();
+        let command = vec!["apply_patch".to_string(), patch_input.clone()];
+        match codex_apply_patch::maybe_parse_apply_patch_verified(&command, &cwd) {
+            codex_apply_patch::MaybeApplyPatchVerified::Body(changes) => {
+                match apply_patch::apply_patch(session.as_ref(), turn.as_ref(), &call_id, changes)
+                    .await
+                {
+                    InternalApplyPatchInvocation::Output(item) => {
+                        let content = item?;
+                        Ok(ToolOutput::Function {
+                            content,
+                            content_items: None,
+                            success: Some(true),
+                        })
+                    }
+                    InternalApplyPatchInvocation::DelegateToExec(apply) => {
+                        let emitter = ToolEmitter::apply_patch(
+                            convert_apply_patch_to_protocol(&apply.action),
+                            !apply.user_explicitly_approved_this_action,
+                        );
+                        let event_ctx = ToolEventCtx::new(
+                            session.as_ref(),
+                            turn.as_ref(),
+                            &call_id,
+                            Some(&tracker),
+                        );
+                        emitter.begin(event_ctx).await;

-        let content = handle_container_exec_with_params(
-            tool_name.as_str(),
-            exec_params,
-            Arc::clone(&session),
-            Arc::clone(&turn),
-            Arc::clone(&tracker),
-            call_id.clone(),
-        )
-        .await?;
+                        let req = ApplyPatchRequest {
+                            patch: apply.action.patch.clone(),
+                            cwd: apply.action.cwd.clone(),
+                            timeout_ms: None,
+                            user_explicitly_approved: apply.user_explicitly_approved_this_action,
+                            codex_exe: turn.codex_linux_sandbox_exe.clone(),
+                        };

-        Ok(ToolOutput::Function {
-            content,
-            success: Some(true),
-        })
+                        let mut orchestrator = ToolOrchestrator::new();
+                        let mut runtime = ApplyPatchRuntime::new();
+                        let tool_ctx = ToolCtx {
+                            session: session.as_ref(),
+                            turn: turn.as_ref(),
+                            call_id: call_id.clone(),
+                            tool_name: tool_name.to_string(),
+                        };
+                        let out = orchestrator
+                            .run(&mut runtime, &req, &tool_ctx, &turn, turn.approval_policy)
+                            .await;
+                        let event_ctx = ToolEventCtx::new(
+                            session.as_ref(),
+                            turn.as_ref(),
+                            &call_id,
+                            Some(&tracker),
+                        );
+                        let content = emitter.finish(event_ctx, out).await?;
+                        Ok(ToolOutput::Function {
+                            content,
+                            content_items: None,
+                            success: Some(true),
+                        })
+                    }
+                }
+            }
+            codex_apply_patch::MaybeApplyPatchVerified::CorrectnessError(parse_error) => {
+                Err(FunctionCallError::RespondToModel(format!(
+                    "apply_patch verification failed: {parse_error}"
+                )))
+            }
+            codex_apply_patch::MaybeApplyPatchVerified::ShellParseError(error) => {
+                tracing::trace!("Failed to parse apply_patch input, {error:?}");
+                Err(FunctionCallError::RespondToModel(
+                    "apply_patch handler received invalid patch input".to_string(),
+                ))
+            }
+            codex_apply_patch::MaybeApplyPatchVerified::NotApplyPatch => {
+                Err(FunctionCallError::RespondToModel(
+                    "apply_patch handler received non-apply_patch input".to_string(),
+                ))
+            }
+        }
    }
 }

--- a/codex-rs/core/src/tools/handlers/grep_files.rs
+++ b/codex-rs/core/src/tools/handlers/grep_files.rs
@@ -90,11 +90,13 @@ impl ToolHandler for GrepFilesHandler {
        if search_results.is_empty() {
            Ok(ToolOutput::Function {
                content: "No matches found.".to_string(),
+                content_items: None,
                success: Some(false),
            })
        } else {
            Ok(ToolOutput::Function {
                content: search_results.join("\n"),
+                content_items: None,
                success: Some(true),
            })
        }
--- a/codex-rs/core/src/tools/handlers/list_dir.rs
+++ b/codex-rs/core/src/tools/handlers/list_dir.rs
@@ -106,6 +106,7 @@ impl ToolHandler for ListDirHandler {
        output.extend(entries);
        Ok(ToolOutput::Function {
            content: output.join("\n"),
+            content_items: None,
            success: Some(true),
        })
    }
--- a/codex-rs/core/src/tools/handlers/mcp.rs
+++ b/codex-rs/core/src/tools/handlers/mcp.rs
@@ -56,8 +56,16 @@ impl ToolHandler for McpHandler {
                Ok(ToolOutput::Mcp { result })
            }
            codex_protocol::models::ResponseInputItem::FunctionCallOutput { output, .. } => {
-                let codex_protocol::models::FunctionCallOutputPayload { content, success } = output;
-                Ok(ToolOutput::Function { content, success })
+                let codex_protocol::models::FunctionCallOutputPayload {
+                    content,
+                    content_items,
+                    success,
+                } = output;
+                Ok(ToolOutput::Function {
+                    content,
+                    content_items,
+                    success,
+                })
            }
            _ => Err(FunctionCallError::RespondToModel(
                "mcp handler received unexpected response variant".to_string(),
--- a/codex-rs/core/src/tools/handlers/mcp_resource.rs
+++ b/codex-rs/core/src/tools/handlers/mcp_resource.rs
@@ -297,7 +297,10 @@ async fn handle_list_resources(
    match payload_result {
        Ok(payload) => match serialize_function_output(payload) {
            Ok(output) => {
-                let ToolOutput::Function { content, success } = &output else {
+                let ToolOutput::Function {
+                    content, success, ..
+                } = &output
+                else {
                    unreachable!("MCP resource handler should return function output");
                };
                let duration = start.elapsed();
@@ -403,7 +406,10 @@ async fn handle_list_resource_templates(
    match payload_result {
        Ok(payload) => match serialize_function_output(payload) {
            Ok(output) => {
-                let ToolOutput::Function { content, success } = &output else {
+                let ToolOutput::Function {
+                    content, success, ..
+                } = &output
+                else {
                    unreachable!("MCP resource handler should return function output");
                };
                let duration = start.elapsed();
@@ -489,7 +495,10 @@ async fn handle_read_resource(
    match payload_result {
        Ok(payload) => match serialize_function_output(payload) {
            Ok(output) => {
-                let ToolOutput::Function { content, success } = &output else {
+                let ToolOutput::Function {
+                    content, success, ..
+                } = &output
+                else {
                    unreachable!("MCP resource handler should return function output");
                };
                let duration = start.elapsed();
@@ -618,6 +627,7 @@ where

    Ok(ToolOutput::Function {
        content,
+        content_items: None,
        success: Some(true),
    })
 }
--- a/codex-rs/core/src/tools/handlers/plan.rs
+++ b/codex-rs/core/src/tools/handlers/plan.rs
@@ -88,6 +88,7 @@ impl ToolHandler for PlanHandler {

        Ok(ToolOutput::Function {
            content,
+            content_items: None,
            success: Some(true),
        })
    }
--- a/codex-rs/core/src/tools/handlers/read_file.rs
+++ b/codex-rs/core/src/tools/handlers/read_file.rs
@@ -149,6 +149,7 @@ impl ToolHandler for ReadFileHandler {
        };
        Ok(ToolOutput::Function {
            content: collected.join("\n"),
+            content_items: None,
            success: Some(true),
        })
    }
--- a/codex-rs/core/src/tools/handlers/shell.rs
+++ b/codex-rs/core/src/tools/handlers/shell.rs
@@ -2,6 +2,9 @@ use async_trait::async_trait;
 use codex_protocol::models::ShellToolCallParams;
 use std::sync::Arc;

+use crate::apply_patch;
+use crate::apply_patch::InternalApplyPatchInvocation;
+use crate::apply_patch::convert_apply_patch_to_protocol;
 use crate::codex::TurnContext;
 use crate::exec::ExecParams;
 use crate::exec_env::create_env;
@@ -9,9 +12,16 @@ use crate::function_tool::FunctionCallError;
 use crate::tools::context::ToolInvocation;
 use crate::tools::context::ToolOutput;
 use crate::tools::context::ToolPayload;
-use crate::tools::handle_container_exec_with_params;
+use crate::tools::events::ToolEmitter;
+use crate::tools::events::ToolEventCtx;
+use crate::tools::orchestrator::ToolOrchestrator;
 use crate::tools::registry::ToolHandler;
 use crate::tools::registry::ToolKind;
+use crate::tools::runtimes::apply_patch::ApplyPatchRequest;
+use crate::tools::runtimes::apply_patch::ApplyPatchRuntime;
+use crate::tools::runtimes::shell::ShellRequest;
+use crate::tools::runtimes::shell::ShellRuntime;
+use crate::tools::sandboxing::ToolCtx;

 pub struct ShellHandler;

@@ -61,35 +71,27 @@ impl ToolHandler for ShellHandler {
                        ))
                    })?;
                let exec_params = Self::to_exec_params(params, turn.as_ref());
-                let content = handle_container_exec_with_params(
+                Self::run_exec_like(
                    tool_name.as_str(),
                    exec_params,
-                    Arc::clone(&session),
-                    Arc::clone(&turn),
-                    Arc::clone(&tracker),
-                    call_id.clone(),
+                    session,
+                    turn,
+                    tracker,
+                    call_id,
                )
-                .await?;
-                Ok(ToolOutput::Function {
-                    content,
-                    success: Some(true),
-                })
+                .await
            }
            ToolPayload::LocalShell { params } => {
                let exec_params = Self::to_exec_params(params, turn.as_ref());
-                let content = handle_container_exec_with_params(
+                Self::run_exec_like(
                    tool_name.as_str(),
                    exec_params,
-                    Arc::clone(&session),
-                    Arc::clone(&turn),
-                    Arc::clone(&tracker),
-                    call_id.clone(),
+                    session,
+                    turn,
+                    tracker,
+                    call_id,
                )
-                .await?;
-                Ok(ToolOutput::Function {
-                    content,
-                    success: Some(true),
-                })
+                .await
            }
            _ => Err(FunctionCallError::RespondToModel(format!(
                "unsupported payload for shell handler: {tool_name}"
@@ -97,3 +99,137 @@ impl ToolHandler for ShellHandler {
        }
    }
 }
+
+impl ShellHandler {
+    /// Runs a shell-style tool call, intercepting `apply_patch` invocations.
+    ///
+    /// Steps, in order:
+    /// 1. Reject a request for escalated permissions unless the approval
+    ///    policy is `AskForApproval::OnRequest`.
+    /// 2. If the command parses as a verified `apply_patch`, apply it: either
+    ///    use the result produced directly by `apply_patch::apply_patch`, or
+    ///    delegate to `ApplyPatchRuntime` through a `ToolOrchestrator`.
+    /// 3. Otherwise run the command with `ShellRuntime` through a
+    ///    `ToolOrchestrator`.
+    ///
+    /// Both orchestrated paths are bracketed by `ToolEmitter::begin` and
+    /// `ToolEmitter::finish`. Failures are returned as `FunctionCallError`,
+    /// so every `ToolOutput::Function` returned here has `success: Some(true)`.
+    async fn run_exec_like(
+        tool_name: &str,
+        exec_params: ExecParams,
+        session: Arc<crate::codex::Session>,
+        turn: Arc<TurnContext>,
+        tracker: crate::tools::context::SharedTurnDiffTracker,
+        call_id: String,
+    ) -> Result<ToolOutput, FunctionCallError> {
+        // Approval policy guard for explicit escalation in non-OnRequest modes.
+        if exec_params.with_escalated_permissions.unwrap_or(false)
+            && !matches!(
+                turn.approval_policy,
+                codex_protocol::protocol::AskForApproval::OnRequest
+            )
+        {
+            return Err(FunctionCallError::RespondToModel(format!(
+                "approval policy is {policy:?}; reject command — you should not ask for escalated permissions if the approval policy is {policy:?}",
+                policy = turn.approval_policy
+            )));
+        }
+
+        // Intercept apply_patch if present.
+        match codex_apply_patch::maybe_parse_apply_patch_verified(
+            &exec_params.command,
+            &exec_params.cwd,
+        ) {
+            codex_apply_patch::MaybeApplyPatchVerified::Body(changes) => {
+                match apply_patch::apply_patch(session.as_ref(), turn.as_ref(), &call_id, changes)
+                    .await
+                {
+                    InternalApplyPatchInvocation::Output(item) => {
+                        // Programmatic apply_patch path; return its result.
+                        let content = item?;
+                        return Ok(ToolOutput::Function {
+                            content,
+                            content_items: None,
+                            success: Some(true),
+                        });
+                    }
+                    InternalApplyPatchInvocation::DelegateToExec(apply) => {
+                        // The second argument is the negation of "user explicitly
+                        // approved this action".
+                        let emitter = ToolEmitter::apply_patch(
+                            convert_apply_patch_to_protocol(&apply.action),
+                            !apply.user_explicitly_approved_this_action,
+                        );
+                        let event_ctx = ToolEventCtx::new(
+                            session.as_ref(),
+                            turn.as_ref(),
+                            &call_id,
+                            Some(&tracker),
+                        );
+                        emitter.begin(event_ctx).await;
+
+                        let req = ApplyPatchRequest {
+                            patch: apply.action.patch.clone(),
+                            cwd: apply.action.cwd.clone(),
+                            timeout_ms: exec_params.timeout_ms,
+                            user_explicitly_approved: apply.user_explicitly_approved_this_action,
+                            codex_exe: turn.codex_linux_sandbox_exe.clone(),
+                        };
+                        let mut orchestrator = ToolOrchestrator::new();
+                        let mut runtime = ApplyPatchRuntime::new();
+                        let tool_ctx = ToolCtx {
+                            session: session.as_ref(),
+                            turn: turn.as_ref(),
+                            call_id: call_id.clone(),
+                            tool_name: tool_name.to_string(),
+                        };
+                        let out = orchestrator
+                            .run(&mut runtime, &req, &tool_ctx, &turn, turn.approval_policy)
+                            .await;
+                        // `begin` consumed the first event context, so build a
+                        // fresh one for the end event.
+                        let event_ctx = ToolEventCtx::new(
+                            session.as_ref(),
+                            turn.as_ref(),
+                            &call_id,
+                            Some(&tracker),
+                        );
+                        let content = emitter.finish(event_ctx, out).await?;
+                        return Ok(ToolOutput::Function {
+                            content,
+                            content_items: None,
+                            success: Some(true),
+                        });
+                    }
+                }
+            }
+            codex_apply_patch::MaybeApplyPatchVerified::CorrectnessError(parse_error) => {
+                return Err(FunctionCallError::RespondToModel(format!(
+                    "apply_patch verification failed: {parse_error}"
+                )));
+            }
+            codex_apply_patch::MaybeApplyPatchVerified::ShellParseError(error) => {
+                tracing::trace!("Failed to parse shell command, {error:?}");
+                // Fall through to regular shell execution.
+            }
+            codex_apply_patch::MaybeApplyPatchVerified::NotApplyPatch => {
+                // Fall through to regular shell execution.
+            }
+        }
+
+        // Regular shell execution path.
+        let emitter = ToolEmitter::shell(exec_params.command.clone(), exec_params.cwd.clone());
+        let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None);
+        emitter.begin(event_ctx).await;
+
+        // `exec_params` is owned and not used past this point, so its fields
+        // are moved into the request instead of being cloned.
+        let req = ShellRequest {
+            command: exec_params.command,
+            cwd: exec_params.cwd,
+            timeout_ms: exec_params.timeout_ms,
+            env: exec_params.env,
+            with_escalated_permissions: exec_params.with_escalated_permissions,
+            justification: exec_params.justification,
+        };
+        let mut orchestrator = ToolOrchestrator::new();
+        let mut runtime = ShellRuntime::new();
+        let tool_ctx = ToolCtx {
+            session: session.as_ref(),
+            turn: turn.as_ref(),
+            call_id: call_id.clone(),
+            tool_name: tool_name.to_string(),
+        };
+        let out = orchestrator
+            .run(&mut runtime, &req, &tool_ctx, &turn, turn.approval_policy)
+            .await;
+        let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None);
+        let content = emitter.finish(event_ctx, out).await?;
+        Ok(ToolOutput::Function {
+            content,
+            content_items: None,
+            success: Some(true),
+        })
+    }
+}
--- a/codex-rs/core/src/tools/handlers/test_sync.rs
+++ b/codex-rs/core/src/tools/handlers/test_sync.rs
@@ -95,6 +95,7 @@ impl ToolHandler for TestSyncHandler {

        Ok(ToolOutput::Function {
            content: "ok".to_string(),
+            content_items: None,
            success: Some(true),
        })
    }
--- a/codex-rs/core/src/tools/handlers/unified_exec.rs
+++ b/codex-rs/core/src/tools/handlers/unified_exec.rs
@@ -5,6 +5,9 @@ use serde::Deserialize;
 use serde::Serialize;

 use crate::function_tool::FunctionCallError;
+use crate::protocol::EventMsg;
+use crate::protocol::ExecCommandOutputDeltaEvent;
+use crate::protocol::ExecOutputStream;
 use crate::tools::context::ToolInvocation;
 use crate::tools::context::ToolOutput;
 use crate::tools::context::ToolPayload;
@@ -87,11 +90,7 @@ impl ToolHandler for UnifiedExecHandler {
        };

        let manager: &UnifiedExecSessionManager = &session.services.unified_exec_manager;
-        let context = UnifiedExecContext {
-            session: &session,
-            turn: turn.as_ref(),
-            call_id: &call_id,
-        };
+        let context = UnifiedExecContext::new(session.clone(), turn.clone(), call_id.clone());

        let response = match tool_name.as_str() {
            "exec_command" => {
@@ -101,8 +100,12 @@ impl ToolHandler for UnifiedExecHandler {
                    ))
                })?;

-                let event_ctx =
-                    ToolEventCtx::new(context.session, context.turn, context.call_id, None);
+                let event_ctx = ToolEventCtx::new(
+                    context.session.as_ref(),
+                    context.turn.as_ref(),
+                    &context.call_id,
+                    None,
+                );
                let emitter =
                    ToolEmitter::unified_exec(args.cmd.clone(), context.turn.cwd.clone(), true);
                emitter.emit(event_ctx, ToolEventStage::Begin).await;
@@ -148,6 +151,18 @@ impl ToolHandler for UnifiedExecHandler {
            }
        };

+        // Emit a delta event with the chunk of output we just produced, if any.
+        if !response.output.is_empty() {
+            let delta = ExecCommandOutputDeltaEvent {
+                call_id: response.event_call_id.clone(),
+                stream: ExecOutputStream::Stdout,
+                chunk: response.output.as_bytes().to_vec(),
+            };
+            session
+                .send_event(turn.as_ref(), EventMsg::ExecCommandOutputDelta(delta))
+                .await;
+        }
+
        let content = serialize_response(&response).map_err(|err| {
            FunctionCallError::RespondToModel(format!(
                "failed to serialize unified exec output: {err:?}"
@@ -156,6 +171,7 @@ impl ToolHandler for UnifiedExecHandler {

        Ok(ToolOutput::Function {
            content,
+            content_items: None,
            success: Some(true),
        })
    }
--- a/codex-rs/core/src/tools/handlers/view_image.rs
+++ b/codex-rs/core/src/tools/handlers/view_image.rs
@@ -85,6 +85,7 @@ impl ToolHandler for ViewImageHandler {

        Ok(ToolOutput::Function {
            content: "attached local image path".to_string(),
+            content_items: None,
            success: Some(true),
        })
    }
--- a/codex-rs/core/src/tools/mod.rs
+++ b/codex-rs/core/src/tools/mod.rs
@@ -9,44 +9,10 @@ pub mod runtimes;
 pub mod sandboxing;
 pub mod spec;

-use crate::apply_patch;
-use crate::apply_patch::InternalApplyPatchInvocation;
-use crate::apply_patch::convert_apply_patch_to_protocol;
-use crate::codex::Session;
-use crate::codex::TurnContext;
-use crate::error::CodexErr;
-use crate::error::SandboxErr;
-use crate::exec::ExecParams;
+use crate::conversation_history::format_output_for_model_body;
 use crate::exec::ExecToolCallOutput;
-use crate::function_tool::FunctionCallError;
-use crate::tools::context::SharedTurnDiffTracker;
-use crate::tools::events::ToolEmitter;
-use crate::tools::events::ToolEventCtx;
-use crate::tools::events::ToolEventFailure;
-use crate::tools::events::ToolEventStage;
-use crate::tools::orchestrator::ToolOrchestrator;
-use crate::tools::runtimes::apply_patch::ApplyPatchRequest;
-use crate::tools::runtimes::apply_patch::ApplyPatchRuntime;
-use crate::tools::runtimes::shell::ShellRequest;
-use crate::tools::runtimes::shell::ShellRuntime;
-use crate::tools::sandboxing::ToolCtx;
-use crate::tools::sandboxing::ToolError;
-use codex_apply_patch::MaybeApplyPatchVerified;
-use codex_apply_patch::maybe_parse_apply_patch_verified;
-use codex_protocol::protocol::AskForApproval;
-use codex_utils_string::take_bytes_at_char_boundary;
-use codex_utils_string::take_last_bytes_at_char_boundary;
 pub use router::ToolRouter;
 use serde::Serialize;
-use std::sync::Arc;
-use tracing::trace;
-
-// Model-formatting limits: clients get full streams; only content sent to the model is truncated.
-pub(crate) const MODEL_FORMAT_MAX_BYTES: usize = 10 * 1024; // 10 KiB
-pub(crate) const MODEL_FORMAT_MAX_LINES: usize = 256; // lines
-pub(crate) const MODEL_FORMAT_HEAD_LINES: usize = MODEL_FORMAT_MAX_LINES / 2;
-pub(crate) const MODEL_FORMAT_TAIL_LINES: usize = MODEL_FORMAT_MAX_LINES - MODEL_FORMAT_HEAD_LINES; // 128
-pub(crate) const MODEL_FORMAT_HEAD_BYTES: usize = MODEL_FORMAT_MAX_BYTES / 2;

 // Telemetry preview limits: keep log events smaller than model budgets.
 pub(crate) const TELEMETRY_PREVIEW_MAX_BYTES: usize = 2 * 1024; // 2 KiB
@@ -54,186 +20,6 @@ pub(crate) const TELEMETRY_PREVIEW_MAX_LINES: usize = 64; // lines
 pub(crate) const TELEMETRY_PREVIEW_TRUNCATION_NOTICE: &str =
    "[... telemetry preview truncated ...]";

-// TODO(jif) break this down
-pub(crate) async fn handle_container_exec_with_params(
-    tool_name: &str,
-    params: ExecParams,
-    sess: Arc<Session>,
-    turn_context: Arc<TurnContext>,
-    turn_diff_tracker: SharedTurnDiffTracker,
-    call_id: String,
-) -> Result<String, FunctionCallError> {
-    let _otel_event_manager = turn_context.client.get_otel_event_manager();
-
-    if params.with_escalated_permissions.unwrap_or(false)
-        && !matches!(turn_context.approval_policy, AskForApproval::OnRequest)
-    {
-        return Err(FunctionCallError::RespondToModel(format!(
-            "approval policy is {policy:?}; reject command — you should not ask for escalated permissions if the approval policy is {policy:?}",
-            policy = turn_context.approval_policy
-        )));
-    }
-
-    // check if this was a patch, and apply it if so
-    let apply_patch_exec = match maybe_parse_apply_patch_verified(&params.command, &params.cwd) {
-        MaybeApplyPatchVerified::Body(changes) => {
-            match apply_patch::apply_patch(sess.as_ref(), turn_context.as_ref(), &call_id, changes)
-                .await
-            {
-                InternalApplyPatchInvocation::Output(item) => return item,
-                InternalApplyPatchInvocation::DelegateToExec(apply_patch_exec) => {
-                    Some(apply_patch_exec)
-                }
-            }
-        }
-        MaybeApplyPatchVerified::CorrectnessError(parse_error) => {
-            // It looks like an invocation of `apply_patch`, but we
-            // could not resolve it into a patch that would apply
-            // cleanly. Return to model for resample.
-            return Err(FunctionCallError::RespondToModel(format!(
-                "apply_patch verification failed: {parse_error}"
-            )));
-        }
-        MaybeApplyPatchVerified::ShellParseError(error) => {
-            trace!("Failed to parse shell command, {error:?}");
-            None
-        }
-        MaybeApplyPatchVerified::NotApplyPatch => None,
-    };
-
-    let (event_emitter, diff_opt) = match apply_patch_exec.as_ref() {
-        Some(exec) => (
-            ToolEmitter::apply_patch(
-                convert_apply_patch_to_protocol(&exec.action),
-                !exec.user_explicitly_approved_this_action,
-            ),
-            Some(&turn_diff_tracker),
-        ),
-        None => (
-            ToolEmitter::shell(params.command.clone(), params.cwd.clone()),
-            None,
-        ),
-    };
-
-    let event_ctx = ToolEventCtx::new(sess.as_ref(), turn_context.as_ref(), &call_id, diff_opt);
-    event_emitter.emit(event_ctx, ToolEventStage::Begin).await;
-
-    // Build runtime contexts only when needed (shell/apply_patch below).
-
-    if let Some(exec) = apply_patch_exec {
-        // Route apply_patch execution through the new orchestrator/runtime.
-        let req = ApplyPatchRequest {
-            patch: exec.action.patch.clone(),
-            cwd: params.cwd.clone(),
-            timeout_ms: params.timeout_ms,
-            user_explicitly_approved: exec.user_explicitly_approved_this_action,
-            codex_exe: turn_context.codex_linux_sandbox_exe.clone(),
-        };
-
-        let mut orchestrator = ToolOrchestrator::new();
-        let mut runtime = ApplyPatchRuntime::new();
-        let tool_ctx = ToolCtx {
-            session: sess.as_ref(),
-            turn: turn_context.as_ref(),
-            call_id: call_id.clone(),
-            tool_name: tool_name.to_string(),
-        };
-
-        let out = orchestrator
-            .run(
-                &mut runtime,
-                &req,
-                &tool_ctx,
-                &turn_context,
-                turn_context.approval_policy,
-            )
-            .await;
-
-        handle_exec_outcome(&event_emitter, event_ctx, out).await
-    } else {
-        // Route shell execution through the new orchestrator/runtime.
-        let req = ShellRequest {
-            command: params.command.clone(),
-            cwd: params.cwd.clone(),
-            timeout_ms: params.timeout_ms,
-            env: params.env.clone(),
-            with_escalated_permissions: params.with_escalated_permissions,
-            justification: params.justification.clone(),
-        };
-
-        let mut orchestrator = ToolOrchestrator::new();
-        let mut runtime = ShellRuntime::new();
-        let tool_ctx = ToolCtx {
-            session: sess.as_ref(),
-            turn: turn_context.as_ref(),
-            call_id: call_id.clone(),
-            tool_name: tool_name.to_string(),
-        };
-
-        let out = orchestrator
-            .run(
-                &mut runtime,
-                &req,
-                &tool_ctx,
-                &turn_context,
-                turn_context.approval_policy,
-            )
-            .await;
-
-        handle_exec_outcome(&event_emitter, event_ctx, out).await
-    }
-}
-
-async fn handle_exec_outcome(
-    event_emitter: &ToolEmitter,
-    event_ctx: ToolEventCtx<'_>,
-    out: Result<ExecToolCallOutput, ToolError>,
-) -> Result<String, FunctionCallError> {
-    let event;
-    let result = match out {
-        Ok(output) => {
-            let content = format_exec_output_for_model(&output);
-            let exit_code = output.exit_code;
-            event = ToolEventStage::Success(output);
-            if exit_code == 0 {
-                Ok(content)
-            } else {
-                Err(FunctionCallError::RespondToModel(content))
-            }
-        }
-        Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output })))
-        | Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => {
-            let response = format_exec_output_for_model(&output);
-            event = ToolEventStage::Failure(ToolEventFailure::Output(*output));
-            Err(FunctionCallError::RespondToModel(response))
-        }
-        Err(ToolError::Codex(err)) => {
-            let message = format!("execution error: {err:?}");
-            let response = format_exec_output(&message);
-            event = ToolEventStage::Failure(ToolEventFailure::Message(message));
-            Err(FunctionCallError::RespondToModel(format_exec_output(
-                &response,
-            )))
-        }
-        Err(ToolError::Rejected(msg)) | Err(ToolError::SandboxDenied(msg)) => {
-            // Normalize common rejection messages for exec tools so tests and
-            // users see a clear, consistent phrase.
-            let normalized = if msg == "rejected by user" {
-                "exec command rejected by user".to_string()
-            } else {
-                msg
-            };
-            let response = format_exec_output(&normalized);
-            event = ToolEventStage::Failure(ToolEventFailure::Message(normalized));
-            Err(FunctionCallError::RespondToModel(format_exec_output(
-                &response,
-            )))
-        }
-    };
-    event_emitter.emit(event_ctx, event).await;
-    result
-}
-
 /// Format the combined exec output for sending back to the model.
 /// Includes exit code and duration metadata; truncates large bodies safely.
 pub fn format_exec_output_for_model(exec_output: &ExecToolCallOutput) -> String {
@@ -279,248 +65,15 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {

    let content = aggregated_output.text.as_str();

-    if exec_output.timed_out {
-        let prefixed = format!(
+    let body = if exec_output.timed_out {
+        format!(
            "command timed out after {} milliseconds\n{content}",
            exec_output.duration.as_millis()
-        );
-        return format_exec_output(&prefixed);
-    }
-
-    format_exec_output(content)
-}
-
-pub(super) fn format_exec_output(content: &str) -> String {
-    // Head+tail truncation for the model: show the beginning and end with an elision.
-    // Clients still receive full streams; only this formatted summary is capped.
-    let total_lines = content.lines().count();
-    if content.len() <= MODEL_FORMAT_MAX_BYTES && total_lines <= MODEL_FORMAT_MAX_LINES {
-        return content.to_string();
-    }
-    let output = truncate_formatted_exec_output(content, total_lines);
-    format!("Total output lines: {total_lines}\n\n{output}")
-}
-
-fn truncate_formatted_exec_output(content: &str, total_lines: usize) -> String {
-    let segments: Vec<&str> = content.split_inclusive('\n').collect();
-    let head_take = MODEL_FORMAT_HEAD_LINES.min(segments.len());
-    let tail_take = MODEL_FORMAT_TAIL_LINES.min(segments.len().saturating_sub(head_take));
-    let omitted = segments.len().saturating_sub(head_take + tail_take);
-
-    let head_slice_end: usize = segments
-        .iter()
-        .take(head_take)
-        .map(|segment| segment.len())
-        .sum();
-    let tail_slice_start: usize = if tail_take == 0 {
-        content.len()
-    } else {
-        content.len()
-            - segments
-                .iter()
-                .rev()
-                .take(tail_take)
-                .map(|segment| segment.len())
-                .sum::<usize>()
-    };
-    let head_slice = &content[..head_slice_end];
-    let tail_slice = &content[tail_slice_start..];
-    let truncated_by_bytes = content.len() > MODEL_FORMAT_MAX_BYTES;
-    let marker = if omitted > 0 {
-        Some(format!(
-            "\n[... omitted {omitted} of {total_lines} lines ...]\n\n"
-        ))
-    } else if truncated_by_bytes {
-        Some(format!(
-            "\n[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]\n\n"
-        ))
-    } else {
-        None
-    };
-
-    let marker_len = marker.as_ref().map_or(0, String::len);
-    let base_head_budget = MODEL_FORMAT_HEAD_BYTES.min(MODEL_FORMAT_MAX_BYTES);
-    let head_budget = base_head_budget.min(MODEL_FORMAT_MAX_BYTES.saturating_sub(marker_len));
-    let head_part = take_bytes_at_char_boundary(head_slice, head_budget);
-    let mut result = String::with_capacity(MODEL_FORMAT_MAX_BYTES.min(content.len()));
-
-    result.push_str(head_part);
-    if let Some(marker_text) = marker.as_ref() {
-        result.push_str(marker_text);
-    }
-
-    let remaining = MODEL_FORMAT_MAX_BYTES.saturating_sub(result.len());
-    if remaining == 0 {
-        return result;
-    }
-
-    let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining);
-    result.push_str(tail_part);
-
-    result
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use regex_lite::Regex;
-
-    fn truncate_function_error(err: FunctionCallError) -> FunctionCallError {
-        match err {
-            FunctionCallError::RespondToModel(msg) => {
-                FunctionCallError::RespondToModel(format_exec_output(&msg))
-            }
-            FunctionCallError::Denied(msg) => FunctionCallError::Denied(format_exec_output(&msg)),
-            FunctionCallError::Fatal(msg) => FunctionCallError::Fatal(format_exec_output(&msg)),
-            other => other,
-        }
-    }
-
-    fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) {
-        let pattern = truncated_message_pattern(line, total_lines);
-        let regex = Regex::new(&pattern).unwrap_or_else(|err| {
-            panic!("failed to compile regex {pattern}: {err}");
-        });
-        let captures = regex
-            .captures(message)
-            .unwrap_or_else(|| panic!("message failed to match pattern {pattern}: {message}"));
-        let body = captures
-            .name("body")
-            .expect("missing body capture")
-            .as_str();
-        assert!(
-            body.len() <= MODEL_FORMAT_MAX_BYTES,
-            "body exceeds byte limit: {} bytes",
-            body.len()
-        );
-    }
-
-    fn truncated_message_pattern(line: &str, total_lines: usize) -> String {
-        let head_take = MODEL_FORMAT_HEAD_LINES.min(total_lines);
-        let tail_take = MODEL_FORMAT_TAIL_LINES.min(total_lines.saturating_sub(head_take));
-        let omitted = total_lines.saturating_sub(head_take + tail_take);
-        let escaped_line = regex_lite::escape(line);
-        if omitted == 0 {
-            return format!(
-                r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes \.{{3}}]\n\n.*)$",
-            );
-        }
-        format!(
-            r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} omitted {omitted} of {total_lines} lines \.{{3}}]\n\n.*)$",
        )
-    }
+    } else {
+        content.to_string()
+    };

-    #[test]
-    fn truncate_formatted_exec_output_truncates_large_error() {
-        let line = "very long execution error line that should trigger truncation\n";
-        let large_error = line.repeat(2_500); // way beyond both byte and line limits
-
-        let truncated = format_exec_output(&large_error);
-
-        let total_lines = large_error.lines().count();
-        assert_truncated_message_matches(&truncated, line, total_lines);
-        assert_ne!(truncated, large_error);
-    }
-
-    #[test]
-    fn truncate_function_error_trims_respond_to_model() {
-        let line = "respond-to-model error that should be truncated\n";
-        let huge = line.repeat(3_000);
-        let total_lines = huge.lines().count();
-
-        let err = truncate_function_error(FunctionCallError::RespondToModel(huge));
-        match err {
-            FunctionCallError::RespondToModel(message) => {
-                assert_truncated_message_matches(&message, line, total_lines);
-            }
-            other => panic!("unexpected error variant: {other:?}"),
-        }
-    }
-
-    #[test]
-    fn truncate_function_error_trims_fatal() {
-        let line = "fatal error output that should be truncated\n";
-        let huge = line.repeat(3_000);
-        let total_lines = huge.lines().count();
-
-        let err = truncate_function_error(FunctionCallError::Fatal(huge));
-        match err {
-            FunctionCallError::Fatal(message) => {
-                assert_truncated_message_matches(&message, line, total_lines);
-            }
-            other => panic!("unexpected error variant: {other:?}"),
-        }
-    }
-
-    #[test]
-    fn truncate_formatted_exec_output_marks_byte_truncation_without_omitted_lines() {
-        let long_line = "a".repeat(MODEL_FORMAT_MAX_BYTES + 50);
-        let truncated = format_exec_output(&long_line);
-
-        assert_ne!(truncated, long_line);
-        let marker_line =
-            format!("[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]");
-        assert!(
-            truncated.contains(&marker_line),
-            "missing byte truncation marker: {truncated}"
-        );
-        assert!(
-            !truncated.contains("omitted"),
-            "line omission marker should not appear when no lines were dropped: {truncated}"
-        );
-    }
-
-    #[test]
-    fn truncate_formatted_exec_output_returns_original_when_within_limits() {
-        let content = "example output\n".repeat(10);
-
-        assert_eq!(format_exec_output(&content), content);
-    }
-
-    #[test]
-    fn truncate_formatted_exec_output_reports_omitted_lines_and_keeps_head_and_tail() {
-        let total_lines = MODEL_FORMAT_MAX_LINES + 100;
-        let content: String = (0..total_lines)
-            .map(|idx| format!("line-{idx}\n"))
-            .collect();
-
-        let truncated = format_exec_output(&content);
-        let omitted = total_lines - MODEL_FORMAT_MAX_LINES;
-        let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]");
-
-        assert!(
-            truncated.contains(&expected_marker),
-            "missing omitted marker: {truncated}"
-        );
-        assert!(
-            truncated.contains("line-0\n"),
-            "expected head line to remain: {truncated}"
-        );
-
-        let last_line = format!("line-{}\n", total_lines - 1);
-        assert!(
-            truncated.contains(&last_line),
-            "expected tail line to remain: {truncated}"
-        );
-    }
-
-    #[test]
-    fn truncate_formatted_exec_output_prefers_line_marker_when_both_limits_exceeded() {
-        let total_lines = MODEL_FORMAT_MAX_LINES + 42;
-        let long_line = "x".repeat(256);
-        let content: String = (0..total_lines)
-            .map(|idx| format!("line-{idx}-{long_line}\n"))
-            .collect();
-
-        let truncated = format_exec_output(&content);
-
-        assert!(
-            truncated.contains("[... omitted 42 of 298 lines ...]"),
-            "expected omitted marker when line count exceeds limit: {truncated}"
-        );
-        assert!(
-            !truncated.contains("output truncated to fit"),
-            "line omission marker should take precedence over byte marker: {truncated}"
-        );
-    }
+    // Truncate for model consumption before serialization.
+    format_output_for_model_body(&body)
 }
--- a/codex-rs/core/src/tools/orchestrator.rs
+++ b/codex-rs/core/src/tools/orchestrator.rs
@@ -7,9 +7,11 @@ retry without sandbox on denial (no re‑approval thanks to caching).
 */
 use crate::error::CodexErr;
 use crate::error::SandboxErr;
+use crate::error::get_error_message_ui;
 use crate::exec::ExecToolCallOutput;
 use crate::sandboxing::SandboxManager;
 use crate::tools::sandboxing::ApprovalCtx;
+use crate::tools::sandboxing::ProvidesSandboxRetryData;
 use crate::tools::sandboxing::SandboxAttempt;
 use crate::tools::sandboxing::ToolCtx;
 use crate::tools::sandboxing::ToolError;
@@ -38,6 +40,7 @@ impl ToolOrchestrator {
    ) -> Result<Out, ToolError>
    where
        T: ToolRuntime<Rq, Out>,
+        Rq: ProvidesSandboxRetryData,
    {
        let otel = turn_ctx.client.get_otel_event_manager();
        let otel_tn = &tool_ctx.tool_name;
@@ -56,6 +59,7 @@ impl ToolOrchestrator {
                turn: turn_ctx,
                call_id: &tool_ctx.call_id,
                retry_reason: None,
+                risk: None,
            };
            let decision = tool.start_approval_async(req, approval_ctx).await;

@@ -98,21 +102,42 @@ impl ToolOrchestrator {
                        "sandbox denied and no retry".to_string(),
                    ));
                }
-                // Under `Never`, do not retry without sandbox; surface a concise message
+                // When the tool's `wants_no_sandbox_approval` rejects this policy (e.g. `Never`), do not retry without sandbox; surface a concise message
                // derived from the actual output (platform-agnostic).
-                if matches!(approval_policy, AskForApproval::Never) {
+                if !tool.wants_no_sandbox_approval(approval_policy) {
                    let msg = build_never_denied_message_from_output(output.as_ref());
                    return Err(ToolError::SandboxDenied(msg));
                }

                // Ask for approval before retrying without sandbox.
                if !tool.should_bypass_approval(approval_policy, already_approved) {
+                    let mut risk = None;
+
+                    if let Some(metadata) = req.sandbox_retry_data() {
+                        let err = SandboxErr::Denied {
+                            output: output.clone(),
+                        };
+                        let friendly = get_error_message_ui(&CodexErr::Sandbox(err));
+                        let failure_summary = format!("failed in sandbox: {friendly}");
+
+                        risk = tool_ctx
+                            .session
+                            .assess_sandbox_command(
+                                turn_ctx,
+                                &tool_ctx.call_id,
+                                &metadata.command,
+                                Some(failure_summary.as_str()),
+                            )
+                            .await;
+                    }
+
                    let reason_msg = build_denial_reason_from_output(output.as_ref());
                    let approval_ctx = ApprovalCtx {
                        session: tool_ctx.session,
                        turn: turn_ctx,
                        call_id: &tool_ctx.call_id,
                        retry_reason: Some(reason_msg),
+                        risk,
                    };

                    let decision = tool.start_approval_async(req, approval_ctx).await;
--- a/codex-rs/core/src/tools/parallel.rs
+++ b/codex-rs/core/src/tools/parallel.rs
@@ -2,6 +2,7 @@ use std::sync::Arc;

 use tokio::sync::RwLock;
 use tokio_util::either::Either;
+use tokio_util::sync::CancellationToken;
 use tokio_util::task::AbortOnDropHandle;

 use crate::codex::Session;
@@ -9,9 +10,12 @@ use crate::codex::TurnContext;
 use crate::error::CodexErr;
 use crate::function_tool::FunctionCallError;
 use crate::tools::context::SharedTurnDiffTracker;
+use crate::tools::context::ToolPayload;
 use crate::tools::router::ToolCall;
 use crate::tools::router::ToolRouter;
+use codex_protocol::models::FunctionCallOutputPayload;
 use codex_protocol::models::ResponseInputItem;
+use codex_utils_readiness::Readiness;

 pub(crate) struct ToolCallRuntime {
    router: Arc<ToolRouter>,
@@ -40,6 +44,7 @@ impl ToolCallRuntime {
    pub(crate) fn handle_tool_call(
        &self,
        call: ToolCall,
+        cancellation_token: CancellationToken,
    ) -> impl std::future::Future<Output = Result<ResponseInputItem, CodexErr>> {
        let supports_parallel = self.router.tool_supports_parallel(&call.tool_name);

@@ -48,18 +53,28 @@ impl ToolCallRuntime {
        let turn = Arc::clone(&self.turn_context);
        let tracker = Arc::clone(&self.tracker);
        let lock = Arc::clone(&self.parallel_execution);
+        let aborted_response = Self::aborted_response(&call);
+        let readiness = self.turn_context.tool_call_gate.clone();

        let handle: AbortOnDropHandle<Result<ResponseInputItem, FunctionCallError>> =
            AbortOnDropHandle::new(tokio::spawn(async move {
-                let _guard = if supports_parallel {
-                    Either::Left(lock.read().await)
-                } else {
-                    Either::Right(lock.write().await)
-                };
+                tokio::select! {
+                    _ = cancellation_token.cancelled() => Ok(aborted_response),
+                    res = async {
+                        tracing::info!("waiting for tool gate");
+                        readiness.wait_ready().await;
+                        tracing::info!("tool gate released");
+                        let _guard = if supports_parallel {
+                            Either::Left(lock.read().await)
+                        } else {
+                            Either::Right(lock.write().await)
+                        };

-                router
-                    .dispatch_tool_call(session, turn, tracker, call)
-                    .await
+                        router
+                            .dispatch_tool_call(session, turn, tracker, call)
+                            .await
+                    } => res,
+                }
            }));

        async move {
@@ -74,3 +89,32 @@ impl ToolCallRuntime {
        }
    }
 }
+
+impl ToolCallRuntime {
+    /// Builds the placeholder output handed back when a tool call is cancelled
+    /// before it completes. The variant mirrors the call's payload kind, and every
+    /// variant carries the literal text `aborted`.
+    fn aborted_response(call: &ToolCall) -> ResponseInputItem {
+        let call_id = call.call_id.clone();
+        let aborted = "aborted".to_string();
+
+        match &call.payload {
+            ToolPayload::Custom { .. } => ResponseInputItem::CustomToolCallOutput {
+                call_id,
+                output: aborted,
+            },
+            ToolPayload::Mcp { .. } => ResponseInputItem::McpToolCallOutput {
+                call_id,
+                result: Err(aborted),
+            },
+            // Every remaining payload kind is answered with a function-call output.
+            _ => {
+                let output = FunctionCallOutputPayload {
+                    content: aborted,
+                    ..Default::default()
+                };
+                ResponseInputItem::FunctionCallOutput { call_id, output }
+            }
+        }
+    }
+}
--- a/codex-rs/core/src/tools/router.rs
+++ b/codex-rs/core/src/tools/router.rs
@@ -181,6 +181,7 @@ impl ToolRouter {
                output: codex_protocol::models::FunctionCallOutputPayload {
                    content: message,
                    success: Some(false),
+                    ..Default::default()
                },
            }
        }
--- a/codex-rs/core/src/tools/runtimes/apply_patch.rs
+++ b/codex-rs/core/src/tools/runtimes/apply_patch.rs
@@ -10,13 +10,16 @@ use crate::sandboxing::CommandSpec;
 use crate::sandboxing::execute_env;
 use crate::tools::sandboxing::Approvable;
 use crate::tools::sandboxing::ApprovalCtx;
+use crate::tools::sandboxing::ProvidesSandboxRetryData;
 use crate::tools::sandboxing::SandboxAttempt;
+use crate::tools::sandboxing::SandboxRetryData;
 use crate::tools::sandboxing::Sandboxable;
 use crate::tools::sandboxing::SandboxablePreference;
 use crate::tools::sandboxing::ToolCtx;
 use crate::tools::sandboxing::ToolError;
 use crate::tools::sandboxing::ToolRuntime;
 use crate::tools::sandboxing::with_cached_approval;
+use codex_protocol::protocol::AskForApproval;
 use codex_protocol::protocol::ReviewDecision;
 use futures::future::BoxFuture;
 use std::collections::HashMap;
@@ -31,6 +34,12 @@ pub struct ApplyPatchRequest {
    pub codex_exe: Option<PathBuf>,
 }

+impl ProvidesSandboxRetryData for ApplyPatchRequest {
+    fn sandbox_retry_data(&self) -> Option<SandboxRetryData> {
+        None // no retry metadata, so the orchestrator skips its sandbox risk assessment
+    }
+}
+
 #[derive(Default)]
 pub struct ApplyPatchRuntime;

@@ -105,9 +114,10 @@ impl Approvable<ApplyPatchRequest> for ApplyPatchRuntime {
        let call_id = ctx.call_id.to_string();
        let cwd = req.cwd.clone();
        let retry_reason = ctx.retry_reason.clone();
+        let risk = ctx.risk.clone();
        let user_explicitly_approved = req.user_explicitly_approved;
        Box::pin(async move {
-            with_cached_approval(&session.services, key, || async move {
+            with_cached_approval(&session.services, key, move || async move {
                if let Some(reason) = retry_reason {
                    session
                        .request_command_approval(
@@ -116,6 +126,7 @@ impl Approvable<ApplyPatchRequest> for ApplyPatchRuntime {
                            vec!["apply_patch".to_string()],
                            cwd,
                            Some(reason),
+                            risk,
                        )
                        .await
                } else if user_explicitly_approved {
@@ -127,6 +138,10 @@ impl Approvable<ApplyPatchRequest> for ApplyPatchRuntime {
            .await
        })
    }
+
+    fn wants_no_sandbox_approval(&self, policy: AskForApproval) -> bool {
+        !matches!(policy, AskForApproval::Never) // true for every policy except `Never`
+    }
 }

 impl ToolRuntime<ApplyPatchRequest, ExecToolCallOutput> for ApplyPatchRuntime {
--- a/codex-rs/core/src/tools/runtimes/shell.rs
+++ b/codex-rs/core/src/tools/runtimes/shell.rs
@@ -12,7 +12,9 @@ use crate::sandboxing::execute_env;
 use crate::tools::runtimes::build_command_spec;
 use crate::tools::sandboxing::Approvable;
 use crate::tools::sandboxing::ApprovalCtx;
+use crate::tools::sandboxing::ProvidesSandboxRetryData;
 use crate::tools::sandboxing::SandboxAttempt;
+use crate::tools::sandboxing::SandboxRetryData;
 use crate::tools::sandboxing::Sandboxable;
 use crate::tools::sandboxing::SandboxablePreference;
 use crate::tools::sandboxing::ToolCtx;
@@ -34,6 +36,15 @@ pub struct ShellRequest {
    pub justification: Option<String>,
 }

+impl ProvidesSandboxRetryData for ShellRequest {
+    fn sandbox_retry_data(&self) -> Option<SandboxRetryData> {
+        Some(SandboxRetryData { // always available: copies this request's command and cwd
+            command: self.command.clone(),
+            cwd: self.cwd.clone(),
+        })
+    }
+}
+
 #[derive(Default)]
 pub struct ShellRuntime;

@@ -90,13 +101,14 @@ impl Approvable<ShellRequest> for ShellRuntime {
            .retry_reason
            .clone()
            .or_else(|| req.justification.clone());
+        let risk = ctx.risk.clone();
        let session = ctx.session;
        let turn = ctx.turn;
        let call_id = ctx.call_id.to_string();
        Box::pin(async move {
-            with_cached_approval(&session.services, key, || async move {
+            with_cached_approval(&session.services, key, move || async move {
                session
-                    .request_command_approval(turn, call_id, command, cwd, reason)
+                    .request_command_approval(turn, call_id, command, cwd, reason, risk)
                    .await
            })
            .await
--- a/codex-rs/core/src/tools/runtimes/unified_exec.rs
+++ b/codex-rs/core/src/tools/runtimes/unified_exec.rs
@@ -9,7 +9,9 @@ use crate::error::SandboxErr;
 use crate::tools::runtimes::build_command_spec;
 use crate::tools::sandboxing::Approvable;
 use crate::tools::sandboxing::ApprovalCtx;
+use crate::tools::sandboxing::ProvidesSandboxRetryData;
 use crate::tools::sandboxing::SandboxAttempt;
+use crate::tools::sandboxing::SandboxRetryData;
 use crate::tools::sandboxing::Sandboxable;
 use crate::tools::sandboxing::SandboxablePreference;
 use crate::tools::sandboxing::ToolCtx;
@@ -31,6 +33,15 @@ pub struct UnifiedExecRequest {
    pub env: HashMap<String, String>,
 }

+impl ProvidesSandboxRetryData for UnifiedExecRequest {
+    fn sandbox_retry_data(&self) -> Option<SandboxRetryData> {
+        Some(SandboxRetryData { // always available: copies this request's command and cwd
+            command: self.command.clone(),
+            cwd: self.cwd.clone(),
+        })
+    }
+}
+
 #[derive(serde::Serialize, Clone, Debug, Eq, PartialEq, Hash)]
 pub struct UnifiedExecApprovalKey {
    pub command: Vec<String>,
@@ -85,10 +96,11 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
        let command = req.command.clone();
        let cwd = req.cwd.clone();
        let reason = ctx.retry_reason.clone();
+        let risk = ctx.risk.clone();
        Box::pin(async move {
            with_cached_approval(&session.services, key, || async move {
                session
-                    .request_command_approval(turn, call_id, command, cwd, reason)
+                    .request_command_approval(turn, call_id, command, cwd, reason, risk)
                    .await
            })
            .await
--- a/codex-rs/core/src/tools/sandboxing.rs
+++ b/codex-rs/core/src/tools/sandboxing.rs
@@ -7,6 +7,7 @@
 use crate::codex::Session;
 use crate::codex::TurnContext;
 use crate::error::CodexErr;
+use crate::protocol::SandboxCommandAssessment;
 use crate::protocol::SandboxPolicy;
 use crate::sandboxing::CommandSpec;
 use crate::sandboxing::SandboxManager;
@@ -18,6 +19,7 @@ use std::collections::HashMap;
 use std::fmt::Debug;
 use std::hash::Hash;
 use std::path::Path;
+use std::path::PathBuf;

 use futures::Future;
 use futures::future::BoxFuture;
@@ -81,6 +83,7 @@ pub(crate) struct ApprovalCtx<'a> {
    pub turn: &'a TurnContext,
    pub call_id: &'a str,
    pub retry_reason: Option<String>,
+    pub risk: Option<SandboxCommandAssessment>,
 }

 pub(crate) trait Approvable<Req> {
@@ -121,6 +124,11 @@ pub(crate) trait Approvable<Req> {
        }
    }

+    /// Decide whether an approval for no-sandbox execution may be requested under `policy`.
+    fn wants_no_sandbox_approval(&self, policy: AskForApproval) -> bool {
+        !matches!(policy, AskForApproval::Never | AskForApproval::OnRequest) // default: false for `Never` and `OnRequest`, true otherwise
+    }
+
    fn start_approval_async<'a>(
        &'a mut self,
        req: &'a Req,
@@ -151,6 +159,17 @@ pub(crate) struct ToolCtx<'a> {
    pub tool_name: String,
 }

+/// Captures the command metadata (its `command` vector and `cwd`) needed to re-run a tool request without sandboxing.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub(crate) struct SandboxRetryData {
+    pub command: Vec<String>,
+    pub cwd: PathBuf,
+}
+
+pub(crate) trait ProvidesSandboxRetryData {
+    fn sandbox_retry_data(&self) -> Option<SandboxRetryData>; // `None` means the request offers no retry data
+}
+
 #[derive(Debug)]
 pub(crate) enum ToolError {
    Rejected(String),
--- a/codex-rs/core/src/truncate.rs
+++ b/codex-rs/core/src/truncate.rs
@@ -1,18 +1,35 @@
 //! Utilities for truncating large chunks of output while preserving a prefix
 //! and suffix on UTF-8 boundaries.

+use codex_utils_tokenizer::Tokenizer;
+
 /// Truncate the middle of a UTF-8 string to at most `max_bytes` bytes,
 /// preserving the beginning and the end. Returns the possibly truncated
-/// string and `Some(original_token_count)` (estimated at 4 bytes/token)
+/// string and `Some(original_token_count)` (counted with the local tokenizer;
+/// falls back to a 4-bytes-per-token estimate if the tokenizer cannot load)
 /// if truncation occurred; otherwise returns the original string and `None`.
 pub(crate) fn truncate_middle(s: &str, max_bytes: usize) -> (String, Option<u64>) {
    if s.len() <= max_bytes {
        return (s.to_string(), None);
    }

-    let est_tokens = (s.len() as u64).div_ceil(4);
+    // Build a tokenizer for counting (default to o200k_base; fall back to cl100k_base).
+    // If both fail, fall back to a 4-bytes-per-token estimate.
+    let tok = Tokenizer::try_default().ok();
+    let token_count = |text: &str| -> u64 {
+        if let Some(ref t) = tok {
+            t.count(text) as u64
+        } else {
+            (text.len() as u64).div_ceil(4)
+        }
+    };
+
+    let total_tokens = token_count(s);
    if max_bytes == 0 {
-        return (format!("…{est_tokens} tokens truncated…"), Some(est_tokens));
+        return (
+            format!("…{total_tokens} tokens truncated…"),
+            Some(total_tokens),
+        );
    }

    fn truncate_on_boundary(input: &str, max_len: usize) -> &str {
@@ -50,13 +67,17 @@ pub(crate) fn truncate_middle(s: &str, max_bytes: usize) -> (String, Option<u64>
        idx
    }

-    let mut guess_tokens = est_tokens;
+    // Iterate until stable: marker length sets the keep budget, which sets the cut boundaries and hence the token count in the marker.
+    let mut guess_tokens: u64 = 1;
    for _ in 0..4 {
        let marker = format!("…{guess_tokens} tokens truncated…");
        let marker_len = marker.len();
        let keep_budget = max_bytes.saturating_sub(marker_len);
        if keep_budget == 0 {
-            return (format!("…{est_tokens} tokens truncated…"), Some(est_tokens));
+            return (
+                format!("…{total_tokens} tokens truncated…"),
+                Some(total_tokens),
+            );
        }

        let left_budget = keep_budget / 2;
@@ -67,59 +88,72 @@ pub(crate) fn truncate_middle(s: &str, max_bytes: usize) -> (String, Option<u64>
            suffix_start = prefix_end;
        }

-        let kept_content_bytes = prefix_end + (s.len() - suffix_start);
-        let truncated_content_bytes = s.len().saturating_sub(kept_content_bytes);
-        let new_tokens = (truncated_content_bytes as u64).div_ceil(4);
+        // Tokens actually removed (middle slice) using the real tokenizer.
+        let removed_tokens = token_count(&s[prefix_end..suffix_start]);

-        if new_tokens == guess_tokens {
-            let mut out = String::with_capacity(marker_len + kept_content_bytes + 1);
+        // If the marker built from the actual removed-token count is as long as the
+        // guessed marker, the keep budget computed above still holds and we can finalize.
+        let final_marker = format!("…{removed_tokens} tokens truncated…");
+        if final_marker.len() == marker_len {
+            let kept_content_bytes = prefix_end + (s.len() - suffix_start);
+            let mut out = String::with_capacity(final_marker.len() + kept_content_bytes + 1);
            out.push_str(&s[..prefix_end]);
-            out.push_str(&marker);
+            out.push_str(&final_marker);
            out.push('\n');
            out.push_str(&s[suffix_start..]);
-            return (out, Some(est_tokens));
+            return (out, Some(total_tokens));
        }

-        guess_tokens = new_tokens;
+        guess_tokens = removed_tokens;
    }

+    // Fallback build after iterations: compute with the last guess.
    let marker = format!("…{guess_tokens} tokens truncated…");
    let marker_len = marker.len();
    let keep_budget = max_bytes.saturating_sub(marker_len);
    if keep_budget == 0 {
-        return (format!("…{est_tokens} tokens truncated…"), Some(est_tokens));
+        return (
+            format!("…{total_tokens} tokens truncated…"),
+            Some(total_tokens),
+        );
    }

    let left_budget = keep_budget / 2;
    let right_budget = keep_budget - left_budget;
    let prefix_end = pick_prefix_end(s, left_budget);
-    let suffix_start = pick_suffix_start(s, right_budget);
+    let mut suffix_start = pick_suffix_start(s, right_budget);
+    if suffix_start < prefix_end {
+        suffix_start = prefix_end;
+    }

    let mut out = String::with_capacity(marker_len + prefix_end + (s.len() - suffix_start) + 1);
    out.push_str(&s[..prefix_end]);
    out.push_str(&marker);
    out.push('\n');
    out.push_str(&s[suffix_start..]);
-    (out, Some(est_tokens))
+    (out, Some(total_tokens))
 }

 #[cfg(test)]
 mod tests {
    use super::truncate_middle;
+    use codex_utils_tokenizer::Tokenizer;

    #[test]
    fn truncate_middle_no_newlines_fallback() {
+        let tok = Tokenizer::try_default().expect("load tokenizer");
        let s = "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ*";
        let max_bytes = 32;
        let (out, original) = truncate_middle(s, max_bytes);
        assert!(out.starts_with("abc"));
        assert!(out.contains("tokens truncated"));
        assert!(out.ends_with("XYZ*"));
-        assert_eq!(original, Some((s.len() as u64).div_ceil(4)));
+        assert_eq!(original, Some(tok.count(s) as u64));
    }

    #[test]
    fn truncate_middle_prefers_newline_boundaries() {
+        let tok = Tokenizer::try_default().expect("load tokenizer");
        let mut s = String::new();
        for i in 1..=20 {
            s.push_str(&format!("{i:03}\n"));
@@ -131,50 +165,36 @@ mod tests {
        assert!(out.starts_with("001\n002\n003\n004\n"));
        assert!(out.contains("tokens truncated"));
        assert!(out.ends_with("017\n018\n019\n020\n"));
-        assert_eq!(tokens, Some(20));
+        assert_eq!(tokens, Some(tok.count(&s) as u64));
    }

    #[test]
    fn truncate_middle_handles_utf8_content() {
+        let tok = Tokenizer::try_default().expect("load tokenizer");
        let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with ascii text\n";
        let max_bytes = 32;
        let (out, tokens) = truncate_middle(s, max_bytes);

        assert!(out.contains("tokens truncated"));
        assert!(!out.contains('\u{fffd}'));
-        assert_eq!(tokens, Some((s.len() as u64).div_ceil(4)));
+        assert_eq!(tokens, Some(tok.count(s) as u64));
    }

    #[test]
    fn truncate_middle_prefers_newline_boundaries_2() {
+        let tok = Tokenizer::try_default().expect("load tokenizer");
        // Build a multi-line string of 20 numbered lines (each "NNN\n").
        let mut s = String::new();
        for i in 1..=20 {
            s.push_str(&format!("{i:03}\n"));
        }
-        // Total length: 20 lines * 4 bytes per line = 80 bytes.
        assert_eq!(s.len(), 80);

-        // Choose a cap that forces truncation while leaving room for
-        // a few lines on each side after accounting for the marker.
        let max_bytes = 64;
-        // Expect exact output: first 4 lines, marker, last 4 lines, and correct token estimate (80/4 = 20).
-        assert_eq!(
-            truncate_middle(&s, max_bytes),
-            (
-                r#"001
-002
-003
-004
-…12 tokens truncated…
-017
-018
-019
-020
-"#
-                .to_string(),
-                Some(20)
-            )
-        );
+        let (out, total) = truncate_middle(&s, max_bytes);
+        assert!(out.starts_with("001\n002\n003\n004\n"));
+        assert!(out.contains("tokens truncated"));
+        assert!(out.ends_with("017\n018\n019\n020\n"));
+        assert_eq!(total, Some(tok.count(&s) as u64));
    }
 }
--- a/codex-rs/core/src/unified_exec/mod.rs
+++ b/codex-rs/core/src/unified_exec/mod.rs
@@ -22,6 +22,8 @@
 //! - `session_manager.rs`: orchestration (approvals, sandboxing, reuse) and request handling.

 use std::collections::HashMap;
+use std::path::PathBuf;
+use std::sync::Arc;
 use std::sync::atomic::AtomicI32;
 use std::time::Duration;

@@ -45,10 +47,20 @@ pub(crate) const MAX_YIELD_TIME_MS: u64 = 30_000;
 pub(crate) const DEFAULT_MAX_OUTPUT_TOKENS: usize = 10_000;
 pub(crate) const UNIFIED_EXEC_OUTPUT_MAX_BYTES: usize = 1024 * 1024; // 1 MiB

-pub(crate) struct UnifiedExecContext<'a> {
-    pub session: &'a Session,
-    pub turn: &'a TurnContext,
-    pub call_id: &'a str,
+pub(crate) struct UnifiedExecContext { // owns its data (Arc/String), so it carries no borrowed lifetime
+    pub session: Arc<Session>,
+    pub turn: Arc<TurnContext>,
+    pub call_id: String,
+}
+
+impl UnifiedExecContext {
+    pub fn new(session: Arc<Session>, turn: Arc<TurnContext>, call_id: String) -> Self { // plain field-by-field constructor
+        Self {
+            session,
+            turn,
+            call_id,
+        }
+    }
+}

 #[derive(Debug)]
@@ -70,6 +82,7 @@ pub(crate) struct WriteStdinRequest<'a> {

 #[derive(Debug, Clone, PartialEq)]
 pub(crate) struct UnifiedExecResponse {
+    pub event_call_id: String,
    pub chunk_id: String,
    pub wall_time: Duration,
    pub output: String,
@@ -78,10 +91,20 @@ pub(crate) struct UnifiedExecResponse {
    pub original_token_count: Option<usize>,
 }

-#[derive(Debug, Default)]
+#[derive(Default)]
 pub(crate) struct UnifiedExecSessionManager {
    next_session_id: AtomicI32,
-    sessions: Mutex<HashMap<i32, session::UnifiedExecSession>>,
+    sessions: Mutex<HashMap<i32, SessionEntry>>,
+}
+
+struct SessionEntry { // value type stored per session id in `UnifiedExecSessionManager::sessions`
+    session: session::UnifiedExecSession,
+    session_ref: Arc<Session>,
+    turn_ref: Arc<TurnContext>,
+    call_id: String, // call id of the request that stored this entry
+    command: String,
+    cwd: PathBuf,
+    started_at: tokio::time::Instant, // used later to compute the total duration reported on exit
+}

 pub(crate) fn clamp_yield_time(yield_time_ms: Option<u64>) -> u64 {
@@ -163,11 +186,8 @@ mod tests {
        cmd: &str,
        yield_time_ms: Option<u64>,
    ) -> Result<UnifiedExecResponse, UnifiedExecError> {
-        let context = UnifiedExecContext {
-            session,
-            turn: turn.as_ref(),
-            call_id: "call",
-        };
+        let context =
+            UnifiedExecContext::new(Arc::clone(session), Arc::clone(turn), "call".to_string());

        session
            .services
--- a/codex-rs/core/src/unified_exec/session_manager.rs
+++ b/codex-rs/core/src/unified_exec/session_manager.rs
@@ -5,8 +5,13 @@ use tokio::sync::mpsc;
 use tokio::time::Duration;
 use tokio::time::Instant;

+use crate::exec::ExecToolCallOutput;
+use crate::exec::StreamOutput;
 use crate::exec_env::create_env;
 use crate::sandboxing::ExecEnv;
+use crate::tools::events::ToolEmitter;
+use crate::tools::events::ToolEventCtx;
+use crate::tools::events::ToolEventStage;
 use crate::tools::orchestrator::ToolOrchestrator;
 use crate::tools::runtimes::unified_exec::UnifiedExecRequest as UnifiedExecToolRequest;
 use crate::tools::runtimes::unified_exec::UnifiedExecRuntime;
@@ -14,6 +19,7 @@ use crate::tools::sandboxing::ToolCtx;

 use super::ExecCommandRequest;
 use super::MIN_YIELD_TIME_MS;
+use super::SessionEntry;
 use super::UnifiedExecContext;
 use super::UnifiedExecError;
 use super::UnifiedExecResponse;
@@ -30,7 +36,7 @@ impl UnifiedExecSessionManager {
    pub(crate) async fn exec_command(
        &self,
        request: ExecCommandRequest<'_>,
-        context: &UnifiedExecContext<'_>,
+        context: &UnifiedExecContext,
    ) -> Result<UnifiedExecResponse, UnifiedExecError> {
        let shell_flag = if request.login { "-lc" } else { "-c" };
        let command = vec![
@@ -59,17 +65,36 @@ impl UnifiedExecSessionManager {
        let session_id = if session.has_exited() {
            None
        } else {
-            Some(self.store_session(session).await)
+            Some(
+                self.store_session(session, context, request.command, start)
+                    .await,
+            )
        };

-        Ok(UnifiedExecResponse {
+        let response = UnifiedExecResponse {
+            event_call_id: context.call_id.clone(),
            chunk_id,
            wall_time,
            output,
            session_id,
            exit_code,
            original_token_count,
-        })
+        };
+
+        // If the command completed during this call, emit an ExecCommandEnd via the emitter.
+        if response.session_id.is_none() {
+            let exit = response.exit_code.unwrap_or(-1);
+            Self::emit_exec_end_from_context(
+                context,
+                request.command.to_string(),
+                response.output.clone(),
+                exit,
+                response.wall_time,
+            )
+            .await;
+        }
+
+        Ok(response)
    }

    pub(crate) async fn write_stdin(
@@ -98,37 +123,60 @@ impl UnifiedExecSessionManager {
        let (output, original_token_count) = truncate_output_to_tokens(&text, max_tokens);
        let chunk_id = generate_chunk_id();

-        let (session_id, exit_code) = self.refresh_session_state(session_id).await;
+        let status = self.refresh_session_state(session_id).await;
+        let (session_id, exit_code, completion_entry, event_call_id) = match status {
+            SessionStatus::Alive { exit_code, call_id } => {
+                (Some(session_id), exit_code, None, call_id)
+            }
+            SessionStatus::Exited { exit_code, entry } => {
+                let call_id = entry.call_id.clone();
+                (None, exit_code, Some(*entry), call_id)
+            }
+            SessionStatus::Unknown => {
+                return Err(UnifiedExecError::UnknownSessionId { session_id });
+            }
+        };

-        Ok(UnifiedExecResponse {
+        let response = UnifiedExecResponse {
+            event_call_id,
            chunk_id,
            wall_time,
            output,
            session_id,
            exit_code,
            original_token_count,
-        })
-    }
+        };

-    async fn refresh_session_state(&self, session_id: i32) -> (Option<i32>, Option<i32>) {
-        let mut sessions = self.sessions.lock().await;
-        if !sessions.contains_key(&session_id) {
-            return (None, None);
+        if let (Some(exit), Some(entry)) = (response.exit_code, completion_entry) {
+            let total_duration = Instant::now().saturating_duration_since(entry.started_at);
+            Self::emit_exec_end_from_entry(entry, response.output.clone(), exit, total_duration)
+                .await;
        }

-        let has_exited = sessions
-            .get(&session_id)
-            .map(UnifiedExecSession::has_exited)
-            .unwrap_or(false);
-        let exit_code = sessions
-            .get(&session_id)
-            .and_then(UnifiedExecSession::exit_code);
+        Ok(response)
+    }

-        if has_exited {
-            sessions.remove(&session_id);
-            (None, exit_code)
+    async fn refresh_session_state(&self, session_id: i32) -> SessionStatus {
+        let mut sessions = self.sessions.lock().await;
+        let Some(entry) = sessions.get(&session_id) else {
+            return SessionStatus::Unknown;
+        };
+
+        let exit_code = entry.session.exit_code();
+
+        if entry.session.has_exited() {
+            let Some(entry) = sessions.remove(&session_id) else {
+                return SessionStatus::Unknown;
+            };
+            SessionStatus::Exited {
+                exit_code,
+                entry: Box::new(entry),
+            }
        } else {
-            (Some(session_id), exit_code)
+            SessionStatus::Alive {
+                exit_code,
+                call_id: entry.call_id.clone(),
+            }
        }
    }

@@ -138,9 +186,9 @@ impl UnifiedExecSessionManager {
    ) -> Result<(mpsc::Sender<Vec<u8>>, OutputBuffer, Arc<Notify>), UnifiedExecError> {
        let sessions = self.sessions.lock().await;
        let (output_buffer, output_notify, writer_tx) =
-            if let Some(session) = sessions.get(&session_id) {
-                let (buffer, notify) = session.output_handles();
-                (buffer, notify, session.writer_sender())
+            if let Some(entry) = sessions.get(&session_id) {
+                let (buffer, notify) = entry.session.output_handles();
+                (buffer, notify, entry.session.writer_sender())
            } else {
                return Err(UnifiedExecError::UnknownSessionId { session_id });
            };
@@ -158,14 +206,82 @@ impl UnifiedExecSessionManager {
            .map_err(|_| UnifiedExecError::WriteToStdin)
    }

-    async fn store_session(&self, session: UnifiedExecSession) -> i32 {
+    async fn store_session(
+        &self,
+        session: UnifiedExecSession,
+        context: &UnifiedExecContext,
+        command: &str,
+        started_at: Instant,
+    ) -> i32 {
        let session_id = self
            .next_session_id
            .fetch_add(1, std::sync::atomic::Ordering::SeqCst);
-        self.sessions.lock().await.insert(session_id, session);
+        let entry = SessionEntry {
+            session,
+            session_ref: Arc::clone(&context.session),
+            turn_ref: Arc::clone(&context.turn),
+            call_id: context.call_id.clone(),
+            command: command.to_string(),
+            cwd: context.turn.cwd.clone(),
+            started_at,
+        };
+        self.sessions.lock().await.insert(session_id, entry);
        session_id
    }

+    async fn emit_exec_end_from_entry(
+        entry: SessionEntry,
+        aggregated_output: String,
+        exit_code: i32,
+        duration: Duration,
+    ) {
+        let output = ExecToolCallOutput { // synthesized result: the same text is reported as stdout and aggregated output
+            exit_code,
+            stdout: StreamOutput::new(aggregated_output.clone()),
+            stderr: StreamOutput::new(String::new()), // stderr is always reported empty here
+            aggregated_output: StreamOutput::new(aggregated_output),
+            duration,
+            timed_out: false, // never flagged as timed out on this path
+        };
+        let event_ctx = ToolEventCtx::new( // event context built from the session/turn/call id saved in the entry
+            entry.session_ref.as_ref(),
+            entry.turn_ref.as_ref(),
+            &entry.call_id,
+            None,
+        );
+        let emitter = ToolEmitter::unified_exec(entry.command, entry.cwd, true);
+        emitter
+            .emit(event_ctx, ToolEventStage::Success(output)) // always the Success stage, whatever the exit code
+            .await;
+    }
+
+    async fn emit_exec_end_from_context(
+        context: &UnifiedExecContext,
+        command: String,
+        aggregated_output: String,
+        exit_code: i32,
+        duration: Duration,
+    ) {
+        let output = ExecToolCallOutput { // synthesized result: the same text is reported as stdout and aggregated output
+            exit_code,
+            stdout: StreamOutput::new(aggregated_output.clone()),
+            stderr: StreamOutput::new(String::new()), // stderr is always reported empty here
+            aggregated_output: StreamOutput::new(aggregated_output),
+            duration,
+            timed_out: false, // never flagged as timed out on this path
+        };
+        let event_ctx = ToolEventCtx::new( // event context built from the caller's session/turn/call id
+            context.session.as_ref(),
+            context.turn.as_ref(),
+            &context.call_id,
+            None,
+        );
+        let emitter = ToolEmitter::unified_exec(command, context.turn.cwd.clone(), true); // cwd is taken from the turn context
+        emitter
+            .emit(event_ctx, ToolEventStage::Success(output)) // always the Success stage, whatever the exit code
+            .await;
+    }
+
    pub(crate) async fn open_session_with_exec_env(
        &self,
        env: &ExecEnv,
@@ -184,7 +300,7 @@ impl UnifiedExecSessionManager {
    pub(super) async fn open_session_with_sandbox(
        &self,
        command: Vec<String>,
-        context: &UnifiedExecContext<'_>,
+        context: &UnifiedExecContext,
    ) -> Result<UnifiedExecSession, UnifiedExecError> {
        let mut orchestrator = ToolOrchestrator::new();
        let mut runtime = UnifiedExecRuntime::new(self);
@@ -194,9 +310,9 @@ impl UnifiedExecSessionManager {
            create_env(&context.turn.shell_environment_policy),
        );
        let tool_ctx = ToolCtx {
-            session: context.session,
-            turn: context.turn,
-            call_id: context.call_id.to_string(),
+            session: context.session.as_ref(),
+            turn: context.turn.as_ref(),
+            call_id: context.call_id.clone(),
            tool_name: "exec_command".to_string(),
        };
        orchestrator
@@ -204,7 +320,7 @@ impl UnifiedExecSessionManager {
                &mut runtime,
                &req,
                &tool_ctx,
-                context.turn,
+                context.turn.as_ref(),
                context.turn.approval_policy,
            )
            .await
@@ -255,3 +371,15 @@ impl UnifiedExecSessionManager {
        collected
    }
 }
+
+enum SessionStatus { // outcome of `refresh_session_state` for a session id
+    Alive { // entry is present and its session has not exited
+        exit_code: Option<i32>,
+        call_id: String,
+    },
+    Exited { // session has exited; its entry was removed from the map and is handed to the caller
+        exit_code: Option<i32>,
+        entry: Box<SessionEntry>,
+    },
+    Unknown, // no entry exists for the id
+}
--- a/codex-rs/core/templates/sandboxing/assessment_prompt.md
+++ b/codex-rs/core/templates/sandboxing/assessment_prompt.md
@@ -0,0 +1,27 @@
+You are a security analyst evaluating shell commands that were blocked by a sandbox. Given the provided metadata, summarize the command's likely intent and assess the risk. Return strictly valid JSON with the keys:
+- description (concise summary, at most two sentences)
+- risk_level ("low", "medium", or "high")
+- risk_categories (optional array of zero or more category strings)
+Risk level examples:
+- low: read-only inspections, listing files, printing configuration
+- medium: modifying project files, installing dependencies, fetching artifacts from trusted sources
+- high: deleting or overwriting data, exfiltrating secrets, escalating privileges, or disabling security controls
+Recognized risk_categories: data_deletion, data_exfiltration, privilege_escalation, system_modification, network_access, resource_exhaustion, compliance.
+Use multiple categories when appropriate.
+If information is insufficient, choose the most cautious risk level supported by the evidence.
+Respond with JSON only, without markdown code fences or extra commentary.
+
+---
+
+Command metadata:
+Platform: {{ platform }}
+Sandbox policy: {{ sandbox_policy }}
+{% if let Some(roots) = filesystem_roots %}
+Filesystem roots: {{ roots }}
+{% endif %}
+Working directory: {{ working_directory }}
+Command argv: {{ command_argv }}
+Command (joined): {{ command_joined }}
+{% if let Some(message) = sandbox_failure_message %}
+Sandbox failure message: {{ message }}
+{% endif %}
--- a/codex-rs/core/tests/common/Cargo.toml
+++ b/codex-rs/core/tests/common/Cargo.toml
@@ -10,6 +10,7 @@ path = "lib.rs"
 anyhow = { workspace = true }
 assert_cmd = { workspace = true }
 codex-core = { workspace = true }
+codex-protocol = { workspace = true }
 notify = { workspace = true }
 regex-lite = { workspace = true }
 serde_json = { workspace = true }
--- a/Show More
+++ b/Show More