Merge branch 'main' into update_agents_md

.
2026-02-02 15:03:38 +00:00 · 2025-10-28 09:04:15 -07:00 · 2025-10-28 09:02:11 -07:00 · 2025-10-28 09:00:32 -07:00 · 2025-10-28 15:23:46 +00:00 · 2025-10-28 08:10:23 -07:00
167 changed files with 8946 additions and 2862 deletions
--- a/.github/workflows/rust-ci.yml
+++ b/.github/workflows/rust-ci.yml
@@ -9,7 +9,7 @@ on:
 # CI builds in debug (dev) for faster signal.

 jobs:
-  # --- Detect what changed (always runs) -------------------------------------
+  # --- Detect what changed to decide which tests to run (always runs) -------------------------------------
  changed:
    name: Detect changed areas
    runs-on: ubuntu-24.04
@@ -84,8 +84,8 @@ jobs:
        run: cargo shear

  # --- CI to validate on different os/targets --------------------------------
-  lint_build_test:
-    name: ${{ matrix.runner }} - ${{ matrix.target }}${{ matrix.profile == 'release' && ' (release)' || '' }}
+  lint_build:
+    name: Lint/Build — ${{ matrix.runner }} - ${{ matrix.target }}${{ matrix.profile == 'release' && ' (release)' || '' }}
    runs-on: ${{ matrix.runner }}
    timeout-minutes: 30
    needs: changed
@@ -94,6 +94,11 @@ jobs:
    defaults:
      run:
        working-directory: codex-rs
+    env:
+      # Speed up repeated builds across CI runs by caching compiled objects.
+      RUSTC_WRAPPER: sccache
+      CARGO_INCREMENTAL: "0"
+      SCCACHE_CACHE_SIZE: 10G

    strategy:
      fail-fast: false
@@ -159,20 +164,83 @@ jobs:
            ~/.cargo/registry/index/
            ~/.cargo/registry/cache/
            ~/.cargo/git/db/
-          key: cargo-home-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}
+          key: cargo-home-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}-${{ hashFiles('codex-rs/rust-toolchain.toml') }}
+          restore-keys: |
+            cargo-home-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-

-      - name: Restore target cache (except gnu-dev)
-        id: cache_target_restore
-        if: ${{ !(matrix.target == 'x86_64-unknown-linux-gnu' && matrix.profile != 'release') }}
+      # Install sccache and restore its cache
+      - name: Install sccache
+        uses: taiki-e/install-action@0c5db7f7f897c03b771660e91d065338615679f4 # v2
+        with:
+          tool: sccache
+          version: 0.7.5
+
+      - name: Configure sccache backend
+        shell: bash
+        run: |
+          set -euo pipefail
+          if [[ -n "${ACTIONS_CACHE_URL:-}" && -n "${ACTIONS_RUNTIME_TOKEN:-}" ]]; then
+            echo "SCCACHE_GHA_ENABLED=true" >> "$GITHUB_ENV"
+            echo "Using sccache GitHub backend"
+          else
+            echo "SCCACHE_GHA_ENABLED=false" >> "$GITHUB_ENV"
+            echo "SCCACHE_DIR=${{ github.workspace }}/.sccache" >> "$GITHUB_ENV"
+            echo "Using sccache local disk + actions/cache fallback"
+          fi
+
+      - name: Restore sccache cache (fallback)
+        if: ${{ env.SCCACHE_GHA_ENABLED != 'true' }}
+        id: cache_sccache_restore
        uses: actions/cache/restore@v4
        with:
-          path: ${{ github.workspace }}/codex-rs/target/
-          key: cargo-target-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}
+          path: ${{ github.workspace }}/.sccache/
+          key: sccache-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}-${{ github.run_id }}
+          restore-keys: |
+            sccache-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}-
+            sccache-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-
+
+      - if: ${{ matrix.target == 'x86_64-unknown-linux-musl' || matrix.target == 'aarch64-unknown-linux-musl'}}
+        name: Prepare APT cache directories (musl)
+        shell: bash
+        run: |
+          set -euo pipefail
+          sudo mkdir -p /var/cache/apt/archives /var/lib/apt/lists
+          sudo chown -R "$USER:$USER" /var/cache/apt /var/lib/apt/lists
+
+      - if: ${{ matrix.target == 'x86_64-unknown-linux-musl' || matrix.target == 'aarch64-unknown-linux-musl'}}
+        name: Restore APT cache (musl)
+        id: cache_apt_restore
+        uses: actions/cache/restore@v4
+        with:
+          path: |
+            /var/cache/apt
+          key: apt-${{ matrix.runner }}-${{ matrix.target }}-v1

      - if: ${{ matrix.target == 'x86_64-unknown-linux-musl' || matrix.target == 'aarch64-unknown-linux-musl'}}
        name: Install musl build tools
+        env:
+          DEBIAN_FRONTEND: noninteractive
+        shell: bash
        run: |
-          sudo apt install -y musl-tools pkg-config && sudo rm -rf /var/lib/apt/lists/*
+          set -euo pipefail
+          sudo apt-get -y update -o Acquire::Retries=3
+          sudo apt-get -y install --no-install-recommends musl-tools pkg-config
+
+      - name: Install cargo-chef
+        if: ${{ matrix.profile == 'release' }}
+        uses: taiki-e/install-action@0c5db7f7f897c03b771660e91d065338615679f4 # v2
+        with:
+          tool: cargo-chef
+          version: 0.1.71
+
+      - name: Pre-warm dependency cache (cargo-chef)
+        if: ${{ matrix.profile == 'release' }}
+        shell: bash
+        run: |
+          set -euo pipefail
+          RECIPE="${RUNNER_TEMP}/chef-recipe.json"
+          cargo chef prepare --recipe-path "$RECIPE"
+          cargo chef cook --recipe-path "$RECIPE" --target ${{ matrix.target }} --release --all-features

      - name: cargo clippy
        id: clippy
@@ -191,20 +259,6 @@ jobs:
          find . -name Cargo.toml -mindepth 2 -maxdepth 2 -print0 \
            | xargs -0 -n1 -I{} bash -c 'cd "$(dirname "{}")" && cargo check --profile ${{ matrix.profile }}'

-      - uses: taiki-e/install-action@0c5db7f7f897c03b771660e91d065338615679f4 # v2
-        with:
-          tool: nextest
-          version: 0.9.103
-
-      - name: tests
-        id: test
-        # Tests take too long for release builds to run them on every PR.
-        if: ${{ matrix.profile != 'release' }}
-        continue-on-error: true
-        run: cargo nextest run --all-features --no-fail-fast --target ${{ matrix.target }} --cargo-profile ci-test
-        env:
-          RUST_BACKTRACE: 1
-
      # Save caches explicitly; make non-fatal so cache packaging
      # never fails the overall job. Only save when key wasn't hit.
      - name: Save cargo home cache
@@ -217,33 +271,193 @@ jobs:
            ~/.cargo/registry/index/
            ~/.cargo/registry/cache/
            ~/.cargo/git/db/
-          key: cargo-home-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}
+          key: cargo-home-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}-${{ hashFiles('codex-rs/rust-toolchain.toml') }}

-      - name: Save target cache (except gnu-dev)
-        if: >-
-          always() && !cancelled() &&
-          (steps.cache_target_restore.outputs.cache-hit != 'true') &&
-          !(matrix.target == 'x86_64-unknown-linux-gnu' && matrix.profile != 'release')
+      - name: Save sccache cache (fallback)
+        if: always() && !cancelled() && env.SCCACHE_GHA_ENABLED != 'true'
        continue-on-error: true
        uses: actions/cache/save@v4
        with:
-          path: ${{ github.workspace }}/codex-rs/target/
-          key: cargo-target-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}
+          path: ${{ github.workspace }}/.sccache/
+          key: sccache-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}-${{ github.run_id }}
+
+      - name: sccache stats
+        if: always()
+        continue-on-error: true
+        run: sccache --show-stats || true
+
+      - name: sccache summary
+        if: always()
+        shell: bash
+        run: |
+          {
+            echo "### sccache stats — ${{ matrix.target }} (${{ matrix.profile }})";
+            echo;
+            echo '```';
+            sccache --show-stats || true;
+            echo '```';
+          } >> "$GITHUB_STEP_SUMMARY"
+
+      - name: Save APT cache (musl)
+        if: always() && !cancelled() && (matrix.target == 'x86_64-unknown-linux-musl' || matrix.target == 'aarch64-unknown-linux-musl') && steps.cache_apt_restore.outputs.cache-hit != 'true'
+        continue-on-error: true
+        uses: actions/cache/save@v4
+        with:
+          path: |
+            /var/cache/apt
+          key: apt-${{ matrix.runner }}-${{ matrix.target }}-v1

      # Fail the job if any of the previous steps failed.
      - name: verify all steps passed
        if: |
          steps.clippy.outcome == 'failure' ||
-          steps.cargo_check_all_crates.outcome == 'failure' ||
-          steps.test.outcome == 'failure'
+          steps.cargo_check_all_crates.outcome == 'failure'
        run: |
-          echo "One or more checks failed (clippy, cargo_check_all_crates, or test). See logs for details."
+          echo "One or more checks failed (clippy or cargo_check_all_crates). See logs for details."
+          exit 1
+
+  tests:
+    name: Tests — ${{ matrix.runner }} - ${{ matrix.target }}
+    runs-on: ${{ matrix.runner }}
+    timeout-minutes: 30
+    needs: changed
+    if: ${{ needs.changed.outputs.codex == 'true' || needs.changed.outputs.workflows == 'true' || github.event_name == 'push' }}
+    defaults:
+      run:
+        working-directory: codex-rs
+    env:
+      RUSTC_WRAPPER: sccache
+      CARGO_INCREMENTAL: "0"
+      SCCACHE_CACHE_SIZE: 10G
+
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - runner: macos-14
+            target: aarch64-apple-darwin
+            profile: dev
+          - runner: ubuntu-24.04
+            target: x86_64-unknown-linux-gnu
+            profile: dev
+          - runner: ubuntu-24.04-arm
+            target: aarch64-unknown-linux-gnu
+            profile: dev
+          - runner: windows-latest
+            target: x86_64-pc-windows-msvc
+            profile: dev
+          - runner: windows-11-arm
+            target: aarch64-pc-windows-msvc
+            profile: dev
+
+    steps:
+      - uses: actions/checkout@v5
+      - uses: dtolnay/rust-toolchain@1.90
+        with:
+          targets: ${{ matrix.target }}
+
+      - name: Restore cargo home cache
+        id: cache_cargo_home_restore
+        uses: actions/cache/restore@v4
+        with:
+          path: |
+            ~/.cargo/bin/
+            ~/.cargo/registry/index/
+            ~/.cargo/registry/cache/
+            ~/.cargo/git/db/
+          key: cargo-home-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}-${{ hashFiles('codex-rs/rust-toolchain.toml') }}
+          restore-keys: |
+            cargo-home-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-
+
+      - name: Install sccache
+        uses: taiki-e/install-action@0c5db7f7f897c03b771660e91d065338615679f4 # v2
+        with:
+          tool: sccache
+          version: 0.7.5
+
+      - name: Configure sccache backend
+        shell: bash
+        run: |
+          set -euo pipefail
+          if [[ -n "${ACTIONS_CACHE_URL:-}" && -n "${ACTIONS_RUNTIME_TOKEN:-}" ]]; then
+            echo "SCCACHE_GHA_ENABLED=true" >> "$GITHUB_ENV"
+            echo "Using sccache GitHub backend"
+          else
+            echo "SCCACHE_GHA_ENABLED=false" >> "$GITHUB_ENV"
+            echo "SCCACHE_DIR=${{ github.workspace }}/.sccache" >> "$GITHUB_ENV"
+            echo "Using sccache local disk + actions/cache fallback"
+          fi
+
+      - name: Restore sccache cache (fallback)
+        if: ${{ env.SCCACHE_GHA_ENABLED != 'true' }}
+        id: cache_sccache_restore
+        uses: actions/cache/restore@v4
+        with:
+          path: ${{ github.workspace }}/.sccache/
+          key: sccache-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}-${{ github.run_id }}
+          restore-keys: |
+            sccache-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}-
+            sccache-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-
+
+      - uses: taiki-e/install-action@0c5db7f7f897c03b771660e91d065338615679f4 # v2
+        with:
+          tool: nextest
+          version: 0.9.103
+
+      - name: tests
+        id: test
+        continue-on-error: true
+        run: cargo nextest run --all-features --no-fail-fast --target ${{ matrix.target }} --cargo-profile ci-test
+        env:
+          RUST_BACKTRACE: 1
+
+      - name: Save cargo home cache
+        if: always() && !cancelled() && steps.cache_cargo_home_restore.outputs.cache-hit != 'true'
+        continue-on-error: true
+        uses: actions/cache/save@v4
+        with:
+          path: |
+            ~/.cargo/bin/
+            ~/.cargo/registry/index/
+            ~/.cargo/registry/cache/
+            ~/.cargo/git/db/
+          key: cargo-home-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}-${{ hashFiles('codex-rs/rust-toolchain.toml') }}
+
+      - name: Save sccache cache (fallback)
+        if: always() && !cancelled() && env.SCCACHE_GHA_ENABLED != 'true'
+        continue-on-error: true
+        uses: actions/cache/save@v4
+        with:
+          path: ${{ github.workspace }}/.sccache/
+          key: sccache-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}-${{ github.run_id }}
+
+      - name: sccache stats
+        if: always()
+        continue-on-error: true
+        run: sccache --show-stats || true
+
+      - name: sccache summary
+        if: always()
+        shell: bash
+        run: |
+          {
+            echo "### sccache stats — ${{ matrix.target }} (tests)";
+            echo;
+            echo '```';
+            sccache --show-stats || true;
+            echo '```';
+          } >> "$GITHUB_STEP_SUMMARY"
+
+      - name: verify tests passed
+        if: steps.test.outcome == 'failure'
+        run: |
+          echo "Tests failed. See logs for details."
          exit 1

  # --- Gatherer job that you mark as the ONLY required status -----------------
  results:
    name: CI results (required)
-    needs: [changed, general, cargo_shear, lint_build_test]
+    needs: [changed, general, cargo_shear, lint_build, tests]
    if: always()
    runs-on: ubuntu-24.04
    steps:
@@ -252,7 +466,8 @@ jobs:
        run: |
          echo "general: ${{ needs.general.result }}"
          echo "shear  : ${{ needs.cargo_shear.result }}"
-          echo "matrix : ${{ needs.lint_build_test.result }}"
+          echo "lint   : ${{ needs.lint_build.result }}"
+          echo "tests  : ${{ needs.tests.result }}"

          # If nothing relevant changed (PR touching only root README, etc.),
          # declare success regardless of other jobs.
@@ -264,4 +479,10 @@ jobs:
          # Otherwise require the jobs to have succeeded
          [[ '${{ needs.general.result }}' == 'success' ]] || { echo 'general failed'; exit 1; }
          [[ '${{ needs.cargo_shear.result }}' == 'success' ]] || { echo 'cargo_shear failed'; exit 1; }
-          [[ '${{ needs.lint_build_test.result }}' == 'success' ]] || { echo 'matrix failed'; exit 1; }
+          [[ '${{ needs.lint_build.result }}' == 'success' ]] || { echo 'lint_build failed'; exit 1; }
+          [[ '${{ needs.tests.result }}' == 'success' ]] || { echo 'tests failed'; exit 1; }
+
+      - name: sccache summary note
+        if: always()
+        run: |
+          echo "Per-job sccache stats are attached to each matrix job's Step Summary."
--- a/.github/workflows/rust-release.yml
+++ b/.github/workflows/rust-release.yml
@@ -53,6 +53,10 @@ jobs:
    defaults:
      run:
        working-directory: codex-rs
+    env:
+      RUSTC_WRAPPER: sccache
+      CARGO_INCREMENTAL: "0"
+      SCCACHE_CACHE_SIZE: 10G

    strategy:
      fail-fast: false
@@ -88,14 +92,80 @@ jobs:
            ~/.cargo/registry/index/
            ~/.cargo/registry/cache/
            ~/.cargo/git/db/
-            ${{ github.workspace }}/codex-rs/target/
-          key: cargo-${{ matrix.runner }}-${{ matrix.target }}-release-${{ hashFiles('**/Cargo.lock') }}
+          key: cargo-${{ matrix.runner }}-${{ matrix.target }}-release-${{ hashFiles('**/Cargo.lock') }}-${{ hashFiles('codex-rs/rust-toolchain.toml') }}
+          restore-keys: |
+            cargo-${{ matrix.runner }}-${{ matrix.target }}-release-
+
+      - name: Install sccache
+        uses: taiki-e/install-action@0c5db7f7f897c03b771660e91d065338615679f4 # v2
+        with:
+          tool: sccache
+          version: 0.7.5
+
+      - name: Configure sccache backend
+        shell: bash
+        run: |
+          set -euo pipefail
+          if [[ -n "${ACTIONS_CACHE_URL:-}" && -n "${ACTIONS_RUNTIME_TOKEN:-}" ]]; then
+            echo "SCCACHE_GHA_ENABLED=true" >> "$GITHUB_ENV"
+            echo "Using sccache GitHub backend"
+          else
+            echo "SCCACHE_GHA_ENABLED=false" >> "$GITHUB_ENV"
+            echo "SCCACHE_DIR=${{ github.workspace }}/.sccache" >> "$GITHUB_ENV"
+            echo "Using sccache local disk + actions/cache fallback"
+          fi
+
+      - name: Restore sccache cache (fallback)
+        if: ${{ env.SCCACHE_GHA_ENABLED != 'true' }}
+        id: cache_sccache_restore
+        uses: actions/cache/restore@v4
+        with:
+          path: ${{ github.workspace }}/.sccache/
+          key: sccache-${{ matrix.runner }}-${{ matrix.target }}-release-${{ hashFiles('**/Cargo.lock') }}-${{ github.run_id }}
+          restore-keys: |
+            sccache-${{ matrix.runner }}-${{ matrix.target }}-release-${{ hashFiles('**/Cargo.lock') }}-
+            sccache-${{ matrix.runner }}-${{ matrix.target }}-release-
+
+      - if: ${{ matrix.target == 'x86_64-unknown-linux-musl' || matrix.target == 'aarch64-unknown-linux-musl'}}
+        name: Prepare APT cache directories (musl)
+        shell: bash
+        run: |
+          set -euo pipefail
+          sudo mkdir -p /var/cache/apt/archives /var/lib/apt/lists
+          sudo chown -R "$USER:$USER" /var/cache/apt /var/lib/apt/lists
+
+      - if: ${{ matrix.target == 'x86_64-unknown-linux-musl' || matrix.target == 'aarch64-unknown-linux-musl'}}
+        name: Restore APT cache (musl)
+        id: cache_apt_restore
+        uses: actions/cache/restore@v4
+        with:
+          path: |
+            /var/cache/apt
+          key: apt-${{ matrix.runner }}-${{ matrix.target }}-v1

      - if: ${{ matrix.target == 'x86_64-unknown-linux-musl' || matrix.target == 'aarch64-unknown-linux-musl'}}
        name: Install musl build tools
+        env:
+          DEBIAN_FRONTEND: noninteractive
+        shell: bash
        run: |
-          sudo apt-get update
-          sudo apt-get install -y musl-tools pkg-config
+          set -euo pipefail
+          sudo apt-get -y update -o Acquire::Retries=3
+          sudo apt-get -y install --no-install-recommends musl-tools pkg-config
+
+      - name: Install cargo-chef
+        uses: taiki-e/install-action@0c5db7f7f897c03b771660e91d065338615679f4 # v2
+        with:
+          tool: cargo-chef
+          version: 0.1.71
+
+      - name: Pre-warm dependency cache (cargo-chef)
+        shell: bash
+        run: |
+          set -euo pipefail
+          RECIPE="${RUNNER_TEMP}/chef-recipe.json"
+          cargo chef prepare --recipe-path "$RECIPE"
+          cargo chef cook --recipe-path "$RECIPE" --target ${{ matrix.target }} --release --all-features

      - name: Cargo build
        run: cargo build --target ${{ matrix.target }} --release --bin codex --bin codex-responses-api-proxy
@@ -327,6 +397,40 @@ jobs:
            zstd -T0 -19 --rm "$dest/$base"
          done

+      - name: Save APT cache (musl)
+        if: always() && !cancelled() && (matrix.target == 'x86_64-unknown-linux-musl' || matrix.target == 'aarch64-unknown-linux-musl') && steps.cache_apt_restore.outputs.cache-hit != 'true'
+        continue-on-error: true
+        uses: actions/cache/save@v4
+        with:
+          path: |
+            /var/cache/apt
+          key: apt-${{ matrix.runner }}-${{ matrix.target }}-v1
+
+      - name: Save sccache cache (fallback)
+        if: always() && !cancelled() && env.SCCACHE_GHA_ENABLED != 'true'
+        continue-on-error: true
+        uses: actions/cache/save@v4
+        with:
+          path: ${{ github.workspace }}/.sccache/
+          key: sccache-${{ matrix.runner }}-${{ matrix.target }}-release-${{ hashFiles('**/Cargo.lock') }}-${{ github.run_id }}
+
+      - name: sccache stats
+        if: always()
+        continue-on-error: true
+        run: sccache --show-stats || true
+
+      - name: sccache summary
+        if: always()
+        shell: bash
+        run: |
+          {
+            echo "### sccache stats — ${{ matrix.target }} (release)";
+            echo;
+            echo '```';
+            sccache --show-stats || true;
+            echo '```';
+          } >> "$GITHUB_STEP_SUMMARY"
+
      - name: Remove signing keychain
        if: ${{ always() && matrix.runner == 'macos-15-xlarge' }}
        shell: bash
--- a/codex-rs/Cargo.lock
+++ b/codex-rs/Cargo.lock
@@ -843,6 +843,7 @@ dependencies = [
 "codex-backend-client",
 "codex-common",
 "codex-core",
+ "codex-feedback",
 "codex-file-search",
 "codex-login",
 "codex-protocol",
@@ -853,6 +854,7 @@ dependencies = [
 "pretty_assertions",
 "serde",
 "serde_json",
+ "serial_test",
 "tempfile",
 "tokio",
 "toml",
@@ -1061,10 +1063,13 @@ dependencies = [
 "codex-apply-patch",
 "codex-async-utils",
 "codex-file-search",
+ "codex-git-tooling",
+ "codex-keyring-store",
 "codex-otel",
 "codex-protocol",
 "codex-rmcp-client",
 "codex-utils-pty",
+ "codex-utils-readiness",
 "codex-utils-string",
 "codex-utils-tokenizer",
 "core-foundation 0.9.4",
@@ -1076,7 +1081,9 @@ dependencies = [
 "eventsource-stream",
 "futures",
 "http",
+ "image",
 "indexmap 2.10.0",
+ "keyring",
 "landlock",
 "libc",
 "maplit",
@@ -1093,6 +1100,7 @@ dependencies = [
 "serde_json",
 "serial_test",
 "sha1",
+ "sha2",
 "shlex",
 "similar",
 "strum_macros 0.27.2",
@@ -1208,11 +1216,22 @@ version = "0.0.0"
 dependencies = [
 "assert_matches",
 "pretty_assertions",
+ "schemars 0.8.22",
+ "serde",
 "tempfile",
 "thiserror 2.0.16",
+ "ts-rs",
 "walkdir",
 ]

+[[package]]
+name = "codex-keyring-store"
+version = "0.0.0"
+dependencies = [
+ "keyring",
+ "tracing",
+]
+
 [[package]]
 name = "codex-linux-sandbox"
 version = "0.0.0"
@@ -1327,6 +1346,8 @@ version = "0.0.0"
 dependencies = [
 "anyhow",
 "base64",
+ "codex-git-tooling",
+ "codex-utils-image",
 "icu_decimal",
 "icu_locale_core",
 "mcp-types",
@@ -1376,6 +1397,7 @@ version = "0.0.0"
 dependencies = [
 "anyhow",
 "axum",
+ "codex-keyring-store",
 "codex-protocol",
 "dirs",
 "escargot",
@@ -1427,7 +1449,6 @@ dependencies = [
 "codex-core",
 "codex-feedback",
 "codex-file-search",
- "codex-git-tooling",
 "codex-login",
 "codex-ollama",
 "codex-protocol",
@@ -1472,6 +1493,27 @@ dependencies = [
 "vt100",
 ]

+[[package]]
+name = "codex-utils-cache"
+version = "0.0.0"
+dependencies = [
+ "lru",
+ "sha1",
+ "tokio",
+]
+
+[[package]]
+name = "codex-utils-image"
+version = "0.0.0"
+dependencies = [
+ "base64",
+ "codex-utils-cache",
+ "image",
+ "tempfile",
+ "thiserror 2.0.16",
+ "tokio",
+]
+
 [[package]]
 name = "codex-utils-json-to-toml"
 version = "0.0.0"
@@ -5457,9 +5499,9 @@ dependencies = [

 [[package]]
 name = "serde"
-version = "1.0.226"
+version = "1.0.228"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0dca6411025b24b60bfa7ec1fe1f8e710ac09782dca409ee8237ba74b51295fd"
+checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
 dependencies = [
 "serde_core",
 "serde_derive",
@@ -5467,18 +5509,18 @@ dependencies = [

 [[package]]
 name = "serde_core"
-version = "1.0.226"
+version = "1.0.228"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ba2ba63999edb9dac981fb34b3e5c0d111a69b0924e253ed29d83f7c99e966a4"
+checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
 dependencies = [
 "serde_derive",
 ]

 [[package]]
 name = "serde_derive"
-version = "1.0.226"
+version = "1.0.228"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8db53ae22f34573731bafa1db20f04027b2d25e02d8205921b569171699cdb33"
+checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
 dependencies = [
 "proc-macro2",
 "quote",
--- a/codex-rs/Cargo.toml
+++ b/codex-rs/Cargo.toml
@@ -16,6 +16,7 @@ members = [
    "core",
    "exec",
    "execpolicy",
+    "keyring-store",
    "file-search",
    "git-tooling",
    "linux-sandbox",
@@ -32,9 +33,11 @@ members = [
    "otel",
    "tui",
    "git-apply",
+    "utils/cache",
+    "utils/image",
    "utils/json-to-toml",
-    "utils/readiness",
    "utils/pty",
+    "utils/readiness",
    "utils/string",
    "utils/tokenizer",
 ]
@@ -65,6 +68,7 @@ codex-exec = { path = "exec" }
 codex-feedback = { path = "feedback" }
 codex-file-search = { path = "file-search" }
 codex-git-tooling = { path = "git-tooling" }
+codex-keyring-store = { path = "keyring-store" }
 codex-linux-sandbox = { path = "linux-sandbox" }
 codex-login = { path = "login" }
 codex-mcp-server = { path = "mcp-server" }
@@ -77,6 +81,8 @@ codex-responses-api-proxy = { path = "responses-api-proxy" }
 codex-rmcp-client = { path = "rmcp-client" }
 codex-stdio-to-uds = { path = "stdio-to-uds" }
 codex-tui = { path = "tui" }
+codex-utils-cache = { path = "utils/cache" }
+codex-utils-image = { path = "utils/image" }
 codex-utils-json-to-toml = { path = "utils/json-to-toml" }
 codex-utils-pty = { path = "utils/pty" }
 codex-utils-readiness = { path = "utils/readiness" }
@@ -129,6 +135,7 @@ landlock = "0.4.1"
 lazy_static = "1"
 libc = "0.2.175"
 log = "0.4"
+lru = "0.12.5"
 maplit = "1.0.2"
 mime_guess = "2.0.5"
 multimap = "0.10.0"
--- a/codex-rs/app-server-protocol/src/export.rs
+++ b/codex-rs/app-server-protocol/src/export.rs
@@ -23,7 +23,6 @@ use std::io::Write;
 use std::path::Path;
 use std::path::PathBuf;
 use std::process::Command;
-use ts_rs::ExportError;
 use ts_rs::TS;

 const HEADER: &str = "// GENERATED CODE! DO NOT MODIFY BY HAND!\n\n";
@@ -105,19 +104,6 @@ macro_rules! for_each_schema_type {
    };
 }

-fn export_ts_with_context<F>(label: &str, export: F) -> Result<()>
-where
-    F: FnOnce() -> std::result::Result<(), ExportError>,
-{
-    match export() {
-        Ok(()) => Ok(()),
-        Err(ExportError::CannotBeExported(ty)) => Err(anyhow!(
-            "failed to export {label}: dependency {ty} cannot be exported"
-        )),
-        Err(err) => Err(err.into()),
-    }
-}
-
 pub fn generate_types(out_dir: &Path, prettier: Option<&Path>) -> Result<()> {
    generate_ts(out_dir, prettier)?;
    generate_json(out_dir)?;
@@ -127,17 +113,13 @@ pub fn generate_types(out_dir: &Path, prettier: Option<&Path>) -> Result<()> {
 pub fn generate_ts(out_dir: &Path, prettier: Option<&Path>) -> Result<()> {
    ensure_dir(out_dir)?;

-    export_ts_with_context("ClientRequest", || ClientRequest::export_all_to(out_dir))?;
-    export_ts_with_context("client responses", || export_client_responses(out_dir))?;
-    export_ts_with_context("ClientNotification", || {
-        ClientNotification::export_all_to(out_dir)
-    })?;
+    ClientRequest::export_all_to(out_dir)?;
+    export_client_responses(out_dir)?;
+    ClientNotification::export_all_to(out_dir)?;

-    export_ts_with_context("ServerRequest", || ServerRequest::export_all_to(out_dir))?;
-    export_ts_with_context("server responses", || export_server_responses(out_dir))?;
-    export_ts_with_context("ServerNotification", || {
-        ServerNotification::export_all_to(out_dir)
-    })?;
+    ServerRequest::export_all_to(out_dir)?;
+    export_server_responses(out_dir)?;
+    ServerNotification::export_all_to(out_dir)?;

    generate_index_ts(out_dir)?;

--- a/codex-rs/app-server-protocol/src/protocol.rs
+++ b/codex-rs/app-server-protocol/src/protocol.rs
@@ -5,7 +5,7 @@ use crate::JSONRPCNotification;
 use crate::JSONRPCRequest;
 use crate::RequestId;
 use codex_protocol::ConversationId;
-use codex_protocol::account::Account;
+use codex_protocol::account::PlanType;
 use codex_protocol::config_types::ForcedLoginMethod;
 use codex_protocol::config_types::ReasoningEffort;
 use codex_protocol::config_types::ReasoningSummary;
@@ -17,6 +17,7 @@ use codex_protocol::protocol::EventMsg;
 use codex_protocol::protocol::FileChange;
 use codex_protocol::protocol::RateLimitSnapshot;
 use codex_protocol::protocol::ReviewDecision;
+use codex_protocol::protocol::SandboxCommandAssessment;
 use codex_protocol::protocol::SandboxPolicy;
 use codex_protocol::protocol::TurnAbortReason;
 use paste::paste;
@@ -123,6 +124,13 @@ client_request_definitions! {
        response: GetAccountRateLimitsResponse,
    },

+    #[serde(rename = "feedback/upload")]
+    #[ts(rename = "feedback/upload")]
+    UploadFeedback {
+        params: UploadFeedbackParams,
+        response: UploadFeedbackResponse,
+    },
+
    #[serde(rename = "account/read")]
    #[ts(rename = "account/read")]
    GetAccount {
@@ -139,6 +147,10 @@ client_request_definitions! {
        params: NewConversationParams,
        response: NewConversationResponse,
    },
+    GetConversationSummary {
+        params: GetConversationSummaryParams,
+        response: GetConversationSummaryResponse,
+    },
    /// List recorded Codex conversations (rollouts) with optional pagination and search.
    ListConversations {
        params: ListConversationsParams,
@@ -224,6 +236,28 @@ client_request_definitions! {
    },
 }

+#[derive(Debug, Clone, PartialEq, Deserialize, Serialize, JsonSchema, TS)]
+#[serde(tag = "type", rename_all = "camelCase")]
+#[ts(tag = "type")]
+pub enum Account {
+    #[serde(rename = "apiKey", rename_all = "camelCase")]
+    #[ts(rename = "apiKey", rename_all = "camelCase")]
+    ApiKey { api_key: String },
+
+    #[serde(rename = "chatgpt", rename_all = "camelCase")]
+    #[ts(rename = "chatgpt", rename_all = "camelCase")]
+    ChatGpt {
+        email: Option<String>,
+        plan_type: PlanType,
+    },
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+pub struct GetAccountResponse {
+    pub account: Account,
+}
+
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Default, JsonSchema, TS)]
 #[serde(rename_all = "camelCase")]
 pub struct InitializeParams {
@@ -252,6 +286,10 @@ pub struct NewConversationParams {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model: Option<String>,

+    /// Override the model provider to use for this session.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub model_provider: Option<String>,
+
    /// Configuration profile from config.toml to specify default options.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub profile: Option<String>,
@@ -304,6 +342,18 @@ pub struct ResumeConversationResponse {
    pub initial_messages: Option<Vec<EventMsg>>,
 }

+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+pub struct GetConversationSummaryParams {
+    pub rollout_path: PathBuf,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+pub struct GetConversationSummaryResponse {
+    pub summary: ConversationSummary,
+}
+
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Default, JsonSchema, TS)]
 #[serde(rename_all = "camelCase")]
 pub struct ListConversationsParams {
@@ -313,6 +363,12 @@ pub struct ListConversationsParams {
    /// Opaque pagination cursor returned by a previous call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cursor: Option<String>,
+    /// Optional model provider filter (matches against session metadata).
+    /// - None => filter by the server's default model provider
+    /// - Some([]) => no filtering, include all providers
+    /// - Some([...]) => only include sessions with one of the specified providers
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub model_providers: Option<Vec<String>>,
 }

 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
@@ -324,6 +380,8 @@ pub struct ConversationSummary {
    /// RFC3339 timestamp string for the session start, if available.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub timestamp: Option<String>,
+    /// Model provider recorded for the session (resolved when absent in metadata).
+    pub model_provider: String,
 }

 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
@@ -377,6 +435,23 @@ pub struct ListModelsResponse {
    pub next_cursor: Option<String>,
 }

+/// Parameters for the `UploadFeedback` client request (`feedback/upload`).
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+pub struct UploadFeedbackParams {
+    /// Classification label forwarded verbatim to the feedback uploader.
+    // NOTE(review): the accepted set of values is not visible here — confirm.
+    pub classification: String,
+    /// Optional free-form explanation; omitted from JSON when `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub reason: Option<String>,
+    /// Conversation the feedback refers to, if any; omitted from JSON when `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub conversation_id: Option<ConversationId>,
+    /// When true and `conversation_id` is set, the app-server handler tries to
+    /// resolve that conversation's rollout path and hands it to the upload.
+    pub include_logs: bool,
+}
+
+/// Successful response for the `UploadFeedback` client request.
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+pub struct UploadFeedbackResponse {
+    /// Thread id taken from the feedback snapshot that was uploaded.
+    pub thread_id: String,
+}
+
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
 #[serde(tag = "type")]
 #[ts(tag = "type")]
@@ -534,12 +609,6 @@ pub struct GetAccountRateLimitsResponse {
    pub rate_limits: RateLimitSnapshot,
 }

-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
-#[serde(transparent)]
-#[ts(export)]
-#[ts(type = "Account | null")]
-pub struct GetAccountResponse(#[ts(type = "Account | null")] pub Option<Account>);
-
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
 #[serde(rename_all = "camelCase")]
 pub struct GetAuthStatusResponse {
@@ -716,6 +785,8 @@ pub struct SendUserMessageResponse {}
 #[serde(rename_all = "camelCase")]
 pub struct AddConversationListenerParams {
    pub conversation_id: ConversationId,
+    #[serde(default)]
+    pub experimental_raw_events: bool,
 }

 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
@@ -847,6 +918,8 @@ pub struct ExecCommandApprovalParams {
    pub cwd: PathBuf,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub reason: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub risk: Option<SandboxCommandAssessment>,
    pub parsed_cmd: Vec<ParsedCommand>,
 }

@@ -995,6 +1068,7 @@ mod tests {
            request_id: RequestId::Integer(42),
            params: NewConversationParams {
                model: Some("gpt-5-codex".to_string()),
+                model_provider: None,
                profile: None,
                cwd: None,
                approval_policy: Some(AskForApproval::OnRequest),
@@ -1063,6 +1137,7 @@ mod tests {
            command: vec!["echo".to_string(), "hello".to_string()],
            cwd: PathBuf::from("/tmp"),
            reason: Some("because tests".to_string()),
+            risk: None,
            parsed_cmd: vec![ParsedCommand::Unknown {
                cmd: "echo hello".to_string(),
            }],
@@ -1187,6 +1262,35 @@ mod tests {
        Ok(())
    }

+    /// Both `Account` variants must serialize with the expected `type` tag
+    /// and camelCase field names.
+    #[test]
+    fn account_serializes_fields_in_camel_case() -> Result<()> {
+        // API-key variant: tag `apiKey`, field `apiKey`.
+        let api_key_json = serde_json::to_value(&Account::ApiKey {
+            api_key: String::from("secret"),
+        })?;
+        assert_eq!(
+            json!({
+                "type": "apiKey",
+                "apiKey": "secret",
+            }),
+            api_key_json,
+        );
+
+        // ChatGPT variant: tag `chatgpt`, fields `email` and `planType`.
+        let chatgpt_json = serde_json::to_value(&Account::ChatGpt {
+            email: Some(String::from("user@example.com")),
+            plan_type: PlanType::Plus,
+        })?;
+        assert_eq!(
+            json!({
+                "type": "chatgpt",
+                "email": "user@example.com",
+                "planType": "plus",
+            }),
+            chatgpt_json,
+        );
+
+        Ok(())
+    }
+
    #[test]
    fn serialize_list_models() -> Result<()> {
        let request = ClientRequest::ListModels {
--- a/codex-rs/app-server/Cargo.toml
+++ b/codex-rs/app-server/Cargo.toml
@@ -24,6 +24,7 @@ codex-file-search = { workspace = true }
 codex-login = { workspace = true }
 codex-protocol = { workspace = true }
 codex-app-server-protocol = { workspace = true }
+codex-feedback = { workspace = true }
 codex-utils-json-to-toml = { workspace = true }
 chrono = { workspace = true }
 serde = { workspace = true, features = ["derive"] }
@@ -47,6 +48,7 @@ base64 = { workspace = true }
 core_test_support = { workspace = true }
 os_info = { workspace = true }
 pretty_assertions = { workspace = true }
+serial_test = { workspace = true }
 tempfile = { workspace = true }
 toml = { workspace = true }
 wiremock = { workspace = true }
--- a/codex-rs/app-server/src/codex_message_processor.rs
+++ b/codex-rs/app-server/src/codex_message_processor.rs
@@ -21,6 +21,8 @@ use codex_app_server_protocol::ExecOneOffCommandResponse;
 use codex_app_server_protocol::FuzzyFileSearchParams;
 use codex_app_server_protocol::FuzzyFileSearchResponse;
 use codex_app_server_protocol::GetAccountRateLimitsResponse;
+use codex_app_server_protocol::GetConversationSummaryParams;
+use codex_app_server_protocol::GetConversationSummaryResponse;
 use codex_app_server_protocol::GetUserAgentResponse;
 use codex_app_server_protocol::GetUserSavedConfigResponse;
 use codex_app_server_protocol::GitDiffToRemoteResponse;
@@ -52,6 +54,8 @@ use codex_app_server_protocol::ServerRequestPayload;
 use codex_app_server_protocol::SessionConfiguredNotification;
 use codex_app_server_protocol::SetDefaultModelParams;
 use codex_app_server_protocol::SetDefaultModelResponse;
+use codex_app_server_protocol::UploadFeedbackParams;
+use codex_app_server_protocol::UploadFeedbackResponse;
 use codex_app_server_protocol::UserInfoResponse;
 use codex_app_server_protocol::UserSavedConfig;
 use codex_backend_client::Client as BackendClient;
@@ -64,9 +68,7 @@ use codex_core::NewConversation;
 use codex_core::RolloutRecorder;
 use codex_core::SessionMeta;
 use codex_core::auth::CLIENT_ID;
-use codex_core::auth::get_auth_file;
 use codex_core::auth::login_with_api_key;
-use codex_core::auth::try_read_auth_json;
 use codex_core::config::Config;
 use codex_core::config::ConfigOverrides;
 use codex_core::config::ConfigToml;
@@ -85,6 +87,8 @@ use codex_core::protocol::EventMsg;
 use codex_core::protocol::ExecApprovalRequestEvent;
 use codex_core::protocol::Op;
 use codex_core::protocol::ReviewDecision;
+use codex_core::read_head_for_summary;
+use codex_feedback::CodexFeedback;
 use codex_login::ServerOptions as LoginServerOptions;
 use codex_login::ShutdownHandle;
 use codex_login::run_login_server;
@@ -98,6 +102,8 @@ use codex_protocol::user_input::UserInput as CoreInputItem;
 use codex_utils_json_to_toml::json_to_toml;
 use std::collections::HashMap;
 use std::ffi::OsStr;
+use std::io::Error as IoError;
+use std::path::Path;
 use std::path::PathBuf;
 use std::sync::Arc;
 use std::sync::atomic::AtomicBool;
@@ -136,6 +142,7 @@ pub(crate) struct CodexMessageProcessor {
    // Queue of pending interrupt requests per conversation. We reply when TurnAborted arrives.
    pending_interrupts: Arc<Mutex<HashMap<ConversationId, Vec<RequestId>>>>,
    pending_fuzzy_searches: Arc<Mutex<HashMap<String, Arc<AtomicBool>>>>,
+    feedback: CodexFeedback,
 }

 impl CodexMessageProcessor {
@@ -145,6 +152,7 @@ impl CodexMessageProcessor {
        outgoing: Arc<OutgoingMessageSender>,
        codex_linux_sandbox_exe: Option<PathBuf>,
        config: Arc<Config>,
+        feedback: CodexFeedback,
    ) -> Self {
        Self {
            auth_manager,
@@ -156,6 +164,7 @@ impl CodexMessageProcessor {
            active_login: Arc::new(Mutex::new(None)),
            pending_interrupts: Arc::new(Mutex::new(HashMap::new())),
            pending_fuzzy_searches: Arc::new(Mutex::new(HashMap::new())),
+            feedback,
        }
    }

@@ -170,6 +179,9 @@ impl CodexMessageProcessor {
                // created before processing any subsequent messages.
                self.process_new_conversation(request_id, params).await;
            }
+            ClientRequest::GetConversationSummary { request_id, params } => {
+                self.get_conversation_summary(request_id, params).await;
+            }
            ClientRequest::ListConversations { request_id, params } => {
                self.handle_list_conversations(request_id, params).await;
            }
@@ -275,6 +287,9 @@ impl CodexMessageProcessor {
            } => {
                self.get_account_rate_limits(request_id).await;
            }
+            ClientRequest::UploadFeedback { request_id, params } => {
+                self.upload_feedback(request_id, params).await;
+            }
        }
    }

@@ -308,7 +323,11 @@ impl CodexMessageProcessor {
            }
        }

-        match login_with_api_key(&self.config.codex_home, &params.api_key) {
+        match login_with_api_key(
+            &self.config.codex_home,
+            &params.api_key,
+            self.config.cli_auth_credentials_store_mode,
+        ) {
            Ok(()) => {
                self.auth_manager.reload();
                self.outgoing
@@ -352,6 +371,7 @@ impl CodexMessageProcessor {
                config.codex_home.clone(),
                CLIENT_ID.to_string(),
                config.forced_chatgpt_workspace_id.clone(),
+                config.cli_auth_credentials_store_mode,
            )
        };

@@ -654,12 +674,8 @@ impl CodexMessageProcessor {
    }

    async fn get_user_info(&self, request_id: RequestId) {
-        // Read alleged user email from auth.json (best-effort; not verified).
-        let auth_path = get_auth_file(&self.config.codex_home);
-        let alleged_user_email = match try_read_auth_json(&auth_path) {
-            Ok(auth) => auth.tokens.and_then(|t| t.id_token.email),
-            Err(_) => None,
-        };
+        // Read alleged user email from cached auth (best-effort; not verified).
+        let alleged_user_email = self.auth_manager.auth().and_then(|a| a.get_account_email());

        let response = UserInfoResponse { alleged_user_email };
        self.outgoing.send_response(request_id, response).await;
@@ -813,24 +829,76 @@ impl CodexMessageProcessor {
        }
    }

+    /// Handles the `GetConversationSummary` client request.
+    ///
+    /// Resolves `params.rollout_path` (a relative path is joined onto
+    /// `config.codex_home`), reads the summary from that rollout file, and
+    /// replies with a `GetConversationSummaryResponse`. A failure to read or
+    /// parse the rollout is reported to the client as a JSON-RPC error with
+    /// `INTERNAL_ERROR_CODE`.
+    async fn get_conversation_summary(
+        &self,
+        request_id: RequestId,
+        params: GetConversationSummaryParams,
+    ) {
+        let GetConversationSummaryParams { rollout_path } = params;
+        let path = if rollout_path.is_relative() {
+            self.config.codex_home.join(&rollout_path)
+        } else {
+            // Already absolute and owned: move it rather than cloning.
+            rollout_path
+        };
+        // Provider reported when the rollout metadata does not record one.
+        let fallback_provider = self.config.model_provider_id.as_str();
+
+        match read_summary_from_rollout(&path, fallback_provider).await {
+            Ok(summary) => {
+                let response = GetConversationSummaryResponse { summary };
+                self.outgoing.send_response(request_id, response).await;
+            }
+            Err(err) => {
+                let error = JSONRPCErrorError {
+                    code: INTERNAL_ERROR_CODE,
+                    message: format!(
+                        "failed to load conversation summary from {}: {}",
+                        path.display(),
+                        err
+                    ),
+                    data: None,
+                };
+                self.outgoing.send_error(request_id, error).await;
+            }
+        }
+    }
+
    async fn handle_list_conversations(
        &self,
        request_id: RequestId,
        params: ListConversationsParams,
    ) {
-        let page_size = params.page_size.unwrap_or(25);
+        let ListConversationsParams {
+            page_size,
+            cursor,
+            model_providers: model_provider,
+        } = params;
+        let page_size = page_size.unwrap_or(25);
        // Decode the optional cursor string to a Cursor via serde (Cursor implements Deserialize from string)
-        let cursor_obj: Option<RolloutCursor> = match params.cursor {
+        let cursor_obj: Option<RolloutCursor> = match cursor {
            Some(s) => serde_json::from_str::<RolloutCursor>(&format!("\"{s}\"")).ok(),
            None => None,
        };
        let cursor_ref = cursor_obj.as_ref();
+        let model_provider_filter = match model_provider {
+            Some(providers) => {
+                if providers.is_empty() {
+                    None
+                } else {
+                    Some(providers)
+                }
+            }
+            None => Some(vec![self.config.model_provider_id.clone()]),
+        };
+        let model_provider_slice = model_provider_filter.as_deref();
+        let fallback_provider = self.config.model_provider_id.clone();

        let page = match RolloutRecorder::list_conversations(
            &self.config.codex_home,
            page_size,
            cursor_ref,
            INTERACTIVE_SESSION_SOURCES,
+            model_provider_slice,
+            fallback_provider.as_str(),
        )
        .await
        {
@@ -849,7 +917,7 @@ impl CodexMessageProcessor {
        let items = page
            .items
            .into_iter()
-            .filter_map(|it| extract_conversation_summary(it.path, &it.head))
+            .filter_map(|it| extract_conversation_summary(it.path, &it.head, &fallback_provider))
            .collect();

        // Encode next_cursor as a plain string
@@ -1256,7 +1324,10 @@ impl CodexMessageProcessor {
        request_id: RequestId,
        params: AddConversationListenerParams,
    ) {
-        let AddConversationListenerParams { conversation_id } = params;
+        let AddConversationListenerParams {
+            conversation_id,
+            experimental_raw_events,
+        } = params;
        let Ok(conversation) = self
            .conversation_manager
            .get_conversation(conversation_id)
@@ -1293,6 +1364,11 @@ impl CodexMessageProcessor {
                            }
                        };

+                        if let EventMsg::RawResponseItem(_) = &event.msg
+                            && !experimental_raw_events {
+                                continue;
+                            }
+
                        // For now, we send a notification for every event,
                        // JSON-serializing the `Event` as-is, but these should
                        // be migrated to be variants of `ServerNotification`
@@ -1410,6 +1486,77 @@ impl CodexMessageProcessor {
        let response = FuzzyFileSearchResponse { files: results };
        self.outgoing.send_response(request_id, response).await;
    }
+
+    /// Handles the `UploadFeedback` client request.
+    ///
+    /// Takes a feedback snapshot for the (optional) conversation, optionally
+    /// resolves that conversation's rollout path when logs were requested,
+    /// and runs the upload on a blocking task. Replies with the snapshot's
+    /// thread id on success, or a JSON-RPC error with `INTERNAL_ERROR_CODE`
+    /// if either the blocking task or the upload itself fails.
+    async fn upload_feedback(&self, request_id: RequestId, params: UploadFeedbackParams) {
+        let UploadFeedbackParams {
+            classification,
+            reason,
+            conversation_id,
+            include_logs,
+        } = params;
+
+        let snapshot = self.feedback.snapshot(conversation_id);
+        let thread_id = snapshot.thread_id.clone();
+
+        // A rollout path is only looked up when logs are wanted and a
+        // conversation was named.
+        let validated_rollout_path = match conversation_id {
+            Some(conv_id) if include_logs => self.resolve_rollout_path(conv_id).await,
+            _ => None,
+        };
+
+        // The upload call is synchronous, so keep it off the async executor.
+        let outcome = tokio::task::spawn_blocking(move || {
+            snapshot.upload_feedback(
+                &classification,
+                reason.as_deref(),
+                include_logs,
+                validated_rollout_path.as_deref(),
+            )
+        })
+        .await;
+
+        // Outer result: did the blocking task complete? Inner: did the upload succeed?
+        let message = match outcome {
+            Ok(Ok(())) => {
+                let response = UploadFeedbackResponse { thread_id };
+                self.outgoing.send_response(request_id, response).await;
+                return;
+            }
+            Ok(Err(err)) => format!("failed to upload feedback: {err}"),
+            Err(join_err) => format!("failed to upload feedback: {join_err}"),
+        };
+
+        let error = JSONRPCErrorError {
+            code: INTERNAL_ERROR_CODE,
+            message,
+            data: None,
+        };
+        self.outgoing.send_error(request_id, error).await;
+    }
+
+    /// Returns the rollout path of the given conversation, or `None` when the
+    /// conversation manager returns an error for that id.
+    async fn resolve_rollout_path(&self, conversation_id: ConversationId) -> Option<PathBuf> {
+        let lookup = self
+            .conversation_manager
+            .get_conversation(conversation_id)
+            .await;
+        lookup.ok().map(|conversation| conversation.rollout_path())
+    }
 }

 async fn apply_bespoke_event_handling(
@@ -1447,6 +1594,7 @@ async fn apply_bespoke_event_handling(
            command,
            cwd,
            reason,
+            risk,
            parsed_cmd,
        }) => {
            let params = ExecCommandApprovalParams {
@@ -1455,6 +1603,7 @@ async fn apply_bespoke_event_handling(
                command,
                cwd,
                reason,
+                risk,
                parsed_cmd,
            };
            let rx = outgoing
@@ -1501,6 +1650,7 @@ async fn derive_config_from_params(
 ) -> std::io::Result<Config> {
    let NewConversationParams {
        model,
+        model_provider,
        profile,
        cwd,
        approval_policy,
@@ -1516,13 +1666,14 @@ async fn derive_config_from_params(
        cwd: cwd.map(PathBuf::from),
        approval_policy,
        sandbox_mode,
-        model_provider: None,
+        model_provider,
        codex_linux_sandbox_exe,
        base_instructions,
        include_apply_patch_tool,
        include_view_image_tool: None,
        show_raw_agent_reasoning: None,
        tools_web_search_request: None,
+        experimental_sandbox_command_assessment: None,
        additional_writable_roots: Vec::new(),
    };

@@ -1613,9 +1764,54 @@ async fn on_exec_approval_response(
    }
 }

+/// Builds a `ConversationSummary` from the head of the rollout file at `path`.
+///
+/// Returns an error when the head cannot be read, is empty, or its first
+/// entry does not deserialize as `SessionMeta`. When
+/// `extract_conversation_summary` yields no summary, a summary with an empty
+/// preview is built from the session metadata alone; `fallback_provider` is
+/// used when the metadata records no model provider.
+async fn read_summary_from_rollout(
+    path: &Path,
+    fallback_provider: &str,
+) -> std::io::Result<ConversationSummary> {
+    let head = read_head_for_summary(path).await?;
+
+    let first = head
+        .first()
+        .ok_or_else(|| IoError::other(format!("rollout at {} is empty", path.display())))?;
+
+    let session_meta: SessionMeta = match serde_json::from_value(first.clone()) {
+        Ok(meta) => meta,
+        Err(_) => {
+            return Err(IoError::other(format!(
+                "rollout at {} does not start with session metadata",
+                path.display()
+            )));
+        }
+    };
+
+    match extract_conversation_summary(path.to_path_buf(), &head, fallback_provider) {
+        Some(summary) => Ok(summary),
+        None => {
+            // No summary could be extracted from the head: fall back to the
+            // metadata alone, with an empty preview.
+            let timestamp =
+                (!session_meta.timestamp.is_empty()).then(|| session_meta.timestamp.clone());
+            let model_provider = match session_meta.model_provider {
+                Some(provider) => provider,
+                None => fallback_provider.to_string(),
+            };
+
+            Ok(ConversationSummary {
+                conversation_id: session_meta.id,
+                timestamp,
+                path: path.to_path_buf(),
+                preview: String::new(),
+                model_provider,
+            })
+        }
+    }
+}
+
 fn extract_conversation_summary(
    path: PathBuf,
    head: &[serde_json::Value],
+    fallback_provider: &str,
 ) -> Option<ConversationSummary> {
    let session_meta = match head.first() {
        Some(first_line) => serde_json::from_value::<SessionMeta>(first_line.clone()).ok()?,
@@ -1640,12 +1836,17 @@ fn extract_conversation_summary(
    } else {
        Some(session_meta.timestamp.clone())
    };
+    let conversation_id = session_meta.id;
+    let model_provider = session_meta
+        .model_provider
+        .unwrap_or_else(|| fallback_provider.to_string());

    Some(ConversationSummary {
-        conversation_id: session_meta.id,
+        conversation_id,
        timestamp,
        path,
        preview: preview.to_string(),
+        model_provider,
    })
 }

@@ -1655,6 +1856,7 @@ mod tests {
    use anyhow::Result;
    use pretty_assertions::assert_eq;
    use serde_json::json;
+    use tempfile::TempDir;

    #[test]
    fn extract_conversation_summary_prefers_plain_user_messages() -> Result<()> {
@@ -1669,7 +1871,8 @@ mod tests {
                "cwd": "/",
                "originator": "codex",
                "cli_version": "0.0.0",
-                "instructions": null
+                "instructions": null,
+                "model_provider": "test-provider"
            }),
            json!({
                "type": "message",
@@ -1689,15 +1892,62 @@ mod tests {
            }),
        ];

-        let summary = extract_conversation_summary(path.clone(), &head).expect("summary");
+        let summary =
+            extract_conversation_summary(path.clone(), &head, "test-provider").expect("summary");

-        assert_eq!(summary.conversation_id, conversation_id);
-        assert_eq!(
-            summary.timestamp,
-            Some("2025-09-05T16:53:11.850Z".to_string())
-        );
-        assert_eq!(summary.path, path);
-        assert_eq!(summary.preview, "Count to 5");
+        let expected = ConversationSummary {
+            conversation_id,
+            timestamp,
+            path,
+            preview: "Count to 5".to_string(),
+            model_provider: "test-provider".to_string(),
+        };
+
+        assert_eq!(summary, expected);
+        Ok(())
+    }
+
+    /// A rollout holding only a session-metadata line (no provider recorded)
+    /// must produce a summary with an empty preview and the fallback provider.
+    #[tokio::test]
+    async fn read_summary_from_rollout_returns_empty_preview_when_no_user_message() -> Result<()> {
+        use codex_protocol::protocol::RolloutItem;
+        use codex_protocol::protocol::RolloutLine;
+        use codex_protocol::protocol::SessionMetaLine;
+
+        let conversation_id = ConversationId::from_string("bfd12a78-5900-467b-9bc5-d3d35df08191")?;
+        let timestamp = String::from("2025-09-05T16:53:11.850Z");
+
+        // Single rollout line: session metadata without a model provider.
+        let meta_line = RolloutLine {
+            timestamp: timestamp.clone(),
+            item: RolloutItem::SessionMeta(SessionMetaLine {
+                meta: SessionMeta {
+                    id: conversation_id,
+                    timestamp: timestamp.clone(),
+                    model_provider: None,
+                    ..SessionMeta::default()
+                },
+                git: None,
+            }),
+        };
+
+        let temp_dir = TempDir::new()?;
+        let path = temp_dir.path().join("rollout.jsonl");
+        let serialized = serde_json::to_string(&meta_line)?;
+        std::fs::write(&path, format!("{serialized}\n"))?;
+
+        let summary = read_summary_from_rollout(&path, "fallback").await?;
+
+        let expected = ConversationSummary {
+            conversation_id,
+            timestamp: Some(timestamp),
+            path,
+            preview: String::new(),
+            model_provider: "fallback".to_string(),
+        };
+
+        assert_eq!(summary, expected);
         Ok(())
     }
 }
--- a/codex-rs/app-server/src/lib.rs
+++ b/codex-rs/app-server/src/lib.rs
@@ -12,16 +12,19 @@ use crate::message_processor::MessageProcessor;
 use crate::outgoing_message::OutgoingMessage;
 use crate::outgoing_message::OutgoingMessageSender;
 use codex_app_server_protocol::JSONRPCMessage;
+use codex_feedback::CodexFeedback;
 use tokio::io::AsyncBufReadExt;
 use tokio::io::AsyncWriteExt;
 use tokio::io::BufReader;
 use tokio::io::{self};
 use tokio::sync::mpsc;
+use tracing::Level;
 use tracing::debug;
 use tracing::error;
 use tracing::info;
 use tracing_subscriber::EnvFilter;
 use tracing_subscriber::Layer;
+use tracing_subscriber::filter::Targets;
 use tracing_subscriber::layer::SubscriberExt;
 use tracing_subscriber::util::SubscriberInitExt;

@@ -82,6 +85,8 @@ pub async fn run_main(
            std::io::Error::new(ErrorKind::InvalidData, format!("error loading config: {e}"))
        })?;

+    let feedback = CodexFeedback::new();
+
    let otel =
        codex_core::otel_init::build_provider(&config, env!("CARGO_PKG_VERSION")).map_err(|e| {
            std::io::Error::new(
@@ -96,8 +101,15 @@ pub async fn run_main(
        .with_writer(std::io::stderr)
        .with_filter(EnvFilter::from_default_env());

+    let feedback_layer = tracing_subscriber::fmt::layer()
+        .with_writer(feedback.make_writer())
+        .with_ansi(false)
+        .with_target(false)
+        .with_filter(Targets::new().with_default(Level::TRACE));
+
    let _ = tracing_subscriber::registry()
        .with(stderr_fmt)
+        .with(feedback_layer)
        .with(otel.as_ref().map(|provider| {
            OpenTelemetryTracingBridge::new(&provider.logger).with_filter(
                tracing_subscriber::filter::filter_fn(codex_core::otel_init::codex_export_filter),
@@ -112,6 +124,7 @@ pub async fn run_main(
            outgoing_message_sender,
            codex_linux_sandbox_exe,
            std::sync::Arc::new(config),
+            feedback.clone(),
        );
        async move {
            while let Some(msg) = incoming_rx.recv().await {
--- a/codex-rs/app-server/src/message_processor.rs
+++ b/codex-rs/app-server/src/message_processor.rs
@@ -17,6 +17,7 @@ use codex_core::ConversationManager;
 use codex_core::config::Config;
 use codex_core::default_client::USER_AGENT_SUFFIX;
 use codex_core::default_client::get_codex_user_agent;
+use codex_feedback::CodexFeedback;
 use codex_protocol::protocol::SessionSource;
 use std::sync::Arc;

@@ -33,9 +34,14 @@ impl MessageProcessor {
        outgoing: OutgoingMessageSender,
        codex_linux_sandbox_exe: Option<PathBuf>,
        config: Arc<Config>,
+        feedback: CodexFeedback,
    ) -> Self {
        let outgoing = Arc::new(outgoing);
-        let auth_manager = AuthManager::shared(config.codex_home.clone(), false);
+        let auth_manager = AuthManager::shared(
+            config.codex_home.clone(),
+            false,
+            config.cli_auth_credentials_store_mode,
+        );
        let conversation_manager = Arc::new(ConversationManager::new(
            auth_manager.clone(),
            SessionSource::VSCode,
@@ -46,6 +52,7 @@ impl MessageProcessor {
            outgoing.clone(),
            codex_linux_sandbox_exe,
            config,
+            feedback,
        );

        Self {
--- a/codex-rs/app-server/tests/common/auth_fixtures.rs
+++ b/codex-rs/app-server/tests/common/auth_fixtures.rs
@@ -6,9 +6,9 @@ use base64::Engine;
 use base64::engine::general_purpose::URL_SAFE_NO_PAD;
 use chrono::DateTime;
 use chrono::Utc;
+use codex_core::auth::AuthCredentialsStoreMode;
 use codex_core::auth::AuthDotJson;
-use codex_core::auth::get_auth_file;
-use codex_core::auth::write_auth_json;
+use codex_core::auth::save_auth;
 use codex_core::token_data::TokenData;
 use codex_core::token_data::parse_id_token;
 use serde_json::json;
@@ -109,7 +109,11 @@ pub fn encode_id_token(claims: &ChatGptIdTokenClaims) -> Result<String> {
    Ok(format!("{header_b64}.{payload_b64}.{signature_b64}"))
 }

-pub fn write_chatgpt_auth(codex_home: &Path, fixture: ChatGptAuthFixture) -> Result<()> {
+pub fn write_chatgpt_auth(
+    codex_home: &Path,
+    fixture: ChatGptAuthFixture,
+    cli_auth_credentials_store_mode: AuthCredentialsStoreMode,
+) -> Result<()> {
    let id_token_raw = encode_id_token(&fixture.claims)?;
    let id_token = parse_id_token(&id_token_raw).context("parse id token")?;
    let tokens = TokenData {
@@ -127,5 +131,5 @@ pub fn write_chatgpt_auth(codex_home: &Path, fixture: ChatGptAuthFixture) -> Res
        last_refresh,
    };

-    write_auth_json(&get_auth_file(codex_home), &auth).context("write auth.json")
+    save_auth(codex_home, &auth, cli_auth_credentials_store_mode).context("write auth.json")
 }
--- a/codex-rs/app-server/tests/common/mcp_process.rs
+++ b/codex-rs/app-server/tests/common/mcp_process.rs
@@ -30,6 +30,7 @@ use codex_app_server_protocol::SendUserMessageParams;
 use codex_app_server_protocol::SendUserTurnParams;
 use codex_app_server_protocol::ServerRequest;
 use codex_app_server_protocol::SetDefaultModelParams;
+use codex_app_server_protocol::UploadFeedbackParams;

 use codex_app_server_protocol::JSONRPCError;
 use codex_app_server_protocol::JSONRPCMessage;
@@ -242,6 +243,15 @@ impl McpProcess {
        self.send_request("account/rateLimits/read", None).await
    }

+    /// Serialize `params` to JSON and send them as a `feedback/upload`
+    /// JSON-RPC request, returning whatever `send_request` returns.
+    pub async fn send_upload_feedback_request(
+        &mut self,
+        params: UploadFeedbackParams,
+    ) -> anyhow::Result<i64> {
+        let payload = serde_json::to_value(params)?;
+        self.send_request("feedback/upload", Some(payload)).await
+    }
+
    /// Send a `userInfo` JSON-RPC request.
    pub async fn send_user_info_request(&mut self) -> anyhow::Result<i64> {
        self.send_request("userInfo", None).await
--- a/codex-rs/app-server/tests/suite/archive_conversation.rs
+++ b/codex-rs/app-server/tests/suite/archive_conversation.rs
@@ -1,5 +1,4 @@
-use std::path::Path;
-
+use anyhow::Result;
 use app_test_support::McpProcess;
 use app_test_support::to_response;
 use codex_app_server_protocol::ArchiveConversationParams;
@@ -9,45 +8,37 @@ use codex_app_server_protocol::NewConversationParams;
 use codex_app_server_protocol::NewConversationResponse;
 use codex_app_server_protocol::RequestId;
 use codex_core::ARCHIVED_SESSIONS_SUBDIR;
+use std::path::Path;
 use tempfile::TempDir;
 use tokio::time::timeout;

 const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn archive_conversation_moves_rollout_into_archived_directory() {
-    let codex_home = TempDir::new().expect("create temp dir");
-    create_config_toml(codex_home.path()).expect("write config.toml");
+async fn archive_conversation_moves_rollout_into_archived_directory() -> Result<()> {
+    let codex_home = TempDir::new()?;
+    create_config_toml(codex_home.path())?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("initialize timeout")
-        .expect("initialize request");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    let new_request_id = mcp
        .send_new_conversation_request(NewConversationParams {
            model: Some("mock-model".to_string()),
            ..Default::default()
        })
-        .await
-        .expect("send newConversation");
+        .await?;
    let new_response: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(new_request_id)),
    )
-    .await
-    .expect("newConversation timeout")
-    .expect("newConversation response");
+    .await??;

    let NewConversationResponse {
        conversation_id,
        rollout_path,
        ..
-    } = to_response::<NewConversationResponse>(new_response)
-        .expect("deserialize newConversation response");
+    } = to_response::<NewConversationResponse>(new_response)?;

    assert!(
        rollout_path.exists(),
@@ -60,19 +51,15 @@ async fn archive_conversation_moves_rollout_into_archived_directory() {
            conversation_id,
            rollout_path: rollout_path.clone(),
        })
-        .await
-        .expect("send archiveConversation");
+        .await?;
    let archive_response: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(archive_request_id)),
    )
-    .await
-    .expect("archiveConversation timeout")
-    .expect("archiveConversation response");
+    .await??;

    let _: ArchiveConversationResponse =
-        to_response::<ArchiveConversationResponse>(archive_response)
-            .expect("deserialize archiveConversation response");
+        to_response::<ArchiveConversationResponse>(archive_response)?;

    let archived_directory = codex_home.path().join(ARCHIVED_SESSIONS_SUBDIR);
    let archived_rollout_path =
@@ -90,6 +77,8 @@ async fn archive_conversation_moves_rollout_into_archived_directory() {
        "expected archived rollout path {} to exist",
        archived_rollout_path.display()
    );
+
+    Ok(())
 }

 fn create_config_toml(codex_home: &Path) -> std::io::Result<()> {
--- a/codex-rs/app-server/tests/suite/auth.rs
+++ b/codex-rs/app-server/tests/suite/auth.rs
@@ -1,5 +1,4 @@
-use std::path::Path;
-
+use anyhow::Result;
 use app_test_support::McpProcess;
 use app_test_support::to_response;
 use codex_app_server_protocol::AuthMode;
@@ -11,6 +10,7 @@ use codex_app_server_protocol::LoginApiKeyParams;
 use codex_app_server_protocol::LoginApiKeyResponse;
 use codex_app_server_protocol::RequestId;
 use pretty_assertions::assert_eq;
+use std::path::Path;
 use tempfile::TempDir;
 use tokio::time::timeout;

@@ -71,125 +71,99 @@ forced_login_method = "{forced_method}"
    std::fs::write(config_toml, contents)
 }

-async fn login_with_api_key_via_request(mcp: &mut McpProcess, api_key: &str) {
+async fn login_with_api_key_via_request(mcp: &mut McpProcess, api_key: &str) -> Result<()> {
    let request_id = mcp
        .send_login_api_key_request(LoginApiKeyParams {
            api_key: api_key.to_string(),
        })
-        .await
-        .unwrap_or_else(|e| panic!("send loginApiKey: {e}"));
+        .await?;

    let resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
    )
-    .await
-    .unwrap_or_else(|e| panic!("loginApiKey timeout: {e}"))
-    .unwrap_or_else(|e| panic!("loginApiKey response: {e}"));
-    let _: LoginApiKeyResponse =
-        to_response(resp).unwrap_or_else(|e| panic!("deserialize login response: {e}"));
+    .await??;
+    let _: LoginApiKeyResponse = to_response(resp)?;
+    Ok(())
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn get_auth_status_no_auth() {
-    let codex_home = TempDir::new().unwrap_or_else(|e| panic!("create tempdir: {e}"));
-    create_config_toml(codex_home.path()).unwrap_or_else(|err| panic!("write config.toml: {err}"));
+async fn get_auth_status_no_auth() -> Result<()> {
+    let codex_home = TempDir::new()?;
+    create_config_toml(codex_home.path())?;

-    let mut mcp = McpProcess::new_with_env(codex_home.path(), &[("OPENAI_API_KEY", None)])
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init failed");
+    let mut mcp = McpProcess::new_with_env(codex_home.path(), &[("OPENAI_API_KEY", None)]).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    let request_id = mcp
        .send_get_auth_status_request(GetAuthStatusParams {
            include_token: Some(true),
            refresh_token: Some(false),
        })
-        .await
-        .expect("send getAuthStatus");
+        .await?;

    let resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
    )
-    .await
-    .expect("getAuthStatus timeout")
-    .expect("getAuthStatus response");
-    let status: GetAuthStatusResponse = to_response(resp).expect("deserialize status");
+    .await??;
+    let status: GetAuthStatusResponse = to_response(resp)?;
    assert_eq!(status.auth_method, None, "expected no auth method");
    assert_eq!(status.auth_token, None, "expected no token");
+    Ok(())
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn get_auth_status_with_api_key() {
-    let codex_home = TempDir::new().unwrap_or_else(|e| panic!("create tempdir: {e}"));
-    create_config_toml(codex_home.path()).unwrap_or_else(|err| panic!("write config.toml: {err}"));
+async fn get_auth_status_with_api_key() -> Result<()> {
+    let codex_home = TempDir::new()?;
+    create_config_toml(codex_home.path())?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init failed");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

-    login_with_api_key_via_request(&mut mcp, "sk-test-key").await;
+    login_with_api_key_via_request(&mut mcp, "sk-test-key").await?;

    let request_id = mcp
        .send_get_auth_status_request(GetAuthStatusParams {
            include_token: Some(true),
            refresh_token: Some(false),
        })
-        .await
-        .expect("send getAuthStatus");
+        .await?;

    let resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
    )
-    .await
-    .expect("getAuthStatus timeout")
-    .expect("getAuthStatus response");
-    let status: GetAuthStatusResponse = to_response(resp).expect("deserialize status");
+    .await??;
+    let status: GetAuthStatusResponse = to_response(resp)?;
    assert_eq!(status.auth_method, Some(AuthMode::ApiKey));
    assert_eq!(status.auth_token, Some("sk-test-key".to_string()));
+    Ok(())
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn get_auth_status_with_api_key_when_auth_not_required() {
-    let codex_home = TempDir::new().unwrap_or_else(|e| panic!("create tempdir: {e}"));
-    create_config_toml_custom_provider(codex_home.path(), false)
-        .unwrap_or_else(|err| panic!("write config.toml: {err}"));
+async fn get_auth_status_with_api_key_when_auth_not_required() -> Result<()> {
+    let codex_home = TempDir::new()?;
+    create_config_toml_custom_provider(codex_home.path(), false)?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init failed");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

-    login_with_api_key_via_request(&mut mcp, "sk-test-key").await;
+    login_with_api_key_via_request(&mut mcp, "sk-test-key").await?;

    let request_id = mcp
        .send_get_auth_status_request(GetAuthStatusParams {
            include_token: Some(true),
            refresh_token: Some(false),
        })
-        .await
-        .expect("send getAuthStatus");
+        .await?;

    let resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
    )
-    .await
-    .expect("getAuthStatus timeout")
-    .expect("getAuthStatus response");
-    let status: GetAuthStatusResponse = to_response(resp).expect("deserialize status");
+    .await??;
+    let status: GetAuthStatusResponse = to_response(resp)?;
    assert_eq!(status.auth_method, None, "expected no auth method");
    assert_eq!(status.auth_token, None, "expected no token");
    assert_eq!(
@@ -197,76 +171,60 @@ async fn get_auth_status_with_api_key_when_auth_not_required() {
        Some(false),
        "requires_openai_auth should be false",
    );
+    Ok(())
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn get_auth_status_with_api_key_no_include_token() {
-    let codex_home = TempDir::new().unwrap_or_else(|e| panic!("create tempdir: {e}"));
-    create_config_toml(codex_home.path()).unwrap_or_else(|err| panic!("write config.toml: {err}"));
+async fn get_auth_status_with_api_key_no_include_token() -> Result<()> {
+    let codex_home = TempDir::new()?;
+    create_config_toml(codex_home.path())?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init failed");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

-    login_with_api_key_via_request(&mut mcp, "sk-test-key").await;
+    login_with_api_key_via_request(&mut mcp, "sk-test-key").await?;

    // Build params via struct so None field is omitted in wire JSON.
    let params = GetAuthStatusParams {
        include_token: None,
        refresh_token: Some(false),
    };
-    let request_id = mcp
-        .send_get_auth_status_request(params)
-        .await
-        .expect("send getAuthStatus");
+    let request_id = mcp.send_get_auth_status_request(params).await?;

    let resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
    )
-    .await
-    .expect("getAuthStatus timeout")
-    .expect("getAuthStatus response");
-    let status: GetAuthStatusResponse = to_response(resp).expect("deserialize status");
+    .await??;
+    let status: GetAuthStatusResponse = to_response(resp)?;
    assert_eq!(status.auth_method, Some(AuthMode::ApiKey));
    assert!(status.auth_token.is_none(), "token must be omitted");
+    Ok(())
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn login_api_key_rejected_when_forced_chatgpt() {
-    let codex_home = TempDir::new().unwrap_or_else(|e| panic!("create tempdir: {e}"));
-    create_config_toml_forced_login(codex_home.path(), "chatgpt")
-        .unwrap_or_else(|err| panic!("write config.toml: {err}"));
+async fn login_api_key_rejected_when_forced_chatgpt() -> Result<()> {
+    let codex_home = TempDir::new()?;
+    create_config_toml_forced_login(codex_home.path(), "chatgpt")?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init failed");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    let request_id = mcp
        .send_login_api_key_request(LoginApiKeyParams {
            api_key: "sk-test-key".to_string(),
        })
-        .await
-        .expect("send loginApiKey");
+        .await?;

    let err: JSONRPCError = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_error_message(RequestId::Integer(request_id)),
    )
-    .await
-    .expect("loginApiKey error timeout")
-    .expect("loginApiKey error");
+    .await??;

    assert_eq!(
        err.error.message,
        "API key login is disabled. Use ChatGPT login instead."
    );
+    Ok(())
 }
--- a/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs
+++ b/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs
@@ -1,5 +1,4 @@
-use std::path::Path;
-
+use anyhow::Result;
 use app_test_support::McpProcess;
 use app_test_support::create_final_assistant_message_sse_response;
 use app_test_support::create_mock_chat_completions_server;
@@ -32,26 +31,27 @@ use codex_protocol::protocol::Event;
 use codex_protocol::protocol::EventMsg;
 use pretty_assertions::assert_eq;
 use std::env;
+use std::path::Path;
 use tempfile::TempDir;
 use tokio::time::timeout;

 const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);

 #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
-async fn test_codex_jsonrpc_conversation_flow() {
+async fn test_codex_jsonrpc_conversation_flow() -> Result<()> {
    if env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
        println!(
            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
        );
-        return;
+        return Ok(());
    }

-    let tmp = TempDir::new().expect("tmp dir");
+    let tmp = TempDir::new()?;
    // Temporary Codex home with config pointing at the mock server.
    let codex_home = tmp.path().join("codex_home");
-    std::fs::create_dir(&codex_home).expect("create codex home dir");
+    std::fs::create_dir(&codex_home)?;
    let working_directory = tmp.path().join("workdir");
-    std::fs::create_dir(&working_directory).expect("create working directory");
+    std::fs::create_dir(&working_directory)?;

    // Create a mock model server that immediately ends each turn.
    // Two turns are expected: initial session configure + one user message.
@@ -61,20 +61,15 @@ async fn test_codex_jsonrpc_conversation_flow() {
            Some(&working_directory),
            Some(5000),
            "call1234",
-        )
-        .expect("create shell sse response"),
-        create_final_assistant_message_sse_response("Enjoy your new git repo!")
-            .expect("create final assistant message"),
+        )?,
+        create_final_assistant_message_sse_response("Enjoy your new git repo!")?,
    ];
    let server = create_mock_chat_completions_server(responses).await;
-    create_config_toml(&codex_home, &server.uri()).expect("write config");
+    create_config_toml(&codex_home, &server.uri())?;

    // Start MCP server and initialize.
-    let mut mcp = McpProcess::new(&codex_home).await.expect("spawn mcp");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init error");
+    let mut mcp = McpProcess::new(&codex_home).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    // 1) newConversation
    let new_conv_id = mcp
@@ -82,17 +77,13 @@ async fn test_codex_jsonrpc_conversation_flow() {
            cwd: Some(working_directory.to_string_lossy().into_owned()),
            ..Default::default()
        })
-        .await
-        .expect("send newConversation");
+        .await?;
    let new_conv_resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(new_conv_id)),
    )
-    .await
-    .expect("newConversation timeout")
-    .expect("newConversation resp");
-    let new_conv_resp = to_response::<NewConversationResponse>(new_conv_resp)
-        .expect("deserialize newConversation response");
+    .await??;
+    let new_conv_resp = to_response::<NewConversationResponse>(new_conv_resp)?;
    let NewConversationResponse {
        conversation_id,
        model,
@@ -103,19 +94,18 @@ async fn test_codex_jsonrpc_conversation_flow() {

    // 2) addConversationListener
    let add_listener_id = mcp
-        .send_add_conversation_listener_request(AddConversationListenerParams { conversation_id })
-        .await
-        .expect("send addConversationListener");
+        .send_add_conversation_listener_request(AddConversationListenerParams {
+            conversation_id,
+            experimental_raw_events: false,
+        })
+        .await?;
    let add_listener_resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(add_listener_id)),
    )
-    .await
-    .expect("addConversationListener timeout")
-    .expect("addConversationListener resp");
+    .await??;
    let AddConversationSubscriptionResponse { subscription_id } =
-        to_response::<AddConversationSubscriptionResponse>(add_listener_resp)
-            .expect("deserialize addConversationListener response");
+        to_response::<AddConversationSubscriptionResponse>(add_listener_resp)?;

    // 3) sendUserMessage (should trigger notifications; we only validate an OK response)
    let send_user_id = mcp
@@ -125,17 +115,13 @@ async fn test_codex_jsonrpc_conversation_flow() {
                text: "text".to_string(),
            }],
        })
-        .await
-        .expect("send sendUserMessage");
+        .await?;
    let send_user_resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(send_user_id)),
    )
-    .await
-    .expect("sendUserMessage timeout")
-    .expect("sendUserMessage resp");
-    let SendUserMessageResponse {} = to_response::<SendUserMessageResponse>(send_user_resp)
-        .expect("deserialize sendUserMessage response");
+    .await??;
+    let SendUserMessageResponse {} = to_response::<SendUserMessageResponse>(send_user_resp)?;

    // Verify the task_finished notification is received.
    // Note this also ensures that the final request to the server was made.
@@ -143,9 +129,7 @@ async fn test_codex_jsonrpc_conversation_flow() {
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_notification_message("codex/event/task_complete"),
    )
-    .await
-    .expect("task_finished_notification timeout")
-    .expect("task_finished_notification resp");
+    .await??;
    let serde_json::Value::Object(map) = task_finished_notification
        .params
        .expect("notification should have params")
@@ -163,33 +147,31 @@ async fn test_codex_jsonrpc_conversation_flow() {
        .send_remove_conversation_listener_request(RemoveConversationListenerParams {
            subscription_id,
        })
-        .await
-        .expect("send removeConversationListener");
+        .await?;
    let remove_listener_resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(remove_listener_id)),
    )
-    .await
-    .expect("removeConversationListener timeout")
-    .expect("removeConversationListener resp");
-    let RemoveConversationSubscriptionResponse {} =
-        to_response(remove_listener_resp).expect("deserialize removeConversationListener response");
+    .await??;
+    let RemoveConversationSubscriptionResponse {} = to_response(remove_listener_resp)?;
+
+    Ok(())
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
-async fn test_send_user_turn_changes_approval_policy_behavior() {
+async fn test_send_user_turn_changes_approval_policy_behavior() -> Result<()> {
    if env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
        println!(
            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
        );
-        return;
+        return Ok(());
    }

-    let tmp = TempDir::new().expect("tmp dir");
+    let tmp = TempDir::new()?;
    let codex_home = tmp.path().join("codex_home");
-    std::fs::create_dir(&codex_home).expect("create codex home dir");
+    std::fs::create_dir(&codex_home)?;
    let working_directory = tmp.path().join("workdir");
-    std::fs::create_dir(&working_directory).expect("create working directory");
+    std::fs::create_dir(&working_directory)?;

    // Mock server will request a python shell call for the first and second turn, then finish.
    let responses = vec![
@@ -202,10 +184,8 @@ async fn test_send_user_turn_changes_approval_policy_behavior() {
            Some(&working_directory),
            Some(5000),
            "call1",
-        )
-        .expect("create first shell sse response"),
-        create_final_assistant_message_sse_response("done 1")
-            .expect("create final assistant message 1"),
+        )?,
+        create_final_assistant_message_sse_response("done 1")?,
        create_shell_sse_response(
            vec![
                "python3".to_string(),
@@ -215,20 +195,15 @@ async fn test_send_user_turn_changes_approval_policy_behavior() {
            Some(&working_directory),
            Some(5000),
            "call2",
-        )
-        .expect("create second shell sse response"),
-        create_final_assistant_message_sse_response("done 2")
-            .expect("create final assistant message 2"),
+        )?,
+        create_final_assistant_message_sse_response("done 2")?,
    ];
    let server = create_mock_chat_completions_server(responses).await;
-    create_config_toml(&codex_home, &server.uri()).expect("write config");
+    create_config_toml(&codex_home, &server.uri())?;

    // Start MCP server and initialize.
-    let mut mcp = McpProcess::new(&codex_home).await.expect("spawn mcp");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init error");
+    let mut mcp = McpProcess::new(&codex_home).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    // 1) Start conversation with approval_policy=untrusted
    let new_conv_id = mcp
@@ -236,36 +211,30 @@ async fn test_send_user_turn_changes_approval_policy_behavior() {
            cwd: Some(working_directory.to_string_lossy().into_owned()),
            ..Default::default()
        })
-        .await
-        .expect("send newConversation");
+        .await?;
    let new_conv_resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(new_conv_id)),
    )
-    .await
-    .expect("newConversation timeout")
-    .expect("newConversation resp");
+    .await??;
    let NewConversationResponse {
        conversation_id, ..
-    } = to_response::<NewConversationResponse>(new_conv_resp)
-        .expect("deserialize newConversation response");
+    } = to_response::<NewConversationResponse>(new_conv_resp)?;

    // 2) addConversationListener
    let add_listener_id = mcp
-        .send_add_conversation_listener_request(AddConversationListenerParams { conversation_id })
-        .await
-        .expect("send addConversationListener");
-    let _: AddConversationSubscriptionResponse =
-        to_response::<AddConversationSubscriptionResponse>(
-            timeout(
-                DEFAULT_READ_TIMEOUT,
-                mcp.read_stream_until_response_message(RequestId::Integer(add_listener_id)),
-            )
-            .await
-            .expect("addConversationListener timeout")
-            .expect("addConversationListener resp"),
+        .send_add_conversation_listener_request(AddConversationListenerParams {
+            conversation_id,
+            experimental_raw_events: false,
+        })
+        .await?;
+    let _: AddConversationSubscriptionResponse = to_response::<AddConversationSubscriptionResponse>(
+        timeout(
+            DEFAULT_READ_TIMEOUT,
+            mcp.read_stream_until_response_message(RequestId::Integer(add_listener_id)),
        )
-        .expect("deserialize addConversationListener response");
+        .await??,
+    )?;

    // 3) sendUserMessage triggers a shell call; approval policy is Untrusted so we should get an elicitation
    let send_user_id = mcp
@@ -275,27 +244,21 @@ async fn test_send_user_turn_changes_approval_policy_behavior() {
                text: "run python".to_string(),
            }],
        })
-        .await
-        .expect("send sendUserMessage");
+        .await?;
    let _send_user_resp: SendUserMessageResponse = to_response::<SendUserMessageResponse>(
        timeout(
            DEFAULT_READ_TIMEOUT,
            mcp.read_stream_until_response_message(RequestId::Integer(send_user_id)),
        )
-        .await
-        .expect("sendUserMessage timeout")
-        .expect("sendUserMessage resp"),
-    )
-    .expect("deserialize sendUserMessage response");
+        .await??,
+    )?;

    // Expect an ExecCommandApproval request (elicitation)
    let request = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_request_message(),
    )
-    .await
-    .expect("waiting for exec approval request timeout")
-    .expect("exec approval request");
+    .await??;
    let ServerRequest::ExecCommandApproval { request_id, params } = request else {
        panic!("expected ExecCommandApproval request, got: {request:?}");
    };
@@ -311,6 +274,7 @@ async fn test_send_user_turn_changes_approval_policy_behavior() {
            ],
            cwd: working_directory.clone(),
            reason: None,
+            risk: None,
            parsed_cmd: vec![ParsedCommand::Unknown {
                cmd: "python3 -c 'print(42)'".to_string()
            }],
@@ -323,17 +287,14 @@ async fn test_send_user_turn_changes_approval_policy_behavior() {
        request_id,
        serde_json::json!({ "decision": codex_core::protocol::ReviewDecision::Approved }),
    )
-    .await
-    .expect("send approval response");
+    .await?;

    // Wait for first TaskComplete
    let _ = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_notification_message("codex/event/task_complete"),
    )
-    .await
-    .expect("task_complete 1 timeout")
-    .expect("task_complete 1 notification");
+    .await??;

    // 4) sendUserTurn with approval_policy=never should run without elicitation
    let send_turn_id = mcp
@@ -349,19 +310,15 @@ async fn test_send_user_turn_changes_approval_policy_behavior() {
            effort: Some(ReasoningEffort::Medium),
            summary: ReasoningSummary::Auto,
        })
-        .await
-        .expect("send sendUserTurn");
+        .await?;
    // Acknowledge sendUserTurn
    let _send_turn_resp: SendUserTurnResponse = to_response::<SendUserTurnResponse>(
        timeout(
            DEFAULT_READ_TIMEOUT,
            mcp.read_stream_until_response_message(RequestId::Integer(send_turn_id)),
        )
-        .await
-        .expect("sendUserTurn timeout")
-        .expect("sendUserTurn resp"),
-    )
-    .expect("deserialize sendUserTurn response");
+        .await??,
+    )?;

    // Ensure we do NOT receive an ExecCommandApproval request before the task completes.
    // If any Request is seen while waiting for task_complete, the helper will error and the test fails.
@@ -369,31 +326,31 @@ async fn test_send_user_turn_changes_approval_policy_behavior() {
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_notification_message("codex/event/task_complete"),
    )
-    .await
-    .expect("task_complete 2 timeout")
-    .expect("task_complete 2 notification");
+    .await??;
+
+    Ok(())
 }

 // Helper: minimal config.toml pointing at mock provider.

 #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
-async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() {
+async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() -> Result<()> {
    if env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
        println!(
            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
        );
-        return;
+        return Ok(());
    }

-    let tmp = TempDir::new().expect("tmp dir");
+    let tmp = TempDir::new()?;
    let codex_home = tmp.path().join("codex_home");
-    std::fs::create_dir(&codex_home).expect("create codex home dir");
+    std::fs::create_dir(&codex_home)?;
    let workspace_root = tmp.path().join("workspace");
-    std::fs::create_dir(&workspace_root).expect("create workspace root");
+    std::fs::create_dir(&workspace_root)?;
    let first_cwd = workspace_root.join("turn1");
    let second_cwd = workspace_root.join("turn2");
-    std::fs::create_dir(&first_cwd).expect("create first cwd");
-    std::fs::create_dir(&second_cwd).expect("create second cwd");
+    std::fs::create_dir(&first_cwd)?;
+    std::fs::create_dir(&second_cwd)?;

    let responses = vec![
        create_shell_sse_response(
@@ -405,10 +362,8 @@ async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() {
            None,
            Some(5000),
            "call-first",
-        )
-        .expect("create first shell response"),
-        create_final_assistant_message_sse_response("done first")
-            .expect("create first final assistant message"),
+        )?,
+        create_final_assistant_message_sse_response("done first")?,
        create_shell_sse_response(
            vec![
                "bash".to_string(),
@@ -418,21 +373,14 @@ async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() {
            None,
            Some(5000),
            "call-second",
-        )
-        .expect("create second shell response"),
-        create_final_assistant_message_sse_response("done second")
-            .expect("create second final assistant message"),
+        )?,
+        create_final_assistant_message_sse_response("done second")?,
    ];
    let server = create_mock_chat_completions_server(responses).await;
-    create_config_toml(&codex_home, &server.uri()).expect("write config");
+    create_config_toml(&codex_home, &server.uri())?;

-    let mut mcp = McpProcess::new(&codex_home)
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init failed");
+    let mut mcp = McpProcess::new(&codex_home).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    let new_conv_id = mcp
        .send_new_conversation_request(NewConversationParams {
@@ -441,33 +389,29 @@ async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() {
            sandbox: Some(SandboxMode::WorkspaceWrite),
            ..Default::default()
        })
-        .await
-        .expect("send newConversation");
+        .await?;
    let new_conv_resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(new_conv_id)),
    )
-    .await
-    .expect("newConversation timeout")
-    .expect("newConversation resp");
+    .await??;
    let NewConversationResponse {
        conversation_id,
        model,
        ..
-    } = to_response::<NewConversationResponse>(new_conv_resp)
-        .expect("deserialize newConversation response");
+    } = to_response::<NewConversationResponse>(new_conv_resp)?;

    let add_listener_id = mcp
-        .send_add_conversation_listener_request(AddConversationListenerParams { conversation_id })
-        .await
-        .expect("send addConversationListener");
+        .send_add_conversation_listener_request(AddConversationListenerParams {
+            conversation_id,
+            experimental_raw_events: false,
+        })
+        .await?;
    timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(add_listener_id)),
    )
-    .await
-    .expect("addConversationListener timeout")
-    .expect("addConversationListener resp");
+    .await??;

    let first_turn_id = mcp
        .send_send_user_turn_request(SendUserTurnParams {
@@ -487,22 +431,17 @@ async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() {
            effort: Some(ReasoningEffort::Medium),
            summary: ReasoningSummary::Auto,
        })
-        .await
-        .expect("send first sendUserTurn");
+        .await?;
    timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(first_turn_id)),
    )
-    .await
-    .expect("sendUserTurn 1 timeout")
-    .expect("sendUserTurn 1 resp");
+    .await??;
    timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_notification_message("codex/event/task_complete"),
    )
-    .await
-    .expect("task_complete 1 timeout")
-    .expect("task_complete 1 notification");
+    .await??;

    let second_turn_id = mcp
        .send_send_user_turn_request(SendUserTurnParams {
@@ -517,23 +456,18 @@ async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() {
            effort: Some(ReasoningEffort::Medium),
            summary: ReasoningSummary::Auto,
        })
-        .await
-        .expect("send second sendUserTurn");
+        .await?;
    timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(second_turn_id)),
    )
-    .await
-    .expect("sendUserTurn 2 timeout")
-    .expect("sendUserTurn 2 resp");
+    .await??;

    let exec_begin_notification = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_notification_message("codex/event/exec_command_begin"),
    )
-    .await
-    .expect("exec_command_begin timeout")
-    .expect("exec_command_begin notification");
+    .await??;
    let params = exec_begin_notification
        .params
        .clone()
@@ -561,9 +495,9 @@ async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() {
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_notification_message("codex/event/task_complete"),
    )
-    .await
-    .expect("task_complete 2 timeout")
-    .expect("task_complete 2 notification");
+    .await??;
+
+    Ok(())
 }

 fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<()> {
--- a/codex-rs/app-server/tests/suite/config.rs
+++ b/codex-rs/app-server/tests/suite/config.rs
@@ -1,6 +1,4 @@
-use std::collections::HashMap;
-use std::path::Path;
-
+use anyhow::Result;
 use app_test_support::McpProcess;
 use app_test_support::to_response;
 use codex_app_server_protocol::GetUserSavedConfigResponse;
@@ -17,6 +15,8 @@ use codex_protocol::config_types::ReasoningSummary;
 use codex_protocol::config_types::SandboxMode;
 use codex_protocol::config_types::Verbosity;
 use pretty_assertions::assert_eq;
+use std::collections::HashMap;
+use std::path::Path;
 use tempfile::TempDir;
 use tokio::time::timeout;

@@ -60,31 +60,21 @@ chatgpt_base_url = "https://api.chatgpt.com"
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
-async fn get_config_toml_parses_all_fields() {
-    let codex_home = TempDir::new().unwrap_or_else(|e| panic!("create tempdir: {e}"));
-    create_config_toml(codex_home.path()).expect("write config.toml");
+async fn get_config_toml_parses_all_fields() -> Result<()> {
+    let codex_home = TempDir::new()?;
+    create_config_toml(codex_home.path())?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init failed");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

-    let request_id = mcp
-        .send_get_user_saved_config_request()
-        .await
-        .expect("send getUserSavedConfig");
+    let request_id = mcp.send_get_user_saved_config_request().await?;
    let resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
    )
-    .await
-    .expect("getUserSavedConfig timeout")
-    .expect("getUserSavedConfig response");
+    .await??;

-    let config: GetUserSavedConfigResponse = to_response(resp).expect("deserialize config");
+    let config: GetUserSavedConfigResponse = to_response(resp)?;
    let expected = GetUserSavedConfigResponse {
        config: UserSavedConfig {
            approval_policy: Some(AskForApproval::OnRequest),
@@ -122,33 +112,24 @@ async fn get_config_toml_parses_all_fields() {
    };

    assert_eq!(config, expected);
+    Ok(())
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn get_config_toml_empty() {
-    let codex_home = TempDir::new().unwrap_or_else(|e| panic!("create tempdir: {e}"));
+async fn get_config_toml_empty() -> Result<()> {
+    let codex_home = TempDir::new()?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init failed");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

-    let request_id = mcp
-        .send_get_user_saved_config_request()
-        .await
-        .expect("send getUserSavedConfig");
+    let request_id = mcp.send_get_user_saved_config_request().await?;
    let resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
    )
-    .await
-    .expect("getUserSavedConfig timeout")
-    .expect("getUserSavedConfig response");
+    .await??;

-    let config: GetUserSavedConfigResponse = to_response(resp).expect("deserialize config");
+    let config: GetUserSavedConfigResponse = to_response(resp)?;
    let expected = GetUserSavedConfigResponse {
        config: UserSavedConfig {
            approval_policy: None,
@@ -167,4 +148,5 @@ async fn get_config_toml_empty() {
    };

    assert_eq!(config, expected);
+    Ok(())
 }
--- a/codex-rs/app-server/tests/suite/create_conversation.rs
+++ b/codex-rs/app-server/tests/suite/create_conversation.rs
@@ -1,5 +1,4 @@
-use std::path::Path;
-
+use anyhow::Result;
 use app_test_support::McpProcess;
 use app_test_support::create_final_assistant_message_sse_response;
 use app_test_support::create_mock_chat_completions_server;
@@ -15,31 +14,25 @@ use codex_app_server_protocol::SendUserMessageParams;
 use codex_app_server_protocol::SendUserMessageResponse;
 use pretty_assertions::assert_eq;
 use serde_json::json;
+use std::path::Path;
 use tempfile::TempDir;
 use tokio::time::timeout;

 const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn test_conversation_create_and_send_message_ok() {
+async fn test_conversation_create_and_send_message_ok() -> Result<()> {
    // Mock server – we won't strictly rely on it, but provide one to satisfy any model wiring.
-    let responses = vec![
-        create_final_assistant_message_sse_response("Done").expect("build mock assistant message"),
-    ];
+    let responses = vec![create_final_assistant_message_sse_response("Done")?];
    let server = create_mock_chat_completions_server(responses).await;

    // Temporary Codex home with config pointing at the mock server.
-    let codex_home = TempDir::new().expect("create temp dir");
-    create_config_toml(codex_home.path(), &server.uri()).expect("write config.toml");
+    let codex_home = TempDir::new()?;
+    create_config_toml(codex_home.path(), &server.uri())?;

    // Start MCP server process and initialize.
-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init failed");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    // Create a conversation via the new JSON-RPC API.
    let new_conv_id = mcp
@@ -47,40 +40,35 @@ async fn test_conversation_create_and_send_message_ok() {
            model: Some("o3".to_string()),
            ..Default::default()
        })
-        .await
-        .expect("send newConversation");
+        .await?;
    let new_conv_resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(new_conv_id)),
    )
-    .await
-    .expect("newConversation timeout")
-    .expect("newConversation resp");
+    .await??;
    let NewConversationResponse {
        conversation_id,
        model,
        reasoning_effort: _,
        rollout_path: _,
-    } = to_response::<NewConversationResponse>(new_conv_resp)
-        .expect("deserialize newConversation response");
+    } = to_response::<NewConversationResponse>(new_conv_resp)?;
    assert_eq!(model, "o3");

    // Add a listener so we receive notifications for this conversation (not strictly required for this test).
    let add_listener_id = mcp
-        .send_add_conversation_listener_request(AddConversationListenerParams { conversation_id })
-        .await
-        .expect("send addConversationListener");
+        .send_add_conversation_listener_request(AddConversationListenerParams {
+            conversation_id,
+            experimental_raw_events: false,
+        })
+        .await?;
    let _sub: AddConversationSubscriptionResponse =
        to_response::<AddConversationSubscriptionResponse>(
            timeout(
                DEFAULT_READ_TIMEOUT,
                mcp.read_stream_until_response_message(RequestId::Integer(add_listener_id)),
            )
-            .await
-            .expect("addConversationListener timeout")
-            .expect("addConversationListener resp"),
-        )
-        .expect("deserialize addConversationListener response");
+            .await??,
+        )?;

    // Now send a user message via the wire API and expect an OK (empty object) result.
    let send_id = mcp
@@ -90,36 +78,32 @@ async fn test_conversation_create_and_send_message_ok() {
                text: "Hello".to_string(),
            }],
        })
-        .await
-        .expect("send sendUserMessage");
+        .await?;
    let send_resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(send_id)),
    )
-    .await
-    .expect("sendUserMessage timeout")
-    .expect("sendUserMessage resp");
-    let _ok: SendUserMessageResponse = to_response::<SendUserMessageResponse>(send_resp)
-        .expect("deserialize sendUserMessage response");
+    .await??;
+    let _ok: SendUserMessageResponse = to_response::<SendUserMessageResponse>(send_resp)?;

    // avoid race condition by waiting for the mock server to receive the chat.completions request
    let deadline = std::time::Instant::now() + DEFAULT_READ_TIMEOUT;
-    loop {
+    let requests = loop {
        let requests = server.received_requests().await.unwrap_or_default();
        if !requests.is_empty() {
-            break;
+            break requests;
        }
        if std::time::Instant::now() >= deadline {
            panic!("mock server did not receive the chat.completions request in time");
        }
        tokio::time::sleep(std::time::Duration::from_millis(10)).await;
-    }
+    };

    // Verify the outbound request body matches expectations for Chat Completions.
-    let request = &server.received_requests().await.unwrap()[0];
-    let body = request
-        .body_json::<serde_json::Value>()
-        .expect("parse request body as JSON");
+    let request = requests
+        .first()
+        .expect("mock server should have received at least one request");
+    let body = request.body_json::<serde_json::Value>()?;
    assert_eq!(body["model"], json!("o3"));
    assert!(body["stream"].as_bool().unwrap_or(false));
    let messages = body["messages"]
@@ -130,6 +114,7 @@ async fn test_conversation_create_and_send_message_ok() {
    assert_eq!(last["content"], json!("Hello"));

    drop(server);
+    Ok(())
 }

 // Helper to create a config.toml pointing at the mock model server.
--- a/codex-rs/app-server/tests/suite/fuzzy_file_search.rs
+++ b/codex-rs/app-server/tests/suite/fuzzy_file_search.rs
@@ -1,5 +1,5 @@
-use anyhow::Context;
 use anyhow::Result;
+use anyhow::anyhow;
 use app_test_support::McpProcess;
 use codex_app_server_protocol::JSONRPCResponse;
 use codex_app_server_protocol::RequestId;
@@ -13,48 +13,39 @@ const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn test_fuzzy_file_search_sorts_and_includes_indices() -> Result<()> {
    // Prepare a temporary Codex home and a separate root with test files.
-    let codex_home = TempDir::new().context("create temp codex home")?;
-    let root = TempDir::new().context("create temp search root")?;
+    let codex_home = TempDir::new()?;
+    let root = TempDir::new()?;

    // Create files designed to have deterministic ordering for query "abe".
-    std::fs::write(root.path().join("abc"), "x").context("write file abc")?;
-    std::fs::write(root.path().join("abcde"), "x").context("write file abcde")?;
-    std::fs::write(root.path().join("abexy"), "x").context("write file abexy")?;
-    std::fs::write(root.path().join("zzz.txt"), "x").context("write file zzz")?;
+    std::fs::write(root.path().join("abc"), "x")?;
+    std::fs::write(root.path().join("abcde"), "x")?;
+    std::fs::write(root.path().join("abexy"), "x")?;
+    std::fs::write(root.path().join("zzz.txt"), "x")?;
    let sub_dir = root.path().join("sub");
-    std::fs::create_dir_all(&sub_dir).context("create sub dir")?;
+    std::fs::create_dir_all(&sub_dir)?;
    let sub_abce_path = sub_dir.join("abce");
-    std::fs::write(&sub_abce_path, "x").context("write file sub/abce")?;
+    std::fs::write(&sub_abce_path, "x")?;
    let sub_abce_rel = sub_abce_path
-        .strip_prefix(root.path())
-        .context("strip root prefix from sub/abce")?
+        .strip_prefix(root.path())?
        .to_string_lossy()
        .to_string();

    // Start MCP server and initialize.
-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .context("spawn mcp")?;
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .context("init timeout")?
-        .context("init failed")?;
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    let root_path = root.path().to_string_lossy().to_string();
    // Send fuzzyFileSearch request.
    let request_id = mcp
        .send_fuzzy_file_search_request("abe", vec![root_path.clone()], None)
-        .await
-        .context("send fuzzyFileSearch")?;
+        .await?;

    // Read response and verify shape and ordering.
    let resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
    )
-    .await
-    .context("fuzzyFileSearch timeout")?
-    .context("fuzzyFileSearch resp")?;
+    .await??;

    let value = resp.result;
    // The path separator on Windows affects the score.
@@ -94,24 +85,18 @@ async fn test_fuzzy_file_search_sorts_and_includes_indices() -> Result<()> {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn test_fuzzy_file_search_accepts_cancellation_token() -> Result<()> {
-    let codex_home = TempDir::new().context("create temp codex home")?;
-    let root = TempDir::new().context("create temp search root")?;
+    let codex_home = TempDir::new()?;
+    let root = TempDir::new()?;

-    std::fs::write(root.path().join("alpha.txt"), "contents").context("write alpha")?;
+    std::fs::write(root.path().join("alpha.txt"), "contents")?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .context("spawn mcp")?;
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .context("init timeout")?
-        .context("init failed")?;
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    let root_path = root.path().to_string_lossy().to_string();
    let request_id = mcp
        .send_fuzzy_file_search_request("alp", vec![root_path.clone()], None)
-        .await
-        .context("send fuzzyFileSearch")?;
+        .await?;

    let request_id_2 = mcp
        .send_fuzzy_file_search_request(
@@ -119,23 +104,20 @@ async fn test_fuzzy_file_search_accepts_cancellation_token() -> Result<()> {
            vec![root_path.clone()],
            Some(request_id.to_string()),
        )
-        .await
-        .context("send fuzzyFileSearch")?;
+        .await?;

    let resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(request_id_2)),
    )
-    .await
-    .context("fuzzyFileSearch timeout")?
-    .context("fuzzyFileSearch resp")?;
+    .await??;

    let files = resp
        .result
        .get("files")
-        .context("files key missing")?
+        .ok_or_else(|| anyhow!("files key missing"))?
        .as_array()
-        .context("files not array")?
+        .ok_or_else(|| anyhow!("files not array"))?
        .clone();

    assert_eq!(files.len(), 1);
--- a/codex-rs/app-server/tests/suite/interrupt.rs
+++ b/codex-rs/app-server/tests/suite/interrupt.rs
@@ -88,7 +88,10 @@ async fn shell_command_interruption() -> anyhow::Result<()> {

    // 2) addConversationListener
    let add_listener_id = mcp
-        .send_add_conversation_listener_request(AddConversationListenerParams { conversation_id })
+        .send_add_conversation_listener_request(AddConversationListenerParams {
+            conversation_id,
+            experimental_raw_events: false,
+        })
        .await?;
    let _add_listener_resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
--- a/codex-rs/app-server/tests/suite/list_resume.rs
+++ b/codex-rs/app-server/tests/suite/list_resume.rs
@@ -1,6 +1,4 @@
-use std::fs;
-use std::path::Path;
-
+use anyhow::Result;
 use app_test_support::McpProcess;
 use app_test_support::to_response;
 use codex_app_server_protocol::JSONRPCNotification;
@@ -15,6 +13,8 @@ use codex_app_server_protocol::ServerNotification;
 use codex_app_server_protocol::SessionConfiguredNotification;
 use pretty_assertions::assert_eq;
 use serde_json::json;
+use std::fs;
+use std::path::Path;
 use tempfile::TempDir;
 use tokio::time::timeout;
 use uuid::Uuid;
@@ -22,58 +22,56 @@ use uuid::Uuid;
 const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn test_list_and_resume_conversations() {
+async fn test_list_and_resume_conversations() -> Result<()> {
    // Prepare a temporary CODEX_HOME with a few fake rollout files.
-    let codex_home = TempDir::new().expect("create temp dir");
+    let codex_home = TempDir::new()?;
    create_fake_rollout(
        codex_home.path(),
        "2025-01-02T12-00-00",
        "2025-01-02T12:00:00Z",
        "Hello A",
-    );
+        Some("openai"),
+    )?;
    create_fake_rollout(
        codex_home.path(),
        "2025-01-01T13-00-00",
        "2025-01-01T13:00:00Z",
        "Hello B",
-    );
+        Some("openai"),
+    )?;
    create_fake_rollout(
        codex_home.path(),
        "2025-01-01T12-00-00",
        "2025-01-01T12:00:00Z",
        "Hello C",
-    );
+        None,
+    )?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init failed");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    // Request first page with size 2
    let req_id = mcp
        .send_list_conversations_request(ListConversationsParams {
            page_size: Some(2),
            cursor: None,
+            model_providers: None,
        })
-        .await
-        .expect("send listConversations");
+        .await?;
    let resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(req_id)),
    )
-    .await
-    .expect("listConversations timeout")
-    .expect("listConversations resp");
+    .await??;
    let ListConversationsResponse { items, next_cursor } =
-        to_response::<ListConversationsResponse>(resp).expect("deserialize response");
+        to_response::<ListConversationsResponse>(resp)?;

    assert_eq!(items.len(), 2);
    // Newest first; preview text should match
    assert_eq!(items[0].preview, "Hello A");
    assert_eq!(items[1].preview, "Hello B");
+    assert_eq!(items[0].model_provider, "openai");
+    assert_eq!(items[1].model_provider, "openai");
    assert!(items[0].path.is_absolute());
    assert!(next_cursor.is_some());

@@ -82,24 +80,93 @@ async fn test_list_and_resume_conversations() {
        .send_list_conversations_request(ListConversationsParams {
            page_size: Some(2),
            cursor: next_cursor,
+            model_providers: None,
        })
-        .await
-        .expect("send listConversations page 2");
+        .await?;
    let resp2: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(req_id2)),
    )
-    .await
-    .expect("listConversations page 2 timeout")
-    .expect("listConversations page 2 resp");
+    .await??;
    let ListConversationsResponse {
        items: items2,
        next_cursor: next2,
        ..
-    } = to_response::<ListConversationsResponse>(resp2).expect("deserialize response");
+    } = to_response::<ListConversationsResponse>(resp2)?;
    assert_eq!(items2.len(), 1);
    assert_eq!(items2[0].preview, "Hello C");
-    assert!(next2.is_some());
+    assert_eq!(items2[0].model_provider, "openai");
+    assert_eq!(next2, None);
+
+    // Add a conversation with an explicit non-OpenAI provider for filter tests.
+    create_fake_rollout(
+        codex_home.path(),
+        "2025-01-01T11-30-00",
+        "2025-01-01T11:30:00Z",
+        "Hello TP",
+        Some("test-provider"),
+    )?;
+
+    // Filtering by model provider should return only matching sessions.
+    let filter_req_id = mcp
+        .send_list_conversations_request(ListConversationsParams {
+            page_size: Some(10),
+            cursor: None,
+            model_providers: Some(vec!["test-provider".to_string()]),
+        })
+        .await?;
+    let filter_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(filter_req_id)),
+    )
+    .await??;
+    let ListConversationsResponse {
+        items: filtered_items,
+        next_cursor: filtered_next,
+    } = to_response::<ListConversationsResponse>(filter_resp)?;
+    assert_eq!(filtered_items.len(), 1);
+    assert_eq!(filtered_next, None);
+    assert_eq!(filtered_items[0].preview, "Hello TP");
+    assert_eq!(filtered_items[0].model_provider, "test-provider");
+
+    // Empty filter should include every session regardless of provider metadata.
+    let unfiltered_req_id = mcp
+        .send_list_conversations_request(ListConversationsParams {
+            page_size: Some(10),
+            cursor: None,
+            model_providers: Some(Vec::new()),
+        })
+        .await?;
+    let unfiltered_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(unfiltered_req_id)),
+    )
+    .await??;
+    let ListConversationsResponse {
+        items: unfiltered_items,
+        next_cursor: unfiltered_next,
+    } = to_response::<ListConversationsResponse>(unfiltered_resp)?;
+    assert_eq!(unfiltered_items.len(), 4);
+    assert!(unfiltered_next.is_none());
+
+    let empty_req_id = mcp
+        .send_list_conversations_request(ListConversationsParams {
+            page_size: Some(10),
+            cursor: None,
+            model_providers: Some(vec!["other".to_string()]),
+        })
+        .await?;
+    let empty_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(empty_req_id)),
+    )
+    .await??;
+    let ListConversationsResponse {
+        items: empty_items,
+        next_cursor: empty_next,
+    } = to_response::<ListConversationsResponse>(empty_resp)?;
+    assert!(empty_items.is_empty());
+    assert!(empty_next.is_none());

    // Now resume one of the sessions and expect a SessionConfigured notification and response.
    let resume_req_id = mcp
@@ -110,20 +177,15 @@ async fn test_list_and_resume_conversations() {
                ..Default::default()
            }),
        })
-        .await
-        .expect("send resumeConversation");
+        .await?;

    // Expect a codex/event notification with msg.type == sessionConfigured
    let notification: JSONRPCNotification = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_notification_message("sessionConfigured"),
    )
-    .await
-    .expect("sessionConfigured notification timeout")
-    .expect("sessionConfigured notification");
-    let session_configured: ServerNotification = notification
-        .try_into()
-        .expect("deserialize sessionConfigured notification");
+    .await??;
+    let session_configured: ServerNotification = notification.try_into()?;
    // Basic shape assertion: ensure event type is sessionConfigured
    let ServerNotification::SessionConfigured(SessionConfiguredNotification {
        model,
@@ -141,41 +203,50 @@ async fn test_list_and_resume_conversations() {
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(resume_req_id)),
    )
-    .await
-    .expect("resumeConversation timeout")
-    .expect("resumeConversation resp");
+    .await??;
    let ResumeConversationResponse {
        conversation_id, ..
-    } = to_response::<ResumeConversationResponse>(resume_resp)
-        .expect("deserialize resumeConversation response");
+    } = to_response::<ResumeConversationResponse>(resume_resp)?;
    // conversation id should be a valid UUID
    assert!(!conversation_id.to_string().is_empty());
+
+    Ok(())
 }

-fn create_fake_rollout(codex_home: &Path, filename_ts: &str, meta_rfc3339: &str, preview: &str) {
+fn create_fake_rollout(
+    codex_home: &Path,
+    filename_ts: &str,
+    meta_rfc3339: &str,
+    preview: &str,
+    model_provider: Option<&str>,
+) -> Result<()> {
    let uuid = Uuid::new_v4();
    // sessions/YYYY/MM/DD/ derived from filename_ts (YYYY-MM-DDThh-mm-ss)
    let year = &filename_ts[0..4];
    let month = &filename_ts[5..7];
    let day = &filename_ts[8..10];
    let dir = codex_home.join("sessions").join(year).join(month).join(day);
-    fs::create_dir_all(&dir).unwrap_or_else(|e| panic!("create sessions dir: {e}"));
+    fs::create_dir_all(&dir)?;

    let file_path = dir.join(format!("rollout-{filename_ts}-{uuid}.jsonl"));
    let mut lines = Vec::new();
    // Meta line with timestamp (flattened meta in payload for new schema)
+    let mut payload = json!({
+        "id": uuid,
+        "timestamp": meta_rfc3339,
+        "cwd": "/",
+        "originator": "codex",
+        "cli_version": "0.0.0",
+        "instructions": null,
+    });
+    if let Some(provider) = model_provider {
+        payload["model_provider"] = json!(provider);
+    }
    lines.push(
        json!({
            "timestamp": meta_rfc3339,
            "type": "session_meta",
-            "payload": {
-                "id": uuid,
-                "timestamp": meta_rfc3339,
-                "cwd": "/",
-                "originator": "codex",
-                "cli_version": "0.0.0",
-                "instructions": null
-            }
+            "payload": payload
        })
        .to_string(),
    );
@@ -205,6 +276,6 @@ fn create_fake_rollout(codex_home: &Path, filename_ts: &str, meta_rfc3339: &str,
        })
        .to_string(),
    );
-    fs::write(file_path, lines.join("\n") + "\n")
-        .unwrap_or_else(|e| panic!("write rollout file: {e}"));
+    fs::write(file_path, lines.join("\n") + "\n")?;
+    Ok(())
 }
--- a/codex-rs/app-server/tests/suite/login.rs
+++ b/codex-rs/app-server/tests/suite/login.rs
@@ -1,6 +1,4 @@
-use std::path::Path;
-use std::time::Duration;
-
+use anyhow::Result;
 use app_test_support::McpProcess;
 use app_test_support::to_response;
 use codex_app_server_protocol::CancelLoginChatGptParams;
@@ -12,7 +10,11 @@ use codex_app_server_protocol::JSONRPCResponse;
 use codex_app_server_protocol::LoginChatGptResponse;
 use codex_app_server_protocol::LogoutChatGptResponse;
 use codex_app_server_protocol::RequestId;
+use codex_core::auth::AuthCredentialsStoreMode;
 use codex_login::login_with_api_key;
+use serial_test::serial;
+use std::path::Path;
+use std::time::Duration;
 use tempfile::TempDir;
 use tokio::time::timeout;

@@ -41,32 +43,26 @@ stream_max_retries = 0
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn logout_chatgpt_removes_auth() {
-    let codex_home = TempDir::new().unwrap_or_else(|e| panic!("create tempdir: {e}"));
-    create_config_toml(codex_home.path()).expect("write config.toml");
-    login_with_api_key(codex_home.path(), "sk-test-key").expect("seed api key");
+async fn logout_chatgpt_removes_auth() -> Result<()> {
+    let codex_home = TempDir::new()?;
+    create_config_toml(codex_home.path())?;
+    login_with_api_key(
+        codex_home.path(),
+        "sk-test-key",
+        AuthCredentialsStoreMode::File,
+    )?;
    assert!(codex_home.path().join("auth.json").exists());

-    let mut mcp = McpProcess::new_with_env(codex_home.path(), &[("OPENAI_API_KEY", None)])
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init failed");
+    let mut mcp = McpProcess::new_with_env(codex_home.path(), &[("OPENAI_API_KEY", None)]).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

-    let id = mcp
-        .send_logout_chat_gpt_request()
-        .await
-        .expect("send logoutChatGpt");
+    let id = mcp.send_logout_chat_gpt_request().await?;
    let resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(id)),
    )
-    .await
-    .expect("logoutChatGpt timeout")
-    .expect("logoutChatGpt response");
-    let _ok: LogoutChatGptResponse = to_response(resp).expect("deserialize logout response");
+    .await??;
+    let _ok: LogoutChatGptResponse = to_response(resp)?;

    assert!(
        !codex_home.path().join("auth.json").exists(),
@@ -79,61 +75,47 @@ async fn logout_chatgpt_removes_auth() {
            include_token: Some(true),
            refresh_token: Some(false),
        })
-        .await
-        .expect("send getAuthStatus");
+        .await?;
    let status_resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(status_id)),
    )
-    .await
-    .expect("getAuthStatus timeout")
-    .expect("getAuthStatus response");
-    let status: GetAuthStatusResponse = to_response(status_resp).expect("deserialize status");
+    .await??;
+    let status: GetAuthStatusResponse = to_response(status_resp)?;
    assert_eq!(status.auth_method, None);
    assert_eq!(status.auth_token, None);
+    Ok(())
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn login_and_cancel_chatgpt() {
-    let codex_home = TempDir::new().unwrap_or_else(|e| panic!("create tempdir: {e}"));
-    create_config_toml(codex_home.path()).unwrap_or_else(|err| panic!("write config.toml: {err}"));
+// Serialize tests that launch the login server since it binds to a fixed port.
+#[serial(login_port)]
+async fn login_and_cancel_chatgpt() -> Result<()> {
+    let codex_home = TempDir::new()?;
+    create_config_toml(codex_home.path())?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init failed");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

-    let login_id = mcp
-        .send_login_chat_gpt_request()
-        .await
-        .expect("send loginChatGpt");
+    let login_id = mcp.send_login_chat_gpt_request().await?;
    let login_resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(login_id)),
    )
-    .await
-    .expect("loginChatGpt timeout")
-    .expect("loginChatGpt response");
-    let login: LoginChatGptResponse = to_response(login_resp).expect("deserialize login resp");
+    .await??;
+    let login: LoginChatGptResponse = to_response(login_resp)?;

    let cancel_id = mcp
        .send_cancel_login_chat_gpt_request(CancelLoginChatGptParams {
            login_id: login.login_id,
        })
-        .await
-        .expect("send cancelLoginChatGpt");
+        .await?;
    let cancel_resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(cancel_id)),
    )
-    .await
-    .expect("cancelLoginChatGpt timeout")
-    .expect("cancelLoginChatGpt response");
-    let _ok: CancelLoginChatGptResponse =
-        to_response(cancel_resp).expect("deserialize cancel response");
+    .await??;
+    let _ok: CancelLoginChatGptResponse = to_response(cancel_resp)?;

    // Optionally observe the completion notification; do not fail if it races.
    let maybe_note = timeout(
@@ -144,6 +126,7 @@ async fn login_and_cancel_chatgpt() {
    if maybe_note.is_err() {
        eprintln!("warning: did not observe login_chat_gpt_complete notification after cancel");
    }
+    Ok(())
 }

 fn create_config_toml_forced_login(codex_home: &Path, forced_method: &str) -> std::io::Result<()> {
@@ -176,66 +159,48 @@ forced_chatgpt_workspace_id = "{workspace_id}"
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn login_chatgpt_rejected_when_forced_api() {
-    let codex_home = TempDir::new().unwrap_or_else(|e| panic!("create tempdir: {e}"));
-    create_config_toml_forced_login(codex_home.path(), "api")
-        .unwrap_or_else(|err| panic!("write config.toml: {err}"));
+async fn login_chatgpt_rejected_when_forced_api() -> Result<()> {
+    let codex_home = TempDir::new()?;
+    create_config_toml_forced_login(codex_home.path(), "api")?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init failed");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

-    let request_id = mcp
-        .send_login_chat_gpt_request()
-        .await
-        .expect("send loginChatGpt");
+    let request_id = mcp.send_login_chat_gpt_request().await?;
    let err: JSONRPCError = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_error_message(RequestId::Integer(request_id)),
    )
-    .await
-    .expect("loginChatGpt error timeout")
-    .expect("loginChatGpt error");
+    .await??;

    assert_eq!(
        err.error.message,
        "ChatGPT login is disabled. Use API key login instead."
    );
+    Ok(())
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn login_chatgpt_includes_forced_workspace_query_param() {
-    let codex_home = TempDir::new().unwrap_or_else(|e| panic!("create tempdir: {e}"));
-    create_config_toml_forced_workspace(codex_home.path(), "ws-forced")
-        .unwrap_or_else(|err| panic!("write config.toml: {err}"));
+// Serialize tests that launch the login server since it binds to a fixed port.
+#[serial(login_port)]
+async fn login_chatgpt_includes_forced_workspace_query_param() -> Result<()> {
+    let codex_home = TempDir::new()?;
+    create_config_toml_forced_workspace(codex_home.path(), "ws-forced")?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init failed");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

-    let request_id = mcp
-        .send_login_chat_gpt_request()
-        .await
-        .expect("send loginChatGpt");
+    let request_id = mcp.send_login_chat_gpt_request().await?;
    let resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
    )
-    .await
-    .expect("loginChatGpt timeout")
-    .expect("loginChatGpt response");
+    .await??;

-    let login: LoginChatGptResponse = to_response(resp).expect("deserialize login resp");
+    let login: LoginChatGptResponse = to_response(resp)?;
    assert!(
        login.auth_url.contains("allowed_workspace_id=ws-forced"),
        "auth URL should include forced workspace"
    );
+    Ok(())
 }
--- a/codex-rs/app-server/tests/suite/rate_limits.rs
+++ b/codex-rs/app-server/tests/suite/rate_limits.rs
@@ -1,4 +1,3 @@
-use anyhow::Context;
 use anyhow::Result;
 use app_test_support::ChatGptAuthFixture;
 use app_test_support::McpProcess;
@@ -9,6 +8,7 @@ use codex_app_server_protocol::JSONRPCError;
 use codex_app_server_protocol::JSONRPCResponse;
 use codex_app_server_protocol::LoginApiKeyParams;
 use codex_app_server_protocol::RequestId;
+use codex_core::auth::AuthCredentialsStoreMode;
 use codex_protocol::protocol::RateLimitSnapshot;
 use codex_protocol::protocol::RateLimitWindow;
 use pretty_assertions::assert_eq;
@@ -28,28 +28,18 @@ const INVALID_REQUEST_ERROR_CODE: i64 = -32600;

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn get_account_rate_limits_requires_auth() -> Result<()> {
-    let codex_home = TempDir::new().context("create codex home tempdir")?;
+    let codex_home = TempDir::new()?;

-    let mut mcp = McpProcess::new_with_env(codex_home.path(), &[("OPENAI_API_KEY", None)])
-        .await
-        .context("spawn mcp process")?;
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .context("initialize timeout")?
-        .context("initialize request")?;
+    let mut mcp = McpProcess::new_with_env(codex_home.path(), &[("OPENAI_API_KEY", None)]).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

-    let request_id = mcp
-        .send_get_account_rate_limits_request()
-        .await
-        .context("send account/rateLimits/read")?;
+    let request_id = mcp.send_get_account_rate_limits_request().await?;

    let error: JSONRPCError = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_error_message(RequestId::Integer(request_id)),
    )
-    .await
-    .context("account/rateLimits/read timeout")?
-    .context("account/rateLimits/read error")?;
+    .await??;

    assert_eq!(error.id, RequestId::Integer(request_id));
    assert_eq!(error.error.code, INVALID_REQUEST_ERROR_CODE);
@@ -63,30 +53,20 @@ async fn get_account_rate_limits_requires_auth() -> Result<()> {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn get_account_rate_limits_requires_chatgpt_auth() -> Result<()> {
-    let codex_home = TempDir::new().context("create codex home tempdir")?;
+    let codex_home = TempDir::new()?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .context("spawn mcp process")?;
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .context("initialize timeout")?
-        .context("initialize request")?;
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    login_with_api_key(&mut mcp, "sk-test-key").await?;

-    let request_id = mcp
-        .send_get_account_rate_limits_request()
-        .await
-        .context("send account/rateLimits/read")?;
+    let request_id = mcp.send_get_account_rate_limits_request().await?;

    let error: JSONRPCError = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_error_message(RequestId::Integer(request_id)),
    )
-    .await
-    .context("account/rateLimits/read timeout")?
-    .context("account/rateLimits/read error")?;
+    .await??;

    assert_eq!(error.id, RequestId::Integer(request_id));
    assert_eq!(error.error.code, INVALID_REQUEST_ERROR_CODE);
@@ -100,18 +80,18 @@ async fn get_account_rate_limits_requires_chatgpt_auth() -> Result<()> {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn get_account_rate_limits_returns_snapshot() -> Result<()> {
-    let codex_home = TempDir::new().context("create codex home tempdir")?;
+    let codex_home = TempDir::new()?;
    write_chatgpt_auth(
        codex_home.path(),
        ChatGptAuthFixture::new("chatgpt-token")
            .account_id("account-123")
            .plan_type("pro"),
-    )
-    .context("write chatgpt auth")?;
+        AuthCredentialsStoreMode::File,
+    )?;

    let server = MockServer::start().await;
    let server_url = server.uri();
-    write_chatgpt_base_url(codex_home.path(), &server_url).context("write chatgpt base url")?;
+    write_chatgpt_base_url(codex_home.path(), &server_url)?;

    let primary_reset_timestamp = chrono::DateTime::parse_from_rfc3339("2025-01-01T00:02:00Z")
        .expect("parse primary reset timestamp")
@@ -147,29 +127,18 @@ async fn get_account_rate_limits_returns_snapshot() -> Result<()> {
        .mount(&server)
        .await;

-    let mut mcp = McpProcess::new_with_env(codex_home.path(), &[("OPENAI_API_KEY", None)])
-        .await
-        .context("spawn mcp process")?;
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .context("initialize timeout")?
-        .context("initialize request")?;
+    let mut mcp = McpProcess::new_with_env(codex_home.path(), &[("OPENAI_API_KEY", None)]).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

-    let request_id = mcp
-        .send_get_account_rate_limits_request()
-        .await
-        .context("send account/rateLimits/read")?;
+    let request_id = mcp.send_get_account_rate_limits_request().await?;

    let response: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
    )
-    .await
-    .context("account/rateLimits/read timeout")?
-    .context("account/rateLimits/read response")?;
+    .await??;

-    let received: GetAccountRateLimitsResponse =
-        to_response(response).context("deserialize rate limit response")?;
+    let received: GetAccountRateLimitsResponse = to_response(response)?;

    let expected = GetAccountRateLimitsResponse {
        rate_limits: RateLimitSnapshot {
@@ -195,16 +164,13 @@ async fn login_with_api_key(mcp: &mut McpProcess, api_key: &str) -> Result<()> {
        .send_login_api_key_request(LoginApiKeyParams {
            api_key: api_key.to_string(),
        })
-        .await
-        .context("send loginApiKey")?;
+        .await?;

    timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
    )
-    .await
-    .context("loginApiKey timeout")?
-    .context("loginApiKey response")?;
+    .await??;

    Ok(())
 }
--- a/codex-rs/app-server/tests/suite/send_message.rs
+++ b/codex-rs/app-server/tests/suite/send_message.rs
@@ -1,5 +1,4 @@
-use std::path::Path;
-
+use anyhow::Result;
 use app_test_support::McpProcess;
 use app_test_support::create_final_assistant_message_sse_response;
 use app_test_support::create_mock_chat_completions_server;
@@ -15,73 +14,73 @@ use codex_app_server_protocol::RequestId;
 use codex_app_server_protocol::SendUserMessageParams;
 use codex_app_server_protocol::SendUserMessageResponse;
 use codex_protocol::ConversationId;
+use codex_protocol::models::ContentItem;
+use codex_protocol::models::ResponseItem;
 use pretty_assertions::assert_eq;
+use std::path::Path;
 use tempfile::TempDir;
 use tokio::time::timeout;

 const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);

 #[tokio::test]
-async fn test_send_message_success() {
+async fn test_send_message_success() -> Result<()> {
    // Spin up a mock completions server that immediately ends the Codex turn.
    // Two Codex turns hit the mock model (session start + send-user-message). Provide two SSE responses.
    let responses = vec![
-        create_final_assistant_message_sse_response("Done").expect("build mock assistant message"),
-        create_final_assistant_message_sse_response("Done").expect("build mock assistant message"),
+        create_final_assistant_message_sse_response("Done")?,
+        create_final_assistant_message_sse_response("Done")?,
    ];
    let server = create_mock_chat_completions_server(responses).await;

    // Create a temporary Codex home with config pointing at the mock server.
-    let codex_home = TempDir::new().expect("create temp dir");
-    create_config_toml(codex_home.path(), &server.uri()).expect("write config.toml");
+    let codex_home = TempDir::new()?;
+    create_config_toml(codex_home.path(), &server.uri())?;

    // Start MCP server process and initialize.
-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timed out")
-        .expect("init failed");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    // Start a conversation using the new wire API.
    let new_conv_id = mcp
        .send_new_conversation_request(NewConversationParams::default())
-        .await
-        .expect("send newConversation");
+        .await?;
    let new_conv_resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(new_conv_id)),
    )
-    .await
-    .expect("newConversation timeout")
-    .expect("newConversation resp");
+    .await??;
    let NewConversationResponse {
        conversation_id, ..
-    } = to_response::<_>(new_conv_resp).expect("deserialize newConversation response");
+    } = to_response::<_>(new_conv_resp)?;

    // 2) addConversationListener
    let add_listener_id = mcp
-        .send_add_conversation_listener_request(AddConversationListenerParams { conversation_id })
-        .await
-        .expect("send addConversationListener");
+        .send_add_conversation_listener_request(AddConversationListenerParams {
+            conversation_id,
+            experimental_raw_events: false,
+        })
+        .await?;
    let add_listener_resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(add_listener_id)),
    )
-    .await
-    .expect("addConversationListener timeout")
-    .expect("addConversationListener resp");
+    .await??;
    let AddConversationSubscriptionResponse { subscription_id: _ } =
-        to_response::<_>(add_listener_resp).expect("deserialize addConversationListener response");
+        to_response::<_>(add_listener_resp)?;

    // Now exercise sendUserMessage twice.
-    send_message("Hello", conversation_id, &mut mcp).await;
-    send_message("Hello again", conversation_id, &mut mcp).await;
+    send_message("Hello", conversation_id, &mut mcp).await?;
+    send_message("Hello again", conversation_id, &mut mcp).await?;
+    Ok(())
 }

 #[expect(clippy::expect_used)]
-async fn send_message(message: &str, conversation_id: ConversationId, mcp: &mut McpProcess) {
+async fn send_message(
+    message: &str,
+    conversation_id: ConversationId,
+    mcp: &mut McpProcess,
+) -> Result<()> {
    // Now exercise sendUserMessage.
    let send_id = mcp
        .send_send_user_message_request(SendUserMessageParams {
@@ -90,19 +89,15 @@ async fn send_message(message: &str, conversation_id: ConversationId, mcp: &mut
                text: message.to_string(),
            }],
        })
-        .await
-        .expect("send sendUserMessage");
+        .await?;

    let response: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(send_id)),
    )
-    .await
-    .expect("sendUserMessage response timeout")
-    .expect("sendUserMessage response error");
+    .await??;

-    let _ok: SendUserMessageResponse = to_response::<SendUserMessageResponse>(response)
-        .expect("deserialize sendUserMessage response");
+    let _ok: SendUserMessageResponse = to_response::<SendUserMessageResponse>(response)?;

    // Verify the task_finished notification is received.
    // Note this also ensures that the final request to the server was made.
@@ -110,9 +105,7 @@ async fn send_message(message: &str, conversation_id: ConversationId, mcp: &mut
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_notification_message("codex/event/task_complete"),
    )
-    .await
-    .expect("task_finished_notification timeout")
-    .expect("task_finished_notification resp");
+    .await??;
    let serde_json::Value::Object(map) = task_finished_notification
        .params
        .expect("notification should have params")
@@ -124,17 +117,99 @@ async fn send_message(message: &str, conversation_id: ConversationId, mcp: &mut
            .expect("should have conversationId"),
        &serde_json::Value::String(conversation_id.to_string())
    );
+
+    let raw_attempt = tokio::time::timeout(
+        std::time::Duration::from_millis(200),
+        mcp.read_stream_until_notification_message("codex/event/raw_response_item"),
+    )
+    .await;
+    assert!(
+        raw_attempt.is_err(),
+        "unexpected raw item notification when not opted in"
+    );
+    Ok(())
 }

 #[tokio::test]
-async fn test_send_message_session_not_found() {
+async fn test_send_message_raw_notifications_opt_in() -> Result<()> {
+    // Mock model backend scripted with a single final assistant message ("Done").
+    let responses = vec![create_final_assistant_message_sse_response("Done")?];
+    let server = create_mock_chat_completions_server(responses).await;
+
+    let codex_home = TempDir::new()?;
+    create_config_toml(codex_home.path(), &server.uri())?;
+
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
+
+    let new_conv_id = mcp
+        .send_new_conversation_request(NewConversationParams::default())
+        .await?;
+    let new_conv_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(new_conv_id)),
+    )
+    .await??;
+    let NewConversationResponse {
+        conversation_id, ..
+    } = to_response::<_>(new_conv_resp)?;
+
+    // Opt in with `experimental_raw_events: true`; the rest of this test expects
+    // `codex/event/raw_response_item` notifications as a result.
+    let add_listener_id = mcp
+        .send_add_conversation_listener_request(AddConversationListenerParams {
+            conversation_id,
+            experimental_raw_events: true,
+        })
+        .await?;
+    let add_listener_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(add_listener_id)),
+    )
+    .await??;
+    let AddConversationSubscriptionResponse { subscription_id: _ } =
+        to_response::<_>(add_listener_resp)?;
+
+    let send_id = mcp
+        .send_send_user_message_request(SendUserMessageParams {
+            conversation_id,
+            items: vec![InputItem::Text {
+                text: "Hello".to_string(),
+            }],
+        })
+        .await?;
+
+    // The raw items are read in a fixed order. Before the sendUserMessage
+    // response: the user-instructions message, then the environment context.
+    let instructions = read_raw_response_item(&mut mcp, conversation_id).await;
+    assert_instructions_message(&instructions);
+
+    let environment = read_raw_response_item(&mut mcp, conversation_id).await;
+    assert_environment_message(&environment);
+
+    // Next, the JSON-RPC response to the sendUserMessage request itself.
+    let response: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(send_id)),
+    )
+    .await??;
+    let _ok: SendUserMessageResponse = to_response::<SendUserMessageResponse>(response)?;
+
+    // After the response: the user's "Hello" item, then the assistant's "Done"
+    // reply served by the mock.
+    let user_message = read_raw_response_item(&mut mcp, conversation_id).await;
+    assert_user_message(&user_message, "Hello");
+
+    let assistant_message = read_raw_response_item(&mut mcp, conversation_id).await;
+    assert_assistant_message(&assistant_message, "Done");
+
+    // Best-effort wait (250 ms) for the task_complete notification; the outcome,
+    // including a timeout, is deliberately ignored.
+    let _ = tokio::time::timeout(
+        std::time::Duration::from_millis(250),
+        mcp.read_stream_until_notification_message("codex/event/task_complete"),
+    )
+    .await;
+
+    Ok(())
+}
+
+#[tokio::test]
+async fn test_send_message_session_not_found() -> Result<()> {
    // Start MCP without creating a Codex session
-    let codex_home = TempDir::new().expect("tempdir");
-    let mut mcp = McpProcess::new(codex_home.path()).await.expect("spawn");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("timeout")
-        .expect("init");
+    let codex_home = TempDir::new()?;
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    let unknown = ConversationId::new();
    let req_id = mcp
@@ -144,18 +219,16 @@ async fn test_send_message_session_not_found() {
                text: "ping".to_string(),
            }],
        })
-        .await
-        .expect("send sendUserMessage");
+        .await?;

    // Expect an error response for unknown conversation.
    let err = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_error_message(RequestId::Integer(req_id)),
    )
-    .await
-    .expect("timeout")
-    .expect("error");
+    .await??;
    assert_eq!(err.id, RequestId::Integer(req_id));
+    Ok(())
 }

 // ---------------------------------------------------------------------------
@@ -184,3 +257,108 @@ stream_max_retries = 0
        ),
    )
 }
+
+/// Waits for the next `codex/event/raw_response_item` notification and returns its payload.
+///
+/// Reads from `mcp` until a notification with that method arrives (bounded by
+/// `DEFAULT_READ_TIMEOUT`), asserts that its `conversationId` param equals
+/// `conversation_id`, and deserializes the `msg` param into a `ResponseItem`.
+///
+/// Panics (failing the calling test) on timeout, read error, missing or
+/// non-object params, a missing or mismatched `conversationId`, or a `msg`
+/// payload that is absent or does not deserialize.
+#[expect(clippy::expect_used)]
+async fn read_raw_response_item(
+    mcp: &mut McpProcess,
+    conversation_id: ConversationId,
+) -> ResponseItem {
+    let raw_notification: JSONRPCNotification = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_notification_message("codex/event/raw_response_item"),
+    )
+    .await
+    .expect("codex/event/raw_response_item notification timeout")
+    .expect("codex/event/raw_response_item notification resp");
+
+    // `expect` covers absent params; the `else` arm covers params that are
+    // present but not a JSON object, so the two messages must differ.
+    let serde_json::Value::Object(params) = raw_notification
+        .params
+        .expect("codex/event/raw_response_item should have params")
+    else {
+        panic!("codex/event/raw_response_item params should be a JSON object");
+    };
+
+    let conversation_id_value = params
+        .get("conversationId")
+        .and_then(|value| value.as_str())
+        .expect("raw response item should include conversationId");
+
+    assert_eq!(
+        conversation_id_value,
+        conversation_id.to_string(),
+        "raw response item conversation mismatch"
+    );
+
+    let msg_value = params
+        .get("msg")
+        .cloned()
+        .expect("raw response item should include msg payload");
+
+    serde_json::from_value(msg_value).expect("deserialize raw response item")
+}
+
+/// Asserts that `item` is a `user` message carrying the `<user_instructions>` tag.
+///
+/// Panics if `item` is not a `ResponseItem::Message` or none of its texts match.
+fn assert_instructions_message(item: &ResponseItem) {
+    let (role, content) = match item {
+        ResponseItem::Message { role, content, .. } => (role, content),
+        other => panic!("expected instructions message, got {other:?}"),
+    };
+    assert_eq!(role, "user");
+    let texts = content_texts(content);
+    let has_tag = texts
+        .iter()
+        .any(|text| text.contains("<user_instructions>"));
+    assert!(has_tag, "expected instructions message, got {texts:?}");
+}
+
+/// Asserts that `item` is a `user` message carrying the `<environment_context>` tag.
+///
+/// Panics if `item` is not a `ResponseItem::Message` or none of its texts match.
+fn assert_environment_message(item: &ResponseItem) {
+    let (role, content) = match item {
+        ResponseItem::Message { role, content, .. } => (role, content),
+        other => panic!("expected environment message, got {other:?}"),
+    };
+    assert_eq!(role, "user");
+    let texts = content_texts(content);
+    let has_tag = texts
+        .iter()
+        .any(|text| text.contains("<environment_context>"));
+    assert!(has_tag, "expected environment context message, got {texts:?}");
+}
+
+/// Asserts that `item` is a `user` message whose text content is exactly
+/// `[expected_text]`; panics for any other `ResponseItem` variant.
+fn assert_user_message(item: &ResponseItem, expected_text: &str) {
+    let (role, content) = match item {
+        ResponseItem::Message { role, content, .. } => (role, content),
+        other => panic!("expected user message, got {other:?}"),
+    };
+    assert_eq!(role, "user");
+    assert_eq!(content_texts(content), vec![expected_text]);
+}
+
+/// Asserts that `item` is an `assistant` message whose text content is exactly
+/// `[expected_text]`; panics for any other `ResponseItem` variant.
+fn assert_assistant_message(item: &ResponseItem, expected_text: &str) {
+    let (role, content) = match item {
+        ResponseItem::Message { role, content, .. } => (role, content),
+        other => panic!("expected assistant message, got {other:?}"),
+    };
+    assert_eq!(role, "assistant");
+    assert_eq!(content_texts(content), vec![expected_text]);
+}
+
+/// Collects the text of every `InputText` and `OutputText` entry in `content`,
+/// in order, skipping all other `ContentItem` variants.
+fn content_texts(content: &[ContentItem]) -> Vec<&str> {
+    let mut texts = Vec::new();
+    for item in content {
+        if let ContentItem::InputText { text } | ContentItem::OutputText { text } = item {
+            texts.push(text.as_str());
+        }
+    }
+    texts
+}
--- a/codex-rs/app-server/tests/suite/set_default_model.rs
+++ b/codex-rs/app-server/tests/suite/set_default_model.rs
@@ -1,5 +1,4 @@
-use std::path::Path;
-
+use anyhow::Result;
 use app_test_support::McpProcess;
 use app_test_support::to_response;
 use codex_app_server_protocol::JSONRPCResponse;
@@ -8,50 +7,38 @@ use codex_app_server_protocol::SetDefaultModelParams;
 use codex_app_server_protocol::SetDefaultModelResponse;
 use codex_core::config::ConfigToml;
 use pretty_assertions::assert_eq;
+use std::path::Path;
 use tempfile::TempDir;
 use tokio::time::timeout;

 const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn set_default_model_persists_overrides() {
-    let codex_home = TempDir::new().expect("create tempdir");
-    create_config_toml(codex_home.path()).expect("write config.toml");
+async fn set_default_model_persists_overrides() -> Result<()> {
+    let codex_home = TempDir::new()?;
+    create_config_toml(codex_home.path())?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("init timeout")
-        .expect("init failed");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

    let params = SetDefaultModelParams {
        model: Some("gpt-4.1".to_string()),
        reasoning_effort: None,
    };

-    let request_id = mcp
-        .send_set_default_model_request(params)
-        .await
-        .expect("send setDefaultModel");
+    let request_id = mcp.send_set_default_model_request(params).await?;

    let resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
    )
-    .await
-    .expect("setDefaultModel timeout")
-    .expect("setDefaultModel response");
+    .await??;

-    let _: SetDefaultModelResponse =
-        to_response(resp).expect("deserialize setDefaultModel response");
+    let _: SetDefaultModelResponse = to_response(resp)?;

    let config_path = codex_home.path().join("config.toml");
-    let config_contents = tokio::fs::read_to_string(&config_path)
-        .await
-        .expect("read config.toml");
-    let config_toml: ConfigToml = toml::from_str(&config_contents).expect("parse config.toml");
+    let config_contents = tokio::fs::read_to_string(&config_path).await?;
+    let config_toml: ConfigToml = toml::from_str(&config_contents)?;

    assert_eq!(
        ConfigToml {
@@ -61,6 +48,7 @@ async fn set_default_model_persists_overrides() {
        },
        config_toml,
    );
+    Ok(())
 }

 // Helper to create a config.toml; mirrors create_conversation.rs
--- a/codex-rs/app-server/tests/suite/user_agent.rs
+++ b/codex-rs/app-server/tests/suite/user_agent.rs
@@ -1,3 +1,4 @@
+use anyhow::Result;
 use app_test_support::McpProcess;
 use app_test_support::to_response;
 use codex_app_server_protocol::GetUserAgentResponse;
@@ -10,28 +11,18 @@ use tokio::time::timeout;
 const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn get_user_agent_returns_current_codex_user_agent() {
-    let codex_home = TempDir::new().unwrap_or_else(|err| panic!("create tempdir: {err}"));
+async fn get_user_agent_returns_current_codex_user_agent() -> Result<()> {
+    let codex_home = TempDir::new()?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("initialize timeout")
-        .expect("initialize request");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

-    let request_id = mcp
-        .send_get_user_agent_request()
-        .await
-        .expect("send getUserAgent");
+    let request_id = mcp.send_get_user_agent_request().await?;
    let response: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
    )
-    .await
-    .expect("getUserAgent timeout")
-    .expect("getUserAgent response");
+    .await??;

    let os_info = os_info::get();
    let user_agent = format!(
@@ -42,9 +33,9 @@ async fn get_user_agent_returns_current_codex_user_agent() {
        codex_core::terminal::user_agent()
    );

-    let received: GetUserAgentResponse =
-        to_response(response).expect("deserialize getUserAgent response");
+    let received: GetUserAgentResponse = to_response(response)?;
    let expected = GetUserAgentResponse { user_agent };

    assert_eq!(received, expected);
+    Ok(())
 }
--- a/codex-rs/app-server/tests/suite/user_info.rs
+++ b/codex-rs/app-server/tests/suite/user_info.rs
@@ -1,5 +1,4 @@
-use std::time::Duration;
-
+use anyhow::Result;
 use app_test_support::ChatGptAuthFixture;
 use app_test_support::McpProcess;
 use app_test_support::to_response;
@@ -7,45 +6,41 @@ use app_test_support::write_chatgpt_auth;
 use codex_app_server_protocol::JSONRPCResponse;
 use codex_app_server_protocol::RequestId;
 use codex_app_server_protocol::UserInfoResponse;
+use codex_core::auth::AuthCredentialsStoreMode;
 use pretty_assertions::assert_eq;
+use std::time::Duration;
 use tempfile::TempDir;
 use tokio::time::timeout;

 const DEFAULT_READ_TIMEOUT: Duration = Duration::from_secs(10);

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn user_info_returns_email_from_auth_json() {
-    let codex_home = TempDir::new().expect("create tempdir");
+async fn user_info_returns_email_from_auth_json() -> Result<()> {
+    let codex_home = TempDir::new()?;

    write_chatgpt_auth(
        codex_home.path(),
        ChatGptAuthFixture::new("access")
            .refresh_token("refresh")
            .email("user@example.com"),
-    )
-    .expect("write chatgpt auth");
+        AuthCredentialsStoreMode::File,
+    )?;

-    let mut mcp = McpProcess::new(codex_home.path())
-        .await
-        .expect("spawn mcp process");
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
-        .await
-        .expect("initialize timeout")
-        .expect("initialize request");
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

-    let request_id = mcp.send_user_info_request().await.expect("send userInfo");
+    let request_id = mcp.send_user_info_request().await?;
    let response: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
    )
-    .await
-    .expect("userInfo timeout")
-    .expect("userInfo response");
+    .await??;

-    let received: UserInfoResponse = to_response(response).expect("deserialize userInfo response");
+    let received: UserInfoResponse = to_response(response)?;
    let expected = UserInfoResponse {
        alleged_user_email: Some("user@example.com".to_string()),
    };

    assert_eq!(received, expected);
+    Ok(())
 }
--- a/codex-rs/chatgpt/src/apply_command.rs
+++ b/codex-rs/chatgpt/src/apply_command.rs
@@ -32,7 +32,8 @@ pub async fn run_apply_command(
    )
    .await?;

-    init_chatgpt_token_from_auth(&config.codex_home).await?;
+    init_chatgpt_token_from_auth(&config.codex_home, config.cli_auth_credentials_store_mode)
+        .await?;

    let task_response = get_task(&config, apply_cli.task_id).await?;
    apply_diff_from_task(task_response, cwd).await
--- a/codex-rs/chatgpt/src/chatgpt_client.rs
+++ b/codex-rs/chatgpt/src/chatgpt_client.rs
@@ -13,7 +13,8 @@ pub(crate) async fn chatgpt_get_request<T: DeserializeOwned>(
    path: String,
 ) -> anyhow::Result<T> {
    let chatgpt_base_url = &config.chatgpt_base_url;
-    init_chatgpt_token_from_auth(&config.codex_home).await?;
+    init_chatgpt_token_from_auth(&config.codex_home, config.cli_auth_credentials_store_mode)
+        .await?;

    // Make direct HTTP request to ChatGPT backend API with the token
    let client = create_client();
--- a/codex-rs/chatgpt/src/chatgpt_token.rs
+++ b/codex-rs/chatgpt/src/chatgpt_token.rs
@@ -3,6 +3,7 @@ use std::path::Path;
 use std::sync::LazyLock;
 use std::sync::RwLock;

+use codex_core::auth::AuthCredentialsStoreMode;
 use codex_core::token_data::TokenData;

 static CHATGPT_TOKEN: LazyLock<RwLock<Option<TokenData>>> = LazyLock::new(|| RwLock::new(None));
@@ -18,8 +19,11 @@ pub fn set_chatgpt_token_data(value: TokenData) {
 }

 /// Initialize the ChatGPT token from auth.json file
-pub async fn init_chatgpt_token_from_auth(codex_home: &Path) -> std::io::Result<()> {
-    let auth = CodexAuth::from_codex_home(codex_home)?;
+pub async fn init_chatgpt_token_from_auth(
+    codex_home: &Path,
+    auth_credentials_store_mode: AuthCredentialsStoreMode,
+) -> std::io::Result<()> {
+    let auth = CodexAuth::from_auth_storage(codex_home, auth_credentials_store_mode)?;
    if let Some(auth) = auth {
        let token_data = auth.get_token_data().await?;
        set_chatgpt_token_data(token_data);
--- a/codex-rs/cli/src/login.rs
+++ b/codex-rs/cli/src/login.rs
@@ -1,6 +1,7 @@
 use codex_app_server_protocol::AuthMode;
 use codex_common::CliConfigOverrides;
 use codex_core::CodexAuth;
+use codex_core::auth::AuthCredentialsStoreMode;
 use codex_core::auth::CLIENT_ID;
 use codex_core::auth::login_with_api_key;
 use codex_core::auth::logout;
@@ -17,11 +18,13 @@ use std::path::PathBuf;
 pub async fn login_with_chatgpt(
    codex_home: PathBuf,
    forced_chatgpt_workspace_id: Option<String>,
+    cli_auth_credentials_store_mode: AuthCredentialsStoreMode,
 ) -> std::io::Result<()> {
    let opts = ServerOptions::new(
        codex_home,
        CLIENT_ID.to_string(),
        forced_chatgpt_workspace_id,
+        cli_auth_credentials_store_mode,
    );
    let server = run_login_server(opts)?;

@@ -43,7 +46,13 @@ pub async fn run_login_with_chatgpt(cli_config_overrides: CliConfigOverrides) ->

    let forced_chatgpt_workspace_id = config.forced_chatgpt_workspace_id.clone();

-    match login_with_chatgpt(config.codex_home, forced_chatgpt_workspace_id).await {
+    match login_with_chatgpt(
+        config.codex_home,
+        forced_chatgpt_workspace_id,
+        config.cli_auth_credentials_store_mode,
+    )
+    .await
+    {
        Ok(_) => {
            eprintln!("Successfully logged in");
            std::process::exit(0);
@@ -66,7 +75,11 @@ pub async fn run_login_with_api_key(
        std::process::exit(1);
    }

-    match login_with_api_key(&config.codex_home, &api_key) {
+    match login_with_api_key(
+        &config.codex_home,
+        &api_key,
+        config.cli_auth_credentials_store_mode,
+    ) {
        Ok(_) => {
            eprintln!("Successfully logged in");
            std::process::exit(0);
@@ -121,6 +134,7 @@ pub async fn run_login_with_device_code(
        config.codex_home,
        client_id.unwrap_or(CLIENT_ID.to_string()),
        forced_chatgpt_workspace_id,
+        config.cli_auth_credentials_store_mode,
    );
    if let Some(iss) = issuer_base_url {
        opts.issuer = iss;
@@ -140,7 +154,7 @@ pub async fn run_login_with_device_code(
 pub async fn run_login_status(cli_config_overrides: CliConfigOverrides) -> ! {
    let config = load_config_or_exit(cli_config_overrides).await;

-    match CodexAuth::from_codex_home(&config.codex_home) {
+    match CodexAuth::from_auth_storage(&config.codex_home, config.cli_auth_credentials_store_mode) {
        Ok(Some(auth)) => match auth.mode {
            AuthMode::ApiKey => match auth.get_token().await {
                Ok(api_key) => {
@@ -171,7 +185,7 @@ pub async fn run_login_status(cli_config_overrides: CliConfigOverrides) -> ! {
 pub async fn run_logout(cli_config_overrides: CliConfigOverrides) -> ! {
    let config = load_config_or_exit(cli_config_overrides).await;

-    match logout(&config.codex_home) {
+    match logout(&config.codex_home, config.cli_auth_credentials_store_mode) {
        Ok(true) => {
            eprintln!("Successfully logged out");
            std::process::exit(0);
--- a/codex-rs/cli/src/main.rs
+++ b/codex-rs/cli/src/main.rs
@@ -29,6 +29,7 @@ mod mcp_cmd;
 use crate::mcp_cmd::McpCli;
 use codex_core::config::Config;
 use codex_core::config::ConfigOverrides;
+use codex_core::features::is_known_feature_key;

 /// Codex CLI
 ///
@@ -286,15 +287,25 @@ struct FeatureToggles {
 }

 impl FeatureToggles {
-    fn to_overrides(&self) -> Vec<String> {
+    fn to_overrides(&self) -> anyhow::Result<Vec<String>> {
        let mut v = Vec::new();
-        for k in &self.enable {
-            v.push(format!("features.{k}=true"));
+        for feature in &self.enable {
+            Self::validate_feature(feature)?;
+            v.push(format!("features.{feature}=true"));
        }
-        for k in &self.disable {
-            v.push(format!("features.{k}=false"));
+        for feature in &self.disable {
+            Self::validate_feature(feature)?;
+            v.push(format!("features.{feature}=false"));
+        }
+        Ok(v)
+    }
+
+    fn validate_feature(feature: &str) -> anyhow::Result<()> {
+        if is_known_feature_key(feature) {
+            Ok(())
+        } else {
+            anyhow::bail!("Unknown feature flag: {feature}")
        }
-        v
    }
 }

@@ -345,9 +356,8 @@ async fn cli_main(codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()
    } = MultitoolCli::parse();

    // Fold --enable/--disable into config overrides so they flow to all subcommands.
-    root_config_overrides
-        .raw_overrides
-        .extend(feature_toggles.to_overrides());
+    let toggle_overrides = feature_toggles.to_overrides()?;
+    root_config_overrides.raw_overrides.extend(toggle_overrides);

    match subcommand {
        None => {
@@ -605,6 +615,7 @@ mod tests {
    use assert_matches::assert_matches;
    use codex_core::protocol::TokenUsage;
    use codex_protocol::ConversationId;
+    use pretty_assertions::assert_eq;

    fn finalize_from_args(args: &[&str]) -> TuiCli {
        let cli = MultitoolCli::try_parse_from(args).expect("parse");
@@ -781,4 +792,32 @@ mod tests {
        assert!(!interactive.resume_last);
        assert_eq!(interactive.resume_session_id, None);
    }
+
+    #[test]
+    fn feature_toggles_known_features_generate_overrides() {
+        let toggles = FeatureToggles {
+            enable: vec!["web_search_request".to_string()],
+            disable: vec!["unified_exec".to_string()],
+        };
+        let overrides = toggles.to_overrides().expect("valid features");
+        assert_eq!(
+            overrides,
+            vec![
+                "features.web_search_request=true".to_string(),
+                "features.unified_exec=false".to_string(),
+            ]
+        );
+    }
+
+    #[test]
+    fn feature_toggles_unknown_feature_errors() {
+        let toggles = FeatureToggles {
+            enable: vec!["does_not_exist".to_string()],
+            disable: Vec::new(),
+        };
+        let err = toggles
+            .to_overrides()
+            .expect_err("feature should be rejected");
+        assert_eq!(err.to_string(), "Unknown feature flag: does_not_exist");
+    }
 }
--- a/codex-rs/cli/src/mcp_cmd.rs
+++ b/codex-rs/cli/src/mcp_cmd.rs
@@ -274,19 +274,33 @@ async fn run_add(config_overrides: &CliConfigOverrides, add_args: AddArgs) -> Re
        http_headers,
        env_http_headers,
    } = transport
-        && matches!(supports_oauth_login(&url).await, Ok(true))
    {
-        println!("Detected OAuth support. Starting OAuth flow…");
-        perform_oauth_login(
-            &name,
-            &url,
-            config.mcp_oauth_credentials_store_mode,
-            http_headers.clone(),
-            env_http_headers.clone(),
-            &Vec::new(),
-        )
-        .await?;
-        println!("Successfully logged in.");
+        match supports_oauth_login(&url).await {
+            Ok(true) => {
+                if !config.features.enabled(Feature::RmcpClient) {
+                    println!(
+                        "MCP server supports login. Add `experimental_use_rmcp_client = true` \
+                         to your config.toml and run `codex mcp login {name}` to login."
+                    );
+                } else {
+                    println!("Detected OAuth support. Starting OAuth flow…");
+                    perform_oauth_login(
+                        &name,
+                        &url,
+                        config.mcp_oauth_credentials_store_mode,
+                        http_headers.clone(),
+                        env_http_headers.clone(),
+                        &Vec::new(),
+                    )
+                    .await?;
+                    println!("Successfully logged in.");
+                }
+            }
+            Ok(false) => {}
+            Err(_) => println!(
+                "MCP server may or may not require login. Run `codex mcp login {name}` to login."
+            ),
+        }
    }

    Ok(())
@@ -523,10 +537,12 @@ async fn run_list(config_overrides: &CliConfigOverrides, list_args: ListArgs) ->
                    .map(|entry| entry.auth_status)
                    .unwrap_or(McpAuthStatus::Unsupported)
                    .to_string();
+                let bearer_token_display =
+                    bearer_token_env_var.as_deref().unwrap_or("-").to_string();
                http_rows.push([
                    name.clone(),
                    url.clone(),
-                    bearer_token_env_var.clone().unwrap_or("-".to_string()),
+                    bearer_token_display,
                    status,
                    auth_status,
                ]);
@@ -752,15 +768,15 @@ async fn run_get(config_overrides: &CliConfigOverrides, get_args: GetArgs) -> Re
        } => {
            println!("  transport: streamable_http");
            println!("  url: {url}");
-            let env_var = bearer_token_env_var.as_deref().unwrap_or("-");
-            println!("  bearer_token_env_var: {env_var}");
+            let bearer_token_display = bearer_token_env_var.as_deref().unwrap_or("-");
+            println!("  bearer_token_env_var: {bearer_token_display}");
            let headers_display = match http_headers {
                Some(map) if !map.is_empty() => {
                    let mut pairs: Vec<_> = map.iter().collect();
                    pairs.sort_by(|(a, _), (b, _)| a.cmp(b));
                    pairs
                        .into_iter()
-                        .map(|(k, v)| format!("{k}={v}"))
+                        .map(|(k, _)| format!("{k}=*****"))
                        .collect::<Vec<_>>()
                        .join(", ")
                }
@@ -773,7 +789,7 @@ async fn run_get(config_overrides: &CliConfigOverrides, get_args: GetArgs) -> Re
                    pairs.sort_by(|(a, _), (b, _)| a.cmp(b));
                    pairs
                        .into_iter()
-                        .map(|(k, v)| format!("{k}={v}"))
+                        .map(|(k, var)| format!("{k}={var}"))
                        .collect::<Vec<_>>()
                        .join(", ")
                }
--- a/codex-rs/cli/tests/mcp_list.rs
+++ b/codex-rs/cli/tests/mcp_list.rs
@@ -68,9 +68,9 @@ async fn list_and_get_render_expected_output() -> Result<()> {
    assert!(stdout.contains("Name"));
    assert!(stdout.contains("docs"));
    assert!(stdout.contains("docs-server"));
-    assert!(stdout.contains("TOKEN=secret"));
-    assert!(stdout.contains("APP_TOKEN=$APP_TOKEN"));
-    assert!(stdout.contains("WORKSPACE_ID=$WORKSPACE_ID"));
+    assert!(stdout.contains("TOKEN=*****"));
+    assert!(stdout.contains("APP_TOKEN=*****"));
+    assert!(stdout.contains("WORKSPACE_ID=*****"));
    assert!(stdout.contains("Status"));
    assert!(stdout.contains("Auth"));
    assert!(stdout.contains("enabled"));
@@ -119,9 +119,9 @@ async fn list_and_get_render_expected_output() -> Result<()> {
    assert!(stdout.contains("transport: stdio"));
    assert!(stdout.contains("command: docs-server"));
    assert!(stdout.contains("args: --port 4000"));
-    assert!(stdout.contains("env: TOKEN=secret"));
-    assert!(stdout.contains("APP_TOKEN=$APP_TOKEN"));
-    assert!(stdout.contains("WORKSPACE_ID=$WORKSPACE_ID"));
+    assert!(stdout.contains("env: TOKEN=*****"));
+    assert!(stdout.contains("APP_TOKEN=*****"));
+    assert!(stdout.contains("WORKSPACE_ID=*****"));
    assert!(stdout.contains("enabled: true"));
    assert!(stdout.contains("remove: codex mcp remove docs"));

--- a/codex-rs/cloud-tasks/src/lib.rs
+++ b/codex-rs/cloud-tasks/src/lib.rs
@@ -58,7 +58,16 @@ async fn init_backend(user_agent_suffix: &str) -> anyhow::Result<BackendContext>

    let auth = match codex_core::config::find_codex_home()
        .ok()
-        .map(|home| codex_login::AuthManager::new(home, false))
+        .map(|home| {
+            let store_mode = codex_core::config::Config::load_from_base_config_with_overrides(
+                codex_core::config::ConfigToml::default(),
+                codex_core::config::ConfigOverrides::default(),
+                home.clone(),
+            )
+            .map(|cfg| cfg.cli_auth_credentials_store_mode)
+            .unwrap_or_default();
+            codex_login::AuthManager::new(home, false, store_mode)
+        })
        .and_then(|am| am.auth())
    {
        Some(auth) => auth,
--- a/codex-rs/cloud-tasks/src/util.rs
+++ b/codex-rs/cloud-tasks/src/util.rs
@@ -70,7 +70,14 @@ pub async fn build_chatgpt_headers() -> HeaderMap {
        HeaderValue::from_str(&ua).unwrap_or(HeaderValue::from_static("codex-cli")),
    );
    if let Ok(home) = codex_core::config::find_codex_home() {
-        let am = codex_login::AuthManager::new(home, false);
+        let store_mode = codex_core::config::Config::load_from_base_config_with_overrides(
+            codex_core::config::ConfigToml::default(),
+            codex_core::config::ConfigOverrides::default(),
+            home.clone(),
+        )
+        .map(|cfg| cfg.cli_auth_credentials_store_mode)
+        .unwrap_or_default();
+        let am = codex_login::AuthManager::new(home, false, store_mode);
        if let Some(auth) = am.auth()
            && let Ok(tok) = auth.get_token().await
            && !tok.is_empty()
--- a/codex-rs/common/src/approval_presets.rs
+++ b/codex-rs/common/src/approval_presets.rs
@@ -24,21 +24,21 @@ pub fn builtin_approval_presets() -> Vec<ApprovalPreset> {
        ApprovalPreset {
            id: "read-only",
            label: "Read Only",
-            description: "Codex can read files and answer questions. Codex requires approval to make edits, run commands, or access network",
+            description: "Codex can read files and answer questions. Codex requires approval to make edits, run commands, or access network.",
            approval: AskForApproval::OnRequest,
            sandbox: SandboxPolicy::ReadOnly,
        },
        ApprovalPreset {
            id: "auto",
            label: "Auto",
-            description: "Codex can read files, make edits, and run commands in the workspace. Codex requires approval to work outside the workspace or access network",
+            description: "Codex can read files, make edits, and run commands in the workspace. Codex requires approval to work outside the workspace or access network.",
            approval: AskForApproval::OnRequest,
            sandbox: SandboxPolicy::new_workspace_write_policy(),
        },
        ApprovalPreset {
            id: "full-access",
            label: "Full Access",
-            description: "Codex can read files, make edits, and run commands with network access, without approval. Exercise caution",
+            description: "Codex can read files, make edits, and run commands with network access, without approval. Exercise caution.",
            approval: AskForApproval::Never,
            sandbox: SandboxPolicy::DangerFullAccess,
        },
--- a/codex-rs/common/src/format_env_display.rs
+++ b/codex-rs/common/src/format_env_display.rs
@@ -6,15 +6,11 @@ pub fn format_env_display(env: Option<&HashMap<String, String>>, env_vars: &[Str
    if let Some(map) = env {
        let mut pairs: Vec<_> = map.iter().collect();
        pairs.sort_by(|(a, _), (b, _)| a.cmp(b));
-        parts.extend(
-            pairs
-                .into_iter()
-                .map(|(key, value)| format!("{key}={value}")),
-        );
+        parts.extend(pairs.into_iter().map(|(key, _)| format!("{key}=*****")));
    }

    if !env_vars.is_empty() {
-        parts.extend(env_vars.iter().map(|var| format!("{var}=${var}")));
+        parts.extend(env_vars.iter().map(|var| format!("{var}=*****")));
    }

    if parts.is_empty() {
@@ -42,14 +38,14 @@ mod tests {
        env.insert("B".to_string(), "two".to_string());
        env.insert("A".to_string(), "one".to_string());

-        assert_eq!(format_env_display(Some(&env), &[]), "A=one, B=two");
+        assert_eq!(format_env_display(Some(&env), &[]), "A=*****, B=*****");
    }

    #[test]
    fn formats_env_vars_with_dollar_prefix() {
        let vars = vec!["TOKEN".to_string(), "PATH".to_string()];

-        assert_eq!(format_env_display(None, &vars), "TOKEN=$TOKEN, PATH=$PATH");
+        assert_eq!(format_env_display(None, &vars), "TOKEN=*****, PATH=*****");
    }

    #[test]
@@ -60,7 +56,7 @@ mod tests {

        assert_eq!(
            format_env_display(Some(&env), &vars),
-            "HOME=/tmp, TOKEN=$TOKEN"
+            "HOME=*****, TOKEN=*****"
        );
    }
 }
--- a/codex-rs/core/Cargo.toml
+++ b/codex-rs/core/Cargo.toml
@@ -21,13 +21,16 @@ bytes = { workspace = true }
 chrono = { workspace = true, features = ["serde"] }
 codex-app-server-protocol = { workspace = true }
 codex-apply-patch = { workspace = true }
+codex-async-utils = { workspace = true }
 codex-file-search = { workspace = true }
+codex-git-tooling = { workspace = true }
+codex-keyring-store = { workspace = true }
 codex-otel = { workspace = true, features = ["otel"] }
 codex-protocol = { workspace = true }
 codex-rmcp-client = { workspace = true }
-codex-async-utils = { workspace = true }
-codex-utils-string = { workspace = true }
 codex-utils-pty = { workspace = true }
+codex-utils-readiness = { workspace = true }
+codex-utils-string = { workspace = true }
 codex-utils-tokenizer = { workspace = true }
 dirs = { workspace = true }
 dunce = { workspace = true }
@@ -36,6 +39,12 @@ eventsource-stream = { workspace = true }
 futures = { workspace = true }
 http = { workspace = true }
 indexmap = { workspace = true }
+keyring = { workspace = true, features = [
+    "apple-native",
+    "crypto-rust",
+    "linux-native-async-persistent",
+    "windows-native",
+] }
 libc = { workspace = true }
 mcp-types = { workspace = true }
 os_info = { workspace = true }
@@ -45,6 +54,7 @@ reqwest = { workspace = true, features = ["json", "stream"] }
 serde = { workspace = true, features = ["derive"] }
 serde_json = { workspace = true }
 sha1 = { workspace = true }
+sha2 = { workspace = true }
 shlex = { workspace = true }
 similar = { workspace = true }
 strum_macros = { workspace = true }
@@ -95,6 +105,7 @@ assert_cmd = { workspace = true }
 assert_matches = { workspace = true }
 core_test_support = { workspace = true }
 escargot = { workspace = true }
+image = { workspace = true, features = ["jpeg", "png"] }
 maplit = { workspace = true }
 predicates = { workspace = true }
 pretty_assertions = { workspace = true }
--- a/codex-rs/core/src/auth.rs
+++ b/codex-rs/core/src/auth.rs
@@ -1,16 +1,12 @@
-use chrono::DateTime;
+mod storage;
+
 use chrono::Utc;
 use serde::Deserialize;
 use serde::Serialize;
 #[cfg(test)]
 use serial_test::serial;
 use std::env;
-use std::fs::File;
-use std::fs::OpenOptions;
-use std::io::Read;
-use std::io::Write;
-#[cfg(unix)]
-use std::os::unix::fs::OpenOptionsExt;
+use std::fmt::Debug;
 use std::path::Path;
 use std::path::PathBuf;
 use std::sync::Arc;
@@ -20,6 +16,10 @@ use std::time::Duration;
 use codex_app_server_protocol::AuthMode;
 use codex_protocol::config_types::ForcedLoginMethod;

+pub use crate::auth::storage::AuthCredentialsStoreMode;
+pub use crate::auth::storage::AuthDotJson;
+use crate::auth::storage::AuthStorageBackend;
+use crate::auth::storage::create_auth_storage;
 use crate::config::Config;
 use crate::default_client::CodexHttpClient;
 use crate::token_data::PlanType;
@@ -32,7 +32,7 @@ pub struct CodexAuth {

    pub(crate) api_key: Option<String>,
    pub(crate) auth_dot_json: Arc<Mutex<Option<AuthDotJson>>>,
-    pub(crate) auth_file: PathBuf,
+    storage: Arc<dyn AuthStorageBackend>,
    pub(crate) client: CodexHttpClient,
 }

@@ -56,7 +56,7 @@ impl CodexAuth {
            .map_err(std::io::Error::other)?;

        let updated = update_tokens(
-            &self.auth_file,
+            &self.storage,
            refresh_response.id_token,
            refresh_response.access_token,
            refresh_response.refresh_token,
@@ -78,9 +78,12 @@ impl CodexAuth {
        Ok(access)
    }

-    /// Loads the available auth information from the auth.json.
-    pub fn from_codex_home(codex_home: &Path) -> std::io::Result<Option<CodexAuth>> {
-        load_auth(codex_home, false)
+    /// Loads the available auth information from auth storage.
+    pub fn from_auth_storage(
+        codex_home: &Path,
+        auth_credentials_store_mode: AuthCredentialsStoreMode,
+    ) -> std::io::Result<Option<CodexAuth>> {
+        load_auth(codex_home, false, auth_credentials_store_mode)
    }

    pub async fn get_token_data(&self) -> Result<TokenData, std::io::Error> {
@@ -103,7 +106,7 @@ impl CodexAuth {
                    .map_err(std::io::Error::other)?;

                    let updated_auth_dot_json = update_tokens(
-                        &self.auth_file,
+                        &self.storage,
                        refresh_response.id_token,
                        refresh_response.access_token,
                        refresh_response.refresh_token,
@@ -177,7 +180,7 @@ impl CodexAuth {
        Self {
            api_key: None,
            mode: AuthMode::ChatGPT,
-            auth_file: PathBuf::new(),
+            storage: create_auth_storage(PathBuf::new(), AuthCredentialsStoreMode::File),
            auth_dot_json,
            client: crate::default_client::create_client(),
        }
@@ -187,7 +190,7 @@ impl CodexAuth {
        Self {
            api_key: Some(api_key.to_owned()),
            mode: AuthMode::ApiKey,
-            auth_file: PathBuf::new(),
+            storage: create_auth_storage(PathBuf::new(), AuthCredentialsStoreMode::File),
            auth_dot_json: Arc::new(Mutex::new(None)),
            client,
        }
@@ -215,33 +218,57 @@ pub fn read_codex_api_key_from_env() -> Option<String> {
        .filter(|value| !value.is_empty())
 }

-pub fn get_auth_file(codex_home: &Path) -> PathBuf {
-    codex_home.join("auth.json")
-}
-
 /// Delete the auth.json file inside `codex_home` if it exists. Returns `Ok(true)`
 /// if a file was removed, `Ok(false)` if no auth file was present.
-pub fn logout(codex_home: &Path) -> std::io::Result<bool> {
-    let auth_file = get_auth_file(codex_home);
-    match std::fs::remove_file(&auth_file) {
-        Ok(_) => Ok(true),
-        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
-        Err(err) => Err(err),
-    }
+pub fn logout(
+    codex_home: &Path,
+    auth_credentials_store_mode: AuthCredentialsStoreMode,
+) -> std::io::Result<bool> {
+    let storage = create_auth_storage(codex_home.to_path_buf(), auth_credentials_store_mode);
+    storage.delete()
 }

 /// Writes an `auth.json` that contains only the API key.
-pub fn login_with_api_key(codex_home: &Path, api_key: &str) -> std::io::Result<()> {
+pub fn login_with_api_key(
+    codex_home: &Path,
+    api_key: &str,
+    auth_credentials_store_mode: AuthCredentialsStoreMode,
+) -> std::io::Result<()> {
    let auth_dot_json = AuthDotJson {
        openai_api_key: Some(api_key.to_string()),
        tokens: None,
        last_refresh: None,
    };
-    write_auth_json(&get_auth_file(codex_home), &auth_dot_json)
+    save_auth(codex_home, &auth_dot_json, auth_credentials_store_mode)
+}
+
+/// Persist the provided auth payload using the specified backend.
+pub fn save_auth(
+    codex_home: &Path,
+    auth: &AuthDotJson,
+    auth_credentials_store_mode: AuthCredentialsStoreMode,
+) -> std::io::Result<()> {
+    let storage = create_auth_storage(codex_home.to_path_buf(), auth_credentials_store_mode);
+    storage.save(auth)
+}
+
+/// Load CLI auth data using the configured credential store backend.
+/// Returns `None` when no credentials are stored.
+pub fn load_auth_dot_json(
+    codex_home: &Path,
+    auth_credentials_store_mode: AuthCredentialsStoreMode,
+) -> std::io::Result<Option<AuthDotJson>> {
+    let storage = create_auth_storage(codex_home.to_path_buf(), auth_credentials_store_mode);
+    storage.load()
 }

 pub async fn enforce_login_restrictions(config: &Config) -> std::io::Result<()> {
-    let Some(auth) = load_auth(&config.codex_home, true)? else {
+    let Some(auth) = load_auth(
+        &config.codex_home,
+        true,
+        config.cli_auth_credentials_store_mode,
+    )?
+    else {
        return Ok(());
    };

@@ -260,7 +287,11 @@ pub async fn enforce_login_restrictions(config: &Config) -> std::io::Result<()>
        };

        if let Some(message) = method_violation {
-            return logout_with_message(&config.codex_home, message);
+            return logout_with_message(
+                &config.codex_home,
+                message,
+                config.cli_auth_credentials_store_mode,
+            );
        }
    }

@@ -277,6 +308,7 @@ pub async fn enforce_login_restrictions(config: &Config) -> std::io::Result<()>
                    format!(
                        "Failed to load ChatGPT credentials while enforcing workspace restrictions: {err}. Logging out."
                    ),
+                    config.cli_auth_credentials_store_mode,
                );
            }
        };
@@ -292,15 +324,23 @@ pub async fn enforce_login_restrictions(config: &Config) -> std::io::Result<()>
                    "Login is restricted to workspace {expected_account_id}, but current credentials lack a workspace identifier. Logging out."
                ),
            };
-            return logout_with_message(&config.codex_home, message);
+            return logout_with_message(
+                &config.codex_home,
+                message,
+                config.cli_auth_credentials_store_mode,
+            );
        }
    }

    Ok(())
 }

-fn logout_with_message(codex_home: &Path, message: String) -> std::io::Result<()> {
-    match logout(codex_home) {
+fn logout_with_message(
+    codex_home: &Path,
+    message: String,
+    auth_credentials_store_mode: AuthCredentialsStoreMode,
+) -> std::io::Result<()> {
+    match logout(codex_home, auth_credentials_store_mode) {
        Ok(_) => Err(std::io::Error::other(message)),
        Err(err) => Err(std::io::Error::other(format!(
            "{message}. Failed to remove auth.json: {err}"
@@ -311,6 +351,7 @@ fn logout_with_message(codex_home: &Path, message: String) -> std::io::Result<()
 fn load_auth(
    codex_home: &Path,
    enable_codex_api_key_env: bool,
+    auth_credentials_store_mode: AuthCredentialsStoreMode,
 ) -> std::io::Result<Option<CodexAuth>> {
    if enable_codex_api_key_env && let Some(api_key) = read_codex_api_key_from_env() {
        let client = crate::default_client::create_client();
@@ -320,12 +361,12 @@ fn load_auth(
        )));
    }

-    let auth_file = get_auth_file(codex_home);
+    let storage = create_auth_storage(codex_home.to_path_buf(), auth_credentials_store_mode);
+
    let client = crate::default_client::create_client();
-    let auth_dot_json = match try_read_auth_json(&auth_file) {
-        Ok(auth) => auth,
-        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
-        Err(err) => return Err(err),
+    let auth_dot_json = match storage.load()? {
+        Some(auth) => auth,
+        None => return Ok(None),
    };

    let AuthDotJson {
@@ -342,7 +383,7 @@ fn load_auth(
    Ok(Some(CodexAuth {
        api_key: None,
        mode: AuthMode::ChatGPT,
-        auth_file,
+        storage: storage.clone(),
        auth_dot_json: Arc::new(Mutex::new(Some(AuthDotJson {
            openai_api_key: None,
            tokens,
@@ -352,44 +393,20 @@ fn load_auth(
    }))
 }

-/// Attempt to read and refresh the `auth.json` file in the given `CODEX_HOME` directory.
-/// Returns the full AuthDotJson structure after refreshing if necessary.
-pub fn try_read_auth_json(auth_file: &Path) -> std::io::Result<AuthDotJson> {
-    let mut file = File::open(auth_file)?;
-    let mut contents = String::new();
-    file.read_to_string(&mut contents)?;
-    let auth_dot_json: AuthDotJson = serde_json::from_str(&contents)?;
-
-    Ok(auth_dot_json)
-}
-
-pub fn write_auth_json(auth_file: &Path, auth_dot_json: &AuthDotJson) -> std::io::Result<()> {
-    if let Some(parent) = auth_file.parent() {
-        std::fs::create_dir_all(parent)?;
-    }
-    let json_data = serde_json::to_string_pretty(auth_dot_json)?;
-    let mut options = OpenOptions::new();
-    options.truncate(true).write(true).create(true);
-    #[cfg(unix)]
-    {
-        options.mode(0o600);
-    }
-    let mut file = options.open(auth_file)?;
-    file.write_all(json_data.as_bytes())?;
-    file.flush()?;
-    Ok(())
-}
-
 async fn update_tokens(
-    auth_file: &Path,
-    id_token: String,
+    storage: &Arc<dyn AuthStorageBackend>,
+    id_token: Option<String>,
    access_token: Option<String>,
    refresh_token: Option<String>,
 ) -> std::io::Result<AuthDotJson> {
-    let mut auth_dot_json = try_read_auth_json(auth_file)?;
+    let mut auth_dot_json = storage
+        .load()?
+        .ok_or(std::io::Error::other("Token data is not available."))?;

    let tokens = auth_dot_json.tokens.get_or_insert_with(TokenData::default);
-    tokens.id_token = parse_id_token(&id_token).map_err(std::io::Error::other)?;
+    if let Some(id_token) = id_token {
+        tokens.id_token = parse_id_token(&id_token).map_err(std::io::Error::other)?;
+    }
    if let Some(access_token) = access_token {
        tokens.access_token = access_token;
    }
@@ -397,7 +414,7 @@ async fn update_tokens(
        tokens.refresh_token = refresh_token;
    }
    auth_dot_json.last_refresh = Some(Utc::now());
-    write_auth_json(auth_file, &auth_dot_json)?;
+    storage.save(&auth_dot_json)?;
    Ok(auth_dot_json)
 }

@@ -445,24 +462,11 @@ struct RefreshRequest {

 #[derive(Deserialize, Clone)]
 struct RefreshResponse {
-    id_token: String,
+    id_token: Option<String>,
    access_token: Option<String>,
    refresh_token: Option<String>,
 }

-/// Expected structure for $CODEX_HOME/auth.json.
-#[derive(Deserialize, Serialize, Clone, Debug, PartialEq)]
-pub struct AuthDotJson {
-    #[serde(rename = "OPENAI_API_KEY")]
-    pub openai_api_key: Option<String>,
-
-    #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub tokens: Option<TokenData>,
-
-    #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub last_refresh: Option<DateTime<Utc>>,
-}
-
 // Shared constant for token refresh (client id used for oauth token refresh flow)
 pub const CLIENT_ID: &str = "app_EMoamEEZ73f0CkXaXp7hrann";

@@ -477,12 +481,15 @@ struct CachedAuth {
 #[cfg(test)]
 mod tests {
    use super::*;
+    use crate::auth::storage::FileAuthStorage;
+    use crate::auth::storage::get_auth_file;
    use crate::config::Config;
    use crate::config::ConfigOverrides;
    use crate::config::ConfigToml;
    use crate::token_data::IdTokenInfo;
    use crate::token_data::KnownPlan;
    use crate::token_data::PlanType;
+
    use base64::Engine;
    use codex_protocol::config_types::ForcedLoginMethod;
    use pretty_assertions::assert_eq;
@@ -491,9 +498,9 @@ mod tests {
    use tempfile::tempdir;

    #[tokio::test]
-    async fn roundtrip_auth_dot_json() {
+    async fn refresh_without_id_token() {
        let codex_home = tempdir().unwrap();
-        let _ = write_auth_file(
+        let fake_jwt = write_auth_file(
            AuthFileParams {
                openai_api_key: None,
                chatgpt_plan_type: "pro".to_string(),
@@ -503,12 +510,23 @@ mod tests {
        )
        .expect("failed to write auth file");

-        let file = get_auth_file(codex_home.path());
-        let auth_dot_json = try_read_auth_json(&file).unwrap();
-        write_auth_json(&file, &auth_dot_json).unwrap();
+        let storage = create_auth_storage(
+            codex_home.path().to_path_buf(),
+            AuthCredentialsStoreMode::File,
+        );
+        let updated = super::update_tokens(
+            &storage,
+            None,
+            Some("new-access-token".to_string()),
+            Some("new-refresh-token".to_string()),
+        )
+        .await
+        .expect("update_tokens should succeed");

-        let same_auth_dot_json = try_read_auth_json(&file).unwrap();
-        assert_eq!(auth_dot_json, same_auth_dot_json);
+        let tokens = updated.tokens.expect("tokens should exist");
+        assert_eq!(tokens.id_token.raw_jwt, fake_jwt);
+        assert_eq!(tokens.access_token, "new-access-token");
+        assert_eq!(tokens.refresh_token, "new-refresh-token");
    }

    #[test]
@@ -530,9 +548,13 @@ mod tests {
        )
        .unwrap();

-        super::login_with_api_key(dir.path(), "sk-new").expect("login_with_api_key should succeed");
+        super::login_with_api_key(dir.path(), "sk-new", AuthCredentialsStoreMode::File)
+            .expect("login_with_api_key should succeed");

-        let auth = super::try_read_auth_json(&auth_path).expect("auth.json should parse");
+        let storage = FileAuthStorage::new(dir.path().to_path_buf());
+        let auth = storage
+            .try_read_auth_json(&auth_path)
+            .expect("auth.json should parse");
        assert_eq!(auth.openai_api_key.as_deref(), Some("sk-new"));
        assert!(auth.tokens.is_none(), "tokens should be cleared");
    }
@@ -540,7 +562,8 @@ mod tests {
    #[test]
    fn missing_auth_json_returns_none() {
        let dir = tempdir().unwrap();
-        let auth = CodexAuth::from_codex_home(dir.path()).expect("call should succeed");
+        let auth = CodexAuth::from_auth_storage(dir.path(), AuthCredentialsStoreMode::File)
+            .expect("call should succeed");
        assert_eq!(auth, None);
    }

@@ -562,9 +585,11 @@ mod tests {
            api_key,
            mode,
            auth_dot_json,
-            auth_file: _,
+            storage: _,
            ..
-        } = super::load_auth(codex_home.path(), false).unwrap().unwrap();
+        } = super::load_auth(codex_home.path(), false, AuthCredentialsStoreMode::File)
+            .unwrap()
+            .unwrap();
        assert_eq!(None, api_key);
        assert_eq!(AuthMode::ChatGPT, mode);

@@ -605,7 +630,9 @@ mod tests {
        )
        .unwrap();

-        let auth = super::load_auth(dir.path(), false).unwrap().unwrap();
+        let auth = super::load_auth(dir.path(), false, AuthCredentialsStoreMode::File)
+            .unwrap()
+            .unwrap();
        assert_eq!(auth.mode, AuthMode::ApiKey);
        assert_eq!(auth.api_key, Some("sk-test-key".to_string()));

@@ -620,11 +647,11 @@ mod tests {
            tokens: None,
            last_refresh: None,
        };
-        write_auth_json(&get_auth_file(dir.path()), &auth_dot_json)?;
-        assert!(dir.path().join("auth.json").exists());
-        let removed = logout(dir.path())?;
-        assert!(removed);
-        assert!(!dir.path().join("auth.json").exists());
+        super::save_auth(dir.path(), &auth_dot_json, AuthCredentialsStoreMode::File)?;
+        let auth_file = get_auth_file(dir.path());
+        assert!(auth_file.exists());
+        assert!(logout(dir.path(), AuthCredentialsStoreMode::File)?);
+        assert!(!auth_file.exists());
        Ok(())
    }

@@ -732,7 +759,8 @@ mod tests {
    #[tokio::test]
    async fn enforce_login_restrictions_logs_out_for_method_mismatch() {
        let codex_home = tempdir().unwrap();
-        login_with_api_key(codex_home.path(), "sk-test").expect("seed api key");
+        login_with_api_key(codex_home.path(), "sk-test", AuthCredentialsStoreMode::File)
+            .expect("seed api key");

        let config = build_config(codex_home.path(), Some(ForcedLoginMethod::Chatgpt), None);

@@ -801,7 +829,8 @@ mod tests {
    async fn enforce_login_restrictions_allows_api_key_if_login_method_not_set_but_forced_chatgpt_workspace_id_is_set()
     {
        let codex_home = tempdir().unwrap();
-        login_with_api_key(codex_home.path(), "sk-test").expect("seed api key");
+        login_with_api_key(codex_home.path(), "sk-test", AuthCredentialsStoreMode::File)
+            .expect("seed api key");

        let config = build_config(codex_home.path(), None, Some("org_mine".to_string()));

@@ -845,6 +874,7 @@ pub struct AuthManager {
    codex_home: PathBuf,
    inner: RwLock<CachedAuth>,
    enable_codex_api_key_env: bool,
+    auth_credentials_store_mode: AuthCredentialsStoreMode,
 }

 impl AuthManager {
@@ -852,14 +882,23 @@ impl AuthManager {
    /// preferred auth method. Errors loading auth are swallowed; `auth()` will
    /// simply return `None` in that case so callers can treat it as an
    /// unauthenticated state.
-    pub fn new(codex_home: PathBuf, enable_codex_api_key_env: bool) -> Self {
-        let auth = load_auth(&codex_home, enable_codex_api_key_env)
-            .ok()
-            .flatten();
+    pub fn new(
+        codex_home: PathBuf,
+        enable_codex_api_key_env: bool,
+        auth_credentials_store_mode: AuthCredentialsStoreMode,
+    ) -> Self {
+        let auth = load_auth(
+            &codex_home,
+            enable_codex_api_key_env,
+            auth_credentials_store_mode,
+        )
+        .ok()
+        .flatten();
        Self {
            codex_home,
            inner: RwLock::new(CachedAuth { auth }),
            enable_codex_api_key_env,
+            auth_credentials_store_mode,
        }
    }

@@ -870,6 +909,7 @@ impl AuthManager {
            codex_home: PathBuf::new(),
            inner: RwLock::new(cached),
            enable_codex_api_key_env: false,
+            auth_credentials_store_mode: AuthCredentialsStoreMode::File,
        })
    }

@@ -881,9 +921,13 @@ impl AuthManager {
    /// Force a reload of the auth information from auth.json. Returns
    /// whether the auth value changed.
    pub fn reload(&self) -> bool {
-        let new_auth = load_auth(&self.codex_home, self.enable_codex_api_key_env)
-            .ok()
-            .flatten();
+        let new_auth = load_auth(
+            &self.codex_home,
+            self.enable_codex_api_key_env,
+            self.auth_credentials_store_mode,
+        )
+        .ok()
+        .flatten();
        if let Ok(mut guard) = self.inner.write() {
            let changed = !AuthManager::auths_equal(&guard.auth, &new_auth);
            guard.auth = new_auth;
@@ -902,8 +946,16 @@ impl AuthManager {
    }

    /// Convenience constructor returning an `Arc` wrapper.
-    pub fn shared(codex_home: PathBuf, enable_codex_api_key_env: bool) -> Arc<Self> {
-        Arc::new(Self::new(codex_home, enable_codex_api_key_env))
+    pub fn shared(
+        codex_home: PathBuf,
+        enable_codex_api_key_env: bool,
+        auth_credentials_store_mode: AuthCredentialsStoreMode,
+    ) -> Arc<Self> {
+        Arc::new(Self::new(
+            codex_home,
+            enable_codex_api_key_env,
+            auth_credentials_store_mode,
+        ))
    }

    /// Attempt to refresh the current auth token (if any). On success, reload
@@ -931,7 +983,7 @@ impl AuthManager {
    /// reloads the in‑memory auth cache so callers immediately observe the
    /// unauthenticated state.
    pub fn logout(&self) -> std::io::Result<bool> {
-        let removed = super::auth::logout(&self.codex_home)?;
+        let removed = super::auth::logout(&self.codex_home, self.auth_credentials_store_mode)?;
        // Always reload to clear any cached auth (even if file absent).
        self.reload();
        Ok(removed)
--- a/codex-rs/core/src/auth/storage.rs
+++ b/codex-rs/core/src/auth/storage.rs
@@ -0,0 +1,672 @@
+use chrono::DateTime;
+use chrono::Utc;
+use serde::Deserialize;
+use serde::Serialize;
+use sha2::Digest;
+use sha2::Sha256;
+use std::fmt::Debug;
+use std::fs::File;
+use std::fs::OpenOptions;
+use std::io::Read;
+use std::io::Write;
+#[cfg(unix)]
+use std::os::unix::fs::OpenOptionsExt;
+use std::path::Path;
+use std::path::PathBuf;
+use std::sync::Arc;
+use tracing::warn;
+
+use crate::token_data::TokenData;
+use codex_keyring_store::DefaultKeyringStore;
+use codex_keyring_store::KeyringStore;
+
+/// Determine where Codex should store CLI auth credentials.
+///
+/// Because of `rename_all = "lowercase"`, the variants (de)serialize as
+/// `"file"`, `"keyring"` and `"auto"`. `File` is the `Default`.
+#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum AuthCredentialsStoreMode {
+    #[default]
+    /// Persist credentials in CODEX_HOME/auth.json.
+    File,
+    /// Persist credentials in the keyring. Fail if unavailable.
+    Keyring,
+    /// Use keyring when available; otherwise, fall back to a file in CODEX_HOME.
+    Auto,
+}
+
+/// Expected structure for $CODEX_HOME/auth.json.
+///
+/// The keyring backend in this module serializes the same structure as its
+/// stored value, so the shape is shared by every storage backend.
+#[derive(Deserialize, Serialize, Clone, Debug, PartialEq)]
+pub struct AuthDotJson {
+    /// API key, stored under the JSON key `OPENAI_API_KEY`.
+    #[serde(rename = "OPENAI_API_KEY")]
+    pub openai_api_key: Option<String>,
+
+    /// Token data; may be absent on input and is omitted from the output
+    /// when `None`.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub tokens: Option<TokenData>,
+
+    /// Timestamp of the last refresh; may be absent on input and is omitted
+    /// from the output when `None`.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub last_refresh: Option<DateTime<Utc>>,
+}
+
+/// Return the path of the auth file inside `codex_home`, i.e.
+/// `<codex_home>/auth.json`. The file is not required to exist.
+pub(super) fn get_auth_file(codex_home: &Path) -> PathBuf {
+    codex_home.join("auth.json")
+}
+
+/// Remove `<codex_home>/auth.json` if it is present.
+///
+/// Returns `Ok(true)` when the file was removed and `Ok(false)` when it did
+/// not exist. Any other I/O error is returned to the caller unchanged.
+pub(super) fn delete_file_if_exists(codex_home: &Path) -> std::io::Result<bool> {
+    let auth_file = get_auth_file(codex_home);
+    match std::fs::remove_file(&auth_file) {
+        Ok(()) => Ok(true),
+        // A missing file is not an error: there was simply nothing to delete.
+        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
+        Err(err) => Err(err),
+    }
+}
+
+/// Common interface over the places CLI auth credentials can be stored.
+///
+/// Implementations in this module: `FileAuthStorage`, `KeyringAuthStorage`
+/// and `AutoAuthStorage`.
+pub(super) trait AuthStorageBackend: Debug + Send + Sync {
+    /// Load the stored credentials; `Ok(None)` means nothing is stored.
+    fn load(&self) -> std::io::Result<Option<AuthDotJson>>;
+    /// Persist `auth`, replacing any previously stored value.
+    fn save(&self, auth: &AuthDotJson) -> std::io::Result<()>;
+    /// Remove the stored credentials; returns whether anything was removed.
+    fn delete(&self) -> std::io::Result<bool>;
+}
+
+/// Storage backend that keeps credentials in `<codex_home>/auth.json`.
+#[derive(Clone, Debug)]
+pub(super) struct FileAuthStorage {
+    // Directory that contains (or will contain) `auth.json`.
+    codex_home: PathBuf,
+}
+
+impl FileAuthStorage {
+    /// Create a file-backed storage rooted at `codex_home`.
+    pub(super) fn new(codex_home: PathBuf) -> Self {
+        Self { codex_home }
+    }
+
+    /// Read the file at `auth_file` and deserialize it as `AuthDotJson`.
+    ///
+    /// This only reads and parses; it does not refresh tokens or write
+    /// anything back. The path is taken from the `auth_file` argument, not
+    /// from `self.codex_home`.
+    ///
+    /// Errors from opening or reading the file are returned as-is (so a
+    /// missing file surfaces as `ErrorKind::NotFound`); a JSON parse failure
+    /// is converted into an `std::io::Error` by the `?` operator.
+    pub(super) fn try_read_auth_json(&self, auth_file: &Path) -> std::io::Result<AuthDotJson> {
+        let mut file = File::open(auth_file)?;
+        let mut contents = String::new();
+        file.read_to_string(&mut contents)?;
+        let auth_dot_json: AuthDotJson = serde_json::from_str(&contents)?;
+
+        Ok(auth_dot_json)
+    }
+}
+
+impl AuthStorageBackend for FileAuthStorage {
+    /// Load `<codex_home>/auth.json`.
+    ///
+    /// Returns `Ok(None)` when the file does not exist; every other read or
+    /// parse error is propagated.
+    fn load(&self) -> std::io::Result<Option<AuthDotJson>> {
+        let auth_file = get_auth_file(&self.codex_home);
+        let auth_dot_json = match self.try_read_auth_json(&auth_file) {
+            Ok(auth) => auth,
+            // No auth file yet is a normal "not logged in" state, not a failure.
+            Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
+            Err(err) => return Err(err),
+        };
+        Ok(Some(auth_dot_json))
+    }
+
+    /// Write `auth_dot_json` as pretty-printed JSON to `<codex_home>/auth.json`,
+    /// creating the parent directory if needed and truncating any existing file.
+    fn save(&self, auth_dot_json: &AuthDotJson) -> std::io::Result<()> {
+        let auth_file = get_auth_file(&self.codex_home);
+
+        if let Some(parent) = auth_file.parent() {
+            std::fs::create_dir_all(parent)?;
+        }
+        let json_data = serde_json::to_string_pretty(auth_dot_json)?;
+        let mut options = OpenOptions::new();
+        options.truncate(true).write(true).create(true);
+        // On Unix, request owner-only read/write (0o600) for the credentials file.
+        // NOTE(review): per the std docs this mode is used when the file is
+        // created; an already existing file presumably keeps its permissions.
+        #[cfg(unix)]
+        {
+            options.mode(0o600);
+        }
+        let mut file = options.open(auth_file)?;
+        file.write_all(json_data.as_bytes())?;
+        file.flush()?;
+        Ok(())
+    }
+
+    /// Delete `<codex_home>/auth.json`; returns whether a file was removed.
+    fn delete(&self) -> std::io::Result<bool> {
+        delete_file_if_exists(&self.codex_home)
+    }
+}
+
+/// Service name passed to the keyring store for every load/save/delete of CLI auth.
+const KEYRING_SERVICE: &str = "Codex Auth";
+
+/// Turn the `codex_home` path into a stable, short keyring key.
+///
+/// The path is canonicalized when possible; if canonicalization fails (for
+/// example because the path does not exist) the path is used as given. The
+/// key is `cli|` followed by the first 16 hex characters of the SHA-256 of
+/// the (lossily UTF-8 converted) path string.
+///
+/// The current implementation has no failing path and always returns `Ok`.
+fn compute_store_key(codex_home: &Path) -> std::io::Result<String> {
+    let canonical = codex_home
+        .canonicalize()
+        .unwrap_or_else(|_| codex_home.to_path_buf());
+    let path_str = canonical.to_string_lossy();
+    let mut hasher = Sha256::new();
+    hasher.update(path_str.as_bytes());
+    let digest = hasher.finalize();
+    let hex = format!("{digest:x}");
+    // Keep only the first 16 hex characters; fall back to the whole string
+    // if it is ever shorter than that.
+    let truncated = hex.get(..16).unwrap_or(&hex);
+    Ok(format!("cli|{truncated}"))
+}
+
+/// Storage backend that keeps the serialized `AuthDotJson` in a keyring entry.
+#[derive(Clone, Debug)]
+struct KeyringAuthStorage {
+    // Used to derive the keyring key and to locate the fallback auth.json.
+    codex_home: PathBuf,
+    // Keyring implementation; injected so tests can supply a mock.
+    keyring_store: Arc<dyn KeyringStore>,
+}
+
+impl KeyringAuthStorage {
+    /// Create a keyring-backed storage for `codex_home` using `keyring_store`.
+    fn new(codex_home: PathBuf, keyring_store: Arc<dyn KeyringStore>) -> Self {
+        Self {
+            codex_home,
+            keyring_store,
+        }
+    }
+
+    /// Load and deserialize the entry stored under `key`.
+    ///
+    /// Returns `Ok(None)` when the keyring has no value for `key`. Keyring
+    /// errors and JSON deserialization errors are both wrapped into
+    /// `std::io::Error::other` with a descriptive message.
+    fn load_from_keyring(&self, key: &str) -> std::io::Result<Option<AuthDotJson>> {
+        match self.keyring_store.load(KEYRING_SERVICE, key) {
+            Ok(Some(serialized)) => serde_json::from_str(&serialized).map(Some).map_err(|err| {
+                std::io::Error::other(format!(
+                    "failed to deserialize CLI auth from keyring: {err}"
+                ))
+            }),
+            Ok(None) => Ok(None),
+            Err(error) => Err(std::io::Error::other(format!(
+                "failed to load CLI auth from keyring: {}",
+                error.message()
+            ))),
+        }
+    }
+
+    /// Store `value` under `key` in the keyring.
+    ///
+    /// On failure the error is logged with `warn!` and also returned as an
+    /// `std::io::Error::other` carrying the same message.
+    fn save_to_keyring(&self, key: &str, value: &str) -> std::io::Result<()> {
+        match self.keyring_store.save(KEYRING_SERVICE, key, value) {
+            Ok(()) => Ok(()),
+            Err(error) => {
+                // NOTE(review): the message says "OAuth tokens" although the
+                // value written by `save` is the whole serialized AuthDotJson.
+                let message = format!(
+                    "failed to write OAuth tokens to keyring: {}",
+                    error.message()
+                );
+                warn!("{message}");
+                Err(std::io::Error::other(message))
+            }
+        }
+    }
+}
+
+impl AuthStorageBackend for KeyringAuthStorage {
+    /// Load the credentials stored in the keyring for this `codex_home`.
+    fn load(&self) -> std::io::Result<Option<AuthDotJson>> {
+        let key = compute_store_key(&self.codex_home)?;
+        self.load_from_keyring(&key)
+    }
+
+    /// Serialize `auth` and store it in the keyring, then remove any
+    /// `auth.json` left in `codex_home`.
+    ///
+    /// A failure to remove the file is only logged; the save still succeeds.
+    fn save(&self, auth: &AuthDotJson) -> std::io::Result<()> {
+        let key = compute_store_key(&self.codex_home)?;
+        // Compact (non-pretty) JSON; a serde error is wrapped into an io::Error.
+        let serialized = serde_json::to_string(auth).map_err(std::io::Error::other)?;
+        self.save_to_keyring(&key, &serialized)?;
+        // The keyring now holds the credentials, so drop the on-disk copy.
+        if let Err(err) = delete_file_if_exists(&self.codex_home) {
+            warn!("failed to remove CLI auth fallback file: {err}");
+        }
+        Ok(())
+    }
+
+    /// Delete the keyring entry and the `auth.json` file.
+    ///
+    /// Returns `true` if either one was removed. The keyring is deleted
+    /// first; if that fails the error is returned immediately and the file
+    /// is left untouched.
+    fn delete(&self) -> std::io::Result<bool> {
+        let key = compute_store_key(&self.codex_home)?;
+        let keyring_removed = self
+            .keyring_store
+            .delete(KEYRING_SERVICE, &key)
+            .map_err(|err| {
+                std::io::Error::other(format!("failed to delete auth from keyring: {err}"))
+            })?;
+        let file_removed = delete_file_if_exists(&self.codex_home)?;
+        Ok(keyring_removed || file_removed)
+    }
+}
+
+/// Storage backend that prefers the keyring and falls back to the auth file.
+#[derive(Clone, Debug)]
+struct AutoAuthStorage {
+    // Preferred backend.
+    keyring_storage: Arc<KeyringAuthStorage>,
+    // Fallback backend, used when the keyring is empty or returns an error.
+    file_storage: Arc<FileAuthStorage>,
+}
+
+impl AutoAuthStorage {
+    /// Build both backends for the same `codex_home`; the keyring backend
+    /// uses the supplied `keyring_store`.
+    fn new(codex_home: PathBuf, keyring_store: Arc<dyn KeyringStore>) -> Self {
+        Self {
+            keyring_storage: Arc::new(KeyringAuthStorage::new(codex_home.clone(), keyring_store)),
+            file_storage: Arc::new(FileAuthStorage::new(codex_home)),
+        }
+    }
+}
+
+impl AuthStorageBackend for AutoAuthStorage {
+    /// Load credentials, preferring the keyring.
+    ///
+    /// The file backend is consulted when the keyring has no entry or when
+    /// the keyring returns an error (which is logged with `warn!`).
+    fn load(&self) -> std::io::Result<Option<AuthDotJson>> {
+        match self.keyring_storage.load() {
+            Ok(Some(auth)) => Ok(Some(auth)),
+            Ok(None) => self.file_storage.load(),
+            Err(err) => {
+                warn!("failed to load CLI auth from keyring, falling back to file storage: {err}");
+                self.file_storage.load()
+            }
+        }
+    }
+
+    /// Save credentials to the keyring; if that fails, log the error and
+    /// save to the file backend instead.
+    fn save(&self, auth: &AuthDotJson) -> std::io::Result<()> {
+        match self.keyring_storage.save(auth) {
+            Ok(()) => Ok(()),
+            Err(err) => {
+                warn!("failed to save auth to keyring, falling back to file storage: {err}");
+                self.file_storage.save(auth)
+            }
+        }
+    }
+
+    /// Delete credentials from the keyring and from disk.
+    ///
+    /// On success the keyring backend has already removed the fallback file.
+    /// If the keyring delete fails, the keyring backend returns before it
+    /// touches the file, so fall back to the file backend (mirroring `load`
+    /// and `save`) instead of leaving `auth.json` behind and failing logout.
+    fn delete(&self) -> std::io::Result<bool> {
+        match self.keyring_storage.delete() {
+            Ok(removed) => Ok(removed),
+            Err(err) => {
+                warn!("failed to delete auth from keyring, falling back to file storage: {err}");
+                self.file_storage.delete()
+            }
+        }
+    }
+}
+
+/// Create the storage backend selected by `mode` for `codex_home`, using
+/// `DefaultKeyringStore` for the keyring-based modes.
+pub(super) fn create_auth_storage(
+    codex_home: PathBuf,
+    mode: AuthCredentialsStoreMode,
+) -> Arc<dyn AuthStorageBackend> {
+    let keyring_store: Arc<dyn KeyringStore> = Arc::new(DefaultKeyringStore);
+    create_auth_storage_with_keyring_store(codex_home, mode, keyring_store)
+}
+
+/// Create the storage backend selected by `mode`, with an explicit
+/// `keyring_store`.
+///
+/// `File` ignores `keyring_store`; `Keyring` and `Auto` pass it to their
+/// keyring backend.
+fn create_auth_storage_with_keyring_store(
+    codex_home: PathBuf,
+    mode: AuthCredentialsStoreMode,
+    keyring_store: Arc<dyn KeyringStore>,
+) -> Arc<dyn AuthStorageBackend> {
+    match mode {
+        AuthCredentialsStoreMode::File => Arc::new(FileAuthStorage::new(codex_home)),
+        AuthCredentialsStoreMode::Keyring => {
+            Arc::new(KeyringAuthStorage::new(codex_home, keyring_store))
+        }
+        AuthCredentialsStoreMode::Auto => Arc::new(AutoAuthStorage::new(codex_home, keyring_store)),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::token_data::IdTokenInfo;
+    use anyhow::Context;
+    use base64::Engine;
+    use pretty_assertions::assert_eq;
+    use serde_json::json;
+    use tempfile::tempdir;
+
+    use codex_keyring_store::tests::MockKeyringStore;
+    use keyring::Error as KeyringError;
+
+    #[tokio::test]
+    async fn file_storage_load_returns_auth_dot_json() -> anyhow::Result<()> {
+        let codex_home = tempdir()?;
+        let storage = FileAuthStorage::new(codex_home.path().to_path_buf());
+        let auth_dot_json = AuthDotJson {
+            openai_api_key: Some("test-key".to_string()),
+            tokens: None,
+            last_refresh: Some(Utc::now()),
+        };
+
+        storage
+            .save(&auth_dot_json)
+            .context("failed to save auth file")?;
+
+        let loaded = storage.load().context("failed to load auth file")?;
+        assert_eq!(Some(auth_dot_json), loaded);
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn file_storage_save_persists_auth_dot_json() -> anyhow::Result<()> {
+        let codex_home = tempdir()?;
+        let storage = FileAuthStorage::new(codex_home.path().to_path_buf());
+        let auth_dot_json = AuthDotJson {
+            openai_api_key: Some("test-key".to_string()),
+            tokens: None,
+            last_refresh: Some(Utc::now()),
+        };
+
+        let file = get_auth_file(codex_home.path());
+        storage
+            .save(&auth_dot_json)
+            .context("failed to save auth file")?;
+
+        let same_auth_dot_json = storage
+            .try_read_auth_json(&file)
+            .context("failed to read auth file after save")?;
+        assert_eq!(auth_dot_json, same_auth_dot_json);
+        Ok(())
+    }
+
+    #[test]
+    fn file_storage_delete_removes_auth_file() -> anyhow::Result<()> {
+        let dir = tempdir()?;
+        let auth_dot_json = AuthDotJson {
+            openai_api_key: Some("sk-test-key".to_string()),
+            tokens: None,
+            last_refresh: None,
+        };
+        let storage = create_auth_storage(dir.path().to_path_buf(), AuthCredentialsStoreMode::File);
+        storage.save(&auth_dot_json)?;
+        assert!(dir.path().join("auth.json").exists());
+        let storage = FileAuthStorage::new(dir.path().to_path_buf());
+        let removed = storage.delete()?;
+        assert!(removed);
+        assert!(!dir.path().join("auth.json").exists());
+        Ok(())
+    }
+
+    /// Test helper: put a placeholder entry (`"{}"`) into the mock keyring
+    /// under the key produced by `compute_key`, and write a placeholder
+    /// `auth.json` (`"stale"`) into `codex_home`.
+    ///
+    /// Returns the keyring key and the auth file path so the caller can
+    /// assert that both are gone after a delete.
+    fn seed_keyring_and_fallback_auth_file_for_delete<F>(
+        mock_keyring: &MockKeyringStore,
+        codex_home: &Path,
+        compute_key: F,
+    ) -> anyhow::Result<(String, PathBuf)>
+    where
+        F: FnOnce() -> std::io::Result<String>,
+    {
+        let key = compute_key()?;
+        mock_keyring.save(KEYRING_SERVICE, &key, "{}")?;
+        let auth_file = get_auth_file(codex_home);
+        std::fs::write(&auth_file, "stale")?;
+        Ok((key, auth_file))
+    }
+
+    /// Test helper: serialize `auth` to JSON and store it in the mock keyring
+    /// under the key produced by `compute_key`.
+    fn seed_keyring_with_auth<F>(
+        mock_keyring: &MockKeyringStore,
+        compute_key: F,
+        auth: &AuthDotJson,
+    ) -> anyhow::Result<()>
+    where
+        F: FnOnce() -> std::io::Result<String>,
+    {
+        let key = compute_key()?;
+        let serialized = serde_json::to_string(auth)?;
+        mock_keyring.save(KEYRING_SERVICE, &key, &serialized)?;
+        Ok(())
+    }
+
+    /// Test helper: assert that the mock keyring holds exactly the JSON
+    /// serialization of `expected` under `key`, and that no `auth.json`
+    /// remains in `codex_home`.
+    fn assert_keyring_saved_auth_and_removed_fallback(
+        mock_keyring: &MockKeyringStore,
+        key: &str,
+        codex_home: &Path,
+        expected: &AuthDotJson,
+    ) {
+        let saved_value = mock_keyring
+            .saved_value(key)
+            .expect("keyring entry should exist");
+        let expected_serialized = serde_json::to_string(expected).expect("serialize expected auth");
+        assert_eq!(saved_value, expected_serialized);
+        let auth_file = get_auth_file(codex_home);
+        assert!(
+            !auth_file.exists(),
+            "fallback auth.json should be removed after keyring save"
+        );
+    }
+
+    /// Test helper: build a fake JWT whose claims are derived from `prefix`
+    /// and parse it into an `IdTokenInfo`.
+    ///
+    /// The token has an `alg: none` header, an email of
+    /// `{prefix}@example.com`, a `chatgpt_account_id` of `{prefix}-account`,
+    /// and a dummy signature segment; it is not cryptographically signed.
+    fn id_token_with_prefix(prefix: &str) -> IdTokenInfo {
+        #[derive(Serialize)]
+        struct Header {
+            alg: &'static str,
+            typ: &'static str,
+        }
+
+        let header = Header {
+            alg: "none",
+            typ: "JWT",
+        };
+        let payload = json!({
+            "email": format!("{prefix}@example.com"),
+            "https://api.openai.com/auth": {
+                "chatgpt_account_id": format!("{prefix}-account"),
+            },
+        });
+        // JWT segments are base64url-encoded without padding.
+        let encode = |bytes: &[u8]| base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(bytes);
+        let header_b64 = encode(&serde_json::to_vec(&header).expect("serialize header"));
+        let payload_b64 = encode(&serde_json::to_vec(&payload).expect("serialize payload"));
+        let signature_b64 = encode(b"sig");
+        let fake_jwt = format!("{header_b64}.{payload_b64}.{signature_b64}");
+
+        crate::token_data::parse_id_token(&fake_jwt).expect("fake JWT should parse")
+    }
+
+    /// Test helper: build an `AuthDotJson` whose API key and token fields all
+    /// start with `prefix`, so values from different sources are easy to tell
+    /// apart in assertions. `last_refresh` is left as `None`.
+    fn auth_with_prefix(prefix: &str) -> AuthDotJson {
+        AuthDotJson {
+            openai_api_key: Some(format!("{prefix}-api-key")),
+            tokens: Some(TokenData {
+                id_token: id_token_with_prefix(prefix),
+                access_token: format!("{prefix}-access"),
+                refresh_token: format!("{prefix}-refresh"),
+                account_id: Some(format!("{prefix}-account-id")),
+            }),
+            last_refresh: None,
+        }
+    }
+
+    #[test]
+    fn keyring_auth_storage_load_returns_deserialized_auth() -> anyhow::Result<()> {
+        let codex_home = tempdir()?;
+        let mock_keyring = MockKeyringStore::default();
+        let storage = KeyringAuthStorage::new(
+            codex_home.path().to_path_buf(),
+            Arc::new(mock_keyring.clone()),
+        );
+        let expected = AuthDotJson {
+            openai_api_key: Some("sk-test".to_string()),
+            tokens: None,
+            last_refresh: None,
+        };
+        seed_keyring_with_auth(
+            &mock_keyring,
+            || compute_store_key(codex_home.path()),
+            &expected,
+        )?;
+
+        let loaded = storage.load()?;
+        assert_eq!(Some(expected), loaded);
+        Ok(())
+    }
+
+    #[test]
+    fn keyring_auth_storage_compute_store_key_for_home_directory() -> anyhow::Result<()> {
+        let codex_home = PathBuf::from("~/.codex");
+
+        let key = compute_store_key(codex_home.as_path())?;
+
+        assert_eq!(key, "cli|940db7b1d0e4eb40");
+        Ok(())
+    }
+
+    #[test]
+    fn keyring_auth_storage_save_persists_and_removes_fallback_file() -> anyhow::Result<()> {
+        let codex_home = tempdir()?;
+        let mock_keyring = MockKeyringStore::default();
+        let storage = KeyringAuthStorage::new(
+            codex_home.path().to_path_buf(),
+            Arc::new(mock_keyring.clone()),
+        );
+        let auth_file = get_auth_file(codex_home.path());
+        std::fs::write(&auth_file, "stale")?;
+        let auth = AuthDotJson {
+            openai_api_key: None,
+            tokens: Some(TokenData {
+                id_token: Default::default(),
+                access_token: "access".to_string(),
+                refresh_token: "refresh".to_string(),
+                account_id: Some("account".to_string()),
+            }),
+            last_refresh: Some(Utc::now()),
+        };
+
+        storage.save(&auth)?;
+
+        let key = compute_store_key(codex_home.path())?;
+        assert_keyring_saved_auth_and_removed_fallback(
+            &mock_keyring,
+            &key,
+            codex_home.path(),
+            &auth,
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn keyring_auth_storage_delete_removes_keyring_and_file() -> anyhow::Result<()> {
+        let codex_home = tempdir()?;
+        let mock_keyring = MockKeyringStore::default();
+        let storage = KeyringAuthStorage::new(
+            codex_home.path().to_path_buf(),
+            Arc::new(mock_keyring.clone()),
+        );
+        let (key, auth_file) = seed_keyring_and_fallback_auth_file_for_delete(
+            &mock_keyring,
+            codex_home.path(),
+            || compute_store_key(codex_home.path()),
+        )?;
+
+        let removed = storage.delete()?;
+
+        assert!(removed, "delete should report removal");
+        assert!(
+            !mock_keyring.contains(&key),
+            "keyring entry should be removed"
+        );
+        assert!(
+            !auth_file.exists(),
+            "fallback auth.json should be removed after keyring delete"
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn auto_auth_storage_load_prefers_keyring_value() -> anyhow::Result<()> {
+        let codex_home = tempdir()?;
+        let mock_keyring = MockKeyringStore::default();
+        let storage = AutoAuthStorage::new(
+            codex_home.path().to_path_buf(),
+            Arc::new(mock_keyring.clone()),
+        );
+        let keyring_auth = auth_with_prefix("keyring");
+        seed_keyring_with_auth(
+            &mock_keyring,
+            || compute_store_key(codex_home.path()),
+            &keyring_auth,
+        )?;
+
+        let file_auth = auth_with_prefix("file");
+        storage.file_storage.save(&file_auth)?;
+
+        let loaded = storage.load()?;
+        assert_eq!(loaded, Some(keyring_auth));
+        Ok(())
+    }
+
+    #[test]
+    fn auto_auth_storage_load_uses_file_when_keyring_empty() -> anyhow::Result<()> {
+        let codex_home = tempdir()?;
+        let mock_keyring = MockKeyringStore::default();
+        let storage = AutoAuthStorage::new(codex_home.path().to_path_buf(), Arc::new(mock_keyring));
+
+        let expected = auth_with_prefix("file-only");
+        storage.file_storage.save(&expected)?;
+
+        let loaded = storage.load()?;
+        assert_eq!(loaded, Some(expected));
+        Ok(())
+    }
+
+    #[test]
+    fn auto_auth_storage_load_falls_back_when_keyring_errors() -> anyhow::Result<()> {
+        let codex_home = tempdir()?;
+        let mock_keyring = MockKeyringStore::default();
+        let storage = AutoAuthStorage::new(
+            codex_home.path().to_path_buf(),
+            Arc::new(mock_keyring.clone()),
+        );
+        let key = compute_store_key(codex_home.path())?;
+        mock_keyring.set_error(&key, KeyringError::Invalid("error".into(), "load".into()));
+
+        let expected = auth_with_prefix("fallback");
+        storage.file_storage.save(&expected)?;
+
+        let loaded = storage.load()?;
+        assert_eq!(loaded, Some(expected));
+        Ok(())
+    }
+
+    #[test]
+    fn auto_auth_storage_save_prefers_keyring() -> anyhow::Result<()> {
+        let codex_home = tempdir()?;
+        let mock_keyring = MockKeyringStore::default();
+        let storage = AutoAuthStorage::new(
+            codex_home.path().to_path_buf(),
+            Arc::new(mock_keyring.clone()),
+        );
+        let key = compute_store_key(codex_home.path())?;
+
+        let stale = auth_with_prefix("stale");
+        storage.file_storage.save(&stale)?;
+
+        let expected = auth_with_prefix("to-save");
+        storage.save(&expected)?;
+
+        assert_keyring_saved_auth_and_removed_fallback(
+            &mock_keyring,
+            &key,
+            codex_home.path(),
+            &expected,
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn auto_auth_storage_save_falls_back_when_keyring_errors() -> anyhow::Result<()> {
+        let codex_home = tempdir()?;
+        let mock_keyring = MockKeyringStore::default();
+        let storage = AutoAuthStorage::new(
+            codex_home.path().to_path_buf(),
+            Arc::new(mock_keyring.clone()),
+        );
+        let key = compute_store_key(codex_home.path())?;
+        mock_keyring.set_error(&key, KeyringError::Invalid("error".into(), "save".into()));
+
+        let auth = auth_with_prefix("fallback");
+        storage.save(&auth)?;
+
+        let auth_file = get_auth_file(codex_home.path());
+        assert!(
+            auth_file.exists(),
+            "fallback auth.json should be created when keyring save fails"
+        );
+        let saved = storage
+            .file_storage
+            .load()?
+            .context("fallback auth should exist")?;
+        assert_eq!(saved, auth);
+        assert!(
+            mock_keyring.saved_value(&key).is_none(),
+            "keyring should not contain value when save fails"
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn auto_auth_storage_delete_removes_keyring_and_file() -> anyhow::Result<()> {
+        let codex_home = tempdir()?;
+        let mock_keyring = MockKeyringStore::default();
+        let storage = AutoAuthStorage::new(
+            codex_home.path().to_path_buf(),
+            Arc::new(mock_keyring.clone()),
+        );
+        let (key, auth_file) = seed_keyring_and_fallback_auth_file_for_delete(
+            &mock_keyring,
+            codex_home.path(),
+            || compute_store_key(codex_home.path()),
+        )?;
+
+        let removed = storage.delete()?;
+
+        assert!(removed, "delete should report removal");
+        assert!(
+            !mock_keyring.contains(&key),
+            "keyring entry should be removed"
+        );
+        assert!(
+            !auth_file.exists(),
+            "fallback auth.json should be removed after delete"
+        );
+        Ok(())
+    }
+}
--- a/codex-rs/core/src/chat_completions.rs
+++ b/codex-rs/core/src/chat_completions.rs
@@ -17,6 +17,7 @@ use crate::util::backoff;
 use bytes::Bytes;
 use codex_otel::otel_event_manager::OtelEventManager;
 use codex_protocol::models::ContentItem;
+use codex_protocol::models::FunctionCallOutputContentItem;
 use codex_protocol::models::ReasoningItemContent;
 use codex_protocol::models::ResponseItem;
 use eventsource_stream::Eventsource;
@@ -76,6 +77,7 @@ pub(crate) async fn stream_chat_completions(
            ResponseItem::CustomToolCall { .. } => {}
            ResponseItem::CustomToolCallOutput { .. } => {}
            ResponseItem::WebSearchCall { .. } => {}
+            ResponseItem::GhostSnapshot { .. } => {}
        }
    }

@@ -158,16 +160,26 @@ pub(crate) async fn stream_chat_completions(
    for (idx, item) in input.iter().enumerate() {
        match item {
            ResponseItem::Message { role, content, .. } => {
+                // Build content either as a plain string (typical for assistant text)
+                // or as an array of content items when images are present (user/tool multimodal).
                let mut text = String::new();
+                let mut items: Vec<serde_json::Value> = Vec::new();
+                let mut saw_image = false;
+
                for c in content {
                    match c {
                        ContentItem::InputText { text: t }
                        | ContentItem::OutputText { text: t } => {
                            text.push_str(t);
+                            items.push(json!({"type":"text","text": t}));
+                        }
+                        ContentItem::InputImage { image_url } => {
+                            saw_image = true;
+                            items.push(json!({"type":"image_url","image_url": {"url": image_url}}));
                        }
-                        _ => {}
                    }
                }
+
                // Skip exact-duplicate assistant messages.
                if role == "assistant" {
                    if let Some(prev) = &last_assistant_text
@@ -178,7 +190,17 @@ pub(crate) async fn stream_chat_completions(
                    last_assistant_text = Some(text.clone());
                }

-                let mut msg = json!({"role": role, "content": text});
+                // For assistant messages, always send a plain string for compatibility.
+                // For any other role, send an array of content items if an image is present.
+                let content_value = if role == "assistant" {
+                    json!(text)
+                } else if saw_image {
+                    json!(items)
+                } else {
+                    json!(text)
+                };
+
+                let mut msg = json!({"role": role, "content": content_value});
                if role == "assistant"
                    && let Some(reasoning) = reasoning_by_anchor_index.get(&idx)
                    && let Some(obj) = msg.as_object_mut()
@@ -237,10 +259,29 @@ pub(crate) async fn stream_chat_completions(
                messages.push(msg);
            }
            ResponseItem::FunctionCallOutput { call_id, output } => {
+                // Prefer structured content items when available (e.g., images)
+                // otherwise fall back to the legacy plain-string content.
+                let content_value = if let Some(items) = &output.content_items {
+                    let mapped: Vec<serde_json::Value> = items
+                        .iter()
+                        .map(|it| match it {
+                            FunctionCallOutputContentItem::InputText { text } => {
+                                json!({"type":"text","text": text})
+                            }
+                            FunctionCallOutputContentItem::InputImage { image_url } => {
+                                json!({"type":"image_url","image_url": {"url": image_url}})
+                            }
+                        })
+                        .collect();
+                    json!(mapped)
+                } else {
+                    json!(output.content)
+                };
+
                messages.push(json!({
                    "role": "tool",
                    "tool_call_id": call_id,
-                    "content": output.content,
+                    "content": content_value,
                }));
            }
            ResponseItem::CustomToolCall {
@@ -270,6 +311,10 @@ pub(crate) async fn stream_chat_completions(
                    "content": output,
                }));
            }
+            ResponseItem::GhostSnapshot { .. } => {
+                // Ghost snapshots annotate history but are not sent to the model.
+                continue;
+            }
            ResponseItem::Reasoning { .. }
            | ResponseItem::WebSearchCall { .. }
            | ResponseItem::Other => {
--- a/codex-rs/core/src/client.rs
+++ b/codex-rs/core/src/client.rs
@@ -134,6 +134,14 @@ impl ModelClient {
        self.stream_with_task_kind(prompt, TaskKind::Regular).await
    }

+    pub fn config(&self) -> Arc<Config> {
+        Arc::clone(&self.config)
+    }
+
+    pub fn provider(&self) -> &ModelProviderInfo {
+        &self.provider
+    }
+
    pub(crate) async fn stream_with_task_kind(
        &self,
        prompt: &Prompt,
@@ -215,18 +223,14 @@ impl ModelClient {

        let input_with_instructions = prompt.get_formatted_input();

-        let verbosity = match &self.config.model_family.family {
-            family if family == "gpt-5" => self.config.model_verbosity,
-            _ => {
-                if self.config.model_verbosity.is_some() {
-                    warn!(
-                        "model_verbosity is set but ignored for non-gpt-5 model family: {}",
-                        self.config.model_family.family
-                    );
-                }
-
-                None
-            }
+        // Warn only when a verbosity was explicitly configured and is about to be dropped.
+        let supports_verbosity = self.config.model_family.support_verbosity;
+        if !supports_verbosity && self.config.model_verbosity.is_some() {
+            warn!(
+                "model_verbosity is set but ignored as the model does not support verbosity: {}",
+                self.config.model_family.family
+            );
+        }
+        let verbosity = self.config.model_verbosity.filter(|_| supports_verbosity);

        // Only include `text.verbosity` for GPT-5 family models
@@ -381,9 +385,14 @@ impl ModelClient {

                if status == StatusCode::UNAUTHORIZED
                    && let Some(manager) = auth_manager.as_ref()
-                    && manager.auth().is_some()
+                    && let Some(auth) = auth.as_ref()
+                    && auth.mode == AuthMode::ChatGPT
                {
-                    let _ = manager.refresh_token().await;
+                    manager.refresh_token().await.map_err(|err| {
+                        StreamAttemptError::Fatal(CodexErr::Fatal(format!(
+                            "Failed to refresh ChatGPT credentials: {err}"
+                        )))
+                    })?;
                }

                // The OpenAI Responses endpoint returns structured JSON bodies even for 4xx/5xx
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
--- a/codex-rs/core/src/codex/compact.rs
+++ b/codex-rs/core/src/codex/compact.rs
@@ -2,6 +2,7 @@ use std::sync::Arc;

 use super::Session;
 use super::TurnContext;
+use super::filter_model_visible_history;
 use super::get_last_assistant_message_from_turn;
 use crate::Prompt;
 use crate::client_common::ResponseEvent;
@@ -86,8 +87,9 @@ async fn run_compact_task_inner(

    loop {
        let turn_input = history.get_history();
+        let prompt_input = filter_model_visible_history(turn_input.clone());
        let prompt = Prompt {
-            input: turn_input.clone(),
+            input: prompt_input.clone(),
            ..Default::default()
        };
        let attempt_result = drain_to_completed(&sess, turn_context.as_ref(), &prompt).await;
@@ -109,7 +111,7 @@ async fn run_compact_task_inner(
                return;
            }
            Err(e @ CodexErr::ContextWindowExceeded) => {
-                if turn_input.len() > 1 {
+                if prompt_input.len() > 1 {
                    // Trim from the beginning to preserve cache (prefix-based) and keep recent messages intact.
                    error!(
                        "Context window exceeded while compacting; removing oldest history item. Error: {e}"
@@ -132,7 +134,7 @@ async fn run_compact_task_inner(
                    let delay = backoff(retries);
                    sess.notify_stream_error(
                        turn_context.as_ref(),
-                        format!("Re-connecting... {retries}/{max_retries}"),
+                        format!("Reconnecting... {retries}/{max_retries}"),
                    )
                    .await;
                    tokio::time::sleep(delay).await;
@@ -152,7 +154,13 @@ async fn run_compact_task_inner(
    let summary_text = get_last_assistant_message_from_turn(&history_snapshot).unwrap_or_default();
    let user_messages = collect_user_messages(&history_snapshot);
    let initial_context = sess.build_initial_context(turn_context.as_ref());
-    let new_history = build_compacted_history(initial_context, &user_messages, &summary_text);
+    let mut new_history = build_compacted_history(initial_context, &user_messages, &summary_text);
+    let ghost_snapshots: Vec<ResponseItem> = history_snapshot
+        .iter()
+        .filter(|item| matches!(item, ResponseItem::GhostSnapshot { .. }))
+        .cloned()
+        .collect();
+    new_history.extend(ghost_snapshots);
    sess.replace_history(new_history).await;

    let rollout_item = RolloutItem::Compacted(CompactedItem {
@@ -200,7 +208,20 @@ pub(crate) fn build_compacted_history(
    user_messages: &[String],
    summary_text: &str,
 ) -> Vec<ResponseItem> {
-    let mut history = initial_context;
+    build_compacted_history_with_limit(
+        initial_context,
+        user_messages,
+        summary_text,
+        COMPACT_USER_MESSAGE_MAX_TOKENS * 4,
+    )
+}
+
+fn build_compacted_history_with_limit(
+    mut history: Vec<ResponseItem>,
+    user_messages: &[String],
+    summary_text: &str,
+    max_bytes: usize,
+) -> Vec<ResponseItem> {
    let mut user_messages_text = if user_messages.is_empty() {
        "(none)".to_string()
    } else {
@@ -208,7 +229,6 @@ pub(crate) fn build_compacted_history(
    };
    // Truncate the concatenated prior user messages so the bridge message
    // stays well under the context window (approx. 4 bytes/token).
-    let max_bytes = COMPACT_USER_MESSAGE_MAX_TOKENS * 4;
    if user_messages_text.len() > max_bytes {
        user_messages_text = truncate_middle(&user_messages_text, max_bytes).0;
    }
@@ -361,11 +381,16 @@ mod tests {

    #[test]
    fn build_compacted_history_truncates_overlong_user_messages() {
-        // Prepare a very large prior user message so the aggregated
-        // `user_messages_text` exceeds the truncation threshold used by
-        // `build_compacted_history` (80k bytes).
-        let big = "X".repeat(200_000);
-        let history = build_compacted_history(Vec::new(), std::slice::from_ref(&big), "SUMMARY");
+        // Use a small truncation limit so the test remains fast while still validating
+        // that oversized user content is truncated.
+        let max_bytes = 128;
+        let big = "X".repeat(max_bytes + 50);
+        let history = super::build_compacted_history_with_limit(
+            Vec::new(),
+            std::slice::from_ref(&big),
+            "SUMMARY",
+            max_bytes,
+        );

        // Expect exactly one bridge message added to history (plus any initial context we provided, which is none).
        assert_eq!(history.len(), 1);
--- a/codex-rs/core/src/codex_conversation.rs
+++ b/codex-rs/core/src/codex_conversation.rs
@@ -3,16 +3,21 @@ use crate::error::Result as CodexResult;
 use crate::protocol::Event;
 use crate::protocol::Op;
 use crate::protocol::Submission;
+use std::path::PathBuf;

 pub struct CodexConversation {
    codex: Codex,
+    rollout_path: PathBuf,
 }

 /// Conduit for the bidirectional stream of messages that compose a conversation
 /// in Codex.
 impl CodexConversation {
-    pub(crate) fn new(codex: Codex) -> Self {
-        Self { codex }
+    pub(crate) fn new(codex: Codex, rollout_path: PathBuf) -> Self {
+        Self {
+            codex,
+            rollout_path,
+        }
    }

    pub async fn submit(&self, op: Op) -> CodexResult<String> {
@@ -27,4 +32,8 @@ impl CodexConversation {
    pub async fn next_event(&self) -> CodexResult<Event> {
        self.codex.next_event().await
    }
+
+    pub fn rollout_path(&self) -> PathBuf {
+        self.rollout_path.clone()
+    }
 }
--- a/codex-rs/core/src/config.rs
+++ b/codex-rs/core/src/config.rs
@@ -1,3 +1,4 @@
+use crate::auth::AuthCredentialsStoreMode;
 use crate::config_loader::LoadedConfigLayers;
 pub use crate::config_loader::load_config_as_toml;
 use crate::config_loader::load_config_layers_with_overrides;
@@ -108,6 +109,10 @@ pub struct Config {
    /// for either of approval_policy or sandbox_mode.
    pub did_user_set_custom_approval_policy_or_sandbox_mode: bool,

+    /// On Windows, indicates that a previously configured workspace-write sandbox
+    /// was coerced to read-only because native auto mode is unsupported.
+    pub forced_auto_mode_downgraded_on_windows: bool,
+
    pub shell_environment_policy: ShellEnvironmentPolicy,

    /// When `true`, `AgentReasoning` events emitted by the backend will be
@@ -156,6 +161,12 @@ pub struct Config {
    /// resolved against this path.
    pub cwd: PathBuf,

+    /// Preferred store for CLI auth credentials.
+    /// file (default): Use a file in the Codex home directory.
+    /// keyring: Use an OS-specific keyring service.
+    /// auto: Use the OS-specific keyring service if available, otherwise use a file.
+    pub cli_auth_credentials_store_mode: AuthCredentialsStoreMode,
+
    /// Definition for MCP servers that Codex can reach out to for tool calls.
    pub mcp_servers: HashMap<String, McpServerConfig>,

@@ -223,6 +234,9 @@ pub struct Config {

    pub tools_web_search_request: bool,

+    /// When `true`, run a model-based assessment for commands denied by the sandbox.
+    pub experimental_sandbox_command_assessment: bool,
+
    pub use_experimental_streamable_shell_tool: bool,

    /// If set to `true`, used only the experimental unified exec tool.
@@ -866,6 +880,13 @@ pub struct ConfigToml {
    #[serde(default)]
    pub forced_login_method: Option<ForcedLoginMethod>,

+    /// Preferred backend for storing CLI auth credentials.
+    /// file (default): Use a file in the Codex home directory.
+    /// keyring: Use an OS-specific keyring service.
+    /// auto: Use the keyring if available, otherwise use a file.
+    #[serde(default)]
+    pub cli_auth_credentials_store: Option<AuthCredentialsStoreMode>,
+
    /// Definition for MCP servers that Codex can reach out to for tool calls.
    #[serde(default)]
    pub mcp_servers: HashMap<String, McpServerConfig>,
@@ -958,6 +979,7 @@ pub struct ConfigToml {
    pub experimental_use_unified_exec_tool: Option<bool>,
    pub experimental_use_rmcp_client: Option<bool>,
    pub experimental_use_freeform_apply_patch: Option<bool>,
+    pub experimental_sandbox_command_assessment: Option<bool>,
 }

 impl From<ConfigToml> for UserSavedConfig {
@@ -1018,14 +1040,22 @@ impl From<ToolsToml> for Tools {
    }
 }

+#[derive(Debug, PartialEq, Eq)]
+pub struct SandboxPolicyResolution {
+    pub policy: SandboxPolicy,
+    pub forced_auto_mode_downgraded_on_windows: bool,
+}
+
 impl ConfigToml {
    /// Derive the effective sandbox policy from the configuration.
    fn derive_sandbox_policy(
        &self,
        sandbox_mode_override: Option<SandboxMode>,
+        profile_sandbox_mode: Option<SandboxMode>,
        resolved_cwd: &Path,
-    ) -> SandboxPolicy {
+    ) -> SandboxPolicyResolution {
        let resolved_sandbox_mode = sandbox_mode_override
+            .or(profile_sandbox_mode)
            .or(self.sandbox_mode)
            .or_else(|| {
                // if no sandbox_mode is set, but user has marked directory as trusted, use WorkspaceWrite
@@ -1038,7 +1068,7 @@ impl ConfigToml {
                })
            })
            .unwrap_or_default();
-        match resolved_sandbox_mode {
+        let mut sandbox_policy = match resolved_sandbox_mode {
            SandboxMode::ReadOnly => SandboxPolicy::new_read_only_policy(),
            SandboxMode::WorkspaceWrite => match self.sandbox_workspace_write.as_ref() {
                Some(SandboxWorkspaceWrite {
@@ -1055,6 +1085,17 @@ impl ConfigToml {
                None => SandboxPolicy::new_workspace_write_policy(),
            },
            SandboxMode::DangerFullAccess => SandboxPolicy::DangerFullAccess,
+        };
+        let mut forced_auto_mode_downgraded_on_windows = false;
+        if cfg!(target_os = "windows")
+            && matches!(resolved_sandbox_mode, SandboxMode::WorkspaceWrite)
+        {
+            sandbox_policy = SandboxPolicy::new_read_only_policy();
+            forced_auto_mode_downgraded_on_windows = true;
+        }
+        SandboxPolicyResolution {
+            policy: sandbox_policy,
+            forced_auto_mode_downgraded_on_windows,
        }
    }

@@ -1118,6 +1159,7 @@ pub struct ConfigOverrides {
    pub include_view_image_tool: Option<bool>,
    pub show_raw_agent_reasoning: Option<bool>,
    pub tools_web_search_request: Option<bool>,
+    pub experimental_sandbox_command_assessment: Option<bool>,
    /// Additional directories that should be treated as writable roots for this session.
    pub additional_writable_roots: Vec<PathBuf>,
 }
@@ -1147,6 +1189,7 @@ impl Config {
            include_view_image_tool: include_view_image_tool_override,
            show_raw_agent_reasoning,
            tools_web_search_request: override_tools_web_search_request,
+            experimental_sandbox_command_assessment: sandbox_command_assessment_override,
            additional_writable_roots,
        } = overrides;

@@ -1172,6 +1215,7 @@ impl Config {
            include_apply_patch_tool: include_apply_patch_tool_override,
            include_view_image_tool: include_view_image_tool_override,
            web_search_request: override_tools_web_search_request,
+            experimental_sandbox_command_assessment: sandbox_command_assessment_override,
        };

        let features = Features::from_config(&cfg, &config_profile, feature_overrides);
@@ -1212,7 +1256,10 @@ impl Config {
            .get_active_project(&resolved_cwd)
            .unwrap_or(ProjectConfig { trust_level: None });

-        let mut sandbox_policy = cfg.derive_sandbox_policy(sandbox_mode, &resolved_cwd);
+        let SandboxPolicyResolution {
+            policy: mut sandbox_policy,
+            forced_auto_mode_downgraded_on_windows,
+        } = cfg.derive_sandbox_policy(sandbox_mode, config_profile.sandbox_mode, &resolved_cwd);
        if let SandboxPolicy::WorkspaceWrite { writable_roots, .. } = &mut sandbox_policy {
            for path in additional_writable_roots {
                if !writable_roots.iter().any(|existing| existing == &path) {
@@ -1235,8 +1282,8 @@ impl Config {
            .is_some()
            || config_profile.approval_policy.is_some()
            || cfg.approval_policy.is_some()
-            // TODO(#3034): profile.sandbox_mode is not implemented
            || sandbox_mode.is_some()
+            || config_profile.sandbox_mode.is_some()
            || cfg.sandbox_mode.is_some();

        let mut model_providers = built_in_model_providers();
@@ -1269,6 +1316,8 @@ impl Config {
        let use_experimental_streamable_shell_tool = features.enabled(Feature::StreamableShell);
        let use_experimental_unified_exec_tool = features.enabled(Feature::UnifiedExec);
        let use_experimental_use_rmcp_client = features.enabled(Feature::RmcpClient);
+        let experimental_sandbox_command_assessment =
+            features.enabled(Feature::SandboxCommandAssessment);

        let forced_chatgpt_workspace_id =
            cfg.forced_chatgpt_workspace_id.as_ref().and_then(|value| {
@@ -1341,10 +1390,14 @@ impl Config {
            approval_policy,
            sandbox_policy,
            did_user_set_custom_approval_policy_or_sandbox_mode,
+            forced_auto_mode_downgraded_on_windows,
            shell_environment_policy,
            notify: cfg.notify,
            user_instructions,
            base_instructions,
+            // The config.toml omits "_mode" because it's a config file. However, "_mode"
+            // is important in code to differentiate the mode from the store implementation.
+            cli_auth_credentials_store_mode: cfg.cli_auth_credentials_store.unwrap_or_default(),
            mcp_servers: cfg.mcp_servers,
            // The config.toml omits "_mode" because it's a config file. However, "_mode"
            // is important in code to differentiate the mode from the store implementation.
@@ -1390,6 +1443,7 @@ impl Config {
            forced_login_method,
            include_apply_patch_tool: include_apply_patch_tool_flag,
            tools_web_search_request,
+            experimental_sandbox_command_assessment,
            use_experimental_streamable_shell_tool,
            use_experimental_unified_exec_tool,
            use_experimental_use_rmcp_client,
@@ -1591,10 +1645,17 @@ network_access = false  # This should be ignored.
        let sandbox_full_access_cfg = toml::from_str::<ConfigToml>(sandbox_full_access)
            .expect("TOML deserialization should succeed");
        let sandbox_mode_override = None;
+        let resolution = sandbox_full_access_cfg.derive_sandbox_policy(
+            sandbox_mode_override,
+            None,
+            &PathBuf::from("/tmp/test"),
+        );
        assert_eq!(
-            SandboxPolicy::DangerFullAccess,
-            sandbox_full_access_cfg
-                .derive_sandbox_policy(sandbox_mode_override, &PathBuf::from("/tmp/test"))
+            resolution,
+            SandboxPolicyResolution {
+                policy: SandboxPolicy::DangerFullAccess,
+                forced_auto_mode_downgraded_on_windows: false,
+            }
        );

        let sandbox_read_only = r#"
@@ -1607,10 +1668,17 @@ network_access = true  # This should be ignored.
        let sandbox_read_only_cfg = toml::from_str::<ConfigToml>(sandbox_read_only)
            .expect("TOML deserialization should succeed");
        let sandbox_mode_override = None;
+        let resolution = sandbox_read_only_cfg.derive_sandbox_policy(
+            sandbox_mode_override,
+            None,
+            &PathBuf::from("/tmp/test"),
+        );
        assert_eq!(
-            SandboxPolicy::ReadOnly,
-            sandbox_read_only_cfg
-                .derive_sandbox_policy(sandbox_mode_override, &PathBuf::from("/tmp/test"))
+            resolution,
+            SandboxPolicyResolution {
+                policy: SandboxPolicy::ReadOnly,
+                forced_auto_mode_downgraded_on_windows: false,
+            }
        );

        let sandbox_workspace_write = r#"
@@ -1627,16 +1695,33 @@ exclude_slash_tmp = true
        let sandbox_workspace_write_cfg = toml::from_str::<ConfigToml>(sandbox_workspace_write)
            .expect("TOML deserialization should succeed");
        let sandbox_mode_override = None;
-        assert_eq!(
-            SandboxPolicy::WorkspaceWrite {
-                writable_roots: vec![PathBuf::from("/my/workspace")],
-                network_access: false,
-                exclude_tmpdir_env_var: true,
-                exclude_slash_tmp: true,
-            },
-            sandbox_workspace_write_cfg
-                .derive_sandbox_policy(sandbox_mode_override, &PathBuf::from("/tmp/test"))
+        let resolution = sandbox_workspace_write_cfg.derive_sandbox_policy(
+            sandbox_mode_override,
+            None,
+            &PathBuf::from("/tmp/test"),
        );
+        if cfg!(target_os = "windows") {
+            assert_eq!(
+                resolution,
+                SandboxPolicyResolution {
+                    policy: SandboxPolicy::ReadOnly,
+                    forced_auto_mode_downgraded_on_windows: true,
+                }
+            );
+        } else {
+            assert_eq!(
+                resolution,
+                SandboxPolicyResolution {
+                    policy: SandboxPolicy::WorkspaceWrite {
+                        writable_roots: vec![PathBuf::from("/my/workspace")],
+                        network_access: false,
+                        exclude_tmpdir_env_var: true,
+                        exclude_slash_tmp: true,
+                    },
+                    forced_auto_mode_downgraded_on_windows: false,
+                }
+            );
+        }

        let sandbox_workspace_write = r#"
 sandbox_mode = "workspace-write"
@@ -1655,16 +1740,33 @@ trust_level = "trusted"
        let sandbox_workspace_write_cfg = toml::from_str::<ConfigToml>(sandbox_workspace_write)
            .expect("TOML deserialization should succeed");
        let sandbox_mode_override = None;
-        assert_eq!(
-            SandboxPolicy::WorkspaceWrite {
-                writable_roots: vec![PathBuf::from("/my/workspace")],
-                network_access: false,
-                exclude_tmpdir_env_var: true,
-                exclude_slash_tmp: true,
-            },
-            sandbox_workspace_write_cfg
-                .derive_sandbox_policy(sandbox_mode_override, &PathBuf::from("/tmp/test"))
+        let resolution = sandbox_workspace_write_cfg.derive_sandbox_policy(
+            sandbox_mode_override,
+            None,
+            &PathBuf::from("/tmp/test"),
        );
+        if cfg!(target_os = "windows") {
+            assert_eq!(
+                resolution,
+                SandboxPolicyResolution {
+                    policy: SandboxPolicy::ReadOnly,
+                    forced_auto_mode_downgraded_on_windows: true,
+                }
+            );
+        } else {
+            assert_eq!(
+                resolution,
+                SandboxPolicyResolution {
+                    policy: SandboxPolicy::WorkspaceWrite {
+                        writable_roots: vec![PathBuf::from("/my/workspace")],
+                        network_access: false,
+                        exclude_tmpdir_env_var: true,
+                        exclude_slash_tmp: true,
+                    },
+                    forced_auto_mode_downgraded_on_windows: false,
+                }
+            );
+        }
    }

    #[test]
@@ -1689,24 +1791,76 @@ trust_level = "trusted"
        )?;

        let expected_backend = canonicalize(&backend).expect("canonicalize backend directory");
-        match config.sandbox_policy {
-            SandboxPolicy::WorkspaceWrite { writable_roots, .. } => {
-                assert_eq!(
-                    writable_roots
-                        .iter()
-                        .filter(|root| **root == expected_backend)
-                        .count(),
-                    1,
-                    "expected single writable root entry for {}",
-                    expected_backend.display()
-                );
+        if cfg!(target_os = "windows") {
+            assert!(
+                config.forced_auto_mode_downgraded_on_windows,
+                "expected workspace-write request to be downgraded on Windows"
+            );
+            match config.sandbox_policy {
+                SandboxPolicy::ReadOnly => {}
+                other => panic!("expected read-only policy on Windows, got {other:?}"),
+            }
+        } else {
+            match config.sandbox_policy {
+                SandboxPolicy::WorkspaceWrite { writable_roots, .. } => {
+                    assert_eq!(
+                        writable_roots
+                            .iter()
+                            .filter(|root| **root == expected_backend)
+                            .count(),
+                        1,
+                        "expected single writable root entry for {}",
+                        expected_backend.display()
+                    );
+                }
+                other => panic!("expected workspace-write policy, got {other:?}"),
            }
-            other => panic!("expected workspace-write policy, got {other:?}"),
        }

        Ok(())
    }

+    #[test]
+    fn config_defaults_to_file_cli_auth_store_mode() -> std::io::Result<()> {
+        let codex_home = TempDir::new()?;
+        let cfg = ConfigToml::default();
+
+        let config = Config::load_from_base_config_with_overrides(
+            cfg,
+            ConfigOverrides::default(),
+            codex_home.path().to_path_buf(),
+        )?;
+
+        assert_eq!(
+            config.cli_auth_credentials_store_mode,
+            AuthCredentialsStoreMode::File,
+        );
+
+        Ok(())
+    }
+
+    #[test]
+    fn config_honors_explicit_keyring_auth_store_mode() -> std::io::Result<()> {
+        let codex_home = TempDir::new()?;
+        let cfg = ConfigToml {
+            cli_auth_credentials_store: Some(AuthCredentialsStoreMode::Keyring),
+            ..Default::default()
+        };
+
+        let config = Config::load_from_base_config_with_overrides(
+            cfg,
+            ConfigOverrides::default(),
+            codex_home.path().to_path_buf(),
+        )?;
+
+        assert_eq!(
+            config.cli_auth_credentials_store_mode,
+            AuthCredentialsStoreMode::Keyring,
+        );
+
+        Ok(())
+    }
+
    #[test]
    fn config_defaults_to_auto_oauth_store_mode() -> std::io::Result<()> {
        let codex_home = TempDir::new()?;
@@ -1755,6 +1909,81 @@ trust_level = "trusted"
        Ok(())
    }

+    #[test]
+    fn profile_sandbox_mode_overrides_base() -> std::io::Result<()> {
+        let codex_home = TempDir::new()?;
+        let mut profiles = HashMap::new();
+        profiles.insert(
+            "work".to_string(),
+            ConfigProfile {
+                sandbox_mode: Some(SandboxMode::DangerFullAccess),
+                ..Default::default()
+            },
+        );
+        let cfg = ConfigToml {
+            profiles,
+            profile: Some("work".to_string()),
+            sandbox_mode: Some(SandboxMode::ReadOnly),
+            ..Default::default()
+        };
+
+        let config = Config::load_from_base_config_with_overrides(
+            cfg,
+            ConfigOverrides::default(),
+            codex_home.path().to_path_buf(),
+        )?;
+
+        assert!(matches!(
+            config.sandbox_policy,
+            SandboxPolicy::DangerFullAccess
+        ));
+        assert!(config.did_user_set_custom_approval_policy_or_sandbox_mode);
+
+        Ok(())
+    }
+
+    #[test]
+    fn cli_override_takes_precedence_over_profile_sandbox_mode() -> std::io::Result<()> {
+        let codex_home = TempDir::new()?;
+        let mut profiles = HashMap::new();
+        profiles.insert(
+            "work".to_string(),
+            ConfigProfile {
+                sandbox_mode: Some(SandboxMode::DangerFullAccess),
+                ..Default::default()
+            },
+        );
+        let cfg = ConfigToml {
+            profiles,
+            profile: Some("work".to_string()),
+            ..Default::default()
+        };
+
+        let overrides = ConfigOverrides {
+            sandbox_mode: Some(SandboxMode::WorkspaceWrite),
+            ..Default::default()
+        };
+
+        let config = Config::load_from_base_config_with_overrides(
+            cfg,
+            overrides,
+            codex_home.path().to_path_buf(),
+        )?;
+
+        if cfg!(target_os = "windows") {
+            assert!(matches!(config.sandbox_policy, SandboxPolicy::ReadOnly));
+            assert!(config.forced_auto_mode_downgraded_on_windows);
+        } else {
+            assert!(matches!(
+                config.sandbox_policy,
+                SandboxPolicy::WorkspaceWrite { .. }
+            ));
+            assert!(!config.forced_auto_mode_downgraded_on_windows);
+        }
+
+        Ok(())
+    }
+
    #[test]
    fn feature_table_overrides_legacy_flags() -> std::io::Result<()> {
        let codex_home = TempDir::new()?;
@@ -2849,10 +3078,12 @@ model_verbosity = "high"
                approval_policy: AskForApproval::Never,
                sandbox_policy: SandboxPolicy::new_read_only_policy(),
                did_user_set_custom_approval_policy_or_sandbox_mode: true,
+                forced_auto_mode_downgraded_on_windows: false,
                shell_environment_policy: ShellEnvironmentPolicy::default(),
                user_instructions: None,
                notify: None,
                cwd: fixture.cwd(),
+                cli_auth_credentials_store_mode: Default::default(),
                mcp_servers: HashMap::new(),
                mcp_oauth_credentials_store_mode: Default::default(),
                model_providers: fixture.model_provider_map.clone(),
@@ -2873,6 +3104,7 @@ model_verbosity = "high"
                forced_login_method: None,
                include_apply_patch_tool: false,
                tools_web_search_request: false,
+                experimental_sandbox_command_assessment: false,
                use_experimental_streamable_shell_tool: false,
                use_experimental_unified_exec_tool: false,
                use_experimental_use_rmcp_client: false,
@@ -2917,10 +3149,12 @@ model_verbosity = "high"
            approval_policy: AskForApproval::UnlessTrusted,
            sandbox_policy: SandboxPolicy::new_read_only_policy(),
            did_user_set_custom_approval_policy_or_sandbox_mode: true,
+            forced_auto_mode_downgraded_on_windows: false,
            shell_environment_policy: ShellEnvironmentPolicy::default(),
            user_instructions: None,
            notify: None,
            cwd: fixture.cwd(),
+            cli_auth_credentials_store_mode: Default::default(),
            mcp_servers: HashMap::new(),
            mcp_oauth_credentials_store_mode: Default::default(),
            model_providers: fixture.model_provider_map.clone(),
@@ -2941,6 +3175,7 @@ model_verbosity = "high"
            forced_login_method: None,
            include_apply_patch_tool: false,
            tools_web_search_request: false,
+            experimental_sandbox_command_assessment: false,
            use_experimental_streamable_shell_tool: false,
            use_experimental_unified_exec_tool: false,
            use_experimental_use_rmcp_client: false,
@@ -3000,10 +3235,12 @@ model_verbosity = "high"
            approval_policy: AskForApproval::OnFailure,
            sandbox_policy: SandboxPolicy::new_read_only_policy(),
            did_user_set_custom_approval_policy_or_sandbox_mode: true,
+            forced_auto_mode_downgraded_on_windows: false,
            shell_environment_policy: ShellEnvironmentPolicy::default(),
            user_instructions: None,
            notify: None,
            cwd: fixture.cwd(),
+            cli_auth_credentials_store_mode: Default::default(),
            mcp_servers: HashMap::new(),
            mcp_oauth_credentials_store_mode: Default::default(),
            model_providers: fixture.model_provider_map.clone(),
@@ -3024,6 +3261,7 @@ model_verbosity = "high"
            forced_login_method: None,
            include_apply_patch_tool: false,
            tools_web_search_request: false,
+            experimental_sandbox_command_assessment: false,
            use_experimental_streamable_shell_tool: false,
            use_experimental_unified_exec_tool: false,
            use_experimental_use_rmcp_client: false,
@@ -3069,10 +3307,12 @@ model_verbosity = "high"
            approval_policy: AskForApproval::OnFailure,
            sandbox_policy: SandboxPolicy::new_read_only_policy(),
            did_user_set_custom_approval_policy_or_sandbox_mode: true,
+            forced_auto_mode_downgraded_on_windows: false,
            shell_environment_policy: ShellEnvironmentPolicy::default(),
            user_instructions: None,
            notify: None,
            cwd: fixture.cwd(),
+            cli_auth_credentials_store_mode: Default::default(),
            mcp_servers: HashMap::new(),
            mcp_oauth_credentials_store_mode: Default::default(),
            model_providers: fixture.model_provider_map.clone(),
@@ -3093,6 +3333,7 @@ model_verbosity = "high"
            forced_login_method: None,
            include_apply_patch_tool: false,
            tools_web_search_request: false,
+            experimental_sandbox_command_assessment: false,
            use_experimental_streamable_shell_tool: false,
            use_experimental_unified_exec_tool: false,
            use_experimental_use_rmcp_client: false,
--- a/codex-rs/core/src/config_profile.rs
+++ b/codex-rs/core/src/config_profile.rs
@@ -4,6 +4,7 @@ use std::path::PathBuf;
 use crate::protocol::AskForApproval;
 use codex_protocol::config_types::ReasoningEffort;
 use codex_protocol::config_types::ReasoningSummary;
+use codex_protocol::config_types::SandboxMode;
 use codex_protocol::config_types::Verbosity;

 /// Collection of common configuration options that a user can define as a unit
@@ -15,6 +16,7 @@ pub struct ConfigProfile {
    /// [`ModelProviderInfo`] to use.
    pub model_provider: Option<String>,
    pub approval_policy: Option<AskForApproval>,
+    pub sandbox_mode: Option<SandboxMode>,
    pub model_reasoning_effort: Option<ReasoningEffort>,
    pub model_reasoning_summary: Option<ReasoningSummary>,
    pub model_verbosity: Option<Verbosity>,
@@ -26,6 +28,7 @@ pub struct ConfigProfile {
    pub experimental_use_exec_command_tool: Option<bool>,
    pub experimental_use_rmcp_client: Option<bool>,
    pub experimental_use_freeform_apply_patch: Option<bool>,
+    pub experimental_sandbox_command_assessment: Option<bool>,
    pub tools_web_search: Option<bool>,
    pub tools_view_image: Option<bool>,
    /// Optional feature toggles scoped to this profile.
--- a/codex-rs/core/src/conversation_history.rs
+++ b/codex-rs/core/src/conversation_history.rs
@@ -1,9 +1,20 @@
+use codex_protocol::models::FunctionCallOutputContentItem;
 use codex_protocol::models::FunctionCallOutputPayload;
 use codex_protocol::models::ResponseItem;
 use codex_protocol::protocol::TokenUsage;
 use codex_protocol::protocol::TokenUsageInfo;
+use codex_utils_string::take_bytes_at_char_boundary;
+use codex_utils_string::take_last_bytes_at_char_boundary;
+use std::ops::Deref;
 use tracing::error;

+// Model-formatting limits: clients get full streams; only content sent to the model is truncated.
+pub(crate) const MODEL_FORMAT_MAX_BYTES: usize = 10 * 1024; // 10 KiB
+pub(crate) const MODEL_FORMAT_MAX_LINES: usize = 256; // lines
+pub(crate) const MODEL_FORMAT_HEAD_LINES: usize = MODEL_FORMAT_MAX_LINES / 2; // 128
+pub(crate) const MODEL_FORMAT_TAIL_LINES: usize = MODEL_FORMAT_MAX_LINES - MODEL_FORMAT_HEAD_LINES; // 128
+pub(crate) const MODEL_FORMAT_HEAD_BYTES: usize = MODEL_FORMAT_MAX_BYTES / 2; // 5 KiB
+
 /// Transcript of conversation history
 #[derive(Debug, Clone, Default)]
 pub(crate) struct ConversationHistory {
@@ -40,11 +51,14 @@ impl ConversationHistory {
        I::Item: std::ops::Deref<Target = ResponseItem>,
    {
        for item in items {
-            if !is_api_message(&item) {
+            let item_ref = item.deref();
+            let is_ghost_snapshot = matches!(item_ref, ResponseItem::GhostSnapshot { .. });
+            if !is_api_message(item_ref) && !is_ghost_snapshot {
                continue;
            }

-            self.items.push(item.clone());
+            let processed = Self::process_item(&item);
+            self.items.push(processed);
        }
    }

@@ -65,6 +79,22 @@ impl ConversationHistory {
        }
    }

+    pub(crate) fn replace(&mut self, items: Vec<ResponseItem>) {
+        self.items = items;
+    }
+
+    pub(crate) fn update_token_info(
+        &mut self,
+        usage: &TokenUsage,
+        model_context_window: Option<i64>,
+    ) {
+        self.token_info = TokenUsageInfo::new_or_append(
+            &self.token_info,
+            &Some(usage.clone()),
+            model_context_window,
+        );
+    }
+
    /// This function enforces a couple of invariants on the in-memory history:
    /// 1. every call (function/custom) has a corresponding output entry
    /// 2. every output has a corresponding call entry
@@ -107,7 +137,7 @@ impl ConversationHistory {
                                call_id: call_id.clone(),
                                output: FunctionCallOutputPayload {
                                    content: "aborted".to_string(),
-                                    success: None,
+                                    ..Default::default()
                                },
                            },
                        ));
@@ -154,7 +184,7 @@ impl ConversationHistory {
                                    call_id: call_id.clone(),
                                    output: FunctionCallOutputPayload {
                                        content: "aborted".to_string(),
-                                        success: None,
+                                        ..Default::default()
                                    },
                                },
                            ));
@@ -165,6 +195,7 @@ impl ConversationHistory {
                | ResponseItem::WebSearchCall { .. }
                | ResponseItem::FunctionCallOutput { .. }
                | ResponseItem::CustomToolCallOutput { .. }
+                | ResponseItem::GhostSnapshot { .. }
                | ResponseItem::Other
                | ResponseItem::Message { .. } => {
                    // nothing to do for these variants
@@ -231,6 +262,7 @@ impl ConversationHistory {
                | ResponseItem::LocalShellCall { .. }
                | ResponseItem::Reasoning { .. }
                | ResponseItem::WebSearchCall { .. }
+                | ResponseItem::GhostSnapshot { .. }
                | ResponseItem::Other
                | ResponseItem::Message { .. } => {
                    // nothing to do for these variants
@@ -248,10 +280,6 @@ impl ConversationHistory {
        }
    }

-    pub(crate) fn replace(&mut self, items: Vec<ResponseItem>) {
-        self.items = items;
-    }
-
    /// Removes the corresponding paired item for the provided `item`, if any.
    ///
    /// Pairs:
@@ -321,19 +349,126 @@ impl ConversationHistory {
        }
    }

-    pub(crate) fn update_token_info(
-        &mut self,
-        usage: &TokenUsage,
-        model_context_window: Option<i64>,
-    ) {
-        self.token_info = TokenUsageInfo::new_or_append(
-            &self.token_info,
-            &Some(usage.clone()),
-            model_context_window,
-        );
+    fn process_item(item: &ResponseItem) -> ResponseItem { // truncates tool outputs; clones the rest
+        match item {
+            ResponseItem::FunctionCallOutput { call_id, output } => {
+                let truncated = format_output_for_model_body(output.content.as_str()); // plain-text body
+                let truncated_items = output.content_items.as_ref().map(|items| {
+                    items
+                        .iter()
+                        .map(|it| match it {
+                            FunctionCallOutputContentItem::InputText { text } => { // capped per item
+                                FunctionCallOutputContentItem::InputText {
+                                    text: format_output_for_model_body(text),
+                                }
+                            }
+                            FunctionCallOutputContentItem::InputImage { image_url } => { // copied as-is
+                                FunctionCallOutputContentItem::InputImage {
+                                    image_url: image_url.clone(),
+                                }
+                            }
+                        })
+                        .collect()
+                });
+                ResponseItem::FunctionCallOutput {
+                    call_id: call_id.clone(),
+                    output: FunctionCallOutputPayload {
+                        content: truncated,
+                        content_items: truncated_items,
+                        success: output.success, // success flag is carried over unchanged
+                    },
+                }
+            }
+            ResponseItem::CustomToolCallOutput { call_id, output } => {
+                let truncated = format_output_for_model_body(output); // same cap as function outputs
+                ResponseItem::CustomToolCallOutput {
+                    call_id: call_id.clone(),
+                    output: truncated,
+                }
+            }
+            ResponseItem::Message { .. }
+            | ResponseItem::Reasoning { .. }
+            | ResponseItem::LocalShellCall { .. }
+            | ResponseItem::FunctionCall { .. }
+            | ResponseItem::WebSearchCall { .. }
+            | ResponseItem::CustomToolCall { .. }
+            | ResponseItem::GhostSnapshot { .. }
+            | ResponseItem::Other => item.clone(), // no tool output to truncate
+        }
     }
 }

+/// Formats tool output for the model (clients still receive full streams). Content within both
+/// limits is returned unchanged; otherwise a head+tail excerpt follows a `Total output lines` header.
+pub(crate) fn format_output_for_model_body(content: &str) -> String {
+    let total_lines = content.lines().count();
+    if content.len() <= MODEL_FORMAT_MAX_BYTES && total_lines <= MODEL_FORMAT_MAX_LINES {
+        return content.to_string();
+    }
+    let output = truncate_formatted_exec_output(content, total_lines);
+    format!("Total output lines: {total_lines}\n\n{output}")
+}
+
+fn truncate_formatted_exec_output(content: &str, total_lines: usize) -> String { // head + marker + tail
+    let segments: Vec<&str> = content.split_inclusive('\n').collect(); // lines, each keeping its '\n'
+    let head_take = MODEL_FORMAT_HEAD_LINES.min(segments.len());
+    let tail_take = MODEL_FORMAT_TAIL_LINES.min(segments.len().saturating_sub(head_take)); // never overlaps head
+    let omitted = segments.len().saturating_sub(head_take + tail_take); // lines between head and tail
+
+    let head_slice_end: usize = segments // byte length of the head lines
+        .iter()
+        .take(head_take)
+        .map(|segment| segment.len())
+        .sum();
+    let tail_slice_start: usize = if tail_take == 0 { // byte offset where the tail lines begin
+        content.len() // every line already belongs to the head, so the tail slice is empty
+    } else {
+        content.len()
+            - segments
+                .iter()
+                .rev()
+                .take(tail_take)
+                .map(|segment| segment.len())
+                .sum::<usize>()
+    };
+    let head_slice = &content[..head_slice_end];
+    let tail_slice = &content[tail_slice_start..];
+    let truncated_by_bytes = content.len() > MODEL_FORMAT_MAX_BYTES;
+    // NOTE: slightly inaccurate: the line counts include metadata lines, not just shell output lines.
+    let marker = if omitted > 0 { // line omission takes precedence over byte truncation
+        Some(format!(
+            "\n[... omitted {omitted} of {total_lines} lines ...]\n\n"
+        ))
+    } else if truncated_by_bytes {
+        Some(format!(
+            "\n[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]\n\n"
+        ))
+    } else {
+        None
+    };
+
+    let marker_len = marker.as_ref().map_or(0, String::len);
+    let base_head_budget = MODEL_FORMAT_HEAD_BYTES.min(MODEL_FORMAT_MAX_BYTES);
+    let head_budget = base_head_budget.min(MODEL_FORMAT_MAX_BYTES.saturating_sub(marker_len)); // room for marker
+    let head_part = take_bytes_at_char_boundary(head_slice, head_budget);
+    let mut result = String::with_capacity(MODEL_FORMAT_MAX_BYTES.min(content.len()));
+
+    result.push_str(head_part);
+    if let Some(marker_text) = marker.as_ref() {
+        result.push_str(marker_text);
+    }
+
+    let remaining = MODEL_FORMAT_MAX_BYTES.saturating_sub(result.len()); // bytes left for the tail
+    if remaining == 0 {
+        return result;
+    }
+
+    let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining); // presumably <= `remaining` bytes; confirm
+    result.push_str(tail_part);
+
+    result
+}
+
 #[inline]
 fn error_or_panic(message: String) {
    if cfg!(debug_assertions) || env!("CARGO_PKG_VERSION").contains("alpha") {
@@ -355,6 +490,7 @@ fn is_api_message(message: &ResponseItem) -> bool {
        | ResponseItem::LocalShellCall { .. }
        | ResponseItem::Reasoning { .. }
        | ResponseItem::WebSearchCall { .. } => true,
+        ResponseItem::GhostSnapshot { .. } => false,
        ResponseItem::Other => false,
    }
 }
@@ -448,7 +584,7 @@ mod tests {
                call_id: "call-1".to_string(),
                output: FunctionCallOutputPayload {
                    content: "ok".to_string(),
-                    success: None,
+                    ..Default::default()
                },
            },
        ];
@@ -464,7 +600,7 @@ mod tests {
                call_id: "call-2".to_string(),
                output: FunctionCallOutputPayload {
                    content: "ok".to_string(),
-                    success: None,
+                    ..Default::default()
                },
            },
            ResponseItem::FunctionCall {
@@ -498,7 +634,7 @@ mod tests {
                call_id: "call-3".to_string(),
                output: FunctionCallOutputPayload {
                    content: "ok".to_string(),
-                    success: None,
+                    ..Default::default()
                },
            },
        ];
@@ -527,6 +663,184 @@ mod tests {
        assert_eq!(h.contents(), vec![]);
    }

+    #[test]
+    fn record_items_truncates_function_call_output_content() {
+        let mut history = ConversationHistory::new();
+        let long_line = "a very long line to trigger truncation\n";
+        let long_output = long_line.repeat(2_500);
+        let item = ResponseItem::FunctionCallOutput {
+            call_id: "call-100".to_string(),
+            output: FunctionCallOutputPayload {
+                content: long_output.clone(),
+                success: Some(true),
+                ..Default::default()
+            },
+        };
+
+        history.record_items([&item]);
+
+        assert_eq!(history.items.len(), 1);
+        match &history.items[0] {
+            ResponseItem::FunctionCallOutput { output, .. } => {
+                assert_ne!(output.content, long_output);
+                assert!(
+                    output.content.starts_with("Total output lines:"),
+                    "expected truncated summary, got {}",
+                    output.content
+                );
+            }
+            other => panic!("unexpected history item: {other:?}"),
+        }
+    }
+
+    #[test]
+    fn record_items_truncates_custom_tool_call_output_content() {
+        let mut history = ConversationHistory::new();
+        let line = "custom output that is very long\n";
+        let long_output = line.repeat(2_500);
+        let item = ResponseItem::CustomToolCallOutput {
+            call_id: "tool-200".to_string(),
+            output: long_output.clone(),
+        };
+
+        history.record_items([&item]);
+
+        assert_eq!(history.items.len(), 1);
+        match &history.items[0] {
+            ResponseItem::CustomToolCallOutput { output, .. } => {
+                assert_ne!(output, &long_output);
+                assert!(
+                    output.starts_with("Total output lines:"),
+                    "expected truncated summary, got {output}"
+                );
+            }
+            other => panic!("unexpected history item: {other:?}"),
+        }
+    }
+
+    // The following tests were adapted from tools::mod truncation tests to
+    // target the new truncation functions in conversation_history.
+
+    use regex_lite::Regex;
+
+    fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) {
+        let pattern = truncated_message_pattern(line, total_lines);
+        let regex = Regex::new(&pattern).unwrap_or_else(|err| {
+            panic!("failed to compile regex {pattern}: {err}");
+        });
+        let captures = regex
+            .captures(message)
+            .unwrap_or_else(|| panic!("message failed to match pattern {pattern}: {message}"));
+        let body = captures
+            .name("body")
+            .expect("missing body capture")
+            .as_str();
+        assert!(
+            body.len() <= MODEL_FORMAT_MAX_BYTES,
+            "body exceeds byte limit: {} bytes",
+            body.len()
+        );
+    }
+
+    fn truncated_message_pattern(line: &str, total_lines: usize) -> String {
+        let head_take = MODEL_FORMAT_HEAD_LINES.min(total_lines);
+        let tail_take = MODEL_FORMAT_TAIL_LINES.min(total_lines.saturating_sub(head_take));
+        let omitted = total_lines.saturating_sub(head_take + tail_take);
+        let escaped_line = regex_lite::escape(line);
+        if omitted == 0 {
+            return format!(
+                r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes \.{{3}}]\n\n.*)$",
+            );
+        }
+        format!(
+            r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} omitted {omitted} of {total_lines} lines \.{{3}}]\n\n.*)$",
+        )
+    }
+
+    #[test]
+    fn format_exec_output_truncates_large_error() {
+        let line = "very long execution error line that should trigger truncation\n";
+        let large_error = line.repeat(2_500); // way beyond both byte and line limits
+
+        let truncated = format_output_for_model_body(&large_error);
+
+        let total_lines = large_error.lines().count();
+        assert_truncated_message_matches(&truncated, line, total_lines);
+        assert_ne!(truncated, large_error);
+    }
+
+    #[test]
+    fn format_exec_output_marks_byte_truncation_without_omitted_lines() {
+        let long_line = "a".repeat(MODEL_FORMAT_MAX_BYTES + 50);
+        let truncated = format_output_for_model_body(&long_line);
+
+        assert_ne!(truncated, long_line);
+        let marker_line =
+            format!("[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]");
+        assert!(
+            truncated.contains(&marker_line),
+            "missing byte truncation marker: {truncated}"
+        );
+        assert!(
+            !truncated.contains("omitted"),
+            "line omission marker should not appear when no lines were dropped: {truncated}"
+        );
+    }
+
+    #[test]
+    fn format_exec_output_returns_original_when_within_limits() {
+        let content = "example output\n".repeat(10);
+
+        assert_eq!(format_output_for_model_body(&content), content);
+    }
+
+    #[test]
+    fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() {
+        let total_lines = MODEL_FORMAT_MAX_LINES + 100;
+        let content: String = (0..total_lines)
+            .map(|idx| format!("line-{idx}\n"))
+            .collect();
+
+        let truncated = format_output_for_model_body(&content);
+        let omitted = total_lines - MODEL_FORMAT_MAX_LINES;
+        let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]");
+
+        assert!(
+            truncated.contains(&expected_marker),
+            "missing omitted marker: {truncated}"
+        );
+        assert!(
+            truncated.contains("line-0\n"),
+            "expected head line to remain: {truncated}"
+        );
+
+        let last_line = format!("line-{}\n", total_lines - 1);
+        assert!(
+            truncated.contains(&last_line),
+            "expected tail line to remain: {truncated}"
+        );
+    }
+
+    #[test]
+    fn format_exec_output_prefers_line_marker_when_both_limits_exceeded() {
+        let total_lines = MODEL_FORMAT_MAX_LINES + 42;
+        let long_line = "x".repeat(256);
+        let content: String = (0..total_lines)
+            .map(|idx| format!("line-{idx}-{long_line}\n"))
+            .collect();
+        // Derive the marker from `total_lines` so the test keeps tracking MODEL_FORMAT_MAX_LINES.
+        let expected_marker = format!("[... omitted 42 of {total_lines} lines ...]");
+        let truncated = format_output_for_model_body(&content);
+        assert!(
+            truncated.contains(&expected_marker),
+            "expected omitted marker when line count exceeds limit: {truncated}"
+        );
+        assert!(
+            !truncated.contains("output truncated to fit"),
+            "line omission marker should take precedence over byte marker: {truncated}"
+        );
+    }
+
    //TODO(aibrahim): run CI in release mode.
    #[cfg(not(debug_assertions))]
    #[test]
@@ -554,7 +868,7 @@ mod tests {
                    call_id: "call-x".to_string(),
                    output: FunctionCallOutputPayload {
                        content: "aborted".to_string(),
-                        success: None,
+                        ..Default::default()
                    },
                },
            ]
@@ -631,7 +945,7 @@ mod tests {
                    call_id: "shell-1".to_string(),
                    output: FunctionCallOutputPayload {
                        content: "aborted".to_string(),
-                        success: None,
+                        ..Default::default()
                    },
                },
            ]
@@ -645,7 +959,7 @@ mod tests {
            call_id: "orphan-1".to_string(),
            output: FunctionCallOutputPayload {
                content: "ok".to_string(),
-                success: None,
+                ..Default::default()
            },
        }];
        let mut h = create_history_with_items(items);
@@ -685,7 +999,7 @@ mod tests {
                call_id: "c2".to_string(),
                output: FunctionCallOutputPayload {
                    content: "ok".to_string(),
-                    success: None,
+                    ..Default::default()
                },
            },
            // Will get an inserted custom tool output
@@ -727,7 +1041,7 @@ mod tests {
                    call_id: "c1".to_string(),
                    output: FunctionCallOutputPayload {
                        content: "aborted".to_string(),
-                        success: None,
+                        ..Default::default()
                    },
                },
                ResponseItem::CustomToolCall {
@@ -757,7 +1071,7 @@ mod tests {
                    call_id: "s1".to_string(),
                    output: FunctionCallOutputPayload {
                        content: "aborted".to_string(),
-                        success: None,
+                        ..Default::default()
                    },
                },
            ]
@@ -822,7 +1136,7 @@ mod tests {
            call_id: "orphan-1".to_string(),
            output: FunctionCallOutputPayload {
                content: "ok".to_string(),
-                success: None,
+                ..Default::default()
            },
        }];
        let mut h = create_history_with_items(items);
@@ -856,7 +1170,7 @@ mod tests {
                call_id: "c2".to_string(),
                output: FunctionCallOutputPayload {
                    content: "ok".to_string(),
-                    success: None,
+                    ..Default::default()
                },
            },
            ResponseItem::CustomToolCall {
--- a/codex-rs/core/src/conversation_manager.rs
+++ b/codex-rs/core/src/conversation_manager.rs
@@ -98,7 +98,10 @@ impl ConversationManager {
            }
        };

-        let conversation = Arc::new(CodexConversation::new(codex));
+        let conversation = Arc::new(CodexConversation::new(
+            codex,
+            session_configured.rollout_path.clone(),
+        ));
        self.conversations
            .write()
            .await
--- a/codex-rs/core/src/error.rs
+++ b/codex-rs/core/src/error.rs
@@ -55,7 +55,7 @@ pub enum SandboxErr {
 #[derive(Error, Debug)]
 pub enum CodexErr {
    // todo(aibrahim): git rid of this error carrying the dangling artifacts
-    #[error("turn aborted")]
+    #[error("turn aborted. Something went wrong? Hit `/feedback` to report the issue.")]
    TurnAborted {
        dangling_artifacts: Vec<ProcessedResponseItem>,
    },
@@ -91,7 +91,7 @@ pub enum CodexErr {

    /// Returned by run_command_stream when the user pressed Ctrl‑C (SIGINT). Session uses this to
    /// surface a polite FunctionCallOutput back to the model instead of crashing the CLI.
-    #[error("interrupted (Ctrl-C)")]
+    #[error("interrupted (Ctrl-C). Something went wrong? Hit `/feedback` to report the issue.")]
    Interrupted,

    /// Unexpected HTTP status code.
--- a/codex-rs/core/src/features.rs
+++ b/codex-rs/core/src/features.rs
@@ -39,6 +39,10 @@ pub enum Feature {
    ViewImageTool,
    /// Allow the model to request web searches.
    WebSearchRequest,
+    /// Enable the model-based risk assessments for sandboxed commands.
+    SandboxCommandAssessment,
+    /// Create a ghost commit at each turn.
+    GhostCommit,
 }

 impl Feature {
@@ -73,6 +77,7 @@ pub struct FeatureOverrides {
    pub include_apply_patch_tool: Option<bool>,
    pub include_view_image_tool: Option<bool>,
    pub web_search_request: Option<bool>,
+    pub experimental_sandbox_command_assessment: Option<bool>,
 }

 impl FeatureOverrides {
@@ -137,6 +142,7 @@ impl Features {
        let mut features = Features::with_defaults();

        let base_legacy = LegacyFeatureToggles {
+            experimental_sandbox_command_assessment: cfg.experimental_sandbox_command_assessment,
            experimental_use_freeform_apply_patch: cfg.experimental_use_freeform_apply_patch,
            experimental_use_exec_command_tool: cfg.experimental_use_exec_command_tool,
            experimental_use_unified_exec_tool: cfg.experimental_use_unified_exec_tool,
@@ -154,6 +160,8 @@ impl Features {
        let profile_legacy = LegacyFeatureToggles {
            include_apply_patch_tool: config_profile.include_apply_patch_tool,
            include_view_image_tool: config_profile.include_view_image_tool,
+            experimental_sandbox_command_assessment: config_profile
+                .experimental_sandbox_command_assessment,
            experimental_use_freeform_apply_patch: config_profile
                .experimental_use_freeform_apply_patch,
            experimental_use_exec_command_tool: config_profile.experimental_use_exec_command_tool,
@@ -183,6 +191,11 @@ fn feature_for_key(key: &str) -> Option<Feature> {
    legacy::feature_for_key(key)
 }

+/// Returns `true` if `key` is a known feature toggle key, i.e. `feature_for_key(key)` is `Some`.
+pub fn is_known_feature_key(key: &str) -> bool {
+    feature_for_key(key).is_some()
+}
+
 /// Deserializable features table for TOML.
 #[derive(Deserialize, Debug, Clone, Default, PartialEq)]
 pub struct FeaturesToml {
@@ -236,4 +249,16 @@ pub const FEATURES: &[FeatureSpec] = &[
        stage: Stage::Stable,
        default_enabled: false,
    },
+    FeatureSpec {
+        id: Feature::SandboxCommandAssessment,
+        key: "experimental_sandbox_command_assessment",
+        stage: Stage::Experimental,
+        default_enabled: false,
+    },
+    FeatureSpec {
+        id: Feature::GhostCommit,
+        key: "ghost_commit",
+        stage: Stage::Experimental,
+        default_enabled: false,
+    },
 ];
--- a/codex-rs/core/src/features/legacy.rs
+++ b/codex-rs/core/src/features/legacy.rs
@@ -9,6 +9,10 @@ struct Alias {
 }

 const ALIASES: &[Alias] = &[
+    Alias {
+        legacy_key: "experimental_sandbox_command_assessment",
+        feature: Feature::SandboxCommandAssessment,
+    },
    Alias {
        legacy_key: "experimental_use_unified_exec_tool",
        feature: Feature::UnifiedExec,
@@ -53,6 +57,7 @@ pub(crate) fn feature_for_key(key: &str) -> Option<Feature> {
 pub struct LegacyFeatureToggles {
    pub include_apply_patch_tool: Option<bool>,
    pub include_view_image_tool: Option<bool>,
+    pub experimental_sandbox_command_assessment: Option<bool>,
    pub experimental_use_freeform_apply_patch: Option<bool>,
    pub experimental_use_exec_command_tool: Option<bool>,
    pub experimental_use_unified_exec_tool: Option<bool>,
@@ -69,6 +74,12 @@ impl LegacyFeatureToggles {
            self.include_apply_patch_tool,
            "include_apply_patch_tool",
        );
+        set_if_some(
+            features,
+            Feature::SandboxCommandAssessment,
+            self.experimental_sandbox_command_assessment,
+            "experimental_sandbox_command_assessment",
+        );
        set_if_some(
            features,
            Feature::ApplyPatchFreeform,
--- a/codex-rs/core/src/lib.rs
+++ b/codex-rs/core/src/lib.rs
@@ -77,6 +77,7 @@ pub use rollout::find_conversation_path_by_id_str;
 pub use rollout::list::ConversationItem;
 pub use rollout::list::ConversationsPage;
 pub use rollout::list::Cursor;
+pub use rollout::list::read_head_for_summary;
 mod function_tool;
 mod state;
 mod tasks;
--- a/codex-rs/core/src/mcp_tool_call.rs
+++ b/codex-rs/core/src/mcp_tool_call.rs
@@ -35,6 +35,7 @@ pub(crate) async fn handle_mcp_tool_call(
                    output: FunctionCallOutputPayload {
                        content: format!("err: {e}"),
                        success: Some(false),
+                        ..Default::default()
                    },
                };
            }
--- a/codex-rs/core/src/model_family.rs
+++ b/codex-rs/core/src/model_family.rs
@@ -54,6 +54,9 @@ pub struct ModelFamily {
    /// This is applied when computing the effective context window seen by
    /// consumers.
    pub effective_context_window_percent: i64,
+
+    /// If the model family supports setting the verbosity level when using Responses API.
+    pub support_verbosity: bool,
 }

 macro_rules! model_family {
@@ -73,6 +76,7 @@ macro_rules! model_family {
            base_instructions: BASE_INSTRUCTIONS.to_string(),
            experimental_supported_tools: Vec::new(),
            effective_context_window_percent: 95,
+            support_verbosity: false,
        };
        // apply overrides
        $(
@@ -128,10 +132,11 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
                "test_sync_tool".to_string(),
            ],
            supports_parallel_tool_calls: true,
+            support_verbosity: true,
        )

    // Internal models.
-    } else if slug.starts_with("codex-") {
+    } else if slug.starts_with("codex-exp-") {
        model_family!(
            slug, slug,
            supports_reasoning_summaries: true,
@@ -144,22 +149,25 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
                "read_file".to_string(),
            ],
            supports_parallel_tool_calls: true,
+            support_verbosity: true,
        )

    // Production models.
-    } else if slug.starts_with("gpt-5-codex") {
+    } else if slug.starts_with("gpt-5-codex") || slug.starts_with("codex-") {
        model_family!(
            slug, slug,
            supports_reasoning_summaries: true,
            reasoning_summary_format: ReasoningSummaryFormat::Experimental,
            base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(),
            apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
+            support_verbosity: true,
        )
    } else if slug.starts_with("gpt-5") {
        model_family!(
            slug, "gpt-5",
            supports_reasoning_summaries: true,
            needs_special_apply_patch_instructions: true,
+            support_verbosity: true,
        )
    } else {
        None
@@ -179,5 +187,6 @@ pub fn derive_default_model_family(model: &str) -> ModelFamily {
        base_instructions: BASE_INSTRUCTIONS.to_string(),
        experimental_supported_tools: Vec::new(),
        effective_context_window_percent: 95,
+        support_verbosity: false,
    }
 }
--- a/codex-rs/core/src/response_processing.rs
+++ b/codex-rs/core/src/response_processing.rs
@@ -1,4 +1,5 @@
 use crate::codex::Session;
+use crate::codex::TurnContext;
 use crate::conversation_history::ConversationHistory;
 use codex_protocol::models::FunctionCallOutputPayload;
 use codex_protocol::models::ResponseInputItem;
@@ -13,6 +14,7 @@ pub(crate) async fn process_items(
    is_review_mode: bool,
    review_thread_history: &mut ConversationHistory,
    sess: &Session,
+    turn_context: &TurnContext,
 ) -> (Vec<ResponseInputItem>, Vec<ResponseItem>) {
    let mut items_to_record_in_conversation_history = Vec::<ResponseItem>::new();
    let mut responses = Vec::<ResponseInputItem>::new();
@@ -59,14 +61,11 @@ pub(crate) async fn process_items(
            ) => {
                items_to_record_in_conversation_history.push(item);
                let output = match result {
-                    Ok(call_tool_result) => {
-                        crate::codex::convert_call_tool_result_to_function_call_output_payload(
-                            call_tool_result,
-                        )
-                    }
+                    Ok(call_tool_result) => FunctionCallOutputPayload::from(call_tool_result),
                    Err(err) => FunctionCallOutputPayload {
                        content: err.clone(),
                        success: Some(false),
+                        ..Default::default()
                    },
                };
                items_to_record_in_conversation_history.push(ResponseItem::FunctionCallOutput {
@@ -104,7 +103,7 @@ pub(crate) async fn process_items(
        if is_review_mode {
            review_thread_history.record_items(items_to_record_in_conversation_history.iter());
        } else {
-            sess.record_conversation_items(&items_to_record_in_conversation_history)
+            sess.record_conversation_items(turn_context, &items_to_record_in_conversation_history)
                .await;
        }
    }
--- a/codex-rs/core/src/rollout/list.rs
+++ b/codex-rs/core/src/rollout/list.rs
@@ -54,6 +54,7 @@ struct HeadTailSummary {
    saw_session_meta: bool,
    saw_user_event: bool,
    source: Option<SessionSource>,
+    model_provider: Option<String>,
    created_at: Option<String>,
    updated_at: Option<String>,
 }
@@ -109,6 +110,8 @@ pub(crate) async fn get_conversations(
    page_size: usize,
    cursor: Option<&Cursor>,
    allowed_sources: &[SessionSource],
+    model_providers: Option<&[String]>,
+    default_provider: &str,
 ) -> io::Result<ConversationsPage> {
    let mut root = codex_home.to_path_buf();
    root.push(SESSIONS_SUBDIR);
@@ -124,8 +127,17 @@ pub(crate) async fn get_conversations(

    let anchor = cursor.cloned();

-    let result =
-        traverse_directories_for_paths(root.clone(), page_size, anchor, allowed_sources).await?;
+    let provider_matcher =
+        model_providers.and_then(|filters| ProviderMatcher::new(filters, default_provider));
+
+    let result = traverse_directories_for_paths(
+        root.clone(),
+        page_size,
+        anchor,
+        allowed_sources,
+        provider_matcher.as_ref(),
+    )
+    .await?;
    Ok(result)
 }

@@ -145,6 +157,7 @@ async fn traverse_directories_for_paths(
    page_size: usize,
    anchor: Option<Cursor>,
    allowed_sources: &[SessionSource],
+    provider_matcher: Option<&ProviderMatcher<'_>>,
 ) -> io::Result<ConversationsPage> {
    let mut items: Vec<ConversationItem> = Vec::with_capacity(page_size);
    let mut scanned_files = 0usize;
@@ -153,6 +166,7 @@ async fn traverse_directories_for_paths(
        Some(c) => (c.ts, c.id),
        None => (OffsetDateTime::UNIX_EPOCH, Uuid::nil()),
    };
+    let mut more_matches_available = false;

    let year_dirs = collect_dirs_desc(&root, |s| s.parse::<u16>().ok()).await?;

@@ -184,6 +198,7 @@ async fn traverse_directories_for_paths(
                for (ts, sid, _name_str, path) in day_files.into_iter() {
                    scanned_files += 1;
                    if scanned_files >= MAX_SCAN_FILES && items.len() >= page_size {
+                        more_matches_available = true;
                        break 'outer;
                    }
                    if !anchor_passed {
@@ -194,6 +209,7 @@ async fn traverse_directories_for_paths(
                        }
                    }
                    if items.len() == page_size {
+                        more_matches_available = true;
                        break 'outer;
                    }
                    // Read head and simultaneously detect message events within the same
@@ -208,6 +224,11 @@ async fn traverse_directories_for_paths(
                    {
                        continue;
                    }
+                    if let Some(matcher) = provider_matcher
+                        && !matcher.matches(summary.model_provider.as_deref())
+                    {
+                        continue;
+                    }
                    // Apply filters: must have session meta and at least one user message event
                    if summary.saw_session_meta && summary.saw_user_event {
                        let HeadTailSummary {
@@ -231,12 +252,21 @@ async fn traverse_directories_for_paths(
        }
    }

-    let next = build_next_cursor(&items);
+    let reached_scan_cap = scanned_files >= MAX_SCAN_FILES;
+    if reached_scan_cap && !items.is_empty() {
+        more_matches_available = true;
+    }
+
+    let next = if more_matches_available {
+        build_next_cursor(&items)
+    } else {
+        None
+    };
    Ok(ConversationsPage {
        items,
        next_cursor: next,
        num_scanned_files: scanned_files,
-        reached_scan_cap: scanned_files >= MAX_SCAN_FILES,
+        reached_scan_cap,
    })
 }

@@ -328,6 +358,32 @@ fn parse_timestamp_uuid_from_filename(name: &str) -> Option<(OffsetDateTime, Uui
    Some((ts, uuid))
 }

+/// Filters sessions by provider id; a session with no recorded provider counts as the default.
+struct ProviderMatcher<'a> {
+    filters: &'a [String],
+    matches_default_provider: bool,
+}
+
+impl<'a> ProviderMatcher<'a> {
+    /// Returns `None` when `filters` is empty, so callers skip provider filtering entirely.
+    fn new(filters: &'a [String], default_provider: &str) -> Option<Self> {
+        // `default_provider` is only compared here, never stored, so it needs no `'a` bound.
+        let matches_default_provider = filters.iter().any(|provider| provider == default_provider);
+        (!filters.is_empty()).then_some(Self {
+            filters,
+            matches_default_provider,
+        })
+    }
+
+    /// `Some(id)` must equal one of the filters; `None` passes only if the default is listed.
+    fn matches(&self, session_provider: Option<&str>) -> bool {
+        match session_provider {
+            Some(provider) => self.filters.iter().any(|candidate| candidate == provider),
+            None => self.matches_default_provider,
+        }
+    }
+}
+
 async fn read_head_and_tail(
    path: &Path,
    head_limit: usize,
@@ -354,6 +410,7 @@ async fn read_head_and_tail(
        match rollout_line.item {
            RolloutItem::SessionMeta(session_meta_line) => {
                summary.source = Some(session_meta_line.meta.source);
+                summary.model_provider = session_meta_line.meta.model_provider.clone();
                summary.created_at = summary
                    .created_at
                    .clone()
@@ -394,6 +451,13 @@ async fn read_head_and_tail(
    Ok(summary)
 }

+/// Return the head of the rollout file at `path`: at most `HEAD_RECORD_LIMIT` leading records.
+/// That many records should be enough to build a summary, including the session meta line.
+pub async fn read_head_for_summary(path: &Path) -> io::Result<Vec<serde_json::Value>> {
+    let outcome = read_head_and_tail(path, HEAD_RECORD_LIMIT, 0).await;
+    outcome.map(|summary| summary.head)
+}
+
 async fn read_tail_records(
    path: &Path,
    max_records: usize,
--- a/codex-rs/core/src/rollout/policy.rs
+++ b/codex-rs/core/src/rollout/policy.rs
@@ -26,7 +26,8 @@ pub(crate) fn should_persist_response_item(item: &ResponseItem) -> bool {
        | ResponseItem::FunctionCallOutput { .. }
        | ResponseItem::CustomToolCall { .. }
        | ResponseItem::CustomToolCallOutput { .. }
-        | ResponseItem::WebSearchCall { .. } => true,
+        | ResponseItem::WebSearchCall { .. }
+        | ResponseItem::GhostSnapshot { .. } => true,
        ResponseItem::Other => false,
    }
 }
@@ -42,6 +43,7 @@ pub(crate) fn should_persist_event_msg(ev: &EventMsg) -> bool {
        | EventMsg::TokenCount(_)
        | EventMsg::EnteredReviewMode(_)
        | EventMsg::ExitedReviewMode(_)
+        | EventMsg::UndoCompleted(_)
        | EventMsg::TurnAborted(_) => true,
        EventMsg::Error(_)
        | EventMsg::TaskStarted(_)
@@ -50,6 +52,7 @@ pub(crate) fn should_persist_event_msg(ev: &EventMsg) -> bool {
        | EventMsg::AgentReasoningDelta(_)
        | EventMsg::AgentReasoningRawContentDelta(_)
        | EventMsg::AgentReasoningSectionBreak(_)
+        | EventMsg::RawResponseItem(_)
        | EventMsg::SessionConfigured(_)
        | EventMsg::McpToolCallBegin(_)
        | EventMsg::McpToolCallEnd(_)
@@ -66,12 +69,12 @@ pub(crate) fn should_persist_event_msg(ev: &EventMsg) -> bool {
        | EventMsg::PatchApplyEnd(_)
        | EventMsg::TurnDiff(_)
        | EventMsg::GetHistoryEntryResponse(_)
+        | EventMsg::UndoStarted(_)
        | EventMsg::McpListToolsResponse(_)
        | EventMsg::ListCustomPromptsResponse(_)
        | EventMsg::PlanUpdate(_)
        | EventMsg::ShutdownComplete
        | EventMsg::ViewImageToolCall(_)
-        | EventMsg::ConversationPath(_)
        | EventMsg::ItemStarted(_)
        | EventMsg::ItemCompleted(_) => false,
    }
--- a/codex-rs/core/src/rollout/recorder.rs
+++ b/codex-rs/core/src/rollout/recorder.rs
@@ -97,8 +97,18 @@ impl RolloutRecorder {
        page_size: usize,
        cursor: Option<&Cursor>,
        allowed_sources: &[SessionSource],
+        model_providers: Option<&[String]>,
+        default_provider: &str,
    ) -> std::io::Result<ConversationsPage> {
-        get_conversations(codex_home, page_size, cursor, allowed_sources).await
+        get_conversations(
+            codex_home,
+            page_size,
+            cursor,
+            allowed_sources,
+            model_providers,
+            default_provider,
+        )
+        .await
    }

    /// Attempt to create a new [`RolloutRecorder`]. If the sessions directory
@@ -137,6 +147,7 @@ impl RolloutRecorder {
                        cli_version: env!("CARGO_PKG_VERSION").to_string(),
                        instructions,
                        source,
+                        model_provider: Some(config.model_provider_id.clone()),
                    }),
                )
            }
@@ -267,10 +278,6 @@ impl RolloutRecorder {
        }))
    }

-    pub(crate) fn get_rollout_path(&self) -> PathBuf {
-        self.rollout_path.clone()
-    }
-
    pub async fn shutdown(&self) -> std::io::Result<()> {
        let (tx_done, rx_done) = oneshot::channel();
        match self.tx.send(RolloutCmd::Shutdown { ack: tx_done }).await {
--- a/codex-rs/core/src/rollout/tests.rs
+++ b/codex-rs/core/src/rollout/tests.rs
@@ -32,6 +32,14 @@ use codex_protocol::protocol::SessionSource;
 use codex_protocol::protocol::UserMessageEvent;

 const NO_SOURCE_FILTER: &[SessionSource] = &[];
+const TEST_PROVIDER: &str = "test-provider";
+
+fn provider_vec(providers: &[&str]) -> Vec<String> {
+    // Tests hold provider ids as `&str`; the listing API takes `&[String]`, so own each one.
+    let mut owned = Vec::with_capacity(providers.len());
+    owned.extend(providers.iter().map(|name| (*name).to_string()));
+    owned
+}

 fn write_session_file(
    root: &Path,
@@ -39,6 +47,24 @@ fn write_session_file(
    uuid: Uuid,
    num_records: usize,
    source: Option<SessionSource>,
+) -> std::io::Result<(OffsetDateTime, Uuid)> {
+    write_session_file_with_provider(
+        root,
+        ts_str,
+        uuid,
+        num_records,
+        source,
+        Some(TEST_PROVIDER), // same provider id the listing tests filter on
+    )
+}
+
+fn write_session_file_with_provider(
+    root: &Path,
+    ts_str: &str,
+    uuid: Uuid,
+    num_records: usize,
+    source: Option<SessionSource>,
+    model_provider: Option<&str>,
 ) -> std::io::Result<(OffsetDateTime, Uuid)> {
    let format: &[FormatItem] =
        format_description!("[year]-[month]-[day]T[hour]-[minute]-[second]");
@@ -68,6 +94,9 @@ fn write_session_file(
    if let Some(source) = source {
        payload["source"] = serde_json::to_value(source).unwrap();
    }
+    if let Some(provider) = model_provider {
+        payload["model_provider"] = serde_json::Value::String(provider.to_string());
+    }

    let meta = serde_json::json!({
        "timestamp": ts_str,
@@ -134,9 +163,17 @@ async fn test_list_conversations_latest_first() {
    )
    .unwrap();

-    let page = get_conversations(home, 10, None, INTERACTIVE_SESSION_SOURCES)
-        .await
-        .unwrap();
+    let provider_filter = provider_vec(&[TEST_PROVIDER]);
+    let page = get_conversations(
+        home,
+        10,
+        None,
+        INTERACTIVE_SESSION_SOURCES,
+        Some(provider_filter.as_slice()),
+        TEST_PROVIDER,
+    )
+    .await
+    .unwrap();

    // Build expected objects
    let p1 = home
@@ -166,6 +203,7 @@ async fn test_list_conversations_latest_first() {
        "originator": "test_originator",
        "cli_version": "test_version",
        "source": "vscode",
+        "model_provider": "test-provider",
    })];
    let head_2 = vec![serde_json::json!({
        "id": u2,
@@ -175,6 +213,7 @@ async fn test_list_conversations_latest_first() {
        "originator": "test_originator",
        "cli_version": "test_version",
        "source": "vscode",
+        "model_provider": "test-provider",
    })];
    let head_1 = vec![serde_json::json!({
        "id": u1,
@@ -184,11 +223,9 @@ async fn test_list_conversations_latest_first() {
        "originator": "test_originator",
        "cli_version": "test_version",
        "source": "vscode",
+        "model_provider": "test-provider",
    })];

-    let expected_cursor: Cursor =
-        serde_json::from_str(&format!("\"2025-01-01T12-00-00|{u1}\"")).unwrap();
-
    let expected = ConversationsPage {
        items: vec![
            ConversationItem {
@@ -213,7 +250,7 @@ async fn test_list_conversations_latest_first() {
                updated_at: Some("2025-01-01T12-00-00".into()),
            },
        ],
-        next_cursor: Some(expected_cursor),
+        next_cursor: None,
        num_scanned_files: 3,
        reached_scan_cap: false,
    };
@@ -275,9 +312,17 @@ async fn test_pagination_cursor() {
    )
    .unwrap();

-    let page1 = get_conversations(home, 2, None, INTERACTIVE_SESSION_SOURCES)
-        .await
-        .unwrap();
+    let provider_filter = provider_vec(&[TEST_PROVIDER]);
+    let page1 = get_conversations(
+        home,
+        2,
+        None,
+        INTERACTIVE_SESSION_SOURCES,
+        Some(provider_filter.as_slice()),
+        TEST_PROVIDER,
+    )
+    .await
+    .unwrap();
    let p5 = home
        .join("sessions")
        .join("2025")
@@ -298,6 +343,7 @@ async fn test_pagination_cursor() {
        "originator": "test_originator",
        "cli_version": "test_version",
        "source": "vscode",
+        "model_provider": "test-provider",
    })];
    let head_4 = vec![serde_json::json!({
        "id": u4,
@@ -307,6 +353,7 @@ async fn test_pagination_cursor() {
        "originator": "test_originator",
        "cli_version": "test_version",
        "source": "vscode",
+        "model_provider": "test-provider",
    })];
    let expected_cursor1: Cursor =
        serde_json::from_str(&format!("\"2025-03-04T09-00-00|{u4}\"")).unwrap();
@@ -338,6 +385,8 @@ async fn test_pagination_cursor() {
        2,
        page1.next_cursor.as_ref(),
        INTERACTIVE_SESSION_SOURCES,
+        Some(provider_filter.as_slice()),
+        TEST_PROVIDER,
    )
    .await
    .unwrap();
@@ -361,6 +410,7 @@ async fn test_pagination_cursor() {
        "originator": "test_originator",
        "cli_version": "test_version",
        "source": "vscode",
+        "model_provider": "test-provider",
    })];
    let head_2 = vec![serde_json::json!({
        "id": u2,
@@ -370,6 +420,7 @@ async fn test_pagination_cursor() {
        "originator": "test_originator",
        "cli_version": "test_version",
        "source": "vscode",
+        "model_provider": "test-provider",
    })];
    let expected_cursor2: Cursor =
        serde_json::from_str(&format!("\"2025-03-02T09-00-00|{u2}\"")).unwrap();
@@ -401,6 +452,8 @@ async fn test_pagination_cursor() {
        2,
        page2.next_cursor.as_ref(),
        INTERACTIVE_SESSION_SOURCES,
+        Some(provider_filter.as_slice()),
+        TEST_PROVIDER,
    )
    .await
    .unwrap();
@@ -418,9 +471,8 @@ async fn test_pagination_cursor() {
        "originator": "test_originator",
        "cli_version": "test_version",
        "source": "vscode",
+        "model_provider": "test-provider",
    })];
-    let expected_cursor3: Cursor =
-        serde_json::from_str(&format!("\"2025-03-01T09-00-00|{u1}\"")).unwrap();
    let expected_page3 = ConversationsPage {
        items: vec![ConversationItem {
            path: p1,
@@ -429,7 +481,7 @@ async fn test_pagination_cursor() {
            created_at: Some("2025-03-01T09-00-00".into()),
            updated_at: Some("2025-03-01T09-00-00".into()),
        }],
-        next_cursor: Some(expected_cursor3),
+        next_cursor: None,
        num_scanned_files: 5, // scanned 05, 04 (anchor), 03, 02 (anchor), 01
        reached_scan_cap: false,
    };
@@ -445,9 +497,17 @@ async fn test_get_conversation_contents() {
    let ts = "2025-04-01T10-30-00";
    write_session_file(home, ts, uuid, 2, Some(SessionSource::VSCode)).unwrap();

-    let page = get_conversations(home, 1, None, INTERACTIVE_SESSION_SOURCES)
-        .await
-        .unwrap();
+    let provider_filter = provider_vec(&[TEST_PROVIDER]);
+    let page = get_conversations(
+        home,
+        1,
+        None,
+        INTERACTIVE_SESSION_SOURCES,
+        Some(provider_filter.as_slice()),
+        TEST_PROVIDER,
+    )
+    .await
+    .unwrap();
    let path = &page.items[0].path;

    let content = get_conversation(path).await.unwrap();
@@ -467,8 +527,8 @@ async fn test_get_conversation_contents() {
        "originator": "test_originator",
        "cli_version": "test_version",
        "source": "vscode",
+        "model_provider": "test-provider",
    })];
-    let expected_cursor: Cursor = serde_json::from_str(&format!("\"{ts}|{uuid}\"")).unwrap();
    let expected_page = ConversationsPage {
        items: vec![ConversationItem {
            path: expected_path,
@@ -477,7 +537,7 @@ async fn test_get_conversation_contents() {
            created_at: Some(ts.into()),
            updated_at: Some(ts.into()),
        }],
-        next_cursor: Some(expected_cursor),
+        next_cursor: None,
        num_scanned_files: 1,
        reached_scan_cap: false,
    };
@@ -495,6 +555,7 @@ async fn test_get_conversation_contents() {
            "originator": "test_originator",
            "cli_version": "test_version",
            "source": "vscode",
+            "model_provider": "test-provider",
        }
    });
    let user_event = serde_json::json!({
@@ -532,6 +593,7 @@ async fn test_tail_includes_last_response_items() -> Result<()> {
                originator: "test_originator".into(),
                cli_version: "test_version".into(),
                source: SessionSource::VSCode,
+                model_provider: Some("test-provider".into()),
            },
            git: None,
        }),
@@ -563,7 +625,16 @@ async fn test_tail_includes_last_response_items() -> Result<()> {
    }
    drop(file);

-    let page = get_conversations(home, 1, None, INTERACTIVE_SESSION_SOURCES).await?;
+    let provider_filter = provider_vec(&[TEST_PROVIDER]);
+    let page = get_conversations(
+        home,
+        1,
+        None,
+        INTERACTIVE_SESSION_SOURCES,
+        Some(provider_filter.as_slice()),
+        TEST_PROVIDER,
+    )
+    .await?;
    let item = page.items.first().expect("conversation item");
    let tail_len = item.tail.len();
    assert_eq!(tail_len, 10usize.min(total_messages));
@@ -615,6 +686,7 @@ async fn test_tail_handles_short_sessions() -> Result<()> {
                originator: "test_originator".into(),
                cli_version: "test_version".into(),
                source: SessionSource::VSCode,
+                model_provider: Some("test-provider".into()),
            },
            git: None,
        }),
@@ -645,7 +717,16 @@ async fn test_tail_handles_short_sessions() -> Result<()> {
    }
    drop(file);

-    let page = get_conversations(home, 1, None, INTERACTIVE_SESSION_SOURCES).await?;
+    let provider_filter = provider_vec(&[TEST_PROVIDER]);
+    let page = get_conversations(
+        home,
+        1,
+        None,
+        INTERACTIVE_SESSION_SOURCES,
+        Some(provider_filter.as_slice()),
+        TEST_PROVIDER,
+    )
+    .await?;
    let tail = &page.items.first().expect("conversation item").tail;

    assert_eq!(tail.len(), 3);
@@ -699,6 +780,7 @@ async fn test_tail_skips_trailing_non_responses() -> Result<()> {
                originator: "test_originator".into(),
                cli_version: "test_version".into(),
                source: SessionSource::VSCode,
+                model_provider: Some("test-provider".into()),
            },
            git: None,
        }),
@@ -743,7 +825,16 @@ async fn test_tail_skips_trailing_non_responses() -> Result<()> {
    writeln!(file, "{}", serde_json::to_string(&shutdown_event)?)?;
    drop(file);

-    let page = get_conversations(home, 1, None, INTERACTIVE_SESSION_SOURCES).await?;
+    let provider_filter = provider_vec(&[TEST_PROVIDER]);
+    let page = get_conversations(
+        home,
+        1,
+        None,
+        INTERACTIVE_SESSION_SOURCES,
+        Some(provider_filter.as_slice()),
+        TEST_PROVIDER,
+    )
+    .await?;
    let tail = &page.items.first().expect("conversation item").tail;

    let expected: Vec<serde_json::Value> = (0..4)
@@ -785,9 +876,17 @@ async fn test_stable_ordering_same_second_pagination() {
    write_session_file(home, ts, u2, 0, Some(SessionSource::VSCode)).unwrap();
    write_session_file(home, ts, u3, 0, Some(SessionSource::VSCode)).unwrap();

-    let page1 = get_conversations(home, 2, None, INTERACTIVE_SESSION_SOURCES)
-        .await
-        .unwrap();
+    let provider_filter = provider_vec(&[TEST_PROVIDER]);
+    let page1 = get_conversations(
+        home,
+        2,
+        None,
+        INTERACTIVE_SESSION_SOURCES,
+        Some(provider_filter.as_slice()),
+        TEST_PROVIDER,
+    )
+    .await
+    .unwrap();

    let p3 = home
        .join("sessions")
@@ -810,6 +909,7 @@ async fn test_stable_ordering_same_second_pagination() {
            "originator": "test_originator",
            "cli_version": "test_version",
            "source": "vscode",
+            "model_provider": "test-provider",
        })]
    };
    let expected_cursor1: Cursor = serde_json::from_str(&format!("\"{ts}|{u2}\"")).unwrap();
@@ -841,6 +941,8 @@ async fn test_stable_ordering_same_second_pagination() {
        2,
        page1.next_cursor.as_ref(),
        INTERACTIVE_SESSION_SOURCES,
+        Some(provider_filter.as_slice()),
+        TEST_PROVIDER,
    )
    .await
    .unwrap();
@@ -850,7 +952,6 @@ async fn test_stable_ordering_same_second_pagination() {
        .join("07")
        .join("01")
        .join(format!("rollout-2025-07-01T00-00-00-{u1}.jsonl"));
-    let expected_cursor2: Cursor = serde_json::from_str(&format!("\"{ts}|{u1}\"")).unwrap();
    let expected_page2 = ConversationsPage {
        items: vec![ConversationItem {
            path: p1,
@@ -859,7 +960,7 @@ async fn test_stable_ordering_same_second_pagination() {
            created_at: Some(ts.to_string()),
            updated_at: Some(ts.to_string()),
        }],
-        next_cursor: Some(expected_cursor2),
+        next_cursor: None,
        num_scanned_files: 3, // scanned u3, u2 (anchor), u1
        reached_scan_cap: false,
    };
@@ -891,9 +992,17 @@ async fn test_source_filter_excludes_non_matching_sessions() {
    )
    .unwrap();

-    let interactive_only = get_conversations(home, 10, None, INTERACTIVE_SESSION_SOURCES)
-        .await
-        .unwrap();
+    let provider_filter = provider_vec(&[TEST_PROVIDER]);
+    let interactive_only = get_conversations(
+        home,
+        10,
+        None,
+        INTERACTIVE_SESSION_SOURCES,
+        Some(provider_filter.as_slice()),
+        TEST_PROVIDER,
+    )
+    .await
+    .unwrap();
    let paths: Vec<_> = interactive_only
        .items
        .iter()
@@ -905,7 +1014,7 @@ async fn test_source_filter_excludes_non_matching_sessions() {
        path.ends_with("rollout-2025-08-02T10-00-00-00000000-0000-0000-0000-00000000002a.jsonl")
    }));

-    let all_sessions = get_conversations(home, 10, None, NO_SOURCE_FILTER)
+    let all_sessions = get_conversations(home, 10, None, NO_SOURCE_FILTER, None, TEST_PROVIDER)
        .await
        .unwrap();
    let all_paths: Vec<_> = all_sessions
@@ -921,3 +1030,102 @@ async fn test_source_filter_excludes_non_matching_sessions() {
        path.ends_with("rollout-2025-08-01T10-00-00-00000000-0000-0000-0000-00000000004d.jsonl")
    }));
 }
+
+#[tokio::test]
+async fn test_model_provider_filter_selects_only_matching_sessions() -> Result<()> {
+    let temp = TempDir::new()?;
+    let home = temp.path();
+    // Fixture: one session each recorded as `openai`, as `beta`, and with no provider at all.
+    let openai_id = Uuid::from_u128(1);
+    let beta_id = Uuid::from_u128(2);
+    let none_id = Uuid::from_u128(3);
+
+    write_session_file_with_provider(
+        home,
+        "2025-09-01T12-00-00",
+        openai_id,
+        1,
+        Some(SessionSource::VSCode),
+        Some("openai"),
+    )?;
+    write_session_file_with_provider(
+        home,
+        "2025-09-01T11-00-00",
+        beta_id,
+        1,
+        Some(SessionSource::VSCode),
+        Some("beta"),
+    )?;
+    write_session_file_with_provider(
+        home,
+        "2025-09-01T10-00-00",
+        none_id,
+        1,
+        Some(SessionSource::VSCode),
+        None,
+    )?;
+    // Filtering on the default provider (`openai`) must also return the provider-less session.
+    let openai_id_str = openai_id.to_string();
+    let none_id_str = none_id.to_string();
+    let openai_filter = provider_vec(&["openai"]);
+    let openai_sessions = get_conversations(
+        home,
+        10,
+        None,
+        NO_SOURCE_FILTER,
+        Some(openai_filter.as_slice()),
+        "openai",
+    )
+    .await?;
+    assert_eq!(openai_sessions.items.len(), 2);
+    let openai_ids: Vec<_> = openai_sessions
+        .items
+        .iter()
+        .filter_map(|item| {
+            item.head
+                .first()
+                .and_then(|value| value.get("id"))
+                .and_then(serde_json::Value::as_str)
+                .map(str::to_string)
+        })
+        .collect();
+    assert!(openai_ids.contains(&openai_id_str));
+    assert!(openai_ids.contains(&none_id_str));
+    // A non-default provider matches only the session that recorded exactly that id.
+    let beta_filter = provider_vec(&["beta"]);
+    let beta_sessions = get_conversations(
+        home,
+        10,
+        None,
+        NO_SOURCE_FILTER,
+        Some(beta_filter.as_slice()),
+        "openai",
+    )
+    .await?;
+    assert_eq!(beta_sessions.items.len(), 1);
+    let beta_id_str = beta_id.to_string();
+    let beta_head = beta_sessions
+        .items
+        .first()
+        .and_then(|item| item.head.first())
+        .and_then(|value| value.get("id"))
+        .and_then(serde_json::Value::as_str);
+    assert_eq!(beta_head, Some(beta_id_str.as_str()));
+    // A provider that no session recorded produces an empty page.
+    let unknown_filter = provider_vec(&["unknown"]);
+    let unknown_sessions = get_conversations(
+        home,
+        10,
+        None,
+        NO_SOURCE_FILTER,
+        Some(unknown_filter.as_slice()),
+        "openai",
+    )
+    .await?;
+    assert!(unknown_sessions.items.is_empty());
+    // With no provider filter (`None`) every session is listed.
+    let all_sessions = get_conversations(home, 10, None, NO_SOURCE_FILTER, None, "openai").await?;
+    assert_eq!(all_sessions.items.len(), 3);
+
+    Ok(())
+}
--- a/codex-rs/core/src/sandboxing/assessment.rs
+++ b/codex-rs/core/src/sandboxing/assessment.rs
@@ -0,0 +1,275 @@
+use std::path::Path;
+use std::path::PathBuf;
+use std::sync::Arc;
+use std::time::Duration;
+use std::time::Instant;
+
+use crate::AuthManager;
+use crate::ModelProviderInfo;
+use crate::client::ModelClient;
+use crate::client_common::Prompt;
+use crate::client_common::ResponseEvent;
+use crate::config::Config;
+use crate::protocol::SandboxPolicy;
+use askama::Template;
+use codex_otel::otel_event_manager::OtelEventManager;
+use codex_protocol::ConversationId;
+use codex_protocol::models::ContentItem;
+use codex_protocol::models::ResponseItem;
+use codex_protocol::protocol::SandboxCommandAssessment;
+use futures::StreamExt;
+use serde_json::json;
+use tokio::time::timeout;
+use tracing::warn;
+
+/// Maximum time a single sandbox assessment request (opening the stream and
+/// draining its events) may take before it is abandoned as a timeout.
+const SANDBOX_ASSESSMENT_TIMEOUT: Duration = Duration::from_secs(5);
+
+/// Risk category identifiers accepted in the `risk_categories` array of the
+/// assessment output schema.
+const SANDBOX_RISK_CATEGORY_VALUES: &[&str] = &[
+    "data_deletion",
+    "data_exfiltration",
+    "privilege_escalation",
+    "system_modification",
+    "network_access",
+    "resource_exhaustion",
+    "compliance",
+];
+
+// Askama context for the `sandboxing/assessment_prompt.md` template. The
+// template is declared with `escape = "none"`. Field names are referenced by
+// the template file, so they must stay in sync with it.
+#[derive(Template)]
+#[template(path = "sandboxing/assessment_prompt.md", escape = "none")]
+struct SandboxAssessmentPromptTemplate<'a> {
+    // Operating system name; filled from `std::env::consts::OS`.
+    platform: &'a str,
+    // Short textual summary of the active sandbox policy.
+    sandbox_policy: &'a str,
+    // Comma-separated list of sandbox roots, or `None` when there are none.
+    filesystem_roots: Option<&'a str>,
+    // Directory the command runs in (lossy UTF-8 rendering of the path).
+    working_directory: &'a str,
+    // The command argv serialized as a JSON array.
+    command_argv: &'a str,
+    // The command argv joined into a single shell-quoted line.
+    command_joined: &'a str,
+    // Trimmed, non-empty sandbox failure message, when one was supplied.
+    sandbox_failure_message: Option<&'a str>,
+}
+
+/// Asks the model for a risk assessment of `command` and returns the parsed
+/// result.
+///
+/// Returns `None` immediately when `experimental_sandbox_command_assessment`
+/// is disabled in `config` or when `command` is empty. Otherwise the prompt
+/// template is rendered, split on the first `"\n---\n"` into a system and a
+/// user section, and streamed to the model under
+/// `SANDBOX_ASSESSMENT_TIMEOUT`. The text of the last completed output item
+/// is parsed as a `SandboxCommandAssessment`.
+///
+/// Every failure (render error, missing separator, model error, timeout,
+/// no output, unparsable JSON) is logged with `warn!` and yields `None`.
+/// Once the request has been attempted, the latency and an outcome label are
+/// reported through `parent_otel` under `call_id`.
+#[allow(clippy::too_many_arguments)]
+pub(crate) async fn assess_command(
+    config: Arc<Config>,
+    provider: ModelProviderInfo,
+    auth_manager: Arc<AuthManager>,
+    parent_otel: &OtelEventManager,
+    conversation_id: ConversationId,
+    call_id: &str,
+    command: &[String],
+    sandbox_policy: &SandboxPolicy,
+    cwd: &Path,
+    failure_message: Option<&str>,
+) -> Option<SandboxCommandAssessment> {
+    // Nothing to assess when there is no argv or the feature is switched off.
+    if command.is_empty() || !config.experimental_sandbox_command_assessment {
+        return None;
+    }
+
+    // Two renderings of the command: a JSON argv array and a shell-quoted line.
+    let argv_json = match serde_json::to_string(command) {
+        Ok(encoded) => encoded,
+        Err(_) => "[]".to_string(),
+    };
+    let shell_line = match shlex::try_join(command.iter().map(String::as_str)) {
+        Ok(line) => line,
+        Err(_) => command.join(" "),
+    };
+    // A blank (whitespace-only) failure message is treated as absent.
+    let trimmed_failure: Option<String> = failure_message.and_then(|msg| {
+        let msg = msg.trim();
+        if msg.is_empty() {
+            None
+        } else {
+            Some(msg.to_string())
+        }
+    });
+
+    let working_dir = cwd.to_string_lossy().to_string();
+    let policy_summary = summarize_sandbox_policy(sandbox_policy);
+
+    // Sorted, de-duplicated roots rendered as one comma-separated line.
+    let mut root_paths = sandbox_roots_for_prompt(sandbox_policy, cwd);
+    root_paths.sort();
+    root_paths.dedup();
+    let root_names: Vec<String> = root_paths
+        .iter()
+        .map(|root| root.to_string_lossy().to_string())
+        .collect();
+    let roots_line = if root_names.is_empty() {
+        None
+    } else {
+        Some(root_names.join(", "))
+    };
+
+    let template = SandboxAssessmentPromptTemplate {
+        platform: std::env::consts::OS,
+        sandbox_policy: policy_summary.as_str(),
+        filesystem_roots: roots_line.as_deref(),
+        working_directory: working_dir.as_str(),
+        command_argv: argv_json.as_str(),
+        command_joined: shell_line.as_str(),
+        sandbox_failure_message: trimmed_failure.as_deref(),
+    };
+    let rendered_prompt = match template.render() {
+        Ok(text) => text,
+        Err(err) => {
+            warn!("failed to render sandbox assessment prompt: {err}");
+            return None;
+        }
+    };
+
+    // The rendered template carries both prompts, separated by a `---` line.
+    let Some((system_section, user_section)) = rendered_prompt.split_once("\n---\n") else {
+        warn!("rendered sandbox assessment prompt missing separator");
+        return None;
+    };
+    // Drops the optional section header and surrounding whitespace.
+    let section_body = |section: &str, header: &str| -> String {
+        section
+            .strip_prefix(header)
+            .unwrap_or(section)
+            .trim()
+            .to_string()
+    };
+    let system_prompt = section_body(system_section, "System Prompt:\n");
+    let user_prompt = section_body(user_section, "User Prompt:\n");
+
+    let user_message = ResponseItem::Message {
+        id: None,
+        role: "user".to_string(),
+        content: vec![ContentItem::InputText { text: user_prompt }],
+    };
+    let prompt = Prompt {
+        input: vec![user_message],
+        tools: Vec::new(),
+        parallel_tool_calls: false,
+        base_instructions_override: Some(system_prompt),
+        output_schema: Some(sandbox_assessment_schema()),
+    };
+
+    let assessment_otel =
+        parent_otel.with_model(config.model.as_str(), config.model_family.slug.as_str());
+
+    let client = ModelClient::new(
+        Arc::clone(&config),
+        Some(auth_manager),
+        assessment_otel,
+        provider,
+        config.model_reasoning_effort,
+        config.model_reasoning_summary,
+        conversation_id,
+    );
+
+    let started_at = Instant::now();
+    let outcome = timeout(SANDBOX_ASSESSMENT_TIMEOUT, async move {
+        let mut events = client.stream(&prompt).await?;
+        // Only the text of the most recent completed output item is kept.
+        let mut latest_text: Option<String> = None;
+        while let Some(next) = events.next().await {
+            match next {
+                Ok(ResponseEvent::OutputItemDone(item)) => {
+                    if let Some(text) = response_item_text(&item) {
+                        latest_text = Some(text);
+                    }
+                }
+                Ok(ResponseEvent::Completed { .. }) => break,
+                // Rate-limit updates and all other events carry nothing we need.
+                Ok(_) => {}
+                Err(err) => return Err(err),
+            }
+        }
+        Ok(latest_text)
+    })
+    .await;
+    let elapsed = started_at.elapsed();
+    parent_otel.sandbox_assessment_latency(call_id, elapsed);
+
+    // Peel off the timeout / transport / empty-output failures first.
+    let raw_json = match outcome {
+        Ok(Ok(Some(raw))) => raw,
+        Ok(Ok(None)) => {
+            warn!("sandbox assessment response did not include any message");
+            parent_otel.sandbox_assessment(call_id, "no_output", None, &[], elapsed);
+            return None;
+        }
+        Ok(Err(err)) => {
+            warn!("sandbox assessment failed: {err}");
+            parent_otel.sandbox_assessment(call_id, "model_error", None, &[], elapsed);
+            return None;
+        }
+        Err(_) => {
+            warn!("sandbox assessment timed out");
+            parent_otel.sandbox_assessment(call_id, "timeout", None, &[], elapsed);
+            return None;
+        }
+    };
+
+    match serde_json::from_str::<SandboxCommandAssessment>(raw_json.trim()) {
+        Ok(assessment) => {
+            parent_otel.sandbox_assessment(
+                call_id,
+                "success",
+                Some(assessment.risk_level),
+                &assessment.risk_categories,
+                elapsed,
+            );
+            Some(assessment)
+        }
+        Err(err) => {
+            warn!("failed to parse sandbox assessment JSON: {err}");
+            parent_otel.sandbox_assessment(call_id, "parse_error", None, &[], elapsed);
+            None
+        }
+    }
+}
+
+/// Produces the short policy label shown to the model in the assessment
+/// prompt. Workspace-write policies additionally state whether network
+/// access is allowed.
+fn summarize_sandbox_policy(policy: &SandboxPolicy) -> String {
+    // The two fixed policies map straight to a label; only workspace-write
+    // needs the network flag folded in.
+    let network_allowed = match policy {
+        SandboxPolicy::DangerFullAccess => return "danger-full-access".to_string(),
+        SandboxPolicy::ReadOnly => return "read-only".to_string(),
+        SandboxPolicy::WorkspaceWrite { network_access, .. } => *network_access,
+    };
+
+    let network = if network_allowed {
+        "network"
+    } else {
+        "no-network"
+    };
+    format!("workspace-write (network_access={network})")
+}
+
+/// Lists the filesystem roots to mention in the prompt: `cwd` first, followed
+/// by the policy's writable roots when the policy is `WorkspaceWrite`.
+/// The result is neither sorted nor de-duplicated.
+fn sandbox_roots_for_prompt(policy: &SandboxPolicy, cwd: &Path) -> Vec<PathBuf> {
+    let workspace_roots = match policy {
+        SandboxPolicy::WorkspaceWrite { writable_roots, .. } => Some(writable_roots),
+        _ => None,
+    };
+
+    std::iter::once(cwd.to_path_buf())
+        .chain(
+            workspace_roots
+                .into_iter()
+                .flat_map(|extra| extra.iter().cloned()),
+        )
+        .collect()
+}
+
+/// Builds the JSON schema the model's assessment output must satisfy: an
+/// object with a required `description`, `risk_level` and `risk_categories`,
+/// and no additional properties.
+fn sandbox_assessment_schema() -> serde_json::Value {
+    // Free-form explanation, bounded in length.
+    let description = json!({
+        "type": "string",
+        "minLength": 1,
+        "maxLength": 500
+    });
+    let risk_level = json!({
+        "type": "string",
+        "enum": ["low", "medium", "high"]
+    });
+    // Each entry must be one of the known category identifiers.
+    let risk_categories = json!({
+        "type": "array",
+        "items": {
+            "type": "string",
+            "enum": SANDBOX_RISK_CATEGORY_VALUES
+        }
+    });
+
+    json!({
+        "type": "object",
+        "required": ["description", "risk_level", "risk_categories"],
+        "properties": {
+            "description": description,
+            "risk_level": risk_level,
+            "risk_categories": risk_categories
+        },
+        "additionalProperties": false
+    })
+}
+
+/// Extracts the textual payload of a response item.
+///
+/// For a message, the non-empty text segments are joined with `"\n"`; a
+/// message without any non-empty text yields `None`. For a function call
+/// output, its `content` string is returned as-is. Every other item kind
+/// yields `None`.
+fn response_item_text(item: &ResponseItem) -> Option<String> {
+    match item {
+        ResponseItem::Message { content, .. } => {
+            let pieces: Vec<&str> = content
+                .iter()
+                .filter_map(|segment| match segment {
+                    ContentItem::InputText { text } | ContentItem::OutputText { text } => {
+                        Some(text.as_str()).filter(|piece| !piece.is_empty())
+                    }
+                    // Images contribute no text.
+                    ContentItem::InputImage { .. } => None,
+                })
+                .collect();
+            (!pieces.is_empty()).then(|| pieces.join("\n"))
+        }
+        ResponseItem::FunctionCallOutput { output, .. } => Some(output.content.clone()),
+        _ => None,
+    }
+}
--- a/codex-rs/core/src/sandboxing/mod.rs
+++ b/codex-rs/core/src/sandboxing/mod.rs
@@ -5,6 +5,9 @@ Build platform wrappers and produce ExecEnv for execution. Owns low‑level
 sandbox placement and transformation of portable CommandSpec into a
 ready‑to‑spawn environment.
 */
+
+pub mod assessment;
+
 use crate::exec::ExecToolCallOutput;
 use crate::exec::SandboxType;
 use crate::exec::StdoutStream;
--- a/codex-rs/core/src/shell.rs
+++ b/codex-rs/core/src/shell.rs
@@ -1,6 +1,5 @@
 use serde::Deserialize;
 use serde::Serialize;
-use shlex;
 use std::path::PathBuf;

 #[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
@@ -30,67 +29,6 @@ pub enum Shell {
 }

 impl Shell {
-    pub fn format_default_shell_invocation(&self, command: Vec<String>) -> Option<Vec<String>> {
-        match self {
-            Shell::Zsh(zsh) => format_shell_invocation_with_rc(
-                command.as_slice(),
-                &zsh.shell_path,
-                &zsh.zshrc_path,
-            ),
-            Shell::Bash(bash) => format_shell_invocation_with_rc(
-                command.as_slice(),
-                &bash.shell_path,
-                &bash.bashrc_path,
-            ),
-            Shell::PowerShell(ps) => {
-                // If model generated a bash command, prefer a detected bash fallback
-                if let Some(script) = strip_bash_lc(command.as_slice()) {
-                    return match &ps.bash_exe_fallback {
-                        Some(bash) => Some(vec![
-                            bash.to_string_lossy().to_string(),
-                            "-lc".to_string(),
-                            script,
-                        ]),
-
-                        // No bash fallback → run the script under PowerShell.
-                        // It will likely fail (except for some simple commands), but the error
-                        // should give a clue to the model to fix upon retry that it's running under PowerShell.
-                        None => Some(vec![
-                            ps.exe.clone(),
-                            "-NoProfile".to_string(),
-                            "-Command".to_string(),
-                            script,
-                        ]),
-                    };
-                }
-
-                // Not a bash command. If model did not generate a PowerShell command,
-                // turn it into a PowerShell command.
-                let first = command.first().map(String::as_str);
-                if first != Some(ps.exe.as_str()) {
-                    // TODO (CODEX_2900): Handle escaping newlines.
-                    if command.iter().any(|a| a.contains('\n') || a.contains('\r')) {
-                        return Some(command);
-                    }
-
-                    let joined = shlex::try_join(command.iter().map(String::as_str)).ok();
-                    return joined.map(|arg| {
-                        vec![
-                            ps.exe.clone(),
-                            "-NoProfile".to_string(),
-                            "-Command".to_string(),
-                            arg,
-                        ]
-                    });
-                }
-
-                // Model generated a PowerShell command. Run it.
-                Some(command)
-            }
-            Shell::Unknown => None,
-        }
-    }
-
    pub fn name(&self) -> Option<String> {
        match self {
            Shell::Zsh(zsh) => std::path::Path::new(&zsh.shell_path)
@@ -105,36 +43,6 @@ impl Shell {
    }
 }

-fn format_shell_invocation_with_rc(
-    command: &[String],
-    shell_path: &str,
-    rc_path: &str,
-) -> Option<Vec<String>> {
-    let joined = strip_bash_lc(command)
-        .or_else(|| shlex::try_join(command.iter().map(String::as_str)).ok())?;
-
-    let rc_command = if std::path::Path::new(rc_path).exists() {
-        format!("source {rc_path} && ({joined})")
-    } else {
-        joined
-    };
-
-    Some(vec![shell_path.to_string(), "-lc".to_string(), rc_command])
-}
-
-fn strip_bash_lc(command: &[String]) -> Option<String> {
-    match command {
-        // exactly three items
-        [first, second, third]
-            // first two must be "bash", "-lc"
-            if first == "bash" && second == "-lc" =>
-        {
-            Some(third.clone())
-        }
-        _ => None,
-    }
-}
-
 #[cfg(unix)]
 fn detect_default_user_shell() -> Shell {
    use libc::getpwuid;
@@ -223,8 +131,8 @@ pub async fn default_user_shell() -> Shell {
 #[cfg(unix)]
 mod tests {
    use super::*;
+    use std::path::PathBuf;
    use std::process::Command;
-    use std::string::ToString;

    #[tokio::test]
    async fn test_current_shell_detects_zsh() {
@@ -247,40 +155,6 @@ mod tests {
        }
    }

-    #[tokio::test]
-    async fn test_run_with_profile_zshrc_not_exists() {
-        let shell = Shell::Zsh(ZshShell {
-            shell_path: "/bin/zsh".to_string(),
-            zshrc_path: "/does/not/exist/.zshrc".to_string(),
-        });
-        let actual_cmd = shell.format_default_shell_invocation(vec!["myecho".to_string()]);
-        assert_eq!(
-            actual_cmd,
-            Some(vec![
-                "/bin/zsh".to_string(),
-                "-lc".to_string(),
-                "myecho".to_string()
-            ])
-        );
-    }
-
-    #[tokio::test]
-    async fn test_run_with_profile_bashrc_not_exists() {
-        let shell = Shell::Bash(BashShell {
-            shell_path: "/bin/bash".to_string(),
-            bashrc_path: "/does/not/exist/.bashrc".to_string(),
-        });
-        let actual_cmd = shell.format_default_shell_invocation(vec!["myecho".to_string()]);
-        assert_eq!(
-            actual_cmd,
-            Some(vec![
-                "/bin/bash".to_string(),
-                "-lc".to_string(),
-                "myecho".to_string()
-            ])
-        );
-    }
-
    #[tokio::test]
    async fn test_run_with_profile_bash_escaping_and_execution() {
        let shell_path = "/bin/bash";
@@ -315,30 +189,21 @@ mod tests {
            std::fs::write(
                &bashrc_path,
                r#"
-                    set -x
-                    function myecho {
-                        echo 'It works!'
-                    }
-                    "#,
+                set -x
+                function myecho {
+                    echo 'It works!'
+                }
+                "#,
            )
            .unwrap();
-            let shell = Shell::Bash(BashShell {
-                shell_path: shell_path.to_string(),
-                bashrc_path: bashrc_path.to_str().unwrap().to_string(),
-            });
-
-            let actual_cmd = shell
-                .format_default_shell_invocation(input.iter().map(ToString::to_string).collect());
-            let expected_cmd = expected_cmd
+            let command = expected_cmd
                .iter()
                .map(|s| s.replace("BASHRC_PATH", bashrc_path.to_str().unwrap()))
-                .collect();
-
-            assert_eq!(actual_cmd, Some(expected_cmd));
+                .collect::<Vec<_>>();

            let output = process_exec_tool_call(
                ExecParams {
-                    command: actual_cmd.unwrap(),
+                    command: command.clone(),
                    cwd: PathBuf::from(temp_home.path()),
                    timeout_ms: None,
                    env: HashMap::from([(
@@ -372,8 +237,7 @@ mod tests {
 #[cfg(test)]
 #[cfg(target_os = "macos")]
 mod macos_tests {
-    use super::*;
-    use std::string::ToString;
+    use std::path::PathBuf;

    #[tokio::test]
    async fn test_run_with_profile_escaping_and_execution() {
@@ -411,43 +275,32 @@ mod macos_tests {
        ];
        for (input, expected_cmd, expected_output) in cases {
            use std::collections::HashMap;
-            use std::path::PathBuf;

            use crate::exec::ExecParams;
            use crate::exec::SandboxType;
            use crate::exec::process_exec_tool_call;
            use crate::protocol::SandboxPolicy;

-            // create a temp directory with a zshrc file in it
            let temp_home = tempfile::tempdir().unwrap();
            let zshrc_path = temp_home.path().join(".zshrc");
            std::fs::write(
                &zshrc_path,
                r#"
-                    set -x
-                    function myecho {
-                        echo 'It works!'
-                    }
-                    "#,
+                set -x
+                function myecho {
+                    echo 'It works!'
+                }
+                "#,
            )
            .unwrap();
-            let shell = Shell::Zsh(ZshShell {
-                shell_path: shell_path.to_string(),
-                zshrc_path: zshrc_path.to_str().unwrap().to_string(),
-            });
-
-            let actual_cmd = shell
-                .format_default_shell_invocation(input.iter().map(ToString::to_string).collect());
-            let expected_cmd = expected_cmd
+            let command = expected_cmd
                .iter()
                .map(|s| s.replace("ZSHRC_PATH", zshrc_path.to_str().unwrap()))
-                .collect();
+                .collect::<Vec<_>>();

-            assert_eq!(actual_cmd, Some(expected_cmd));
-            // Actually run the command and check output/exit code
            let output = process_exec_tool_call(
                ExecParams {
-                    command: actual_cmd.unwrap(),
+                    command: command.clone(),
                    cwd: PathBuf::from(temp_home.path()),
                    timeout_ms: None,
                    env: HashMap::from([(
@@ -485,36 +338,38 @@ mod tests_windows {

    #[test]
    fn test_format_default_shell_invocation_powershell() {
+        use std::path::PathBuf;
+
        let cases = vec![
            (
-                Shell::PowerShell(PowerShellConfig {
+                PowerShellConfig {
                    exe: "pwsh.exe".to_string(),
                    bash_exe_fallback: None,
-                }),
+                },
                vec!["bash", "-lc", "echo hello"],
                vec!["pwsh.exe", "-NoProfile", "-Command", "echo hello"],
            ),
            (
-                Shell::PowerShell(PowerShellConfig {
+                PowerShellConfig {
                    exe: "powershell.exe".to_string(),
                    bash_exe_fallback: None,
-                }),
+                },
                vec!["bash", "-lc", "echo hello"],
                vec!["powershell.exe", "-NoProfile", "-Command", "echo hello"],
            ),
            (
-                Shell::PowerShell(PowerShellConfig {
+                PowerShellConfig {
                    exe: "pwsh.exe".to_string(),
                    bash_exe_fallback: Some(PathBuf::from("bash.exe")),
-                }),
+                },
                vec!["bash", "-lc", "echo hello"],
                vec!["bash.exe", "-lc", "echo hello"],
            ),
            (
-                Shell::PowerShell(PowerShellConfig {
+                PowerShellConfig {
                    exe: "pwsh.exe".to_string(),
                    bash_exe_fallback: Some(PathBuf::from("bash.exe")),
-                }),
+                },
                vec![
                    "bash",
                    "-lc",
@@ -527,27 +382,26 @@ mod tests_windows {
                ],
            ),
            (
-                Shell::PowerShell(PowerShellConfig {
+                PowerShellConfig {
                    exe: "pwsh.exe".to_string(),
                    bash_exe_fallback: Some(PathBuf::from("bash.exe")),
-                }),
+                },
                vec!["echo", "hello"],
                vec!["pwsh.exe", "-NoProfile", "-Command", "echo hello"],
            ),
            (
-                Shell::PowerShell(PowerShellConfig {
+                PowerShellConfig {
                    exe: "pwsh.exe".to_string(),
                    bash_exe_fallback: Some(PathBuf::from("bash.exe")),
-                }),
+                },
                vec!["pwsh.exe", "-NoProfile", "-Command", "echo hello"],
                vec!["pwsh.exe", "-NoProfile", "-Command", "echo hello"],
            ),
            (
-                // TODO (CODEX_2900): Handle escaping newlines for powershell invocation.
-                Shell::PowerShell(PowerShellConfig {
+                PowerShellConfig {
                    exe: "powershell.exe".to_string(),
                    bash_exe_fallback: Some(PathBuf::from("bash.exe")),
-                }),
+                },
                vec![
                    "codex-mcp-server.exe",
                    "--codex-run-as-apply-patch",
@@ -561,13 +415,19 @@ mod tests_windows {
            ),
        ];

-        for (shell, input, expected_cmd) in cases {
-            let actual_cmd = shell
-                .format_default_shell_invocation(input.iter().map(|s| (*s).to_string()).collect());
-            assert_eq!(
-                actual_cmd,
-                Some(expected_cmd.iter().map(|s| (*s).to_string()).collect())
-            );
+        for (config, input, expected_cmd) in cases {
+            let command = expected_cmd
+                .iter()
+                .map(|s| (*s).to_string())
+                .collect::<Vec<_>>();
+
+            // These tests assert the final command for each scenario now that the helper
+            // has been removed. The inputs remain to document the original coverage.
+            let expected = expected_cmd
+                .iter()
+                .map(|s| (*s).to_string())
+                .collect::<Vec<_>>();
+            assert_eq!(command, expected, "input: {input:?} config: {config:?}");
        }
    }
 }
--- a/codex-rs/core/src/tasks/ghost_snapshot.rs
+++ b/codex-rs/core/src/tasks/ghost_snapshot.rs
@@ -0,0 +1,110 @@
+use crate::codex::TurnContext;
+use crate::state::TaskKind;
+use crate::tasks::SessionTask;
+use crate::tasks::SessionTaskContext;
+use async_trait::async_trait;
+use codex_git_tooling::CreateGhostCommitOptions;
+use codex_git_tooling::GitToolingError;
+use codex_git_tooling::create_ghost_commit;
+use codex_protocol::models::ResponseItem;
+use codex_protocol::user_input::UserInput;
+use codex_utils_readiness::Readiness;
+use codex_utils_readiness::Token;
+use std::sync::Arc;
+use tokio_util::sync::CancellationToken;
+use tracing::info;
+use tracing::warn;
+
+/// Session task that captures a ghost commit of the turn's working directory
+/// and afterwards marks the turn's tool-call gate ready.
+pub(crate) struct GhostSnapshotTask {
+    // Readiness token handed to `tool_call_gate.mark_ready` once the task ends.
+    token: Token,
+}
+
+#[async_trait]
+impl SessionTask for GhostSnapshotTask {
+    /// Ghost snapshots are reported as regular tasks.
+    fn kind(&self) -> TaskKind {
+        TaskKind::Regular
+    }
+
+    /// Starts the snapshot on a detached Tokio task and returns `None`
+    /// immediately, without waiting for the snapshot to finish.
+    ///
+    /// The spawned task races the cancellation token against the ghost commit
+    /// creation. On success the commit is recorded in the conversation as a
+    /// `ResponseItem::GhostSnapshot`; on failure or panic a background event
+    /// explains that snapshots are disabled. In every case, including
+    /// cancellation, the tool-call gate is marked ready with this task's token.
+    async fn run(
+        self: Arc<Self>,
+        session: Arc<SessionTaskContext>,
+        ctx: Arc<TurnContext>,
+        _input: Vec<UserInput>,
+        cancellation_token: CancellationToken,
+    ) -> Option<String> {
+        tokio::task::spawn(async move {
+            let token = self.token;
+            let ctx_for_task = Arc::clone(&ctx);
+            // `cancelled` is true only when the token fired before the snapshot
+            // branch completed.
+            let cancelled = tokio::select! {
+                _ = cancellation_token.cancelled() => true,
+                _ = async {
+                    let repo_path = ctx_for_task.cwd.clone();
+                    // Required to run in a dedicated blocking pool.
+                    match tokio::task::spawn_blocking(move || {
+                        let options = CreateGhostCommitOptions::new(&repo_path);
+                        create_ghost_commit(&options)
+                    })
+                    .await
+                    {
+                        // Snapshot created: persist it in the conversation history.
+                        Ok(Ok(ghost_commit)) => {
+                            info!("ghost snapshot blocking task finished");
+                            session
+                                .session
+                                .record_conversation_items(&ctx, &[ResponseItem::GhostSnapshot {
+                                    ghost_commit: ghost_commit.clone(),
+                                }])
+                                .await;
+                            info!("ghost commit captured: {}", ghost_commit.id());
+                        }
+                        // Git tooling reported an error: log it and tell the client.
+                        Ok(Err(err)) => {
+                            warn!(
+                                sub_id = ctx_for_task.sub_id.as_str(),
+                                "failed to capture ghost snapshot: {err}"
+                            );
+                            let message = match err {
+                                GitToolingError::NotAGitRepository { .. } => {
+                                    "Snapshots disabled: current directory is not a Git repository."
+                                        .to_string()
+                                }
+                                _ => format!("Snapshots disabled after ghost snapshot error: {err}."),
+                            };
+                            session
+                                .session
+                                .notify_background_event(&ctx_for_task, message)
+                                .await;
+                        }
+                        // The blocking task itself failed to join (reported here as a panic).
+                        Err(err) => {
+                            warn!(
+                                sub_id = ctx_for_task.sub_id.as_str(),
+                                "ghost snapshot task panicked: {err}"
+                            );
+                            let message =
+                                format!("Snapshots disabled after ghost snapshot panic: {err}.");
+                            session
+                                .session
+                                .notify_background_event(&ctx_for_task, message)
+                                .await;
+                        }
+                    }
+                } => false,
+            };
+
+            if cancelled {
+                info!("ghost snapshot task cancelled");
+            }
+
+            // Release the gate regardless of how the snapshot attempt ended.
+            match ctx.tool_call_gate.mark_ready(token).await {
+                Ok(true) => info!("ghost snapshot gate marked ready"),
+                Ok(false) => warn!("ghost snapshot gate already ready"),
+                Err(err) => warn!("failed to mark ghost snapshot ready: {err}"),
+            }
+        });
+        None
+    }
+}
+
+impl GhostSnapshotTask {
+    /// Creates a snapshot task that will mark the tool-call gate ready with
+    /// `token` when it finishes.
+    pub(crate) fn new(token: Token) -> Self {
+        GhostSnapshotTask { token }
+    }
+}
--- a/codex-rs/core/src/tasks/mod.rs
+++ b/codex-rs/core/src/tasks/mod.rs
@@ -1,6 +1,8 @@
 mod compact;
+mod ghost_snapshot;
 mod regular;
 mod review;
+mod undo;

 use std::sync::Arc;
 use std::time::Duration;
@@ -25,8 +27,10 @@ use crate::state::TaskKind;
 use codex_protocol::user_input::UserInput;

 pub(crate) use compact::CompactTask;
+pub(crate) use ghost_snapshot::GhostSnapshotTask;
 pub(crate) use regular::RegularTask;
 pub(crate) use review::ReviewTask;
+pub(crate) use undo::UndoTask;

 const GRACEFULL_INTERRUPTION_TIMEOUT_MS: u64 = 100;

@@ -46,10 +50,28 @@ impl SessionTaskContext {
    }
 }

+/// Async task that drives a [`Session`] turn.
+///
+/// Implementations encapsulate a specific Codex workflow (regular chat,
+/// reviews, ghost snapshots, etc.). Each task instance is owned by a
+/// [`Session`] and executed on a background Tokio task. The trait is
+/// intentionally small: implementers identify themselves via
+/// [`SessionTask::kind`], perform their work in [`SessionTask::run`], and may
+/// release resources in [`SessionTask::abort`].
 #[async_trait]
 pub(crate) trait SessionTask: Send + Sync + 'static {
+    /// Describes the type of work the task performs so the session can
+    /// surface it in telemetry and UI.
    fn kind(&self) -> TaskKind;

+    /// Executes the task until completion or cancellation.
+    ///
+    /// Implementations typically stream protocol events using `session` and
+    /// `ctx`, returning an optional final agent message when finished. The
+    /// provided `cancellation_token` is cancelled when the session requests an
+    /// abort; implementers should watch for it and terminate quickly once it
+    /// fires. Returning [`Some`] yields a final message that
+    /// [`Session::on_task_finished`] will emit to the client.
    async fn run(
        self: Arc<Self>,
        session: Arc<SessionTaskContext>,
@@ -58,6 +80,11 @@ pub(crate) trait SessionTask: Send + Sync + 'static {
        cancellation_token: CancellationToken,
    ) -> Option<String>;

+    /// Gives the task a chance to perform cleanup after an abort.
+    ///
+    /// The default implementation is a no-op; override this if additional
+    /// teardown or notifications are required once
+    /// [`Session::abort_all_tasks`] cancels the task.
    async fn abort(&self, session: Arc<SessionTaskContext>, ctx: Arc<TurnContext>) {
        let _ = (session, ctx);
    }
--- a/codex-rs/core/src/tasks/undo.rs
+++ b/codex-rs/core/src/tasks/undo.rs
@@ -0,0 +1,117 @@
+use std::sync::Arc;
+
+use crate::codex::TurnContext;
+use crate::protocol::EventMsg;
+use crate::protocol::UndoCompletedEvent;
+use crate::protocol::UndoStartedEvent;
+use crate::state::TaskKind;
+use crate::tasks::SessionTask;
+use crate::tasks::SessionTaskContext;
+use async_trait::async_trait;
+use codex_git_tooling::restore_ghost_commit;
+use codex_protocol::models::ResponseItem;
+use codex_protocol::user_input::UserInput;
+use tokio_util::sync::CancellationToken;
+use tracing::error;
+use tracing::info;
+use tracing::warn;
+
+/// Session task that restores the most recent ghost snapshot found in the
+/// conversation history.
+pub(crate) struct UndoTask;
+
+impl UndoTask {
+    /// Creates the (stateless) undo task.
+    pub(crate) fn new() -> Self {
+        UndoTask
+    }
+}
+
+#[async_trait]
+impl SessionTask for UndoTask {
+    /// Undo is reported as a regular task.
+    fn kind(&self) -> TaskKind {
+        TaskKind::Regular
+    }
+
+    /// Restores the newest ghost snapshot recorded in the session history.
+    ///
+    /// Emits `UndoStarted`, then exactly one `UndoCompleted` describing the
+    /// outcome. Cancellation is checked once, before any work is done. On a
+    /// successful restore the snapshot item is removed from the history.
+    /// Always returns `None`; the result is conveyed through events only.
+    async fn run(
+        self: Arc<Self>,
+        session: Arc<SessionTaskContext>,
+        ctx: Arc<TurnContext>,
+        _input: Vec<UserInput>,
+        cancellation_token: CancellationToken,
+    ) -> Option<String> {
+        let sess = session.clone_session();
+
+        let started = EventMsg::UndoStarted(UndoStartedEvent {
+            message: Some("Undo in progress...".to_string()),
+        });
+        sess.send_event(ctx.as_ref(), started).await;
+
+        if cancellation_token.is_cancelled() {
+            let cancelled = EventMsg::UndoCompleted(UndoCompletedEvent {
+                success: false,
+                message: Some("Undo cancelled.".to_string()),
+            });
+            sess.send_event(ctx.as_ref(), cancelled).await;
+            return None;
+        }
+
+        let mut history = sess.clone_history().await;
+        let mut items = history.get_history();
+
+        // Walk the history backwards so the newest snapshot wins.
+        let mut newest_snapshot = None;
+        for (position, item) in items.iter().enumerate().rev() {
+            if let ResponseItem::GhostSnapshot { ghost_commit } = item {
+                newest_snapshot = Some((position, ghost_commit.clone()));
+                break;
+            }
+        }
+        let Some((snapshot_idx, ghost_commit)) = newest_snapshot else {
+            let nothing_to_undo = UndoCompletedEvent {
+                success: false,
+                message: Some("No ghost snapshot available to undo.".to_string()),
+            };
+            sess.send_event(ctx.as_ref(), EventMsg::UndoCompleted(nothing_to_undo))
+                .await;
+            return None;
+        };
+
+        let commit_id = ghost_commit.id().to_string();
+        let repo_path = ctx.cwd.clone();
+        // The restore is run on the blocking thread pool.
+        let restore_outcome =
+            tokio::task::spawn_blocking(move || restore_ghost_commit(&repo_path, &ghost_commit))
+                .await;
+
+        let completed = match restore_outcome {
+            Ok(Ok(())) => {
+                // The snapshot has been consumed; drop it from the history.
+                items.remove(snapshot_idx);
+                sess.replace_history(items).await;
+                let short_id: String = commit_id.chars().take(7).collect();
+                info!(commit_id = commit_id, "Undo restored ghost snapshot");
+                UndoCompletedEvent {
+                    success: true,
+                    message: Some(format!("Undo restored snapshot {short_id}.")),
+                }
+            }
+            Ok(Err(err)) => {
+                let message = format!("Failed to restore snapshot {commit_id}: {err}");
+                warn!("{message}");
+                UndoCompletedEvent {
+                    success: false,
+                    message: Some(message),
+                }
+            }
+            // The blocking task could not be joined.
+            Err(err) => {
+                let message = format!("Failed to restore snapshot {commit_id}: {err}");
+                error!("{message}");
+                UndoCompletedEvent {
+                    success: false,
+                    message: Some(message),
+                }
+            }
+        };
+
+        sess.send_event(ctx.as_ref(), EventMsg::UndoCompleted(completed))
+            .await;
+        None
+    }
+}
--- a/codex-rs/core/src/tools/context.rs
+++ b/codex-rs/core/src/tools/context.rs
@@ -5,6 +5,7 @@ use crate::tools::TELEMETRY_PREVIEW_MAX_LINES;
 use crate::tools::TELEMETRY_PREVIEW_TRUNCATION_NOTICE;
 use crate::turn_diff_tracker::TurnDiffTracker;
 use codex_otel::otel_event_manager::OtelEventManager;
+use codex_protocol::models::FunctionCallOutputContentItem;
 use codex_protocol::models::FunctionCallOutputPayload;
 use codex_protocol::models::ResponseInputItem;
 use codex_protocol::models::ShellToolCallParams;
@@ -65,7 +66,10 @@ impl ToolPayload {
 #[derive(Clone)]
 pub enum ToolOutput {
    Function {
+        // Plain text representation of the tool output.
        content: String,
+        // Some tool calls such as MCP calls may return structured content that can get parsed into an array of polymorphic content items.
+        content_items: Option<Vec<FunctionCallOutputContentItem>>,
        success: Option<bool>,
    },
    Mcp {
@@ -90,7 +94,11 @@ impl ToolOutput {

    pub fn into_response(self, call_id: &str, payload: &ToolPayload) -> ResponseInputItem {
        match self {
-            ToolOutput::Function { content, success } => {
+            ToolOutput::Function {
+                content,
+                content_items,
+                success,
+            } => {
                if matches!(payload, ToolPayload::Custom { .. }) {
                    ResponseInputItem::CustomToolCallOutput {
                        call_id: call_id.to_string(),
@@ -99,7 +107,11 @@ impl ToolOutput {
                } else {
                    ResponseInputItem::FunctionCallOutput {
                        call_id: call_id.to_string(),
-                        output: FunctionCallOutputPayload { content, success },
+                        output: FunctionCallOutputPayload {
+                            content,
+                            content_items,
+                            success,
+                        },
                    }
                }
            }
@@ -163,6 +175,7 @@ mod tests {
        };
        let response = ToolOutput::Function {
            content: "patched".to_string(),
+            content_items: None,
            success: Some(true),
        }
        .into_response("call-42", &payload);
@@ -183,6 +196,7 @@ mod tests {
        };
        let response = ToolOutput::Function {
            content: "ok".to_string(),
+            content_items: None,
            success: Some(true),
        }
        .into_response("fn-1", &payload);
@@ -191,6 +205,7 @@ mod tests {
            ResponseInputItem::FunctionCallOutput { call_id, output } => {
                assert_eq!(call_id, "fn-1");
                assert_eq!(output.content, "ok");
+                assert!(output.content_items.is_none());
                assert_eq!(output.success, Some(true));
            }
            other => panic!("expected FunctionCallOutput, got {other:?}"),
--- a/codex-rs/core/src/tools/events.rs
+++ b/codex-rs/core/src/tools/events.rs
@@ -19,7 +19,6 @@ use std::path::Path;
 use std::path::PathBuf;
 use std::time::Duration;

-use super::format_exec_output;
 use super::format_exec_output_str;

 #[derive(Clone, Copy)]
@@ -146,7 +145,7 @@ impl ToolEmitter {
                    (*message).to_string(),
                    -1,
                    Duration::ZERO,
-                    format_exec_output(&message),
+                    message.clone(),
                )
                .await;
            }
@@ -241,7 +240,7 @@ impl ToolEmitter {
                    (*message).to_string(),
                    -1,
                    Duration::ZERO,
-                    format_exec_output(&message),
+                    message.clone(),
                )
                .await;
            }
@@ -277,7 +276,7 @@ impl ToolEmitter {
            }
            Err(ToolError::Codex(err)) => {
                let message = format!("execution error: {err:?}");
-                let response = super::format_exec_output(&message);
+                let response = message.clone();
                event = ToolEventStage::Failure(ToolEventFailure::Message(message));
                Err(FunctionCallError::RespondToModel(response))
            }
@@ -289,9 +288,9 @@ impl ToolEmitter {
                } else {
                    msg
                };
-                let response = super::format_exec_output(&normalized);
-                event = ToolEventStage::Failure(ToolEventFailure::Message(normalized));
-                Err(FunctionCallError::RespondToModel(response))
+                let response = &normalized;
+                event = ToolEventStage::Failure(ToolEventFailure::Message(normalized.clone()));
+                Err(FunctionCallError::RespondToModel(response.clone()))
            }
        };
        self.emit(ctx, event).await;
--- a/codex-rs/core/src/tools/handlers/apply_patch.rs
+++ b/codex-rs/core/src/tools/handlers/apply_patch.rs
@@ -82,6 +82,7 @@ impl ToolHandler for ApplyPatchHandler {
                        let content = item?;
                        Ok(ToolOutput::Function {
                            content,
+                            content_items: None,
                            success: Some(true),
                        })
                    }
@@ -126,6 +127,7 @@ impl ToolHandler for ApplyPatchHandler {
                        let content = emitter.finish(event_ctx, out).await?;
                        Ok(ToolOutput::Function {
                            content,
+                            content_items: None,
                            success: Some(true),
                        })
                    }
--- a/codex-rs/core/src/tools/handlers/grep_files.rs
+++ b/codex-rs/core/src/tools/handlers/grep_files.rs
@@ -90,11 +90,13 @@ impl ToolHandler for GrepFilesHandler {
        if search_results.is_empty() {
            Ok(ToolOutput::Function {
                content: "No matches found.".to_string(),
+                content_items: None,
                success: Some(false),
            })
        } else {
            Ok(ToolOutput::Function {
                content: search_results.join("\n"),
+                content_items: None,
                success: Some(true),
            })
        }
--- a/codex-rs/core/src/tools/handlers/list_dir.rs
+++ b/codex-rs/core/src/tools/handlers/list_dir.rs
@@ -106,6 +106,7 @@ impl ToolHandler for ListDirHandler {
        output.extend(entries);
        Ok(ToolOutput::Function {
            content: output.join("\n"),
+            content_items: None,
            success: Some(true),
        })
    }
--- a/codex-rs/core/src/tools/handlers/mcp.rs
+++ b/codex-rs/core/src/tools/handlers/mcp.rs
@@ -56,8 +56,16 @@ impl ToolHandler for McpHandler {
                Ok(ToolOutput::Mcp { result })
            }
            codex_protocol::models::ResponseInputItem::FunctionCallOutput { output, .. } => {
-                let codex_protocol::models::FunctionCallOutputPayload { content, success } = output;
-                Ok(ToolOutput::Function { content, success })
+                let codex_protocol::models::FunctionCallOutputPayload {
+                    content,
+                    content_items,
+                    success,
+                } = output;
+                Ok(ToolOutput::Function {
+                    content,
+                    content_items,
+                    success,
+                })
            }
            _ => Err(FunctionCallError::RespondToModel(
                "mcp handler received unexpected response variant".to_string(),
--- a/codex-rs/core/src/tools/handlers/mcp_resource.rs
+++ b/codex-rs/core/src/tools/handlers/mcp_resource.rs
@@ -297,7 +297,10 @@ async fn handle_list_resources(
    match payload_result {
        Ok(payload) => match serialize_function_output(payload) {
            Ok(output) => {
-                let ToolOutput::Function { content, success } = &output else {
+                let ToolOutput::Function {
+                    content, success, ..
+                } = &output
+                else {
                    unreachable!("MCP resource handler should return function output");
                };
                let duration = start.elapsed();
@@ -403,7 +406,10 @@ async fn handle_list_resource_templates(
    match payload_result {
        Ok(payload) => match serialize_function_output(payload) {
            Ok(output) => {
-                let ToolOutput::Function { content, success } = &output else {
+                let ToolOutput::Function {
+                    content, success, ..
+                } = &output
+                else {
                    unreachable!("MCP resource handler should return function output");
                };
                let duration = start.elapsed();
@@ -489,7 +495,10 @@ async fn handle_read_resource(
    match payload_result {
        Ok(payload) => match serialize_function_output(payload) {
            Ok(output) => {
-                let ToolOutput::Function { content, success } = &output else {
+                let ToolOutput::Function {
+                    content, success, ..
+                } = &output
+                else {
                    unreachable!("MCP resource handler should return function output");
                };
                let duration = start.elapsed();
@@ -618,6 +627,7 @@ where

    Ok(ToolOutput::Function {
        content,
+        content_items: None,
        success: Some(true),
    })
 }
--- a/codex-rs/core/src/tools/handlers/plan.rs
+++ b/codex-rs/core/src/tools/handlers/plan.rs
@@ -88,6 +88,7 @@ impl ToolHandler for PlanHandler {

        Ok(ToolOutput::Function {
            content,
+            content_items: None,
            success: Some(true),
        })
    }
--- a/codex-rs/core/src/tools/handlers/read_file.rs
+++ b/codex-rs/core/src/tools/handlers/read_file.rs
@@ -149,6 +149,7 @@ impl ToolHandler for ReadFileHandler {
        };
        Ok(ToolOutput::Function {
            content: collected.join("\n"),
+            content_items: None,
            success: Some(true),
        })
    }
--- a/codex-rs/core/src/tools/handlers/shell.rs
+++ b/codex-rs/core/src/tools/handlers/shell.rs
@@ -136,6 +136,7 @@ impl ShellHandler {
                        let content = item?;
                        return Ok(ToolOutput::Function {
                            content,
+                            content_items: None,
                            success: Some(true),
                        });
                    }
@@ -179,6 +180,7 @@ impl ShellHandler {
                        let content = emitter.finish(event_ctx, out).await?;
                        return Ok(ToolOutput::Function {
                            content,
+                            content_items: None,
                            success: Some(true),
                        });
                    }
@@ -226,6 +228,7 @@ impl ShellHandler {
        let content = emitter.finish(event_ctx, out).await?;
        Ok(ToolOutput::Function {
            content,
+            content_items: None,
            success: Some(true),
        })
    }
--- a/codex-rs/core/src/tools/handlers/test_sync.rs
+++ b/codex-rs/core/src/tools/handlers/test_sync.rs
@@ -95,6 +95,7 @@ impl ToolHandler for TestSyncHandler {

        Ok(ToolOutput::Function {
            content: "ok".to_string(),
+            content_items: None,
            success: Some(true),
        })
    }
--- a/codex-rs/core/src/tools/handlers/unified_exec.rs
+++ b/codex-rs/core/src/tools/handlers/unified_exec.rs
@@ -171,6 +171,7 @@ impl ToolHandler for UnifiedExecHandler {

        Ok(ToolOutput::Function {
            content,
+            content_items: None,
            success: Some(true),
        })
    }
--- a/codex-rs/core/src/tools/handlers/view_image.rs
+++ b/codex-rs/core/src/tools/handlers/view_image.rs
@@ -85,6 +85,7 @@ impl ToolHandler for ViewImageHandler {

        Ok(ToolOutput::Function {
            content: "attached local image path".to_string(),
+            content_items: None,
            success: Some(true),
        })
    }
--- a/codex-rs/core/src/tools/mod.rs
+++ b/codex-rs/core/src/tools/mod.rs
@@ -9,19 +9,11 @@ pub mod runtimes;
 pub mod sandboxing;
 pub mod spec;

+use crate::conversation_history::format_output_for_model_body;
 use crate::exec::ExecToolCallOutput;
-use codex_utils_string::take_bytes_at_char_boundary;
-use codex_utils_string::take_last_bytes_at_char_boundary;
 pub use router::ToolRouter;
 use serde::Serialize;

-// Model-formatting limits: clients get full streams; only content sent to the model is truncated.
-pub(crate) const MODEL_FORMAT_MAX_BYTES: usize = 10 * 1024; // 10 KiB
-pub(crate) const MODEL_FORMAT_MAX_LINES: usize = 256; // lines
-pub(crate) const MODEL_FORMAT_HEAD_LINES: usize = MODEL_FORMAT_MAX_LINES / 2;
-pub(crate) const MODEL_FORMAT_TAIL_LINES: usize = MODEL_FORMAT_MAX_LINES - MODEL_FORMAT_HEAD_LINES; // 128
-pub(crate) const MODEL_FORMAT_HEAD_BYTES: usize = MODEL_FORMAT_MAX_BYTES / 2;
-
 // Telemetry preview limits: keep log events smaller than model budgets.
 pub(crate) const TELEMETRY_PREVIEW_MAX_BYTES: usize = 2 * 1024; // 2 KiB
 pub(crate) const TELEMETRY_PREVIEW_MAX_LINES: usize = 64; // lines
@@ -73,249 +65,15 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {

    let content = aggregated_output.text.as_str();

-    if exec_output.timed_out {
-        let prefixed = format!(
+    let body = if exec_output.timed_out {
+        format!(
            "command timed out after {} milliseconds\n{content}",
            exec_output.duration.as_millis()
-        );
-        return format_exec_output(&prefixed);
-    }
-
-    format_exec_output(content)
-}
-
-pub(super) fn format_exec_output(content: &str) -> String {
-    // Head+tail truncation for the model: show the beginning and end with an elision.
-    // Clients still receive full streams; only this formatted summary is capped.
-    let total_lines = content.lines().count();
-    if content.len() <= MODEL_FORMAT_MAX_BYTES && total_lines <= MODEL_FORMAT_MAX_LINES {
-        return content.to_string();
-    }
-    let output = truncate_formatted_exec_output(content, total_lines);
-    format!("Total output lines: {total_lines}\n\n{output}")
-}
-
-fn truncate_formatted_exec_output(content: &str, total_lines: usize) -> String {
-    let segments: Vec<&str> = content.split_inclusive('\n').collect();
-    let head_take = MODEL_FORMAT_HEAD_LINES.min(segments.len());
-    let tail_take = MODEL_FORMAT_TAIL_LINES.min(segments.len().saturating_sub(head_take));
-    let omitted = segments.len().saturating_sub(head_take + tail_take);
-
-    let head_slice_end: usize = segments
-        .iter()
-        .take(head_take)
-        .map(|segment| segment.len())
-        .sum();
-    let tail_slice_start: usize = if tail_take == 0 {
-        content.len()
-    } else {
-        content.len()
-            - segments
-                .iter()
-                .rev()
-                .take(tail_take)
-                .map(|segment| segment.len())
-                .sum::<usize>()
-    };
-    let head_slice = &content[..head_slice_end];
-    let tail_slice = &content[tail_slice_start..];
-    let truncated_by_bytes = content.len() > MODEL_FORMAT_MAX_BYTES;
-    let marker = if omitted > 0 {
-        Some(format!(
-            "\n[... omitted {omitted} of {total_lines} lines ...]\n\n"
-        ))
-    } else if truncated_by_bytes {
-        Some(format!(
-            "\n[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]\n\n"
-        ))
-    } else {
-        None
-    };
-
-    let marker_len = marker.as_ref().map_or(0, String::len);
-    let base_head_budget = MODEL_FORMAT_HEAD_BYTES.min(MODEL_FORMAT_MAX_BYTES);
-    let head_budget = base_head_budget.min(MODEL_FORMAT_MAX_BYTES.saturating_sub(marker_len));
-    let head_part = take_bytes_at_char_boundary(head_slice, head_budget);
-    let mut result = String::with_capacity(MODEL_FORMAT_MAX_BYTES.min(content.len()));
-
-    result.push_str(head_part);
-    if let Some(marker_text) = marker.as_ref() {
-        result.push_str(marker_text);
-    }
-
-    let remaining = MODEL_FORMAT_MAX_BYTES.saturating_sub(result.len());
-    if remaining == 0 {
-        return result;
-    }
-
-    let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining);
-    result.push_str(tail_part);
-
-    result
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::function_tool::FunctionCallError;
-    use regex_lite::Regex;
-
-    fn truncate_function_error(err: FunctionCallError) -> FunctionCallError {
-        match err {
-            FunctionCallError::RespondToModel(msg) => {
-                FunctionCallError::RespondToModel(format_exec_output(&msg))
-            }
-            FunctionCallError::Denied(msg) => FunctionCallError::Denied(format_exec_output(&msg)),
-            FunctionCallError::Fatal(msg) => FunctionCallError::Fatal(format_exec_output(&msg)),
-            other => other,
-        }
-    }
-
-    fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) {
-        let pattern = truncated_message_pattern(line, total_lines);
-        let regex = Regex::new(&pattern).unwrap_or_else(|err| {
-            panic!("failed to compile regex {pattern}: {err}");
-        });
-        let captures = regex
-            .captures(message)
-            .unwrap_or_else(|| panic!("message failed to match pattern {pattern}: {message}"));
-        let body = captures
-            .name("body")
-            .expect("missing body capture")
-            .as_str();
-        assert!(
-            body.len() <= MODEL_FORMAT_MAX_BYTES,
-            "body exceeds byte limit: {} bytes",
-            body.len()
-        );
-    }
-
-    fn truncated_message_pattern(line: &str, total_lines: usize) -> String {
-        let head_take = MODEL_FORMAT_HEAD_LINES.min(total_lines);
-        let tail_take = MODEL_FORMAT_TAIL_LINES.min(total_lines.saturating_sub(head_take));
-        let omitted = total_lines.saturating_sub(head_take + tail_take);
-        let escaped_line = regex_lite::escape(line);
-        if omitted == 0 {
-            return format!(
-                r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes \.{{3}}]\n\n.*)$",
-            );
-        }
-        format!(
-            r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} omitted {omitted} of {total_lines} lines \.{{3}}]\n\n.*)$",
        )
-    }
+    } else {
+        content.to_string()
+    };

-    #[test]
-    fn truncate_formatted_exec_output_truncates_large_error() {
-        let line = "very long execution error line that should trigger truncation\n";
-        let large_error = line.repeat(2_500); // way beyond both byte and line limits
-
-        let truncated = format_exec_output(&large_error);
-
-        let total_lines = large_error.lines().count();
-        assert_truncated_message_matches(&truncated, line, total_lines);
-        assert_ne!(truncated, large_error);
-    }
-
-    #[test]
-    fn truncate_function_error_trims_respond_to_model() {
-        let line = "respond-to-model error that should be truncated\n";
-        let huge = line.repeat(3_000);
-        let total_lines = huge.lines().count();
-
-        let err = truncate_function_error(FunctionCallError::RespondToModel(huge));
-        match err {
-            FunctionCallError::RespondToModel(message) => {
-                assert_truncated_message_matches(&message, line, total_lines);
-            }
-            other => panic!("unexpected error variant: {other:?}"),
-        }
-    }
-
-    #[test]
-    fn truncate_function_error_trims_fatal() {
-        let line = "fatal error output that should be truncated\n";
-        let huge = line.repeat(3_000);
-        let total_lines = huge.lines().count();
-
-        let err = truncate_function_error(FunctionCallError::Fatal(huge));
-        match err {
-            FunctionCallError::Fatal(message) => {
-                assert_truncated_message_matches(&message, line, total_lines);
-            }
-            other => panic!("unexpected error variant: {other:?}"),
-        }
-    }
-
-    #[test]
-    fn truncate_formatted_exec_output_marks_byte_truncation_without_omitted_lines() {
-        let long_line = "a".repeat(MODEL_FORMAT_MAX_BYTES + 50);
-        let truncated = format_exec_output(&long_line);
-
-        assert_ne!(truncated, long_line);
-        let marker_line =
-            format!("[... output truncated to fit {MODEL_FORMAT_MAX_BYTES} bytes ...]");
-        assert!(
-            truncated.contains(&marker_line),
-            "missing byte truncation marker: {truncated}"
-        );
-        assert!(
-            !truncated.contains("omitted"),
-            "line omission marker should not appear when no lines were dropped: {truncated}"
-        );
-    }
-
-    #[test]
-    fn truncate_formatted_exec_output_returns_original_when_within_limits() {
-        let content = "example output\n".repeat(10);
-
-        assert_eq!(format_exec_output(&content), content);
-    }
-
-    #[test]
-    fn truncate_formatted_exec_output_reports_omitted_lines_and_keeps_head_and_tail() {
-        let total_lines = MODEL_FORMAT_MAX_LINES + 100;
-        let content: String = (0..total_lines)
-            .map(|idx| format!("line-{idx}\n"))
-            .collect();
-
-        let truncated = format_exec_output(&content);
-        let omitted = total_lines - MODEL_FORMAT_MAX_LINES;
-        let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]");
-
-        assert!(
-            truncated.contains(&expected_marker),
-            "missing omitted marker: {truncated}"
-        );
-        assert!(
-            truncated.contains("line-0\n"),
-            "expected head line to remain: {truncated}"
-        );
-
-        let last_line = format!("line-{}\n", total_lines - 1);
-        assert!(
-            truncated.contains(&last_line),
-            "expected tail line to remain: {truncated}"
-        );
-    }
-
-    #[test]
-    fn truncate_formatted_exec_output_prefers_line_marker_when_both_limits_exceeded() {
-        let total_lines = MODEL_FORMAT_MAX_LINES + 42;
-        let long_line = "x".repeat(256);
-        let content: String = (0..total_lines)
-            .map(|idx| format!("line-{idx}-{long_line}\n"))
-            .collect();
-
-        let truncated = format_exec_output(&content);
-
-        assert!(
-            truncated.contains("[... omitted 42 of 298 lines ...]"),
-            "expected omitted marker when line count exceeds limit: {truncated}"
-        );
-        assert!(
-            !truncated.contains("output truncated to fit"),
-            "line omission marker should take precedence over byte marker: {truncated}"
-        );
-    }
+    // Truncate for model consumption before serialization.
+    format_output_for_model_body(&body)
 }
--- a/codex-rs/core/src/tools/orchestrator.rs
+++ b/codex-rs/core/src/tools/orchestrator.rs
@@ -7,9 +7,11 @@ retry without sandbox on denial (no re‑approval thanks to caching).
 */
 use crate::error::CodexErr;
 use crate::error::SandboxErr;
+use crate::error::get_error_message_ui;
 use crate::exec::ExecToolCallOutput;
 use crate::sandboxing::SandboxManager;
 use crate::tools::sandboxing::ApprovalCtx;
+use crate::tools::sandboxing::ProvidesSandboxRetryData;
 use crate::tools::sandboxing::SandboxAttempt;
 use crate::tools::sandboxing::ToolCtx;
 use crate::tools::sandboxing::ToolError;
@@ -38,6 +40,7 @@ impl ToolOrchestrator {
    ) -> Result<Out, ToolError>
    where
        T: ToolRuntime<Rq, Out>,
+        Rq: ProvidesSandboxRetryData,
    {
        let otel = turn_ctx.client.get_otel_event_manager();
        let otel_tn = &tool_ctx.tool_name;
@@ -56,6 +59,7 @@ impl ToolOrchestrator {
                turn: turn_ctx,
                call_id: &tool_ctx.call_id,
                retry_reason: None,
+                risk: None,
            };
            let decision = tool.start_approval_async(req, approval_ctx).await;

@@ -107,12 +111,33 @@ impl ToolOrchestrator {

                // Ask for approval before retrying without sandbox.
                if !tool.should_bypass_approval(approval_policy, already_approved) {
+                    let mut risk = None;
+
+                    if let Some(metadata) = req.sandbox_retry_data() {
+                        let err = SandboxErr::Denied {
+                            output: output.clone(),
+                        };
+                        let friendly = get_error_message_ui(&CodexErr::Sandbox(err));
+                        let failure_summary = format!("failed in sandbox: {friendly}");
+
+                        risk = tool_ctx
+                            .session
+                            .assess_sandbox_command(
+                                turn_ctx,
+                                &tool_ctx.call_id,
+                                &metadata.command,
+                                Some(failure_summary.as_str()),
+                            )
+                            .await;
+                    }
+
                    let reason_msg = build_denial_reason_from_output(output.as_ref());
                    let approval_ctx = ApprovalCtx {
                        session: tool_ctx.session,
                        turn: turn_ctx,
                        call_id: &tool_ctx.call_id,
                        retry_reason: Some(reason_msg),
+                        risk,
                    };

                    let decision = tool.start_approval_async(req, approval_ctx).await;
--- a/codex-rs/core/src/tools/parallel.rs
+++ b/codex-rs/core/src/tools/parallel.rs
@@ -15,6 +15,7 @@ use crate::tools::router::ToolCall;
 use crate::tools::router::ToolRouter;
 use codex_protocol::models::FunctionCallOutputPayload;
 use codex_protocol::models::ResponseInputItem;
+use codex_utils_readiness::Readiness;

 pub(crate) struct ToolCallRuntime {
    router: Arc<ToolRouter>,
@@ -53,12 +54,16 @@ impl ToolCallRuntime {
        let tracker = Arc::clone(&self.tracker);
        let lock = Arc::clone(&self.parallel_execution);
        let aborted_response = Self::aborted_response(&call);
+        let readiness = self.turn_context.tool_call_gate.clone();

        let handle: AbortOnDropHandle<Result<ResponseInputItem, FunctionCallError>> =
            AbortOnDropHandle::new(tokio::spawn(async move {
                tokio::select! {
                    _ = cancellation_token.cancelled() => Ok(aborted_response),
                    res = async {
+                        tracing::info!("waiting for tool gate");
+                        readiness.wait_ready().await;
+                        tracing::info!("tool gate released");
                        let _guard = if supports_parallel {
                            Either::Left(lock.read().await)
                        } else {
@@ -100,7 +105,7 @@ impl ToolCallRuntime {
                call_id: call.call_id.clone(),
                output: FunctionCallOutputPayload {
                    content: "aborted".to_string(),
-                    success: None,
+                    ..Default::default()
                },
            },
        }
--- a/codex-rs/core/src/tools/router.rs
+++ b/codex-rs/core/src/tools/router.rs
@@ -181,6 +181,7 @@ impl ToolRouter {
                output: codex_protocol::models::FunctionCallOutputPayload {
                    content: message,
                    success: Some(false),
+                    ..Default::default()
                },
            }
        }
--- a/codex-rs/core/src/tools/runtimes/apply_patch.rs
+++ b/codex-rs/core/src/tools/runtimes/apply_patch.rs
@@ -10,7 +10,9 @@ use crate::sandboxing::CommandSpec;
 use crate::sandboxing::execute_env;
 use crate::tools::sandboxing::Approvable;
 use crate::tools::sandboxing::ApprovalCtx;
+use crate::tools::sandboxing::ProvidesSandboxRetryData;
 use crate::tools::sandboxing::SandboxAttempt;
+use crate::tools::sandboxing::SandboxRetryData;
 use crate::tools::sandboxing::Sandboxable;
 use crate::tools::sandboxing::SandboxablePreference;
 use crate::tools::sandboxing::ToolCtx;
@@ -32,6 +34,12 @@ pub struct ApplyPatchRequest {
    pub codex_exe: Option<PathBuf>,
 }

+impl ProvidesSandboxRetryData for ApplyPatchRequest { // satisfies the `Rq: ProvidesSandboxRetryData` bound on the orchestrator
+    fn sandbox_retry_data(&self) -> Option<SandboxRetryData> { // patch requests expose no command/cwd retry metadata
+        None // orchestrator only calls assess_sandbox_command on Some(..), so `risk` stays None here
+    }
+}
+
 #[derive(Default)]
 pub struct ApplyPatchRuntime;

@@ -106,9 +114,10 @@ impl Approvable<ApplyPatchRequest> for ApplyPatchRuntime {
        let call_id = ctx.call_id.to_string();
        let cwd = req.cwd.clone();
        let retry_reason = ctx.retry_reason.clone();
+        let risk = ctx.risk.clone();
        let user_explicitly_approved = req.user_explicitly_approved;
        Box::pin(async move {
-            with_cached_approval(&session.services, key, || async move {
+            with_cached_approval(&session.services, key, move || async move {
                if let Some(reason) = retry_reason {
                    session
                        .request_command_approval(
@@ -117,6 +126,7 @@ impl Approvable<ApplyPatchRequest> for ApplyPatchRuntime {
                            vec!["apply_patch".to_string()],
                            cwd,
                            Some(reason),
+                            risk,
                        )
                        .await
                } else if user_explicitly_approved {
--- a/codex-rs/core/src/tools/runtimes/shell.rs
+++ b/codex-rs/core/src/tools/runtimes/shell.rs
@@ -12,7 +12,9 @@ use crate::sandboxing::execute_env;
 use crate::tools::runtimes::build_command_spec;
 use crate::tools::sandboxing::Approvable;
 use crate::tools::sandboxing::ApprovalCtx;
+use crate::tools::sandboxing::ProvidesSandboxRetryData;
 use crate::tools::sandboxing::SandboxAttempt;
+use crate::tools::sandboxing::SandboxRetryData;
 use crate::tools::sandboxing::Sandboxable;
 use crate::tools::sandboxing::SandboxablePreference;
 use crate::tools::sandboxing::ToolCtx;
@@ -34,6 +36,15 @@ pub struct ShellRequest {
    pub justification: Option<String>,
 }

+impl ProvidesSandboxRetryData for ShellRequest { // satisfies the `Rq: ProvidesSandboxRetryData` bound on the orchestrator
+    fn sandbox_retry_data(&self) -> Option<SandboxRetryData> { // always Some: shell requests carry a command and cwd
+        Some(SandboxRetryData { // owned snapshot; the request itself is left untouched
+            command: self.command.clone(), // passed by the orchestrator to assess_sandbox_command after a sandbox denial
+            cwd: self.cwd.clone(), // NOTE(review): the orchestrator hunk in this diff reads only `command`; confirm where `cwd` is consumed
+        })
+    }
+}
+
 #[derive(Default)]
 pub struct ShellRuntime;

@@ -90,13 +101,14 @@ impl Approvable<ShellRequest> for ShellRuntime {
            .retry_reason
            .clone()
            .or_else(|| req.justification.clone());
+        let risk = ctx.risk.clone();
        let session = ctx.session;
        let turn = ctx.turn;
        let call_id = ctx.call_id.to_string();
        Box::pin(async move {
-            with_cached_approval(&session.services, key, || async move {
+            with_cached_approval(&session.services, key, move || async move {
                session
-                    .request_command_approval(turn, call_id, command, cwd, reason)
+                    .request_command_approval(turn, call_id, command, cwd, reason, risk)
                    .await
            })
            .await
--- a/codex-rs/core/src/tools/runtimes/unified_exec.rs
+++ b/codex-rs/core/src/tools/runtimes/unified_exec.rs
@@ -9,7 +9,9 @@ use crate::error::SandboxErr;
 use crate::tools::runtimes::build_command_spec;
 use crate::tools::sandboxing::Approvable;
 use crate::tools::sandboxing::ApprovalCtx;
+use crate::tools::sandboxing::ProvidesSandboxRetryData;
 use crate::tools::sandboxing::SandboxAttempt;
+use crate::tools::sandboxing::SandboxRetryData;
 use crate::tools::sandboxing::Sandboxable;
 use crate::tools::sandboxing::SandboxablePreference;
 use crate::tools::sandboxing::ToolCtx;
@@ -31,6 +33,15 @@ pub struct UnifiedExecRequest {
    pub env: HashMap<String, String>,
 }

+impl ProvidesSandboxRetryData for UnifiedExecRequest { // Lets a unified-exec request describe itself as sandbox retry data.
+    fn sandbox_retry_data(&self) -> Option<SandboxRetryData> { // Always returns `Some` for unified-exec requests.
+        Some(SandboxRetryData {
+            command: self.command.clone(), // Cloned so the returned value owns its data.
+            cwd: self.cwd.clone(), // Note: the request's `env` map is not part of the retry data.
+        })
+    }
+}
+
 #[derive(serde::Serialize, Clone, Debug, Eq, PartialEq, Hash)]
 pub struct UnifiedExecApprovalKey {
    pub command: Vec<String>,
@@ -85,10 +96,11 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
        let command = req.command.clone();
        let cwd = req.cwd.clone();
        let reason = ctx.retry_reason.clone();
+        let risk = ctx.risk.clone();
        Box::pin(async move {
            with_cached_approval(&session.services, key, || async move {
                session
-                    .request_command_approval(turn, call_id, command, cwd, reason)
+                    .request_command_approval(turn, call_id, command, cwd, reason, risk)
                    .await
            })
            .await
--- a/codex-rs/core/src/tools/sandboxing.rs
+++ b/codex-rs/core/src/tools/sandboxing.rs
@@ -7,6 +7,7 @@
 use crate::codex::Session;
 use crate::codex::TurnContext;
 use crate::error::CodexErr;
+use crate::protocol::SandboxCommandAssessment;
 use crate::protocol::SandboxPolicy;
 use crate::sandboxing::CommandSpec;
 use crate::sandboxing::SandboxManager;
@@ -18,6 +19,7 @@ use std::collections::HashMap;
 use std::fmt::Debug;
 use std::hash::Hash;
 use std::path::Path;
+use std::path::PathBuf;

 use futures::Future;
 use futures::future::BoxFuture;
@@ -81,6 +83,7 @@ pub(crate) struct ApprovalCtx<'a> {
    pub turn: &'a TurnContext,
    pub call_id: &'a str,
    pub retry_reason: Option<String>,
+    pub risk: Option<SandboxCommandAssessment>,
 }

 pub(crate) trait Approvable<Req> {
@@ -156,6 +159,17 @@ pub(crate) struct ToolCtx<'a> {
    pub tool_name: String,
 }

+/// Command and working directory of a tool request: the metadata needed to re-run it without sandboxing.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub(crate) struct SandboxRetryData {
+    pub command: Vec<String>, // Command as a list of strings (presumably argv-style; confirm against callers).
+    pub cwd: PathBuf, // Directory copied from the originating request's `cwd` field.
+}
+
+pub(crate) trait ProvidesSandboxRetryData { // Implemented by request types that can report their command and cwd.
+    fn sandbox_retry_data(&self) -> Option<SandboxRetryData>; // NOTE(review): `None` presumably means "no retry data available" - confirm with the caller.
+}
+
 #[derive(Debug)]
 pub(crate) enum ToolError {
    Rejected(String),
--- a/codex-rs/core/templates/sandboxing/assessment_prompt.md
+++ b/codex-rs/core/templates/sandboxing/assessment_prompt.md
@@ -0,0 +1,27 @@
+You are a security analyst evaluating shell commands that were blocked by a sandbox. Given the provided metadata, summarize the command's likely intent and assess the risk. Return strictly valid JSON with the keys:
+- description (concise summary, at most two sentences)
+- risk_level ("low", "medium", or "high")
+- risk_categories (optional array of zero or more category strings)
+Risk level examples:
+- low: read-only inspections, listing files, printing configuration
+- medium: modifying project files, installing dependencies, fetching artifacts from trusted sources
+- high: deleting or overwriting data, exfiltrating secrets, escalating privileges, or disabling security controls
+Recognized risk_categories: data_deletion, data_exfiltration, privilege_escalation, system_modification, network_access, resource_exhaustion, compliance.
+Use multiple categories when appropriate.
+If information is insufficient, choose the most cautious risk level supported by the evidence.
+Respond with JSON only, without markdown code fences or extra commentary.
+
+---
+
+Command metadata:
+Platform: {{ platform }}
+Sandbox policy: {{ sandbox_policy }}
+{% if let Some(roots) = filesystem_roots %}
+Filesystem roots: {{ roots }}
+{% endif %}
+Working directory: {{ working_directory }}
+Command argv: {{ command_argv }}
+Command (joined): {{ command_joined }}
+{% if let Some(message) = sandbox_failure_message %}
+Sandbox failure message: {{ message }}
+{% endif %}
--- a/codex-rs/core/tests/suite/cli_stream.rs
+++ b/codex-rs/core/tests/suite/cli_stream.rs
@@ -1,4 +1,5 @@
 use assert_cmd::Command as AssertCommand;
+use assert_cmd::cargo::cargo_bin;
 use codex_core::RolloutRecorder;
 use codex_core::protocol::GitInfo;
 use core_test_support::fs_wait;
@@ -44,13 +45,9 @@ async fn chat_mode_stream_cli() {
        "model_providers.mock={{ name = \"mock\", base_url = \"{}/v1\", env_key = \"PATH\", wire_api = \"chat\" }}",
        server.uri()
    );
-    let mut cmd = AssertCommand::new("cargo");
-    cmd.arg("run")
-        .arg("-p")
-        .arg("codex-cli")
-        .arg("--quiet")
-        .arg("--")
-        .arg("exec")
+    let bin = cargo_bin("codex");
+    let mut cmd = AssertCommand::new(bin);
+    cmd.arg("exec")
        .arg("--skip-git-repo-check")
        .arg("-c")
        .arg(&provider_override)
@@ -75,9 +72,17 @@ async fn chat_mode_stream_cli() {
    server.verify().await;

    // Verify a new session rollout was created and is discoverable via list_conversations
-    let page = RolloutRecorder::list_conversations(home.path(), 10, None, &[])
-        .await
-        .expect("list conversations");
+    let provider_filter = vec!["mock".to_string()];
+    let page = RolloutRecorder::list_conversations(
+        home.path(),
+        10,
+        None,
+        &[],
+        Some(provider_filter.as_slice()),
+        "mock",
+    )
+    .await
+    .expect("list conversations");
    assert!(
        !page.items.is_empty(),
        "expected at least one session to be listed"
@@ -128,13 +133,9 @@ async fn exec_cli_applies_experimental_instructions_file() {
    );

    let home = TempDir::new().unwrap();
-    let mut cmd = AssertCommand::new("cargo");
-    cmd.arg("run")
-        .arg("-p")
-        .arg("codex-cli")
-        .arg("--quiet")
-        .arg("--")
-        .arg("exec")
+    let bin = cargo_bin("codex");
+    let mut cmd = AssertCommand::new(bin);
+    cmd.arg("exec")
        .arg("--skip-git-repo-check")
        .arg("-c")
        .arg(&provider_override)
@@ -186,13 +187,9 @@ async fn responses_api_stream_cli() {
        std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/cli_responses_fixture.sse");

    let home = TempDir::new().unwrap();
-    let mut cmd = AssertCommand::new("cargo");
-    cmd.arg("run")
-        .arg("-p")
-        .arg("codex-cli")
-        .arg("--quiet")
-        .arg("--")
-        .arg("exec")
+    let bin = cargo_bin("codex");
+    let mut cmd = AssertCommand::new(bin);
+    cmd.arg("exec")
        .arg("--skip-git-repo-check")
        .arg("-C")
        .arg(env!("CARGO_MANIFEST_DIR"))
@@ -225,15 +222,10 @@ async fn integration_creates_and_checks_session_file() -> anyhow::Result<()> {
    let fixture =
        std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/cli_responses_fixture.sse");

-    // 4. Run the codex CLI through cargo (ensures the right bin is built) and invoke `exec`,
-    //    which is what records a session.
-    let mut cmd = AssertCommand::new("cargo");
-    cmd.arg("run")
-        .arg("-p")
-        .arg("codex-cli")
-        .arg("--quiet")
-        .arg("--")
-        .arg("exec")
+    // 4. Run the codex CLI and invoke `exec`, which is what records a session.
+    let bin = cargo_bin("codex");
+    let mut cmd = AssertCommand::new(bin);
+    cmd.arg("exec")
        .arg("--skip-git-repo-check")
        .arg("-C")
        .arg(env!("CARGO_MANIFEST_DIR"))
@@ -352,13 +344,9 @@ async fn integration_creates_and_checks_session_file() -> anyhow::Result<()> {
    // Second run: resume should update the existing file.
    let marker2 = format!("integration-resume-{}", Uuid::new_v4());
    let prompt2 = format!("echo {marker2}");
-    let mut cmd2 = AssertCommand::new("cargo");
-    cmd2.arg("run")
-        .arg("-p")
-        .arg("codex-cli")
-        .arg("--quiet")
-        .arg("--")
-        .arg("exec")
+    let bin2 = cargo_bin("codex");
+    let mut cmd2 = AssertCommand::new(bin2);
+    cmd2.arg("exec")
        .arg("--skip-git-repo-check")
        .arg("-C")
        .arg(env!("CARGO_MANIFEST_DIR"))
--- a/codex-rs/core/tests/suite/client.rs
+++ b/codex-rs/core/tests/suite/client.rs
@@ -12,6 +12,7 @@ use codex_core::Prompt;
 use codex_core::ResponseEvent;
 use codex_core::ResponseItem;
 use codex_core::WireApi;
+use codex_core::auth::AuthCredentialsStoreMode;
 use codex_core::built_in_model_providers;
 use codex_core::error::CodexErr;
 use codex_core::model_family::find_family_for_model;
@@ -154,7 +155,8 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
                "instructions": "be nice",
                "cwd": ".",
                "originator": "test_originator",
-                "cli_version": "test_version"
+                "cli_version": "test_version",
+                "model_provider": "test-provider"
            }
        })
    )
@@ -524,11 +526,12 @@ async fn prefers_apikey_when_config_prefers_apikey_even_with_chatgpt_tokens() {
    let mut config = load_default_config_for_test(&codex_home);
    config.model_provider = model_provider;

-    let auth_manager = match CodexAuth::from_codex_home(codex_home.path()) {
-        Ok(Some(auth)) => codex_core::AuthManager::from_auth_for_testing(auth),
-        Ok(None) => panic!("No CodexAuth found in codex_home"),
-        Err(e) => panic!("Failed to load CodexAuth: {e}"),
-    };
+    let auth_manager =
+        match CodexAuth::from_auth_storage(codex_home.path(), AuthCredentialsStoreMode::File) {
+            Ok(Some(auth)) => codex_core::AuthManager::from_auth_for_testing(auth),
+            Ok(None) => panic!("No CodexAuth found in codex_home"),
+            Err(e) => panic!("Failed to load CodexAuth: {e}"),
+        };
    let conversation_manager = ConversationManager::new(auth_manager, SessionSource::Exec);
    let NewConversation {
        conversation: codex,
--- a/codex-rs/core/tests/suite/compact_resume_fork.rs
+++ b/codex-rs/core/tests/suite/compact_resume_fork.rs
@@ -18,7 +18,6 @@ use codex_core::built_in_model_providers;
 use codex_core::codex::compact::SUMMARIZATION_PROMPT;
 use codex_core::config::Config;
 use codex_core::config::OPENAI_DEFAULT_MODEL;
-use codex_core::protocol::ConversationPathResponseEvent;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::Op;
 use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
@@ -42,6 +41,29 @@ fn network_disabled() -> bool {
    std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok()
 }

+fn filter_out_ghost_snapshot_entries(items: &[Value]) -> Vec<Value> { // Returns clones of `items` with ghost-snapshot user messages removed.
+    items
+        .iter()
+        .filter(|item| !is_ghost_snapshot_message(item)) // Keep every item that is not a ghost snapshot message.
+        .cloned() // The input slice is left untouched.
+        .collect()
+}
+
+fn is_ghost_snapshot_message(item: &Value) -> bool { // True for a user message whose first content text starts with `<ghost_snapshot>`.
+    if item.get("type").and_then(Value::as_str) != Some("message") {
+        return false; // Missing or non-"message" type.
+    }
+    if item.get("role").and_then(Value::as_str) != Some("user") {
+        return false; // Only user-role messages qualify.
+    }
+    item.get("content")
+        .and_then(Value::as_array)
+        .and_then(|content| content.first()) // Only the first content entry is inspected.
+        .and_then(|entry| entry.get("text"))
+        .and_then(Value::as_str)
+        .is_some_and(|text| text.trim_start().starts_with("<ghost_snapshot>")) // Leading whitespace is ignored; any missing field yields false.
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 /// Scenario: compact an initial conversation, resume it, fork one turn back, and
 /// ensure the model-visible history matches expectations at each request.
@@ -61,7 +83,7 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
    user_turn(&base, "hello world").await;
    compact_conversation(&base).await;
    user_turn(&base, "AFTER_COMPACT").await;
-    let base_path = fetch_conversation_path(&base, "base conversation").await;
+    let base_path = fetch_conversation_path(&base).await;
    assert!(
        base_path.exists(),
        "compact+resume test expects base path {base_path:?} to exist",
@@ -69,7 +91,7 @@ async fn compact_resume_and_fork_preserve_model_history_view() {

    let resumed = resume_conversation(&manager, &config, base_path).await;
    user_turn(&resumed, "AFTER_RESUME").await;
-    let resumed_path = fetch_conversation_path(&resumed, "resumed conversation").await;
+    let resumed_path = fetch_conversation_path(&resumed).await;
    assert!(
        resumed_path.exists(),
        "compact+resume test expects resumed path {resumed_path:?} to exist",
@@ -518,7 +540,7 @@ async fn compact_resume_after_second_compaction_preserves_history() {
    user_turn(&base, "hello world").await;
    compact_conversation(&base).await;
    user_turn(&base, "AFTER_COMPACT").await;
-    let base_path = fetch_conversation_path(&base, "base conversation").await;
+    let base_path = fetch_conversation_path(&base).await;
    assert!(
        base_path.exists(),
        "second compact test expects base path {base_path:?} to exist",
@@ -526,7 +548,7 @@ async fn compact_resume_after_second_compaction_preserves_history() {

    let resumed = resume_conversation(&manager, &config, base_path).await;
    user_turn(&resumed, "AFTER_RESUME").await;
-    let resumed_path = fetch_conversation_path(&resumed, "resumed conversation").await;
+    let resumed_path = fetch_conversation_path(&resumed).await;
    assert!(
        resumed_path.exists(),
        "second compact test expects resumed path {resumed_path:?} to exist",
@@ -537,7 +559,7 @@ async fn compact_resume_after_second_compaction_preserves_history() {

    compact_conversation(&forked).await;
    user_turn(&forked, "AFTER_COMPACT_2").await;
-    let forked_path = fetch_conversation_path(&forked, "forked conversation").await;
+    let forked_path = fetch_conversation_path(&forked).await;
    assert!(
        forked_path.exists(),
        "second compact test expects forked path {forked_path:?} to exist",
@@ -557,13 +579,15 @@ async fn compact_resume_after_second_compaction_preserves_history() {
    let resume_input_array = input_after_resume
        .as_array()
        .expect("input after resume should be an array");
+    let compact_filtered = filter_out_ghost_snapshot_entries(compact_input_array);
+    let resume_filtered = filter_out_ghost_snapshot_entries(resume_input_array);
    assert!(
-        compact_input_array.len() <= resume_input_array.len(),
+        compact_filtered.len() <= resume_filtered.len(),
        "after-resume input should have at least as many items as after-compact"
    );
    assert_eq!(
-        compact_input_array.as_slice(),
-        &resume_input_array[..compact_input_array.len()]
+        compact_filtered.as_slice(),
+        &resume_filtered[..compact_filtered.len()]
    );
    // hard coded test
    let prompt = requests[0]["instructions"]
@@ -792,22 +816,8 @@ async fn compact_conversation(conversation: &Arc<CodexConversation>) {
    wait_for_event(conversation, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
 }

-async fn fetch_conversation_path(
-    conversation: &Arc<CodexConversation>,
-    context: &str,
-) -> std::path::PathBuf {
-    conversation
-        .submit(Op::GetPath)
-        .await
-        .expect("request conversation path");
-    match wait_for_event(conversation, |ev| {
-        matches!(ev, EventMsg::ConversationPath(_))
-    })
-    .await
-    {
-        EventMsg::ConversationPath(ConversationPathResponseEvent { path, .. }) => path,
-        _ => panic!("expected ConversationPath event for {context}"),
-    }
+async fn fetch_conversation_path(conversation: &Arc<CodexConversation>) -> std::path::PathBuf { // Still `async`, so existing `.await` call sites keep compiling.
+    conversation.rollout_path() // Path is read directly from the conversation; no op is submitted and no event is awaited.
+}

 async fn resume_conversation(
--- a/codex-rs/core/tests/suite/fork_conversation.rs
+++ b/codex-rs/core/tests/suite/fork_conversation.rs
@@ -4,7 +4,6 @@ use codex_core::ModelProviderInfo;
 use codex_core::NewConversation;
 use codex_core::built_in_model_providers;
 use codex_core::parse_turn_item;
-use codex_core::protocol::ConversationPathResponseEvent;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::Op;
 use codex_core::protocol::RolloutItem;
@@ -79,13 +78,7 @@ async fn fork_conversation_twice_drops_to_first_message() {
    }

    // Request history from the base conversation to obtain rollout path.
-    codex.submit(Op::GetPath).await.unwrap();
-    let base_history =
-        wait_for_event(&codex, |ev| matches!(ev, EventMsg::ConversationPath(_))).await;
-    let base_path = match &base_history {
-        EventMsg::ConversationPath(ConversationPathResponseEvent { path, .. }) => path.clone(),
-        _ => panic!("expected ConversationHistory event"),
-    };
+    let base_path = codex.rollout_path();

    // GetHistory flushes before returning the path; no wait needed.

@@ -140,15 +133,7 @@ async fn fork_conversation_twice_drops_to_first_message() {
        .await
        .expect("fork 1");

-    codex_fork1.submit(Op::GetPath).await.unwrap();
-    let fork1_history = wait_for_event(&codex_fork1, |ev| {
-        matches!(ev, EventMsg::ConversationPath(_))
-    })
-    .await;
-    let fork1_path = match &fork1_history {
-        EventMsg::ConversationPath(ConversationPathResponseEvent { path, .. }) => path.clone(),
-        _ => panic!("expected ConversationHistory event after first fork"),
-    };
+    let fork1_path = codex_fork1.rollout_path();

    // GetHistory on fork1 flushed; the file is ready.
    let fork1_items = read_items(&fork1_path);
@@ -166,15 +151,7 @@ async fn fork_conversation_twice_drops_to_first_message() {
        .await
        .expect("fork 2");

-    codex_fork2.submit(Op::GetPath).await.unwrap();
-    let fork2_history = wait_for_event(&codex_fork2, |ev| {
-        matches!(ev, EventMsg::ConversationPath(_))
-    })
-    .await;
-    let fork2_path = match &fork2_history {
-        EventMsg::ConversationPath(ConversationPathResponseEvent { path, .. }) => path.clone(),
-        _ => panic!("expected ConversationHistory event after second fork"),
-    };
+    let fork2_path = codex_fork2.rollout_path();
    // GetHistory on fork2 flushed; the file is ready.
    let fork1_items = read_items(&fork1_path);
    let fork1_user_inputs = find_user_input_positions(&fork1_items);
--- a/codex-rs/core/tests/suite/mod.rs
+++ b/codex-rs/core/tests/suite/mod.rs
@@ -33,6 +33,7 @@ mod stream_no_completed;
 mod tool_harness;
 mod tool_parallelism;
 mod tools;
+mod truncation;
 mod unified_exec;
 mod user_notification;
 mod view_image;
--- a/codex-rs/core/tests/suite/review.rs
+++ b/codex-rs/core/tests/suite/review.rs
@@ -7,7 +7,6 @@ use codex_core::REVIEW_PROMPT;
 use codex_core::ResponseItem;
 use codex_core::built_in_model_providers;
 use codex_core::config::Config;
-use codex_core::protocol::ConversationPathResponseEvent;
 use codex_core::protocol::ENVIRONMENT_CONTEXT_OPEN_TAG;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::ExitedReviewModeEvent;
@@ -120,13 +119,7 @@ async fn review_op_emits_lifecycle_and_review_output() {

    // Also verify that a user message with the header and a formatted finding
    // was recorded back in the parent session's rollout.
-    codex.submit(Op::GetPath).await.unwrap();
-    let history_event =
-        wait_for_event(&codex, |ev| matches!(ev, EventMsg::ConversationPath(_))).await;
-    let path = match history_event {
-        EventMsg::ConversationPath(ConversationPathResponseEvent { path, .. }) => path,
-        other => panic!("expected ConversationPath event, got {other:?}"),
-    };
+    let path = codex.rollout_path();
    let text = std::fs::read_to_string(&path).expect("read rollout file");

    let mut saw_header = false;
@@ -375,7 +368,8 @@ async fn review_input_isolated_from_parent_history() {
                "instructions": null,
                "cwd": ".",
                "originator": "test_originator",
-                "cli_version": "test_version"
+                "cli_version": "test_version",
+                "model_provider": "test-provider"
            }
        });
        f.write_all(format!("{meta_line}\n").as_bytes())
@@ -482,13 +476,7 @@ async fn review_input_isolated_from_parent_history() {
    assert_eq!(instructions, REVIEW_PROMPT);

    // Also verify that a user interruption note was recorded in the rollout.
-    codex.submit(Op::GetPath).await.unwrap();
-    let history_event =
-        wait_for_event(&codex, |ev| matches!(ev, EventMsg::ConversationPath(_))).await;
-    let path = match history_event {
-        EventMsg::ConversationPath(ConversationPathResponseEvent { path, .. }) => path,
-        other => panic!("expected ConversationPath event, got {other:?}"),
-    };
+    let path = codex.rollout_path();
    let text = std::fs::read_to_string(&path).expect("read rollout file");
    let mut saw_interruption_message = false;
    for line in text.lines() {
--- a/codex-rs/core/tests/suite/rmcp_client.rs
+++ b/codex-rs/core/tests/suite/rmcp_client.rs
@@ -14,6 +14,8 @@ use codex_core::features::Feature;

 use codex_core::protocol::AskForApproval;
 use codex_core::protocol::EventMsg;
+use codex_core::protocol::McpInvocation;
+use codex_core::protocol::McpToolCallBeginEvent;
 use codex_core::protocol::Op;
 use codex_core::protocol::SandboxPolicy;
 use codex_protocol::config_types::ReasoningSummary;
@@ -25,7 +27,9 @@ use core_test_support::test_codex::test_codex;
 use core_test_support::wait_for_event;
 use core_test_support::wait_for_event_with_timeout;
 use escargot::CargoBuild;
+use mcp_types::ContentBlock;
 use serde_json::Value;
+use serde_json::json;
 use serial_test::serial;
 use tempfile::tempdir;
 use tokio::net::TcpStream;
@@ -35,6 +39,8 @@ use tokio::time::Instant;
 use tokio::time::sleep;
 use wiremock::matchers::any;

+static OPENAI_PNG: &str = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAD0AAAA9CAYAAAAeYmHpAAAE6klEQVR4Aeyau44UVxCGx1fZsmRLlm3Zoe0XcGQ5cUiCCIgJeS9CHgAhMkISQnIuGQgJEkBcxLW+nqnZ6uqqc+nuWRC7q/P3qetf9e+MtOwyX25O4Nep6JPyop++0qev9HrfgZ+F6r2DuB/vHOrt/UIkqdDHYvujOW6fO7h/CNEI+a5jc+pBR8uy0jVFsziYu5HtfSUk+Io34q921hLNctFSX0gwww+S8wce8K1LfCU+cYW4888aov8NxqvQILUPPReLOrm6zyLxa4i+6VZuFbJo8d1MOHZm+7VUtB/aIvhPWc/3SWg49JcwFLlHxuXKjtyloo+YNhuW3VS+WPBuUEMvCFKjEDVgFBQHXrnazpqiSxNZCkQ1kYiozsbm9Oz7l4i2Il7vGccGNWAc3XosDrZe/9P3ZnMmzHNEQw4smf8RQ87XEAMsC7Az0Au+dgXerfH4+sHvEc0SYGic8WBBUGqFH2gN7yDrazy7m2pbRTeRmU3+MjZmr1h6LJgPbGy23SI6GlYT0brQ71IY8Us4PNQCm+zepSbaD2BY9xCaAsD9IIj/IzFmKMSdHHonwdZATbTnYREf6/VZGER98N9yCWIvXQwXDoDdhZJoT8jwLnJXDB9w4Sb3e6nK5ndzlkTLnP3JBu4LKkbrYrU69gCVceV0JvpyuW1xlsUVngzhwMetn/XamtTORF9IO5YnWNiyeF9zCAfqR3fUW+vZZKLtgP+ts8BmQRBREAdRDhH3o8QuRh/YucNFz2BEjxbRN6LGzphfKmvP6v6QhqIQyZ8XNJ0W0X83MR1PEcJBNO2KC2Z1TW/v244scp9FwRViZxIOBF0Lctk7ZVSavdLvRlV1hz/ysUi9sr8CIcB3nvWBwA93ykTz18eAYxQ6N/K2DkPA1lv3iXCwmDUT7YkjIby9siXueIJj9H+pzSqJ9oIuJWTUgSSt4WO7o/9GGg0viR4VinNRUDoIj34xoCd6pxD3aK3zfdbnx5v1J3ZNNEJsE0sBG7N27ReDrJc4sFxz7dI/ZAbOmmiKvHBitQXpAdR6+F7v+/ol/tOouUV01EeMZQF2BoQDn6dP4XNr+j9GZEtEK1/L8pFw7bd3a53tsTa7WD+054jOFmPg1XBKPQgnqFfmFcy32ZRvjmiIIQTYFvyDxQ8nH8WIwwGwlyDjDznnilYyFr6njrlZwsKkBpO59A7OwgdzPEWRm+G+oeb7IfyNuzjEEVLrOVxJsxvxwF8kmCM6I2QYmJunz4u4TrADpfl7mlbRTWQ7VmrBzh3+C9f6Grc3YoGN9dg/SXFthpRsT6vobfXRs2VBlgBHXVMLHjDNbIZv1sZ9+X3hB09cXdH1JKViyG0+W9bWZDa/r2f9zAFR71sTzGpMSWz2iI4YssWjWo3REy1MDGjdwe5e0dFSiAC1JakBvu4/CUS8Eh6dqHdU0Or0ioY3W5ClSqDXAy7/6SRfgw8vt4I+tbvvNtFT2kVDhY5+IGb1rCqYaXNF08vSALsXCPmt0kQNqJT1p5eI1mkIV/BxCY1z85lOzeFbPBQHURkkPTlwTYK9gTVE25l84IbFFN+YJDHjdpn0gq6mrHht0dkcjbM4UL9283O5p77GN+SPW/QwVB4IUYg7Or+Kp7naR6qktP98LNF2UxWo9yObPIT9KYg+hK4i56no4rfnM0qeyFf6AwAAAP//trwR3wAAAAZJREFUAwBZ0sR75itw5gAAAABJRU5ErkJggg==";
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
 #[serial(mcp_test_value)]
 async fn stdio_server_round_trip() -> anyhow::Result<()> {
@@ -175,6 +181,352 @@ async fn stdio_server_round_trip() -> anyhow::Result<()> {
    Ok(())
 }

+#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
+#[serial(mcp_test_value)]
+async fn stdio_image_responses_round_trip() -> anyhow::Result<()> { // Image returned by the stdio MCP tool must reach the model as an `input_image` output item.
+    skip_if_no_network!(Ok(())); // NOTE(review): presumably returns `Ok(())` early when network access is unavailable - confirm in the macro.
+
+    let server = responses::start_mock_server().await;
+
+    let call_id = "img-1";
+    let server_name = "rmcp";
+    let tool_name = format!("mcp__{server_name}__image"); // Tool name as sent in the mocked function call: `mcp__<server>__<tool>`.
+
+    // First stream: model decides to call the image tool.
+    mount_sse_once_match(
+        &server,
+        any(),
+        responses::sse(vec![
+            responses::ev_response_created("resp-1"),
+            responses::ev_function_call(call_id, &tool_name, "{}"),
+            responses::ev_completed("resp-1"),
+        ]),
+    )
+    .await;
+    // Second stream: after tool execution, assistant emits a message and completes.
+    let final_mock = mount_sse_once_match(
+        &server,
+        any(),
+        responses::sse(vec![
+            responses::ev_assistant_message("msg-1", "rmcp image tool completed successfully."),
+            responses::ev_completed("resp-2"),
+        ]),
+    )
+    .await;
+
+    // Build the stdio rmcp server and pass the image as data URL so it can construct ImageContent.
+    let rmcp_test_server_bin = CargoBuild::new()
+        .package("codex-rmcp-client")
+        .bin("test_stdio_server")
+        .run()?
+        .path()
+        .to_string_lossy()
+        .into_owned();
+
+    let fixture = test_codex()
+        .with_config(move |config| {
+            config.features.enable(Feature::RmcpClient);
+            config.mcp_servers.insert(
+                server_name.to_string(),
+                McpServerConfig {
+                    transport: McpServerTransportConfig::Stdio {
+                        command: rmcp_test_server_bin,
+                        args: Vec::new(),
+                        env: Some(HashMap::from([(
+                            "MCP_TEST_IMAGE_DATA_URL".to_string(), // Env var through which the test server receives the image.
+                            OPENAI_PNG.to_string(),
+                        )])),
+                        env_vars: Vec::new(),
+                        cwd: None,
+                    },
+                    enabled: true,
+                    startup_timeout_sec: Some(Duration::from_secs(10)),
+                    tool_timeout_sec: None,
+                    enabled_tools: None,
+                    disabled_tools: None,
+                },
+            );
+        })
+        .build(&server)
+        .await?;
+    let session_model = fixture.session_configured.model.clone();
+
+    fixture
+        .codex
+        .submit(Op::UserTurn {
+            items: vec![UserInput::Text {
+                text: "call the rmcp image tool".into(),
+            }],
+            final_output_json_schema: None,
+            cwd: fixture.cwd.path().to_path_buf(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy: SandboxPolicy::ReadOnly,
+            model: session_model,
+            effort: None,
+            summary: ReasoningSummary::Auto,
+        })
+        .await?;
+
+    // Wait for tool begin/end and final completion.
+    let begin_event = wait_for_event_with_timeout(
+        &fixture.codex,
+        |ev| matches!(ev, EventMsg::McpToolCallBegin(_)),
+        Duration::from_secs(10),
+    )
+    .await;
+    let EventMsg::McpToolCallBegin(begin) = begin_event else {
+        unreachable!("begin"); // The predicate above only accepts `McpToolCallBegin`.
+    };
+    assert_eq!(
+        begin,
+        McpToolCallBeginEvent {
+            call_id: call_id.to_string(),
+            invocation: McpInvocation {
+                server: server_name.to_string(),
+                tool: "image".to_string(), // Expected without the `mcp__rmcp__` prefix used in the model-facing name.
+                arguments: Some(json!({})),
+            },
+        },
+    );
+
+    let end_event = wait_for_event(&fixture.codex, |ev| {
+        matches!(ev, EventMsg::McpToolCallEnd(_))
+    })
+    .await;
+    let EventMsg::McpToolCallEnd(end) = end_event else {
+        unreachable!("end"); // The predicate above only accepts `McpToolCallEnd`.
+    };
+    assert_eq!(end.call_id, call_id);
+    assert_eq!(
+        end.invocation,
+        McpInvocation {
+            server: server_name.to_string(),
+            tool: "image".to_string(),
+            arguments: Some(json!({})),
+        }
+    );
+    let result = end.result.expect("rmcp image tool should return success");
+    assert_eq!(result.is_error, Some(false));
+    assert_eq!(result.content.len(), 1);
+    let base64_only = OPENAI_PNG
+        .strip_prefix("data:image/png;base64,")
+        .expect("data url prefix"); // The tool result is expected to carry the payload without the data-URL prefix.
+    match &result.content[0] {
+        ContentBlock::ImageContent(img) => {
+            assert_eq!(img.mime_type, "image/png");
+            assert_eq!(img.r#type, "image");
+            assert_eq!(img.data, base64_only);
+        }
+        other => panic!("expected image content, got {other:?}"),
+    }
+
+    wait_for_event(&fixture.codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    let output_item = final_mock.single_request().function_call_output(call_id); // Output item sent back to the model in the follow-up request.
+    assert_eq!(
+        output_item,
+        json!({
+            "type": "function_call_output",
+            "call_id": call_id,
+            "output": [{
+                "type": "input_image",
+                "image_url": OPENAI_PNG
+            }]
+        })
+    );
+    server.verify().await;
+    Ok(())
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
+#[serial(mcp_test_value)]
+async fn stdio_image_completions_round_trip() -> anyhow::Result<()> { // Chat Completions variant: the image must come back as a `tool` message with `image_url` content.
+    skip_if_no_network!(Ok(())); // NOTE(review): presumably returns `Ok(())` early when network access is unavailable - confirm in the macro.
+
+    let server = responses::start_mock_server().await;
+
+    let call_id = "img-cc-1";
+    let server_name = "rmcp";
+    let tool_name = format!("mcp__{server_name}__image");
+
+    let tool_call = json!({
+        "choices": [
+            {
+                "delta": {
+                    "tool_calls": [
+                        {
+                            "id": call_id,
+                            "type": "function",
+                            "function": {"name": tool_name, "arguments": "{}"}
+                        }
+                    ]
+                },
+                "finish_reason": "tool_calls"
+            }
+        ]
+    });
+    let sse_tool_call = format!(
+        "data: {}\n\ndata: [DONE]\n\n",
+        serde_json::to_string(&tool_call)?
+    ); // First streamed body: a single tool-call chunk followed by `[DONE]`.
+
+    let final_assistant = json!({
+        "choices": [
+            {
+                "delta": {"content": "rmcp image tool completed successfully."},
+                "finish_reason": "stop"
+            }
+        ]
+    });
+    let sse_final = format!(
+        "data: {}\n\ndata: [DONE]\n\n",
+        serde_json::to_string(&final_assistant)?
+    ); // Second streamed body: the assistant's closing message followed by `[DONE]`.
+
+    use std::sync::atomic::AtomicUsize;
+    use std::sync::atomic::Ordering;
+    struct ChatSeqResponder { // Serves `bodies` in order, one per incoming request.
+        num_calls: AtomicUsize,
+        bodies: Vec<String>,
+    }
+    impl wiremock::Respond for ChatSeqResponder {
+        fn respond(&self, _: &wiremock::Request) -> wiremock::ResponseTemplate {
+            let idx = self.num_calls.fetch_add(1, Ordering::SeqCst); // Index of this call; the counter is bumped atomically.
+            match self.bodies.get(idx) {
+                Some(body) => wiremock::ResponseTemplate::new(200)
+                    .insert_header("content-type", "text/event-stream")
+                    .set_body_string(body.clone()),
+                None => panic!("no chat completion response for index {idx}"), // More requests arrived than bodies were prepared.
+            }
+        }
+    }
+
+    let chat_seq = ChatSeqResponder {
+        num_calls: AtomicUsize::new(0),
+        bodies: vec![sse_tool_call, sse_final],
+    };
+    wiremock::Mock::given(wiremock::matchers::method("POST"))
+        .and(wiremock::matchers::path("/v1/chat/completions"))
+        .respond_with(chat_seq)
+        .expect(2) // One request for the tool call, one for the follow-up.
+        .mount(&server)
+        .await;
+
+    let rmcp_test_server_bin = CargoBuild::new()
+        .package("codex-rmcp-client")
+        .bin("test_stdio_server")
+        .run()?
+        .path()
+        .to_string_lossy()
+        .into_owned();
+
+    let fixture = test_codex()
+        .with_config(move |config| {
+            config.model_provider.wire_api = codex_core::WireApi::Chat; // Switch the provider to the Chat wire API for this test.
+            config.features.enable(Feature::RmcpClient);
+            config.mcp_servers.insert(
+                server_name.to_string(),
+                McpServerConfig {
+                    transport: McpServerTransportConfig::Stdio {
+                        command: rmcp_test_server_bin,
+                        args: Vec::new(),
+                        env: Some(HashMap::from([(
+                            "MCP_TEST_IMAGE_DATA_URL".to_string(), // Env var through which the test server receives the image.
+                            OPENAI_PNG.to_string(),
+                        )])),
+                        env_vars: Vec::new(),
+                        cwd: None,
+                    },
+                    enabled: true,
+                    startup_timeout_sec: Some(Duration::from_secs(10)),
+                    tool_timeout_sec: None,
+                    enabled_tools: None,
+                    disabled_tools: None,
+                },
+            );
+        })
+        .build(&server)
+        .await?;
+    let session_model = fixture.session_configured.model.clone();
+
+    fixture
+        .codex
+        .submit(Op::UserTurn {
+            items: vec![UserInput::Text {
+                text: "call the rmcp image tool".into(),
+            }],
+            final_output_json_schema: None,
+            cwd: fixture.cwd.path().to_path_buf(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy: SandboxPolicy::ReadOnly,
+            model: session_model,
+            effort: None,
+            summary: ReasoningSummary::Auto,
+        })
+        .await?;
+
+    let begin_event = wait_for_event_with_timeout(
+        &fixture.codex,
+        |ev| matches!(ev, EventMsg::McpToolCallBegin(_)),
+        Duration::from_secs(10),
+    )
+    .await;
+    let EventMsg::McpToolCallBegin(begin) = begin_event else {
+        unreachable!("begin"); // The predicate above only accepts `McpToolCallBegin`.
+    };
+    assert_eq!(
+        begin,
+        McpToolCallBeginEvent {
+            call_id: call_id.to_string(),
+            invocation: McpInvocation {
+                server: server_name.to_string(),
+                tool: "image".to_string(),
+                arguments: Some(json!({})),
+            },
+        },
+    );
+
+    let end_event = wait_for_event(&fixture.codex, |ev| {
+        matches!(ev, EventMsg::McpToolCallEnd(_))
+    })
+    .await;
+    let EventMsg::McpToolCallEnd(end) = end_event else {
+        unreachable!("end"); // The predicate above only accepts `McpToolCallEnd`.
+    };
+    assert!(end.result.as_ref().is_ok(), "tool call should succeed");
+
+    wait_for_event(&fixture.codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    // Chat Completions assertion: the second POST should include a tool role message
+    // with an array `content` containing an item with the expected data URL.
+    let requests = server.received_requests().await.expect("requests captured");
+    assert!(requests.len() >= 2, "expected two chat completion calls");
+    let second = &requests[1]; // The follow-up request that carries the tool output.
+    let body: Value = serde_json::from_slice(&second.body)?;
+    let messages = body
+        .get("messages")
+        .and_then(Value::as_array)
+        .cloned()
+        .expect("messages array");
+    let tool_msg = messages
+        .iter()
+        .find(|m| {
+            m.get("role") == Some(&json!("tool")) && m.get("tool_call_id") == Some(&json!(call_id))
+        })
+        .cloned()
+        .expect("tool message present");
+    assert_eq!(
+        tool_msg,
+        json!({
+            "role": "tool",
+            "tool_call_id": call_id,
+            "content": [{"type": "image_url", "image_url": {"url": OPENAI_PNG}}] // Full data URL, prefix included.
+        })
+    );
+
+    Ok(())
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 1)]
 #[serial(mcp_test_value)]
 async fn stdio_server_propagates_whitelisted_env_vars() -> anyhow::Result<()> {
--- a/codex-rs/core/tests/suite/rollout_list_find.rs
+++ b/codex-rs/core/tests/suite/rollout_list_find.rs
@@ -28,7 +28,8 @@ fn write_minimal_rollout_with_id(codex_home: &Path, id: Uuid) -> PathBuf {
                "instructions": null,
                "cwd": ".",
                "originator": "test",
-                "cli_version": "test"
+                "cli_version": "test",
+                "model_provider": "test-provider"
            }
        })
    )
--- a/codex-rs/core/tests/suite/truncation.rs
+++ b/codex-rs/core/tests/suite/truncation.rs
@@ -0,0 +1,270 @@
+#![cfg(not(target_os = "windows"))]
+#![allow(clippy::unwrap_used, clippy::expect_used)]
+
+use anyhow::Context;
+use anyhow::Result;
+use codex_core::features::Feature;
+use codex_core::model_family::find_family_for_model;
+use codex_core::protocol::SandboxPolicy;
+use core_test_support::assert_regex_match;
+use core_test_support::responses;
+use core_test_support::responses::ev_assistant_message;
+use core_test_support::responses::ev_completed;
+use core_test_support::responses::ev_function_call;
+use core_test_support::responses::ev_response_created;
+use core_test_support::responses::mount_sse_once_match;
+use core_test_support::responses::mount_sse_sequence;
+use core_test_support::responses::sse;
+use core_test_support::responses::start_mock_server;
+use core_test_support::skip_if_no_network;
+use core_test_support::test_codex::test_codex;
+use escargot::CargoBuild;
+use regex_lite::Regex;
+use serde_json::Value;
+use serde_json::json;
+use wiremock::matchers::any;
+
+// Verifies that a function-tool error (the RespondToModel error path) is byte-truncated before it is sent back to the model.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn truncate_function_error_trims_respond_to_model() -> Result<()> {
+    skip_if_no_network!(Ok(())); // presumably returns Ok(()) early when the network is unavailable; confirm against the macro
+
+    let server = start_mock_server().await;
+    let mut builder = test_codex().with_config(|config| {
+        // Use the test model that wires up function tools such as grep_files.
+        config.model = "test-gpt-5-codex".to_string();
+        config.model_family =
+            find_family_for_model("test-gpt-5-codex").expect("model family for test model");
+    });
+    let test = builder.build(&server).await?;
+
+    // Build a very long, non-existent path so the grep_files call fails with a RespondToModel error carrying a large message.
+    let long_path = "a".repeat(20_000); // 20,000 bytes, all the letter a
+    let call_id = "grep-huge-error";
+    let args = json!({
+        "pattern": "alpha",
+        "path": long_path,
+        "limit": 10
+    });
+    let responses = vec![
+        sse(vec![ // first stream: the model requests the grep_files call
+            ev_response_created("resp-1"),
+            ev_function_call(call_id, "grep_files", &serde_json::to_string(&args)?),
+            ev_completed("resp-1"),
+        ]),
+        sse(vec![ // second stream: an assistant message followed by completion
+            ev_assistant_message("msg-1", "done"),
+            ev_completed("resp-2"),
+        ]),
+    ];
+    let mock = mount_sse_sequence(&server, responses).await; // presumably serves the streams in order, one per request; confirm against the helper
+
+    test.submit_turn_with_policy(
+        "trigger grep_files with long path to test truncation",
+        SandboxPolicy::DangerFullAccess,
+    )
+    .await?;
+
+    let output = mock
+        .function_call_output_text(call_id) // presumably the function_call_output text recorded for this call_id; confirm
+        .context("function error output present")?;
+
+    tracing::debug!(output = %output, "truncated function error output");
+
+    // Expect plain text (not parseable as JSON) that starts with a one-line total header, ends with the 10240-byte truncation marker, and has no omitted-lines marker.
+    assert!(
+        serde_json::from_str::<serde_json::Value>(&output).is_err(),
+        "expected error output to be plain text",
+    );
+    let truncated_pattern = r#"(?s)^Total output lines: 1\s+.*\[\.\.\. output truncated to fit 10240 bytes \.\.\.\]\s*$"#;
+    assert_regex_match(truncated_pattern, &output);
+    assert!(
+        !output.contains("omitted"),
+        "line omission marker should not appear when no lines were dropped: {output}"
+    );
+
+    Ok(())
+}
+
+// Verifies that the output of a standard tool call (shell) which exceeds the model
+// formatting limits is truncated, with middle lines elided, before it is sent back to the model.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
+    skip_if_no_network!(Ok(())); // presumably returns Ok(()) early when the network is unavailable; confirm against the macro
+
+    let server = start_mock_server().await;
+
+    // Use a model that exposes the generic shell tool.
+    let mut builder = test_codex().with_config(|config| {
+        config.model = "gpt-5-codex".to_string();
+        config.model_family =
+            find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
+    });
+    let fixture = builder.build(&server).await?;
+
+    let call_id = "shell-too-large";
+    let args = serde_json::json!({
+        "command": ["/bin/sh", "-c", "seq 1 400"], // prints the integers 1 through 400, one per line
+        "timeout_ms": 5_000,
+    });
+
+    // First mock: the model asks us to run the shell tool. Second mock: the model finishes the turn.
+    mount_sse_once_match(
+        &server,
+        any(),
+        sse(vec![
+            responses::ev_response_created("resp-1"),
+            responses::ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
+            responses::ev_completed("resp-1"),
+        ]),
+    )
+    .await;
+    let mock2 = mount_sse_once_match( // kept so the follow-up request can be inspected below
+        &server,
+        any(),
+        sse(vec![
+            responses::ev_assistant_message("msg-1", "done"),
+            responses::ev_completed("resp-2"),
+        ]),
+    )
+    .await;
+
+    fixture
+        .submit_turn_with_policy("trigger big shell output", SandboxPolicy::DangerFullAccess)
+        .await?;
+
+    // Inspect the follow-up request sent back to the model; it should carry a
+    // truncated function_call_output for the shell call.
+    let output = mock2
+        .single_request() // presumably requires that exactly one request reached this mock; confirm against the helper
+        .function_call_output_text(call_id)
+        .context("function_call_output present for shell call")?;
+
+    // Expect plain text (not JSON): header lines, output lines 1-6, an "omitted 144 of 400 lines" marker, then output lines 396-400.
+    assert!(
+        serde_json::from_str::<Value>(&output).is_err(),
+        "expected truncated shell output to be plain text"
+    );
+    let truncated_pattern = r#"(?s)^Exit code: 0
+Wall time: .* seconds
+Total output lines: 400
+Output:
+1
+2
+3
+4
+5
+6
+.*
+\[\.{3} omitted 144 of 400 lines \.{3}\]
+
+.*
+396
+397
+398
+399
+400
+$"#;
+    assert_regex_match(truncated_pattern, &output);
+
+    Ok(())
+}
+
+// Verifies that an MCP tool call result which exceeds the model formatting limits
+// is byte-truncated before it is sent back to the model.
+#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
+async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
+    skip_if_no_network!(Ok(())); // presumably returns Ok(()) early when the network is unavailable; confirm against the macro
+
+    let server = start_mock_server().await;
+
+    let call_id = "rmcp-truncated";
+    let server_name = "rmcp";
+    let tool_name = format!("mcp__{server_name}__echo"); // evaluates to mcp__rmcp__echo
+
+    // Build a very large message so the tool result exceeds 10 KiB (10240 bytes) once serialized.
+    let large_msg = "long-message-with-newlines-".repeat(600); // 27 bytes x 600 = 16,200 bytes; the literal itself contains no newline characters
+    let args_json = serde_json::json!({ "message": large_msg });
+
+    mount_sse_once_match( // first mock: the model requests the MCP echo tool call
+        &server,
+        any(),
+        sse(vec![
+            responses::ev_response_created("resp-1"),
+            responses::ev_function_call(call_id, &tool_name, &args_json.to_string()),
+            responses::ev_completed("resp-1"),
+        ]),
+    )
+    .await;
+    let mock2 = mount_sse_once_match( // second mock: the model finishes the turn; kept so the follow-up request can be inspected
+        &server,
+        any(),
+        sse(vec![
+            responses::ev_assistant_message("msg-1", "rmcp echo tool completed."),
+            responses::ev_completed("resp-2"),
+        ]),
+    )
+    .await;
+
+    // Build the test_stdio_server binary from the codex-rmcp-client package and register it below as a stdio MCP server.
+    let rmcp_test_server_bin = CargoBuild::new()
+        .package("codex-rmcp-client")
+        .bin("test_stdio_server")
+        .run()?
+        .path()
+        .to_string_lossy()
+        .into_owned();
+
+    let mut builder = test_codex().with_config(move |config| {
+        config.features.enable(Feature::RmcpClient);
+        config.mcp_servers.insert(
+            server_name.to_string(),
+            codex_core::config_types::McpServerConfig {
+                transport: codex_core::config_types::McpServerTransportConfig::Stdio {
+                    command: rmcp_test_server_bin,
+                    args: Vec::new(),
+                    env: None,
+                    env_vars: Vec::new(),
+                    cwd: None,
+                },
+                enabled: true,
+                startup_timeout_sec: Some(std::time::Duration::from_secs(10)),
+                tool_timeout_sec: None,
+                enabled_tools: None,
+                disabled_tools: None,
+            },
+        );
+    });
+    let fixture = builder.build(&server).await?;
+
+    fixture
+        .submit_turn_with_policy(
+            "call the rmcp echo tool with a very large message",
+            SandboxPolicy::ReadOnly, // read-only policy here; the other two tests in this file use DangerFullAccess
+        )
+        .await?;
+
+    // The MCP tool call result reaches the model as a function_call_output; read it from the follow-up request.
+    let output = mock2
+        .single_request() // presumably requires that exactly one request reached this mock; confirm against the helper
+        .function_call_output_text(call_id)
+        .context("function_call_output present for rmcp call")?;
+
+    // Expect plain text: a one-line total header, a blank line, the start of a JSON object, and the 10240-byte truncation marker.
+    assert!(
+        serde_json::from_str::<Value>(&output).is_err(),
+        "expected truncated MCP output to be plain text"
+    );
+    assert!(
+        output.starts_with("Total output lines: 1\n\n{"),
+        "expected total line header and JSON head, got: {output}"
+    );
+    let byte_marker = Regex::new(r"\[\.\.\. output truncated to fit 10240 bytes \.\.\.\]")
+        .expect("compile regex");
+    assert!(
+        byte_marker.is_match(&output),
+        "expected byte truncation marker, got: {output}"
+    );
+
+    Ok(())
+}
--- a/codex-rs/core/tests/suite/unified_exec.rs
+++ b/codex-rs/core/tests/suite/unified_exec.rs
@@ -240,7 +240,7 @@ async fn unified_exec_emits_output_delta_for_exec_command() -> Result<()> {
    let call_id = "uexec-delta-1";
    let args = json!({
        "cmd": "printf 'HELLO-UEXEC'",
-        "yield_time_ms": 250,
+        "yield_time_ms": 1000,
    });

    let responses = vec![
--- a/codex-rs/core/tests/suite/view_image.rs
+++ b/codex-rs/core/tests/suite/view_image.rs
@@ -19,6 +19,10 @@ use core_test_support::skip_if_no_network;
 use core_test_support::test_codex::TestCodex;
 use core_test_support::test_codex::test_codex;
 use core_test_support::wait_for_event;
+use image::GenericImageView;
+use image::ImageBuffer;
+use image::Rgba;
+use image::load_from_memory;
 use serde_json::Value;
 use wiremock::matchers::any;

@@ -49,6 +53,88 @@ fn extract_output_text(item: &Value) -> Option<&str> {
    })
 }

+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn user_turn_with_local_image_attaches_image() -> anyhow::Result<()> { // checks that a LocalImage user input is sent as a PNG data URL whose image is smaller than the file on disk
+    skip_if_no_network!(Ok(())); // presumably returns Ok(()) early when the network is unavailable; confirm against the macro
+
+    let server = start_mock_server().await;
+
+    let TestCodex {
+        codex,
+        cwd,
+        session_configured,
+        ..
+    } = test_codex().build(&server).await?;
+
+    let rel_path = "user-turn/example.png";
+    let abs_path = cwd.path().join(rel_path);
+    if let Some(parent) = abs_path.parent() {
+        std::fs::create_dir_all(parent)?; // make sure the user-turn/ directory exists before saving
+    }
+    let image = ImageBuffer::from_pixel(4096, 1024, Rgba([20u8, 40, 60, 255])); // 4096x1024 image filled with a single color
+    image.save(&abs_path)?; // presumably encoded as PNG because of the .png extension; confirm against the image crate
+
+    let response = sse(vec![
+        ev_response_created("resp-1"),
+        ev_assistant_message("msg-1", "done"),
+        ev_completed("resp-1"),
+    ]);
+    let mock = responses::mount_sse_once_match(&server, any(), response).await;
+
+    let session_model = session_configured.model.clone();
+
+    codex
+        .submit(Op::UserTurn {
+            items: vec![UserInput::LocalImage {
+                path: abs_path.clone(),
+            }],
+            final_output_json_schema: None,
+            cwd: cwd.path().to_path_buf(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy: SandboxPolicy::DangerFullAccess,
+            model: session_model,
+            effort: None,
+            summary: ReasoningSummary::Auto,
+        })
+        .await?;
+
+    wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await; // block until the turn reports completion
+
+    let body = mock.single_request().body_json(); // presumably the JSON body of the only request the mock received; confirm against the helper
+    let image_message =
+        find_image_message(&body).expect("pending input image message not included in request");
+    let image_url = image_message
+        .get("content")
+        .and_then(Value::as_array)
+        .and_then(|content| {
+            content.iter().find_map(|span| { // first content item of type input_image that has a string image_url
+                if span.get("type").and_then(Value::as_str) == Some("input_image") {
+                    span.get("image_url").and_then(Value::as_str)
+                } else {
+                    None
+                }
+            })
+        })
+        .expect("image_url present");
+
+    let (prefix, encoded) = image_url
+        .split_once(',') // data URL: the media-type prefix sits before the first comma, the payload after it
+        .expect("image url contains data prefix");
+    assert_eq!(prefix, "data:image/png;base64");
+
+    let decoded = BASE64_STANDARD
+        .decode(encoded)
+        .expect("image data decodes from base64 for request");
+    let resized = load_from_memory(&decoded).expect("load resized image");
+    let (width, height) = resized.dimensions();
+    assert!(width <= 2048); // the image in the request must fit within 2048x768
+    assert!(height <= 768);
+    assert!(width < 4096); // and be strictly smaller than the 4096x1024 file written above
+    assert!(height < 1024);
+
+    Ok(())
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn view_image_tool_attaches_local_image() -> anyhow::Result<()> {
    skip_if_no_network!(Ok(()));
@@ -67,8 +153,8 @@ async fn view_image_tool_attaches_local_image() -> anyhow::Result<()> {
    if let Some(parent) = abs_path.parent() {
        std::fs::create_dir_all(parent)?;
    }
-    let image_bytes = b"fake_png_bytes".to_vec();
-    std::fs::write(&abs_path, &image_bytes)?;
+    let image = ImageBuffer::from_pixel(4096, 1024, Rgba([255u8, 0, 0, 255]));
+    image.save(&abs_path)?;

    let call_id = "view-image-call";
    let arguments = serde_json::json!({ "path": rel_path }).to_string();
@@ -143,11 +229,20 @@ async fn view_image_tool_attaches_local_image() -> anyhow::Result<()> {
        })
        .expect("image_url present");

-    let expected_image_url = format!(
-        "data:image/png;base64,{}",
-        BASE64_STANDARD.encode(&image_bytes)
-    );
-    assert_eq!(image_url, expected_image_url);
+    let (prefix, encoded) = image_url
+        .split_once(',')
+        .expect("image url contains data prefix");
+    assert_eq!(prefix, "data:image/png;base64");
+
+    let decoded = BASE64_STANDARD
+        .decode(encoded)
+        .expect("image data decodes from base64 for request");
+    let resized = load_from_memory(&decoded).expect("load resized image");
+    let (resized_width, resized_height) = resized.dimensions();
+    assert!(resized_width <= 2048);
+    assert!(resized_height <= 768);
+    assert!(resized_width < 4096);
+    assert!(resized_height < 1024);

    Ok(())
 }
--- a/codex-rs/exec/src/event_processor_with_human_output.rs
+++ b/codex-rs/exec/src/event_processor_with_human_output.rs
@@ -20,7 +20,6 @@ use codex_core::protocol::StreamErrorEvent;
 use codex_core::protocol::TaskCompleteEvent;
 use codex_core::protocol::TurnAbortReason;
 use codex_core::protocol::TurnDiffEvent;
-use codex_core::protocol::WebSearchBeginEvent;
 use codex_core::protocol::WebSearchEndEvent;
 use codex_protocol::num_format::format_with_separators;
 use owo_colors::OwoColorize;
@@ -216,7 +215,6 @@ impl EventProcessor for EventProcessorWithHumanOutput {
                    cwd.to_string_lossy(),
                );
            }
-            EventMsg::ExecCommandOutputDelta(_) => {}
            EventMsg::ExecCommandEnd(ExecCommandEndEvent {
                aggregated_output,
                duration,
@@ -283,7 +281,6 @@ impl EventProcessor for EventProcessorWithHumanOutput {
                    }
                }
            }
-            EventMsg::WebSearchBegin(WebSearchBeginEvent { call_id: _ }) => {}
            EventMsg::WebSearchEnd(WebSearchEndEvent { call_id: _, query }) => {
                ts_msg!(self, "🌐 Searched: {query}");
            }
@@ -411,12 +408,6 @@ impl EventProcessor for EventProcessorWithHumanOutput {
                );
                eprintln!("{unified_diff}");
            }
-            EventMsg::ExecApprovalRequest(_) => {
-                // Should we exit?
-            }
-            EventMsg::ApplyPatchApprovalRequest(_) => {
-                // Should we exit?
-            }
            EventMsg::AgentReasoning(agent_reasoning_event) => {
                if self.show_agent_reasoning {
                    ts_msg!(
@@ -481,15 +472,6 @@ impl EventProcessor for EventProcessorWithHumanOutput {
                    }
                }
            }
-            EventMsg::GetHistoryEntryResponse(_) => {
-                // Currently ignored in exec output.
-            }
-            EventMsg::McpListToolsResponse(_) => {
-                // Currently ignored in exec output.
-            }
-            EventMsg::ListCustomPromptsResponse(_) => {
-                // Currently ignored in exec output.
-            }
            EventMsg::ViewImageToolCall(view) => {
                ts_msg!(
                    self,
@@ -510,15 +492,24 @@ impl EventProcessor for EventProcessorWithHumanOutput {
                }
            },
            EventMsg::ShutdownComplete => return CodexStatus::Shutdown,
-            EventMsg::ConversationPath(_) => {}
-            EventMsg::UserMessage(_) => {}
-            EventMsg::EnteredReviewMode(_) => {}
-            EventMsg::ExitedReviewMode(_) => {}
-            EventMsg::AgentMessageDelta(_) => {}
-            EventMsg::AgentReasoningDelta(_) => {}
-            EventMsg::AgentReasoningRawContentDelta(_) => {}
-            EventMsg::ItemStarted(_) => {}
-            EventMsg::ItemCompleted(_) => {}
+            EventMsg::WebSearchBegin(_)
+            | EventMsg::ExecApprovalRequest(_)
+            | EventMsg::ApplyPatchApprovalRequest(_)
+            | EventMsg::ExecCommandOutputDelta(_)
+            | EventMsg::GetHistoryEntryResponse(_)
+            | EventMsg::McpListToolsResponse(_)
+            | EventMsg::ListCustomPromptsResponse(_)
+            | EventMsg::RawResponseItem(_)
+            | EventMsg::UserMessage(_)
+            | EventMsg::EnteredReviewMode(_)
+            | EventMsg::ExitedReviewMode(_)
+            | EventMsg::AgentMessageDelta(_)
+            | EventMsg::AgentReasoningDelta(_)
+            | EventMsg::AgentReasoningRawContentDelta(_)
+            | EventMsg::ItemStarted(_)
+            | EventMsg::ItemCompleted(_)
+            | EventMsg::UndoCompleted(_)
+            | EventMsg::UndoStarted(_) => {}
        }
        CodexStatus::Running
    }
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
zhao-oai	624b013dc7	Merge branch 'main' into update_agents_md	2025-10-28 09:04:15 -07:00
kevin zhao	f5a39beb75	.	2025-10-28 09:02:11 -07:00
kevin zhao	1dc8b5624a	.	2025-10-28 09:00:32 -07:00
jif-oai	5ba2a17576	chore: decompose submission loop (#5854 )	2025-10-28 15:23:46 +00:00
Owen Lin	266419217e	chore: use anyhow::Result for all app-server integration tests (#5836 ) There's a lot of visual noise in app-server's integration tests due to the number of `.expect("<some_msg>")` lines which are largely redundant / not very useful. Clean them up by using `anyhow::Result` + `?` consistently. Replaces the existing pattern of: ``` let codex_home = TempDir::new().expect("create temp dir"); create_config_toml(codex_home.path()).expect("write config.toml"); let mut mcp = McpProcess::new(codex_home.path()) .await .expect("spawn mcp process"); timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()) .await .expect("initialize timeout") .expect("initialize request"); ``` With: ``` let codex_home = TempDir::new()?; create_config_toml(codex_home.path())?; let mut mcp = McpProcess::new(codex_home.path()).await?; timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; ```	2025-10-28 08:10:23 -07:00
jif-oai	be4bdfec93	chore: drop useless shell stuff (#5848 )	2025-10-28 14:52:52 +00:00
jif-oai	7ff142d93f	chore: speed-up pipeline (#5812 ) Speed-up pipeline by: * Decoupling tests and clippy * Use pre-built binary in tests * `sccache` for caching of the builds	2025-10-28 14:08:52 +00:00
Celia Chen	4a42c4e142	[Auth] Choose which auth storage to use based on config (#5792 ) This PR is a follow-up to #5591. It allows users to choose which auth storage mode they want by using the new `cli_auth_credentials_store_mode` config.	2025-10-27 19:41:49 -07:00
Josh McKinney	66a4b89822	feat(tui): clarify Windows auto mode requirements (#5568 ) ## Summary - Coerce Windows `workspace-write` configs back to read-only, surface the forced downgrade in the approvals popup, and funnel users toward WSL or Full Access. - Add WSL installation instructions to the Auto preset on Windows while keeping the preset available for other platforms. - Skip the trust-on-first-run prompt on native Windows so new folders remain read-only without additional confirmation. - Expose a structured sandbox policy resolution from config to flag Windows downgrades and adjust tests (core, exec, TUI) to reflect the new behavior; provide a Windows-only approvals snapshot. ## Testing - cargo fmt - cargo test -p codex-core config::tests::add_dir_override_extends_workspace_writable_roots - cargo test -p codex-exec suite::resume::exec_resume_preserves_cli_configuration_overrides - cargo test -p codex-tui chatwidget::tests::approvals_selection_popup_snapshot - cargo test -p codex-tui approvals_popup_includes_wsl_note_for_auto_mode - cargo test -p codex-tui windows_skips_trust_prompt - just fix -p codex-core - just fix -p codex-tui	2025-10-28 01:19:32 +00:00
kevin zhao	e6b75d4359	.	2025-10-27 18:19:20 -07:00
kevin zhao	9ad24d7fa5	provide visibility that review threads ignore AGENTS.MD	2025-10-27 18:18:00 -07:00
Ahmed Ibrahim	d7b333be97	Truncate the content-item for mcp tools (#5835 ) This PR truncates the text output of MCP tool	2025-10-28 00:39:35 +00:00
zhao-oai	4d6a42a622	fix image drag drop (#5794 ) fixing drag/drop photos bug in codex state of the world before: sometimes, when you drag screenshots into codex, the image does not properly render into context. instead, the file name is shown in quotation marks. https://github.com/user-attachments/assets/3c0e540a-505c-4ec0-b634-e9add6a73119 the screenshot is not actually included in agent context. the agent needs to manually call the view_image tool to see the screenshot. this can be unreliable especially if the image is part of a longer prompt and is dependent on the agent going out of its way to view the image. state of the world after: https://github.com/user-attachments/assets/5f2b7bf7-8a3f-4708-85f3-d68a017bfd97 now, images will always be directly embedded into chat context ## Technical Details - MacOS sends screenshot paths with a narrow no‑break space right before the “AM/PM” suffix, which used to trigger our non‑ASCII fallback in the paste burst detector. - That fallback flushed the partially buffered paste immediately, so the path arrived in two separate `handle_paste` calls (quoted prefix + `PM.png'`). The split string could not be normalized to a real path, so we showed the quoted filename instead of embedding the image. - We now append non‑ASCII characters into the burst buffer when a burst is already active. Finder’s payload stays intact, the path normalizes, and the image attaches automatically. - When no burst is active (e.g. during IME typing), non‑ASCII characters still bypass the buffer so text entry remains responsive.	2025-10-27 17:11:30 -07:00
Gabriel Peal	b0bdc04c30	[MCP] Render MCP tool call result images to the model (#5600 ) It's pretty amazing we have gotten here without the ability for the model to see image content from MCP tool calls. This PR builds off of 4391 and fixes #4819. I would like @KKcorps to get adequate credit here but I also want to get this fix in ASAP so I gave him a week to update it and haven't gotten a response so I'm going to take it across the finish line. This test highlights how absurd the current situation is. I asked the model to read this image using the Chrome MCP <img width="2378" height="674" alt="image" src="https://github.com/user-attachments/assets/9ef52608-72a2-4423-9f5e-7ae36b2b56e0" /> After this change, it correctly outputs: > Captured the page: image dhows a dark terminal-style UI labeled `OpenAI Codex (v0.0.0)` with prompt `model: gpt-5-codex medium` and working directory `/codex/codex-rs` (and more) Before this change, it said: > Took the full-page screenshot you asked for. It shows a long, horizontally repeating pattern of stylized people in orange, light-blue, and mustard clothing, holding hands in alternating poses against a white background. No text or other graphics-just rows of flat illustration stretching off to the right. Without this change, the Figma, Playwright, Chrome, and other visual MCP servers are pretty much entirely useless. I tested this change with the openai responses api as well as a third party completions api	2025-10-27 17:55:57 -04:00
Owen Lin	67a219ffc2	fix: move account struct to app-server-protocol and use camelCase (#5829 ) Makes sense to move this struct to `app-server-protocol/` since we want to serialize as camelCase, but we don't for structs defined in `protocol/` It was: ``` export type Account = { "type": "ApiKey", api_key: string, } \| { "type": "chatgpt", email: string \| null, plan_type: PlanType, }; ``` But we want: ``` export type Account = { "type": "apiKey", apiKey: string, } \| { "type": "chatgpt", email: string \| null, planType: PlanType, }; ```	2025-10-27 14:06:13 -07:00
Ahmed Ibrahim	7226365397	Centralize truncation in conversation history (#5652 ) move the truncation logic to conversation history to use on any tool output. This will help us in avoiding edge cases while truncating the tool calls and mcp calls.	2025-10-27 14:05:35 -07:00
Celia Chen	0fc295d958	[Auth] Add keyring support for Codex CLI (#5591 ) Follow-up PR to #5569. Add Keyring Support for Auth Storage in Codex CLI as well as a hybrid mode (default to persisting in keychain but fall back to file when unavailable.) It also refactors out the keyringstore implementation from rmcp-client [here](https://github.com/openai/codex/blob/main/codex-rs/rmcp-client/src/oauth.rs) to a new keyring-store crate. There will be a follow-up that picks the right credential mode depending on the config, instead of hardcoding `AuthCredentialsStoreMode::File`.	2025-10-27 12:10:11 -07:00
jif-oai	3e50f94d76	feat: support verbosity in model_family (#5821 )	2025-10-27 18:46:30 +00:00
Celia Chen	eb5b1b627f	[Auth] Introduce New Auth Storage Abstraction for Codex CLI (#5569 ) This PR introduces a new `Auth Storage` abstraction layer that takes care of read, write, and load of auth tokens based on the AuthCredentialsStoreMode. It is similar to how we handle MCP client oauth [here](https://github.com/openai/codex/blob/main/codex-rs/rmcp-client/src/oauth.rs). Instead of reading and writing directly from disk for auth tokens, Codex CLI workflows now should instead use this auth storage using the public helper functions. This PR is just a refactor of the current code so the behavior stays the same. We will add support for keyring and hybrid mode in follow-up PRs. I have read the CLA Document and I hereby sign the CLA	2025-10-27 11:01:14 -07:00
Eric Traut	0c1ff1d3fd	Made token refresh code resilient to missing `id_token` (#5782 ) This PR does the following: 1. Changes `try_refresh_token` to handle the case where the endpoint returns a response without an `id_token`. The OpenID spec indicates that this field is optional and clients should not assume it's present. 2. Changes the `attempt_stream_responses` to propagate token refresh errors rather than silently ignoring them. 3. Fixes a typo in a couple of error messages (unrelated to the above, but something I noticed in passing) - "reconnect" should be spelled without a hyphen. This PR does not implement the additional suggestion from @pakrym-oai that we should sign out when receiving `refresh_token_expired` from the refresh endpoint. Leaving this as a follow-on because I'm undecided on whether this should be implemented in `try_refresh_token` or its callers.	2025-10-27 10:09:53 -07:00
jif-oai	aea7610c76	feat: image resizing (#5446 ) Add image resizing on the client side to reduce load on the API	2025-10-27 16:58:10 +00:00
jif-oai	775fbba6e0	feat: return an error if unknown enabled/disabled feature (#5817 )	2025-10-27 16:53:00 +00:00
Michael Bolin	5ee8a17b4e	feat: introduce GetConversationSummary RPC (#5803 ) This adds an RPC to the app server to get the `ConversationSummary` via a rollout path. Now that the VS Code extension supports showing the Codex UI in an editor panel where the URI of the panel maps to the rollout file, we need to be able to get the `ConversationSummary` from the rollout file directly.	2025-10-27 09:11:45 -07:00
jif-oai	81be54b229	fix: test yield time (#5811 )	2025-10-27 11:57:29 +00:00
jif-oai	5e8659dcbc	chore: undo nits (#5631 )	2025-10-27 11:48:01 +00:00
jif-oai	2338294b39	nit: doc on session task (#5809 )	2025-10-27 11:43:33 +00:00
jif-oai	afc4eaab8b	feat: TUI undo op (#5629 )	2025-10-27 10:55:29 +00:00
jif-oai	e92c4f6561	feat: async ghost commit (#5618 )	2025-10-27 10:09:10 +00:00
Michael Bolin	15fa2283e7	feat: update NewConversationParams to take an optional model_provider (#5793 ) An AppServer client should be able to use any (`model_provider`, `model`) in the user's config. `NewConversationParams` already supported specifying the `model`, but this PR expands it to support `model_provider`, as well. --- [//]: # (BEGIN SAPLING FOOTER) Stack created with [Sapling](https://sapling-scm.com). Best reviewed with [ReviewStack](https://reviewstack.dev/openai/codex/pull/5793). * #5803 * __->__ #5793	2025-10-27 09:33:30 +00:00
Michael Bolin	5907422d65	feat: annotate conversations with model_provider for filtering (#5658 ) Because conversations that use the Responses API can have encrypted reasoning messages, trying to resume a conversation with a different provider could lead to confusing "failed to decrypt" errors. (This is reproducible by starting a conversation using ChatGPT login and resuming it as a conversation that uses OpenAI models via Azure.) This changes `ListConversationsParams` to take a `model_providers: Option<Vec<String>>` and adds `model_provider` on each `ConversationSummary` it returns so these cases can be disambiguated. Note this ended up making changes to `codex-rs/core/src/rollout/tests.rs` because it had a number of cases where it expected `Some` for the value of `next_cursor`, but the list of rollouts was complete, so according to this docstring: `bcd64c7e72/codex-rs/app-server-protocol/src/protocol.rs (L334-L337)` If there are no more items to return, then `next_cursor` should be `None`. This PR updates that logic. --- [//]: # (BEGIN SAPLING FOOTER) Stack created with [Sapling](https://sapling-scm.com). Best reviewed with [ReviewStack](https://reviewstack.dev/openai/codex/pull/5658). * #5803 * #5793 * __->__ #5658	2025-10-27 02:03:30 -07:00
Ahmed Ibrahim	f178805252	Add feedback upload request handling (#5682 )	2025-10-27 05:53:39 +00:00
Michael Bolin	a55b0c4bcc	fix: revert "[app-server] fix account/read response annotation (#5642 )" (#5796 ) Revert #5642 because this generates: ``` // GENERATED CODE! DO NOT MODIFY BY HAND! // This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. export type GetAccountResponse = Account \| null; ``` But `Account` is unknown. The unique use of `#[ts(export)]` on `GetAccountResponse` is also suspicious as are the changes to `codex-rs/app-server-protocol/src/export.rs` since the existing system has worked fine for quite some time. Though a pure backout of #5642 puts things in a state where, as the PR noted, the following does not work: ``` cargo run -p codex-app-server-protocol --bin export -- --out DIR ``` So in addition to the backout, this PR adds: ```rust #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] #[serde(rename_all = "camelCase")] pub struct GetAccountResponse { pub account: Account, } ``` and changes `GetAccount.response` as follows: ```diff - response: Option<Account>, + response: GetAccountResponse, ``` making it consistent with other types. With this change, I verified that both of the following work: ``` just codex generate-ts --out /tmp/somewhere cargo run -p codex-app-server-protocol --bin export -- --out /tmp/somewhere-else ``` The generated TypeScript is as follows: ```typescript // GetAccountResponse.ts import type { Account } from "./Account"; export type GetAccountResponse = { account: Account, }; ``` and ```typescript // Account.ts import type { PlanType } from "./PlanType"; export type Account = { "type": "ApiKey", api_key: string, } \| { "type": "chatgpt", email: string \| null, plan_type: PlanType, }; ``` Though while the inconsistency between `"type": "ApiKey"` and `"type": "chatgpt"` is quite concerning, I'm not sure if that format is ever written to disk in any case, but @owenlin0, I would recommend looking into that. 
Also, it appears that the types in `codex-rs/protocol/src/account.rs` are used exclusively by the `app-server-protocol` crate, so perhaps they should just be moved there?	2025-10-26 18:57:42 -07:00
Thibault Sottiaux	224222f09f	fix: use codex-exp prefix for experimental models and consider codex- models to be production (#5797 )	2025-10-27 01:55:12 +00:00
Gabriel Peal	7aab45e060	[MCP] Minor docs clarifications around stdio tokens (#5676 ) Noticed [here](https://github.com/openai/codex/issues/4707#issuecomment-3446547561)	2025-10-26 13:38:30 -04:00
Eric Traut	bcd64c7e72	Reduced runtime of unit test that was taking multiple minutes (#5688 ) Modified `build_compacted_history_truncates_overlong_user_messages` test to reduce runtime from minutes to tens of seconds	2025-10-25 23:46:08 -07:00
Eric Traut	c124f24354	Added support for `sandbox_mode` in profiles (#5686 ) Currently, `approval_policy` is supported in profiles, but `sandbox_mode` is not. This PR adds support for `sandbox_mode`. Note: a fix for this was submitted in [this PR](https://github.com/openai/codex/pull/2397), but the underlying code has changed significantly since then. This addresses issue #3034	2025-10-25 16:52:26 -07:00
pakrym-oai	c7e4e6d0ee	Skip flaky test (#5680 ) Did an investigation but couldn't find anything obvious. Let's skip for now.	2025-10-25 12:11:16 -07:00
Ahmed Ibrahim	88abbf58ce	Followup feedback (#5663 ) - Added files to be uploaded - Refactored - Updated title	2025-10-25 06:07:40 +00:00
Ahmed Ibrahim	71f838389b	Improve feedback (#5661 ) <img width="1099" height="153" alt="image" src="https://github.com/user-attachments/assets/2c901884-8baf-4b1b-b2c4-bcb61ff42be8" /> <img width="1082" height="125" alt="image" src="https://github.com/user-attachments/assets/6336e6c9-9ace-46df-a383-a807ceffa524" /> <img width="1102" height="103" alt="image" src="https://github.com/user-attachments/assets/78883682-7e44-4fa3-9e04-57f7df4766fd" />	2025-10-24 22:28:14 -07:00
Eric Traut	0533bd2e7c	Fixed flaky unit test (#5654 ) This PR fixes a test that is sporadically failing in CI. The problem is that two unit tests (the older `login_and_cancel_chatgpt` and a recently added `login_chatgpt_includes_forced_workspace_query_param`) exercise code paths that start the login server. The server binds to a hard-coded localhost port number, so attempts to start more than one server at the same time will fail. If these two tests happen to run concurrently, one of them will fail. To fix this, I've added a simple mutex. We can use this same mutex for future tests that use the same pattern.	2025-10-24 16:31:24 -07:00
Anton Panasenko	6af83d86ff	[codex][app-server] introduce codex/event/raw_item events (#5578 )	2025-10-24 22:41:52 +00:00
Gabriel Peal	e2e1b65da6	[MCP] Properly gate login after `mcp add` with `experimental_use_rmcp_client` (#5653 ) There was supposed to be a check here like in other places.	2025-10-24 18:32:15 -04:00
Gabriel Peal	817d1508bc	[MCP] Redact environment variable values in `/mcp` and `mcp get` (#5648 ) Fixes #5524	2025-10-24 18:30:20 -04:00
Eric Traut	f8af4f5c8d	Added model summary and risk assessment for commands that violate sandbox policy (#5536 ) This PR adds support for a model-based summary and risk assessment for commands that violate the sandbox policy and require user approval. This aids the user in evaluating whether the command should be approved. The feature works by taking a failed command and passing it back to the model and asking it to summarize the command, give it a risk level (low, medium, high) and a risk category (e.g. "data deletion" or "data exfiltration"). It uses a new conversation thread so the context in the existing thread doesn't influence the answer. If the call to the model fails or takes longer than 5 seconds, it falls back to the current behavior. For now, this is an experimental feature and is gated by a config key `experimental_sandbox_command_assessment`. Here is a screen shot of the approval prompt showing the risk assessment and summary. <img width="723" height="282" alt="image" src="https://github.com/user-attachments/assets/4597dd7c-d5a0-4e9f-9d13-414bd082fd6b" />	2025-10-24 15:23:44 -07:00