Remove intentionally failing CI test

chore: intentionally fail tests
temp
2026-02-01 22:47:52 +00:00 · 2025-12-17 23:17:53 -08:00 · 2025-12-17 22:45:24 -08:00 · 2025-12-17 22:32:35 -08:00 · 2025-12-17 18:48:28 -08:00 · 2025-12-18 02:03:40 +00:00
1254 changed files with 100096 additions and 9140 deletions
--- a/.codespellignore
+++ b/.codespellignore
@@ -1 +1,2 @@
 iTerm
+psuedo
--- a/.github/actions/linux-code-sign/action.yml
+++ b/.github/actions/linux-code-sign/action.yml
@@ -0,0 +1,52 @@
+# Composite action that signs the Linux release binaries with cosign.
+# For each binary, a `<binary>.sigstore` bundle is written next to it.
+name: linux-code-sign
+description: Sign Linux artifacts with cosign.
+inputs:
+  # NOTE: no step in this action reads `target`.
+  target:
+    description: Target triple for the artifacts to sign.
+    required: true
+  artifacts-dir:
+    description: Absolute path to the directory containing built binaries to sign.
+    required: true
+
+runs:
+  using: composite
+  steps:
+    - name: Install cosign
+      uses: sigstore/cosign-installer@v3.7.0
+
+    - name: Cosign Linux artifacts
+      shell: bash
+      env:
+        COSIGN_EXPERIMENTAL: "1"
+        COSIGN_YES: "true"
+        COSIGN_OIDC_CLIENT_ID: "sigstore"
+        COSIGN_OIDC_ISSUER: "https://oauth2.sigstore.dev/auth"
+        # Hand the input to the script through the environment rather than
+        # expanding the expression inside the script text, so the value is
+        # treated as data and never parsed as shell source.
+        ARTIFACTS_DIR: ${{ inputs.artifacts-dir }}
+      run: |
+        set -euo pipefail
+
+        dest="$ARTIFACTS_DIR"
+        if [[ ! -d "$dest" ]]; then
+          echo "Destination $dest does not exist"
+          exit 1
+        fi
+
+        # Sign every shipped binary; fail fast if one was not built.
+        for binary in codex codex-responses-api-proxy; do
+          artifact="${dest}/${binary}"
+          if [[ ! -f "$artifact" ]]; then
+            echo "Binary $artifact not found"
+            exit 1
+          fi
+
+          cosign sign-blob \
+            --yes \
+            --bundle "${artifact}.sigstore" \
+            "$artifact"
+        done
--- a/.github/actions/macos-code-sign/action.yml
+++ b/.github/actions/macos-code-sign/action.yml
@@ -0,0 +1,233 @@
+# Composite action that signs and notarizes the macOS release binaries.
+# It imports the signing certificate into a temporary keychain, signs and
+# notarizes the binaries, then removes the keychain in an always-run step.
+name: macos-code-sign
+description: Configure, sign, notarize, and clean up macOS code signing artifacts.
+inputs:
+  target:
+    description: Rust compilation target triple (e.g. aarch64-apple-darwin).
+    required: true
+  apple-certificate:
+    description: Base64-encoded Apple signing certificate (P12).
+    required: true
+  apple-certificate-password:
+    description: Password for the signing certificate.
+    required: true
+  apple-notarization-key-p8:
+    description: Base64-encoded Apple notarization key (P8).
+    required: true
+  apple-notarization-key-id:
+    description: Apple notarization key ID.
+    required: true
+  apple-notarization-issuer-id:
+    description: Apple notarization issuer ID.
+    required: true
+runs:
+  using: composite
+  steps:
+    - name: Configure Apple code signing
+      shell: bash
+      env:
+        KEYCHAIN_PASSWORD: actions
+        APPLE_CERTIFICATE: ${{ inputs.apple-certificate }}
+        APPLE_CERTIFICATE_PASSWORD: ${{ inputs.apple-certificate-password }}
+      run: |
+        set -euo pipefail
+
+        if [[ -z "${APPLE_CERTIFICATE:-}" ]]; then
+          echo "APPLE_CERTIFICATE is required for macOS signing"
+          exit 1
+        fi
+
+        if [[ -z "${APPLE_CERTIFICATE_PASSWORD:-}" ]]; then
+          echo "APPLE_CERTIFICATE_PASSWORD is required for macOS signing"
+          exit 1
+        fi
+
+        # Decode the P12 certificate to a temporary file (deleted below).
+        cert_path="${RUNNER_TEMP}/apple_signing_certificate.p12"
+        echo "$APPLE_CERTIFICATE" | base64 -d > "$cert_path"
+
+        # Create and unlock a dedicated keychain for this job.
+        keychain_path="${RUNNER_TEMP}/codex-signing.keychain-db"
+        security create-keychain -p "$KEYCHAIN_PASSWORD" "$keychain_path"
+        security set-keychain-settings -lut 21600 "$keychain_path"
+        security unlock-keychain -p "$KEYCHAIN_PASSWORD" "$keychain_path"
+
+        # Keychains that were on the search list before this step changed it.
+        keychain_args=()
+        # Restore the previous search list and delete the temporary keychain.
+        cleanup_keychain() {
+          if ((${#keychain_args[@]} > 0)); then
+            security list-keychains -s "${keychain_args[@]}" || true
+            security default-keychain -s "${keychain_args[0]}" || true
+          else
+            security list-keychains -s || true
+          fi
+          if [[ -f "$keychain_path" ]]; then
+            security delete-keychain "$keychain_path" || true
+          fi
+        }
+
+        while IFS= read -r keychain; do
+          [[ -n "$keychain" ]] && keychain_args+=("$keychain")
+        done < <(security list-keychains | sed 's/^[[:space:]]*//;s/[[:space:]]*$//;s/"//g')
+
+        # Put the temporary keychain first on the search list.
+        if ((${#keychain_args[@]} > 0)); then
+          security list-keychains -s "$keychain_path" "${keychain_args[@]}"
+        else
+          security list-keychains -s "$keychain_path"
+        fi
+
+        security default-keychain -s "$keychain_path"
+        security import "$cert_path" -k "$keychain_path" -P "$APPLE_CERTIFICATE_PASSWORD" -T /usr/bin/codesign -T /usr/bin/security
+        security set-key-partition-list -S apple-tool:,apple: -s -k "$KEYCHAIN_PASSWORD" "$keychain_path" > /dev/null
+
+        # Collect the distinct 40-hex-digit identity hashes in the keychain.
+        codesign_hashes=()
+        while IFS= read -r hash; do
+          [[ -n "$hash" ]] && codesign_hashes+=("$hash")
+        done < <(security find-identity -v -p codesigning "$keychain_path" \
+          | sed -n 's/.*\([0-9A-F]\{40\}\).*/\1/p' \
+          | sort -u)
+
+        # Exactly one identity is expected; zero or several is an error.
+        if ((${#codesign_hashes[@]} == 0)); then
+          echo "No signing identities found in $keychain_path"
+          cleanup_keychain
+          rm -f "$cert_path"
+          exit 1
+        fi
+
+        if ((${#codesign_hashes[@]} > 1)); then
+          echo "Multiple signing identities found in $keychain_path:"
+          printf '  %s\n' "${codesign_hashes[@]}"
+          cleanup_keychain
+          rm -f "$cert_path"
+          exit 1
+        fi
+
+        APPLE_CODESIGN_IDENTITY="${codesign_hashes[0]}"
+
+        rm -f "$cert_path"
+
+        # Export the identity and keychain path to the steps that follow.
+        echo "APPLE_CODESIGN_IDENTITY=$APPLE_CODESIGN_IDENTITY" >> "$GITHUB_ENV"
+        echo "APPLE_CODESIGN_KEYCHAIN=$keychain_path" >> "$GITHUB_ENV"
+        echo "::add-mask::$APPLE_CODESIGN_IDENTITY"
+
+    - name: Sign macOS binaries
+      shell: bash
+      env:
+        # Passed through the environment so the input is data, not script text.
+        TARGET_TRIPLE: ${{ inputs.target }}
+      run: |
+        set -euo pipefail
+
+        if [[ -z "${APPLE_CODESIGN_IDENTITY:-}" ]]; then
+          echo "APPLE_CODESIGN_IDENTITY is required for macOS signing"
+          exit 1
+        fi
+
+        keychain_args=()
+        if [[ -n "${APPLE_CODESIGN_KEYCHAIN:-}" && -f "${APPLE_CODESIGN_KEYCHAIN}" ]]; then
+          keychain_args+=(--keychain "${APPLE_CODESIGN_KEYCHAIN}")
+        fi
+
+        for binary in codex codex-responses-api-proxy; do
+          path="codex-rs/target/${TARGET_TRIPLE}/release/${binary}"
+          # The ${arr[@]+...} form expands to nothing when the array is empty.
+          # A bare "${keychain_args[@]}" would abort under `set -u` on bash
+          # older than 4.4, which is what macOS ships as /bin/bash.
+          codesign --force --options runtime --timestamp --sign "$APPLE_CODESIGN_IDENTITY" ${keychain_args[@]+"${keychain_args[@]}"} "$path"
+        done
+
+    - name: Notarize macOS binaries
+      shell: bash
+      env:
+        APPLE_NOTARIZATION_KEY_P8: ${{ inputs.apple-notarization-key-p8 }}
+        APPLE_NOTARIZATION_KEY_ID: ${{ inputs.apple-notarization-key-id }}
+        APPLE_NOTARIZATION_ISSUER_ID: ${{ inputs.apple-notarization-issuer-id }}
+        TARGET_TRIPLE: ${{ inputs.target }}
+      run: |
+        set -euo pipefail
+
+        for var in APPLE_NOTARIZATION_KEY_P8 APPLE_NOTARIZATION_KEY_ID APPLE_NOTARIZATION_ISSUER_ID; do
+          if [[ -z "${!var:-}" ]]; then
+            echo "$var is required for notarization"
+            exit 1
+          fi
+        done
+
+        # Decode the API key to a temporary file; the EXIT trap deletes it.
+        notary_key_path="${RUNNER_TEMP}/notarytool.key.p8"
+        echo "$APPLE_NOTARIZATION_KEY_P8" | base64 -d > "$notary_key_path"
+        cleanup_notary() {
+          rm -f "$notary_key_path"
+        }
+        trap cleanup_notary EXIT
+
+        # Zip one binary, submit it and wait, then fail unless it was Accepted.
+        notarize_binary() {
+          local binary="$1"
+          local source_path="codex-rs/target/${TARGET_TRIPLE}/release/${binary}"
+          local archive_path="${RUNNER_TEMP}/${binary}.zip"
+
+          if [[ ! -f "$source_path" ]]; then
+            echo "Binary $source_path not found"
+            exit 1
+          fi
+
+          rm -f "$archive_path"
+          ditto -c -k --keepParent "$source_path" "$archive_path"
+
+          submission_json=$(xcrun notarytool submit "$archive_path" \
+            --key "$notary_key_path" \
+            --key-id "$APPLE_NOTARIZATION_KEY_ID" \
+            --issuer "$APPLE_NOTARIZATION_ISSUER_ID" \
+            --output-format json \
+            --wait)
+
+          status=$(printf '%s\n' "$submission_json" | jq -r '.status // "Unknown"')
+          submission_id=$(printf '%s\n' "$submission_json" | jq -r '.id // ""')
+
+          if [[ -z "$submission_id" ]]; then
+            echo "Failed to retrieve submission ID for $binary"
+            exit 1
+          fi
+
+          echo "::notice title=Notarization::$binary submission ${submission_id} completed with status ${status}"
+
+          if [[ "$status" != "Accepted" ]]; then
+            echo "Notarization failed for ${binary} (submission ${submission_id}, status ${status})"
+            exit 1
+          fi
+        }
+
+        notarize_binary "codex"
+        notarize_binary "codex-responses-api-proxy"
+
+    - name: Remove signing keychain
+      if: ${{ always() }}
+      shell: bash
+      env:
+        APPLE_CODESIGN_KEYCHAIN: ${{ env.APPLE_CODESIGN_KEYCHAIN }}
+      run: |
+        set -euo pipefail
+        if [[ -n "${APPLE_CODESIGN_KEYCHAIN:-}" ]]; then
+          # Rebuild the search list without the temporary keychain.
+          keychain_args=()
+          while IFS= read -r keychain; do
+            [[ "$keychain" == "$APPLE_CODESIGN_KEYCHAIN" ]] && continue
+            [[ -n "$keychain" ]] && keychain_args+=("$keychain")
+          done < <(security list-keychains | sed 's/^[[:space:]]*//;s/[[:space:]]*$//;s/"//g')
+          if ((${#keychain_args[@]} > 0)); then
+            security list-keychains -s "${keychain_args[@]}"
+            security default-keychain -s "${keychain_args[0]}"
+          fi
+
+          if [[ -f "$APPLE_CODESIGN_KEYCHAIN" ]]; then
+            security delete-keychain "$APPLE_CODESIGN_KEYCHAIN"
+          fi
+        fi
--- a/.github/actions/windows-code-sign/action.yml
+++ b/.github/actions/windows-code-sign/action.yml
@@ -0,0 +1,57 @@
+name: windows-code-sign
+description: Sign Windows binaries with Azure Trusted Signing.
+inputs:
+  target:  # selects codex-rs/target/<target>/release in `files` below
+    description: Target triple for the artifacts to sign.
+    required: true
+  client-id:
+    description: Azure Trusted Signing client ID.
+    required: true
+  tenant-id:
+    description: Azure tenant ID for Trusted Signing.
+    required: true
+  subscription-id:
+    description: Azure subscription ID for Trusted Signing.
+    required: true
+  endpoint:
+    description: Azure Trusted Signing endpoint.
+    required: true
+  account-name:
+    description: Azure Trusted Signing account name.
+    required: true
+  certificate-profile-name:
+    description: Certificate profile name for signing.
+    required: true
+
+runs:
+  using: composite
+  steps:
+    - name: Azure login for Trusted Signing (OIDC)
+      uses: azure/login@v2  # only IDs are passed below, no client secret
+      with:
+        client-id: ${{ inputs.client-id }}
+        tenant-id: ${{ inputs.tenant-id }}
+        subscription-id: ${{ inputs.subscription-id }}
+
+    - name: Sign Windows binaries with Azure Trusted Signing
+      uses: azure/trusted-signing-action@v0
+      with:  # signs the four executables listed under `files`
+        endpoint: ${{ inputs.endpoint }}
+        trusted-signing-account-name: ${{ inputs.account-name }}
+        certificate-profile-name: ${{ inputs.certificate-profile-name }}
+        exclude-environment-credential: true
+        exclude-workload-identity-credential: true
+        exclude-managed-identity-credential: true
+        exclude-shared-token-cache-credential: true
+        exclude-visual-studio-credential: true
+        exclude-visual-studio-code-credential: true
+        exclude-azure-cli-credential: false  # the only credential source not excluded
+        exclude-azure-powershell-credential: true
+        exclude-azure-developer-cli-credential: true
+        exclude-interactive-browser-credential: true
+        cache-dependencies: false
+        files: |
+          ${{ github.workspace }}/codex-rs/target/${{ inputs.target }}/release/codex.exe
+          ${{ github.workspace }}/codex-rs/target/${{ inputs.target }}/release/codex-responses-api-proxy.exe
+          ${{ github.workspace }}/codex-rs/target/${{ inputs.target }}/release/codex-windows-sandbox-setup.exe
+          ${{ github.workspace }}/codex-rs/target/${{ inputs.target }}/release/codex-command-runner.exe
--- a/.github/dotslash-config.json
+++ b/.github/dotslash-config.json
@@ -55,6 +55,30 @@
          "path": "codex-responses-api-proxy.exe"
        }
      }
+    },
+    "codex-command-runner": {
+      "platforms": {
+        "windows-x86_64": {
+          "regex": "^codex-command-runner-x86_64-pc-windows-msvc\\.exe\\.zst$",
+          "path": "codex-command-runner.exe"
+        },
+        "windows-aarch64": {
+          "regex": "^codex-command-runner-aarch64-pc-windows-msvc\\.exe\\.zst$",
+          "path": "codex-command-runner.exe"
+        }
+      }
+    },
+    "codex-windows-sandbox-setup": {
+      "platforms": {
+        "windows-x86_64": {
+          "regex": "^codex-windows-sandbox-setup-x86_64-pc-windows-msvc\\.exe\\.zst$",
+          "path": "codex-windows-sandbox-setup.exe"
+        },
+        "windows-aarch64": {
+          "regex": "^codex-windows-sandbox-setup-aarch64-pc-windows-msvc\\.exe\\.zst$",
+          "path": "codex-windows-sandbox-setup.exe"
+        }
+      }
    }
  }
 }
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -20,7 +20,7 @@ jobs:
          run_install: false

      - name: Setup Node.js
-        uses: actions/setup-node@v5
+        uses: actions/setup-node@v6
        with:
          node-version: 22

@@ -36,7 +36,8 @@ jobs:
          GH_TOKEN: ${{ github.token }}
        run: |
          set -euo pipefail
-          CODEX_VERSION=0.40.0
+          # Use a rust-release version that includes all native binaries.
+          CODEX_VERSION=0.74.0-alpha.3
          OUTPUT_DIR="${RUNNER_TEMP}"
          python3 ./scripts/stage_npm_packages.py \
            --release-version "$CODEX_VERSION" \
@@ -46,7 +47,7 @@ jobs:
          echo "pack_output=$PACK_OUTPUT" >> "$GITHUB_OUTPUT"

      - name: Upload staged npm package artifact
-        uses: actions/upload-artifact@v5
+        uses: actions/upload-artifact@v6
        with:
          name: codex-npm-staging
          path: ${{ steps.stage_npm_package.outputs.pack_output }}
--- a/.github/workflows/rust-ci.yml
+++ b/.github/workflows/rust-ci.yml
@@ -28,9 +28,11 @@ jobs:

          if [[ "${{ github.event_name }}" == "pull_request" ]]; then
            BASE_SHA='${{ github.event.pull_request.base.sha }}'
+            HEAD_SHA='${{ github.event.pull_request.head.sha }}'
            echo "Base SHA: $BASE_SHA"
-            # List files changed between base and current HEAD (merge-base aware)
-            mapfile -t files < <(git diff --name-only --no-renames "$BASE_SHA"...HEAD)
+            echo "Head SHA: $HEAD_SHA"
+            # List files changed between base and PR head
+            mapfile -t files < <(git diff --name-only --no-renames "$BASE_SHA" "$HEAD_SHA")
          else
            # On push / manual runs, default to running everything
            files=("codex-rs/force" ".github/force")
@@ -166,7 +168,7 @@ jobs:
      # avoid caching the large target dir on the gnu-dev job.
      - name: Restore cargo home cache
        id: cache_cargo_home_restore
-        uses: actions/cache/restore@v4
+        uses: actions/cache/restore@v5
        with:
          path: |
            ~/.cargo/bin/
@@ -207,7 +209,7 @@ jobs:
      - name: Restore sccache cache (fallback)
        if: ${{ env.USE_SCCACHE == 'true' && env.SCCACHE_GHA_ENABLED != 'true' }}
        id: cache_sccache_restore
-        uses: actions/cache/restore@v4
+        uses: actions/cache/restore@v5
        with:
          path: ${{ github.workspace }}/.sccache/
          key: sccache-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ steps.lockhash.outputs.hash }}-${{ github.run_id }}
@@ -226,7 +228,7 @@ jobs:
      - if: ${{ matrix.target == 'x86_64-unknown-linux-musl' || matrix.target == 'aarch64-unknown-linux-musl'}}
        name: Restore APT cache (musl)
        id: cache_apt_restore
-        uses: actions/cache/restore@v4
+        uses: actions/cache/restore@v5
        with:
          path: |
            /var/cache/apt
@@ -280,7 +282,7 @@ jobs:
      - name: Save cargo home cache
        if: always() && !cancelled() && steps.cache_cargo_home_restore.outputs.cache-hit != 'true'
        continue-on-error: true
-        uses: actions/cache/save@v4
+        uses: actions/cache/save@v5
        with:
          path: |
            ~/.cargo/bin/
@@ -292,7 +294,7 @@ jobs:
      - name: Save sccache cache (fallback)
        if: always() && !cancelled() && env.USE_SCCACHE == 'true' && env.SCCACHE_GHA_ENABLED != 'true'
        continue-on-error: true
-        uses: actions/cache/save@v4
+        uses: actions/cache/save@v5
        with:
          path: ${{ github.workspace }}/.sccache/
          key: sccache-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ steps.lockhash.outputs.hash }}-${{ github.run_id }}
@@ -317,7 +319,7 @@ jobs:
      - name: Save APT cache (musl)
        if: always() && !cancelled() && (matrix.target == 'x86_64-unknown-linux-musl' || matrix.target == 'aarch64-unknown-linux-musl') && steps.cache_apt_restore.outputs.cache-hit != 'true'
        continue-on-error: true
-        uses: actions/cache/save@v4
+        uses: actions/cache/save@v5
        with:
          path: |
            /var/cache/apt
@@ -369,6 +371,27 @@ jobs:

    steps:
      - uses: actions/checkout@v6
+
+      # We have been running out of space when running this job on Linux for
+      # x86_64-unknown-linux-gnu, so remove some unnecessary dependencies.
+      - name: Remove unnecessary dependencies to save space
+        if: ${{ startsWith(matrix.runner, 'ubuntu') }}
+        shell: bash
+        run: |
+          set -euo pipefail
+          sudo rm -rf \
+            /usr/local/lib/android \
+            /usr/share/dotnet \
+            /usr/local/share/boost \
+            /usr/local/lib/node_modules \
+            /opt/ghc
+          sudo apt-get remove -y docker.io docker-compose podman buildah
+
+      # Some integration tests rely on DotSlash being installed.
+      # See https://github.com/openai/codex/pull/7617.
+      - name: Install DotSlash
+        uses: facebook/install-dotslash@v2
+
      - uses: dtolnay/rust-toolchain@1.90
        with:
          targets: ${{ matrix.target }}
@@ -384,7 +407,7 @@ jobs:

      - name: Restore cargo home cache
        id: cache_cargo_home_restore
-        uses: actions/cache/restore@v4
+        uses: actions/cache/restore@v5
        with:
          path: |
            ~/.cargo/bin/
@@ -424,7 +447,7 @@ jobs:
      - name: Restore sccache cache (fallback)
        if: ${{ env.USE_SCCACHE == 'true' && env.SCCACHE_GHA_ENABLED != 'true' }}
        id: cache_sccache_restore
-        uses: actions/cache/restore@v4
+        uses: actions/cache/restore@v5
        with:
          path: ${{ github.workspace }}/.sccache/
          key: sccache-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ steps.lockhash.outputs.hash }}-${{ github.run_id }}
@@ -447,7 +470,7 @@ jobs:
      - name: Save cargo home cache
        if: always() && !cancelled() && steps.cache_cargo_home_restore.outputs.cache-hit != 'true'
        continue-on-error: true
-        uses: actions/cache/save@v4
+        uses: actions/cache/save@v5
        with:
          path: |
            ~/.cargo/bin/
@@ -459,7 +482,7 @@ jobs:
      - name: Save sccache cache (fallback)
        if: always() && !cancelled() && env.USE_SCCACHE == 'true' && env.SCCACHE_GHA_ENABLED != 'true'
        continue-on-error: true
-        uses: actions/cache/save@v4
+        uses: actions/cache/save@v5
        with:
          path: ${{ github.workspace }}/.sccache/
          key: sccache-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ steps.lockhash.outputs.hash }}-${{ github.run_id }}
--- a/.github/workflows/rust-release-prepare.yml
+++ b/.github/workflows/rust-release-prepare.yml
@@ -0,0 +1,53 @@
+# Refreshes codex-rs/core/models.json from the models endpoint every four
+# hours (or on demand) and opens a pull request when the file changed.
+name: rust-release-prepare
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: "0 */4 * * *"
+
+# Runs share one group and are not cancelled while in progress.
+concurrency:
+  group: ${{ github.workflow }}
+  cancel-in-progress: false
+
+permissions:
+  contents: write
+  pull-requests: write
+
+jobs:
+  prepare:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          ref: main
+          fetch-depth: 0
+
+      - name: Update models.json
+        env:
+          OPENAI_API_KEY: ${{ secrets.CODEX_OPENAI_API_KEY }}
+        run: |
+          set -euo pipefail
+
+          version="99.99.99"
+          agent="codex_cli_rs/${version} (Linux $(uname -r); $(uname -m)) github-actions"
+          endpoint="${OPENAI_BASE_URL:-https://chatgpt.com/backend-api/codex}"
+          endpoint="${endpoint%/}/models?client_version=${version}"
+
+          # jq re-serializes the response, so the step fails on non-JSON output.
+          curl --http1.1 --fail --show-error --location \
+            -H "Authorization: Bearer ${OPENAI_API_KEY}" \
+            -H "User-Agent: ${agent}" \
+            "${endpoint}" \
+            | jq '.' > codex-rs/core/models.json
+
+      - name: Open pull request (if changed)
+        uses: peter-evans/create-pull-request@v7
+        with:
+          commit-message: "Update models.json"
+          title: "Update models.json"
+          body: "Automated update of models.json."
+          branch: "bot/update-models-json"
+          reviewers: "pakrym-oai,aibrahim-oai"
+          delete-branch: true
--- a/.github/workflows/rust-release.yml
+++ b/.github/workflows/rust-release.yml
@@ -50,6 +50,9 @@ jobs:
    name: Build - ${{ matrix.runner }} - ${{ matrix.target }}
    runs-on: ${{ matrix.runner }}
    timeout-minutes: 30
+    permissions:
+      contents: read
+      id-token: write
    defaults:
      run:
        working-directory: codex-rs
@@ -81,7 +84,7 @@ jobs:
        with:
          targets: ${{ matrix.target }}

-      - uses: actions/cache@v4
+      - uses: actions/cache@v5
        with:
          path: |
            ~/.cargo/bin/
@@ -98,176 +101,43 @@ jobs:
          sudo apt-get install -y musl-tools pkg-config

      - name: Cargo build
-        run: cargo build --target ${{ matrix.target }} --release --bin codex --bin codex-responses-api-proxy
-
-      - if: ${{ matrix.runner == 'macos-15-xlarge' }}
-        name: Configure Apple code signing
        shell: bash
-        env:
-          KEYCHAIN_PASSWORD: actions
-          APPLE_CERTIFICATE: ${{ secrets.APPLE_CERTIFICATE_P12 }}
-          APPLE_CERTIFICATE_PASSWORD: ${{ secrets.APPLE_CERTIFICATE_PASSWORD }}
        run: |
-          set -euo pipefail
-
-          if [[ -z "${APPLE_CERTIFICATE:-}" ]]; then
-            echo "APPLE_CERTIFICATE is required for macOS signing"
-            exit 1
-          fi
-
-          if [[ -z "${APPLE_CERTIFICATE_PASSWORD:-}" ]]; then
-            echo "APPLE_CERTIFICATE_PASSWORD is required for macOS signing"
-            exit 1
-          fi
-
-          cert_path="${RUNNER_TEMP}/apple_signing_certificate.p12"
-          echo "$APPLE_CERTIFICATE" | base64 -d > "$cert_path"
-
-          keychain_path="${RUNNER_TEMP}/codex-signing.keychain-db"
-          security create-keychain -p "$KEYCHAIN_PASSWORD" "$keychain_path"
-          security set-keychain-settings -lut 21600 "$keychain_path"
-          security unlock-keychain -p "$KEYCHAIN_PASSWORD" "$keychain_path"
-
-          keychain_args=()
-          cleanup_keychain() {
-            if ((${#keychain_args[@]} > 0)); then
-              security list-keychains -s "${keychain_args[@]}" || true
-              security default-keychain -s "${keychain_args[0]}" || true
-            else
-              security list-keychains -s || true
-            fi
-            if [[ -f "$keychain_path" ]]; then
-              security delete-keychain "$keychain_path" || true
-            fi
-          }
-
-          while IFS= read -r keychain; do
-            [[ -n "$keychain" ]] && keychain_args+=("$keychain")
-          done < <(security list-keychains | sed 's/^[[:space:]]*//;s/[[:space:]]*$//;s/"//g')
-
-          if ((${#keychain_args[@]} > 0)); then
-            security list-keychains -s "$keychain_path" "${keychain_args[@]}"
+          if [[ "${{ contains(matrix.target, 'windows') }}" == 'true' ]]; then
+            cargo build --target ${{ matrix.target }} --release --bin codex --bin codex-responses-api-proxy --bin codex-windows-sandbox-setup --bin codex-command-runner
          else
-            security list-keychains -s "$keychain_path"
+            cargo build --target ${{ matrix.target }} --release --bin codex --bin codex-responses-api-proxy
          fi

-          security default-keychain -s "$keychain_path"
-          security import "$cert_path" -k "$keychain_path" -P "$APPLE_CERTIFICATE_PASSWORD" -T /usr/bin/codesign -T /usr/bin/security
-          security set-key-partition-list -S apple-tool:,apple: -s -k "$KEYCHAIN_PASSWORD" "$keychain_path" > /dev/null
+      - if: ${{ contains(matrix.target, 'linux') }}
+        name: Cosign Linux artifacts
+        uses: ./.github/actions/linux-code-sign
+        with:
+          target: ${{ matrix.target }}
+          artifacts-dir: ${{ github.workspace }}/codex-rs/target/${{ matrix.target }}/release

-          codesign_hashes=()
-          while IFS= read -r hash; do
-            [[ -n "$hash" ]] && codesign_hashes+=("$hash")
-          done < <(security find-identity -v -p codesigning "$keychain_path" \
-            | sed -n 's/.*\([0-9A-F]\{40\}\).*/\1/p' \
-            | sort -u)
-
-          if ((${#codesign_hashes[@]} == 0)); then
-            echo "No signing identities found in $keychain_path"
-            cleanup_keychain
-            rm -f "$cert_path"
-            exit 1
-          fi
-
-          if ((${#codesign_hashes[@]} > 1)); then
-            echo "Multiple signing identities found in $keychain_path:"
-            printf '  %s\n' "${codesign_hashes[@]}"
-            cleanup_keychain
-            rm -f "$cert_path"
-            exit 1
-          fi
-
-          APPLE_CODESIGN_IDENTITY="${codesign_hashes[0]}"
-
-          rm -f "$cert_path"
-
-          echo "APPLE_CODESIGN_IDENTITY=$APPLE_CODESIGN_IDENTITY" >> "$GITHUB_ENV"
-          echo "APPLE_CODESIGN_KEYCHAIN=$keychain_path" >> "$GITHUB_ENV"
-          echo "::add-mask::$APPLE_CODESIGN_IDENTITY"
+      - if: ${{ contains(matrix.target, 'windows') }}
+        name: Sign Windows binaries with Azure Trusted Signing
+        uses: ./.github/actions/windows-code-sign
+        with:
+          target: ${{ matrix.target }}
+          client-id: ${{ secrets.AZURE_TRUSTED_SIGNING_CLIENT_ID }}
+          tenant-id: ${{ secrets.AZURE_TRUSTED_SIGNING_TENANT_ID }}
+          subscription-id: ${{ secrets.AZURE_TRUSTED_SIGNING_SUBSCRIPTION_ID }}
+          endpoint: ${{ secrets.AZURE_TRUSTED_SIGNING_ENDPOINT }}
+          account-name: ${{ secrets.AZURE_TRUSTED_SIGNING_ACCOUNT_NAME }}
+          certificate-profile-name: ${{ secrets.AZURE_TRUSTED_SIGNING_CERTIFICATE_PROFILE_NAME }}

      - if: ${{ matrix.runner == 'macos-15-xlarge' }}
-        name: Sign macOS binaries
-        shell: bash
-        run: |
-          set -euo pipefail
-
-          if [[ -z "${APPLE_CODESIGN_IDENTITY:-}" ]]; then
-            echo "APPLE_CODESIGN_IDENTITY is required for macOS signing"
-            exit 1
-          fi
-
-          keychain_args=()
-          if [[ -n "${APPLE_CODESIGN_KEYCHAIN:-}" && -f "${APPLE_CODESIGN_KEYCHAIN}" ]]; then
-            keychain_args+=(--keychain "${APPLE_CODESIGN_KEYCHAIN}")
-          fi
-
-          for binary in codex codex-responses-api-proxy; do
-            path="target/${{ matrix.target }}/release/${binary}"
-            codesign --force --options runtime --timestamp --sign "$APPLE_CODESIGN_IDENTITY" "${keychain_args[@]}" "$path"
-          done
-
-      - if: ${{ matrix.runner == 'macos-15-xlarge' }}
-        name: Notarize macOS binaries
-        shell: bash
-        env:
-          APPLE_NOTARIZATION_KEY_P8: ${{ secrets.APPLE_NOTARIZATION_KEY_P8 }}
-          APPLE_NOTARIZATION_KEY_ID: ${{ secrets.APPLE_NOTARIZATION_KEY_ID }}
-          APPLE_NOTARIZATION_ISSUER_ID: ${{ secrets.APPLE_NOTARIZATION_ISSUER_ID }}
-        run: |
-          set -euo pipefail
-
-          for var in APPLE_NOTARIZATION_KEY_P8 APPLE_NOTARIZATION_KEY_ID APPLE_NOTARIZATION_ISSUER_ID; do
-            if [[ -z "${!var:-}" ]]; then
-              echo "$var is required for notarization"
-              exit 1
-            fi
-          done
-
-          notary_key_path="${RUNNER_TEMP}/notarytool.key.p8"
-          echo "$APPLE_NOTARIZATION_KEY_P8" | base64 -d > "$notary_key_path"
-          cleanup_notary() {
-            rm -f "$notary_key_path"
-          }
-          trap cleanup_notary EXIT
-
-          notarize_binary() {
-            local binary="$1"
-            local source_path="target/${{ matrix.target }}/release/${binary}"
-            local archive_path="${RUNNER_TEMP}/${binary}.zip"
-
-            if [[ ! -f "$source_path" ]]; then
-              echo "Binary $source_path not found"
-              exit 1
-            fi
-
-            rm -f "$archive_path"
-            ditto -c -k --keepParent "$source_path" "$archive_path"
-
-            submission_json=$(xcrun notarytool submit "$archive_path" \
-              --key "$notary_key_path" \
-              --key-id "$APPLE_NOTARIZATION_KEY_ID" \
-              --issuer "$APPLE_NOTARIZATION_ISSUER_ID" \
-              --output-format json \
-              --wait)
-
-            status=$(printf '%s\n' "$submission_json" | jq -r '.status // "Unknown"')
-            submission_id=$(printf '%s\n' "$submission_json" | jq -r '.id // ""')
-
-            if [[ -z "$submission_id" ]]; then
-              echo "Failed to retrieve submission ID for $binary"
-              exit 1
-            fi
-
-            echo "::notice title=Notarization::$binary submission ${submission_id} completed with status ${status}"
-
-            if [[ "$status" != "Accepted" ]]; then
-              echo "Notarization failed for ${binary} (submission ${submission_id}, status ${status})"
-              exit 1
-            fi
-          }
-
-          notarize_binary "codex"
-          notarize_binary "codex-responses-api-proxy"
+        name: MacOS code signing
+        uses: ./.github/actions/macos-code-sign
+        with:
+          target: ${{ matrix.target }}
+          apple-certificate: ${{ secrets.APPLE_CERTIFICATE_P12 }}
+          apple-certificate-password: ${{ secrets.APPLE_CERTIFICATE_PASSWORD }}
+          apple-notarization-key-p8: ${{ secrets.APPLE_NOTARIZATION_KEY_P8 }}
+          apple-notarization-key-id: ${{ secrets.APPLE_NOTARIZATION_KEY_ID }}
+          apple-notarization-issuer-id: ${{ secrets.APPLE_NOTARIZATION_ISSUER_ID }}

      - name: Stage artifacts
        shell: bash
@@ -278,11 +148,18 @@ jobs:
          if [[ "${{ matrix.runner }}" == windows* ]]; then
            cp target/${{ matrix.target }}/release/codex.exe "$dest/codex-${{ matrix.target }}.exe"
            cp target/${{ matrix.target }}/release/codex-responses-api-proxy.exe "$dest/codex-responses-api-proxy-${{ matrix.target }}.exe"
+            cp target/${{ matrix.target }}/release/codex-windows-sandbox-setup.exe "$dest/codex-windows-sandbox-setup-${{ matrix.target }}.exe"
+            cp target/${{ matrix.target }}/release/codex-command-runner.exe "$dest/codex-command-runner-${{ matrix.target }}.exe"
          else
            cp target/${{ matrix.target }}/release/codex "$dest/codex-${{ matrix.target }}"
            cp target/${{ matrix.target }}/release/codex-responses-api-proxy "$dest/codex-responses-api-proxy-${{ matrix.target }}"
          fi

+          if [[ "${{ matrix.target }}" == *linux* ]]; then
+            cp target/${{ matrix.target }}/release/codex.sigstore "$dest/codex-${{ matrix.target }}.sigstore"
+            cp target/${{ matrix.target }}/release/codex-responses-api-proxy.sigstore "$dest/codex-responses-api-proxy-${{ matrix.target }}.sigstore"
+          fi
+
      - if: ${{ matrix.runner == 'windows-11-arm' }}
        name: Install zstd
        shell: powershell
@@ -321,6 +198,11 @@ jobs:
              continue
            fi

+            # Don't try to compress signature bundles.
+            if [[ "$base" == *.sigstore ]]; then
+              continue
+            fi
+
            # Create per-binary tar.gz
            tar -C "$dest" -czf "$dest/${base}.tar.gz" "$base"

@@ -340,30 +222,7 @@ jobs:
            zstd "${zstd_args[@]}" "$dest/$base"
          done

-      - name: Remove signing keychain
-        if: ${{ always() && matrix.runner == 'macos-15-xlarge' }}
-        shell: bash
-        env:
-          APPLE_CODESIGN_KEYCHAIN: ${{ env.APPLE_CODESIGN_KEYCHAIN }}
-        run: |
-          set -euo pipefail
-          if [[ -n "${APPLE_CODESIGN_KEYCHAIN:-}" ]]; then
-            keychain_args=()
-            while IFS= read -r keychain; do
-              [[ "$keychain" == "$APPLE_CODESIGN_KEYCHAIN" ]] && continue
-              [[ -n "$keychain" ]] && keychain_args+=("$keychain")
-            done < <(security list-keychains | sed 's/^[[:space:]]*//;s/[[:space:]]*$//;s/"//g')
-            if ((${#keychain_args[@]} > 0)); then
-              security list-keychains -s "${keychain_args[@]}"
-              security default-keychain -s "${keychain_args[0]}"
-            fi
-
-            if [[ -f "$APPLE_CODESIGN_KEYCHAIN" ]]; then
-              security delete-keychain "$APPLE_CODESIGN_KEYCHAIN"
-            fi
-          fi
-
-      - uses: actions/upload-artifact@v5
+      - uses: actions/upload-artifact@v6
        with:
          name: ${{ matrix.target }}
          # Upload the per-binary .zst files as well as the new .tar.gz
@@ -399,7 +258,7 @@ jobs:
      - name: Checkout repository
        uses: actions/checkout@v6

-      - uses: actions/download-artifact@v4
+      - uses: actions/download-artifact@v7
        with:
          path: dist

@@ -447,7 +306,7 @@ jobs:
          run_install: false

      - name: Setup Node.js for npm packaging
-        uses: actions/setup-node@v5
+        uses: actions/setup-node@v6
        with:
          node-version: 22

@@ -498,7 +357,7 @@ jobs:

    steps:
      - name: Setup Node.js
-        uses: actions/setup-node@v5
+        uses: actions/setup-node@v6
        with:
          node-version: 22
          registry-url: "https://registry.npmjs.org"
--- a/.github/workflows/sdk.yml
+++ b/.github/workflows/sdk.yml
@@ -19,7 +19,7 @@ jobs:
          run_install: false

      - name: Setup Node.js
-        uses: actions/setup-node@v5
+        uses: actions/setup-node@v6
        with:
          node-version: 22
          cache: pnpm
--- a/.github/workflows/shell-tool-mcp-ci.yml
+++ b/.github/workflows/shell-tool-mcp-ci.yml
@@ -30,7 +30,7 @@ jobs:
          run_install: false

      - name: Setup Node.js
-        uses: actions/setup-node@v5
+        uses: actions/setup-node@v6
        with:
          node-version: ${{ env.NODE_VERSION }}
          cache: "pnpm"
--- a/.github/workflows/shell-tool-mcp.yml
+++ b/.github/workflows/shell-tool-mcp.yml
@@ -113,7 +113,7 @@ jobs:
          cp "target/${{ matrix.target }}/release/codex-exec-mcp-server" "$dest/"
          cp "target/${{ matrix.target }}/release/codex-execve-wrapper" "$dest/"

-      - uses: actions/upload-artifact@v5
+      - uses: actions/upload-artifact@v6
        with:
          name: shell-tool-mcp-rust-${{ matrix.target }}
          path: artifacts/**
@@ -211,7 +211,7 @@ jobs:
          mkdir -p "$dest"
          cp bash "$dest/bash"

-      - uses: actions/upload-artifact@v5
+      - uses: actions/upload-artifact@v6
        with:
          name: shell-tool-mcp-bash-${{ matrix.target }}-${{ matrix.variant }}
          path: artifacts/**
@@ -253,7 +253,7 @@ jobs:
          mkdir -p "$dest"
          cp bash "$dest/bash"

-      - uses: actions/upload-artifact@v5
+      - uses: actions/upload-artifact@v6
        with:
          name: shell-tool-mcp-bash-${{ matrix.target }}-${{ matrix.variant }}
          path: artifacts/**
@@ -280,7 +280,7 @@ jobs:
          run_install: false

      - name: Setup Node.js
-        uses: actions/setup-node@v5
+        uses: actions/setup-node@v6
        with:
          node-version: ${{ env.NODE_VERSION }}

@@ -291,7 +291,7 @@ jobs:
        run: pnpm --filter @openai/codex-shell-tool-mcp run build

      - name: Download build artifacts
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@v7
        with:
          path: artifacts

@@ -352,7 +352,7 @@ jobs:
          filename=$(PACK_INFO="$pack_info" node -e 'const data = JSON.parse(process.env.PACK_INFO); console.log(data[0].filename);')
          mv "dist/npm/${filename}" "dist/npm/codex-shell-tool-mcp-npm-${PACKAGE_VERSION}.tgz"

-      - uses: actions/upload-artifact@v5
+      - uses: actions/upload-artifact@v6
        with:
          name: codex-shell-tool-mcp-npm
          path: dist/npm/codex-shell-tool-mcp-npm-${{ env.PACKAGE_VERSION }}.tgz
@@ -376,7 +376,7 @@ jobs:
          run_install: false

      - name: Setup Node.js
-        uses: actions/setup-node@v5
+        uses: actions/setup-node@v6
        with:
          node-version: ${{ env.NODE_VERSION }}
          registry-url: https://registry.npmjs.org
@@ -386,7 +386,7 @@ jobs:
        run: npm install -g npm@latest

      - name: Download npm tarball
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@v7
        with:
          name: codex-shell-tool-mcp-npm
          path: dist/npm
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -11,7 +11,6 @@ In the codex-rs folder where the rust code lives:
 - Always collapse if statements per https://rust-lang.github.io/rust-clippy/master/index.html#collapsible_if
 - Always inline format! args when possible per https://rust-lang.github.io/rust-clippy/master/index.html#uninlined_format_args
 - Use method references over closures when possible per https://rust-lang.github.io/rust-clippy/master/index.html#redundant_closure_for_method_calls
- Do not use unsigned integer even if the number cannot be negative.
 - When writing tests, prefer comparing the equality of entire objects over fields one by one.
 - When making a change that adds or changes an API, ensure that the documentation in the `docs/` folder is up to date if applicable.

@@ -75,6 +74,8 @@ If you don’t have the tool:
 ### Test assertions

 - Tests should use pretty_assertions::assert_eq for clearer diffs. Import this at the top of the test module if it isn't already.
+- Prefer deep equals comparisons whenever possible. Perform `assert_eq!()` on entire objects, rather than individual fields.
+- Avoid mutating process environment in tests; prefer passing environment-derived flags or dependencies from above.

 ### Integration tests (core)

--- a/EXEC_PLAN.md
+++ b/EXEC_PLAN.md
@@ -0,0 +1,152 @@
+# Codex Execution Plans (ExecPlans):
+ 
+This document describes the requirements for an execution plan ("ExecPlan"), a design document that a coding agent can follow to deliver a working feature or system change. Treat the reader as a complete beginner to this repository: they have only the current working tree and the single ExecPlan file you provide. There is no memory of prior plans and no external context.
+ 
+## How to use ExecPlans and PLANS.md
+ 
+When authoring an executable specification (ExecPlan), follow PLANS.md _to the letter_. If it is not in your context, refresh your memory by reading the entire PLANS.md file. Be thorough in reading (and re-reading) source material to produce an accurate specification. When creating a spec, start from the skeleton and flesh it out as you do your research.
+ 
+When implementing an executable specification (ExecPlan), do not prompt the user for "next steps"; simply proceed to the next milestone. Keep all sections up to date, add or split entries in the list at every stopping point to affirmatively state the progress made and next steps. Resolve ambiguities autonomously, and commit frequently.
+ 
+When discussing an executable specification (ExecPlan), record decisions in a log in the spec for posterity; it should be unambiguously clear why any change to the specification was made. ExecPlans are living documents, and it should always be possible to restart from _only_ the ExecPlan and no other work.
+ 
+When researching a design with challenging requirements or significant unknowns, use milestones to implement proofs of concept, "toy implementations", etc., that allow validating whether the user's proposal is feasible. Read the source code of libraries by finding or acquiring them, research deeply, and include prototypes to guide a fuller implementation.
+ 
+## Requirements
+ 
+NON-NEGOTIABLE REQUIREMENTS:
+ 
+* Every ExecPlan must be fully self-contained. Self-contained means that in its current form it contains all knowledge and instructions needed for a novice to succeed.
+* Every ExecPlan is a living document. Contributors are required to revise it as progress is made, as discoveries occur, and as design decisions are finalized. Each revision must remain fully self-contained.
+* Every ExecPlan must enable a complete novice to implement the feature end-to-end without prior knowledge of this repo.
+* Every ExecPlan must produce a demonstrably working behavior, not merely code changes to "meet a definition".
+* Every ExecPlan must define every term of art in plain language or not use it.
+ 
+Purpose and intent come first. Begin by explaining, in a few sentences, why the work matters from a user's perspective: what someone can do after this change that they could not do before, and how to see it working. Then guide the reader through the exact steps to achieve that outcome, including what to edit, what to run, and what they should observe.
+ 
+The agent executing your plan can list files, read files, search, run the project, and run tests. It does not know any prior context and cannot infer what you meant from earlier milestones. Repeat any assumption you rely on. Do not point to external blogs or docs; if knowledge is required, embed it in the plan itself in your own words. If an ExecPlan builds upon a prior ExecPlan and that file is checked in, incorporate it by reference. If it is not, you must include all relevant context from that plan.
+ 
+## Formatting
+ 
+Format and envelope are simple and strict. Each ExecPlan must be one single fenced code block labeled as `md` that begins and ends with triple backticks. Do not nest additional triple-backtick code fences inside; when you need to show commands, transcripts, diffs, or code, present them as indented blocks within that single fence. Use indentation for clarity rather than code fences inside an ExecPlan to avoid prematurely closing the ExecPlan's code fence. Use two newlines after every heading, use # and ## and so on, and correct syntax for ordered and unordered lists.
+ 
+When writing an ExecPlan to a Markdown (.md) file where the content of the file *is only* the single ExecPlan, you should omit the triple backticks.
+ 
+Write in plain prose. Prefer sentences over lists. Avoid checklists, tables, and long enumerations unless brevity would obscure meaning. Checklists are permitted only in the `Progress` section, where they are mandatory. Narrative sections must remain prose-first.
+ 
+## Guidelines
+ 
+Self-containment and plain language are paramount. If you introduce a phrase that is not ordinary English ("daemon", "middleware", "RPC gateway", "filter graph"), define it immediately and remind the reader how it manifests in this repository (for example, by naming the files or commands where it appears). Do not say "as defined previously" or "according to the architecture doc." Include the needed explanation here, even if you repeat yourself.
+ 
+Avoid common failure modes. Do not rely on undefined jargon. Do not describe "the letter of a feature" so narrowly that the resulting code compiles but does nothing meaningful. Do not outsource key decisions to the reader. When ambiguity exists, resolve it in the plan itself and explain why you chose that path. Err on the side of over-explaining user-visible effects and under-specifying incidental implementation details.
+ 
+Anchor the plan with observable outcomes. State what the user can do after implementation, the commands to run, and the outputs they should see. Acceptance should be phrased as behavior a human can verify ("after starting the server, navigating to [http://localhost:8080/health](http://localhost:8080/health) returns HTTP 200 with body OK") rather than internal attributes ("added a HealthCheck struct"). If a change is internal, explain how its impact can still be demonstrated (for example, by running tests that fail before and pass after, and by showing a scenario that uses the new behavior).
+ 
+Specify repository context explicitly. Name files with full repository-relative paths, name functions and modules precisely, and describe where new files should be created. If touching multiple areas, include a short orientation paragraph that explains how those parts fit together so a novice can navigate confidently. When running commands, show the working directory and exact command line. When outcomes depend on environment, state the assumptions and provide alternatives when reasonable.
+ 
+Be idempotent and safe. Write the steps so they can be run multiple times without causing damage or drift. If a step can fail halfway, include how to retry or adapt. If a migration or destructive operation is necessary, spell out backups or safe fallbacks. Prefer additive, testable changes that can be validated as you go.
+ 
+Validation is not optional. Include instructions to run tests, to start the system if applicable, and to observe it doing something useful. Describe comprehensive testing for any new features or capabilities. Include expected outputs and error messages so a novice can tell success from failure. Where possible, show how to prove that the change is effective beyond compilation (for example, through a small end-to-end scenario, a CLI invocation, or an HTTP request/response transcript). State the exact test commands appropriate to the project’s toolchain and how to interpret their results.
+ 
+Capture evidence. When your steps produce terminal output, short diffs, or logs, include them inside the single fenced block as indented examples. Keep them concise and focused on what proves success. If you need to include a patch, prefer file-scoped diffs or small excerpts that a reader can recreate by following your instructions rather than pasting large blobs.
+ 
+## Milestones
+ 
+Milestones are narrative, not bureaucracy. If you break the work into milestones, introduce each with a brief paragraph that describes the scope, what will exist at the end of the milestone that did not exist before, the commands to run, and the acceptance you expect to observe. Keep it readable as a story: goal, work, result, proof. Progress and milestones are distinct: milestones tell the story, progress tracks granular work. Both must exist. Never abbreviate a milestone merely for the sake of brevity; do not leave out details that could be crucial to a future implementation.
+ 
+Each milestone must be independently verifiable and incrementally implement the overall goal of the execution plan.
+ 
+## Living plans and design decisions
+ 
+* ExecPlans are living documents. As you make key design decisions, update the plan to record both the decision and the thinking behind it. Record all decisions in the `Decision Log` section.
+* ExecPlans must contain and maintain a `Progress` section, a `Surprises & Discoveries` section, a `Decision Log`, and an `Outcomes & Retrospective` section. These are not optional.
+* When you discover optimizer behavior, performance tradeoffs, unexpected bugs, or inverse/unapply semantics that shaped your approach, capture those observations in the `Surprises & Discoveries` section with short evidence snippets (test output is ideal).
+* If you change course mid-implementation, document why in the `Decision Log` and reflect the implications in `Progress`. Plans are guides for the next contributor as much as checklists for you.
+* At completion of a major task or the full plan, write an `Outcomes & Retrospective` entry summarizing what was achieved, what remains, and lessons learned.
+ 
+## Prototyping milestones and parallel implementations
+ 
+It is acceptable—and often encouraged—to include explicit prototyping milestones when they de-risk a larger change. Examples: adding a low-level operator to a dependency to validate feasibility, or exploring two composition orders while measuring optimizer effects. Keep prototypes additive and testable. Clearly label the scope as “prototyping”; describe how to run and observe results; and state the criteria for promoting or discarding the prototype.
+ 
+Prefer additive code changes followed by subtractions that keep tests passing. Parallel implementations (e.g., keeping an adapter alongside an older path during migration) are fine when they reduce risk or enable tests to continue passing during a large migration. Describe how to validate both paths and how to retire one safely with tests. When working with multiple new libraries or feature areas, consider creating spikes that evaluate the feasibility of these features _independently_ of one another, proving that the external library performs as expected and implements the features we need in isolation.
+ 
+## Skeleton of a Good ExecPlan
+ 
+```md
+# <Short, action-oriented description>
+ 
+This ExecPlan is a living document. The sections `Progress`, `Surprises & Discoveries`, `Decision Log`, and `Outcomes & Retrospective` must be kept up to date as work proceeds.
+ 
+If a PLANS.md file is checked into the repo, reference the path to that file here from the repository root and note that this document must be maintained in accordance with PLANS.md.
+ 
+## Purpose / Big Picture
+ 
+Explain in a few sentences what someone gains after this change and how they can see it working. State the user-visible behavior you will enable.
+ 
+## Progress
+ 
+Use a list with checkboxes to summarize granular steps. Every stopping point must be documented here, even if it requires splitting a partially completed task into two (“done” vs. “remaining”). This section must always reflect the actual current state of the work.
+ 
+- [x] (2025-10-01 13:00Z) Example completed step.
+- [ ] Example incomplete step.
+- [ ] Example partially completed step (completed: X; remaining: Y).
+ 
+Use timestamps to measure rates of progress.
+ 
+## Surprises & Discoveries
+ 
+Document unexpected behaviors, bugs, optimizations, or insights discovered during implementation. Provide concise evidence.
+ 
+- Observation: …
+  Evidence: …
+ 
+## Decision Log
+ 
+Record every decision made while working on the plan in the format:
+ 
+- Decision: …
+  Rationale: …
+  Date/Author: …
+ 
+## Outcomes & Retrospective
+ 
+Summarize outcomes, gaps, and lessons learned at major milestones or at completion. Compare the result against the original purpose.
+ 
+## Context and Orientation
+ 
+Describe the current state relevant to this task as if the reader knows nothing. Name the key files and modules by full path. Define any non-obvious term you will use. Do not refer to prior plans.
+ 
+## Plan of Work
+ 
+Describe, in prose, the sequence of edits and additions. For each edit, name the file and location (function, module) and what to insert or change. Keep it concrete and minimal.
+ 
+## Concrete Steps
+ 
+State the exact commands to run and where to run them (working directory). When a command generates output, show a short expected transcript so the reader can compare. This section must be updated as work proceeds.
+ 
+## Validation and Acceptance
+ 
+Describe how to start or exercise the system and what to observe. Phrase acceptance as behavior, with specific inputs and outputs. If tests are involved, say "run <project’s test command> and expect <N> passed; the new test <name> fails before the change and passes after".
+ 
+## Idempotence and Recovery
+ 
+If steps can be repeated safely, say so. If a step is risky, provide a safe retry or rollback path. Keep the environment clean after completion.
+ 
+## Artifacts and Notes
+ 
+Include the most important transcripts, diffs, or snippets as indented examples. Keep them concise and focused on what proves success.
+ 
+## Interfaces and Dependencies
+ 
+Be prescriptive. Name the libraries, modules, and services to use and why. Specify the types, traits/interfaces, and function signatures that must exist at the end of the milestone. Prefer stable names and paths such as `crate::module::function` or `package.submodule.Interface`. E.g.:
+ 
+In crates/foo/planner.rs, define:
+ 
+    pub trait Planner {
+        fn plan(&self, observed: &Observed) -> Vec<Action>;
+    }
+```
+ 
+If you follow the guidance above, a single, stateless agent -- or a human novice -- can read your ExecPlan from top to bottom and produce a working, observable result. That is the bar: SELF-CONTAINED, SELF-SUFFICIENT, NOVICE-GUIDING, OUTCOME-FOCUSED.
+ 
+When you revise a plan, you must ensure your changes are comprehensively reflected across all sections, including the living document sections, and you must write a note at the bottom of the plan describing the change and the reason why. ExecPlans must describe not just the what but the why for almost everything.
--- a/codex-cli/bin/codex.js
+++ b/codex-cli/bin/codex.js
@@ -95,6 +95,14 @@ function detectPackageManager() {
    return "bun";
  }

+
+  if (
+    __dirname.includes(".bun/install/global") ||
+    __dirname.includes(".bun\\install\\global")
+  ) {
+    return "bun";
+  }
+
  return userAgent ? "npm" : null;
 }

--- a/codex-cli/scripts/build_npm_package.py
+++ b/codex-cli/scripts/build_npm_package.py
@@ -20,9 +20,14 @@ PACKAGE_NATIVE_COMPONENTS: dict[str, list[str]] = {
    "codex-responses-api-proxy": ["codex-responses-api-proxy"],
    "codex-sdk": ["codex"],
 }
+WINDOWS_ONLY_COMPONENTS: dict[str, list[str]] = {
+    "codex": ["codex-windows-sandbox-setup", "codex-command-runner"],
+}
 COMPONENT_DEST_DIR: dict[str, str] = {
    "codex": "codex",
    "codex-responses-api-proxy": "codex-responses-api-proxy",
+    "codex-windows-sandbox-setup": "codex",
+    "codex-command-runner": "codex",
    "rg": "path",
 }

@@ -103,7 +108,7 @@ def main() -> int:
                    "pointing to a directory containing pre-installed binaries."
                )

-            copy_native_binaries(vendor_src, staging_dir, native_components)
+            copy_native_binaries(vendor_src, staging_dir, package, native_components)

        if release_version:
            staging_dir_str = str(staging_dir)
@@ -232,7 +237,12 @@ def stage_codex_sdk_sources(staging_dir: Path) -> None:
        shutil.copy2(license_src, staging_dir / "LICENSE")


-def copy_native_binaries(vendor_src: Path, staging_dir: Path, components: list[str]) -> None:
+def copy_native_binaries(
+    vendor_src: Path,
+    staging_dir: Path,
+    package: str,
+    components: list[str],
+) -> None:
    vendor_src = vendor_src.resolve()
    if not vendor_src.exists():
        raise RuntimeError(f"Vendor source directory not found: {vendor_src}")
@@ -250,6 +260,9 @@ def copy_native_binaries(vendor_src: Path, staging_dir: Path, components: list[s
        if not target_dir.is_dir():
            continue

+        if "windows" in target_dir.name:
+            components_set.update(WINDOWS_ONLY_COMPONENTS.get(package, []))
+
        dest_target_dir = vendor_dest / target_dir.name
        dest_target_dir.mkdir(parents=True, exist_ok=True)

--- a/codex-cli/scripts/install_native_deps.py
+++ b/codex-cli/scripts/install_native_deps.py
@@ -36,8 +36,11 @@ class BinaryComponent:
    artifact_prefix: str  # matches the artifact filename prefix (e.g. codex-<target>.zst)
    dest_dir: str  # directory under vendor/<target>/ where the binary is installed
    binary_basename: str  # executable name inside dest_dir (before optional .exe)
+    targets: tuple[str, ...] | None = None  # limit installation to specific targets


+WINDOWS_TARGETS = tuple(target for target in BINARY_TARGETS if "windows" in target)
+
 BINARY_COMPONENTS = {
    "codex": BinaryComponent(
        artifact_prefix="codex",
@@ -49,6 +52,18 @@ BINARY_COMPONENTS = {
        dest_dir="codex-responses-api-proxy",
        binary_basename="codex-responses-api-proxy",
    ),
+    "codex-windows-sandbox-setup": BinaryComponent(
+        artifact_prefix="codex-windows-sandbox-setup",
+        dest_dir="codex",
+        binary_basename="codex-windows-sandbox-setup",
+        targets=WINDOWS_TARGETS,
+    ),
+    "codex-command-runner": BinaryComponent(
+        artifact_prefix="codex-command-runner",
+        dest_dir="codex",
+        binary_basename="codex-command-runner",
+        targets=WINDOWS_TARGETS,
+    ),
 }

 RG_TARGET_PLATFORM_PAIRS: list[tuple[str, str]] = [
@@ -79,7 +94,8 @@ def parse_args() -> argparse.Namespace:
        choices=tuple(list(BINARY_COMPONENTS) + ["rg"]),
        help=(
            "Limit installation to the specified components."
-            " May be repeated. Defaults to 'codex' and 'rg'."
+            " May be repeated. Defaults to codex, codex-windows-sandbox-setup,"
+            " codex-command-runner, and rg."
        ),
    )
    parser.add_argument(
@@ -101,7 +117,12 @@ def main() -> int:
    vendor_dir = codex_cli_root / VENDOR_DIR_NAME
    vendor_dir.mkdir(parents=True, exist_ok=True)

-    components = args.components or ["codex", "rg"]
+    components = args.components or [
+        "codex",
+        "codex-windows-sandbox-setup",
+        "codex-command-runner",
+        "rg",
+    ]

    workflow_url = (args.workflow_url or DEFAULT_WORKFLOW_URL).strip()
    if not workflow_url:
@@ -116,8 +137,7 @@ def main() -> int:
        install_binary_components(
            artifacts_dir,
            vendor_dir,
-            BINARY_TARGETS,
-            [name for name in components if name in BINARY_COMPONENTS],
+            [BINARY_COMPONENTS[name] for name in components if name in BINARY_COMPONENTS],
        )

    if "rg" in components:
@@ -206,23 +226,19 @@ def _download_artifacts(workflow_id: str, dest_dir: Path) -> None:
 def install_binary_components(
    artifacts_dir: Path,
    vendor_dir: Path,
-    targets: Iterable[str],
-    component_names: Sequence[str],
+    selected_components: Sequence[BinaryComponent],
 ) -> None:
-    selected_components = [BINARY_COMPONENTS[name] for name in component_names if name in BINARY_COMPONENTS]
    if not selected_components:
        return

-    targets = list(targets)
-    if not targets:
-        return
-
    for component in selected_components:
+        component_targets = list(component.targets or BINARY_TARGETS)
+
        print(
            f"Installing {component.binary_basename} binaries for targets: "
-            + ", ".join(targets)
+            + ", ".join(component_targets)
        )
-        max_workers = min(len(targets), max(1, (os.cpu_count() or 1)))
+        max_workers = min(len(component_targets), max(1, (os.cpu_count() or 1)))
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = {
                executor.submit(
@@ -232,7 +248,7 @@ def install_binary_components(
                    target,
                    component,
                ): target
-                for target in targets
+                for target in component_targets
            }
            for future in as_completed(futures):
                installed_path = future.result()
--- a/codex-rs/Cargo.lock
+++ b/codex-rs/Cargo.lock
--- a/codex-rs/Cargo.toml
+++ b/codex-rs/Cargo.toml
@@ -34,6 +34,8 @@ members = [
    "stdio-to-uds",
    "otel",
    "tui",
+    "tui2",
+    "utils/absolute-path",
    "utils/git",
    "utils/cache",
    "utils/image",
@@ -88,6 +90,8 @@ codex-responses-api-proxy = { path = "responses-api-proxy" }
 codex-rmcp-client = { path = "rmcp-client" }
 codex-stdio-to-uds = { path = "stdio-to-uds" }
 codex-tui = { path = "tui" }
+codex-tui2 = { path = "tui2" }
+codex-utils-absolute-path = { path = "utils/absolute-path" }
 codex-utils-cache = { path = "utils/cache" }
 codex-utils-image = { path = "utils/image" }
 codex-utils-json-to-toml = { path = "utils/json-to-toml" }
@@ -96,6 +100,7 @@ codex-utils-readiness = { path = "utils/readiness" }
 codex-utils-string = { path = "utils/string" }
 codex-windows-sandbox = { path = "windows-sandbox-rs" }
 core_test_support = { path = "core/tests/common" }
+exec_server_test_support = { path = "exec-server/tests/common" }
 mcp-types = { path = "mcp-types" }
 mcp_test_support = { path = "mcp-server/tests/common" }

@@ -104,7 +109,6 @@ allocative = "0.3.3"
 ansi-to-tui = "7.0.0"
 anyhow = "1"
 arboard = { version = "3", features = ["wayland-data-control"] }
-askama = "0.14"
 assert_cmd = "2"
 assert_matches = "1.5.0"
 async-channel = "2.3.1"
@@ -137,15 +141,16 @@ icu_locale_core = "2.1"
 icu_provider = { version = "2.1", features = ["sync"] }
 ignore = "0.4.23"
 image = { version = "^0.25.9", default-features = false }
+include_dir = "0.7.4"
 indexmap = "2.12.0"
-insta = "1.43.2"
+insta = "1.44.3"
 itertools = "0.14.0"
 keyring = { version = "3.6", default-features = false }
 landlock = "0.4.1"
 lazy_static = "1"
 libc = "0.2.177"
 log = "0.4"
-lru = "0.12.5"
+lru = "0.16.2"
 maplit = "1.0.2"
 mime_guess = "2.0.5"
 multimap = "0.10.0"
@@ -158,6 +163,7 @@ opentelemetry-appender-tracing = "0.30.0"
 opentelemetry-otlp = "0.30.0"
 opentelemetry-semantic-conventions = "0.30.0"
 opentelemetry_sdk = "0.30.0"
+tracing-opentelemetry = "0.31.0"
 os_info = "3.12.0"
 owo-colors = "4.2.0"
 path-absolutize = "3.1.1"
@@ -175,17 +181,17 @@ reqwest = "0.12"
 rmcp = { version = "0.10.0", default-features = false }
 schemars = "0.8.22"
 seccompiler = "0.5.0"
-sentry = "0.34.0"
+sentry = "0.46.0"
 serde = "1"
 serde_json = "1"
-serde_yaml = "0.9"
 serde_with = "3.16"
+serde_yaml = "0.9"
 serial_test = "3.2.0"
 sha1 = "0.10.6"
 sha2 = "0.10"
 shlex = "1.3.0"
 similar = "2.7.0"
-socket2 = "0.6.0"
+socket2 = "0.6.1"
 starlark = "0.13.0"
 strum = "0.27.2"
 strum_macros = "0.27.2"
@@ -222,7 +228,7 @@ vt100 = "0.16.2"
 walkdir = "2.5.0"
 webbrowser = "1.0"
 which = "6"
-wildmatch = "2.5.0"
+wildmatch = "2.6.1"

 wiremock = "0.6"
 zeroize = "1.8.2"
--- a/codex-rs/README.md
+++ b/codex-rs/README.md
@@ -46,7 +46,7 @@ Use `codex mcp` to add/list/get/remove MCP server launchers defined in `config.t

 ### Notifications

-You can enable notifications by configuring a script that is run whenever the agent finishes a turn. The [notify documentation](../docs/config.md#notify) includes a detailed example that explains how to get desktop notifications via [terminal-notifier](https://github.com/julienXX/terminal-notifier) on macOS.
+You can enable notifications by configuring a script that is run whenever the agent finishes a turn. The [notify documentation](../docs/config.md#notify) includes a detailed example that explains how to get desktop notifications via [terminal-notifier](https://github.com/julienXX/terminal-notifier) on macOS. When Codex detects that it is running under WSL 2 inside Windows Terminal (`WT_SESSION` is set), the TUI automatically falls back to native Windows toast notifications so approval prompts and completed turns surface even though Windows Terminal does not implement OSC 9.

 ### `codex exec` to run Codex programmatically/non-interactively

--- a/codex-rs/app-server-protocol/Cargo.toml
+++ b/codex-rs/app-server-protocol/Cargo.toml
@@ -15,6 +15,7 @@ workspace = true
 anyhow = { workspace = true }
 clap = { workspace = true, features = ["derive"] }
 codex-protocol = { workspace = true }
+codex-utils-absolute-path = { workspace = true }
 mcp-types = { workspace = true }
 schemars = { workspace = true }
 serde = { workspace = true, features = ["derive"] }
--- a/codex-rs/app-server-protocol/src/export.rs
+++ b/codex-rs/app-server-protocol/src/export.rs
@@ -31,6 +31,7 @@ use std::process::Command;
 use ts_rs::TS;

 const HEADER: &str = "// GENERATED CODE! DO NOT MODIFY BY HAND!\n\n";
+const IGNORED_DEFINITIONS: &[&str] = &["Option<()>"];

 #[derive(Clone)]
 pub struct GeneratedSchema {
@@ -184,7 +185,6 @@ fn build_schema_bundle(schemas: Vec<GeneratedSchema>) -> Result<Value> {
        "ServerNotification",
        "ServerRequest",
    ];
-    const IGNORED_DEFINITIONS: &[&str] = &["Option<()>"];

    let namespaced_types = collect_namespaced_types(&schemas);
    let mut definitions = Map::new();
@@ -304,8 +304,11 @@ where
        out_dir.join(format!("{file_stem}.json"))
    };

-    write_pretty_json(out_path, &schema_value)
-        .with_context(|| format!("Failed to write JSON schema for {file_stem}"))?;
+    if !IGNORED_DEFINITIONS.contains(&logical_name) {
+        write_pretty_json(out_path, &schema_value)
+            .with_context(|| format!("Failed to write JSON schema for {file_stem}"))?;
+    }
+
    let namespace = match raw_namespace {
        Some("v1") | None => None,
        Some(ns) => Some(ns.to_string()),
--- a/codex-rs/app-server-protocol/src/protocol/common.rs
+++ b/codex-rs/app-server-protocol/src/protocol/common.rs
@@ -117,9 +117,9 @@ client_request_definitions! {
        params: v2::ThreadListParams,
        response: v2::ThreadListResponse,
    },
-    ThreadCompact => "thread/compact" {
-        params: v2::ThreadCompactParams,
-        response: v2::ThreadCompactResponse,
+    SkillsList => "skills/list" {
+        params: v2::SkillsListParams,
+        response: v2::SkillsListResponse,
    },
    TurnStart => "turn/start" {
        params: v2::TurnStartParams,
@@ -139,9 +139,14 @@ client_request_definitions! {
        response: v2::ModelListResponse,
    },

-    McpServersList => "mcpServers/list" {
-        params: v2::ListMcpServersParams,
-        response: v2::ListMcpServersResponse,
+    McpServerOauthLogin => "mcpServer/oauth/login" {
+        params: v2::McpServerOauthLoginParams,
+        response: v2::McpServerOauthLoginResponse,
+    },
+
+    McpServerStatusList => "mcpServerStatus/list" {
+        params: v2::ListMcpServerStatusParams,
+        response: v2::ListMcpServerStatusResponse,
    },

    LoginAccount => "account/login/start" {
@@ -520,10 +525,14 @@ server_notification_definitions! {
    TurnPlanUpdated => "turn/plan/updated" (v2::TurnPlanUpdatedNotification),
    ItemStarted => "item/started" (v2::ItemStartedNotification),
    ItemCompleted => "item/completed" (v2::ItemCompletedNotification),
+    /// This event is internal-only. Used by Codex Cloud.
+    RawResponseItemCompleted => "rawResponseItem/completed" (v2::RawResponseItemCompletedNotification),
    AgentMessageDelta => "item/agentMessage/delta" (v2::AgentMessageDeltaNotification),
    CommandExecutionOutputDelta => "item/commandExecution/outputDelta" (v2::CommandExecutionOutputDeltaNotification),
+    TerminalInteraction => "item/commandExecution/terminalInteraction" (v2::TerminalInteractionNotification),
    FileChangeOutputDelta => "item/fileChange/outputDelta" (v2::FileChangeOutputDeltaNotification),
    McpToolCallProgress => "item/mcpToolCall/progress" (v2::McpToolCallProgressNotification),
+    McpServerOauthLoginCompleted => "mcpServer/oauthLogin/completed" (v2::McpServerOauthLoginCompletedNotification),
    AccountUpdated => "account/updated" (v2::AccountUpdatedNotification),
    AccountRateLimitsUpdated => "account/rateLimits/updated" (v2::AccountRateLimitsUpdatedNotification),
    ReasoningSummaryTextDelta => "item/reasoning/summaryTextDelta" (v2::ReasoningSummaryTextDeltaNotification),
@@ -647,7 +656,6 @@ mod tests {
            command: vec!["echo".to_string(), "hello".to_string()],
            cwd: PathBuf::from("/tmp"),
            reason: Some("because tests".to_string()),
-            risk: None,
            parsed_cmd: vec![ParsedCommand::Unknown {
                cmd: "echo hello".to_string(),
            }],
@@ -667,7 +675,6 @@ mod tests {
                    "command": ["echo", "hello"],
                    "cwd": "/tmp",
                    "reason": "because tests",
-                    "risk": null,
                    "parsedCmd": [
                        {
                            "type": "unknown",
--- a/codex-rs/app-server-protocol/src/protocol/v1.rs
+++ b/codex-rs/app-server-protocol/src/protocol/v1.rs
@@ -13,10 +13,10 @@ use codex_protocol::protocol::AskForApproval;
 use codex_protocol::protocol::EventMsg;
 use codex_protocol::protocol::FileChange;
 use codex_protocol::protocol::ReviewDecision;
-use codex_protocol::protocol::SandboxCommandAssessment;
 use codex_protocol::protocol::SandboxPolicy;
 use codex_protocol::protocol::SessionSource;
 use codex_protocol::protocol::TurnAbortReason;
+use codex_utils_absolute_path::AbsolutePathBuf;
 use schemars::JsonSchema;
 use serde::Deserialize;
 use serde::Serialize;
@@ -226,7 +226,6 @@ pub struct ExecCommandApprovalParams {
    pub command: Vec<String>,
    pub cwd: PathBuf,
    pub reason: Option<String>,
-    pub risk: Option<SandboxCommandAssessment>,
    pub parsed_cmd: Vec<ParsedCommand>,
 }

@@ -361,7 +360,7 @@ pub struct Tools {
 #[serde(rename_all = "camelCase")]
 pub struct SandboxSettings {
    #[serde(default)]
-    pub writable_roots: Vec<PathBuf>,
+    pub writable_roots: Vec<AbsolutePathBuf>,
    pub network_access: Option<bool>,
    pub exclude_tmpdir_env_var: Option<bool>,
    pub exclude_slash_tmp: Option<bool>,
--- a/codex-rs/app-server-protocol/src/protocol/v2.rs
+++ b/codex-rs/app-server-protocol/src/protocol/v2.rs
@@ -3,8 +3,11 @@ use std::path::PathBuf;

 use crate::protocol::common::AuthMode;
 use codex_protocol::account::PlanType;
-use codex_protocol::approvals::SandboxCommandAssessment as CoreSandboxCommandAssessment;
+use codex_protocol::approvals::ExecPolicyAmendment as CoreExecPolicyAmendment;
+use codex_protocol::config_types::ForcedLoginMethod;
 use codex_protocol::config_types::ReasoningSummary;
+use codex_protocol::config_types::SandboxMode as CoreSandboxMode;
+use codex_protocol::config_types::Verbosity;
 use codex_protocol::items::AgentMessageContent as CoreAgentMessageContent;
 use codex_protocol::items::TurnItem as CoreTurnItem;
 use codex_protocol::models::ResponseItem;
@@ -12,14 +15,19 @@ use codex_protocol::openai_models::ReasoningEffort;
 use codex_protocol::parse_command::ParsedCommand as CoreParsedCommand;
 use codex_protocol::plan_tool::PlanItemArg as CorePlanItemArg;
 use codex_protocol::plan_tool::StepStatus as CorePlanStepStatus;
+use codex_protocol::protocol::AskForApproval as CoreAskForApproval;
 use codex_protocol::protocol::CodexErrorInfo as CoreCodexErrorInfo;
 use codex_protocol::protocol::CreditsSnapshot as CoreCreditsSnapshot;
 use codex_protocol::protocol::RateLimitSnapshot as CoreRateLimitSnapshot;
 use codex_protocol::protocol::RateLimitWindow as CoreRateLimitWindow;
 use codex_protocol::protocol::SessionSource as CoreSessionSource;
+use codex_protocol::protocol::SkillErrorInfo as CoreSkillErrorInfo;
+use codex_protocol::protocol::SkillMetadata as CoreSkillMetadata;
+use codex_protocol::protocol::SkillScope as CoreSkillScope;
 use codex_protocol::protocol::TokenUsage as CoreTokenUsage;
 use codex_protocol::protocol::TokenUsageInfo as CoreTokenUsageInfo;
 use codex_protocol::user_input::UserInput as CoreUserInput;
+use codex_utils_absolute_path::AbsolutePathBuf;
 use mcp_types::ContentBlock as McpContentBlock;
 use mcp_types::Resource as McpResource;
 use mcp_types::ResourceTemplate as McpResourceTemplate;
@@ -122,17 +130,68 @@ impl From<CoreCodexErrorInfo> for CodexErrorInfo {
    }
 }

-v2_enum_from_core!(
-    pub enum AskForApproval from codex_protocol::protocol::AskForApproval {
-        UnlessTrusted, OnFailure, OnRequest, Never
-    }
-);
+#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, JsonSchema, TS)]
+#[serde(rename_all = "kebab-case")]
+#[ts(rename_all = "kebab-case", export_to = "v2/")]
+pub enum AskForApproval {
+    #[serde(rename = "untrusted")]
+    #[ts(rename = "untrusted")]
+    UnlessTrusted,
+    OnFailure,
+    OnRequest,
+    Never,
+}

-v2_enum_from_core!(
-    pub enum SandboxMode from codex_protocol::config_types::SandboxMode {
-        ReadOnly, WorkspaceWrite, DangerFullAccess
+impl AskForApproval {
+    pub fn to_core(self) -> CoreAskForApproval {
+        match self {
+            AskForApproval::UnlessTrusted => CoreAskForApproval::UnlessTrusted,
+            AskForApproval::OnFailure => CoreAskForApproval::OnFailure,
+            AskForApproval::OnRequest => CoreAskForApproval::OnRequest,
+            AskForApproval::Never => CoreAskForApproval::Never,
+        }
    }
-);
+}
+
+impl From<CoreAskForApproval> for AskForApproval {
+    fn from(core: CoreAskForApproval) -> Self {
+        match core {
+            CoreAskForApproval::UnlessTrusted => Self::UnlessTrusted,
+            CoreAskForApproval::OnFailure => Self::OnFailure,
+            CoreAskForApproval::OnRequest => Self::OnRequest,
+            CoreAskForApproval::Never => Self::Never,
+        }
+    }
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, JsonSchema, TS)]
+#[serde(rename_all = "kebab-case")]
+#[ts(rename_all = "kebab-case", export_to = "v2/")]
+pub enum SandboxMode {
+    ReadOnly,
+    WorkspaceWrite,
+    DangerFullAccess,
+}
+
+impl SandboxMode {
+    pub fn to_core(self) -> CoreSandboxMode {
+        match self {
+            Self::ReadOnly => CoreSandboxMode::ReadOnly,
+            Self::WorkspaceWrite => CoreSandboxMode::WorkspaceWrite,
+            Self::DangerFullAccess => CoreSandboxMode::DangerFullAccess,
+        }
+    }
+}
+
+impl From<CoreSandboxMode> for SandboxMode {
+    fn from(core: CoreSandboxMode) -> Self {
+        match core {
+            CoreSandboxMode::ReadOnly => Self::ReadOnly,
+            CoreSandboxMode::WorkspaceWrite => Self::WorkspaceWrite,
+            CoreSandboxMode::DangerFullAccess => Self::DangerFullAccess,
+        }
+    }
+}

 v2_enum_from_core!(
    pub enum ReviewDelivery from codex_protocol::protocol::ReviewDelivery {
@@ -150,21 +209,97 @@ v2_enum_from_core!(
 );

 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)]
-#[serde(rename_all = "camelCase")]
+#[serde(tag = "type", rename_all = "camelCase")]
+#[ts(tag = "type")]
 #[ts(export_to = "v2/")]
-pub enum ConfigLayerName {
-    Mdm,
-    System,
+pub enum ConfigLayerSource {
+    /// Managed preferences layer delivered by MDM (macOS only).
+    #[serde(rename_all = "camelCase")]
+    #[ts(rename_all = "camelCase")]
+    Mdm { domain: String, key: String },
+    /// Managed config layer from a file (usually `managed_config.toml`).
+    #[serde(rename_all = "camelCase")]
+    #[ts(rename_all = "camelCase")]
+    System { file: AbsolutePathBuf },
+    /// Session-layer overrides supplied via `-c`/`--config`.
    SessionFlags,
-    User,
+    /// User config layer from a file (usually `config.toml`).
+    #[serde(rename_all = "camelCase")]
+    #[ts(rename_all = "camelCase")]
+    User { file: AbsolutePathBuf },
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Default, JsonSchema, TS)]
+#[serde(rename_all = "snake_case")]
+#[ts(export_to = "v2/")]
+pub struct SandboxWorkspaceWrite {
+    #[serde(default)]
+    pub writable_roots: Vec<PathBuf>,
+    #[serde(default)]
+    pub network_access: bool,
+    #[serde(default)]
+    pub exclude_tmpdir_env_var: bool,
+    #[serde(default)]
+    pub exclude_slash_tmp: bool,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "snake_case")]
+#[ts(export_to = "v2/")]
+pub struct ToolsV2 {
+    #[serde(alias = "web_search_request")]
+    pub web_search: Option<bool>,
+    pub view_image: Option<bool>,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "snake_case")]
+#[ts(export_to = "v2/")]
+pub struct ProfileV2 {
+    pub model: Option<String>,
+    pub model_provider: Option<String>,
+    pub approval_policy: Option<AskForApproval>,
+    pub model_reasoning_effort: Option<ReasoningEffort>,
+    pub model_reasoning_summary: Option<ReasoningSummary>,
+    pub model_verbosity: Option<Verbosity>,
+    pub chatgpt_base_url: Option<String>,
+    #[serde(default, flatten)]
+    pub additional: HashMap<String, JsonValue>,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "snake_case")]
+#[ts(export_to = "v2/")]
+pub struct Config {
+    pub model: Option<String>,
+    pub review_model: Option<String>,
+    pub model_context_window: Option<i64>,
+    pub model_auto_compact_token_limit: Option<i64>,
+    pub model_provider: Option<String>,
+    pub approval_policy: Option<AskForApproval>,
+    pub sandbox_mode: Option<SandboxMode>,
+    pub sandbox_workspace_write: Option<SandboxWorkspaceWrite>,
+    pub forced_chatgpt_workspace_id: Option<String>,
+    pub forced_login_method: Option<ForcedLoginMethod>,
+    pub tools: Option<ToolsV2>,
+    pub profile: Option<String>,
+    #[serde(default)]
+    pub profiles: HashMap<String, ProfileV2>,
+    pub instructions: Option<String>,
+    pub developer_instructions: Option<String>,
+    pub compact_prompt: Option<String>,
+    pub model_reasoning_effort: Option<ReasoningEffort>,
+    pub model_reasoning_summary: Option<ReasoningSummary>,
+    pub model_verbosity: Option<Verbosity>,
+    #[serde(default, flatten)]
+    pub additional: HashMap<String, JsonValue>,
 }

 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
 #[serde(rename_all = "camelCase")]
 #[ts(export_to = "v2/")]
 pub struct ConfigLayerMetadata {
-    pub name: ConfigLayerName,
-    pub source: String,
+    pub name: ConfigLayerSource,
    pub version: String,
 }

@@ -172,8 +307,7 @@ pub struct ConfigLayerMetadata {
 #[serde(rename_all = "camelCase")]
 #[ts(export_to = "v2/")]
 pub struct ConfigLayer {
-    pub name: ConfigLayerName,
-    pub source: String,
+    pub name: ConfigLayerSource,
    pub version: String,
    pub config: JsonValue,
 }
@@ -237,7 +371,7 @@ pub struct ConfigReadParams {
 #[serde(rename_all = "camelCase")]
 #[ts(export_to = "v2/")]
 pub struct ConfigReadResponse {
-    pub config: JsonValue,
+    pub config: Config,
    pub origins: HashMap<String, ConfigLayerMetadata>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub layers: Option<Vec<ConfigLayer>>,
@@ -274,19 +408,16 @@ pub struct ConfigEdit {
    pub merge_strategy: MergeStrategy,
 }

-v2_enum_from_core!(
-    pub enum CommandRiskLevel from codex_protocol::approvals::SandboxRiskLevel {
-        Low,
-        Medium,
-        High
-    }
-);
-
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)]
 #[serde(rename_all = "camelCase")]
 #[ts(export_to = "v2/")]
 pub enum ApprovalDecision {
    Accept,
+    /// Approve and remember the approval for the session.
+    AcceptForSession,
+    AcceptWithExecpolicyAmendment {
+        execpolicy_amendment: ExecPolicyAmendment,
+    },
    Decline,
    Cancel,
 }
@@ -302,7 +433,7 @@ pub enum SandboxPolicy {
    #[ts(rename_all = "camelCase")]
    WorkspaceWrite {
        #[serde(default)]
-        writable_roots: Vec<PathBuf>,
+        writable_roots: Vec<AbsolutePathBuf>,
        #[serde(default)]
        network_access: bool,
        #[serde(default)]
@@ -356,28 +487,23 @@ impl From<codex_protocol::protocol::SandboxPolicy> for SandboxPolicy {
    }
 }

-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
-#[serde(rename_all = "camelCase")]
-#[ts(export_to = "v2/")]
-pub struct SandboxCommandAssessment {
-    pub description: String,
-    pub risk_level: CommandRiskLevel,
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)]
+#[serde(transparent)]
+#[ts(type = "Array<string>", export_to = "v2/")]
+pub struct ExecPolicyAmendment {
+    pub command: Vec<String>,
 }

-impl SandboxCommandAssessment {
-    pub fn into_core(self) -> CoreSandboxCommandAssessment {
-        CoreSandboxCommandAssessment {
-            description: self.description,
-            risk_level: self.risk_level.to_core(),
-        }
+impl ExecPolicyAmendment {
+    pub fn into_core(self) -> CoreExecPolicyAmendment {
+        CoreExecPolicyAmendment::new(self.command)
    }
 }

-impl From<CoreSandboxCommandAssessment> for SandboxCommandAssessment {
-    fn from(value: CoreSandboxCommandAssessment) -> Self {
+impl From<CoreExecPolicyAmendment> for ExecPolicyAmendment {
+    fn from(value: CoreExecPolicyAmendment) -> Self {
        Self {
-            description: value.description,
-            risk_level: CommandRiskLevel::from(value.risk_level),
+            command: value.command().to_vec(),
        }
    }
 }
@@ -555,10 +681,21 @@ pub struct CancelLoginAccountParams {
    pub login_id: String,
 }

-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+#[ts(rename_all = "camelCase")]
+#[ts(export_to = "v2/")]
+pub enum CancelLoginAccountStatus {
+    Canceled,
+    NotFound,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)]
 #[serde(rename_all = "camelCase")]
 #[ts(export_to = "v2/")]
-pub struct CancelLoginAccountResponse {}
+pub struct CancelLoginAccountResponse {
+    pub status: CancelLoginAccountStatus,
+}

 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
 #[serde(rename_all = "camelCase")]
@@ -633,7 +770,7 @@ pub struct ModelListResponse {
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
 #[serde(rename_all = "camelCase")]
 #[ts(export_to = "v2/")]
-pub struct ListMcpServersParams {
+pub struct ListMcpServerStatusParams {
    /// Opaque pagination cursor returned by a previous call.
    pub cursor: Option<String>,
    /// Optional page size; defaults to a server-defined value.
@@ -643,7 +780,7 @@ pub struct ListMcpServersParams {
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
 #[serde(rename_all = "camelCase")]
 #[ts(export_to = "v2/")]
-pub struct McpServer {
+pub struct McpServerStatus {
    pub name: String,
    pub tools: std::collections::HashMap<String, McpTool>,
    pub resources: Vec<McpResource>,
@@ -654,13 +791,33 @@ pub struct McpServer {
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
 #[serde(rename_all = "camelCase")]
 #[ts(export_to = "v2/")]
-pub struct ListMcpServersResponse {
-    pub data: Vec<McpServer>,
+pub struct ListMcpServerStatusResponse {
+    pub data: Vec<McpServerStatus>,
    /// Opaque cursor to pass to the next call to continue after the last item.
    /// If None, there are no more items to return.
    pub next_cursor: Option<String>,
 }

+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+#[ts(export_to = "v2/")]
+pub struct McpServerOauthLoginParams {
+    pub name: String,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    #[ts(optional)]
+    pub scopes: Option<Vec<String>>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    #[ts(optional)]
+    pub timeout_secs: Option<i64>,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+#[ts(export_to = "v2/")]
+pub struct McpServerOauthLoginResponse {
+    pub authorization_url: String,
+}
+
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
 #[serde(rename_all = "camelCase")]
 #[ts(export_to = "v2/")]
@@ -712,6 +869,12 @@ pub struct ThreadStartParams {
    pub config: Option<HashMap<String, JsonValue>>,
    pub base_instructions: Option<String>,
    pub developer_instructions: Option<String>,
+    /// If true, opt into emitting raw response items on the event stream.
+    ///
+    /// This is for internal use only (e.g. Codex Cloud).
+    /// (TODO): Figure out a better way to categorize internal / experimental events & protocols.
+    #[serde(default)]
+    pub experimental_raw_events: bool,
 }

 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
@@ -813,14 +976,89 @@ pub struct ThreadListResponse {
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
 #[serde(rename_all = "camelCase")]
 #[ts(export_to = "v2/")]
-pub struct ThreadCompactParams {
-    pub thread_id: String,
+pub struct SkillsListParams {
+    /// When empty, defaults to the current session working directory.
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub cwds: Vec<PathBuf>,
+
+    /// When true, bypass the skills cache and re-scan skills from disk.
+    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
+    pub force_reload: bool,
 }

 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
 #[serde(rename_all = "camelCase")]
 #[ts(export_to = "v2/")]
-pub struct ThreadCompactResponse {}
+pub struct SkillsListResponse {
+    pub data: Vec<SkillsListEntry>,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, JsonSchema, TS)]
+#[serde(rename_all = "snake_case")]
+#[ts(rename_all = "snake_case")]
+#[ts(export_to = "v2/")]
+pub enum SkillScope {
+    User,
+    Repo,
+    System,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+#[ts(export_to = "v2/")]
+pub struct SkillMetadata {
+    pub name: String,
+    pub description: String,
+    pub path: PathBuf,
+    pub scope: SkillScope,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+#[ts(export_to = "v2/")]
+pub struct SkillErrorInfo {
+    pub path: PathBuf,
+    pub message: String,
+}
+
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+#[ts(export_to = "v2/")]
+pub struct SkillsListEntry {
+    pub cwd: PathBuf,
+    pub skills: Vec<SkillMetadata>,
+    pub errors: Vec<SkillErrorInfo>,
+}
+
+impl From<CoreSkillMetadata> for SkillMetadata {
+    fn from(core: CoreSkillMetadata) -> Self {
+        SkillMetadata {
+            name: core.name,
+            description: core.description,
+            path: core.path,
+            scope: SkillScope::from(core.scope),
+        }
+    }
+}
+
+impl From<CoreSkillScope> for SkillScope {
+    fn from(core: CoreSkillScope) -> Self {
+        match core {
+            CoreSkillScope::User => SkillScope::User,
+            CoreSkillScope::Repo => SkillScope::Repo,
+            CoreSkillScope::System => SkillScope::System,
+        }
+    }
+}
+
+impl From<CoreSkillErrorInfo> for SkillErrorInfo {
+    fn from(core: CoreSkillErrorInfo) -> Self {
+        SkillErrorInfo {
+            path: core.path,
+            message: core.message,
+        }
+    }
+}

 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
 #[serde(rename_all = "camelCase")]
@@ -942,6 +1180,9 @@ pub struct TurnError {
 #[ts(export_to = "v2/")]
 pub struct ErrorNotification {
    pub error: TurnError,
+    /// True if the error is transient and the app-server process will automatically retry.
+    /// When true, the error does not interrupt the turn.
+    pub will_retry: bool,
    pub thread_id: String,
    pub turn_id: String,
 }
@@ -1141,6 +1382,9 @@ pub enum ThreadItem {
        arguments: JsonValue,
        result: Option<McpToolCallResult>,
        error: Option<McpToolCallError>,
+        /// The duration of the MCP tool call in milliseconds.
+        #[ts(type = "number | null")]
+        duration_ms: Option<i64>,
    },
    #[serde(rename_all = "camelCase")]
    #[ts(rename_all = "camelCase")]
@@ -1358,6 +1602,15 @@ pub struct ItemCompletedNotification {
    pub turn_id: String,
 }

+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+#[ts(export_to = "v2/")]
+pub struct RawResponseItemCompletedNotification {
+    pub thread_id: String,
+    pub turn_id: String,
+    pub item: ResponseItem,
+}
+
 // Item-specific progress notifications
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
 #[serde(rename_all = "camelCase")]
@@ -1404,6 +1657,17 @@ pub struct ReasoningTextDeltaNotification {
    pub content_index: i64,
 }

+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+#[ts(export_to = "v2/")]
+pub struct TerminalInteractionNotification {
+    pub thread_id: String,
+    pub turn_id: String,
+    pub item_id: String,
+    pub process_id: String,
+    pub stdin: String,
+}
+
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
 #[serde(rename_all = "camelCase")]
 #[ts(export_to = "v2/")]
@@ -1434,6 +1698,17 @@ pub struct McpToolCallProgressNotification {
    pub message: String,
 }

+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
+#[serde(rename_all = "camelCase")]
+#[ts(export_to = "v2/")]
+pub struct McpServerOauthLoginCompletedNotification {
+    pub name: String,
+    pub success: bool,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    #[ts(optional)]
+    pub error: Option<String>,
+}
+
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
 #[serde(rename_all = "camelCase")]
 #[ts(export_to = "v2/")]
@@ -1460,17 +1735,8 @@ pub struct CommandExecutionRequestApprovalParams {
    pub item_id: String,
    /// Optional explanatory reason (e.g. request for network access).
    pub reason: Option<String>,
-    /// Optional model-provided risk assessment describing the blocked command.
-    pub risk: Option<SandboxCommandAssessment>,
-}
-
-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
-#[serde(rename_all = "camelCase")]
-#[ts(export_to = "v2/")]
-pub struct CommandExecutionRequestAcceptSettings {
-    /// If true, automatically approve this command for the duration of the session.
-    #[serde(default)]
-    pub for_session: bool,
+    /// Optional proposed execpolicy amendment to allow similar commands without prompting.
+    pub proposed_execpolicy_amendment: Option<ExecPolicyAmendment>,
 }

 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
@@ -1478,10 +1744,6 @@ pub struct CommandExecutionRequestAcceptSettings {
 #[ts(export_to = "v2/")]
 pub struct CommandExecutionRequestApprovalResponse {
    pub decision: ApprovalDecision,
-    /// Optional approval settings for when the decision is `accept`.
-    /// Ignored if the decision is `decline` or `cancel`.
-    #[serde(default)]
-    pub accept_settings: Option<CommandExecutionRequestAcceptSettings>,
 }

 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
@@ -1518,6 +1780,7 @@ pub struct RateLimitSnapshot {
    pub primary: Option<RateLimitWindow>,
    pub secondary: Option<RateLimitWindow>,
    pub credits: Option<CreditsSnapshot>,
+    pub plan_type: Option<PlanType>,
 }

 impl From<CoreRateLimitSnapshot> for RateLimitSnapshot {
@@ -1526,6 +1789,7 @@ impl From<CoreRateLimitSnapshot> for RateLimitSnapshot {
            primary: value.primary.map(RateLimitWindow::from),
            secondary: value.secondary.map(RateLimitWindow::from),
            credits: value.credits.map(CreditsSnapshot::from),
+            plan_type: value.plan_type,
        }
    }
 }
@@ -1679,6 +1943,30 @@ mod tests {
        );
    }

+    #[test]
+    fn skills_list_params_serialization_uses_force_reload() {
+        assert_eq!(
+            serde_json::to_value(SkillsListParams {
+                cwds: Vec::new(),
+                force_reload: false,
+            })
+            .unwrap(),
+            json!({}),
+        );
+
+        assert_eq!(
+            serde_json::to_value(SkillsListParams {
+                cwds: vec![PathBuf::from("/repo")],
+                force_reload: true,
+            })
+            .unwrap(),
+            json!({
+                "cwds": ["/repo"],
+                "forceReload": true,
+            }),
+        );
+    }
+
    #[test]
    fn codex_error_info_serializes_http_status_code_in_camel_case() {
        let value = CodexErrorInfo::ResponseTooManyFailedAttempts {
--- a/codex-rs/app-server-test-client/src/main.rs
+++ b/codex-rs/app-server-test-client/src/main.rs
@@ -21,7 +21,6 @@ use codex_app_server_protocol::ApprovalDecision;
 use codex_app_server_protocol::AskForApproval;
 use codex_app_server_protocol::ClientInfo;
 use codex_app_server_protocol::ClientRequest;
-use codex_app_server_protocol::CommandExecutionRequestAcceptSettings;
 use codex_app_server_protocol::CommandExecutionRequestApprovalParams;
 use codex_app_server_protocol::CommandExecutionRequestApprovalResponse;
 use codex_app_server_protocol::FileChangeRequestApprovalParams;
@@ -554,6 +553,10 @@ impl CodexClient {
                    print!("{}", delta.delta);
                    std::io::stdout().flush().ok();
                }
+                ServerNotification::TerminalInteraction(delta) => {
+                    println!("[stdin sent: {}]", delta.stdin);
+                    std::io::stdout().flush().ok();
+                }
                ServerNotification::ItemStarted(payload) => {
                    println!("\n< item started: {:?}", payload.item);
                }
@@ -753,7 +756,7 @@ impl CodexClient {
            turn_id,
            item_id,
            reason,
-            risk,
+            proposed_execpolicy_amendment,
        } = params;

        println!(
@@ -762,13 +765,12 @@ impl CodexClient {
        if let Some(reason) = reason.as_deref() {
            println!("< reason: {reason}");
        }
-        if let Some(risk) = risk.as_ref() {
-            println!("< risk assessment: {risk:?}");
+        if let Some(execpolicy_amendment) = proposed_execpolicy_amendment.as_ref() {
+            println!("< proposed execpolicy amendment: {execpolicy_amendment:?}");
        }

        let response = CommandExecutionRequestApprovalResponse {
            decision: ApprovalDecision::Accept,
-            accept_settings: Some(CommandExecutionRequestAcceptSettings { for_session: false }),
        };
        self.send_server_request_response(request_id, &response)?;
        println!("< approved commandExecution request for item {item_id}");
--- a/codex-rs/app-server/Cargo.toml
+++ b/codex-rs/app-server/Cargo.toml
@@ -26,11 +26,12 @@ codex-login = { workspace = true }
 codex-protocol = { workspace = true }
 codex-app-server-protocol = { workspace = true }
 codex-feedback = { workspace = true }
+codex-rmcp-client = { workspace = true }
+codex-utils-absolute-path = { workspace = true }
 codex-utils-json-to-toml = { workspace = true }
 chrono = { workspace = true }
 serde = { workspace = true, features = ["derive"] }
 serde_json = { workspace = true }
-sha2 = { workspace = true }
 mcp-types = { workspace = true }
 tempfile = { workspace = true }
 toml = { workspace = true }
@@ -43,7 +44,6 @@ tokio = { workspace = true, features = [
 ] }
 tracing = { workspace = true, features = ["log"] }
 tracing-subscriber = { workspace = true, features = ["env-filter", "fmt"] }
-opentelemetry-appender-tracing = { workspace = true }
 uuid = { workspace = true, features = ["serde", "v7"] }

 [dev-dependencies]
--- a/codex-rs/app-server/README.md
+++ b/codex-rs/app-server/README.md
@@ -3,6 +3,7 @@
 `codex app-server` is the interface Codex uses to power rich interfaces such as the [Codex VS Code extension](https://marketplace.visualstudio.com/items?itemName=openai.chatgpt).

 ## Table of Contents
+
 - [Protocol](#protocol)
 - [Message Schema](#message-schema)
 - [Core Primitives](#core-primitives)
@@ -28,6 +29,7 @@ codex app-server generate-json-schema --out DIR
 ## Core Primitives

 The API exposes three top level primitives representing an interaction between a user and Codex:
+
 - **Thread**: A conversation between a user and the Codex agent. Each thread contains multiple turns.
 - **Turn**: One turn of the conversation, typically starting with a user message and finishing with an agent message. Each turn contains multiple items.
 - **Item**: Represents user inputs and agent outputs as part of the turn, persisted and used as the context for future conversations. Example items include user message, agent reasoning, agent message, shell command, file edit, etc.
@@ -49,13 +51,23 @@ Clients must send a single `initialize` request before invoking any other method
 Applications building on top of `codex app-server` should identify themselves via the `clientInfo` parameter.

 Example (from OpenAI's official VSCode extension):
+
 ```json
-{ "method": "initialize", "id": 0, "params": {
-    "clientInfo": { "name": "codex-vscode", "title": "Codex VS Code Extension", "version": "0.1.0" }
-} }
+{
+  "method": "initialize",
+  "id": 0,
+  "params": {
+    "clientInfo": {
+      "name": "codex-vscode",
+      "title": "Codex VS Code Extension",
+      "version": "0.1.0"
+    }
+  }
+}
 ```

 ## API Overview
+
 - `thread/start` — create a new thread; emits `thread/started` and auto-subscribes you to turn/item events for that thread.
 - `thread/resume` — reopen an existing thread by id so subsequent `turn/start` calls append to it.
 - `thread/list` — page through stored rollouts; supports cursor-based pagination and optional `modelProviders` filtering.
@@ -65,6 +77,9 @@ Example (from OpenAI's official VSCode extension):
 - `review/start` — kick off Codex’s automated reviewer for a thread; responds like `turn/start` and emits `item/started`/`item/completed` notifications with `enteredReviewMode` and `exitedReviewMode` items, plus a final assistant `agentMessage` containing the review.
 - `command/exec` — run a single command under the server sandbox without starting a thread/turn (handy for utilities and validation).
 - `model/list` — list available models (with reasoning effort options).
+- `skills/list` — list skills for one or more `cwd` values (optional `forceReload`).
+- `mcpServer/oauth/login` — start an OAuth login for a configured MCP server; returns an `authorization_url` and later emits `mcpServer/oauthLogin/completed` once the browser flow finishes.
+- `mcpServerStatus/list` — enumerate configured MCP servers with their tools, resources, resource templates, and auth status; supports cursor+limit pagination.
 - `feedback/upload` — submit a feedback report (classification + optional reason/logs and conversation_id); returns the tracking thread id.
 - `command/exec` — run a single command under the server sandbox without starting a thread/turn (handy for utilities and validation).
 - `config/read` — fetch the effective config on disk after resolving config layering.
@@ -105,6 +120,7 @@ To continue a stored session, call `thread/resume` with the `thread.id` you prev
 ### Example: List threads (with pagination & filters)

 `thread/list` lets you render a history UI. Pass any combination of:
+
 - `cursor` — opaque string from a prior response; omit for the first page.
 - `limit` — server defaults to a reasonable page size if unset.
 - `modelProviders` — restrict results to specific providers; unset, null, or an empty array will include all providers.
@@ -225,22 +241,32 @@ Codex streams the usual `turn/started` notification followed by an `item/started
 with an `enteredReviewMode` item so clients can show progress:

 ```json
-{ "method": "item/started", "params": { "item": {
-    "type": "enteredReviewMode",
-    "id": "turn_900",
-    "review": "current changes"
-} } }
+{
+  "method": "item/started",
+  "params": {
+    "item": {
+      "type": "enteredReviewMode",
+      "id": "turn_900",
+      "review": "current changes"
+    }
+  }
+}
 ```

 When the reviewer finishes, the server emits `item/started` and `item/completed`
 containing an `exitedReviewMode` item with the final review text:

 ```json
-{ "method": "item/completed", "params": { "item": {
-    "type": "exitedReviewMode",
-    "id": "turn_900",
-    "review": "Looks solid overall...\n\n- Prefer Stylize helpers — app.rs:10-20\n  ..."
-} } }
+{
+  "method": "item/completed",
+  "params": {
+    "item": {
+      "type": "exitedReviewMode",
+      "id": "turn_900",
+      "review": "Looks solid overall...\n\n- Prefer Stylize helpers — app.rs:10-20\n  ..."
+    }
+  }
+}
 ```

 The `review` string is plain text that already bundles the overall explanation plus a bullet list for each structured finding (matching `ThreadItem::ExitedReviewMode` in the generated schema). Use this notification to render the reviewer output in your client.
@@ -260,6 +286,7 @@ Run a standalone command (argv vector) in the server’s sandbox without creatin
 ```

 Notes:
+
 - Empty `command` arrays are rejected.
 - `sandboxPolicy` accepts the same shape used by `turn/start` (e.g., `dangerFullAccess`, `readOnly`, `workspaceWrite` with flags).
 - When omitted, `timeoutMs` falls back to the server default.
@@ -282,6 +309,7 @@ Today both notifications carry an empty `items` array even when item events were
 #### Items

 `ThreadItem` is the tagged union carried in turn responses and `item/*` notifications. Currently we support events for the following items:
+
 - `userMessage` — `{id, content}` where `content` is a list of user inputs (`text`, `image`, or `localImage`).
 - `agentMessage` — `{id, text}` containing the accumulated agent reply.
 - `reasoning` — `{id, summary, content}` where `summary` holds streamed reasoning summaries (applicable for most OpenAI models) and `content` holds raw reasoning blocks (applicable for e.g. open source models).
@@ -295,37 +323,48 @@ Today both notifications carry an empty `items` array even when item events were
 - `compacted` - `{threadId, turnId}` when codex compacts the conversation history. This can happen automatically.

 All items emit two shared lifecycle events:
+
 - `item/started` — emits the full `item` when a new unit of work begins so the UI can render it immediately; the `item.id` in this payload matches the `itemId` used by deltas.
 - `item/completed` — sends the final `item` once that work finishes (e.g., after a tool call or message completes); treat this as the authoritative state.

 There are additional item-specific events:
+
 #### agentMessage
+
 - `item/agentMessage/delta` — appends streamed text for the agent message; concatenate `delta` values for the same `itemId` in order to reconstruct the full reply.
+
 #### reasoning
+
 - `item/reasoning/summaryTextDelta` — streams readable reasoning summaries; `summaryIndex` increments when a new summary section opens.
 - `item/reasoning/summaryPartAdded` — marks the boundary between reasoning summary sections for an `itemId`; subsequent `summaryTextDelta` entries share the same `summaryIndex`.
 - `item/reasoning/textDelta` — streams raw reasoning text (only applicable for e.g. open source models); use `contentIndex` to group deltas that belong together before showing them in the UI.
+
 #### commandExecution
+
 - `item/commandExecution/outputDelta` — streams stdout/stderr for the command; append deltas in order to render live output alongside `aggregatedOutput` in the final item.
-Final `commandExecution` items include parsed `commandActions`, `status`, `exitCode`, and `durationMs` so the UI can summarize what ran and whether it succeeded.
+  Final `commandExecution` items include parsed `commandActions`, `status`, `exitCode`, and `durationMs` so the UI can summarize what ran and whether it succeeded.
+
 #### fileChange
+
 - `item/fileChange/outputDelta` - contains the tool call response of the underlying `apply_patch` tool call.

 ### Errors
+
 `error` event is emitted whenever the server hits an error mid-turn (for example, upstream model errors or quota limits). Carries the same `{ error: { message, codexErrorInfo? } }` payload as `turn.status: "failed"` and may precede that terminal notification.

-  `codexErrorInfo` maps to the `CodexErrorInfo` enum. Common values:
-  - `ContextWindowExceeded`
-  - `UsageLimitExceeded`
-  - `HttpConnectionFailed { httpStatusCode? }`: upstream HTTP failures including 4xx/5xx
-  - `ResponseStreamConnectionFailed { httpStatusCode? }`: failure to connect to the response SSE stream
-  - `ResponseStreamDisconnected { httpStatusCode? }`: disconnect of the response SSE stream in the middle of a turn before completion
-  - `ResponseTooManyFailedAttempts { httpStatusCode? }`
-  - `BadRequest`
-  - `Unauthorized`
-  - `SandboxError`
-  - `InternalServerError`
-  - `Other`: all unclassified errors
+`codexErrorInfo` maps to the `CodexErrorInfo` enum. Common values:
+
+- `ContextWindowExceeded`
+- `UsageLimitExceeded`
+- `HttpConnectionFailed { httpStatusCode? }`: upstream HTTP failures including 4xx/5xx
+- `ResponseStreamConnectionFailed { httpStatusCode? }`: failure to connect to the response SSE stream
+- `ResponseStreamDisconnected { httpStatusCode? }`: disconnect of the response SSE stream in the middle of a turn before completion
+- `ResponseTooManyFailedAttempts { httpStatusCode? }`
+- `BadRequest`
+- `Unauthorized`
+- `SandboxError`
+- `InternalServerError`
+- `Other`: all unclassified errors

 When an upstream HTTP status is available (for example, from the Responses API or a provider), it is forwarded in `httpStatusCode` on the relevant `codexErrorInfo` variant.

@@ -339,6 +378,7 @@ Certain actions (shell commands or modifying files) may require explicit user ap
 ### Command execution approvals

 Order of messages:
+
 1. `item/started` — shows the pending `commandExecution` item with `command`, `cwd`, and other fields so you can render the proposed action.
 2. `item/commandExecution/requestApproval` (request) — carries the same `itemId`, `threadId`, `turnId`, optionally `reason` or `risk`, plus `parsedCmd` for friendly display.
 3. Client response — `{ "decision": "accept", "acceptSettings": { "forSession": false } }` or `{ "decision": "decline" }`.
@@ -347,6 +387,7 @@ Order of messages:
 ### File change approvals

 Order of messages:
+
 1. `item/started` — emits a `fileChange` item with `changes` (diff chunk summaries) and `status: "inProgress"`. Show the proposed edits and paths to the user.
 2. `item/fileChange/requestApproval` (request) — includes `itemId`, `threadId`, `turnId`, and an optional `reason`.
 3. Client response — `{ "decision": "accept" }` or `{ "decision": "decline" }`.
@@ -359,6 +400,7 @@ UI guidance for IDEs: surface an approval dialog as soon as the request arrives.
 The JSON-RPC auth/account surface exposes request/response methods plus server-initiated notifications (no `id`). Use these to determine auth state, start or cancel logins, logout, and inspect ChatGPT rate limits.

 ### API Overview
+
 - `account/read` — fetch current account info; optionally refresh tokens.
 - `account/login/start` — begin login (`apiKey` or `chatgpt`).
 - `account/login/completed` (notify) — emitted when a login attempt finishes (success or error).
@@ -366,15 +408,19 @@ The JSON-RPC auth/account surface exposes request/response methods plus server-i
 - `account/logout` — sign out; triggers `account/updated`.
 - `account/updated` (notify) — emitted whenever auth mode changes (`authMode`: `apikey`, `chatgpt`, or `null`).
 - `account/rateLimits/read` — fetch ChatGPT rate limits; updates arrive via `account/rateLimits/updated` (notify).
+- `account/rateLimits/updated` (notify) — emitted whenever a user's ChatGPT rate limits change.
+- `mcpServer/oauthLogin/completed` (notify) — emitted after a `mcpServer/oauth/login` flow finishes for a server; payload includes `{ name, success, error? }`.

 ### 1) Check auth state

 Request:
+
 ```json
 { "method": "account/read", "id": 1, "params": { "refreshToken": false } }
 ```

 Response examples:
+
 ```json
 { "id": 1, "result": { "account": null, "requiresOpenaiAuth": false } } // No OpenAI auth needed (e.g., OSS/local models)
 { "id": 1, "result": { "account": null, "requiresOpenaiAuth": true } }  // OpenAI auth required (typical for OpenAI-hosted models)
@@ -383,6 +429,7 @@ Response examples:
 ```

 Field notes:
+
 - `refreshToken` (bool): set `true` to force a token refresh.
 - `requiresOpenaiAuth` reflects the active provider; when `false`, Codex can run without OpenAI credentials.

@@ -390,7 +437,11 @@ Field notes:

 1. Send:
   ```json
-   { "method": "account/login/start", "id": 2, "params": { "type": "apiKey", "apiKey": "sk-…" } }
+   {
+     "method": "account/login/start",
+     "id": 2,
+     "params": { "type": "apiKey", "apiKey": "sk-…" }
+   }
   ```
 2. Expect:
   ```json
@@ -440,6 +491,7 @@ Field notes:
 ```

 Field notes:
+
 - `usedPercent` is current usage within the OpenAI quota window.
 - `windowDurationMins` is the quota window length.
 - `resetsAt` is a Unix timestamp (seconds) for the next reset.
--- a/codex-rs/app-server/src/bespoke_event_handling.rs
+++ b/codex-rs/app-server/src/bespoke_event_handling.rs
@@ -18,6 +18,7 @@ use codex_app_server_protocol::ContextCompactedNotification;
 use codex_app_server_protocol::ErrorNotification;
 use codex_app_server_protocol::ExecCommandApprovalParams;
 use codex_app_server_protocol::ExecCommandApprovalResponse;
+use codex_app_server_protocol::ExecPolicyAmendment as V2ExecPolicyAmendment;
 use codex_app_server_protocol::FileChangeOutputDeltaNotification;
 use codex_app_server_protocol::FileChangeRequestApprovalParams;
 use codex_app_server_protocol::FileChangeRequestApprovalResponse;
@@ -30,12 +31,13 @@ use codex_app_server_protocol::McpToolCallResult;
 use codex_app_server_protocol::McpToolCallStatus;
 use codex_app_server_protocol::PatchApplyStatus;
 use codex_app_server_protocol::PatchChangeKind as V2PatchChangeKind;
+use codex_app_server_protocol::RawResponseItemCompletedNotification;
 use codex_app_server_protocol::ReasoningSummaryPartAddedNotification;
 use codex_app_server_protocol::ReasoningSummaryTextDeltaNotification;
 use codex_app_server_protocol::ReasoningTextDeltaNotification;
-use codex_app_server_protocol::SandboxCommandAssessment as V2SandboxCommandAssessment;
 use codex_app_server_protocol::ServerNotification;
 use codex_app_server_protocol::ServerRequestPayload;
+use codex_app_server_protocol::TerminalInteractionNotification;
 use codex_app_server_protocol::ThreadItem;
 use codex_app_server_protocol::ThreadTokenUsage;
 use codex_app_server_protocol::ThreadTokenUsageUpdatedNotification;
@@ -178,7 +180,7 @@ pub(crate) async fn apply_bespoke_event_handling(
            command,
            cwd,
            reason,
-            risk,
+            proposed_execpolicy_amendment,
            parsed_cmd,
        }) => match api_version {
            ApiVersion::V1 => {
@@ -188,7 +190,6 @@ pub(crate) async fn apply_bespoke_event_handling(
                    command,
                    cwd,
                    reason,
-                    risk,
                    parsed_cmd,
                };
                let rx = outgoing
@@ -206,6 +207,8 @@ pub(crate) async fn apply_bespoke_event_handling(
                    .map(V2ParsedCommand::from)
                    .collect::<Vec<_>>();
                let command_string = shlex_join(&command);
+                let proposed_execpolicy_amendment_v2 =
+                    proposed_execpolicy_amendment.map(V2ExecPolicyAmendment::from);

                let params = CommandExecutionRequestApprovalParams {
                    thread_id: conversation_id.to_string(),
@@ -214,7 +217,7 @@ pub(crate) async fn apply_bespoke_event_handling(
                    // and emit the corresponding EventMsg, we repurpose the call_id as the item_id.
                    item_id: item_id.clone(),
                    reason,
-                    risk: risk.map(V2SandboxCommandAssessment::from),
+                    proposed_execpolicy_amendment: proposed_execpolicy_amendment_v2,
                };
                let rx = outgoing
                    .send_request(ServerRequestPayload::CommandExecutionRequestApproval(
@@ -332,6 +335,7 @@ pub(crate) async fn apply_bespoke_event_handling(
            outgoing
                .send_server_notification(ServerNotification::Error(ErrorNotification {
                    error: turn_error,
+                    will_retry: false,
                    thread_id: conversation_id.to_string(),
                    turn_id: event_turn_id.clone(),
                }))
@@ -347,6 +351,7 @@ pub(crate) async fn apply_bespoke_event_handling(
            outgoing
                .send_server_notification(ServerNotification::Error(ErrorNotification {
                    error: turn_error,
+                    will_retry: true,
                    thread_id: conversation_id.to_string(),
                    turn_id: event_turn_id.clone(),
                }))
@@ -447,6 +452,16 @@ pub(crate) async fn apply_bespoke_event_handling(
                .send_server_notification(ServerNotification::ItemCompleted(completed))
                .await;
        }
+        EventMsg::RawResponseItem(raw_response_item_event) => {
+            maybe_emit_raw_response_item_completed(
+                api_version,
+                conversation_id,
+                &event_turn_id,
+                raw_response_item_event.item,
+                outgoing.as_ref(),
+            )
+            .await;
+        }
        EventMsg::PatchApplyBegin(patch_begin_event) => {
            // Until we migrate the core to be aware of a first class FileChangeItem
            // and emit the corresponding EventMsg, we repurpose the call_id as the item_id.
@@ -566,6 +581,20 @@ pub(crate) async fn apply_bespoke_event_handling(
                    .await;
            }
        }
+        EventMsg::TerminalInteraction(terminal_event) => {
+            let item_id = terminal_event.call_id.clone();
+
+            let notification = TerminalInteractionNotification {
+                thread_id: conversation_id.to_string(),
+                turn_id: event_turn_id.clone(),
+                item_id,
+                process_id: terminal_event.process_id,
+                stdin: terminal_event.stdin,
+            };
+            outgoing
+                .send_server_notification(ServerNotification::TerminalInteraction(notification))
+                .await;
+        }
        EventMsg::ExecCommandEnd(exec_command_end_event) => {
            let ExecCommandEndEvent {
                call_id,
@@ -802,6 +831,27 @@ async fn complete_command_execution_item(
        .await;
 }

+async fn maybe_emit_raw_response_item_completed( // Sends a RawResponseItemCompleted notification for `item`; no-op unless the API version is V2.
+    api_version: ApiVersion, // any version other than V2 makes this function return without sending
+    conversation_id: ConversationId, // stringified into the notification's `thread_id`
+    turn_id: &str, // copied into the notification's `turn_id`
+    item: codex_protocol::models::ResponseItem, // moved into the notification unchanged
+    outgoing: &OutgoingMessageSender, // sender used to deliver the notification
+) {
+    let ApiVersion::V2 = api_version else { // let-else guard: bail out unless `api_version` is V2
+        return;
+    };
+
+    let notification = RawResponseItemCompletedNotification { // payload: thread id, turn id, and the raw item
+        thread_id: conversation_id.to_string(),
+        turn_id: turn_id.to_string(),
+        item,
+    };
+    outgoing
+        .send_server_notification(ServerNotification::RawResponseItemCompleted(notification))
+        .await; // awaited for completion; no value is used or returned from the send
+}
+
 async fn find_and_remove_turn_summary(
    conversation_id: ConversationId,
    turn_summary_store: &TurnSummaryStore,
@@ -1044,7 +1094,11 @@ async fn on_file_change_request_approval_response(
                });

            let (decision, completion_status) = match response.decision {
-                ApprovalDecision::Accept => (ReviewDecision::Approved, None),
+                ApprovalDecision::Accept
+                | ApprovalDecision::AcceptForSession
+                | ApprovalDecision::AcceptWithExecpolicyAmendment { .. } => {
+                    (ReviewDecision::Approved, None)
+                }
                ApprovalDecision::Decline => {
                    (ReviewDecision::Denied, Some(PatchApplyStatus::Declined))
                }
@@ -1106,25 +1160,27 @@ async fn on_command_execution_request_approval_response(
                    error!("failed to deserialize CommandExecutionRequestApprovalResponse: {err}");
                    CommandExecutionRequestApprovalResponse {
                        decision: ApprovalDecision::Decline,
-                        accept_settings: None,
                    }
                });

-            let CommandExecutionRequestApprovalResponse {
-                decision,
-                accept_settings,
-            } = response;
+            let decision = response.decision;

-            let (decision, completion_status) = match (decision, accept_settings) {
-                (ApprovalDecision::Accept, Some(settings)) if settings.for_session => {
-                    (ReviewDecision::ApprovedForSession, None)
-                }
-                (ApprovalDecision::Accept, _) => (ReviewDecision::Approved, None),
-                (ApprovalDecision::Decline, _) => (
+            let (decision, completion_status) = match decision {
+                ApprovalDecision::Accept => (ReviewDecision::Approved, None),
+                ApprovalDecision::AcceptForSession => (ReviewDecision::ApprovedForSession, None),
+                ApprovalDecision::AcceptWithExecpolicyAmendment {
+                    execpolicy_amendment,
+                } => (
+                    ReviewDecision::ApprovedExecpolicyAmendment {
+                        proposed_execpolicy_amendment: execpolicy_amendment.into_core(),
+                    },
+                    None,
+                ),
+                ApprovalDecision::Decline => (
                    ReviewDecision::Denied,
                    Some(CommandExecutionStatus::Declined),
                ),
-                (ApprovalDecision::Cancel, _) => (
+                ApprovalDecision::Cancel => (
                    ReviewDecision::Abort,
                    Some(CommandExecutionStatus::Declined),
                ),
@@ -1177,6 +1233,7 @@ async fn construct_mcp_tool_call_notification(
        arguments: begin_event.invocation.arguments.unwrap_or(JsonValue::Null),
        result: None,
        error: None,
+        duration_ms: None,
    };
    ItemStartedNotification {
        thread_id,
@@ -1185,7 +1242,7 @@ async fn construct_mcp_tool_call_notification(
    }
 }

-/// simiilar to handle_mcp_tool_call_end in exec
+/// similar to handle_mcp_tool_call_end in exec
 async fn construct_mcp_tool_call_end_notification(
    end_event: McpToolCallEndEvent,
    thread_id: String,
@@ -1196,6 +1253,7 @@ async fn construct_mcp_tool_call_end_notification(
    } else {
        McpToolCallStatus::Failed
    };
+    let duration_ms = i64::try_from(end_event.duration.as_millis()).ok();

    let (result, error) = match &end_event.result {
        Ok(value) => (
@@ -1221,6 +1279,7 @@ async fn construct_mcp_tool_call_end_notification(
        arguments: end_event.invocation.arguments.unwrap_or(JsonValue::Null),
        result,
        error,
+        duration_ms,
    };
    ItemCompletedNotification {
        thread_id,
@@ -1493,6 +1552,7 @@ mod tests {
                unlimited: false,
                balance: Some("5".to_string()),
            }),
+            plan_type: None,
        };

        handle_token_count_event(
@@ -1597,6 +1657,7 @@ mod tests {
                arguments: serde_json::json!({"server": ""}),
                result: None,
                error: None,
+                duration_ms: None,
            },
        };

@@ -1750,6 +1811,7 @@ mod tests {
                arguments: JsonValue::Null,
                result: None,
                error: None,
+                duration_ms: None,
            },
        };

@@ -1803,6 +1865,7 @@ mod tests {
                    structured_content: None,
                }),
                error: None,
+                duration_ms: Some(0),
            },
        };

@@ -1844,6 +1907,7 @@ mod tests {
                error: Some(McpToolCallError {
                    message: "boom".to_string(),
                }),
+                duration_ms: Some(1),
            },
        };

--- a/codex-rs/app-server/src/codex_message_processor.rs
+++ b/codex-rs/app-server/src/codex_message_processor.rs
@@ -19,6 +19,7 @@ use codex_app_server_protocol::AuthMode;
 use codex_app_server_protocol::AuthStatusChangeNotification;
 use codex_app_server_protocol::CancelLoginAccountParams;
 use codex_app_server_protocol::CancelLoginAccountResponse;
+use codex_app_server_protocol::CancelLoginAccountStatus;
 use codex_app_server_protocol::CancelLoginChatGptResponse;
 use codex_app_server_protocol::ClientRequest;
 use codex_app_server_protocol::CommandExecParams;
@@ -45,8 +46,8 @@ use codex_app_server_protocol::InterruptConversationParams;
 use codex_app_server_protocol::JSONRPCErrorError;
 use codex_app_server_protocol::ListConversationsParams;
 use codex_app_server_protocol::ListConversationsResponse;
-use codex_app_server_protocol::ListMcpServersParams;
-use codex_app_server_protocol::ListMcpServersResponse;
+use codex_app_server_protocol::ListMcpServerStatusParams;
+use codex_app_server_protocol::ListMcpServerStatusResponse;
 use codex_app_server_protocol::LoginAccountParams;
 use codex_app_server_protocol::LoginApiKeyParams;
 use codex_app_server_protocol::LoginApiKeyResponse;
@@ -54,7 +55,10 @@ use codex_app_server_protocol::LoginChatGptCompleteNotification;
 use codex_app_server_protocol::LoginChatGptResponse;
 use codex_app_server_protocol::LogoutAccountResponse;
 use codex_app_server_protocol::LogoutChatGptResponse;
-use codex_app_server_protocol::McpServer;
+use codex_app_server_protocol::McpServerOauthLoginCompletedNotification;
+use codex_app_server_protocol::McpServerOauthLoginParams;
+use codex_app_server_protocol::McpServerOauthLoginResponse;
+use codex_app_server_protocol::McpServerStatus;
 use codex_app_server_protocol::ModelListParams;
 use codex_app_server_protocol::ModelListResponse;
 use codex_app_server_protocol::NewConversationParams;
@@ -77,6 +81,8 @@ use codex_app_server_protocol::ServerNotification;
 use codex_app_server_protocol::SessionConfiguredNotification;
 use codex_app_server_protocol::SetDefaultModelParams;
 use codex_app_server_protocol::SetDefaultModelResponse;
+use codex_app_server_protocol::SkillsListParams;
+use codex_app_server_protocol::SkillsListResponse;
 use codex_app_server_protocol::Thread;
 use codex_app_server_protocol::ThreadArchiveParams;
 use codex_app_server_protocol::ThreadArchiveResponse;
@@ -113,9 +119,9 @@ use codex_core::auth::CLIENT_ID;
 use codex_core::auth::login_with_api_key;
 use codex_core::config::Config;
 use codex_core::config::ConfigOverrides;
-use codex_core::config::ConfigToml;
+use codex_core::config::ConfigService;
 use codex_core::config::edit::ConfigEditsBuilder;
-use codex_core::config_loader::load_config_as_toml;
+use codex_core::config::types::McpServerTransportConfig;
 use codex_core::default_client::get_codex_user_agent;
 use codex_core::exec::ExecParams;
 use codex_core::exec_env::create_env;
@@ -132,6 +138,7 @@ use codex_core::protocol::ReviewRequest;
 use codex_core::protocol::ReviewTarget as CoreReviewTarget;
 use codex_core::protocol::SessionConfiguredEvent;
 use codex_core::read_head_for_summary;
+use codex_core::sandboxing::SandboxPermissions;
 use codex_feedback::CodexFeedback;
 use codex_login::ServerOptions as LoginServerOptions;
 use codex_login::ShutdownHandle;
@@ -147,6 +154,7 @@ use codex_protocol::protocol::RolloutItem;
 use codex_protocol::protocol::SessionMetaLine;
 use codex_protocol::protocol::USER_MESSAGE_BEGIN;
 use codex_protocol::user_input::UserInput as CoreInputItem;
+use codex_rmcp_client::perform_oauth_login_return_url;
 use codex_utils_json_to_toml::json_to_toml;
 use std::collections::HashMap;
 use std::collections::HashSet;
@@ -161,6 +169,7 @@ use std::time::Duration;
 use tokio::select;
 use tokio::sync::Mutex;
 use tokio::sync::oneshot;
+use toml::Value as TomlValue;
 use tracing::error;
 use tracing::info;
 use tracing::warn;
@@ -178,6 +187,9 @@ pub(crate) struct TurnSummary {

 pub(crate) type TurnSummaryStore = Arc<Mutex<HashMap<ConversationId, TurnSummary>>>;

+const THREAD_LIST_DEFAULT_LIMIT: usize = 25;
+const THREAD_LIST_MAX_LIMIT: usize = 100;
+
 // Duration before a ChatGPT login attempt is abandoned.
 const LOGIN_CHATGPT_TIMEOUT: Duration = Duration::from_secs(10 * 60);
 struct ActiveLogin {
@@ -185,6 +197,11 @@ struct ActiveLogin {
    login_id: Uuid,
 }

+#[derive(Clone, Copy, Debug)]
+enum CancelLoginError { // Error returned by `cancel_login_chatgpt_common`; callers translate it into a JSON-RPC error.
+    NotFound(Uuid), // the requested login id does not match the currently active login; carries that id
+}
+
 impl Drop for ActiveLogin {
    fn drop(&mut self) {
        self.shutdown_handle.shutdown();
@@ -198,6 +215,7 @@ pub(crate) struct CodexMessageProcessor {
    outgoing: Arc<OutgoingMessageSender>,
    codex_linux_sandbox_exe: Option<PathBuf>,
    config: Arc<Config>,
+    cli_overrides: Vec<(String, TomlValue)>,
    conversation_listeners: HashMap<Uuid, oneshot::Sender<()>>,
    active_login: Arc<Mutex<Option<ActiveLogin>>>,
    // Queue of pending interrupt requests per conversation. We reply when TurnAborted arrives.
@@ -244,6 +262,7 @@ impl CodexMessageProcessor {
        outgoing: Arc<OutgoingMessageSender>,
        codex_linux_sandbox_exe: Option<PathBuf>,
        config: Arc<Config>,
+        cli_overrides: Vec<(String, TomlValue)>,
        feedback: CodexFeedback,
    ) -> Self {
        Self {
@@ -252,6 +271,7 @@ impl CodexMessageProcessor {
            outgoing,
            codex_linux_sandbox_exe,
            config,
+            cli_overrides,
            conversation_listeners: HashMap::new(),
            active_login: Arc::new(Mutex::new(None)),
            pending_interrupts: Arc::new(Mutex::new(HashMap::new())),
@@ -261,6 +281,16 @@ impl CodexMessageProcessor {
        }
    }

+    async fn load_latest_config(&self) -> Result<Config, JSONRPCErrorError> { // Loads a fresh `Config` using the CLI overrides stored on `self`.
+        Config::load_with_cli_overrides(self.cli_overrides.clone()) // pass a clone so `self.cli_overrides` stays intact for later reloads
+            .await
+            .map_err(|err| JSONRPCErrorError { // convert any load failure into a JSON-RPC error value
+                code: INTERNAL_ERROR_CODE, // reported with the internal-error code
+                message: format!("failed to reload config: {err}"),
+                data: None, // no structured error data is attached
+            })
+    }
+
    fn review_request_from_target(
        target: ApiReviewTarget,
    ) -> Result<(ReviewRequest, String), JSONRPCErrorError> {
@@ -338,12 +368,8 @@ impl CodexMessageProcessor {
            ClientRequest::ThreadList { request_id, params } => {
                self.thread_list(request_id, params).await;
            }
-            ClientRequest::ThreadCompact {
-                request_id,
-                params: _,
-            } => {
-                self.send_unimplemented_error(request_id, "thread/compact")
-                    .await;
+            ClientRequest::SkillsList { request_id, params } => {
+                self.skills_list(request_id, params).await;
            }
            ClientRequest::TurnStart { request_id, params } => {
                self.turn_start(request_id, params).await;
@@ -367,10 +393,20 @@ impl CodexMessageProcessor {
                self.handle_list_conversations(request_id, params).await;
            }
            ClientRequest::ModelList { request_id, params } => {
-                self.list_models(request_id, params).await;
+                let outgoing = self.outgoing.clone();
+                let conversation_manager = self.conversation_manager.clone();
+                let config = self.config.clone();
+
+                tokio::spawn(async move {
+                    Self::list_models(outgoing, conversation_manager, config, request_id, params)
+                        .await;
+                });
            }
-            ClientRequest::McpServersList { request_id, params } => {
-                self.list_mcp_servers(request_id, params).await;
+            ClientRequest::McpServerOauthLogin { request_id, params } => {
+                self.mcp_server_oauth_login(request_id, params).await;
+            }
+            ClientRequest::McpServerStatusList { request_id, params } => {
+                self.list_mcp_server_status(request_id, params).await;
            }
            ClientRequest::LoginAccount { request_id, params } => {
                self.login_v2(request_id, params).await;
@@ -479,15 +515,6 @@ impl CodexMessageProcessor {
        }
    }

-    async fn send_unimplemented_error(&self, request_id: RequestId, method: &str) {
-        let error = JSONRPCErrorError {
-            code: INTERNAL_ERROR_CODE,
-            message: format!("{method} is not implemented yet"),
-            data: None,
-        };
-        self.outgoing.send_error(request_id, error).await;
-    }
-
    async fn login_v2(&mut self, request_id: RequestId, params: LoginAccountParams) {
        match params {
            LoginAccountParams::ApiKey { api_key } => {
@@ -802,7 +829,7 @@ impl CodexMessageProcessor {
    async fn cancel_login_chatgpt_common(
        &mut self,
        login_id: Uuid,
-    ) -> std::result::Result<(), JSONRPCErrorError> {
+    ) -> std::result::Result<(), CancelLoginError> {
        let mut guard = self.active_login.lock().await;
        if guard.as_ref().map(|l| l.login_id) == Some(login_id) {
            if let Some(active) = guard.take() {
@@ -810,11 +837,7 @@ impl CodexMessageProcessor {
            }
            Ok(())
        } else {
-            Err(JSONRPCErrorError {
-                code: INVALID_REQUEST_ERROR_CODE,
-                message: format!("login id not found: {login_id}"),
-                data: None,
-            })
+            Err(CancelLoginError::NotFound(login_id))
        }
    }

@@ -825,7 +848,12 @@ impl CodexMessageProcessor {
                    .send_response(request_id, CancelLoginChatGptResponse {})
                    .await;
            }
-            Err(error) => {
+            Err(CancelLoginError::NotFound(missing_login_id)) => {
+                let error = JSONRPCErrorError {
+                    code: INVALID_REQUEST_ERROR_CODE,
+                    message: format!("login id not found: {missing_login_id}"),
+                    data: None,
+                };
                self.outgoing.send_error(request_id, error).await;
            }
        }
@@ -834,16 +862,14 @@ impl CodexMessageProcessor {
    async fn cancel_login_v2(&mut self, request_id: RequestId, params: CancelLoginAccountParams) {
        let login_id = params.login_id;
        match Uuid::parse_str(&login_id) {
-            Ok(uuid) => match self.cancel_login_chatgpt_common(uuid).await {
-                Ok(()) => {
-                    self.outgoing
-                        .send_response(request_id, CancelLoginAccountResponse {})
-                        .await;
-                }
-                Err(error) => {
-                    self.outgoing.send_error(request_id, error).await;
-                }
-            },
+            Ok(uuid) => {
+                let status = match self.cancel_login_chatgpt_common(uuid).await {
+                    Ok(()) => CancelLoginAccountStatus::Canceled,
+                    Err(CancelLoginError::NotFound(_)) => CancelLoginAccountStatus::NotFound,
+                };
+                let response = CancelLoginAccountResponse { status };
+                self.outgoing.send_response(request_id, response).await;
+            }
            Err(_) => {
                let error = JSONRPCErrorError {
                    code: INVALID_REQUEST_ERROR_CODE,
@@ -1077,12 +1103,13 @@ impl CodexMessageProcessor {
    }

    async fn get_user_saved_config(&self, request_id: RequestId) {
-        let toml_value = match load_config_as_toml(&self.config.codex_home).await {
-            Ok(val) => val,
+        let service = ConfigService::new(self.config.codex_home.clone(), Vec::new());
+        let user_saved_config: UserSavedConfig = match service.load_user_saved_config().await {
+            Ok(config) => config,
            Err(err) => {
                let error = JSONRPCErrorError {
                    code: INTERNAL_ERROR_CODE,
-                    message: format!("failed to load config.toml: {err}"),
+                    message: err.to_string(),
                    data: None,
                };
                self.outgoing.send_error(request_id, error).await;
@@ -1090,21 +1117,6 @@ impl CodexMessageProcessor {
            }
        };

-        let cfg: ConfigToml = match toml_value.try_into() {
-            Ok(cfg) => cfg,
-            Err(err) => {
-                let error = JSONRPCErrorError {
-                    code: INTERNAL_ERROR_CODE,
-                    message: format!("failed to parse config.toml: {err}"),
-                    data: None,
-                };
-                self.outgoing.send_error(request_id, error).await;
-                return;
-            }
-        };
-
-        let user_saved_config: UserSavedConfig = cfg.into();
-
        let response = GetUserSavedConfigResponse {
            config: user_saved_config,
        };
@@ -1169,7 +1181,7 @@ impl CodexMessageProcessor {
            cwd,
            expiration: timeout_ms.into(),
            env,
-            with_escalated_permissions: None,
+            sandbox_permissions: SandboxPermissions::UseDefault,
            justification: None,
            arg0: None,
        };
@@ -1249,7 +1261,7 @@ impl CodexMessageProcessor {
        let mut cli_overrides = cli_overrides.unwrap_or_default();
        if cfg!(windows) && self.config.features.enabled(Feature::WindowsSandbox) {
            cli_overrides.insert(
-                "features.enable_experimental_windows_sandbox".to_string(),
+                "features.experimental_windows_sandbox".to_string(),
                serde_json::json!(true),
            );
        }
@@ -1368,9 +1380,13 @@ impl CodexMessageProcessor {
                };

                // Auto-attach a conversation listener when starting a thread.
-                // Use the same behavior as the v1 API with experimental_raw_events=false.
+                // Use the same behavior as the v1 API, with opt-in support for raw item events.
                if let Err(err) = self
-                    .attach_conversation_listener(conversation_id, false, ApiVersion::V2)
+                    .attach_conversation_listener(
+                        conversation_id,
+                        params.experimental_raw_events,
+                        ApiVersion::V2,
+                    )
                    .await
                {
                    tracing::warn!(
@@ -1485,10 +1501,12 @@ impl CodexMessageProcessor {
            model_providers,
        } = params;

-        let page_size = limit.unwrap_or(25).max(1) as usize;
-
+        let requested_page_size = limit
+            .map(|value| value as usize)
+            .unwrap_or(THREAD_LIST_DEFAULT_LIMIT)
+            .clamp(1, THREAD_LIST_MAX_LIMIT);
        let (summaries, next_cursor) = match self
-            .list_conversations_common(page_size, cursor, model_providers)
+            .list_conversations_common(requested_page_size, cursor, model_providers)
            .await
        {
            Ok(r) => r,
@@ -1499,7 +1517,6 @@ impl CodexMessageProcessor {
        };

        let data = summaries.into_iter().map(summary_to_thread).collect();
-
        let response = ThreadListResponse { data, next_cursor };
        self.outgoing.send_response(request_id, response).await;
    }
@@ -1777,10 +1794,12 @@ impl CodexMessageProcessor {
            cursor,
            model_providers,
        } = params;
-        let page_size = page_size.unwrap_or(25).max(1);
+        let requested_page_size = page_size
+            .unwrap_or(THREAD_LIST_DEFAULT_LIMIT)
+            .clamp(1, THREAD_LIST_MAX_LIMIT);

        match self
-            .list_conversations_common(page_size, cursor, model_providers)
+            .list_conversations_common(requested_page_size, cursor, model_providers)
            .await
        {
            Ok((items, next_cursor)) => {
@@ -1795,12 +1814,15 @@ impl CodexMessageProcessor {

    async fn list_conversations_common(
        &self,
-        page_size: usize,
+        requested_page_size: usize,
        cursor: Option<String>,
        model_providers: Option<Vec<String>>,
    ) -> Result<(Vec<ConversationSummary>, Option<String>), JSONRPCErrorError> {
-        let cursor_obj: Option<RolloutCursor> = cursor.as_ref().and_then(|s| parse_cursor(s));
-        let cursor_ref = cursor_obj.as_ref();
+        let mut cursor_obj: Option<RolloutCursor> = cursor.as_ref().and_then(|s| parse_cursor(s));
+        let mut last_cursor = cursor_obj.clone();
+        let mut remaining = requested_page_size;
+        let mut items = Vec::with_capacity(requested_page_size);
+        let mut next_cursor: Option<String> = None;

        let model_provider_filter = match model_providers {
            Some(providers) => {
@@ -1814,55 +1836,84 @@ impl CodexMessageProcessor {
        };
        let fallback_provider = self.config.model_provider_id.clone();

-        let page = match RolloutRecorder::list_conversations(
-            &self.config.codex_home,
-            page_size,
-            cursor_ref,
-            INTERACTIVE_SESSION_SOURCES,
-            model_provider_filter.as_deref(),
-            fallback_provider.as_str(),
-        )
-        .await
-        {
-            Ok(p) => p,
-            Err(err) => {
-                return Err(JSONRPCErrorError {
-                    code: INTERNAL_ERROR_CODE,
-                    message: format!("failed to list conversations: {err}"),
-                    data: None,
-                });
+        while remaining > 0 {
+            let page_size = remaining.min(THREAD_LIST_MAX_LIMIT);
+            let page = RolloutRecorder::list_conversations(
+                &self.config.codex_home,
+                page_size,
+                cursor_obj.as_ref(),
+                INTERACTIVE_SESSION_SOURCES,
+                model_provider_filter.as_deref(),
+                fallback_provider.as_str(),
+            )
+            .await
+            .map_err(|err| JSONRPCErrorError {
+                code: INTERNAL_ERROR_CODE,
+                message: format!("failed to list conversations: {err}"),
+                data: None,
+            })?;
+
+            let mut filtered = page
+                .items
+                .into_iter()
+                .filter_map(|it| {
+                    let session_meta_line = it.head.first().and_then(|first| {
+                        serde_json::from_value::<SessionMetaLine>(first.clone()).ok()
+                    })?;
+                    extract_conversation_summary(
+                        it.path,
+                        &it.head,
+                        &session_meta_line.meta,
+                        session_meta_line.git.as_ref(),
+                        fallback_provider.as_str(),
+                    )
+                })
+                .collect::<Vec<_>>();
+            if filtered.len() > remaining {
+                filtered.truncate(remaining);
            }
-        };
+            items.extend(filtered);
+            remaining = requested_page_size.saturating_sub(items.len());

-        let items = page
-            .items
-            .into_iter()
-            .filter_map(|it| {
-                let session_meta_line = it.head.first().and_then(|first| {
-                    serde_json::from_value::<SessionMetaLine>(first.clone()).ok()
-                })?;
-                extract_conversation_summary(
-                    it.path,
-                    &it.head,
-                    &session_meta_line.meta,
-                    session_meta_line.git.as_ref(),
-                    fallback_provider.as_str(),
-                )
-            })
-            .collect::<Vec<_>>();
+            // Encode RolloutCursor into the JSON-RPC string form returned to clients.
+            let next_cursor_value = page.next_cursor.clone();
+            next_cursor = next_cursor_value
+                .as_ref()
+                .and_then(|cursor| serde_json::to_value(cursor).ok())
+                .and_then(|value| value.as_str().map(str::to_owned));
+            if remaining == 0 {
+                break;
+            }

-        // Encode next_cursor as a plain string
-        let next_cursor = page
-            .next_cursor
-            .and_then(|cursor| serde_json::to_value(&cursor).ok())
-            .and_then(|value| value.as_str().map(str::to_owned));
+            match next_cursor_value {
+                Some(cursor_val) if remaining > 0 => {
+                    // Break if our pagination would reuse the same cursor again; this avoids
+                    // an infinite loop when filtering drops everything on the page.
+                    if last_cursor.as_ref() == Some(&cursor_val) {
+                        next_cursor = None;
+                        break;
+                    }
+                    last_cursor = Some(cursor_val.clone());
+                    cursor_obj = Some(cursor_val);
+                }
+                _ => break,
+            }
+        }

        Ok((items, next_cursor))
    }

-    async fn list_models(&self, request_id: RequestId, params: ModelListParams) {
+    async fn list_models(
+        outgoing: Arc<OutgoingMessageSender>,
+        conversation_manager: Arc<ConversationManager>,
+        config: Arc<Config>,
+        request_id: RequestId,
+        params: ModelListParams,
+    ) {
        let ModelListParams { limit, cursor } = params;
-        let models = supported_models(self.conversation_manager.clone()).await;
+        let mut config = (*config).clone();
+        config.features.enable(Feature::RemoteModels);
+        let models = supported_models(conversation_manager, &config).await;
        let total = models.len();

        if total == 0 {
@@ -1870,7 +1921,7 @@ impl CodexMessageProcessor {
                data: Vec::new(),
                next_cursor: None,
            };
-            self.outgoing.send_response(request_id, response).await;
+            outgoing.send_response(request_id, response).await;
            return;
        }

@@ -1885,7 +1936,7 @@ impl CodexMessageProcessor {
                        message: format!("invalid cursor: {cursor}"),
                        data: None,
                    };
-                    self.outgoing.send_error(request_id, error).await;
+                    outgoing.send_error(request_id, error).await;
                    return;
                }
            },
@@ -1898,7 +1949,7 @@ impl CodexMessageProcessor {
                message: format!("cursor {start} exceeds total models {total}"),
                data: None,
            };
-            self.outgoing.send_error(request_id, error).await;
+            outgoing.send_error(request_id, error).await;
            return;
        }

@@ -1913,16 +1964,143 @@ impl CodexMessageProcessor {
            data: items,
            next_cursor,
        };
-        self.outgoing.send_response(request_id, response).await;
+        outgoing.send_response(request_id, response).await;
    }

-    async fn list_mcp_servers(&self, request_id: RequestId, params: ListMcpServersParams) {
-        let snapshot = collect_mcp_snapshot(self.config.as_ref()).await;
+    /// Handles a request to start an OAuth login for a configured MCP server.
+    ///
+    /// The latest configuration is loaded first. The request is rejected with an
+    /// invalid-request error when the `RmcpClient` feature is disabled, when no
+    /// MCP server named `params.name` is configured, or when that server does
+    /// not use the streamable HTTP transport. Otherwise the login flow is
+    /// started: its authorization URL is sent back as the response, and a
+    /// spawned task waits for the flow to finish and then emits a
+    /// `McpServerOauthLoginCompleted` notification describing the outcome.
+    async fn mcp_server_oauth_login(
+        &self,
+        request_id: RequestId,
+        params: McpServerOauthLoginParams,
+    ) {
+        // All validation failures below share the same error shape.
+        let invalid_request = |message: String| JSONRPCErrorError {
+            code: INVALID_REQUEST_ERROR_CODE,
+            message,
+            data: None,
+        };
+
+        let config = match self.load_latest_config().await {
+            Err(error) => {
+                self.outgoing.send_error(request_id, error).await;
+                return;
+            }
+            Ok(config) => config,
+        };
+
+        if !config.features.enabled(Feature::RmcpClient) {
+            let error = invalid_request(
+                "OAuth login is only supported when [features].rmcp_client is true in config.toml"
+                    .to_string(),
+            );
+            self.outgoing.send_error(request_id, error).await;
+            return;
+        }
+
+        let McpServerOauthLoginParams {
+            name,
+            scopes,
+            timeout_secs,
+        } = params;
+
+        let Some(server) = config.mcp_servers.get(&name) else {
+            let error = invalid_request(format!("No MCP server named '{name}' found."));
+            self.outgoing.send_error(request_id, error).await;
+            return;
+        };
+
+        // Only the streamable HTTP transport carries the data needed for OAuth.
+        let McpServerTransportConfig::StreamableHttp {
+            url,
+            http_headers,
+            env_http_headers,
+            ..
+        } = &server.transport
+        else {
+            let error = invalid_request(
+                "OAuth login is only supported for streamable HTTP servers.".to_string(),
+            );
+            self.outgoing.send_error(request_id, error).await;
+            return;
+        };
+
+        let server_url = url.clone();
+        let login = perform_oauth_login_return_url(
+            &name,
+            &server_url,
+            config.mcp_oauth_credentials_store_mode,
+            http_headers.clone(),
+            env_http_headers.clone(),
+            scopes.as_deref().unwrap_or_default(),
+            timeout_secs,
+        )
+        .await;
+
+        let handle = match login {
+            Ok(handle) => handle,
+            Err(err) => {
+                let error = JSONRPCErrorError {
+                    code: INTERNAL_ERROR_CODE,
+                    message: format!("failed to login to MCP server '{name}': {err}"),
+                    data: None,
+                };
+                self.outgoing.send_error(request_id, error).await;
+                return;
+            }
+        };
+
+        // The handle is moved into the background task, so read the URL first.
+        let authorization_url = handle.authorization_url().to_string();
+        let outgoing = self.outgoing.clone();
+
+        // Report completion asynchronously so the response is not delayed by
+        // the wait on the login handle.
+        tokio::spawn(async move {
+            let outcome = handle.wait().await;
+            let success = outcome.is_ok();
+            let error = outcome.err().map(|err| err.to_string());
+
+            let notification = ServerNotification::McpServerOauthLoginCompleted(
+                McpServerOauthLoginCompletedNotification {
+                    name,
+                    success,
+                    error,
+                },
+            );
+            outgoing.send_server_notification(notification).await;
+        });
+
+        let response = McpServerOauthLoginResponse { authorization_url };
+        self.outgoing.send_response(request_id, response).await;
+    }
+
+    /// Handles an MCP server status listing request.
+    ///
+    /// Loads the latest configuration and, if that succeeds, hands the work to
+    /// `list_mcp_server_status_task` on a spawned task so this method returns
+    /// without waiting for the listing. A configuration load failure is sent
+    /// back to the client as the error for `request_id`.
+    async fn list_mcp_server_status(
+        &self,
+        request_id: RequestId,
+        params: ListMcpServerStatusParams,
+    ) {
+        let config = match self.load_latest_config().await {
+            Err(error) => {
+                self.outgoing.send_error(request_id, error).await;
+                return;
+            }
+            Ok(config) => config,
+        };
+
+        // The task owns its own sender handle because it runs detached from `self`.
+        let outgoing = self.outgoing.clone();
+        tokio::spawn(Self::list_mcp_server_status_task(
+            outgoing, request_id, params, config,
+        ));
+    }
+
+    async fn list_mcp_server_status_task(
+        outgoing: Arc<OutgoingMessageSender>,
+        request_id: RequestId,
+        params: ListMcpServerStatusParams,
+        config: Config,
+    ) {
+        let snapshot = collect_mcp_snapshot(&config).await;

        let tools_by_server = group_tools_by_server(&snapshot.tools);

-        let mut server_names: Vec<String> = self
-            .config
+        let mut server_names: Vec<String> = config
            .mcp_servers
            .keys()
            .cloned()
@@ -1945,7 +2123,7 @@ impl CodexMessageProcessor {
                        message: format!("invalid cursor: {cursor}"),
                        data: None,
                    };
-                    self.outgoing.send_error(request_id, error).await;
+                    outgoing.send_error(request_id, error).await;
                    return;
                }
            },
@@ -1958,15 +2136,15 @@ impl CodexMessageProcessor {
                message: format!("cursor {start} exceeds total MCP servers {total}"),
                data: None,
            };
-            self.outgoing.send_error(request_id, error).await;
+            outgoing.send_error(request_id, error).await;
            return;
        }

        let end = start.saturating_add(effective_limit).min(total);

-        let data: Vec<McpServer> = server_names[start..end]
+        let data: Vec<McpServerStatus> = server_names[start..end]
            .iter()
-            .map(|name| McpServer {
+            .map(|name| McpServerStatus {
                name: name.clone(),
                tools: tools_by_server.get(name).cloned().unwrap_or_default(),
                resources: snapshot.resources.get(name).cloned().unwrap_or_default(),
@@ -1990,9 +2168,9 @@ impl CodexMessageProcessor {
            None
        };

-        let response = ListMcpServersResponse { data, next_cursor };
+        let response = ListMcpServerStatusResponse { data, next_cursor };

-        self.outgoing.send_response(request_id, response).await;
+        outgoing.send_response(request_id, response).await;
    }

    async fn handle_resume_conversation(
@@ -2028,7 +2206,7 @@ impl CodexMessageProcessor {
                let mut cli_overrides = cli_overrides.unwrap_or_default();
                if cfg!(windows) && self.config.features.enabled(Feature::WindowsSandbox) {
                    cli_overrides.insert(
-                        "features.enable_experimental_windows_sandbox".to_string(),
+                        "features.experimental_windows_sandbox".to_string(),
                        serde_json::json!(true),
                    );
                }
@@ -2461,6 +2639,33 @@ impl CodexMessageProcessor {
            .await;
    }

+    /// Handles a skills listing request.
+    ///
+    /// Produces one `SkillsListEntry` per requested working directory, in the
+    /// order given. When `params.cwds` is empty, the configured `cwd` is used
+    /// as the single directory. `force_reload` is forwarded unchanged to the
+    /// skills manager for every directory.
+    async fn skills_list(&self, request_id: RequestId, params: SkillsListParams) {
+        let SkillsListParams { cwds, force_reload } = params;
+
+        // Fall back to the configured working directory when none was requested.
+        let targets = if cwds.is_empty() {
+            vec![self.config.cwd.clone()]
+        } else {
+            cwds
+        };
+
+        let skills_manager = self.conversation_manager.skills_manager();
+        let mut data = Vec::with_capacity(targets.len());
+        for cwd in targets {
+            let outcome = skills_manager.skills_for_cwd_with_options(&cwd, force_reload);
+            data.push(codex_app_server_protocol::SkillsListEntry {
+                skills: skills_to_info(&outcome.skills),
+                errors: errors_to_info(&outcome.errors),
+                cwd,
+            });
+        }
+
+        let response = SkillsListResponse { data };
+        self.outgoing.send_response(request_id, response).await;
+    }
+
    async fn interrupt_conversation(
        &mut self,
        request_id: RequestId,
@@ -2669,7 +2874,7 @@ impl CodexMessageProcessor {
        })?;

        let mut config = self.config.as_ref().clone();
-        config.model = self.config.review_model.clone();
+        config.model = Some(self.config.review_model.clone());

        let NewConversation {
            conversation_id,
@@ -3106,9 +3311,35 @@ impl CodexMessageProcessor {
    }
 }

+/// Converts core skill metadata into the app-server protocol representation.
+///
+/// Each output entry copies `name`, `description` and `path` from the matching
+/// input entry and converts `scope` with `Into`; input order is preserved.
+fn skills_to_info(
+    skills: &[codex_core::skills::SkillMetadata],
+) -> Vec<codex_app_server_protocol::SkillMetadata> {
+    let mut infos = Vec::with_capacity(skills.len());
+    for entry in skills {
+        infos.push(codex_app_server_protocol::SkillMetadata {
+            name: entry.name.clone(),
+            description: entry.description.clone(),
+            path: entry.path.clone(),
+            scope: entry.scope.into(),
+        });
+    }
+    infos
+}
+
+/// Converts core skill errors into the app-server protocol representation.
+///
+/// Each output entry copies `path` and `message` from the matching input
+/// entry; input order is preserved.
+fn errors_to_info(
+    errors: &[codex_core::skills::SkillError],
+) -> Vec<codex_app_server_protocol::SkillErrorInfo> {
+    let mut infos = Vec::with_capacity(errors.len());
+    for failure in errors {
+        infos.push(codex_app_server_protocol::SkillErrorInfo {
+            path: failure.path.clone(),
+            message: failure.message.clone(),
+        });
+    }
+    infos
+}
+
 async fn derive_config_from_params(
    overrides: ConfigOverrides,
-    cli_overrides: Option<std::collections::HashMap<String, serde_json::Value>>,
+    cli_overrides: Option<HashMap<String, serde_json::Value>>,
 ) -> std::io::Result<Config> {
    let cli_overrides = cli_overrides
        .unwrap_or_default()
@@ -3116,7 +3347,7 @@ async fn derive_config_from_params(
        .map(|(k, v)| (k, json_to_toml(v)))
        .collect();

-    Config::load_with_cli_overrides(cli_overrides, overrides).await
+    Config::load_with_cli_overrides_and_harness_overrides(cli_overrides, overrides).await
 }

 async fn read_summary_from_rollout(
--- a/codex-rs/app-server/src/config_api.rs
+++ b/codex-rs/app-server/src/config_api.rs
--- a/codex-rs/app-server/src/lib.rs
+++ b/codex-rs/app-server/src/lib.rs
@@ -2,8 +2,6 @@

 use codex_common::CliConfigOverrides;
 use codex_core::config::Config;
-use codex_core::config::ConfigOverrides;
-use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge;
 use std::io::ErrorKind;
 use std::io::Result as IoResult;
 use std::path::PathBuf;
@@ -82,12 +80,11 @@ pub async fn run_main(
            format!("error parsing -c overrides: {e}"),
        )
    })?;
-    let config =
-        Config::load_with_cli_overrides(cli_kv_overrides.clone(), ConfigOverrides::default())
-            .await
-            .map_err(|e| {
-                std::io::Error::new(ErrorKind::InvalidData, format!("error loading config: {e}"))
-            })?;
+    let config = Config::load_with_cli_overrides(cli_kv_overrides.clone())
+        .await
+        .map_err(|e| {
+            std::io::Error::new(ErrorKind::InvalidData, format!("error loading config: {e}"))
+        })?;

    let feedback = CodexFeedback::new();

@@ -103,6 +100,7 @@ pub async fn run_main(
    // control the log level with `RUST_LOG`.
    let stderr_fmt = tracing_subscriber::fmt::layer()
        .with_writer(std::io::stderr)
+        .with_span_events(tracing_subscriber::fmt::format::FmtSpan::FULL)
        .with_filter(EnvFilter::from_default_env());

    let feedback_layer = tracing_subscriber::fmt::layer()
@@ -111,14 +109,15 @@ pub async fn run_main(
        .with_target(false)
        .with_filter(Targets::new().with_default(Level::TRACE));

+    let otel_logger_layer = otel.as_ref().and_then(|o| o.logger_layer());
+
+    let otel_tracing_layer = otel.as_ref().and_then(|o| o.tracing_layer());
+
    let _ = tracing_subscriber::registry()
        .with(stderr_fmt)
        .with(feedback_layer)
-        .with(otel.as_ref().map(|provider| {
-            OpenTelemetryTracingBridge::new(&provider.logger).with_filter(
-                tracing_subscriber::filter::filter_fn(codex_core::otel_init::codex_export_filter),
-            )
-        }))
+        .with(otel_logger_layer)
+        .with(otel_tracing_layer)
        .try_init();

    // Task: process incoming messages.
--- a/codex-rs/app-server/src/message_processor.rs
+++ b/codex-rs/app-server/src/message_processor.rs
@@ -59,6 +59,7 @@ impl MessageProcessor {
            outgoing.clone(),
            codex_linux_sandbox_exe,
            Arc::clone(&config),
+            cli_overrides.clone(),
            feedback,
        );
        let config_api = ConfigApi::new(config.codex_home.clone(), cli_overrides);
--- a/codex-rs/app-server/src/models.rs
+++ b/codex-rs/app-server/src/models.rs
@@ -3,12 +3,16 @@ use std::sync::Arc;
 use codex_app_server_protocol::Model;
 use codex_app_server_protocol::ReasoningEffortOption;
 use codex_core::ConversationManager;
+use codex_core::config::Config;
 use codex_protocol::openai_models::ModelPreset;
 use codex_protocol::openai_models::ReasoningEffortPreset;

-pub async fn supported_models(conversation_manager: Arc<ConversationManager>) -> Vec<Model> {
+pub async fn supported_models(
+    conversation_manager: Arc<ConversationManager>,
+    config: &Config,
+) -> Vec<Model> {
    conversation_manager
-        .list_models()
+        .list_models(config)
        .await
        .into_iter()
        .map(model_from_preset)
--- a/codex-rs/app-server/src/outgoing_message.rs
+++ b/codex-rs/app-server/src/outgoing_message.rs
@@ -16,6 +16,9 @@ use tracing::warn;

 use crate::error_code::INTERNAL_ERROR_CODE;

+#[cfg(test)]
+use codex_protocol::account::PlanType;
+
 /// Sends messages to the client and manages request callbacks.
 pub(crate) struct OutgoingMessageSender {
    next_request_id: AtomicI64,
@@ -230,6 +233,7 @@ mod tests {
                    }),
                    secondary: None,
                    credits: None,
+                    plan_type: Some(PlanType::Plus),
                },
            });

@@ -245,7 +249,8 @@ mod tests {
                            "resetsAt": 123
                        },
                        "secondary": null,
-                        "credits": null
+                        "credits": null,
+                        "planType": "plus"
                    }
                },
            }),
--- a/codex-rs/app-server/tests/common/Cargo.toml
+++ b/codex-rs/app-server/tests/common/Cargo.toml
@@ -13,7 +13,7 @@ assert_cmd = { workspace = true }
 base64 = { workspace = true }
 chrono = { workspace = true }
 codex-app-server-protocol = { workspace = true }
-codex-core = { workspace = true }
+codex-core = { workspace = true, features = ["test-support"] }
 codex-protocol = { workspace = true }
 serde = { workspace = true }
 serde_json = { workspace = true }
--- a/codex-rs/app-server/tests/common/lib.rs
+++ b/codex-rs/app-server/tests/common/lib.rs
@@ -1,6 +1,7 @@
 mod auth_fixtures;
 mod mcp_process;
 mod mock_model_server;
+mod models_cache;
 mod responses;
 mod rollout;

@@ -11,9 +12,16 @@ pub use auth_fixtures::write_chatgpt_auth;
 use codex_app_server_protocol::JSONRPCResponse;
 pub use core_test_support::format_with_current_shell;
 pub use core_test_support::format_with_current_shell_display;
+pub use core_test_support::format_with_current_shell_display_non_login;
+pub use core_test_support::format_with_current_shell_non_login;
+pub use core_test_support::test_path_buf_with_windows;
+pub use core_test_support::test_tmp_path;
+pub use core_test_support::test_tmp_path_buf;
 pub use mcp_process::McpProcess;
 pub use mock_model_server::create_mock_chat_completions_server;
 pub use mock_model_server::create_mock_chat_completions_server_unchecked;
+pub use models_cache::write_models_cache;
+pub use models_cache::write_models_cache_with_models;
 pub use responses::create_apply_patch_sse_response;
 pub use responses::create_exec_command_sse_response;
 pub use responses::create_final_assistant_message_sse_response;
--- a/codex-rs/app-server/tests/common/models_cache.rs
+++ b/codex-rs/app-server/tests/common/models_cache.rs
@@ -0,0 +1,86 @@
+use chrono::DateTime;
+use chrono::Utc;
+use codex_core::openai_models::model_presets::all_model_presets;
+use codex_protocol::openai_models::ClientVersion;
+use codex_protocol::openai_models::ConfigShellToolType;
+use codex_protocol::openai_models::ModelInfo;
+use codex_protocol::openai_models::ModelPreset;
+use codex_protocol::openai_models::ModelVisibility;
+use codex_protocol::openai_models::ReasoningSummaryFormat;
+use codex_protocol::openai_models::TruncationPolicyConfig;
+use serde_json::json;
+use std::path::Path;
+
+/// Build the `ModelInfo` cache entry for `preset`, storing `priority` as given; fields with no preset counterpart get fixed values.
+fn preset_to_info(preset: &ModelPreset, priority: i32) -> ModelInfo {
+    ModelInfo {
+        slug: preset.id.clone(),
+        display_name: preset.display_name.clone(),
+        description: Some(preset.description.clone()),
+        default_reasoning_level: preset.default_reasoning_effort,
+        supported_reasoning_levels: preset.supported_reasoning_efforts.clone(),
+        shell_type: ConfigShellToolType::ShellCommand,
+        visibility: if preset.show_in_picker { // picker presets are listed, all others hidden
+            ModelVisibility::List
+        } else {
+            ModelVisibility::Hide
+        },
+        minimal_client_version: ClientVersion(0, 1, 0),
+        supported_in_api: true,
+        priority,
+        upgrade: preset.upgrade.as_ref().map(|u| u.id.clone()), // keep only the upgrade target's id
+        base_instructions: None,
+        supports_reasoning_summaries: false,
+        support_verbosity: false,
+        default_verbosity: None,
+        apply_patch_tool_type: None,
+        truncation_policy: TruncationPolicyConfig::bytes(10_000),
+        supports_parallel_tool_calls: false,
+        context_window: None,
+        reasoning_summary_format: ReasoningSummaryFormat::None,
+        experimental_supported_tools: Vec::new(),
+    }
+}
+
+// todo(aibrahim): fix the priorities to be the opposite here.
+/// Write a `models_cache.json` file into `codex_home`, populated from the picker-visible presets.
+/// Intended to keep ModelsManager from making network requests to refresh models.
+/// The cache is stamped with the current time by `write_models_cache_with_models`, so it should be treated as fresh (within TTL).
+/// Presets come from `all_model_presets()` and are converted with `preset_to_info`; any I/O error is returned to the caller.
+pub fn write_models_cache(codex_home: &Path) -> std::io::Result<()> {
+    // Keep only presets flagged show_in_picker (same as builtin_model_presets does)
+    let presets: Vec<&ModelPreset> = all_model_presets()
+        .iter()
+        .filter(|preset| preset.show_in_picker)
+        .collect();
+    // Convert presets to ModelInfo with descending priorities: the first preset gets presets.len(), the last gets 1.
+    // NOTE(review): the TODO above says this ordering should be inverted; confirm how the consumer sorts by priority.
+    let models: Vec<ModelInfo> = presets
+        .iter()
+        .enumerate()
+        .map(|(idx, preset)| {
+            // Reverse the index: idx 0 maps to presets.len(), the final idx maps to 1
+            let priority = (presets.len() - idx) as i32;
+            preset_to_info(preset, priority)
+        })
+        .collect();
+
+    write_models_cache_with_models(codex_home, models)
+}
+
+/// Write `models_cache.json` under `codex_home` holding `models`, a `fetched_at` of the current UTC time, and a null `etag`.
+/// Useful when tests need specific models to be available; returns any serialization or file-write failure as `std::io::Error`.
+pub fn write_models_cache_with_models(
+    codex_home: &Path,
+    models: Vec<ModelInfo>,
+) -> std::io::Result<()> {
+    let cache_path = codex_home.join("models_cache.json");
+    // DateTime<Utc> serializes to RFC3339 format by default with serde
+    let fetched_at: DateTime<Utc> = Utc::now();
+    let cache = json!({
+        "fetched_at": fetched_at,
+        "etag": null,
+        "models": models
+    });
+    std::fs::write(cache_path, serde_json::to_string_pretty(&cache)?) // `?` converts a serde_json error into io::Error
+}
--- a/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs
+++ b/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs
@@ -271,7 +271,6 @@ async fn test_send_user_turn_changes_approval_policy_behavior() -> Result<()> {
            command: format_with_current_shell("python3 -c 'print(42)'"),
            cwd: working_directory.clone(),
            reason: None,
-            risk: None,
            parsed_cmd: vec![ParsedCommand::Unknown {
                cmd: "python3 -c 'print(42)'".to_string()
            }],
@@ -411,7 +410,7 @@ async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() -> Result<(
            cwd: first_cwd.clone(),
            approval_policy: AskForApproval::Never,
            sandbox_policy: SandboxPolicy::WorkspaceWrite {
-                writable_roots: vec![first_cwd.clone()],
+                writable_roots: vec![first_cwd.try_into()?],
                network_access: false,
                exclude_tmpdir_env_var: false,
                exclude_slash_tmp: false,
--- a/codex-rs/app-server/tests/suite/config.rs
+++ b/codex-rs/app-server/tests/suite/config.rs
@@ -1,5 +1,6 @@
 use anyhow::Result;
 use app_test_support::McpProcess;
+use app_test_support::test_tmp_path;
 use app_test_support::to_response;
 use codex_app_server_protocol::GetUserSavedConfigResponse;
 use codex_app_server_protocol::JSONRPCResponse;
@@ -23,10 +24,12 @@ use tokio::time::timeout;
 const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);

 fn create_config_toml(codex_home: &Path) -> std::io::Result<()> {
+    let writable_root = test_tmp_path();
    let config_toml = codex_home.join("config.toml");
    std::fs::write(
        config_toml,
-        r#"
+        format!(
+            r#"
 model = "gpt-5.1-codex-max"
 approval_policy = "on-request"
 sandbox_mode = "workspace-write"
@@ -38,7 +41,7 @@ forced_chatgpt_workspace_id = "12345678-0000-0000-0000-000000000000"
 forced_login_method = "chatgpt"

 [sandbox_workspace_write]
-writable_roots = ["/tmp"]
+writable_roots = [{}]
 network_access = true
 exclude_tmpdir_env_var = true
 exclude_slash_tmp = true
@@ -56,6 +59,8 @@ model_verbosity = "medium"
 model_provider = "openai"
 chatgpt_base_url = "https://api.chatgpt.com"
 "#,
+            serde_json::json!(writable_root)
+        ),
    )
 }

@@ -75,12 +80,13 @@ async fn get_config_toml_parses_all_fields() -> Result<()> {
    .await??;

    let config: GetUserSavedConfigResponse = to_response(resp)?;
+    let writable_root = test_tmp_path();
    let expected = GetUserSavedConfigResponse {
        config: UserSavedConfig {
            approval_policy: Some(AskForApproval::OnRequest),
            sandbox_mode: Some(SandboxMode::WorkspaceWrite),
            sandbox_settings: Some(SandboxSettings {
-                writable_roots: vec!["/tmp".into()],
+                writable_roots: vec![writable_root],
                network_access: Some(true),
                exclude_tmpdir_env_var: Some(true),
                exclude_slash_tmp: Some(true),
--- a/codex-rs/app-server/tests/suite/list_resume.rs
+++ b/codex-rs/app-server/tests/suite/list_resume.rs
@@ -358,3 +358,81 @@ async fn test_list_and_resume_conversations() -> Result<()> {

    Ok(())
 }
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn list_conversations_fetches_through_filtered_pages() -> Result<()> {
+    let codex_home = TempDir::new()?;
+
+    // Each case is (ts_file, ts_rfc, provider), listed newest first. Only the last 3 conversations
+    // match the provider filter; request 3 and ensure pagination keeps fetching past non-matching pages.
+    let cases = [
+        (
+            "2025-03-04T12-00-00",
+            "2025-03-04T12:00:00Z",
+            "skip_provider",
+        ),
+        (
+            "2025-03-03T12-00-00",
+            "2025-03-03T12:00:00Z",
+            "skip_provider",
+        ),
+        (
+            "2025-03-02T12-00-00",
+            "2025-03-02T12:00:00Z",
+            "target_provider",
+        ),
+        (
+            "2025-03-01T12-00-00",
+            "2025-03-01T12:00:00Z",
+            "target_provider",
+        ),
+        (
+            "2025-02-28T12-00-00",
+            "2025-02-28T12:00:00Z",
+            "target_provider",
+        ),
+    ];
+
+    for (ts_file, ts_rfc, provider) in cases {
+        create_fake_rollout(
+            codex_home.path(),
+            ts_file,
+            ts_rfc,
+            "Hello",
+            Some(provider),
+            None,
+        )?;
+    }
+
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
+
+    let req_id = mcp
+        .send_list_conversations_request(ListConversationsParams {
+            page_size: Some(3), // equals the number of target_provider rollouts created above
+            cursor: None,
+            model_providers: Some(vec!["target_provider".to_string()]),
+        })
+        .await?;
+    let resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(req_id)),
+    )
+    .await??;
+    let ListConversationsResponse { items, next_cursor } =
+        to_response::<ListConversationsResponse>(resp)?;
+
+    assert_eq!(
+        items.len(),
+        3,
+        "should fetch across pages to satisfy the limit"
+    );
+    assert!(
+        items
+            .iter()
+            .all(|item| item.model_provider == "target_provider")
+    );
+    assert_eq!(next_cursor, None);
+
+    Ok(())
+}
--- a/codex-rs/app-server/tests/suite/login.rs
+++ b/codex-rs/app-server/tests/suite/login.rs
@@ -1,8 +1,6 @@
 use anyhow::Result;
 use app_test_support::McpProcess;
 use app_test_support::to_response;
-use codex_app_server_protocol::CancelLoginChatGptParams;
-use codex_app_server_protocol::CancelLoginChatGptResponse;
 use codex_app_server_protocol::GetAuthStatusParams;
 use codex_app_server_protocol::GetAuthStatusResponse;
 use codex_app_server_protocol::JSONRPCError;
@@ -14,7 +12,6 @@ use codex_core::auth::AuthCredentialsStoreMode;
 use codex_login::login_with_api_key;
 use serial_test::serial;
 use std::path::Path;
-use std::time::Duration;
 use tempfile::TempDir;
 use tokio::time::timeout;

@@ -87,48 +84,6 @@ async fn logout_chatgpt_removes_auth() -> Result<()> {
    Ok(())
 }

-#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-// Serialize tests that launch the login server since it binds to a fixed port.
-#[serial(login_port)]
-async fn login_and_cancel_chatgpt() -> Result<()> {
-    let codex_home = TempDir::new()?;
-    create_config_toml(codex_home.path())?;
-
-    let mut mcp = McpProcess::new(codex_home.path()).await?;
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
-
-    let login_id = mcp.send_login_chat_gpt_request().await?;
-    let login_resp: JSONRPCResponse = timeout(
-        DEFAULT_READ_TIMEOUT,
-        mcp.read_stream_until_response_message(RequestId::Integer(login_id)),
-    )
-    .await??;
-    let login: LoginChatGptResponse = to_response(login_resp)?;
-
-    let cancel_id = mcp
-        .send_cancel_login_chat_gpt_request(CancelLoginChatGptParams {
-            login_id: login.login_id,
-        })
-        .await?;
-    let cancel_resp: JSONRPCResponse = timeout(
-        DEFAULT_READ_TIMEOUT,
-        mcp.read_stream_until_response_message(RequestId::Integer(cancel_id)),
-    )
-    .await??;
-    let _ok: CancelLoginChatGptResponse = to_response(cancel_resp)?;
-
-    // Optionally observe the completion notification; do not fail if it races.
-    let maybe_note = timeout(
-        Duration::from_secs(2),
-        mcp.read_stream_until_notification_message("codex/event/login_chat_gpt_complete"),
-    )
-    .await;
-    if maybe_note.is_err() {
-        eprintln!("warning: did not observe login_chat_gpt_complete notification after cancel");
-    }
-    Ok(())
-}
-
 fn create_config_toml_forced_login(codex_home: &Path, forced_method: &str) -> std::io::Result<()> {
    let config_toml = codex_home.join("config.toml");
    let contents = format!(
--- a/codex-rs/app-server/tests/suite/user_agent.rs
+++ b/codex-rs/app-server/tests/suite/user_agent.rs
@@ -25,12 +25,13 @@ async fn get_user_agent_returns_current_codex_user_agent() -> Result<()> {
    .await??;

    let os_info = os_info::get();
+    let originator = codex_core::default_client::originator().value.as_str();
+    let os_type = os_info.os_type();
+    let os_version = os_info.version();
+    let architecture = os_info.architecture().unwrap_or("unknown");
+    let terminal_ua = codex_core::terminal::user_agent();
    let user_agent = format!(
-        "codex_cli_rs/0.0.0 ({} {}; {}) {} (codex-app-server-tests; 0.1.0)",
-        os_info.os_type(),
-        os_info.version(),
-        os_info.architecture().unwrap_or("unknown"),
-        codex_core::terminal::user_agent()
+        "{originator}/0.0.0 ({os_type} {os_version}; {architecture}) {terminal_ua} (codex-app-server-tests; 0.1.0)"
    );

    let received: GetUserAgentResponse = to_response(response)?;
--- a/codex-rs/app-server/tests/suite/v2/account.rs
+++ b/codex-rs/app-server/tests/suite/v2/account.rs
@@ -241,7 +241,7 @@ async fn login_account_chatgpt_rejected_when_forced_api() -> Result<()> {
 #[tokio::test]
 // Serialize tests that launch the login server since it binds to a fixed port.
 #[serial(login_port)]
-async fn login_account_chatgpt_start() -> Result<()> {
+async fn login_account_chatgpt_start_can_be_cancelled() -> Result<()> {
    let codex_home = TempDir::new()?;
    create_config_toml(codex_home.path(), CreateConfigTomlParams::default())?;

--- a/codex-rs/app-server/tests/suite/v2/config_rpc.rs
+++ b/codex-rs/app-server/tests/suite/v2/config_rpc.rs
@@ -1,9 +1,12 @@
 use anyhow::Result;
 use app_test_support::McpProcess;
+use app_test_support::test_path_buf_with_windows;
+use app_test_support::test_tmp_path_buf;
 use app_test_support::to_response;
+use codex_app_server_protocol::AskForApproval;
 use codex_app_server_protocol::ConfigBatchWriteParams;
 use codex_app_server_protocol::ConfigEdit;
-use codex_app_server_protocol::ConfigLayerName;
+use codex_app_server_protocol::ConfigLayerSource;
 use codex_app_server_protocol::ConfigReadParams;
 use codex_app_server_protocol::ConfigReadResponse;
 use codex_app_server_protocol::ConfigValueWriteParams;
@@ -12,7 +15,10 @@ use codex_app_server_protocol::JSONRPCError;
 use codex_app_server_protocol::JSONRPCResponse;
 use codex_app_server_protocol::MergeStrategy;
 use codex_app_server_protocol::RequestId;
+use codex_app_server_protocol::SandboxMode;
+use codex_app_server_protocol::ToolsV2;
 use codex_app_server_protocol::WriteStatus;
+use codex_utils_absolute_path::AbsolutePathBuf;
 use pretty_assertions::assert_eq;
 use serde_json::json;
 use tempfile::TempDir;
@@ -37,6 +43,8 @@ model = "gpt-user"
 sandbox_mode = "workspace-write"
 "#,
    )?;
+    let codex_home_path = codex_home.path().canonicalize()?;
+    let user_file = AbsolutePathBuf::try_from(codex_home_path.join("config.toml"))?;

    let mut mcp = McpProcess::new(codex_home.path()).await?;
    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
@@ -57,15 +65,81 @@ sandbox_mode = "workspace-write"
        layers,
    } = to_response(resp)?;

-    assert_eq!(config.get("model"), Some(&json!("gpt-user")));
+    assert_eq!(config.model.as_deref(), Some("gpt-user"));
    assert_eq!(
        origins.get("model").expect("origin").name,
-        ConfigLayerName::User
+        ConfigLayerSource::User {
+            file: user_file.clone(),
+        }
    );
    let layers = layers.expect("layers present");
    assert_eq!(layers.len(), 2);
-    assert_eq!(layers[0].name, ConfigLayerName::SessionFlags);
-    assert_eq!(layers[1].name, ConfigLayerName::User);
+    assert_eq!(layers[0].name, ConfigLayerSource::SessionFlags);
+    assert_eq!(layers[1].name, ConfigLayerSource::User { file: user_file });
+
+    Ok(())
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn config_read_includes_tools() -> Result<()> {
+    let codex_home = TempDir::new()?;
+    write_config(
+        &codex_home,
+        r#"
+model = "gpt-user"
+
+[tools]
+web_search = true
+view_image = false
+"#,
+    )?;
+    let codex_home_path = codex_home.path().canonicalize()?; // NOTE(review): presumably the server reports canonical paths; confirm
+    let user_file = AbsolutePathBuf::try_from(codex_home_path.join("config.toml"))?;
+
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
+
+    let request_id = mcp
+        .send_config_read_request(ConfigReadParams {
+            include_layers: true, // the `layers` assertions at the end expect layers to be present
+        })
+        .await?;
+    let resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
+    )
+    .await??;
+    let ConfigReadResponse {
+        config,
+        origins,
+        layers,
+    } = to_response(resp)?;
+
+    let tools = config.tools.expect("tools present");
+    assert_eq!(
+        tools,
+        ToolsV2 {
+            web_search: Some(true),
+            view_image: Some(false),
+        }
+    );
+    assert_eq!(
+        origins.get("tools.web_search").expect("origin").name,
+        ConfigLayerSource::User {
+            file: user_file.clone(),
+        }
+    );
+    assert_eq!(
+        origins.get("tools.view_image").expect("origin").name,
+        ConfigLayerSource::User {
+            file: user_file.clone(),
+        }
+    );
+
+    let layers = layers.expect("layers present");
+    assert_eq!(layers.len(), 2); // session flags first, then the user config.toml
+    assert_eq!(layers[0].name, ConfigLayerSource::SessionFlags);
+    assert_eq!(layers[1].name, ConfigLayerSource::User { file: user_file });

    Ok(())
 }
@@ -73,29 +147,40 @@ sandbox_mode = "workspace-write"
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn config_read_includes_system_layer_and_overrides() -> Result<()> {
    let codex_home = TempDir::new()?;
+    let user_dir = test_path_buf_with_windows("/user", Some(r"C:\Users\user"));
+    let system_dir = test_path_buf_with_windows("/system", Some(r"C:\System"));
    write_config(
        &codex_home,
-        r#"
+        &format!(
+            r#"
 model = "gpt-user"
 approval_policy = "on-request"
 sandbox_mode = "workspace-write"

 [sandbox_workspace_write]
-writable_roots = ["/user"]
+writable_roots = [{}]
 network_access = true
 "#,
+            serde_json::json!(user_dir)
+        ),
    )?;
+    let codex_home_path = codex_home.path().canonicalize()?;
+    let user_file = AbsolutePathBuf::try_from(codex_home_path.join("config.toml"))?;

    let managed_path = codex_home.path().join("managed_config.toml");
+    let managed_file = AbsolutePathBuf::try_from(managed_path.clone())?;
    std::fs::write(
        &managed_path,
-        r#"
+        format!(
+            r#"
 model = "gpt-system"
 approval_policy = "never"

 [sandbox_workspace_write]
-writable_roots = ["/system"]
+writable_roots = [{}]
 "#,
+            serde_json::json!(system_dir.clone())
+        ),
    )?;

    let managed_path_str = managed_path.display().to_string();
@@ -123,57 +208,64 @@ writable_roots = ["/system"]
        layers,
    } = to_response(resp)?;

-    assert_eq!(config.get("model"), Some(&json!("gpt-system")));
+    assert_eq!(config.model.as_deref(), Some("gpt-system"));
    assert_eq!(
        origins.get("model").expect("origin").name,
-        ConfigLayerName::System
+        ConfigLayerSource::System {
+            file: managed_file.clone(),
+        }
    );

-    assert_eq!(config.get("approval_policy"), Some(&json!("never")));
+    assert_eq!(config.approval_policy, Some(AskForApproval::Never));
    assert_eq!(
        origins.get("approval_policy").expect("origin").name,
-        ConfigLayerName::System
+        ConfigLayerSource::System {
+            file: managed_file.clone(),
+        }
    );

-    assert_eq!(config.get("sandbox_mode"), Some(&json!("workspace-write")));
+    assert_eq!(config.sandbox_mode, Some(SandboxMode::WorkspaceWrite));
    assert_eq!(
        origins.get("sandbox_mode").expect("origin").name,
-        ConfigLayerName::User
+        ConfigLayerSource::User {
+            file: user_file.clone(),
+        }
    );

-    assert_eq!(
-        config
-            .get("sandbox_workspace_write")
-            .and_then(|v| v.get("writable_roots")),
-        Some(&json!(["/system"]))
-    );
+    let sandbox = config
+        .sandbox_workspace_write
+        .as_ref()
+        .expect("sandbox workspace write");
+    assert_eq!(sandbox.writable_roots, vec![system_dir]);
    assert_eq!(
        origins
            .get("sandbox_workspace_write.writable_roots.0")
            .expect("origin")
            .name,
-        ConfigLayerName::System
+        ConfigLayerSource::System {
+            file: managed_file.clone(),
+        }
    );

-    assert_eq!(
-        config
-            .get("sandbox_workspace_write")
-            .and_then(|v| v.get("network_access")),
-        Some(&json!(true))
-    );
+    assert!(sandbox.network_access);
    assert_eq!(
        origins
            .get("sandbox_workspace_write.network_access")
            .expect("origin")
            .name,
-        ConfigLayerName::User
+        ConfigLayerSource::User {
+            file: user_file.clone(),
+        }
    );

    let layers = layers.expect("layers present");
    assert_eq!(layers.len(), 3);
-    assert_eq!(layers[0].name, ConfigLayerName::System);
-    assert_eq!(layers[1].name, ConfigLayerName::SessionFlags);
-    assert_eq!(layers[2].name, ConfigLayerName::User);
+    assert_eq!(
+        layers[0].name,
+        ConfigLayerSource::System { file: managed_file }
+    );
+    assert_eq!(layers[1].name, ConfigLayerSource::SessionFlags);
+    assert_eq!(layers[2].name, ConfigLayerSource::User { file: user_file });

    Ok(())
 }
@@ -242,7 +334,7 @@ model = "gpt-old"
    )
    .await??;
    let verify: ConfigReadResponse = to_response(verify_resp)?;
-    assert_eq!(verify.config.get("model"), Some(&json!("gpt-new")));
+    assert_eq!(verify.config.model.as_deref(), Some("gpt-new"));

    Ok(())
 }
@@ -294,6 +386,7 @@ async fn config_batch_write_applies_multiple_edits() -> Result<()> {
    let mut mcp = McpProcess::new(codex_home.path()).await?;
    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;

+    let writable_root = test_tmp_path_buf();
    let batch_id = mcp
        .send_config_batch_write_request(ConfigBatchWriteParams {
            file_path: Some(codex_home.path().join("config.toml").display().to_string()),
@@ -306,7 +399,7 @@ async fn config_batch_write_applies_multiple_edits() -> Result<()> {
                ConfigEdit {
                    key_path: "sandbox_workspace_write".to_string(),
                    value: json!({
-                        "writable_roots": ["/tmp"],
+                        "writable_roots": [writable_root.clone()],
                        "network_access": false
                    }),
                    merge_strategy: MergeStrategy::Replace,
@@ -342,22 +435,14 @@ async fn config_batch_write_applies_multiple_edits() -> Result<()> {
    )
    .await??;
    let read: ConfigReadResponse = to_response(read_resp)?;
-    assert_eq!(
-        read.config.get("sandbox_mode"),
-        Some(&json!("workspace-write"))
-    );
-    assert_eq!(
-        read.config
-            .get("sandbox_workspace_write")
-            .and_then(|v| v.get("writable_roots")),
-        Some(&json!(["/tmp"]))
-    );
-    assert_eq!(
-        read.config
-            .get("sandbox_workspace_write")
-            .and_then(|v| v.get("network_access")),
-        Some(&json!(false))
-    );
+    assert_eq!(read.config.sandbox_mode, Some(SandboxMode::WorkspaceWrite));
+    let sandbox = read
+        .config
+        .sandbox_workspace_write
+        .as_ref()
+        .expect("sandbox workspace write");
+    assert_eq!(sandbox.writable_roots, vec![writable_root]);
+    assert!(!sandbox.network_access);

    Ok(())
 }
--- a/codex-rs/app-server/tests/suite/v2/model_list.rs
+++ b/codex-rs/app-server/tests/suite/v2/model_list.rs
@@ -4,6 +4,7 @@ use anyhow::Result;
 use anyhow::anyhow;
 use app_test_support::McpProcess;
 use app_test_support::to_response;
+use app_test_support::write_models_cache;
 use codex_app_server_protocol::JSONRPCError;
 use codex_app_server_protocol::JSONRPCResponse;
 use codex_app_server_protocol::Model;
@@ -22,6 +23,7 @@ const INVALID_REQUEST_ERROR_CODE: i64 = -32600;
 #[tokio::test]
 async fn list_models_returns_all_models_with_large_limit() -> Result<()> {
    let codex_home = TempDir::new()?;
+    write_models_cache(codex_home.path())?;
    let mut mcp = McpProcess::new(codex_home.path()).await?;

    timeout(DEFAULT_TIMEOUT, mcp.initialize()).await??;
@@ -45,75 +47,6 @@ async fn list_models_returns_all_models_with_large_limit() -> Result<()> {
    } = to_response::<ModelListResponse>(response)?;

    let expected_models = vec![
-        Model {
-            id: "gpt-5.1-codex-max".to_string(),
-            model: "gpt-5.1-codex-max".to_string(),
-            display_name: "gpt-5.1-codex-max".to_string(),
-            description: "Latest Codex-optimized flagship for deep and fast reasoning.".to_string(),
-            supported_reasoning_efforts: vec![
-                ReasoningEffortOption {
-                    reasoning_effort: ReasoningEffort::Low,
-                    description: "Fast responses with lighter reasoning".to_string(),
-                },
-                ReasoningEffortOption {
-                    reasoning_effort: ReasoningEffort::Medium,
-                    description: "Balances speed and reasoning depth for everyday tasks"
-                        .to_string(),
-                },
-                ReasoningEffortOption {
-                    reasoning_effort: ReasoningEffort::High,
-                    description: "Maximizes reasoning depth for complex problems".to_string(),
-                },
-                ReasoningEffortOption {
-                    reasoning_effort: ReasoningEffort::XHigh,
-                    description: "Extra high reasoning depth for complex problems".to_string(),
-                },
-            ],
-            default_reasoning_effort: ReasoningEffort::Medium,
-            is_default: true,
-        },
-        Model {
-            id: "gpt-5.1-codex".to_string(),
-            model: "gpt-5.1-codex".to_string(),
-            display_name: "gpt-5.1-codex".to_string(),
-            description: "Optimized for codex.".to_string(),
-            supported_reasoning_efforts: vec![
-                ReasoningEffortOption {
-                    reasoning_effort: ReasoningEffort::Low,
-                    description: "Fastest responses with limited reasoning".to_string(),
-                },
-                ReasoningEffortOption {
-                    reasoning_effort: ReasoningEffort::Medium,
-                    description: "Dynamically adjusts reasoning based on the task".to_string(),
-                },
-                ReasoningEffortOption {
-                    reasoning_effort: ReasoningEffort::High,
-                    description: "Maximizes reasoning depth for complex or ambiguous problems"
-                        .to_string(),
-                },
-            ],
-            default_reasoning_effort: ReasoningEffort::Medium,
-            is_default: false,
-        },
-        Model {
-            id: "gpt-5.1-codex-mini".to_string(),
-            model: "gpt-5.1-codex-mini".to_string(),
-            display_name: "gpt-5.1-codex-mini".to_string(),
-            description: "Optimized for codex. Cheaper, faster, but less capable.".to_string(),
-            supported_reasoning_efforts: vec![
-                ReasoningEffortOption {
-                    reasoning_effort: ReasoningEffort::Medium,
-                    description: "Dynamically adjusts reasoning based on the task".to_string(),
-                },
-                ReasoningEffortOption {
-                    reasoning_effort: ReasoningEffort::High,
-                    description: "Maximizes reasoning depth for complex or ambiguous problems"
-                        .to_string(),
-                },
-            ],
-            default_reasoning_effort: ReasoningEffort::Medium,
-            is_default: false,
-        },
        Model {
            id: "gpt-5.1".to_string(),
            model: "gpt-5.1".to_string(),
@@ -139,6 +72,135 @@ async fn list_models_returns_all_models_with_large_limit() -> Result<()> {
                },
            ],
            default_reasoning_effort: ReasoningEffort::Medium,
+            is_default: true,
+        },
+        Model {
+            id: "gpt-5.2".to_string(),
+            model: "gpt-5.2".to_string(),
+            display_name: "gpt-5.2".to_string(),
+            description:
+                "Latest frontier model with improvements across knowledge, reasoning and coding"
+                    .to_string(),
+            supported_reasoning_efforts: vec![
+                ReasoningEffortOption {
+                    reasoning_effort: ReasoningEffort::Low,
+                    description: "Balances speed with some reasoning; useful for straightforward \
+                                   queries and short explanations"
+                        .to_string(),
+                },
+                ReasoningEffortOption {
+                    reasoning_effort: ReasoningEffort::Medium,
+                    description: "Provides a solid balance of reasoning depth and latency for \
+                         general-purpose tasks"
+                        .to_string(),
+                },
+                ReasoningEffortOption {
+                    reasoning_effort: ReasoningEffort::High,
+                    description: "Maximizes reasoning depth for complex or ambiguous problems"
+                        .to_string(),
+                },
+                ReasoningEffortOption {
+                    reasoning_effort: ReasoningEffort::XHigh,
+                    description: "Extra high reasoning for complex problems".to_string(),
+                },
+            ],
+            default_reasoning_effort: ReasoningEffort::Medium,
+            is_default: false,
+        },
+        Model {
+            id: "gpt-5.1-codex-mini".to_string(),
+            model: "gpt-5.1-codex-mini".to_string(),
+            display_name: "gpt-5.1-codex-mini".to_string(),
+            description: "Optimized for codex. Cheaper, faster, but less capable.".to_string(),
+            supported_reasoning_efforts: vec![
+                ReasoningEffortOption {
+                    reasoning_effort: ReasoningEffort::Medium,
+                    description: "Dynamically adjusts reasoning based on the task".to_string(),
+                },
+                ReasoningEffortOption {
+                    reasoning_effort: ReasoningEffort::High,
+                    description: "Maximizes reasoning depth for complex or ambiguous problems"
+                        .to_string(),
+                },
+            ],
+            default_reasoning_effort: ReasoningEffort::Medium,
+            is_default: false,
+        },
+        Model {
+            id: "gpt-5.1-codex".to_string(),
+            model: "gpt-5.1-codex".to_string(),
+            display_name: "gpt-5.1-codex".to_string(),
+            description: "Optimized for codex.".to_string(),
+            supported_reasoning_efforts: vec![
+                ReasoningEffortOption {
+                    reasoning_effort: ReasoningEffort::Low,
+                    description: "Fastest responses with limited reasoning".to_string(),
+                },
+                ReasoningEffortOption {
+                    reasoning_effort: ReasoningEffort::Medium,
+                    description: "Dynamically adjusts reasoning based on the task".to_string(),
+                },
+                ReasoningEffortOption {
+                    reasoning_effort: ReasoningEffort::High,
+                    description: "Maximizes reasoning depth for complex or ambiguous problems"
+                        .to_string(),
+                },
+            ],
+            default_reasoning_effort: ReasoningEffort::Medium,
+            is_default: false,
+        },
+        Model {
+            id: "gpt-5.1-codex-max".to_string(),
+            model: "gpt-5.1-codex-max".to_string(),
+            display_name: "gpt-5.1-codex-max".to_string(),
+            description: "Latest Codex-optimized flagship for deep and fast reasoning.".to_string(),
+            supported_reasoning_efforts: vec![
+                ReasoningEffortOption {
+                    reasoning_effort: ReasoningEffort::Low,
+                    description: "Fast responses with lighter reasoning".to_string(),
+                },
+                ReasoningEffortOption {
+                    reasoning_effort: ReasoningEffort::Medium,
+                    description: "Balances speed and reasoning depth for everyday tasks"
+                        .to_string(),
+                },
+                ReasoningEffortOption {
+                    reasoning_effort: ReasoningEffort::High,
+                    description: "Greater reasoning depth for complex problems".to_string(),
+                },
+                ReasoningEffortOption {
+                    reasoning_effort: ReasoningEffort::XHigh,
+                    description: "Extra high reasoning depth for complex problems".to_string(),
+                },
+            ],
+            default_reasoning_effort: ReasoningEffort::Medium,
+            is_default: false,
+        },
+        Model {
+            id: "caribou".to_string(),
+            model: "caribou".to_string(),
+            display_name: "caribou".to_string(),
+            description: "Latest Codex-optimized flagship for deep and fast reasoning.".to_string(),
+            supported_reasoning_efforts: vec![
+                ReasoningEffortOption {
+                    reasoning_effort: ReasoningEffort::Low,
+                    description: "Fast responses with lighter reasoning".to_string(),
+                },
+                ReasoningEffortOption {
+                    reasoning_effort: ReasoningEffort::Medium,
+                    description: "Balances speed and reasoning depth for everyday tasks"
+                        .to_string(),
+                },
+                ReasoningEffortOption {
+                    reasoning_effort: ReasoningEffort::High,
+                    description: "Greater reasoning depth for complex problems".to_string(),
+                },
+                ReasoningEffortOption {
+                    reasoning_effort: ReasoningEffort::XHigh,
+                    description: "Extra high reasoning depth for complex problems".to_string(),
+                },
+            ],
+            default_reasoning_effort: ReasoningEffort::Medium,
            is_default: false,
        },
    ];
@@ -151,6 +213,7 @@ async fn list_models_returns_all_models_with_large_limit() -> Result<()> {
 #[tokio::test]
 async fn list_models_pagination_works() -> Result<()> {
    let codex_home = TempDir::new()?;
+    write_models_cache(codex_home.path())?;
    let mut mcp = McpProcess::new(codex_home.path()).await?;

    timeout(DEFAULT_TIMEOUT, mcp.initialize()).await??;
@@ -174,7 +237,7 @@ async fn list_models_pagination_works() -> Result<()> {
    } = to_response::<ModelListResponse>(first_response)?;

    assert_eq!(first_items.len(), 1);
-    assert_eq!(first_items[0].id, "gpt-5.1-codex-max");
+    assert_eq!(first_items[0].id, "gpt-5.1");
    let next_cursor = first_cursor.ok_or_else(|| anyhow!("cursor for second page"))?;

    let second_request = mcp
@@ -196,7 +259,7 @@ async fn list_models_pagination_works() -> Result<()> {
    } = to_response::<ModelListResponse>(second_response)?;

    assert_eq!(second_items.len(), 1);
-    assert_eq!(second_items[0].id, "gpt-5.1-codex");
+    assert_eq!(second_items[0].id, "gpt-5.2");
    let third_cursor = second_cursor.ok_or_else(|| anyhow!("cursor for third page"))?;

    let third_request = mcp
@@ -240,14 +303,59 @@ async fn list_models_pagination_works() -> Result<()> {
    } = to_response::<ModelListResponse>(fourth_response)?;

    assert_eq!(fourth_items.len(), 1);
-    assert_eq!(fourth_items[0].id, "gpt-5.1");
-    assert!(fourth_cursor.is_none());
+    assert_eq!(fourth_items[0].id, "gpt-5.1-codex");
+    let fifth_cursor = fourth_cursor.ok_or_else(|| anyhow!("cursor for fifth page"))?;
+
+    let fifth_request = mcp
+        .send_list_models_request(ModelListParams {
+            limit: Some(1),
+            cursor: Some(fifth_cursor.clone()),
+        })
+        .await?;
+
+    let fifth_response: JSONRPCResponse = timeout(
+        DEFAULT_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(fifth_request)),
+    )
+    .await??;
+
+    let ModelListResponse {
+        data: fifth_items,
+        next_cursor: fifth_cursor,
+    } = to_response::<ModelListResponse>(fifth_response)?;
+
+    assert_eq!(fifth_items.len(), 1);
+    assert_eq!(fifth_items[0].id, "gpt-5.1-codex-max");
+    let sixth_cursor = fifth_cursor.ok_or_else(|| anyhow!("cursor for sixth page"))?;
+
+    let sixth_request = mcp
+        .send_list_models_request(ModelListParams {
+            limit: Some(1),
+            cursor: Some(sixth_cursor.clone()),
+        })
+        .await?;
+
+    let sixth_response: JSONRPCResponse = timeout(
+        DEFAULT_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(sixth_request)),
+    )
+    .await??;
+
+    let ModelListResponse {
+        data: sixth_items,
+        next_cursor: sixth_cursor,
+    } = to_response::<ModelListResponse>(sixth_response)?;
+
+    assert_eq!(sixth_items.len(), 1);
+    assert_eq!(sixth_items[0].id, "caribou");
+    assert!(sixth_cursor.is_none());
    Ok(())
 }

 #[tokio::test]
 async fn list_models_rejects_invalid_cursor() -> Result<()> {
    let codex_home = TempDir::new()?;
+    write_models_cache(codex_home.path())?;
    let mut mcp = McpProcess::new(codex_home.path()).await?;

    timeout(DEFAULT_TIMEOUT, mcp.initialize()).await??;
--- a/codex-rs/app-server/tests/suite/v2/rate_limits.rs
+++ b/codex-rs/app-server/tests/suite/v2/rate_limits.rs
@@ -11,6 +11,7 @@ use codex_app_server_protocol::RateLimitSnapshot;
 use codex_app_server_protocol::RateLimitWindow;
 use codex_app_server_protocol::RequestId;
 use codex_core::auth::AuthCredentialsStoreMode;
+use codex_protocol::account::PlanType as AccountPlanType;
 use pretty_assertions::assert_eq;
 use serde_json::json;
 use std::path::Path;
@@ -153,6 +154,7 @@ async fn get_account_rate_limits_returns_snapshot() -> Result<()> {
                resets_at: Some(secondary_reset_timestamp),
            }),
            credits: None,
+            plan_type: Some(AccountPlanType::Pro),
        },
    };
    assert_eq!(received, expected);
--- a/codex-rs/app-server/tests/suite/v2/thread_list.rs
+++ b/codex-rs/app-server/tests/suite/v2/thread_list.rs
@@ -6,37 +6,96 @@ use codex_app_server_protocol::GitInfo as ApiGitInfo;
 use codex_app_server_protocol::JSONRPCResponse;
 use codex_app_server_protocol::RequestId;
 use codex_app_server_protocol::SessionSource;
-use codex_app_server_protocol::ThreadListParams;
 use codex_app_server_protocol::ThreadListResponse;
 use codex_protocol::protocol::GitInfo as CoreGitInfo;
+use std::path::Path;
 use std::path::PathBuf;
 use tempfile::TempDir;
 use tokio::time::timeout;

 const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);

+async fn init_mcp(codex_home: &Path) -> Result<McpProcess> {
+    let mut mcp = McpProcess::new(codex_home).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
+    Ok(mcp)
+}
+
+async fn list_threads(
+    mcp: &mut McpProcess,
+    cursor: Option<String>,
+    limit: Option<u32>,
+    providers: Option<Vec<String>>,
+) -> Result<ThreadListResponse> {
+    let request_id = mcp
+        .send_thread_list_request(codex_app_server_protocol::ThreadListParams {
+            cursor,
+            limit,
+            model_providers: providers,
+        })
+        .await?;
+    let resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(request_id)),
+    )
+    .await??;
+    to_response::<ThreadListResponse>(resp)
+}
+
+fn create_fake_rollouts<F, G>(
+    codex_home: &Path,
+    count: usize,
+    provider_for_index: F,
+    timestamp_for_index: G,
+    preview: &str,
+) -> Result<Vec<String>>
+where
+    F: Fn(usize) -> &'static str,
+    G: Fn(usize) -> (String, String),
+{
+    let mut ids = Vec::with_capacity(count);
+    for i in 0..count {
+        let (ts_file, ts_rfc) = timestamp_for_index(i);
+        ids.push(create_fake_rollout(
+            codex_home,
+            &ts_file,
+            &ts_rfc,
+            preview,
+            Some(provider_for_index(i)),
+            None,
+        )?);
+    }
+    Ok(ids)
+}
+
+fn timestamp_at(
+    year: i32,
+    month: u32,
+    day: u32,
+    hour: u32,
+    minute: u32,
+    second: u32,
+) -> (String, String) {
+    (
+        format!("{year:04}-{month:02}-{day:02}T{hour:02}-{minute:02}-{second:02}"),
+        format!("{year:04}-{month:02}-{day:02}T{hour:02}:{minute:02}:{second:02}Z"),
+    )
+}
+
 #[tokio::test]
 async fn thread_list_basic_empty() -> Result<()> {
    let codex_home = TempDir::new()?;
    create_minimal_config(codex_home.path())?;

-    let mut mcp = McpProcess::new(codex_home.path()).await?;
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
+    let mut mcp = init_mcp(codex_home.path()).await?;

-    // List threads in an empty CODEX_HOME; should return an empty page with nextCursor: null.
-    let list_id = mcp
-        .send_thread_list_request(ThreadListParams {
-            cursor: None,
-            limit: Some(10),
-            model_providers: Some(vec!["mock_provider".to_string()]),
-        })
-        .await?;
-    let list_resp: JSONRPCResponse = timeout(
-        DEFAULT_READ_TIMEOUT,
-        mcp.read_stream_until_response_message(RequestId::Integer(list_id)),
+    let ThreadListResponse { data, next_cursor } = list_threads(
+        &mut mcp,
+        None,
+        Some(10),
+        Some(vec!["mock_provider".to_string()]),
    )
-    .await??;
-    let ThreadListResponse { data, next_cursor } = to_response::<ThreadListResponse>(list_resp)?;
+    .await?;
    assert!(data.is_empty());
    assert_eq!(next_cursor, None);

@@ -86,26 +145,19 @@ async fn thread_list_pagination_next_cursor_none_on_last_page() -> Result<()> {
        None,
    )?;

-    let mut mcp = McpProcess::new(codex_home.path()).await?;
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
+    let mut mcp = init_mcp(codex_home.path()).await?;

    // Page 1: limit 2 → expect next_cursor Some.
-    let page1_id = mcp
-        .send_thread_list_request(ThreadListParams {
-            cursor: None,
-            limit: Some(2),
-            model_providers: Some(vec!["mock_provider".to_string()]),
-        })
-        .await?;
-    let page1_resp: JSONRPCResponse = timeout(
-        DEFAULT_READ_TIMEOUT,
-        mcp.read_stream_until_response_message(RequestId::Integer(page1_id)),
-    )
-    .await??;
    let ThreadListResponse {
        data: data1,
        next_cursor: cursor1,
-    } = to_response::<ThreadListResponse>(page1_resp)?;
+    } = list_threads(
+        &mut mcp,
+        None,
+        Some(2),
+        Some(vec!["mock_provider".to_string()]),
+    )
+    .await?;
    assert_eq!(data1.len(), 2);
    for thread in &data1 {
        assert_eq!(thread.preview, "Hello");
@@ -119,22 +171,16 @@ async fn thread_list_pagination_next_cursor_none_on_last_page() -> Result<()> {
    let cursor1 = cursor1.expect("expected nextCursor on first page");

    // Page 2: with cursor → expect next_cursor None when no more results.
-    let page2_id = mcp
-        .send_thread_list_request(ThreadListParams {
-            cursor: Some(cursor1),
-            limit: Some(2),
-            model_providers: Some(vec!["mock_provider".to_string()]),
-        })
-        .await?;
-    let page2_resp: JSONRPCResponse = timeout(
-        DEFAULT_READ_TIMEOUT,
-        mcp.read_stream_until_response_message(RequestId::Integer(page2_id)),
-    )
-    .await??;
    let ThreadListResponse {
        data: data2,
        next_cursor: cursor2,
-    } = to_response::<ThreadListResponse>(page2_resp)?;
+    } = list_threads(
+        &mut mcp,
+        Some(cursor1),
+        Some(2),
+        Some(vec!["mock_provider".to_string()]),
+    )
+    .await?;
    assert!(data2.len() <= 2);
    for thread in &data2 {
        assert_eq!(thread.preview, "Hello");
@@ -173,23 +219,16 @@ async fn thread_list_respects_provider_filter() -> Result<()> {
        None,
    )?;

-    let mut mcp = McpProcess::new(codex_home.path()).await?;
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
+    let mut mcp = init_mcp(codex_home.path()).await?;

    // Filter to only other_provider; expect 1 item, nextCursor None.
-    let list_id = mcp
-        .send_thread_list_request(ThreadListParams {
-            cursor: None,
-            limit: Some(10),
-            model_providers: Some(vec!["other_provider".to_string()]),
-        })
-        .await?;
-    let resp: JSONRPCResponse = timeout(
-        DEFAULT_READ_TIMEOUT,
-        mcp.read_stream_until_response_message(RequestId::Integer(list_id)),
+    let ThreadListResponse { data, next_cursor } = list_threads(
+        &mut mcp,
+        None,
+        Some(10),
+        Some(vec!["other_provider".to_string()]),
    )
-    .await??;
-    let ThreadListResponse { data, next_cursor } = to_response::<ThreadListResponse>(resp)?;
+    .await?;
    assert_eq!(data.len(), 1);
    assert_eq!(next_cursor, None);
    let thread = &data[0];
@@ -205,6 +244,146 @@ async fn thread_list_respects_provider_filter() -> Result<()> {
    Ok(())
 }

+#[tokio::test]
+async fn thread_list_fetches_until_limit_or_exhausted() -> Result<()> {
+    let codex_home = TempDir::new()?;
+    create_minimal_config(codex_home.path())?;
+
+    // Newest 16 conversations belong to a different provider; the older 8 are the
+    // only ones that match the filter. We request 8 so the server must keep
+    // paging past the first two pages to reach the desired count.
+    create_fake_rollouts(
+        codex_home.path(),
+        24,
+        |i| {
+            if i < 16 {
+                "skip_provider"
+            } else {
+                "target_provider"
+            }
+        },
+        |i| timestamp_at(2025, 3, 30 - i as u32, 12, 0, 0),
+        "Hello",
+    )?;
+
+    let mut mcp = init_mcp(codex_home.path()).await?;
+
+    // Request 8 threads for the target provider; the matches only start on the
+    // third page so we rely on pagination to reach the limit.
+    let ThreadListResponse { data, next_cursor } = list_threads(
+        &mut mcp,
+        None,
+        Some(8),
+        Some(vec!["target_provider".to_string()]),
+    )
+    .await?;
+    assert_eq!(
+        data.len(),
+        8,
+        "should keep paging until the requested count is filled"
+    );
+    assert!(
+        data.iter()
+            .all(|thread| thread.model_provider == "target_provider"),
+        "all returned threads must match the requested provider"
+    );
+    assert_eq!(
+        next_cursor, None,
+        "once the requested count is satisfied on the final page, nextCursor should be None"
+    );
+
+    Ok(())
+}
+
+#[tokio::test]
+async fn thread_list_enforces_max_limit() -> Result<()> {
+    let codex_home = TempDir::new()?;
+    create_minimal_config(codex_home.path())?;
+
+    create_fake_rollouts(
+        codex_home.path(),
+        105,
+        |_| "mock_provider",
+        |i| {
+            let month = 5 + (i / 28);
+            let day = (i % 28) + 1;
+            timestamp_at(2025, month as u32, day as u32, 0, 0, 0)
+        },
+        "Hello",
+    )?;
+
+    let mut mcp = init_mcp(codex_home.path()).await?;
+
+    let ThreadListResponse { data, next_cursor } = list_threads(
+        &mut mcp,
+        None,
+        Some(200),
+        Some(vec!["mock_provider".to_string()]),
+    )
+    .await?;
+    assert_eq!(
+        data.len(),
+        100,
+        "limit should be clamped to the maximum page size"
+    );
+    assert!(
+        next_cursor.is_some(),
+        "when more than the maximum exist, nextCursor should continue pagination"
+    );
+
+    Ok(())
+}
+
+#[tokio::test]
+async fn thread_list_stops_when_not_enough_filtered_results_exist() -> Result<()> {
+    let codex_home = TempDir::new()?;
+    create_minimal_config(codex_home.path())?;
+
+    // Only the last 7 conversations match the provider filter; we ask for 10 to
+    // ensure the server exhausts pagination without looping forever.
+    create_fake_rollouts(
+        codex_home.path(),
+        22,
+        |i| {
+            if i < 15 {
+                "skip_provider"
+            } else {
+                "target_provider"
+            }
+        },
+        |i| timestamp_at(2025, 4, 28 - i as u32, 8, 0, 0),
+        "Hello",
+    )?;
+
+    let mut mcp = init_mcp(codex_home.path()).await?;
+
+    // Request more threads than exist after filtering; expect all matches to be
+    // returned with nextCursor None.
+    let ThreadListResponse { data, next_cursor } = list_threads(
+        &mut mcp,
+        None,
+        Some(10),
+        Some(vec!["target_provider".to_string()]),
+    )
+    .await?;
+    assert_eq!(
+        data.len(),
+        7,
+        "all available filtered threads should be returned"
+    );
+    assert!(
+        data.iter()
+            .all(|thread| thread.model_provider == "target_provider"),
+        "results should still respect the provider filter"
+    );
+    assert_eq!(
+        next_cursor, None,
+        "when results are exhausted before reaching the limit, nextCursor should be None"
+    );
+
+    Ok(())
+}
+
 #[tokio::test]
 async fn thread_list_includes_git_info() -> Result<()> {
    let codex_home = TempDir::new()?;
@@ -224,22 +403,15 @@ async fn thread_list_includes_git_info() -> Result<()> {
        Some(git_info),
    )?;

-    let mut mcp = McpProcess::new(codex_home.path()).await?;
-    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
+    let mut mcp = init_mcp(codex_home.path()).await?;

-    let list_id = mcp
-        .send_thread_list_request(ThreadListParams {
-            cursor: None,
-            limit: Some(10),
-            model_providers: Some(vec!["mock_provider".to_string()]),
-        })
-        .await?;
-    let resp: JSONRPCResponse = timeout(
-        DEFAULT_READ_TIMEOUT,
-        mcp.read_stream_until_response_message(RequestId::Integer(list_id)),
+    let ThreadListResponse { data, .. } = list_threads(
+        &mut mcp,
+        None,
+        Some(10),
+        Some(vec!["mock_provider".to_string()]),
    )
-    .await??;
-    let ThreadListResponse { data, .. } = to_response::<ThreadListResponse>(resp)?;
+    .await?;
    let thread = data
        .iter()
        .find(|t| t.id == conversation_id)
--- a/codex-rs/app-server/tests/suite/v2/turn_start.rs
+++ b/codex-rs/app-server/tests/suite/v2/turn_start.rs
@@ -427,7 +427,6 @@ async fn turn_start_exec_approval_decline_v2() -> Result<()> {
        request_id,
        serde_json::to_value(CommandExecutionRequestApprovalResponse {
            decision: ApprovalDecision::Decline,
-            accept_settings: None,
        })?,
    )
    .await?;
@@ -533,7 +532,7 @@ async fn turn_start_updates_sandbox_and_cwd_between_turns_v2() -> Result<()> {
            cwd: Some(first_cwd.clone()),
            approval_policy: Some(codex_app_server_protocol::AskForApproval::Never),
            sandbox_policy: Some(codex_app_server_protocol::SandboxPolicy::WorkspaceWrite {
-                writable_roots: vec![first_cwd.clone()],
+                writable_roots: vec![first_cwd.try_into()?],
                network_access: false,
                exclude_tmpdir_env_var: false,
                exclude_slash_tmp: false,
--- a/codex-rs/apply-patch/src/invocation.rs
+++ b/codex-rs/apply-patch/src/invocation.rs
@@ -0,0 +1,813 @@
+use std::collections::HashMap;
+use std::path::Path;
+use std::sync::LazyLock;
+
+use tree_sitter::Parser;
+use tree_sitter::Query;
+use tree_sitter::QueryCursor;
+use tree_sitter::StreamingIterator;
+use tree_sitter_bash::LANGUAGE as BASH;
+
+use crate::ApplyPatchAction;
+use crate::ApplyPatchArgs;
+use crate::ApplyPatchError;
+use crate::ApplyPatchFileChange;
+use crate::ApplyPatchFileUpdate;
+use crate::IoError;
+use crate::MaybeApplyPatchVerified;
+use crate::parser::Hunk;
+use crate::parser::ParseError;
+use crate::parser::parse_patch;
+use crate::unified_diff_from_chunks;
+use std::str::Utf8Error;
+use tree_sitter::LanguageError;
+
+const APPLY_PATCH_COMMANDS: [&str; 2] = ["apply_patch", "applypatch"];
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum ApplyPatchShell {
+    Unix,
+    PowerShell,
+    Cmd,
+}
+
+#[derive(Debug, PartialEq)]
+pub enum MaybeApplyPatch {
+    Body(ApplyPatchArgs),
+    ShellParseError(ExtractHeredocError),
+    PatchParseError(ParseError),
+    NotApplyPatch,
+}
+
+#[derive(Debug, PartialEq)]
+pub enum ExtractHeredocError {
+    CommandDidNotStartWithApplyPatch,
+    FailedToLoadBashGrammar(LanguageError),
+    HeredocNotUtf8(Utf8Error),
+    FailedToParsePatchIntoAst,
+    FailedToFindHeredocBody,
+}
+
+fn classify_shell_name(shell: &str) -> Option<String> {
+    std::path::Path::new(shell)
+        .file_stem()
+        .and_then(|name| name.to_str())
+        .map(str::to_ascii_lowercase)
+}
+
+fn classify_shell(shell: &str, flag: &str) -> Option<ApplyPatchShell> {
+    classify_shell_name(shell).and_then(|name| match name.as_str() {
+        "bash" | "zsh" | "sh" if matches!(flag, "-lc" | "-c") => Some(ApplyPatchShell::Unix),
+        "pwsh" | "powershell" if flag.eq_ignore_ascii_case("-command") => {
+            Some(ApplyPatchShell::PowerShell)
+        }
+        "cmd" if flag.eq_ignore_ascii_case("/c") => Some(ApplyPatchShell::Cmd),
+        _ => None,
+    })
+}
+
+fn can_skip_flag(shell: &str, flag: &str) -> bool {
+    classify_shell_name(shell).is_some_and(|name| {
+        matches!(name.as_str(), "pwsh" | "powershell") && flag.eq_ignore_ascii_case("-noprofile")
+    })
+}
+
+fn parse_shell_script(argv: &[String]) -> Option<(ApplyPatchShell, &str)> {
+    match argv {
+        [shell, flag, script] => classify_shell(shell, flag).map(|shell_type| {
+            let script = script.as_str();
+            (shell_type, script)
+        }),
+        [shell, skip_flag, flag, script] if can_skip_flag(shell, skip_flag) => {
+            classify_shell(shell, flag).map(|shell_type| {
+                let script = script.as_str();
+                (shell_type, script)
+            })
+        }
+        _ => None,
+    }
+}
+
+fn extract_apply_patch_from_shell(
+    shell: ApplyPatchShell,
+    script: &str,
+) -> std::result::Result<(String, Option<String>), ExtractHeredocError> {
+    match shell {
+        ApplyPatchShell::Unix | ApplyPatchShell::PowerShell | ApplyPatchShell::Cmd => {
+            extract_apply_patch_from_bash(script)
+        }
+    }
+}
+
+// TODO: make private once we remove tests in lib.rs
+pub fn maybe_parse_apply_patch(argv: &[String]) -> MaybeApplyPatch {
+    match argv {
+        // Direct invocation: apply_patch <patch>
+        [cmd, body] if APPLY_PATCH_COMMANDS.contains(&cmd.as_str()) => match parse_patch(body) {
+            Ok(source) => MaybeApplyPatch::Body(source),
+            Err(e) => MaybeApplyPatch::PatchParseError(e),
+        },
+        // Shell heredoc form: (optional `cd <path> &&`) apply_patch <<'EOF' ...
+        _ => match parse_shell_script(argv) {
+            Some((shell, script)) => match extract_apply_patch_from_shell(shell, script) {
+                Ok((body, workdir)) => match parse_patch(&body) {
+                    Ok(mut source) => {
+                        source.workdir = workdir;
+                        MaybeApplyPatch::Body(source)
+                    }
+                    Err(e) => MaybeApplyPatch::PatchParseError(e),
+                },
+                Err(ExtractHeredocError::CommandDidNotStartWithApplyPatch) => {
+                    MaybeApplyPatch::NotApplyPatch
+                }
+                Err(e) => MaybeApplyPatch::ShellParseError(e),
+            },
+            None => MaybeApplyPatch::NotApplyPatch,
+        },
+    }
+}
+
+/// cwd must be an absolute path so that we can resolve relative paths in the
+/// patch.
+pub fn maybe_parse_apply_patch_verified(argv: &[String], cwd: &Path) -> MaybeApplyPatchVerified {
+    // Detect a raw patch body passed directly as the command or as the body of a shell
+    // script. In these cases, report an explicit error rather than applying the patch.
+    if let [body] = argv
+        && parse_patch(body).is_ok()
+    {
+        return MaybeApplyPatchVerified::CorrectnessError(ApplyPatchError::ImplicitInvocation);
+    }
+    if let Some((_, script)) = parse_shell_script(argv)
+        && parse_patch(script).is_ok()
+    {
+        return MaybeApplyPatchVerified::CorrectnessError(ApplyPatchError::ImplicitInvocation);
+    }
+
+    match maybe_parse_apply_patch(argv) {
+        MaybeApplyPatch::Body(ApplyPatchArgs {
+            patch,
+            hunks,
+            workdir,
+        }) => {
+            let effective_cwd = workdir
+                .as_ref()
+                .map(|dir| {
+                    let path = Path::new(dir);
+                    if path.is_absolute() {
+                        path.to_path_buf()
+                    } else {
+                        cwd.join(path)
+                    }
+                })
+                .unwrap_or_else(|| cwd.to_path_buf());
+            let mut changes = HashMap::new();
+            for hunk in hunks {
+                let path = hunk.resolve_path(&effective_cwd);
+                match hunk {
+                    Hunk::AddFile { contents, .. } => {
+                        changes.insert(path, ApplyPatchFileChange::Add { content: contents });
+                    }
+                    Hunk::DeleteFile { .. } => {
+                        let content = match std::fs::read_to_string(&path) {
+                            Ok(content) => content,
+                            Err(e) => {
+                                return MaybeApplyPatchVerified::CorrectnessError(
+                                    ApplyPatchError::IoError(IoError {
+                                        context: format!("Failed to read {}", path.display()),
+                                        source: e,
+                                    }),
+                                );
+                            }
+                        };
+                        changes.insert(path, ApplyPatchFileChange::Delete { content });
+                    }
+                    Hunk::UpdateFile {
+                        move_path, chunks, ..
+                    } => {
+                        let ApplyPatchFileUpdate {
+                            unified_diff,
+                            content: contents,
+                        } = match unified_diff_from_chunks(&path, &chunks) {
+                            Ok(diff) => diff,
+                            Err(e) => {
+                                return MaybeApplyPatchVerified::CorrectnessError(e);
+                            }
+                        };
+                        changes.insert(
+                            path,
+                            ApplyPatchFileChange::Update {
+                                unified_diff,
+                                move_path: move_path.map(|p| effective_cwd.join(p)),
+                                new_content: contents,
+                            },
+                        );
+                    }
+                }
+            }
+            MaybeApplyPatchVerified::Body(ApplyPatchAction {
+                changes,
+                patch,
+                cwd: effective_cwd,
+            })
+        }
+        MaybeApplyPatch::ShellParseError(e) => MaybeApplyPatchVerified::ShellParseError(e),
+        MaybeApplyPatch::PatchParseError(e) => MaybeApplyPatchVerified::CorrectnessError(e.into()),
+        MaybeApplyPatch::NotApplyPatch => MaybeApplyPatchVerified::NotApplyPatch,
+    }
+}
+
+/// Extract the heredoc body (and optional `cd` workdir) from a `bash -lc` script
+/// that invokes the apply_patch tool using a heredoc.
+///
+/// Supported top‑level forms (must be the only top‑level statement):
+/// - `apply_patch <<'EOF'\n...\nEOF`
+/// - `cd <path> && apply_patch <<'EOF'\n...\nEOF`
+///
+/// Notes about matching:
+/// - Parsed with Tree‑sitter Bash and a strict query that uses anchors so the
+///   heredoc‑redirected statement is the only top‑level statement.
+/// - The connector between `cd` and `apply_patch` must be `&&` (not `|` or `||`).
+/// - Exactly one positional argument is allowed for `cd` (no second argument): a bare
+///   `word`, a double-quoted string, or a single-quoted raw string.
+/// - The apply command is validated in‑query via `#any-of?` to allow `apply_patch`
+///   or `applypatch`.
+/// - Preceding or trailing commands (e.g., `echo ...;` or `... && echo done`) do not match.
+///
+/// Returns `(heredoc_body, Some(path))` when the `cd` variant matches, or
+/// `(heredoc_body, None)` for the direct form. Errors are returned if the script
+/// cannot be parsed or does not match the allowed patterns.
+fn extract_apply_patch_from_bash(
+    src: &str,
+) -> std::result::Result<(String, Option<String>), ExtractHeredocError> {
+    // This function uses a Tree-sitter query to recognize one of two
+    // whole-script forms, each expressed as a single top-level statement:
+    //
+    // 1. apply_patch <<'EOF'\n...\nEOF
+    // 2. cd <path> && apply_patch <<'EOF'\n...\nEOF
+    //
+    // Key ideas when reading the query:
+    // - a dot (`.`) between named nodes enforces adjacency among named children; a
+    //   leading or trailing dot anchors the match to the start/end of the parent node.
+    // - we match a single redirected_statement directly under program with leading
+    //   and trailing anchors (`.`). This ensures it is the only top-level statement
+    //   (so prefixes like `echo ...;` or suffixes like `... && echo done` do not match).
+    //
+    // Overall, we want to be conservative and only match the intended forms, as other
+    // forms are likely to be model errors, or incorrectly interpreted by later code.
+    //
+    // If you're editing this query, it's helpful to start by creating a debugging binary
+    // which will let you see the AST of an arbitrary bash script passed in, and optionally
+    // also run an arbitrary query against the AST. This is useful for understanding
+    // how tree-sitter parses the script and whether the query syntax is correct. Be sure
+    // to test both positive and negative cases.
+    static APPLY_PATCH_QUERY: LazyLock<Query> = LazyLock::new(|| {
+        let language = BASH.into();
+        #[expect(clippy::expect_used)]
+        Query::new(
+            &language,
+            r#"
+            (
+              program
+                . (redirected_statement
+                    body: (command
+                            name: (command_name (word) @apply_name) .)
+                    (#any-of? @apply_name "apply_patch" "applypatch")
+                    redirect: (heredoc_redirect
+                                . (heredoc_start)
+                                . (heredoc_body) @heredoc
+                                . (heredoc_end)
+                                .))
+                .)
+
+            (
+              program
+                . (redirected_statement
+                    body: (list
+                            . (command
+                                name: (command_name (word) @cd_name) .
+                                argument: [
+                                  (word) @cd_path
+                                  (string (string_content) @cd_path)
+                                  (raw_string) @cd_raw_string
+                                ] .)
+                            "&&"
+                            . (command
+                                name: (command_name (word) @apply_name))
+                            .)
+                    (#eq? @cd_name "cd")
+                    (#any-of? @apply_name "apply_patch" "applypatch")
+                    redirect: (heredoc_redirect
+                                . (heredoc_start)
+                                . (heredoc_body) @heredoc
+                                . (heredoc_end)
+                                .))
+                .)
+            "#,
+        )
+        .expect("valid bash query")
+    });
+
+    let lang = BASH.into();
+    let mut parser = Parser::new();
+    parser
+        .set_language(&lang)
+        .map_err(ExtractHeredocError::FailedToLoadBashGrammar)?;
+    let tree = parser
+        .parse(src, None)
+        .ok_or(ExtractHeredocError::FailedToParsePatchIntoAst)?;
+
+    let bytes = src.as_bytes();
+    let root = tree.root_node();
+
+    let mut cursor = QueryCursor::new();
+    let mut matches = cursor.matches(&APPLY_PATCH_QUERY, root, bytes);
+    while let Some(m) = matches.next() {
+        let mut heredoc_text: Option<String> = None;
+        let mut cd_path: Option<String> = None;
+
+        for capture in m.captures.iter() {
+            let name = APPLY_PATCH_QUERY.capture_names()[capture.index as usize];
+            match name {
+                "heredoc" => {
+                    let text = capture
+                        .node
+                        .utf8_text(bytes)
+                        .map_err(ExtractHeredocError::HeredocNotUtf8)?
+                        .trim_end_matches('\n')
+                        .to_string();
+                    heredoc_text = Some(text);
+                }
+                "cd_path" => {
+                    let text = capture
+                        .node
+                        .utf8_text(bytes)
+                        .map_err(ExtractHeredocError::HeredocNotUtf8)?
+                        .to_string();
+                    cd_path = Some(text);
+                }
+                "cd_raw_string" => {
+                    let raw = capture
+                        .node
+                        .utf8_text(bytes)
+                        .map_err(ExtractHeredocError::HeredocNotUtf8)?;
+                    let trimmed = raw
+                        .strip_prefix('\'')
+                        .and_then(|s| s.strip_suffix('\''))
+                        .unwrap_or(raw);
+                    cd_path = Some(trimmed.to_string());
+                }
+                _ => {}
+            }
+        }
+
+        if let Some(heredoc) = heredoc_text {
+            return Ok((heredoc, cd_path));
+        }
+    }
+
+    Err(ExtractHeredocError::CommandDidNotStartWithApplyPatch)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use assert_matches::assert_matches;
+    use pretty_assertions::assert_eq;
+    use std::fs;
+    use std::path::PathBuf;
+    use std::string::ToString;
+    use tempfile::tempdir;
+
+    /// Surrounds `body` with the `*** Begin Patch` / `*** End Patch` envelope lines.
+    fn wrap_patch(body: &str) -> String {
+        ["*** Begin Patch", body, "*** End Patch"].join("\n")
+    }
+
+    /// Copies each borrowed string into an owned `String`, preserving order.
+    fn strs_to_strings(strs: &[&str]) -> Vec<String> {
+        let mut owned = Vec::with_capacity(strs.len());
+        owned.extend(strs.iter().map(ToString::to_string));
+        owned
+    }
+
+    // Argv builders: each wraps a script in the command line for one shell flavor.
+    /// Builds `["bash", "-lc", script]`.
+    fn args_bash(script: &str) -> Vec<String> {
+        let argv = ["bash", "-lc", script];
+        strs_to_strings(&argv)
+    }
+
+    /// Builds `["powershell.exe", "-Command", script]`.
+    fn args_powershell(script: &str) -> Vec<String> {
+        let argv = ["powershell.exe", "-Command", script];
+        strs_to_strings(&argv)
+    }
+
+    /// Builds `["powershell.exe", "-NoProfile", "-Command", script]`.
+    fn args_powershell_no_profile(script: &str) -> Vec<String> {
+        let argv = ["powershell.exe", "-NoProfile", "-Command", script];
+        strs_to_strings(&argv)
+    }
+
+    /// Builds `["pwsh", "-NoProfile", "-Command", script]`.
+    fn args_pwsh(script: &str) -> Vec<String> {
+        let argv = ["pwsh", "-NoProfile", "-Command", script];
+        strs_to_strings(&argv)
+    }
+
+    /// Builds `["cmd.exe", "/c", script]`.
+    fn args_cmd(script: &str) -> Vec<String> {
+        let argv = ["cmd.exe", "/c", script];
+        strs_to_strings(&argv)
+    }
+
+    /// Returns `prefix` followed by an `apply_patch <<'PATCH'` heredoc that adds
+    /// a file `foo` with the single line `hi`; nothing follows the closing delimiter.
+    fn heredoc_script(prefix: &str) -> String {
+        heredoc_script_ps(prefix, "")
+    }
+
+    /// Returns `prefix`, then an `apply_patch <<'PATCH'` heredoc that adds a file
+    /// `foo` with the single line `hi`, then `suffix` directly after the closing
+    /// `PATCH` delimiter.
+    fn heredoc_script_ps(prefix: &str, suffix: &str) -> String {
+        let patch = "*** Begin Patch\n*** Add File: foo\n+hi\n*** End Patch";
+        format!("{prefix}apply_patch <<'PATCH'\n{patch}\nPATCH{suffix}")
+    }
+
+    /// The hunks the fixture patch should parse into: one added file `foo`
+    /// whose contents are `hi\n`.
+    fn expected_single_add() -> Vec<Hunk> {
+        let add_foo = Hunk::AddFile {
+            path: PathBuf::from("foo"),
+            contents: String::from("hi\n"),
+        };
+        vec![add_foo]
+    }
+
+    /// Asserts that `args` parses as an apply_patch invocation whose workdir is
+    /// `expected_workdir` and whose hunks equal `expected_single_add()`.
+    /// Panics with the actual result if parsing yields anything but `Body`.
+    fn assert_match_args(args: Vec<String>, expected_workdir: Option<&str>) {
+        let (hunks, workdir) = match maybe_parse_apply_patch(&args) {
+            MaybeApplyPatch::Body(ApplyPatchArgs { hunks, workdir, .. }) => (hunks, workdir),
+            result => panic!("expected MaybeApplyPatch::Body got {result:?}"),
+        };
+        assert_eq!(workdir.as_deref(), expected_workdir);
+        assert_eq!(hunks, expected_single_add());
+    }
+
+    /// Runs `assert_match_args` on `script` wrapped as `bash -lc <script>`.
+    fn assert_match(script: &str, expected_workdir: Option<&str>) {
+        assert_match_args(args_bash(script), expected_workdir);
+    }
+
+    /// Asserts that `bash -lc <script>` is classified as `NotApplyPatch`.
+    fn assert_not_match(script: &str) {
+        let outcome = maybe_parse_apply_patch(&args_bash(script));
+        assert_matches!(outcome, MaybeApplyPatch::NotApplyPatch);
+    }
+
+    /// A raw patch body passed as the only argv element is reported as an
+    /// implicit invocation error.
+    #[test]
+    fn test_implicit_patch_single_arg_is_error() {
+        let dir = tempdir().unwrap();
+        let args = vec!["*** Begin Patch\n*** Add File: foo\n+hi\n*** End Patch".to_string()];
+        let outcome = maybe_parse_apply_patch_verified(&args, dir.path());
+        assert_matches!(
+            outcome,
+            MaybeApplyPatchVerified::CorrectnessError(ApplyPatchError::ImplicitInvocation)
+        );
+    }
+
+    /// A raw patch body used as the whole `bash -lc` script is reported as an
+    /// implicit invocation error.
+    #[test]
+    fn test_implicit_patch_bash_script_is_error() {
+        let dir = tempdir().unwrap();
+        let args = args_bash("*** Begin Patch\n*** Add File: foo\n+hi\n*** End Patch");
+        let outcome = maybe_parse_apply_patch_verified(&args, dir.path());
+        assert_matches!(
+            outcome,
+            MaybeApplyPatchVerified::CorrectnessError(ApplyPatchError::ImplicitInvocation)
+        );
+    }
+
+    /// Direct `apply_patch <patch>` argv (no shell wrapper) parses into the
+    /// single expected `AddFile` hunk.
+    #[test]
+    fn test_literal() {
+        let args = strs_to_strings(&[
+            "apply_patch",
+            r#"*** Begin Patch
+*** Add File: foo
++hi
+*** End Patch
+"#,
+        ]);
+
+        match maybe_parse_apply_patch(&args) {
+            MaybeApplyPatch::Body(ApplyPatchArgs { hunks, .. }) => {
+                assert_eq!(hunks, expected_single_add());
+            }
+            result => panic!("expected MaybeApplyPatch::Body got {result:?}"),
+        }
+    }
+
+    /// Same as `test_literal`, but invoked through the `applypatch` command name.
+    #[test]
+    fn test_literal_applypatch() {
+        let args = strs_to_strings(&[
+            "applypatch",
+            r#"*** Begin Patch
+*** Add File: foo
++hi
+*** End Patch
+"#,
+        ]);
+
+        match maybe_parse_apply_patch(&args) {
+            MaybeApplyPatch::Body(ApplyPatchArgs { hunks, .. }) => {
+                assert_eq!(hunks, expected_single_add());
+            }
+            result => panic!("expected MaybeApplyPatch::Body got {result:?}"),
+        }
+    }
+
+    /// The plain `bash -lc` heredoc form matches with no workdir.
+    #[test]
+    fn test_heredoc() {
+        let script = heredoc_script("");
+        assert_match(&script, None);
+    }
+
+    /// The heredoc form also matches when bash is invoked with `-c` instead of `-lc`.
+    #[test]
+    fn test_heredoc_non_login_shell() {
+        let args = strs_to_strings(&["bash", "-c", heredoc_script("").as_str()]);
+        assert_match_args(args, None);
+    }
+
+    /// The `bash -lc` heredoc form matches for the `applypatch` command name,
+    /// with no workdir and the single expected `AddFile` hunk.
+    #[test]
+    fn test_heredoc_applypatch() {
+        let args = strs_to_strings(&[
+            "bash",
+            "-lc",
+            r#"applypatch <<'PATCH'
+*** Begin Patch
+*** Add File: foo
++hi
+*** End Patch
+PATCH"#,
+        ]);
+
+        assert_match_args(args, None);
+    }
+
+    /// The heredoc script matches when passed via `powershell.exe -Command`.
+    #[test]
+    fn test_powershell_heredoc() {
+        assert_match_args(args_powershell(&heredoc_script("")), None);
+    }
+    /// The heredoc script matches when passed via `powershell.exe -NoProfile -Command`.
+    #[test]
+    fn test_powershell_heredoc_no_profile() {
+        assert_match_args(args_powershell_no_profile(&heredoc_script("")), None);
+    }
+    /// The heredoc script matches when passed via `pwsh -NoProfile -Command`.
+    #[test]
+    fn test_pwsh_heredoc() {
+        assert_match_args(args_pwsh(&heredoc_script("")), None);
+    }
+
+    /// Via `cmd.exe /c`, a leading `cd foo &&` is reported as workdir `foo`.
+    #[test]
+    fn test_cmd_heredoc_with_cd() {
+        let args = args_cmd(&heredoc_script("cd foo && "));
+        assert_match_args(args, Some("foo"));
+    }
+
+    /// Via `bash -lc`, a leading `cd foo &&` is reported as workdir `foo`.
+    #[test]
+    fn test_heredoc_with_leading_cd() {
+        let script = heredoc_script("cd foo && ");
+        assert_match(&script, Some("foo"));
+    }
+
+    /// `cd foo; apply_patch ...` (semicolon instead of `&&`) is not recognized.
+    #[test]
+    fn test_cd_with_semicolon_is_ignored() {
+        let script = heredoc_script("cd foo; ");
+        assert_not_match(&script);
+    }
+
+    /// `cd bar || apply_patch ...` is not recognized.
+    #[test]
+    fn test_cd_or_apply_patch_is_ignored() {
+        let script = heredoc_script("cd bar || ");
+        assert_not_match(&script);
+    }
+
+    /// `cd bar | apply_patch ...` is not recognized.
+    #[test]
+    fn test_cd_pipe_apply_patch_is_ignored() {
+        let script = heredoc_script("cd bar | ");
+        assert_not_match(&script);
+    }
+
+    /// A single-quoted `cd` path is reported as the workdir with its quotes removed.
+    #[test]
+    fn test_cd_single_quoted_path_with_spaces() {
+        let script = heredoc_script("cd 'foo bar' && ");
+        assert_match(&script, Some("foo bar"));
+    }
+
+    /// A double-quoted `cd` path is reported as the workdir with its quotes removed.
+    #[test]
+    fn test_cd_double_quoted_path_with_spaces() {
+        let script = heredoc_script("cd \"foo bar\" && ");
+        assert_match(&script, Some("foo bar"));
+    }
+
+    /// A command other than `cd` before `&& apply_patch ...` is not recognized.
+    #[test]
+    fn test_echo_and_apply_patch_is_ignored() {
+        let script = heredoc_script("echo foo && ");
+        assert_not_match(&script);
+    }
+
+    /// `apply_patch` given a positional argument alongside the heredoc is not recognized.
+    #[test]
+    fn test_apply_patch_with_arg_is_ignored() {
+        assert_not_match(
+            "apply_patch foo <<'PATCH'\n*** Begin Patch\n*** Add File: foo\n+hi\n*** End Patch\nPATCH",
+        );
+    }
+
+    /// Two chained `cd` commands before `apply_patch` are not recognized.
+    #[test]
+    fn test_double_cd_then_apply_patch_is_ignored() {
+        let script = heredoc_script("cd foo && cd bar && ");
+        assert_not_match(&script);
+    }
+
+    /// `cd` with two arguments before `apply_patch` is not recognized.
+    #[test]
+    fn test_cd_two_args_is_ignored() {
+        let script = heredoc_script("cd foo bar && ");
+        assert_not_match(&script);
+    }
+
+    /// A trailing `&& echo done` after the heredoc makes the script unrecognized.
+    #[test]
+    fn test_cd_then_apply_patch_then_extra_is_ignored() {
+        assert_not_match(&heredoc_script_ps("cd bar && ", " && echo done"));
+    }
+
+    /// A statement preceding the `cd && apply_patch <<...` sequence makes the
+    /// script unrecognized.
+    #[test]
+    fn test_echo_then_cd_and_apply_patch_is_ignored() {
+        let script = heredoc_script("echo foo; cd bar && ");
+        assert_not_match(&script);
+    }
+
+    /// Replacing the final line of a three-line file produces a unified diff
+    /// hunk with one line of leading context and the fully updated content.
+    #[test]
+    fn test_unified_diff_last_line_replacement() {
+        // Replace the very last line of the file.
+        let dir = tempdir().unwrap();
+        let path = dir.path().join("last.txt");
+        fs::write(&path, "foo\nbar\nbaz\n").unwrap();
+
+        let patch = wrap_patch(&format!(
+            r#"*** Update File: {}
+@@
+ foo
+ bar
+-baz
++BAZ
+"#,
+            path.display()
+        ));
+
+        let patch = parse_patch(&patch).unwrap();
+        let chunks = match patch.hunks.as_slice() {
+            [Hunk::UpdateFile { chunks, .. }] => chunks,
+            _ => panic!("Expected a single UpdateFile hunk"),
+        };
+
+        let diff = unified_diff_from_chunks(&path, chunks).unwrap();
+        let expected_diff = r#"@@ -2,2 +2,2 @@
+ bar
+-baz
++BAZ
+"#;
+        let expected = ApplyPatchFileUpdate {
+            unified_diff: expected_diff.to_string(),
+            content: "foo\nbar\nBAZ\n".to_string(),
+        };
+        assert_eq!(expected, diff);
+    }
+
+    /// Appending a line at end-of-file produces a unified diff hunk anchored on
+    /// the previous last line, and content with the new line added.
+    #[test]
+    fn test_unified_diff_insert_at_eof() {
+        // Insert a new line at end-of-file.
+        let dir = tempdir().unwrap();
+        let path = dir.path().join("insert.txt");
+        fs::write(&path, "foo\nbar\nbaz\n").unwrap();
+
+        let patch = wrap_patch(&format!(
+            r#"*** Update File: {}
+@@
++quux
+*** End of File
+"#,
+            path.display()
+        ));
+
+        let patch = parse_patch(&patch).unwrap();
+        let chunks = match patch.hunks.as_slice() {
+            [Hunk::UpdateFile { chunks, .. }] => chunks,
+            _ => panic!("Expected a single UpdateFile hunk"),
+        };
+
+        let diff = unified_diff_from_chunks(&path, chunks).unwrap();
+        let expected_diff = r#"@@ -3 +3,2 @@
+ baz
++quux
+"#;
+        let expected = ApplyPatchFileUpdate {
+            unified_diff: expected_diff.to_string(),
+            content: "foo\nbar\nbaz\nquux\n".to_string(),
+        };
+        assert_eq!(expected, diff);
+    }
+
+    /// A relative path in the patch is resolved against the cwd passed to
+    /// `maybe_parse_apply_patch_verified`: the resulting change is keyed by the
+    /// joined path and the diff is computed from that file's contents.
+    #[test]
+    fn test_apply_patch_should_resolve_absolute_paths_in_cwd() {
+        let session_dir = tempdir().unwrap();
+        let relative_path = "source.txt";
+
+        // Note that we need this file to exist for the patch to be "verified"
+        // and parsed correctly.
+        let session_file_path = session_dir.path().join(relative_path);
+        fs::write(&session_file_path, "session directory content\n").unwrap();
+
+        let argv = vec![
+            "apply_patch".to_string(),
+            r#"*** Begin Patch
+*** Update File: source.txt
+@@
+-session directory content
++updated session directory content
+*** End Patch"#
+                .to_string(),
+        ];
+
+        let result = maybe_parse_apply_patch_verified(&argv, session_dir.path());
+
+        // Verify the patch contents - as otherwise we may have pulled contents
+        // from the wrong file (as we're using relative paths)
+        assert_eq!(
+            result,
+            MaybeApplyPatchVerified::Body(ApplyPatchAction {
+                changes: HashMap::from([(
+                    session_dir.path().join(relative_path),
+                    ApplyPatchFileChange::Update {
+                        unified_diff: r#"@@ -1 +1 @@
+-session directory content
++updated session directory content
+"#
+                        .to_string(),
+                        move_path: None,
+                        new_content: "updated session directory content\n".to_string(),
+                    },
+                )]),
+                patch: argv[1].clone(),
+                cwd: session_dir.path().to_path_buf(),
+            })
+        );
+    }
+
+    /// With a `cd <dir> &&` prefix, the action's cwd, the source path key, and
+    /// the `Move to` destination are all resolved under `<session cwd>/<dir>`.
+    #[test]
+    fn test_apply_patch_resolves_move_path_with_effective_cwd() {
+        let session_dir = tempdir().unwrap();
+        let worktree_rel = "alt";
+        let worktree_dir = session_dir.path().join(worktree_rel);
+        fs::create_dir_all(&worktree_dir).unwrap();
+
+        let source_name = "old.txt";
+        let dest_name = "renamed.txt";
+        let source_path = worktree_dir.join(source_name);
+        fs::write(&source_path, "before\n").unwrap();
+
+        let patch = wrap_patch(&format!(
+            r#"*** Update File: {source_name}
+*** Move to: {dest_name}
+@@
+-before
++after"#
+        ));
+
+        let shell_script = format!("cd {worktree_rel} && apply_patch <<'PATCH'\n{patch}\nPATCH");
+        let argv = vec!["bash".into(), "-lc".into(), shell_script];
+
+        let result = maybe_parse_apply_patch_verified(&argv, session_dir.path());
+        let action = match result {
+            MaybeApplyPatchVerified::Body(action) => action,
+            other => panic!("expected verified body, got {other:?}"),
+        };
+
+        assert_eq!(action.cwd, worktree_dir);
+
+        let change = action
+            .changes()
+            .get(&worktree_dir.join(source_name))
+            .expect("source file change present");
+
+        match change {
+            ApplyPatchFileChange::Update { move_path, .. } => {
+                assert_eq!(
+                    move_path.as_deref(),
+                    Some(worktree_dir.join(dest_name).as_path())
+                );
+            }
+            other => panic!("expected update change, got {other:?}"),
+        }
+    }
+}
--- a/codex-rs/apply-patch/src/lib.rs
+++ b/codex-rs/apply-patch/src/lib.rs
@@ -1,3 +1,4 @@
+mod invocation;
 mod parser;
 mod seek_sequence;
 mod standalone_executable;
@@ -5,8 +6,6 @@ mod standalone_executable;
 use std::collections::HashMap;
 use std::path::Path;
 use std::path::PathBuf;
-use std::str::Utf8Error;
-use std::sync::LazyLock;

 use anyhow::Context;
 use anyhow::Result;
@@ -17,27 +16,15 @@ use parser::UpdateFileChunk;
 pub use parser::parse_patch;
 use similar::TextDiff;
 use thiserror::Error;
-use tree_sitter::LanguageError;
-use tree_sitter::Parser;
-use tree_sitter::Query;
-use tree_sitter::QueryCursor;
-use tree_sitter::StreamingIterator;
-use tree_sitter_bash::LANGUAGE as BASH;

+pub use invocation::maybe_parse_apply_patch_verified;
 pub use standalone_executable::main;

+use crate::invocation::ExtractHeredocError;
+
 /// Detailed instructions for gpt-4.1 on how to use the `apply_patch` tool.
 pub const APPLY_PATCH_TOOL_INSTRUCTIONS: &str = include_str!("../apply_patch_tool_instructions.md");

-const APPLY_PATCH_COMMANDS: [&str; 2] = ["apply_patch", "applypatch"];
-
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-enum ApplyPatchShell {
-    Unix,
-    PowerShell,
-    Cmd,
-}
-
 #[derive(Debug, Error, PartialEq)]
 pub enum ApplyPatchError {
    #[error(transparent)]
@@ -86,14 +73,6 @@ impl PartialEq for IoError {
    }
 }

-#[derive(Debug, PartialEq)]
-pub enum MaybeApplyPatch {
-    Body(ApplyPatchArgs),
-    ShellParseError(ExtractHeredocError),
-    PatchParseError(ParseError),
-    NotApplyPatch,
-}
-
 /// Both the raw PATCH argument to `apply_patch` as well as the PATCH argument
 /// parsed into hunks.
 #[derive(Debug, PartialEq)]
@@ -103,84 +82,6 @@ pub struct ApplyPatchArgs {
    pub workdir: Option<String>,
 }

-fn classify_shell_name(shell: &str) -> Option<String> {
-    std::path::Path::new(shell)
-        .file_stem()
-        .and_then(|name| name.to_str())
-        .map(str::to_ascii_lowercase)
-}
-
-fn classify_shell(shell: &str, flag: &str) -> Option<ApplyPatchShell> {
-    classify_shell_name(shell).and_then(|name| match name.as_str() {
-        "bash" | "zsh" | "sh" if flag == "-lc" => Some(ApplyPatchShell::Unix),
-        "pwsh" | "powershell" if flag.eq_ignore_ascii_case("-command") => {
-            Some(ApplyPatchShell::PowerShell)
-        }
-        "cmd" if flag.eq_ignore_ascii_case("/c") => Some(ApplyPatchShell::Cmd),
-        _ => None,
-    })
-}
-
-fn can_skip_flag(shell: &str, flag: &str) -> bool {
-    classify_shell_name(shell).is_some_and(|name| {
-        matches!(name.as_str(), "pwsh" | "powershell") && flag.eq_ignore_ascii_case("-noprofile")
-    })
-}
-
-fn parse_shell_script(argv: &[String]) -> Option<(ApplyPatchShell, &str)> {
-    match argv {
-        [shell, flag, script] => classify_shell(shell, flag).map(|shell_type| {
-            let script = script.as_str();
-            (shell_type, script)
-        }),
-        [shell, skip_flag, flag, script] if can_skip_flag(shell, skip_flag) => {
-            classify_shell(shell, flag).map(|shell_type| {
-                let script = script.as_str();
-                (shell_type, script)
-            })
-        }
-        _ => None,
-    }
-}
-
-fn extract_apply_patch_from_shell(
-    shell: ApplyPatchShell,
-    script: &str,
-) -> std::result::Result<(String, Option<String>), ExtractHeredocError> {
-    match shell {
-        ApplyPatchShell::Unix | ApplyPatchShell::PowerShell | ApplyPatchShell::Cmd => {
-            extract_apply_patch_from_bash(script)
-        }
-    }
-}
-
-pub fn maybe_parse_apply_patch(argv: &[String]) -> MaybeApplyPatch {
-    match argv {
-        // Direct invocation: apply_patch <patch>
-        [cmd, body] if APPLY_PATCH_COMMANDS.contains(&cmd.as_str()) => match parse_patch(body) {
-            Ok(source) => MaybeApplyPatch::Body(source),
-            Err(e) => MaybeApplyPatch::PatchParseError(e),
-        },
-        // Shell heredoc form: (optional `cd <path> &&`) apply_patch <<'EOF' ...
-        _ => match parse_shell_script(argv) {
-            Some((shell, script)) => match extract_apply_patch_from_shell(shell, script) {
-                Ok((body, workdir)) => match parse_patch(&body) {
-                    Ok(mut source) => {
-                        source.workdir = workdir;
-                        MaybeApplyPatch::Body(source)
-                    }
-                    Err(e) => MaybeApplyPatch::PatchParseError(e),
-                },
-                Err(ExtractHeredocError::CommandDidNotStartWithApplyPatch) => {
-                    MaybeApplyPatch::NotApplyPatch
-                }
-                Err(e) => MaybeApplyPatch::ShellParseError(e),
-            },
-            None => MaybeApplyPatch::NotApplyPatch,
-        },
-    }
-}
-
 #[derive(Debug, PartialEq)]
 pub enum ApplyPatchFileChange {
    Add {
@@ -269,256 +170,6 @@ impl ApplyPatchAction {
    }
 }

-/// cwd must be an absolute path so that we can resolve relative paths in the
-/// patch.
-pub fn maybe_parse_apply_patch_verified(argv: &[String], cwd: &Path) -> MaybeApplyPatchVerified {
-    // Detect a raw patch body passed directly as the command or as the body of a shell
-    // script. In these cases, report an explicit error rather than applying the patch.
-    if let [body] = argv
-        && parse_patch(body).is_ok()
-    {
-        return MaybeApplyPatchVerified::CorrectnessError(ApplyPatchError::ImplicitInvocation);
-    }
-    if let Some((_, script)) = parse_shell_script(argv)
-        && parse_patch(script).is_ok()
-    {
-        return MaybeApplyPatchVerified::CorrectnessError(ApplyPatchError::ImplicitInvocation);
-    }
-
-    match maybe_parse_apply_patch(argv) {
-        MaybeApplyPatch::Body(ApplyPatchArgs {
-            patch,
-            hunks,
-            workdir,
-        }) => {
-            let effective_cwd = workdir
-                .as_ref()
-                .map(|dir| {
-                    let path = Path::new(dir);
-                    if path.is_absolute() {
-                        path.to_path_buf()
-                    } else {
-                        cwd.join(path)
-                    }
-                })
-                .unwrap_or_else(|| cwd.to_path_buf());
-            let mut changes = HashMap::new();
-            for hunk in hunks {
-                let path = hunk.resolve_path(&effective_cwd);
-                match hunk {
-                    Hunk::AddFile { contents, .. } => {
-                        changes.insert(path, ApplyPatchFileChange::Add { content: contents });
-                    }
-                    Hunk::DeleteFile { .. } => {
-                        let content = match std::fs::read_to_string(&path) {
-                            Ok(content) => content,
-                            Err(e) => {
-                                return MaybeApplyPatchVerified::CorrectnessError(
-                                    ApplyPatchError::IoError(IoError {
-                                        context: format!("Failed to read {}", path.display()),
-                                        source: e,
-                                    }),
-                                );
-                            }
-                        };
-                        changes.insert(path, ApplyPatchFileChange::Delete { content });
-                    }
-                    Hunk::UpdateFile {
-                        move_path, chunks, ..
-                    } => {
-                        let ApplyPatchFileUpdate {
-                            unified_diff,
-                            content: contents,
-                        } = match unified_diff_from_chunks(&path, &chunks) {
-                            Ok(diff) => diff,
-                            Err(e) => {
-                                return MaybeApplyPatchVerified::CorrectnessError(e);
-                            }
-                        };
-                        changes.insert(
-                            path,
-                            ApplyPatchFileChange::Update {
-                                unified_diff,
-                                move_path: move_path.map(|p| effective_cwd.join(p)),
-                                new_content: contents,
-                            },
-                        );
-                    }
-                }
-            }
-            MaybeApplyPatchVerified::Body(ApplyPatchAction {
-                changes,
-                patch,
-                cwd: effective_cwd,
-            })
-        }
-        MaybeApplyPatch::ShellParseError(e) => MaybeApplyPatchVerified::ShellParseError(e),
-        MaybeApplyPatch::PatchParseError(e) => MaybeApplyPatchVerified::CorrectnessError(e.into()),
-        MaybeApplyPatch::NotApplyPatch => MaybeApplyPatchVerified::NotApplyPatch,
-    }
-}
-
-/// Extract the heredoc body (and optional `cd` workdir) from a `bash -lc` script
-/// that invokes the apply_patch tool using a heredoc.
-///
-/// Supported top‑level forms (must be the only top‑level statement):
-/// - `apply_patch <<'EOF'\n...\nEOF`
-/// - `cd <path> && apply_patch <<'EOF'\n...\nEOF`
-///
-/// Notes about matching:
-/// - Parsed with Tree‑sitter Bash and a strict query that uses anchors so the
-///   heredoc‑redirected statement is the only top‑level statement.
-/// - The connector between `cd` and `apply_patch` must be `&&` (not `|` or `||`).
-/// - Exactly one positional `word` argument is allowed for `cd` (no flags, no quoted
-///   strings, no second argument).
-/// - The apply command is validated in‑query via `#any-of?` to allow `apply_patch`
-///   or `applypatch`.
-/// - Preceding or trailing commands (e.g., `echo ...;` or `... && echo done`) do not match.
-///
-/// Returns `(heredoc_body, Some(path))` when the `cd` variant matches, or
-/// `(heredoc_body, None)` for the direct form. Errors are returned if the script
-/// cannot be parsed or does not match the allowed patterns.
-fn extract_apply_patch_from_bash(
-    src: &str,
-) -> std::result::Result<(String, Option<String>), ExtractHeredocError> {
-    // This function uses a Tree-sitter query to recognize one of two
-    // whole-script forms, each expressed as a single top-level statement:
-    //
-    // 1. apply_patch <<'EOF'\n...\nEOF
-    // 2. cd <path> && apply_patch <<'EOF'\n...\nEOF
-    //
-    // Key ideas when reading the query:
-    // - dots (`.`) between named nodes enforces adjacency among named children and
-    //   anchor to the start/end of the expression.
-    // - we match a single redirected_statement directly under program with leading
-    //   and trailing anchors (`.`). This ensures it is the only top-level statement
-    //   (so prefixes like `echo ...;` or suffixes like `... && echo done` do not match).
-    //
-    // Overall, we want to be conservative and only match the intended forms, as other
-    // forms are likely to be model errors, or incorrectly interpreted by later code.
-    //
-    // If you're editing this query, it's helpful to start by creating a debugging binary
-    // which will let you see the AST of an arbitrary bash script passed in, and optionally
-    // also run an arbitrary query against the AST. This is useful for understanding
-    // how tree-sitter parses the script and whether the query syntax is correct. Be sure
-    // to test both positive and negative cases.
-    static APPLY_PATCH_QUERY: LazyLock<Query> = LazyLock::new(|| {
-        let language = BASH.into();
-        #[expect(clippy::expect_used)]
-        Query::new(
-            &language,
-            r#"
-            (
-              program
-                . (redirected_statement
-                    body: (command
-                            name: (command_name (word) @apply_name) .)
-                    (#any-of? @apply_name "apply_patch" "applypatch")
-                    redirect: (heredoc_redirect
-                                . (heredoc_start)
-                                . (heredoc_body) @heredoc
-                                . (heredoc_end)
-                                .))
-                .)
-
-            (
-              program
-                . (redirected_statement
-                    body: (list
-                            . (command
-                                name: (command_name (word) @cd_name) .
-                                argument: [
-                                  (word) @cd_path
-                                  (string (string_content) @cd_path)
-                                  (raw_string) @cd_raw_string
-                                ] .)
-                            "&&"
-                            . (command
-                                name: (command_name (word) @apply_name))
-                            .)
-                    (#eq? @cd_name "cd")
-                    (#any-of? @apply_name "apply_patch" "applypatch")
-                    redirect: (heredoc_redirect
-                                . (heredoc_start)
-                                . (heredoc_body) @heredoc
-                                . (heredoc_end)
-                                .))
-                .)
-            "#,
-        )
-        .expect("valid bash query")
-    });
-
-    let lang = BASH.into();
-    let mut parser = Parser::new();
-    parser
-        .set_language(&lang)
-        .map_err(ExtractHeredocError::FailedToLoadBashGrammar)?;
-    let tree = parser
-        .parse(src, None)
-        .ok_or(ExtractHeredocError::FailedToParsePatchIntoAst)?;
-
-    let bytes = src.as_bytes();
-    let root = tree.root_node();
-
-    let mut cursor = QueryCursor::new();
-    let mut matches = cursor.matches(&APPLY_PATCH_QUERY, root, bytes);
-    while let Some(m) = matches.next() {
-        let mut heredoc_text: Option<String> = None;
-        let mut cd_path: Option<String> = None;
-
-        for capture in m.captures.iter() {
-            let name = APPLY_PATCH_QUERY.capture_names()[capture.index as usize];
-            match name {
-                "heredoc" => {
-                    let text = capture
-                        .node
-                        .utf8_text(bytes)
-                        .map_err(ExtractHeredocError::HeredocNotUtf8)?
-                        .trim_end_matches('\n')
-                        .to_string();
-                    heredoc_text = Some(text);
-                }
-                "cd_path" => {
-                    let text = capture
-                        .node
-                        .utf8_text(bytes)
-                        .map_err(ExtractHeredocError::HeredocNotUtf8)?
-                        .to_string();
-                    cd_path = Some(text);
-                }
-                "cd_raw_string" => {
-                    let raw = capture
-                        .node
-                        .utf8_text(bytes)
-                        .map_err(ExtractHeredocError::HeredocNotUtf8)?;
-                    let trimmed = raw
-                        .strip_prefix('\'')
-                        .and_then(|s| s.strip_suffix('\''))
-                        .unwrap_or(raw);
-                    cd_path = Some(trimmed.to_string());
-                }
-                _ => {}
-            }
-        }
-
-        if let Some(heredoc) = heredoc_text {
-            return Ok((heredoc, cd_path));
-        }
-    }
-
-    Err(ExtractHeredocError::CommandDidNotStartWithApplyPatch)
-}
-
-#[derive(Debug, PartialEq)]
-pub enum ExtractHeredocError {
-    CommandDidNotStartWithApplyPatch,
-    FailedToLoadBashGrammar(LanguageError),
-    HeredocNotUtf8(Utf8Error),
-    FailedToParsePatchIntoAst,
-    FailedToFindHeredocBody,
-}
-
 /// Applies the patch and prints the result to stdout/stderr.
 pub fn apply_patch(
    patch: &str,
@@ -894,7 +545,6 @@ pub fn print_summary(
 #[cfg(test)]
 mod tests {
    use super::*;
-    use assert_matches::assert_matches;
    use pretty_assertions::assert_eq;
    use std::fs;
    use std::string::ToString;
@@ -905,263 +555,6 @@ mod tests {
        format!("*** Begin Patch\n{body}\n*** End Patch")
    }

-    fn strs_to_strings(strs: &[&str]) -> Vec<String> {
-        strs.iter().map(ToString::to_string).collect()
-    }
-
-    // Test helpers to reduce repetition when building bash -lc heredoc scripts
-    fn args_bash(script: &str) -> Vec<String> {
-        strs_to_strings(&["bash", "-lc", script])
-    }
-
-    fn args_powershell(script: &str) -> Vec<String> {
-        strs_to_strings(&["powershell.exe", "-Command", script])
-    }
-
-    fn args_powershell_no_profile(script: &str) -> Vec<String> {
-        strs_to_strings(&["powershell.exe", "-NoProfile", "-Command", script])
-    }
-
-    fn args_pwsh(script: &str) -> Vec<String> {
-        strs_to_strings(&["pwsh", "-NoProfile", "-Command", script])
-    }
-
-    fn args_cmd(script: &str) -> Vec<String> {
-        strs_to_strings(&["cmd.exe", "/c", script])
-    }
-
-    fn heredoc_script(prefix: &str) -> String {
-        format!(
-            "{prefix}apply_patch <<'PATCH'\n*** Begin Patch\n*** Add File: foo\n+hi\n*** End Patch\nPATCH"
-        )
-    }
-
-    fn heredoc_script_ps(prefix: &str, suffix: &str) -> String {
-        format!(
-            "{prefix}apply_patch <<'PATCH'\n*** Begin Patch\n*** Add File: foo\n+hi\n*** End Patch\nPATCH{suffix}"
-        )
-    }
-
-    fn expected_single_add() -> Vec<Hunk> {
-        vec![Hunk::AddFile {
-            path: PathBuf::from("foo"),
-            contents: "hi\n".to_string(),
-        }]
-    }
-
-    fn assert_match_args(args: Vec<String>, expected_workdir: Option<&str>) {
-        match maybe_parse_apply_patch(&args) {
-            MaybeApplyPatch::Body(ApplyPatchArgs { hunks, workdir, .. }) => {
-                assert_eq!(workdir.as_deref(), expected_workdir);
-                assert_eq!(hunks, expected_single_add());
-            }
-            result => panic!("expected MaybeApplyPatch::Body got {result:?}"),
-        }
-    }
-
-    fn assert_match(script: &str, expected_workdir: Option<&str>) {
-        let args = args_bash(script);
-        assert_match_args(args, expected_workdir);
-    }
-
-    fn assert_not_match(script: &str) {
-        let args = args_bash(script);
-        assert_matches!(
-            maybe_parse_apply_patch(&args),
-            MaybeApplyPatch::NotApplyPatch
-        );
-    }
-
-    #[test]
-    fn test_implicit_patch_single_arg_is_error() {
-        let patch = "*** Begin Patch\n*** Add File: foo\n+hi\n*** End Patch".to_string();
-        let args = vec![patch];
-        let dir = tempdir().unwrap();
-        assert_matches!(
-            maybe_parse_apply_patch_verified(&args, dir.path()),
-            MaybeApplyPatchVerified::CorrectnessError(ApplyPatchError::ImplicitInvocation)
-        );
-    }
-
-    #[test]
-    fn test_implicit_patch_bash_script_is_error() {
-        let script = "*** Begin Patch\n*** Add File: foo\n+hi\n*** End Patch";
-        let args = args_bash(script);
-        let dir = tempdir().unwrap();
-        assert_matches!(
-            maybe_parse_apply_patch_verified(&args, dir.path()),
-            MaybeApplyPatchVerified::CorrectnessError(ApplyPatchError::ImplicitInvocation)
-        );
-    }
-
-    #[test]
-    fn test_literal() {
-        let args = strs_to_strings(&[
-            "apply_patch",
-            r#"*** Begin Patch
-*** Add File: foo
-+hi
-*** End Patch
-"#,
-        ]);
-
-        match maybe_parse_apply_patch(&args) {
-            MaybeApplyPatch::Body(ApplyPatchArgs { hunks, .. }) => {
-                assert_eq!(
-                    hunks,
-                    vec![Hunk::AddFile {
-                        path: PathBuf::from("foo"),
-                        contents: "hi\n".to_string()
-                    }]
-                );
-            }
-            result => panic!("expected MaybeApplyPatch::Body got {result:?}"),
-        }
-    }
-
-    #[test]
-    fn test_literal_applypatch() {
-        let args = strs_to_strings(&[
-            "applypatch",
-            r#"*** Begin Patch
-*** Add File: foo
-+hi
-*** End Patch
-"#,
-        ]);
-
-        match maybe_parse_apply_patch(&args) {
-            MaybeApplyPatch::Body(ApplyPatchArgs { hunks, .. }) => {
-                assert_eq!(
-                    hunks,
-                    vec![Hunk::AddFile {
-                        path: PathBuf::from("foo"),
-                        contents: "hi\n".to_string()
-                    }]
-                );
-            }
-            result => panic!("expected MaybeApplyPatch::Body got {result:?}"),
-        }
-    }
-
-    #[test]
-    fn test_heredoc() {
-        assert_match(&heredoc_script(""), None);
-    }
-
-    #[test]
-    fn test_heredoc_applypatch() {
-        let args = strs_to_strings(&[
-            "bash",
-            "-lc",
-            r#"applypatch <<'PATCH'
-*** Begin Patch
-*** Add File: foo
-+hi
-*** End Patch
-PATCH"#,
-        ]);
-
-        match maybe_parse_apply_patch(&args) {
-            MaybeApplyPatch::Body(ApplyPatchArgs { hunks, workdir, .. }) => {
-                assert_eq!(workdir, None);
-                assert_eq!(
-                    hunks,
-                    vec![Hunk::AddFile {
-                        path: PathBuf::from("foo"),
-                        contents: "hi\n".to_string()
-                    }]
-                );
-            }
-            result => panic!("expected MaybeApplyPatch::Body got {result:?}"),
-        }
-    }
-
-    #[test]
-    fn test_powershell_heredoc() {
-        let script = heredoc_script("");
-        assert_match_args(args_powershell(&script), None);
-    }
-    #[test]
-    fn test_powershell_heredoc_no_profile() {
-        let script = heredoc_script("");
-        assert_match_args(args_powershell_no_profile(&script), None);
-    }
-    #[test]
-    fn test_pwsh_heredoc() {
-        let script = heredoc_script("");
-        assert_match_args(args_pwsh(&script), None);
-    }
-
-    #[test]
-    fn test_cmd_heredoc_with_cd() {
-        let script = heredoc_script("cd foo && ");
-        assert_match_args(args_cmd(&script), Some("foo"));
-    }
-
-    #[test]
-    fn test_heredoc_with_leading_cd() {
-        assert_match(&heredoc_script("cd foo && "), Some("foo"));
-    }
-
-    #[test]
-    fn test_cd_with_semicolon_is_ignored() {
-        assert_not_match(&heredoc_script("cd foo; "));
-    }
-
-    #[test]
-    fn test_cd_or_apply_patch_is_ignored() {
-        assert_not_match(&heredoc_script("cd bar || "));
-    }
-
-    #[test]
-    fn test_cd_pipe_apply_patch_is_ignored() {
-        assert_not_match(&heredoc_script("cd bar | "));
-    }
-
-    #[test]
-    fn test_cd_single_quoted_path_with_spaces() {
-        assert_match(&heredoc_script("cd 'foo bar' && "), Some("foo bar"));
-    }
-
-    #[test]
-    fn test_cd_double_quoted_path_with_spaces() {
-        assert_match(&heredoc_script("cd \"foo bar\" && "), Some("foo bar"));
-    }
-
-    #[test]
-    fn test_echo_and_apply_patch_is_ignored() {
-        assert_not_match(&heredoc_script("echo foo && "));
-    }
-
-    #[test]
-    fn test_apply_patch_with_arg_is_ignored() {
-        let script = "apply_patch foo <<'PATCH'\n*** Begin Patch\n*** Add File: foo\n+hi\n*** End Patch\nPATCH";
-        assert_not_match(script);
-    }
-
-    #[test]
-    fn test_double_cd_then_apply_patch_is_ignored() {
-        assert_not_match(&heredoc_script("cd foo && cd bar && "));
-    }
-
-    #[test]
-    fn test_cd_two_args_is_ignored() {
-        assert_not_match(&heredoc_script("cd foo bar && "));
-    }
-
-    #[test]
-    fn test_cd_then_apply_patch_then_extra_is_ignored() {
-        let script = heredoc_script_ps("cd bar && ", " && echo done");
-        assert_not_match(&script);
-    }
-
-    #[test]
-    fn test_echo_then_cd_and_apply_patch_is_ignored() {
-        // Ensure preceding commands before the `cd && apply_patch <<...` sequence do not match.
-        assert_not_match(&heredoc_script("echo foo; cd bar && "));
-    }
-
    #[test]
    fn test_add_file_hunk_creates_file_with_contents() {
        let dir = tempdir().unwrap();
@@ -1650,99 +1043,6 @@ g
        );
    }

-    #[test]
-    fn test_apply_patch_should_resolve_absolute_paths_in_cwd() {
-        let session_dir = tempdir().unwrap();
-        let relative_path = "source.txt";
-
-        // Note that we need this file to exist for the patch to be "verified"
-        // and parsed correctly.
-        let session_file_path = session_dir.path().join(relative_path);
-        fs::write(&session_file_path, "session directory content\n").unwrap();
-
-        let argv = vec![
-            "apply_patch".to_string(),
-            r#"*** Begin Patch
-*** Update File: source.txt
-@@
-session directory content
-+updated session directory content
-*** End Patch"#
-                .to_string(),
-        ];
-
-        let result = maybe_parse_apply_patch_verified(&argv, session_dir.path());
-
-        // Verify the patch contents - as otherwise we may have pulled contents
-        // from the wrong file (as we're using relative paths)
-        assert_eq!(
-            result,
-            MaybeApplyPatchVerified::Body(ApplyPatchAction {
-                changes: HashMap::from([(
-                    session_dir.path().join(relative_path),
-                    ApplyPatchFileChange::Update {
-                        unified_diff: r#"@@ -1 +1 @@
-session directory content
-+updated session directory content
-"#
-                        .to_string(),
-                        move_path: None,
-                        new_content: "updated session directory content\n".to_string(),
-                    },
-                )]),
-                patch: argv[1].clone(),
-                cwd: session_dir.path().to_path_buf(),
-            })
-        );
-    }
-
-    #[test]
-    fn test_apply_patch_resolves_move_path_with_effective_cwd() {
-        let session_dir = tempdir().unwrap();
-        let worktree_rel = "alt";
-        let worktree_dir = session_dir.path().join(worktree_rel);
-        fs::create_dir_all(&worktree_dir).unwrap();
-
-        let source_name = "old.txt";
-        let dest_name = "renamed.txt";
-        let source_path = worktree_dir.join(source_name);
-        fs::write(&source_path, "before\n").unwrap();
-
-        let patch = wrap_patch(&format!(
-            r#"*** Update File: {source_name}
-*** Move to: {dest_name}
-@@
-before
-+after"#
-        ));
-
-        let shell_script = format!("cd {worktree_rel} && apply_patch <<'PATCH'\n{patch}\nPATCH");
-        let argv = vec!["bash".into(), "-lc".into(), shell_script];
-
-        let result = maybe_parse_apply_patch_verified(&argv, session_dir.path());
-        let action = match result {
-            MaybeApplyPatchVerified::Body(action) => action,
-            other => panic!("expected verified body, got {other:?}"),
-        };
-
-        assert_eq!(action.cwd, worktree_dir);
-
-        let change = action
-            .changes()
-            .get(&worktree_dir.join(source_name))
-            .expect("source file change present");
-
-        match change {
-            ApplyPatchFileChange::Update { move_path, .. } => {
-                assert_eq!(
-                    move_path.as_deref(),
-                    Some(worktree_dir.join(dest_name).as_path())
-                );
-            }
-            other => panic!("expected update change, got {other:?}"),
-        }
-    }
-
    #[test]
    fn test_apply_patch_fails_on_write_error() {
        let dir = tempdir().unwrap();
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/.gitattributes
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/.gitattributes
@@ -0,0 +1 @@
+** text eol=lf
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/001_add_file/expected/bar.md
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/001_add_file/expected/bar.md
@@ -0,0 +1 @@
+This is a new file
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/001_add_file/patch.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/001_add_file/patch.txt
@@ -0,0 +1,4 @@
+*** Begin Patch
+*** Add File: bar.md
+This is a new file
+*** End Patch
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/002_multiple_operations/expected/modify.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/002_multiple_operations/expected/modify.txt
@@ -0,0 +1,2 @@
+line1
+changed
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/002_multiple_operations/expected/nested/new.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/002_multiple_operations/expected/nested/new.txt
@@ -0,0 +1 @@
+created
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/002_multiple_operations/input/delete.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/002_multiple_operations/input/delete.txt
@@ -0,0 +1 @@
+obsolete
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/002_multiple_operations/input/modify.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/002_multiple_operations/input/modify.txt
@@ -0,0 +1,2 @@
+line1
+line2
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/002_multiple_operations/patch.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/002_multiple_operations/patch.txt
@@ -0,0 +1,9 @@
+*** Begin Patch
+*** Add File: nested/new.txt
+created
+*** Delete File: delete.txt
+*** Update File: modify.txt
+@@
+-line2
+changed
+*** End Patch
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/003_multiple_chunks/expected/multi.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/003_multiple_chunks/expected/multi.txt
@@ -0,0 +1,4 @@
+line1
+changed2
+line3
+changed4
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/003_multiple_chunks/input/multi.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/003_multiple_chunks/input/multi.txt
@@ -0,0 +1,4 @@
+line1
+line2
+line3
+line4
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/003_multiple_chunks/patch.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/003_multiple_chunks/patch.txt
@@ -0,0 +1,9 @@
+*** Begin Patch
+*** Update File: multi.txt
+@@
+-line2
+changed2
+@@
+-line4
+changed4
+*** End Patch
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/004_move_to_new_directory/expected/old/other.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/004_move_to_new_directory/expected/old/other.txt
@@ -0,0 +1 @@
+unrelated file
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/004_move_to_new_directory/expected/renamed/dir/name.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/004_move_to_new_directory/expected/renamed/dir/name.txt
@@ -0,0 +1 @@
+new content
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/004_move_to_new_directory/input/old/name.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/004_move_to_new_directory/input/old/name.txt
@@ -0,0 +1 @@
+old content
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/004_move_to_new_directory/input/old/other.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/004_move_to_new_directory/input/old/other.txt
@@ -0,0 +1 @@
+unrelated file
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/004_move_to_new_directory/patch.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/004_move_to_new_directory/patch.txt
@@ -0,0 +1,7 @@
+*** Begin Patch
+*** Update File: old/name.txt
+*** Move to: renamed/dir/name.txt
+@@
+-old content
+new content
+*** End Patch
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/005_rejects_empty_patch/expected/foo.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/005_rejects_empty_patch/expected/foo.txt
@@ -0,0 +1 @@
+stable
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/005_rejects_empty_patch/input/foo.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/005_rejects_empty_patch/input/foo.txt
@@ -0,0 +1 @@
+stable
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/005_rejects_empty_patch/patch.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/005_rejects_empty_patch/patch.txt
@@ -0,0 +1,2 @@
+*** Begin Patch
+*** End Patch
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/006_rejects_missing_context/expected/modify.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/006_rejects_missing_context/expected/modify.txt
@@ -0,0 +1,2 @@
+line1
+line2
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/006_rejects_missing_context/input/modify.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/006_rejects_missing_context/input/modify.txt
@@ -0,0 +1,2 @@
+line1
+line2
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/006_rejects_missing_context/patch.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/006_rejects_missing_context/patch.txt
@@ -0,0 +1,6 @@
+*** Begin Patch
+*** Update File: modify.txt
+@@
+-missing
+changed
+*** End Patch
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/007_rejects_missing_file_delete/expected/foo.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/007_rejects_missing_file_delete/expected/foo.txt
@@ -0,0 +1 @@
+stable
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/007_rejects_missing_file_delete/input/foo.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/007_rejects_missing_file_delete/input/foo.txt
@@ -0,0 +1 @@
+stable
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/007_rejects_missing_file_delete/patch.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/007_rejects_missing_file_delete/patch.txt
@@ -0,0 +1,3 @@
+*** Begin Patch
+*** Delete File: missing.txt
+*** End Patch
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/008_rejects_empty_update_hunk/expected/foo.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/008_rejects_empty_update_hunk/expected/foo.txt
@@ -0,0 +1 @@
+stable
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/008_rejects_empty_update_hunk/input/foo.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/008_rejects_empty_update_hunk/input/foo.txt
@@ -0,0 +1 @@
+stable
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/008_rejects_empty_update_hunk/patch.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/008_rejects_empty_update_hunk/patch.txt
@@ -0,0 +1,3 @@
+*** Begin Patch
+*** Update File: foo.txt
+*** End Patch
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/009_requires_existing_file_for_update/expected/foo.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/009_requires_existing_file_for_update/expected/foo.txt
@@ -0,0 +1 @@
+stable
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/009_requires_existing_file_for_update/input/foo.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/009_requires_existing_file_for_update/input/foo.txt
@@ -0,0 +1 @@
+stable
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/009_requires_existing_file_for_update/patch.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/009_requires_existing_file_for_update/patch.txt
@@ -0,0 +1,6 @@
+*** Begin Patch
+*** Update File: missing.txt
+@@
+-old
+new
+*** End Patch
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/010_move_overwrites_existing_destination/expected/old/other.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/010_move_overwrites_existing_destination/expected/old/other.txt
@@ -0,0 +1 @@
+unrelated file
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/010_move_overwrites_existing_destination/expected/renamed/dir/name.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/010_move_overwrites_existing_destination/expected/renamed/dir/name.txt
@@ -0,0 +1 @@
+new
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/010_move_overwrites_existing_destination/input/old/name.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/010_move_overwrites_existing_destination/input/old/name.txt
@@ -0,0 +1 @@
+from
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/010_move_overwrites_existing_destination/input/old/other.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/010_move_overwrites_existing_destination/input/old/other.txt
@@ -0,0 +1 @@
+unrelated file
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/010_move_overwrites_existing_destination/input/renamed/dir/name.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/010_move_overwrites_existing_destination/input/renamed/dir/name.txt
@@ -0,0 +1 @@
+existing
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/010_move_overwrites_existing_destination/patch.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/010_move_overwrites_existing_destination/patch.txt
@@ -0,0 +1,7 @@
+*** Begin Patch
+*** Update File: old/name.txt
+*** Move to: renamed/dir/name.txt
+@@
+-from
+new
+*** End Patch
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/011_add_overwrites_existing_file/expected/duplicate.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/011_add_overwrites_existing_file/expected/duplicate.txt
@@ -0,0 +1 @@
+new content
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/011_add_overwrites_existing_file/input/duplicate.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/011_add_overwrites_existing_file/input/duplicate.txt
@@ -0,0 +1 @@
+old content
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/011_add_overwrites_existing_file/patch.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/011_add_overwrites_existing_file/patch.txt
@@ -0,0 +1,4 @@
+*** Begin Patch
+*** Add File: duplicate.txt
+new content
+*** End Patch
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/012_delete_directory_fails/expected/dir/foo.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/012_delete_directory_fails/expected/dir/foo.txt
@@ -0,0 +1 @@
+stable
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/012_delete_directory_fails/input/dir/foo.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/012_delete_directory_fails/input/dir/foo.txt
@@ -0,0 +1 @@
+stable
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/012_delete_directory_fails/patch.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/012_delete_directory_fails/patch.txt
@@ -0,0 +1,3 @@
+*** Begin Patch
+*** Delete File: dir
+*** End Patch
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/013_rejects_invalid_hunk_header/expected/foo.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/013_rejects_invalid_hunk_header/expected/foo.txt
@@ -0,0 +1 @@
+stable
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/013_rejects_invalid_hunk_header/input/foo.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/013_rejects_invalid_hunk_header/input/foo.txt
@@ -0,0 +1 @@
+stable
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/013_rejects_invalid_hunk_header/patch.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/013_rejects_invalid_hunk_header/patch.txt
@@ -0,0 +1,3 @@
+*** Begin Patch
+*** Frobnicate File: foo
+*** End Patch
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/014_update_file_appends_trailing_newline/expected/no_newline.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/014_update_file_appends_trailing_newline/expected/no_newline.txt
@@ -0,0 +1,2 @@
+first line
+second line
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/014_update_file_appends_trailing_newline/input/no_newline.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/014_update_file_appends_trailing_newline/input/no_newline.txt
@@ -0,0 +1 @@
+no newline at end
--- a/codex-rs/apply-patch/tests/fixtures/scenarios/014_update_file_appends_trailing_newline/patch.txt
+++ b/codex-rs/apply-patch/tests/fixtures/scenarios/014_update_file_appends_trailing_newline/patch.txt
@@ -0,0 +1,7 @@
+*** Begin Patch
+*** Update File: no_newline.txt
+@@
+-no newline at end
+first line
+second line
+*** End Patch
--- a/Show More
+++ b/Show More