Update macOS tab title when running TUI

update composer + user message styling (#4240)
Changes: - the composer and user messages now have a colored background that stretches the entire width of the terminal. - the prompt character was changed from a cyan `▌` to a bold `›`. - the "working" shimmer now follows the "dark gray" color of the terminal, better matching the terminal's color scheme | Terminal + Background | Screenshot | |------------------------------|------------| | iTerm with dark bg | <img width="810" height="641" alt="Screenshot 2025-09-25 at 11 44 52 AM" src="https://github.com/user-attachments/assets/1317e579-64a9-4785-93e6-98b0258f5d92" /> | | iTerm with light bg | <img width="845" height="540" alt="Screenshot 2025-09-25 at 11 46 29 AM" src="https://github.com/user-attachments/assets/e671d490-c747-4460-af0b-3f8d7f7a6b8e" /> | | iTerm with color bg | <img width="825" height="564" alt="Screenshot 2025-09-25 at 11 47 12 AM" src="https://github.com/user-attachments/assets/141cda1b-1164-41d5-87da-3be11e6a3063" /> | | Terminal.app with dark bg | <img width="577" height="367" alt="Screenshot 2025-09-25 at 11 45 22 AM" src="https://github.com/user-attachments/assets/93fc4781-99f7-4ee7-9c8e-3db3cd854fe5" /> | | Terminal.app with light bg | <img width="577" height="367" alt="Screenshot 2025-09-25 at 11 46 04 AM" src="https://github.com/user-attachments/assets/19bf6a3c-91e0-447b-9667-b8033f512219" /> | | Terminal.app with color bg | <img width="577" height="367" alt="Screenshot 2025-09-25 at 11 45 50 AM" src="https://github.com/user-attachments/assets/dd7c4b5b-342e-4028-8140-f4e65752bd0b" /> |
2026-05-01 09:56:37 +00:00 · 2025-09-26 17:41:28 -07:00 · 2025-09-26 16:35:56 -07:00 · 2025-09-26 16:21:50 -07:00 · 2025-09-26 14:08:28 -07:00 · 2025-09-26 18:16:54 +00:00
219 changed files with 12863 additions and 4457 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -27,12 +27,26 @@ jobs:
      - name: Install dependencies
        run: pnpm install --frozen-lockfile

-      # Run all tasks using workspace filters
+      # build_npm_package.py requires DotSlash when staging releases.
+      - uses: facebook/install-dotslash@v2

-      - name: Ensure staging a release works.
+      - name: Stage npm package
        env:
          GH_TOKEN: ${{ github.token }}
-        run: ./codex-cli/scripts/stage_release.sh
+        run: |
+          set -euo pipefail
+          CODEX_VERSION=0.40.0
+          PACK_OUTPUT="${RUNNER_TEMP}/codex-npm.tgz"
+          python3 ./codex-cli/scripts/build_npm_package.py \
+            --release-version "$CODEX_VERSION" \
+            --pack-output "$PACK_OUTPUT"
+          echo "PACK_OUTPUT=$PACK_OUTPUT" >> "$GITHUB_ENV"
+
+      - name: Upload staged npm package artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: codex-npm-staging
+          path: ${{ env.PACK_OUTPUT }}

      - name: Ensure root README.md contains only ASCII and certain Unicode code points
        run: ./scripts/asciicheck.py README.md
--- a/.github/workflows/codespell.yml
+++ b/.github/workflows/codespell.yml
@@ -22,7 +22,7 @@ jobs:
      - name: Annotate locations with typos
        uses: codespell-project/codespell-problem-matcher@b80729f885d32f78a716c2f107b4db1025001c42 # v1
      - name: Codespell
-        uses: codespell-project/actions-codespell@406322ec52dd7b488e48c1c4b82e2a8b3a1bf630 # v2
+        uses: codespell-project/actions-codespell@406322ec52dd7b488e48c1c4b82e2a8b3a1bf630 # v2.1
        with:
          ignore_words_file: .codespellignore
          skip: frame*.txt
--- a/.github/workflows/rust-ci.yml
+++ b/.github/workflows/rust-ci.yml
@@ -57,7 +57,7 @@ jobs:
        working-directory: codex-rs
    steps:
      - uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.89
+      - uses: dtolnay/rust-toolchain@1.90
        with:
          components: rustfmt
      - name: cargo fmt
@@ -75,7 +75,7 @@ jobs:
        working-directory: codex-rs
    steps:
      - uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.89
+      - uses: dtolnay/rust-toolchain@1.90
      - uses: taiki-e/install-action@0c5db7f7f897c03b771660e91d065338615679f4 # v2
        with:
          tool: cargo-shear
@@ -143,7 +143,7 @@ jobs:

    steps:
      - uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.89
+      - uses: dtolnay/rust-toolchain@1.90
        with:
          targets: ${{ matrix.target }}
          components: clippy
--- a/.github/workflows/rust-release.yml
+++ b/.github/workflows/rust-release.yml
@@ -77,7 +77,7 @@ jobs:

    steps:
      - uses: actions/checkout@v5
-      - uses: dtolnay/rust-toolchain@1.89
+      - uses: dtolnay/rust-toolchain@1.90
        with:
          targets: ${{ matrix.target }}

@@ -173,6 +173,8 @@ jobs:
    outputs:
      version: ${{ steps.release_name.outputs.name }}
      tag: ${{ github.ref_name }}
+      should_publish_npm: ${{ steps.npm_publish_settings.outputs.should_publish }}
+      npm_tag: ${{ steps.npm_publish_settings.outputs.npm_tag }}

    steps:
      - name: Checkout repository
@@ -193,21 +195,37 @@ jobs:
          version="${GITHUB_REF_NAME#rust-v}"
          echo "name=${version}" >> $GITHUB_OUTPUT

+      - name: Determine npm publish settings
+        id: npm_publish_settings
+        env:
+          VERSION: ${{ steps.release_name.outputs.name }}
+        run: |
+          set -euo pipefail
+          version="${VERSION}"
+
+          if [[ "${version}" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
+            echo "should_publish=true" >> "$GITHUB_OUTPUT"
+            echo "npm_tag=" >> "$GITHUB_OUTPUT"
+          elif [[ "${version}" =~ ^[0-9]+\.[0-9]+\.[0-9]+-alpha\.[0-9]+$ ]]; then
+            echo "should_publish=true" >> "$GITHUB_OUTPUT"
+            echo "npm_tag=alpha" >> "$GITHUB_OUTPUT"
+          else
+            echo "should_publish=false" >> "$GITHUB_OUTPUT"
+            echo "npm_tag=" >> "$GITHUB_OUTPUT"
+          fi
+
+      # build_npm_package.py requires DotSlash when staging releases.
+      - uses: facebook/install-dotslash@v2
      - name: Stage npm package
        env:
          GH_TOKEN: ${{ github.token }}
        run: |
          set -euo pipefail
          TMP_DIR="${RUNNER_TEMP}/npm-stage"
-          python3 codex-cli/scripts/stage_rust_release.py \
+          ./codex-cli/scripts/build_npm_package.py \
            --release-version "${{ steps.release_name.outputs.name }}" \
-            --tmp "${TMP_DIR}"
-          mkdir -p dist/npm
-          # Produce an npm-ready tarball using `npm pack` and store it in dist/npm.
-          # We then rename it to a stable name used by our publishing script.
-          (cd "$TMP_DIR" && npm pack --pack-destination "${GITHUB_WORKSPACE}/dist/npm")
-          mv "${GITHUB_WORKSPACE}"/dist/npm/*.tgz \
-             "${GITHUB_WORKSPACE}/dist/npm/codex-npm-${{ steps.release_name.outputs.name }}.tgz"
+            --staging-dir "${TMP_DIR}" \
+            --pack-output "${GITHUB_WORKSPACE}/dist/npm/codex-npm-${{ steps.release_name.outputs.name }}.tgz"

      - name: Create GitHub Release
        uses: softprops/action-gh-release@v2
@@ -230,8 +248,8 @@ jobs:
  # July 31, 2025: https://github.blog/changelog/2025-07-31-npm-trusted-publishing-with-oidc-is-generally-available/
  # npm docs: https://docs.npmjs.com/trusted-publishers
  publish-npm:
-    # Skip this step for pre-releases (alpha/beta).
-    if: ${{ !contains(needs.release.outputs.version, '-') }}
+    # Publish to npm for stable releases and alpha pre-releases with numeric suffixes.
+    if: ${{ needs.release.outputs.should_publish_npm == 'true' }}
    name: publish-npm
    needs: release
    runs-on: ubuntu-latest
@@ -266,7 +284,17 @@ jobs:

      # No NODE_AUTH_TOKEN needed because we use OIDC.
      - name: Publish to npm
-        run: npm publish "${GITHUB_WORKSPACE}/dist/npm/codex-npm-${{ needs.release.outputs.version }}.tgz"
+        env:
+          VERSION: ${{ needs.release.outputs.version }}
+          NPM_TAG: ${{ needs.release.outputs.npm_tag }}
+        run: |
+          set -euo pipefail
+          tag_args=()
+          if [[ -n "${NPM_TAG}" ]]; then
+            tag_args+=(--tag "${NPM_TAG}")
+          fi
+
+          npm publish "${GITHUB_WORKSPACE}/dist/npm/codex-npm-${VERSION}.tgz" "${tag_args[@]}"

  update-branch:
    name: Update latest-alpha-cli branch
--- a/README.md
+++ b/README.md
@@ -1,4 +1,3 @@
-<h1 align="center">OpenAI Codex CLI</h1>

 <p align="center"><code>npm i -g @openai/codex</code><br />or <code>brew install codex</code></p>

@@ -102,4 +101,3 @@ Codex CLI supports a rich set of configuration options, with preferences stored
 ## License

 This repository is licensed under the [Apache-2.0 License](LICENSE).
-
--- a/codex-cli/.gitignore
+++ b/codex-cli/.gitignore
@@ -1,7 +1 @@
-# Added by ./scripts/install_native_deps.sh
-/bin/codex-aarch64-apple-darwin
-/bin/codex-aarch64-unknown-linux-musl
-/bin/codex-linux-sandbox-arm64
-/bin/codex-linux-sandbox-x64
-/bin/codex-x86_64-apple-darwin
-/bin/codex-x86_64-unknown-linux-musl
+/vendor/
--- a/codex-cli/bin/codex.js
+++ b/codex-cli/bin/codex.js
@@ -1,6 +1,7 @@
 #!/usr/bin/env node
 // Unified entry point for the Codex CLI.

+import { existsSync } from "fs";
 import path from "path";
 import { fileURLToPath } from "url";

@@ -40,10 +41,10 @@ switch (platform) {
  case "win32":
    switch (arch) {
      case "x64":
-        targetTriple = "x86_64-pc-windows-msvc.exe";
+        targetTriple = "x86_64-pc-windows-msvc";
        break;
      case "arm64":
-        targetTriple = "aarch64-pc-windows-msvc.exe";
+        targetTriple = "aarch64-pc-windows-msvc";
        break;
      default:
        break;
@@ -57,7 +58,10 @@ if (!targetTriple) {
  throw new Error(`Unsupported platform: ${platform} (${arch})`);
 }

-const binaryPath = path.join(__dirname, "..", "bin", `codex-${targetTriple}`);
+const vendorRoot = path.join(__dirname, "..", "vendor");
+const archRoot = path.join(vendorRoot, targetTriple);
+const codexBinaryName = process.platform === "win32" ? "codex.exe" : "codex";
+const binaryPath = path.join(archRoot, "codex", codexBinaryName);

 // Use an asynchronous spawn instead of spawnSync so that Node is able to
 // respond to signals (e.g. Ctrl-C / SIGINT) while the native binary is
@@ -66,23 +70,6 @@ const binaryPath = path.join(__dirname, "..", "bin", `codex-${targetTriple}`);
 // receives a fatal signal, both processes exit in a predictable manner.
 const { spawn } = await import("child_process");

-async function tryImport(moduleName) {
-  try {
-    // eslint-disable-next-line node/no-unsupported-features/es-syntax
-    return await import(moduleName);
-  } catch (err) {
-    return null;
-  }
-}
-
-async function resolveRgDir() {
-  const ripgrep = await tryImport("@vscode/ripgrep");
-  if (!ripgrep?.rgPath) {
-    return null;
-  }
-  return path.dirname(ripgrep.rgPath);
-}
-
 function getUpdatedPath(newDirs) {
  const pathSep = process.platform === "win32" ? ";" : ":";
  const existingPath = process.env.PATH || "";
@@ -94,9 +81,9 @@ function getUpdatedPath(newDirs) {
 }

 const additionalDirs = [];
-const rgDir = await resolveRgDir();
-if (rgDir) {
-  additionalDirs.push(rgDir);
+const pathDir = path.join(archRoot, "path");
+if (existsSync(pathDir)) {
+  additionalDirs.push(pathDir);
 }
 const updatedPath = getUpdatedPath(additionalDirs);

--- a/codex-cli/bin/rg
+++ b/codex-cli/bin/rg
@@ -0,0 +1,79 @@
+#!/usr/bin/env dotslash
+
+{
+  "name": "rg",
+  "platforms": {
+    "macos-aarch64": {
+      "size": 1787248,
+      "hash": "blake3",
+      "digest": "8d9942032585ea8ee805937634238d9aee7b210069f4703c88fbe568e26fb78a",
+      "format": "tar.gz",
+      "path": "ripgrep-14.1.1-aarch64-apple-darwin/rg",
+      "providers": [
+        {
+          "url": "https://github.com/BurntSushi/ripgrep/releases/download/14.1.1/ripgrep-14.1.1-aarch64-apple-darwin.tar.gz"
+        }
+      ]
+    },
+    "linux-aarch64": {
+      "size": 2047405,
+      "hash": "blake3",
+      "digest": "0b670b8fa0a3df2762af2fc82cc4932f684ca4c02dbd1260d4f3133fd4b2a515",
+      "format": "tar.gz",
+      "path": "ripgrep-14.1.1-aarch64-unknown-linux-gnu/rg",
+      "providers": [
+        {
+          "url": "https://github.com/BurntSushi/ripgrep/releases/download/14.1.1/ripgrep-14.1.1-aarch64-unknown-linux-gnu.tar.gz"
+        }
+      ]
+    },
+    "macos-x86_64": {
+      "size": 2082672,
+      "hash": "blake3",
+      "digest": "e9b862fc8da3127f92791f0ff6a799504154ca9d36c98bf3e60a81c6b1f7289e",
+      "format": "tar.gz",
+      "path": "ripgrep-14.1.1-x86_64-apple-darwin/rg",
+      "providers": [
+        {
+          "url": "https://github.com/BurntSushi/ripgrep/releases/download/14.1.1/ripgrep-14.1.1-x86_64-apple-darwin.tar.gz"
+        }
+      ]
+    },
+    "linux-x86_64": {
+      "size": 2566310,
+      "hash": "blake3",
+      "digest": "f73cca4e54d78c31f832c7f6e2c0b4db8b04fa3eaa747915727d570893dbee76",
+      "format": "tar.gz",
+      "path": "ripgrep-14.1.1-x86_64-unknown-linux-musl/rg",
+      "providers": [
+        {
+          "url": "https://github.com/BurntSushi/ripgrep/releases/download/14.1.1/ripgrep-14.1.1-x86_64-unknown-linux-musl.tar.gz"
+        }
+      ]
+    },
+    "windows-x86_64": {
+      "size": 2058893,
+      "hash": "blake3",
+      "digest": "a8ce1a6fed4f8093ee997e57f33254e94b2cd18e26358b09db599c89882eadbd",
+      "format": "zip",
+      "path": "ripgrep-14.1.1-x86_64-pc-windows-msvc/rg.exe",
+      "providers": [
+        {
+          "url": "https://github.com/BurntSushi/ripgrep/releases/download/14.1.1/ripgrep-14.1.1-x86_64-pc-windows-msvc.zip"
+        }
+      ]
+    },
+    "windows-aarch64": {
+      "size": 1667740,
+      "hash": "blake3",
+      "digest": "47b971a8c4fca1d23a4e7c19bd4d88465ebc395598458133139406d3bf85f3fa",
+      "format": "zip",
+      "path": "rg.exe",
+      "providers": [
+        {
+          "url": "https://github.com/microsoft/ripgrep-prebuilt/releases/download/v13.0.0-13/ripgrep-v13.0.0-13-aarch64-pc-windows-msvc.zip"
+        }
+      ]
+    }
+  }
+}
--- a/codex-cli/package-lock.json
+++ b/codex-cli/package-lock.json
@@ -2,118 +2,17 @@
  "name": "@openai/codex",
  "version": "0.0.0-dev",
  "lockfileVersion": 3,
-  "requires": true,
  "packages": {
    "": {
      "name": "@openai/codex",
      "version": "0.0.0-dev",
      "license": "Apache-2.0",
-      "dependencies": {
-        "@vscode/ripgrep": "^1.15.14"
-      },
      "bin": {
        "codex": "bin/codex.js"
      },
      "engines": {
        "node": ">=20"
      }
-    },
-    "node_modules/@vscode/ripgrep": {
-      "version": "1.15.14",
-      "resolved": "https://registry.npmjs.org/@vscode/ripgrep/-/ripgrep-1.15.14.tgz",
-      "integrity": "sha512-/G1UJPYlm+trBWQ6cMO3sv6b8D1+G16WaJH1/DSqw32JOVlzgZbLkDxRyzIpTpv30AcYGMkCf5tUqGlW6HbDWw==",
-      "hasInstallScript": true,
-      "license": "MIT",
-      "dependencies": {
-        "https-proxy-agent": "^7.0.2",
-        "proxy-from-env": "^1.1.0",
-        "yauzl": "^2.9.2"
-      }
-    },
-    "node_modules/agent-base": {
-      "version": "7.1.4",
-      "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-7.1.4.tgz",
-      "integrity": "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ==",
-      "license": "MIT",
-      "engines": {
-        "node": ">= 14"
-      }
-    },
-    "node_modules/buffer-crc32": {
-      "version": "0.2.13",
-      "resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz",
-      "integrity": "sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==",
-      "license": "MIT",
-      "engines": {
-        "node": "*"
-      }
-    },
-    "node_modules/debug": {
-      "version": "4.4.1",
-      "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.1.tgz",
-      "integrity": "sha512-KcKCqiftBJcZr++7ykoDIEwSa3XWowTfNPo92BYxjXiyYEVrUQh2aLyhxBCwww+heortUFxEJYcRzosstTEBYQ==",
-      "license": "MIT",
-      "dependencies": {
-        "ms": "^2.1.3"
-      },
-      "engines": {
-        "node": ">=6.0"
-      },
-      "peerDependenciesMeta": {
-        "supports-color": {
-          "optional": true
-        }
-      }
-    },
-    "node_modules/fd-slicer": {
-      "version": "1.1.0",
-      "resolved": "https://registry.npmjs.org/fd-slicer/-/fd-slicer-1.1.0.tgz",
-      "integrity": "sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g==",
-      "license": "MIT",
-      "dependencies": {
-        "pend": "~1.2.0"
-      }
-    },
-    "node_modules/https-proxy-agent": {
-      "version": "7.0.6",
-      "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz",
-      "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==",
-      "license": "MIT",
-      "dependencies": {
-        "agent-base": "^7.1.2",
-        "debug": "4"
-      },
-      "engines": {
-        "node": ">= 14"
-      }
-    },
-    "node_modules/ms": {
-      "version": "2.1.3",
-      "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
-      "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==",
-      "license": "MIT"
-    },
-    "node_modules/pend": {
-      "version": "1.2.0",
-      "resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz",
-      "integrity": "sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg==",
-      "license": "MIT"
-    },
-    "node_modules/proxy-from-env": {
-      "version": "1.1.0",
-      "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
-      "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==",
-      "license": "MIT"
-    },
-    "node_modules/yauzl": {
-      "version": "2.10.0",
-      "resolved": "https://registry.npmjs.org/yauzl/-/yauzl-2.10.0.tgz",
-      "integrity": "sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g==",
-      "license": "MIT",
-      "dependencies": {
-        "buffer-crc32": "~0.2.3",
-        "fd-slicer": "~1.1.0"
-      }
    }
  }
 }
--- a/codex-cli/package.json
+++ b/codex-cli/package.json
@@ -11,17 +11,11 @@
  },
  "files": [
    "bin",
-    "dist"
+    "vendor"
  ],
  "repository": {
    "type": "git",
    "url": "git+https://github.com/openai/codex.git",
    "directory": "codex-cli"
-  },
-  "dependencies": {
-    "@vscode/ripgrep": "^1.15.14"
-  },
-  "devDependencies": {
-    "prettier": "^3.3.3"
  }
 }
--- a/codex-cli/scripts/README.md
+++ b/codex-cli/scripts/README.md
@@ -5,5 +5,7 @@ Run the following:
 To build the 0.2.x or later version of the npm module, which runs the Rust version of the CLI, build it as follows:

 ```bash
-./codex-cli/scripts/stage_rust_release.py --release-version 0.6.0
+./codex-cli/scripts/build_npm_package.py --release-version 0.6.0
 ```
+
+Note this will create `./codex-cli/vendor/` as a side-effect.
--- a/codex-cli/scripts/build_npm_package.py
+++ b/codex-cli/scripts/build_npm_package.py
@@ -0,0 +1,269 @@
+#!/usr/bin/env python3
+"""Stage and optionally package the @openai/codex npm module."""
+
+import argparse
+import json
+import re
+import shutil
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+
+# Filesystem anchors derived from this script's own location:
+# codex-cli/scripts/ -> codex-cli/ -> repository root.
+SCRIPT_DIR = Path(__file__).resolve().parent
+CODEX_CLI_ROOT = SCRIPT_DIR.parent
+REPO_ROOT = CODEX_CLI_ROOT.parent
+# owner/name slug interpolated into the `gh api` releases path.
+GITHUB_REPO = "openai/codex"
+
+# The docs are not clear on what the expected value/format of
+# workflow/workflowName is:
+# https://cli.github.com/manual/gh_run_list
+# This value is passed as the --workflow argument of `gh run list`.
+WORKFLOW_NAME = ".github/workflows/rust-release.yml"
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse the command-line options for this script.
+
+    Returns:
+        A namespace with ``version``, ``release_version``, ``workflow_url``,
+        ``staging_dir`` and ``pack_output``. Every option is optional at the
+        parser level, so each attribute is ``None`` when its flag is omitted.
+    """
+    parser = argparse.ArgumentParser(description="Build or stage the Codex CLI npm package.")
+    parser.add_argument(
+        "--version",
+        help="Version number to write to package.json inside the staged package.",
+    )
+    parser.add_argument(
+        "--release-version",
+        help=(
+            "Version to stage for npm release. When provided, the script also resolves the "
+            "matching rust-release workflow unless --workflow-url is supplied."
+        ),
+    )
+    parser.add_argument(
+        "--workflow-url",
+        help="Optional GitHub Actions workflow run URL used to download native binaries.",
+    )
+    parser.add_argument(
+        "--staging-dir",
+        type=Path,
+        help=(
+            "Directory to stage the package contents. Defaults to a new temporary directory "
+            "if omitted. The directory must be empty when provided."
+        ),
+    )
+    # Hidden alias for --staging-dir: it writes to the same dest and is left
+    # out of --help. NOTE(review): presumably kept for callers of an older
+    # script that used --tmp -- confirm before removing.
+    parser.add_argument(
+        "--tmp",
+        dest="staging_dir",
+        type=Path,
+        help=argparse.SUPPRESS,
+    )
+    parser.add_argument(
+        "--pack-output",
+        type=Path,
+        help="Path where the generated npm tarball should be written.",
+    )
+    return parser.parse_args()
+
+
+def main() -> int:
+    args = parse_args()
+
+    version = args.version
+    release_version = args.release_version
+    if release_version:
+        if version and version != release_version:
+            raise RuntimeError("--version and --release-version must match when both are provided.")
+        version = release_version
+
+    if not version:
+        raise RuntimeError("Must specify --version or --release-version.")
+
+    staging_dir, created_temp = prepare_staging_dir(args.staging_dir)
+
+    try:
+        stage_sources(staging_dir, version)
+
+        workflow_url = args.workflow_url
+        resolved_head_sha: str | None = None
+        if not workflow_url:
+            if release_version:
+                workflow = resolve_release_workflow(version)
+                workflow_url = workflow["url"]
+                resolved_head_sha = workflow.get("headSha")
+            else:
+                workflow_url = resolve_latest_alpha_workflow_url()
+        elif release_version:
+            try:
+                workflow = resolve_release_workflow(version)
+                resolved_head_sha = workflow.get("headSha")
+            except Exception:
+                resolved_head_sha = None
+
+        if release_version and resolved_head_sha:
+            print(f"should `git checkout {resolved_head_sha}`")
+
+        if not workflow_url:
+            raise RuntimeError("Unable to determine workflow URL for native binaries.")
+
+        install_native_binaries(staging_dir, workflow_url)
+
+        if release_version:
+            staging_dir_str = str(staging_dir)
+            print(
+                f"Staged version {version} for release in {staging_dir_str}\n\n"
+                "Verify the CLI:\n"
+                f"    node {staging_dir_str}/bin/codex.js --version\n"
+                f"    node {staging_dir_str}/bin/codex.js --help\n\n"
+            )
+        else:
+            print(f"Staged package in {staging_dir}")
+
+        if args.pack_output is not None:
+            output_path = run_npm_pack(staging_dir, args.pack_output)
+            print(f"npm pack output written to {output_path}")
+    finally:
+        if created_temp:
+            # Preserve the staging directory for further inspection.
+            pass
+
+    return 0
+
+
+def prepare_staging_dir(staging_dir: Path | None) -> tuple[Path, bool]:
+    if staging_dir is not None:
+        staging_dir = staging_dir.resolve()
+        staging_dir.mkdir(parents=True, exist_ok=True)
+        if any(staging_dir.iterdir()):
+            raise RuntimeError(f"Staging directory {staging_dir} is not empty.")
+        return staging_dir, False
+
+    temp_dir = Path(tempfile.mkdtemp(prefix="codex-npm-stage-"))
+    return temp_dir, True
+
+
+def stage_sources(staging_dir: Path, version: str) -> None:
+    """Copy the package's source files into ``staging_dir``.
+
+    Writes ``bin/codex.js``, ``bin/rg`` (if present in the checkout),
+    ``README.md`` (if present at the repository root) and a ``package.json``
+    whose ``version`` field is replaced by ``version``.
+
+    Args:
+        staging_dir: Destination directory for the staged package.
+        version: Version string written into the staged ``package.json``.
+    """
+    bin_dir = staging_dir / "bin"
+    bin_dir.mkdir(parents=True, exist_ok=True)
+
+    shutil.copy2(CODEX_CLI_ROOT / "bin" / "codex.js", bin_dir / "codex.js")
+    # The rg manifest is optional: it is copied only when the file exists.
+    rg_manifest = CODEX_CLI_ROOT / "bin" / "rg"
+    if rg_manifest.exists():
+        shutil.copy2(rg_manifest, bin_dir / "rg")
+
+    # The README comes from the repository root, not from codex-cli/.
+    readme_src = REPO_ROOT / "README.md"
+    if readme_src.exists():
+        shutil.copy2(readme_src, staging_dir / "README.md")
+
+    # Only the version field is changed; all other package.json keys are
+    # carried over as loaded.
+    with open(CODEX_CLI_ROOT / "package.json", "r", encoding="utf-8") as fh:
+        package_json = json.load(fh)
+    package_json["version"] = version
+
+    with open(staging_dir / "package.json", "w", encoding="utf-8") as out:
+        json.dump(package_json, out, indent=2)
+        out.write("\n")
+
+
+def install_native_binaries(staging_dir: Path, workflow_url: str | None) -> None:
+    cmd = ["./scripts/install_native_deps.py"]
+    if workflow_url:
+        cmd.extend(["--workflow-url", workflow_url])
+    cmd.append(str(staging_dir))
+    subprocess.check_call(cmd, cwd=CODEX_CLI_ROOT)
+
+
+def resolve_latest_alpha_workflow_url() -> str:
+    version = determine_latest_alpha_version()
+    workflow = resolve_release_workflow(version)
+    return workflow["url"]
+
+
+def determine_latest_alpha_version() -> str:
+    releases = list_releases()
+    best_key: tuple[int, int, int, int] | None = None
+    best_version: str | None = None
+    pattern = re.compile(r"^rust-v(\d+)\.(\d+)\.(\d+)-alpha\.(\d+)$")
+    for release in releases:
+        tag = release.get("tag_name", "")
+        match = pattern.match(tag)
+        if not match:
+            continue
+        key = tuple(int(match.group(i)) for i in range(1, 5))
+        if best_key is None or key > best_key:
+            best_key = key
+            best_version = (
+                f"{match.group(1)}.{match.group(2)}.{match.group(3)}-alpha.{match.group(4)}"
+            )
+
+    if best_version is None:
+        raise RuntimeError("No alpha releases found when resolving workflow URL.")
+    return best_version
+
+
+def list_releases() -> list[dict]:
+    """Fetch the repository's releases through ``gh api``.
+
+    Only a single page of up to 100 releases is requested (``per_page=100``);
+    no further pages are fetched.
+
+    Returns:
+        The decoded JSON list returned by the releases endpoint.
+
+    Raises:
+        subprocess.CalledProcessError: If ``gh`` exits with a non-zero status.
+        RuntimeError: If the output is not valid JSON or is not a JSON list.
+    """
+    stdout = subprocess.check_output(
+        ["gh", "api", f"/repos/{GITHUB_REPO}/releases?per_page=100"],
+        text=True,
+    )
+    try:
+        # Empty output is treated as an empty list of releases.
+        releases = json.loads(stdout or "[]")
+    except json.JSONDecodeError as exc:
+        raise RuntimeError("Unable to parse releases JSON.") from exc
+    if not isinstance(releases, list):
+        raise RuntimeError("Unexpected response when listing releases.")
+    return releases
+
+
+def resolve_release_workflow(version: str) -> dict:
+    """Look up the rust-release workflow run for ``rust-v{version}``.
+
+    Runs ``gh run list`` filtered by branch ``rust-v{version}`` and by
+    ``WORKFLOW_NAME``, keeping only the first listed run.
+
+    Args:
+        version: Version without the ``rust-v`` prefix.
+
+    Returns:
+        A dict with the requested ``workflowName``, ``url`` and ``headSha``
+        fields of that run.
+
+    Raises:
+        subprocess.CalledProcessError: If ``gh`` exits with a non-zero status.
+        RuntimeError: If no run is found.
+    """
+    stdout = subprocess.check_output(
+        [
+            "gh",
+            "run",
+            "list",
+            "--branch",
+            f"rust-v{version}",
+            "--json",
+            "workflowName,url,headSha",
+            "--workflow",
+            WORKFLOW_NAME,
+            "--jq",
+            "first(.[])",
+        ],
+        text=True,
+    )
+    # Empty output decodes to an empty list, which is falsy and therefore
+    # reported below as "not found".
+    workflow = json.loads(stdout or "[]")
+    if not workflow:
+        raise RuntimeError(f"Unable to find rust-release workflow for version {version}.")
+    return workflow
+
+
+def run_npm_pack(staging_dir: Path, output_path: Path) -> Path:
+    output_path = output_path.resolve()
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    with tempfile.TemporaryDirectory(prefix="codex-npm-pack-") as pack_dir_str:
+        pack_dir = Path(pack_dir_str)
+        stdout = subprocess.check_output(
+            ["npm", "pack", "--json", "--pack-destination", str(pack_dir)],
+            cwd=staging_dir,
+            text=True,
+        )
+        try:
+            pack_output = json.loads(stdout)
+        except json.JSONDecodeError as exc:
+            raise RuntimeError("Failed to parse npm pack output.") from exc
+
+        if not pack_output:
+            raise RuntimeError("npm pack did not produce an output tarball.")
+
+        tarball_name = pack_output[0].get("filename") or pack_output[0].get("name")
+        if not tarball_name:
+            raise RuntimeError("Unable to determine npm pack output filename.")
+
+        tarball_path = pack_dir / tarball_name
+        if not tarball_path.exists():
+            raise RuntimeError(f"Expected npm pack output not found: {tarball_path}")
+
+        shutil.move(str(tarball_path), output_path)
+
+    return output_path
+
+
+if __name__ == "__main__":
+    import sys
+
+    sys.exit(main())
--- a/codex-cli/scripts/install_native_deps.py
+++ b/codex-cli/scripts/install_native_deps.py
@@ -0,0 +1,318 @@
+#!/usr/bin/env python3
+"""Install Codex native binaries (Rust CLI plus ripgrep helpers)."""
+
+import argparse
+import json
+import os
+import shutil
+import subprocess
+import tarfile
+import tempfile
+import zipfile
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
+from typing import Iterable, Sequence
+from urllib.parse import urlparse
+from urllib.request import urlopen
+
+# Directory holding this script, and the codex-cli directory one level above it.
+SCRIPT_DIR = Path(__file__).resolve().parent
+CODEX_CLI_ROOT = SCRIPT_DIR.parent
+# Workflow run used when --workflow-url is not supplied (or is blank).
+DEFAULT_WORKFLOW_URL = "https://github.com/openai/codex/actions/runs/17952349351"  # rust-v0.40.0
+# Name of the directory under the package root that receives the binaries.
+VENDOR_DIR_NAME = "vendor"
+# DotSlash manifest that lists the ripgrep download for each platform.
+RG_MANIFEST = CODEX_CLI_ROOT / "bin" / "rg"
+# Target triples for which a Codex binary is installed from the workflow artifacts.
+CODEX_TARGETS = (
+    "x86_64-unknown-linux-musl",
+    "aarch64-unknown-linux-musl",
+    "x86_64-apple-darwin",
+    "aarch64-apple-darwin",
+    "x86_64-pc-windows-msvc",
+    "aarch64-pc-windows-msvc",
+)
+
+# (target triple, DotSlash manifest platform key) pairs for ripgrep.
+RG_TARGET_PLATFORM_PAIRS: list[tuple[str, str]] = [
+    ("x86_64-unknown-linux-musl", "linux-x86_64"),
+    ("aarch64-unknown-linux-musl", "linux-aarch64"),
+    ("x86_64-apple-darwin", "macos-x86_64"),
+    ("aarch64-apple-darwin", "macos-aarch64"),
+    ("x86_64-pc-windows-msvc", "windows-x86_64"),
+    ("aarch64-pc-windows-msvc", "windows-aarch64"),
+]
+# Lookup from target triple to manifest platform key.
+RG_TARGET_TO_PLATFORM = {target: platform for target, platform in RG_TARGET_PLATFORM_PAIRS}
+# Targets fetched when the caller does not name any, in the order of the pairs list.
+DEFAULT_RG_TARGETS = [target for target, _ in RG_TARGET_PLATFORM_PAIRS]
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse the command line for the native-dependency installer.
+
+    Accepts an optional ``--workflow-url`` and an optional positional ``root``
+    (converted to a ``Path``). Returns the populated namespace.
+    """
+    workflow_url_help = (
+        "GitHub Actions workflow URL that produced the artifacts. Defaults to a "
+        "known good run when omitted."
+    )
+    root_help = (
+        "Directory containing package.json for the staged package. If omitted, the "
+        "repository checkout is used."
+    )
+
+    cli = argparse.ArgumentParser(description="Install native Codex binaries.")
+    cli.add_argument("--workflow-url", help=workflow_url_help)
+    cli.add_argument("root", nargs="?", type=Path, help=root_help)
+    return cli.parse_args()
+
+
+def main() -> int:
+    """Install the Codex binaries and ripgrep into ``<root>/vendor``.
+
+    ``<root>`` is the positional ``root`` argument when given, otherwise
+    ``CODEX_CLI_ROOT``. The Codex binaries come from the artifacts of the
+    workflow run named by ``--workflow-url`` (``DEFAULT_WORKFLOW_URL`` when the
+    flag is missing or blank); ripgrep comes from the ``RG_MANIFEST`` DotSlash
+    manifest.
+
+    Returns 0 on success; failures propagate as exceptions.
+    """
+    args = parse_args()
+
+    codex_cli_root = (args.root or CODEX_CLI_ROOT).resolve()
+    vendor_dir = codex_cli_root / VENDOR_DIR_NAME
+    vendor_dir.mkdir(parents=True, exist_ok=True)
+
+    workflow_url = (args.workflow_url or DEFAULT_WORKFLOW_URL).strip()
+    if not workflow_url:
+        workflow_url = DEFAULT_WORKFLOW_URL
+
+    # Take the run ID from the URL path: the numeric segment that follows
+    # "runs". This keeps working when the URL carries a trailing
+    # "/job/<id>" or "/attempts/<n>" segment or a "?query" string, where the
+    # last "/"-separated piece is not the run ID. Anything else (including a
+    # bare run ID) falls back to the last "/"-separated piece.
+    segments = [segment for segment in urlparse(workflow_url).path.split("/") if segment]
+    workflow_id = next(
+        (
+            following
+            for segment, following in zip(segments, segments[1:])
+            if segment == "runs" and following.isdigit()
+        ),
+        workflow_url.rstrip("/").split("/")[-1],
+    )
+
+    # The downloaded artifacts are only needed while the binaries are being
+    # decompressed, so they live in a temporary directory.
+    with tempfile.TemporaryDirectory(prefix="codex-native-artifacts-") as artifacts_dir_str:
+        artifacts_dir = Path(artifacts_dir_str)
+        _download_artifacts(workflow_id, artifacts_dir)
+        install_codex_binaries(artifacts_dir, vendor_dir, CODEX_TARGETS)
+
+    fetch_rg(vendor_dir, DEFAULT_RG_TARGETS, manifest_path=RG_MANIFEST)
+
+    print(f"Installed native dependencies into {vendor_dir}")
+    return 0
+
+
+def fetch_rg(
+    vendor_dir: Path,
+    targets: Sequence[str] | None = None,
+    *,
+    manifest_path: Path,
+) -> list[Path]:
+    """Fetch the ripgrep binary for each target listed in the DotSlash manifest.
+
+    Args:
+        vendor_dir: Directory that receives the binaries; created if missing.
+        targets: Target triples to fetch. ``None`` means ``DEFAULT_RG_TARGETS``.
+        manifest_path: DotSlash manifest describing the ripgrep downloads.
+
+    Returns:
+        The installed binary paths, in the same order as ``targets``.
+
+    Raises:
+        FileNotFoundError: if ``manifest_path`` does not exist.
+        ValueError: if a target has no entry in ``RG_TARGET_TO_PLATFORM``.
+        RuntimeError: if the manifest lacks the platform for a target.
+    """
+    wanted = DEFAULT_RG_TARGETS if targets is None else targets
+
+    if not manifest_path.exists():
+        raise FileNotFoundError(f"DotSlash manifest not found: {manifest_path}")
+
+    known_platforms = _load_manifest(manifest_path).get("platforms", {})
+
+    vendor_dir.mkdir(parents=True, exist_ok=True)
+
+    wanted = list(wanted)
+    if len(wanted) == 0:
+        return []
+
+    # Resolve every target before submitting any work, so an unsupported
+    # target fails before a download starts.
+    jobs: list[tuple[str, str, dict]] = []
+    for name in wanted:
+        key = RG_TARGET_TO_PLATFORM.get(name)
+        if key is None:
+            raise ValueError(f"Unsupported ripgrep target '{name}'.")
+
+        info = known_platforms.get(key)
+        if info is None:
+            raise RuntimeError(f"Platform '{key}' not found in manifest {manifest_path}.")
+
+        jobs.append((name, key, info))
+
+    pool_size = min(len(jobs), max(1, (os.cpu_count() or 1)))
+    fetched: dict[str, Path] = {}
+
+    with ThreadPoolExecutor(max_workers=pool_size) as pool:
+        pending = {}
+        for name, key, info in jobs:
+            job = pool.submit(_fetch_single_rg, vendor_dir, name, key, info, manifest_path)
+            pending[job] = name
+
+        # result() re-raises any exception from the worker.
+        for finished in as_completed(pending):
+            fetched[pending[finished]] = finished.result()
+
+    return [fetched[name] for name in wanted]
+
+
+def _download_artifacts(workflow_id: str, dest_dir: Path) -> None:
+    """Run ``gh run download`` for ``workflow_id`` in ``openai/codex`` into ``dest_dir``.
+
+    Raises ``subprocess.CalledProcessError`` if ``gh`` exits non-zero.
+    """
+    gh_command = ["gh", "run", "download"]
+    gh_command += ["--dir", str(dest_dir)]
+    gh_command += ["--repo", "openai/codex"]
+    gh_command.append(workflow_id)
+    subprocess.check_call(gh_command)
+
+
+def install_codex_binaries(
+    artifacts_dir: Path, vendor_dir: Path, targets: Iterable[str]
+) -> list[Path]:
+    """Install the Codex binary for each target, working on the targets in parallel.
+
+    Args:
+        artifacts_dir: Directory holding the downloaded workflow artifacts.
+        vendor_dir: Directory that receives the per-target binaries.
+        targets: Target triples to install.
+
+    Returns:
+        The installed binary paths, in the same order as ``targets``; an empty
+        list when no targets are given.
+    """
+    wanted = list(targets)
+    if len(wanted) == 0:
+        return []
+
+    pool_size = min(len(wanted), max(1, (os.cpu_count() or 1)))
+    installed: dict[str, Path] = {}
+
+    with ThreadPoolExecutor(max_workers=pool_size) as pool:
+        pending = {}
+        for name in wanted:
+            job = pool.submit(_install_single_codex_binary, artifacts_dir, vendor_dir, name)
+            pending[job] = name
+
+        # result() re-raises any exception from the worker.
+        for finished in as_completed(pending):
+            installed[pending[finished]] = finished.result()
+
+    return [installed[name] for name in wanted]
+
+
+def _install_single_codex_binary(artifacts_dir: Path, vendor_dir: Path, target: str) -> Path:
+    """Decompress the Codex binary for ``target`` into ``vendor_dir/<target>/codex/``.
+
+    The archive is expected at ``artifacts_dir/<target>/<archive name>``. Any
+    existing binary at the destination is removed first, and non-Windows
+    binaries are given mode ``0o755``.
+
+    Returns the path of the installed binary.
+
+    Raises ``FileNotFoundError`` if the archive is missing.
+    """
+    archive_path = artifacts_dir / target / _archive_name_for_target(target)
+    if not archive_path.exists():
+        raise FileNotFoundError(f"Expected artifact not found: {archive_path}")
+
+    for_windows = "windows" in target
+
+    install_dir = vendor_dir / target / "codex"
+    install_dir.mkdir(parents=True, exist_ok=True)
+
+    dest = install_dir / ("codex.exe" if for_windows else "codex")
+    dest.unlink(missing_ok=True)
+    extract_archive(archive_path, "zst", None, dest)
+    if not for_windows:
+        dest.chmod(0o755)
+    return dest
+
+
+def _archive_name_for_target(target: str) -> str:
+    """Return the artifact file name for ``target``.
+
+    Targets containing ``"windows"`` use the ``.exe.zst`` suffix; all others
+    use ``.zst``.
+    """
+    suffix = ".exe.zst" if "windows" in target else ".zst"
+    return f"codex-{target}{suffix}"
+
+
+def _fetch_single_rg(
+    vendor_dir: Path,
+    target: str,
+    platform_key: str,
+    platform_info: dict,
+    manifest_path: Path,
+) -> Path:
+    """Download and unpack ripgrep for one target into ``vendor_dir/<target>/path/``.
+
+    Args:
+        vendor_dir: Directory that receives the per-target binaries.
+        target: Target triple; names the sub-directory under ``vendor_dir``.
+        platform_key: Manifest platform key; a ``win`` prefix selects ``rg.exe``.
+        platform_info: Manifest entry for the platform. Reads ``providers``
+            (the first entry's ``url``), ``format`` (default ``"zst"``) and
+            ``path``.
+        manifest_path: Only used in error messages.
+
+    Returns:
+        The path of the installed binary.
+
+    Raises:
+        RuntimeError: if the manifest entry lists no providers.
+    """
+    provider_list = platform_info.get("providers", [])
+    if not provider_list:
+        raise RuntimeError(f"No providers listed for platform '{platform_key}' in {manifest_path}.")
+
+    # Only the first provider is tried.
+    url = provider_list[0]["url"]
+    fmt = platform_info.get("format", "zst")
+    member = platform_info.get("path")
+
+    install_dir = vendor_dir / target / "path"
+    install_dir.mkdir(parents=True, exist_ok=True)
+
+    for_windows = platform_key.startswith("win")
+    dest = install_dir / ("rg.exe" if for_windows else "rg")
+
+    with tempfile.TemporaryDirectory() as scratch:
+        # Name the download after the last component of the URL path.
+        download_path = Path(scratch) / os.path.basename(urlparse(url).path)
+        _download_file(url, download_path)
+
+        dest.unlink(missing_ok=True)
+        extract_archive(download_path, fmt, member, dest)
+
+    if not for_windows:
+        dest.chmod(0o755)
+
+    return dest
+
+
+def _download_file(url: str, dest: Path) -> None:
+    """Copy the body of ``url`` to ``dest``, creating ``dest``'s parent directory."""
+    dest.parent.mkdir(parents=True, exist_ok=True)
+    with urlopen(url) as response:
+        with open(dest, "wb") as out:
+            shutil.copyfileobj(response, out)
+
+
+def extract_archive(
+    archive_path: Path,
+    archive_format: str,
+    archive_member: str | None,
+    dest: Path,
+) -> None:
+    """Extract a single file from ``archive_path`` and place it at ``dest``.
+
+    Args:
+        archive_path: The downloaded archive.
+        archive_format: ``"zst"``, ``"tar.gz"`` or ``"zip"``.
+        archive_member: Name of the entry to extract; required for ``tar.gz``
+            and ``zip``, ignored for ``zst``.
+        dest: Final location of the extracted file; its parent is created.
+
+    Raises:
+        RuntimeError: for an unsupported format, a missing ``archive_member``,
+            or an entry that is not present in the archive.
+        subprocess.CalledProcessError: if the ``zstd`` command fails.
+    """
+    dest.parent.mkdir(parents=True, exist_ok=True)
+    staging_dir = archive_path.parent
+
+    match archive_format:
+        case "zst":
+            # Decompress next to the archive with the zstd CLI, then move the
+            # result into place.
+            unpacked = staging_dir / dest.name
+            subprocess.check_call(
+                ["zstd", "-f", "-d", str(archive_path), "-o", str(unpacked)]
+            )
+            shutil.move(str(unpacked), dest)
+
+        case "tar.gz":
+            if not archive_member:
+                raise RuntimeError("Missing 'path' for tar.gz archive in DotSlash manifest.")
+            with tarfile.open(archive_path, "r:gz") as tar:
+                try:
+                    entry = tar.getmember(archive_member)
+                except KeyError as exc:
+                    raise RuntimeError(
+                        f"Entry '{archive_member}' not found in archive {archive_path}."
+                    ) from exc
+                tar.extract(entry, path=staging_dir, filter="data")
+            # The entry is extracted under its archive-relative name next to
+            # the archive; move it to the requested destination.
+            shutil.move(str(staging_dir / archive_member), dest)
+
+        case "zip":
+            if not archive_member:
+                raise RuntimeError("Missing 'path' for zip archive in DotSlash manifest.")
+            with zipfile.ZipFile(archive_path) as bundle:
+                try:
+                    # Stream the entry straight into dest.
+                    with bundle.open(archive_member) as src, open(dest, "wb") as out:
+                        shutil.copyfileobj(src, out)
+                except KeyError as exc:
+                    raise RuntimeError(
+                        f"Entry '{archive_member}' not found in archive {archive_path}."
+                    ) from exc
+
+        case _:
+            raise RuntimeError(f"Unsupported archive format '{archive_format}'.")
+
+
+def _load_manifest(manifest_path: Path) -> dict:
+    """Parse a DotSlash manifest by running ``dotslash -- parse`` on it.
+
+    Returns the decoded JSON object.
+
+    Raises:
+        RuntimeError: if the command output is not valid JSON, or the decoded
+            value is not a ``dict``.
+        subprocess.CalledProcessError: if ``dotslash`` exits non-zero.
+    """
+    raw = subprocess.check_output(["dotslash", "--", "parse", str(manifest_path)], text=True)
+
+    try:
+        parsed = json.loads(raw)
+    except json.JSONDecodeError as exc:
+        raise RuntimeError(f"Invalid DotSlash manifest output from {manifest_path}.") from exc
+
+    if isinstance(parsed, dict):
+        return parsed
+
+    raise RuntimeError(
+        f"Unexpected DotSlash manifest structure for {manifest_path}: {type(parsed)!r}"
+    )
+
+
+# Script entry point: run main() and use its return value as the exit status.
+if __name__ == "__main__":
+    import sys
+
+    sys.exit(main())
--- a/codex-cli/scripts/install_native_deps.sh
+++ b/codex-cli/scripts/install_native_deps.sh
@@ -1,94 +0,0 @@
-#!/usr/bin/env bash
-
-# Install native runtime dependencies for codex-cli.
-#
-# Usage
-#   install_native_deps.sh [--workflow-url URL] [CODEX_CLI_ROOT]
-#
-# The optional RELEASE_ROOT is the path that contains package.json.  Omitting
-# it installs the binaries into the repository's own bin/ folder to support
-# local development.
-
-set -euo pipefail
-
-# ------------------
-# Parse arguments
-# ------------------
-
-CODEX_CLI_ROOT=""
-
-# Until we start publishing stable GitHub releases, we have to grab the binaries
-# from the GitHub Action that created them. Update the URL below to point to the
-# appropriate workflow run:
-WORKFLOW_URL="https://github.com/openai/codex/actions/runs/17417194663" # rust-v0.28.0
-
-while [[ $# -gt 0 ]]; do
-  case "$1" in
-    --workflow-url)
-      shift || { echo "--workflow-url requires an argument"; exit 1; }
-      if [ -n "$1" ]; then
-        WORKFLOW_URL="$1"
-      fi
-      ;;
-    *)
-      if [[ -z "$CODEX_CLI_ROOT" ]]; then
-        CODEX_CLI_ROOT="$1"
-      else
-        echo "Unexpected argument: $1" >&2
-        exit 1
-      fi
-      ;;
-  esac
-  shift
-done
-
-# ----------------------------------------------------------------------------
-# Determine where the binaries should be installed.
-# ----------------------------------------------------------------------------
-
-if [ -n "$CODEX_CLI_ROOT" ]; then
-  # The caller supplied a release root directory.
-  BIN_DIR="$CODEX_CLI_ROOT/bin"
-else
-  # No argument; fall back to the repo’s own bin directory.
-  # Resolve the path of this script, then walk up to the repo root.
-  SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-  CODEX_CLI_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
-  BIN_DIR="$CODEX_CLI_ROOT/bin"
-fi
-
-# Make sure the destination directory exists.
-mkdir -p "$BIN_DIR"
-
-# ----------------------------------------------------------------------------
-# Download and decompress the artifacts from the GitHub Actions workflow.
-# ----------------------------------------------------------------------------
-
-WORKFLOW_ID="${WORKFLOW_URL##*/}"
-
-ARTIFACTS_DIR="$(mktemp -d)"
-trap 'rm -rf "$ARTIFACTS_DIR"' EXIT
-
-# NB: The GitHub CLI `gh` must be installed and authenticated.
-gh run download --dir "$ARTIFACTS_DIR" --repo openai/codex "$WORKFLOW_ID"
-
-# x64 Linux
-zstd -d "$ARTIFACTS_DIR/x86_64-unknown-linux-musl/codex-x86_64-unknown-linux-musl.zst" \
-    -o "$BIN_DIR/codex-x86_64-unknown-linux-musl"
-# ARM64 Linux
-zstd -d "$ARTIFACTS_DIR/aarch64-unknown-linux-musl/codex-aarch64-unknown-linux-musl.zst" \
-    -o "$BIN_DIR/codex-aarch64-unknown-linux-musl"
-# x64 macOS
-zstd -d "$ARTIFACTS_DIR/x86_64-apple-darwin/codex-x86_64-apple-darwin.zst" \
-    -o "$BIN_DIR/codex-x86_64-apple-darwin"
-# ARM64 macOS
-zstd -d "$ARTIFACTS_DIR/aarch64-apple-darwin/codex-aarch64-apple-darwin.zst" \
-    -o "$BIN_DIR/codex-aarch64-apple-darwin"
-# x64 Windows
-zstd -d "$ARTIFACTS_DIR/x86_64-pc-windows-msvc/codex-x86_64-pc-windows-msvc.exe.zst" \
-    -o "$BIN_DIR/codex-x86_64-pc-windows-msvc.exe"
-# ARM64 Windows
-zstd -d "$ARTIFACTS_DIR/aarch64-pc-windows-msvc/codex-aarch64-pc-windows-msvc.exe.zst" \
-    -o "$BIN_DIR/codex-aarch64-pc-windows-msvc.exe"
-
-echo "Installed native dependencies into $BIN_DIR"
--- a/codex-cli/scripts/stage_release.sh
+++ b/codex-cli/scripts/stage_release.sh
@@ -1,120 +0,0 @@
-#!/usr/bin/env bash
-# -----------------------------------------------------------------------------
-# stage_release.sh
-# -----------------------------------------------------------------------------
-# Stages an npm release for @openai/codex.
-#
-# Usage:
-#
-#   --tmp <dir>  : Use <dir> instead of a freshly created temp directory.
-#   -h|--help    : Print usage.
-#
-# -----------------------------------------------------------------------------
-
-set -euo pipefail
-
-# Helper - usage / flag parsing
-
-usage() {
-  cat <<EOF
-Usage: $(basename "$0") [--tmp DIR] [--version VERSION]
-
-Options
-  --tmp DIR   Use DIR to stage the release (defaults to a fresh mktemp dir)
-  --version   Specify the version to release (defaults to a timestamp-based version)
-  -h, --help  Show this help
-
-Legacy positional argument: the first non-flag argument is still interpreted
-as the temporary directory (for backwards compatibility) but is deprecated.
-EOF
-  exit "${1:-0}"
-}
-
-TMPDIR=""
-# Default to a timestamp-based version (keep same scheme as before)
-VERSION="$(printf '0.1.%d' "$(date +%y%m%d%H%M)")"
-WORKFLOW_URL=""
-
-# Manual flag parser - Bash getopts does not handle GNU long options well.
-while [[ $# -gt 0 ]]; do
-  case "$1" in
-    --tmp)
-      shift || { echo "--tmp requires an argument"; usage 1; }
-      TMPDIR="$1"
-      ;;
-    --tmp=*)
-      TMPDIR="${1#*=}"
-      ;;
-    --version)
-      shift || { echo "--version requires an argument"; usage 1; }
-      VERSION="$1"
-      ;;
-    --workflow-url)
-      shift || { echo "--workflow-url requires an argument"; exit 1; }
-      WORKFLOW_URL="$1"
-      ;;
-    -h|--help)
-      usage 0
-      ;;
-    --*)
-      echo "Unknown option: $1" >&2
-      usage 1
-      ;;
-    *)
-      echo "Unexpected extra argument: $1" >&2
-      usage 1
-      ;;
-  esac
-  shift
-done
-
-# Fallback when the caller did not specify a directory.
-# If no directory was specified create a fresh temporary one.
-if [[ -z "$TMPDIR" ]]; then
-  TMPDIR="$(mktemp -d)"
-fi
-
-# Ensure the directory exists, then resolve to an absolute path.
-mkdir -p "$TMPDIR"
-TMPDIR="$(cd "$TMPDIR" && pwd)"
-
-# Main build logic
-
-echo "Staging release in $TMPDIR"
-
-# The script lives in codex-cli/scripts/ - change into codex-cli root so that
-# relative paths keep working.
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-CODEX_CLI_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
-
-pushd "$CODEX_CLI_ROOT" >/dev/null
-
-# 1. Build the JS artifacts ---------------------------------------------------
-
-# Paths inside the staged package
-mkdir -p "$TMPDIR/bin"
-
-cp -r bin/codex.js "$TMPDIR/bin/codex.js"
-cp ../README.md "$TMPDIR" || true # README is one level up - ignore if missing
-
-# Modify package.json - bump version and optionally add the native directory to
-# the files array so that the binaries are published to npm.
-
-jq --arg version "$VERSION" \
-    '.version = $version' \
-    package.json > "$TMPDIR/package.json"
-
-# 2. Native runtime deps (sandbox plus optional Rust binaries)
-
-./scripts/install_native_deps.sh --workflow-url "$WORKFLOW_URL" "$TMPDIR"
-
-popd >/dev/null
-
-echo "Staged version $VERSION for release in $TMPDIR"
-
-echo "Verify the CLI:"
-echo "    node ${TMPDIR}/bin/codex.js --version"
-echo "    node ${TMPDIR}/bin/codex.js --help"
-
-# Print final hint for convenience
-echo "Next:  cd \"$TMPDIR\" && npm publish"
--- a/codex-cli/scripts/stage_rust_release.py
+++ b/codex-cli/scripts/stage_rust_release.py
@@ -1,70 +0,0 @@
-#!/usr/bin/env python3
-
-import json
-import subprocess
-import sys
-import argparse
-from pathlib import Path
-
-
-def main() -> int:
-    parser = argparse.ArgumentParser(
-        description="""Stage a release for the npm module.
-
-Run this after the GitHub Release has been created and use
-`--release-version` to specify the version to release.
-
-Optionally pass `--tmp` to control the temporary staging directory that will be
-forwarded to stage_release.sh.
-"""
-    )
-    parser.add_argument(
-        "--release-version", required=True, help="Version to release, e.g., 0.3.0"
-    )
-    parser.add_argument(
-        "--tmp",
-        help="Optional path to stage the npm package; forwarded to stage_release.sh",
-    )
-    args = parser.parse_args()
-    version = args.release_version
-
-    gh_run = subprocess.run(
-        [
-            "gh",
-            "run",
-            "list",
-            "--branch",
-            f"rust-v{version}",
-            "--json",
-            "workflowName,url,headSha",
-            "--jq",
-            'first(.[] | select(.workflowName == "rust-release"))',
-        ],
-        stdout=subprocess.PIPE,
-        check=True,
-    )
-    gh_run.check_returncode()
-    workflow = json.loads(gh_run.stdout)
-    sha = workflow["headSha"]
-
-    print(f"should `git checkout {sha}`")
-
-    current_dir = Path(__file__).parent.resolve()
-    cmd = [
-        str(current_dir / "stage_release.sh"),
-        "--version",
-        version,
-        "--workflow-url",
-        workflow["url"],
-    ]
-    if args.tmp:
-        cmd.extend(["--tmp", args.tmp])
-
-    stage_release = subprocess.run(cmd)
-    stage_release.check_returncode()
-
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())
--- a/codex-rs/Cargo.lock
+++ b/codex-rs/Cargo.lock
@@ -56,7 +56,7 @@ checksum = "8fac2ce611db8b8cee9b2aa886ca03c924e9da5e5295d0dbd0526e5d0b0710f7"
 dependencies = [
 "allocative_derive",
 "bumpalo",
- "ctor",
+ "ctor 0.1.26",
 "hashbrown 0.14.5",
 "num-bigint",
 ]
@@ -78,12 +78,6 @@ version = "0.2.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"

-[[package]]
-name = "android-tzdata"
-version = "0.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0"
-
 [[package]]
 name = "android_system_properties"
 version = "0.1.5"
@@ -316,6 +310,17 @@ dependencies = [
 "syn 2.0.104",
 ]

+[[package]]
+name = "async-trait"
+version = "0.1.89"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.104",
+]
+
 [[package]]
 name = "atomic-waker"
 version = "1.1.2"
@@ -484,18 +489,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e"

 [[package]]
-name = "chrono"
-version = "0.4.41"
+name = "cfg_aliases"
+version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d"
+checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
+
+[[package]]
+name = "chrono"
+version = "0.4.42"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2"
 dependencies = [
- "android-tzdata",
 "iana-time-zone",
 "js-sys",
 "num-traits",
 "serde",
 "wasm-bindgen",
- "windows-link",
+ "windows-link 0.2.0",
 ]

 [[package]]
@@ -584,7 +594,6 @@ version = "0.0.0"
 dependencies = [
 "anyhow",
 "assert_cmd",
- "once_cell",
 "pretty_assertions",
 "similar",
 "tempfile",
@@ -637,7 +646,10 @@ dependencies = [
 "codex-mcp-server",
 "codex-protocol",
 "codex-protocol-ts",
+ "codex-responses-api-proxy",
 "codex-tui",
+ "ctor 0.5.0",
+ "libc",
 "owo-colors",
 "predicates",
 "pretty_assertions",
@@ -668,6 +680,7 @@ dependencies = [
 "askama",
 "assert_cmd",
 "async-channel",
+ "async-trait",
 "base64",
 "bytes",
 "chrono",
@@ -675,11 +688,14 @@ dependencies = [
 "codex-file-search",
 "codex-mcp-client",
 "codex-protocol",
+ "codex-rmcp-client",
 "core_test_support",
 "dirs",
 "env-flags",
+ "escargot",
 "eventsource-stream",
 "futures",
+ "indexmap 2.10.0",
 "landlock",
 "libc",
 "maplit",
@@ -734,12 +750,15 @@ dependencies = [
 "libc",
 "owo-colors",
 "predicates",
+ "pretty_assertions",
+ "serde",
 "serde_json",
 "shlex",
 "tempfile",
 "tokio",
 "tracing",
 "tracing-subscriber",
+ "ts-rs",
 "uuid",
 "walkdir",
 "wiremock",
@@ -805,6 +824,7 @@ dependencies = [
 name = "codex-login"
 version = "0.0.0"
 dependencies = [
+ "anyhow",
 "base64",
 "chrono",
 "codex-core",
@@ -885,6 +905,7 @@ dependencies = [
 name = "codex-protocol"
 version = "0.0.0"
 dependencies = [
+ "anyhow",
 "base64",
 "icu_decimal",
 "icu_locale_core",
@@ -914,6 +935,36 @@ dependencies = [
 "ts-rs",
 ]

+[[package]]
+name = "codex-responses-api-proxy"
+version = "0.0.0"
+dependencies = [
+ "anyhow",
+ "clap",
+ "codex-arg0",
+ "libc",
+ "reqwest",
+ "serde",
+ "serde_json",
+ "tiny_http",
+ "tokio",
+ "zeroize",
+]
+
+[[package]]
+name = "codex-rmcp-client"
+version = "0.0.0"
+dependencies = [
+ "anyhow",
+ "mcp-types",
+ "pretty_assertions",
+ "rmcp",
+ "serde",
+ "serde_json",
+ "tokio",
+ "tracing",
+]
+
 [[package]]
 name = "codex-tui"
 version = "0.0.0"
@@ -943,7 +994,6 @@ dependencies = [
 "lazy_static",
 "libc",
 "mcp-types",
- "once_cell",
 "path-clean",
 "pathdiff",
 "pretty_assertions",
@@ -965,11 +1015,21 @@ dependencies = [
 "tracing-appender",
 "tracing-subscriber",
 "unicode-segmentation",
- "unicode-width 0.1.14",
+ "unicode-width 0.2.1",
 "url",
 "vt100",
 ]

+[[package]]
+name = "codex-utils-readiness"
+version = "0.0.0"
+dependencies = [
+ "async-trait",
+ "thiserror 2.0.16",
+ "time",
+ "tokio",
+]
+
 [[package]]
 name = "color-eyre"
 version = "0.6.5"
@@ -1088,6 +1148,7 @@ name = "core_test_support"
 version = "0.0.0"
 dependencies = [
 "anyhow",
+ "assert_cmd",
 "codex-core",
 "serde_json",
 "tempfile",
@@ -1199,14 +1260,40 @@ dependencies = [
 "syn 1.0.109",
 ]

+[[package]]
+name = "ctor"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67773048316103656a637612c4a62477603b777d91d9c62ff2290f9cde178fdb"
+dependencies = [
+ "ctor-proc-macro",
+ "dtor",
+]
+
+[[package]]
+name = "ctor-proc-macro"
+version = "0.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e2931af7e13dc045d8e9d26afccc6fa115d64e115c9c84b1166288b46f6782c2"
+
 [[package]]
 name = "darling"
 version = "0.20.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee"
 dependencies = [
- "darling_core",
- "darling_macro",
+ "darling_core 0.20.11",
+ "darling_macro 0.20.11",
+]
+
+[[package]]
+name = "darling"
+version = "0.21.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0"
+dependencies = [
+ "darling_core 0.21.3",
+ "darling_macro 0.21.3",
 ]

 [[package]]
@@ -1223,13 +1310,38 @@ dependencies = [
 "syn 2.0.104",
 ]

+[[package]]
+name = "darling_core"
+version = "0.21.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4"
+dependencies = [
+ "fnv",
+ "ident_case",
+ "proc-macro2",
+ "quote",
+ "strsim 0.11.1",
+ "syn 2.0.104",
+]
+
 [[package]]
 name = "darling_macro"
 version = "0.20.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
 dependencies = [
- "darling_core",
+ "darling_core 0.20.11",
+ "quote",
+ "syn 2.0.104",
+]
+
+[[package]]
+name = "darling_macro"
+version = "0.21.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81"
+dependencies = [
+ "darling_core 0.21.3",
 "quote",
 "syn 2.0.104",
 ]
@@ -1265,12 +1377,12 @@ dependencies = [

 [[package]]
 name = "deranged"
-version = "0.4.0"
+version = "0.5.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e"
+checksum = "a41953f86f8a05768a6cda24def994fd2f424b04ec5c719cf89989779f199071"
 dependencies = [
 "powerfmt",
- "serde",
+ "serde_core",
 ]

 [[package]]
@@ -1449,6 +1561,21 @@ version = "1.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2"

+[[package]]
+name = "dtor"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e58a0764cddb55ab28955347b45be00ade43d4d6f3ba4bf3dc354e4ec9432934"
+dependencies = [
+ "dtor-proc-macro",
+]
+
+[[package]]
+name = "dtor-proc-macro"
+version = "0.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f678cf4a922c215c63e0de95eb1ff08a958a81d47e485cf9da1e27bf6305cfa5"
+
 [[package]]
 name = "dupe"
 version = "0.9.1"
@@ -1591,6 +1718,17 @@ version = "3.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59"

+[[package]]
+name = "escargot"
+version = "0.5.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "11c3aea32bc97b500c9ca6a72b768a26e558264303d101d3409cf6d57a9ed0cf"
+dependencies = [
+ "log",
+ "serde",
+ "serde_json",
+]
+
 [[package]]
 name = "event-listener"
 version = "5.4.0"
@@ -1896,8 +2034,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
 dependencies = [
 "cfg-if",
+ "js-sys",
 "libc",
 "wasi 0.11.1+wasi-snapshot-preview1",
+ "wasm-bindgen",
 ]

 [[package]]
@@ -1907,9 +2047,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
 dependencies = [
 "cfg-if",
+ "js-sys",
 "libc",
 "r-efi",
 "wasi 0.14.2+wasi-0.2.4",
+ "wasm-bindgen",
 ]

 [[package]]
@@ -2106,6 +2248,7 @@ dependencies = [
 "tokio",
 "tokio-rustls",
 "tower-service",
+ "webpki-roots",
 ]

 [[package]]
@@ -2415,7 +2558,7 @@ version = "0.3.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "435d80800b936787d62688c927b6490e887c7ef5ff9ce922c6c6050fca75eb9a"
 dependencies = [
- "darling",
+ "darling 0.20.11",
 "indoc",
 "proc-macro2",
 "quote",
@@ -2670,9 +2813,9 @@ dependencies = [

 [[package]]
 name = "log"
-version = "0.4.27"
+version = "0.4.28"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
+checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432"

 [[package]]
 name = "logos"
@@ -2706,6 +2849,12 @@ dependencies = [
 "hashbrown 0.15.4",
 ]

+[[package]]
+name = "lru-slab"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
+
 [[package]]
 name = "lsp-types"
 version = "0.94.1"
@@ -2885,7 +3034,19 @@ checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4"
 dependencies = [
 "bitflags 2.9.1",
 "cfg-if",
- "cfg_aliases",
+ "cfg_aliases 0.1.1",
+ "libc",
+]
+
+[[package]]
+name = "nix"
+version = "0.30.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6"
+dependencies = [
+ "bitflags 2.9.1",
+ "cfg-if",
+ "cfg_aliases 0.2.1",
 "libc",
 ]

@@ -3311,7 +3472,7 @@ dependencies = [
 "lazy_static",
 "libc",
 "log",
- "nix",
+ "nix 0.28.0",
 "serial2",
 "shared_library",
 "shell-words",
@@ -3399,6 +3560,20 @@ dependencies = [
 "unicode-ident",
 ]

+[[package]]
+name = "process-wrap"
+version = "8.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a3ef4f2f0422f23a82ec9f628ea2acd12871c81a9362b02c43c1aa86acfc3ba1"
+dependencies = [
+ "futures",
+ "indexmap 2.10.0",
+ "nix 0.30.1",
+ "tokio",
+ "tracing",
+ "windows",
+]
+
 [[package]]
 name = "pulldown-cmark"
 version = "0.10.3"
@@ -3442,6 +3617,61 @@ dependencies = [
 "memchr",
 ]

+[[package]]
+name = "quinn"
+version = "0.11.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20"
+dependencies = [
+ "bytes",
+ "cfg_aliases 0.2.1",
+ "pin-project-lite",
+ "quinn-proto",
+ "quinn-udp",
+ "rustc-hash",
+ "rustls",
+ "socket2",
+ "thiserror 2.0.16",
+ "tokio",
+ "tracing",
+ "web-time",
+]
+
+[[package]]
+name = "quinn-proto"
+version = "0.11.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31"
+dependencies = [
+ "bytes",
+ "getrandom 0.3.3",
+ "lru-slab",
+ "rand",
+ "ring",
+ "rustc-hash",
+ "rustls",
+ "rustls-pki-types",
+ "slab",
+ "thiserror 2.0.16",
+ "tinyvec",
+ "tracing",
+ "web-time",
+]
+
+[[package]]
+name = "quinn-udp"
+version = "0.5.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd"
+dependencies = [
+ "cfg_aliases 0.2.1",
+ "libc",
+ "once_cell",
+ "socket2",
+ "tracing",
+ "windows-sys 0.52.0",
+]
+
 [[package]]
 name = "quote"
 version = "1.0.40"
@@ -3634,6 +3864,8 @@ dependencies = [
 "native-tls",
 "percent-encoding",
 "pin-project-lite",
+ "quinn",
+ "rustls",
 "rustls-pki-types",
 "serde",
 "serde_json",
@@ -3641,6 +3873,7 @@ dependencies = [
 "sync_wrapper",
 "tokio",
 "tokio-native-tls",
+ "tokio-rustls",
 "tokio-util",
 "tower",
 "tower-http",
@@ -3650,6 +3883,7 @@ dependencies = [
 "wasm-bindgen-futures",
 "wasm-streams",
 "web-sys",
+ "webpki-roots",
 ]

 [[package]]
@@ -3666,12 +3900,54 @@ dependencies = [
 "windows-sys 0.52.0",
 ]

+[[package]]
+name = "rmcp"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "534fd1cd0601e798ac30545ff2b7f4a62c6f14edd4aaed1cc5eb1e85f69f09af"
+dependencies = [
+ "base64",
+ "chrono",
+ "futures",
+ "paste",
+ "pin-project-lite",
+ "process-wrap",
+ "rmcp-macros",
+ "schemars 1.0.4",
+ "serde",
+ "serde_json",
+ "thiserror 2.0.16",
+ "tokio",
+ "tokio-stream",
+ "tokio-util",
+ "tracing",
+]
+
+[[package]]
+name = "rmcp-macros"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ba777eb0e5f53a757e36f0e287441da0ab766564ba7201600eeb92a4753022e"
+dependencies = [
+ "darling 0.21.3",
+ "proc-macro2",
+ "quote",
+ "serde_json",
+ "syn 2.0.104",
+]
+
 [[package]]
 name = "rustc-demangle"
 version = "0.1.25"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f"

+[[package]]
+name = "rustc-hash"
+version = "2.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
+
 [[package]]
 name = "rustix"
 version = "0.38.44"
@@ -3705,6 +3981,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2491382039b29b9b11ff08b76ff6c97cf287671dbb74f0be44bda389fffe9bd1"
 dependencies = [
 "once_cell",
+ "ring",
 "rustls-pki-types",
 "rustls-webpki",
 "subtle",
@@ -3717,6 +3994,7 @@ version = "1.12.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79"
 dependencies = [
+ "web-time",
 "zeroize",
 ]

@@ -3751,7 +4029,7 @@ dependencies = [
 "libc",
 "log",
 "memchr",
- "nix",
+ "nix 0.28.0",
 "radix_trie",
 "unicode-segmentation",
 "unicode-width 0.1.14",
@@ -3832,7 +4110,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615"
 dependencies = [
 "dyn-clone",
- "schemars_derive",
+ "schemars_derive 0.8.22",
 "serde",
 "serde_json",
 ]
@@ -3855,8 +4133,10 @@ version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "82d20c4491bc164fa2f6c5d44565947a52ad80b9505d8e36f8d54c27c739fcd0"
 dependencies = [
+ "chrono",
 "dyn-clone",
 "ref-cast",
+ "schemars_derive 1.0.4",
 "serde",
 "serde_json",
 ]
@@ -3873,6 +4153,18 @@ dependencies = [
 "syn 2.0.104",
 ]

+[[package]]
+name = "schemars_derive"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "33d020396d1d138dc19f1165df7545479dcd58d93810dc5d646a16e55abefa80"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "serde_derive_internals",
+ "syn 2.0.104",
+]
+
 [[package]]
 name = "scopeguard"
 version = "1.2.0"
@@ -3913,9 +4205,9 @@ dependencies = [

 [[package]]
 name = "serde"
-version = "1.0.224"
+version = "1.0.226"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6aaeb1e94f53b16384af593c71e20b095e958dab1d26939c1b70645c5cfbcc0b"
+checksum = "0dca6411025b24b60bfa7ec1fe1f8e710ac09782dca409ee8237ba74b51295fd"
 dependencies = [
 "serde_core",
 "serde_derive",
@@ -3923,18 +4215,18 @@ dependencies = [

 [[package]]
 name = "serde_core"
-version = "1.0.224"
+version = "1.0.226"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "32f39390fa6346e24defbcdd3d9544ba8a19985d0af74df8501fbfe9a64341ab"
+checksum = "ba2ba63999edb9dac981fb34b3e5c0d111a69b0924e253ed29d83f7c99e966a4"
 dependencies = [
 "serde_derive",
 ]

 [[package]]
 name = "serde_derive"
-version = "1.0.224"
+version = "1.0.226"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "87ff78ab5e8561c9a675bfc1785cb07ae721f0ee53329a595cefd8c04c2ac4e0"
+checksum = "8db53ae22f34573731bafa1db20f04027b2d25e02d8205921b569171699cdb33"
 dependencies = [
 "proc-macro2",
 "quote",
@@ -4024,7 +4316,7 @@ version = "3.14.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "de90945e6565ce0d9a25098082ed4ee4002e047cb59892c318d66821e14bb30f"
 dependencies = [
- "darling",
+ "darling 0.20.11",
 "proc-macro2",
 "quote",
 "syn 2.0.104",
@@ -4437,15 +4729,15 @@ dependencies = [

 [[package]]
 name = "tempfile"
-version = "3.20.0"
+version = "3.23.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1"
+checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16"
 dependencies = [
 "fastrand",
 "getrandom 0.3.3",
 "once_cell",
 "rustix 1.0.8",
- "windows-sys 0.59.0",
+ "windows-sys 0.60.2",
 ]

 [[package]]
@@ -4569,9 +4861,9 @@ dependencies = [

 [[package]]
 name = "time"
-version = "0.3.41"
+version = "0.3.44"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40"
+checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d"
 dependencies = [
 "deranged",
 "itoa",
@@ -4586,15 +4878,15 @@ dependencies = [

 [[package]]
 name = "time-core"
-version = "0.1.4"
+version = "0.1.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c9e9a38711f559d9e3ce1cdb06dd7c5b8ea546bc90052da6d06bb76da74bb07c"
+checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b"

 [[package]]
 name = "time-macros"
-version = "0.2.22"
+version = "0.2.24"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3526739392ec93fd8b359c8e98514cb3e8e021beb4e5f597b00a0221f8ed8a49"
+checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3"
 dependencies = [
 "num-conv",
 "time-core",
@@ -4631,6 +4923,21 @@ dependencies = [
 "zerovec",
 ]

+[[package]]
+name = "tinyvec"
+version = "1.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa"
+dependencies = [
+ "tinyvec_macros",
+]
+
+[[package]]
+name = "tinyvec_macros"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
+
 [[package]]
 name = "tokio"
 version = "1.47.1"
@@ -5243,6 +5550,16 @@ dependencies = [
 "wasm-bindgen",
 ]

+[[package]]
+name = "web-time"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
+
 [[package]]
 name = "webbrowser"
 version = "1.0.5"
@@ -5259,6 +5576,15 @@ dependencies = [
 "web-sys",
 ]

+[[package]]
+name = "webpki-roots"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7e8983c3ab33d6fb807cfcdad2491c4ea8cbc8ed839181c7dfd9c67c83e261b2"
+dependencies = [
+ "rustls-pki-types",
+]
+
 [[package]]
 name = "weezl"
 version = "0.1.10"
@@ -5314,6 +5640,28 @@ version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

+[[package]]
+name = "windows"
+version = "0.61.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893"
+dependencies = [
+ "windows-collections",
+ "windows-core",
+ "windows-future",
+ "windows-link 0.1.3",
+ "windows-numerics",
+]
+
+[[package]]
+name = "windows-collections"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8"
+dependencies = [
+ "windows-core",
+]
+
 [[package]]
 name = "windows-core"
 version = "0.61.2"
@@ -5322,11 +5670,22 @@ checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3"
 dependencies = [
 "windows-implement",
 "windows-interface",
- "windows-link",
+ "windows-link 0.1.3",
 "windows-result",
 "windows-strings",
 ]

+[[package]]
+name = "windows-future"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e"
+dependencies = [
+ "windows-core",
+ "windows-link 0.1.3",
+ "windows-threading",
+]
+
 [[package]]
 name = "windows-implement"
 version = "0.60.0"
@@ -5355,13 +5714,29 @@ version = "0.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a"

+[[package]]
+name = "windows-link"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65"
+
+[[package]]
+name = "windows-numerics"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1"
+dependencies = [
+ "windows-core",
+ "windows-link 0.1.3",
+]
+
 [[package]]
 name = "windows-registry"
 version = "0.5.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e"
 dependencies = [
- "windows-link",
+ "windows-link 0.1.3",
 "windows-result",
 "windows-strings",
 ]
@@ -5372,7 +5747,7 @@ version = "0.3.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6"
 dependencies = [
- "windows-link",
+ "windows-link 0.1.3",
 ]

 [[package]]
@@ -5381,7 +5756,7 @@ version = "0.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57"
 dependencies = [
- "windows-link",
+ "windows-link 0.1.3",
 ]

 [[package]]
@@ -5482,6 +5857,15 @@ dependencies = [
 "windows_x86_64_msvc 0.53.0",
 ]

+[[package]]
+name = "windows-threading"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b66463ad2e0ea3bbf808b7f1d371311c80e115c0b71d60efc142cafbcfb057a6"
+dependencies = [
+ "windows-link 0.1.3",
+]
+
 [[package]]
 name = "windows_aarch64_gnullvm"
 version = "0.42.2"
--- a/codex-rs/Cargo.toml
+++ b/codex-rs/Cargo.toml
@@ -18,7 +18,10 @@ members = [
    "ollama",
    "protocol",
    "protocol-ts",
+    "rmcp-client",
+    "responses-api-proxy",
    "tui",
+    "utils/readiness",
 ]
 resolver = "2"

@@ -47,8 +50,11 @@ codex-mcp-client = { path = "mcp-client" }
 codex-mcp-server = { path = "mcp-server" }
 codex-ollama = { path = "ollama" }
 codex-protocol = { path = "protocol" }
+codex-rmcp-client = { path = "rmcp-client" }
 codex-protocol-ts = { path = "protocol-ts" }
+codex-responses-api-proxy = { path = "responses-api-proxy" }
 codex-tui = { path = "tui" }
+codex-utils-readiness = { path = "utils/readiness" }
 core_test_support = { path = "core/tests/common" }
 mcp-types = { path = "mcp-types" }
 mcp_test_support = { path = "mcp-server/tests/common" }
@@ -62,13 +68,15 @@ askama = "0.12"
 assert_cmd = "2"
 async-channel = "2.3.1"
 async-stream = "0.3.6"
+async-trait = "0.1.89"
 base64 = "0.22.1"
 bytes = "1.10.1"
-chrono = "0.4.40"
+chrono = "0.4.42"
 clap = "4"
 clap_complete = "4"
 color-eyre = "0.6.3"
 crossterm = "0.28.1"
+ctor = "0.5.0"
 derive_more = "2"
 diffy = "0.4.2"
 dirs = "6"
@@ -76,11 +84,13 @@ dotenvy = "0.15.7"
 env-flags = "0.1.1"
 env_logger = "0.11.5"
 eventsource-stream = "0.2.3"
+escargot = "0.5"
 futures = "0.3"
 icu_decimal = "2.0.0"
 icu_locale_core = "2.0.0"
 ignore = "0.4.23"
 image = { version = "^0.25.8", default-features = false }
+indexmap = "2.6.0"
 insta = "1.43.2"
 itertools = "0.14.0"
 landlock = "0.4.1"
@@ -91,7 +101,6 @@ maplit = "1.0.2"
 mime_guess = "2.0.5"
 multimap = "0.10.0"
 nucleo-matcher = "0.3.1"
-once_cell = "1"
 openssl-sys = "*"
 os_info = "3.12.0"
 owo-colors = "4.2.0"
@@ -120,7 +129,7 @@ strum = "0.27.2"
 strum_macros = "0.27.2"
 supports-color = "3.0.2"
 sys-locale = "0.3.2"
-tempfile = "3.13.0"
+tempfile = "3.23.0"
 textwrap = "0.16.2"
 thiserror = "2.0.16"
 time = "0.3"
@@ -138,7 +147,7 @@ tree-sitter = "0.25.9"
 tree-sitter-bash = "0.25.0"
 ts-rs = "11"
 unicode-segmentation = "1.12.0"
-unicode-width = "0.1"
+unicode-width = "0.2"
 url = "2"
 urlencoding = "2.1"
 uuid = "1"
@@ -148,6 +157,7 @@ webbrowser = "1.0"
 which = "6"
 wildmatch = "2.5.0"
 wiremock = "0.6"
+zeroize = "1.8.1"

 [workspace.lints]
 rust = {}
@@ -190,7 +200,7 @@ unwrap_used = "deny"
 # cargo-shear cannot see the platform-specific openssl-sys usage, so we
 # silence the false positive here instead of deleting a real dependency.
 [workspace.metadata.cargo-shear]
-ignored = ["openssl-sys"]
+ignored = ["openssl-sys", "codex-utils-readiness"]

 [profile.release]
 lto = "fat"
--- a/codex-rs/apply-patch/Cargo.toml
+++ b/codex-rs/apply-patch/Cargo.toml
@@ -20,7 +20,6 @@ similar = { workspace = true }
 thiserror = { workspace = true }
 tree-sitter = { workspace = true }
 tree-sitter-bash = { workspace = true }
-once_cell = { workspace = true }

 [dev-dependencies]
 assert_cmd = { workspace = true }
--- a/codex-rs/apply-patch/src/lib.rs
+++ b/codex-rs/apply-patch/src/lib.rs
@@ -6,10 +6,10 @@ use std::collections::HashMap;
 use std::path::Path;
 use std::path::PathBuf;
 use std::str::Utf8Error;
+use std::sync::LazyLock;

 use anyhow::Context;
 use anyhow::Result;
-use once_cell::sync::Lazy;
 pub use parser::Hunk;
 pub use parser::ParseError;
 use parser::ParseError::*;
@@ -351,7 +351,7 @@ fn extract_apply_patch_from_bash(
    // also run an arbitrary query against the AST. This is useful for understanding
    // how tree-sitter parses the script and whether the query syntax is correct. Be sure
    // to test both positive and negative cases.
-    static APPLY_PATCH_QUERY: Lazy<Query> = Lazy::new(|| {
+    static APPLY_PATCH_QUERY: LazyLock<Query> = LazyLock::new(|| {
        let language = BASH.into();
        #[expect(clippy::expect_used)]
        Query::new(
--- a/codex-rs/cli/Cargo.toml
+++ b/codex-rs/cli/Cargo.toml
@@ -27,7 +27,9 @@ codex-login = { workspace = true }
 codex-mcp-server = { workspace = true }
 codex-protocol = { workspace = true }
 codex-protocol-ts = { workspace = true }
+codex-responses-api-proxy = { workspace = true }
 codex-tui = { workspace = true }
+ctor = { workspace = true }
 owo-colors = { workspace = true }
 serde_json = { workspace = true }
 supports-color = { workspace = true }
@@ -41,6 +43,15 @@ tokio = { workspace = true, features = [
 tracing = { workspace = true }
 tracing-subscriber = { workspace = true }

+[target.'cfg(target_os = "linux")'.dependencies]
+libc = { workspace = true }
+
+[target.'cfg(target_os = "android")'.dependencies]
+libc = { workspace = true }
+
+[target.'cfg(target_os = "macos")'.dependencies]
+libc = { workspace = true }
+
 [dev-dependencies]
 assert_cmd = { workspace = true }
 predicates = { workspace = true }
--- a/codex-rs/cli/src/main.rs
+++ b/codex-rs/cli/src/main.rs
@@ -1,3 +1,4 @@
+use anyhow::Context;
 use clap::CommandFactory;
 use clap::Parser;
 use clap_complete::Shell;
@@ -14,6 +15,7 @@ use codex_cli::login::run_logout;
 use codex_cli::proto;
 use codex_common::CliConfigOverrides;
 use codex_exec::Cli as ExecCli;
+use codex_responses_api_proxy::Args as ResponsesApiProxyArgs;
 use codex_tui::AppExitInfo;
 use codex_tui::Cli as TuiCli;
 use owo_colors::OwoColorize;
@@ -21,6 +23,7 @@ use std::path::PathBuf;
 use supports_color::Stream;

 mod mcp_cmd;
+mod pre_main_hardening;

 use crate::mcp_cmd::McpCli;
 use crate::proto::ProtoCli;
@@ -85,6 +88,10 @@ enum Subcommand {
    /// Internal: generate TypeScript protocol bindings.
    #[clap(hide = true)]
    GenerateTs(GenerateTsCommand),
+
+    /// Internal: run the responses API proxy.
+    #[clap(hide = true)]
+    ResponsesApiProxy(ResponsesApiProxyArgs),
 }

 #[derive(Debug, Parser)]
@@ -194,6 +201,34 @@ fn print_exit_messages(exit_info: AppExitInfo) {
    }
 }

+pub(crate) const CODEX_SECURE_MODE_ENV_VAR: &str = "CODEX_SECURE_MODE";
+
+/// Pre-`main` constructor: when the CODEX_SECURE_MODE environment variable is
+/// "1", runs the platform-specific hardening; the variable is then removed.
+#[ctor::ctor]
+fn pre_main_hardening() {
+    // Nothing to harden (and nothing to clear) when the variable cannot be read.
+    let Ok(mode) = std::env::var(CODEX_SECURE_MODE_ENV_VAR) else {
+        return;
+    };
+
+    if mode == "1" {
+        #[cfg(any(target_os = "linux", target_os = "android"))]
+        crate::pre_main_hardening::pre_main_hardening_linux();
+
+        #[cfg(target_os = "macos")]
+        crate::pre_main_hardening::pre_main_hardening_macos();
+
+        #[cfg(windows)]
+        crate::pre_main_hardening::pre_main_hardening_windows();
+    }
+
+    // Drop the variable whatever its value so child processes never inherit it.
+    unsafe {
+        std::env::remove_var(CODEX_SECURE_MODE_ENV_VAR);
+    }
+}
+
 fn main() -> anyhow::Result<()> {
    arg0_dispatch_or_else(|codex_linux_sandbox_exe| async move {
        cli_main(codex_linux_sandbox_exe).await?;
@@ -312,6 +347,11 @@ async fn cli_main(codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()
        Some(Subcommand::GenerateTs(gen_cli)) => {
            codex_protocol_ts::generate_ts(&gen_cli.out_dir, gen_cli.prettier.as_deref())?;
        }
+        Some(Subcommand::ResponsesApiProxy(args)) => {
+            tokio::task::spawn_blocking(move || codex_responses_api_proxy::run_main(args))
+                .await
+                .context("responses-api-proxy blocking task panicked")??;
+        }
    }

    Ok(())
--- a/codex-rs/cli/src/pre_main_hardening.rs
+++ b/codex-rs/cli/src/pre_main_hardening.rs
@@ -0,0 +1,118 @@
+//! Platform-specific process hardening steps.
+//!
+//! Each `pre_main_hardening_*` entry point blocks debugger attachment, disables
+//! core dumps, and strips dynamic-loader environment variables. If a hardening
+//! system call fails, the process exits with a dedicated non-zero exit code.
+
+/// Exit code used when `prctl(PR_SET_DUMPABLE, 0)` fails.
+#[cfg(any(target_os = "linux", target_os = "android"))]
+const PRCTL_FAILED_EXIT_CODE: i32 = 5;
+
+/// Exit code used when `ptrace(PT_DENY_ATTACH)` fails.
+#[cfg(target_os = "macos")]
+const PTRACE_DENY_ATTACH_FAILED_EXIT_CODE: i32 = 6;
+
+/// Exit code used when `setrlimit(RLIMIT_CORE)` fails.
+#[cfg(any(target_os = "linux", target_os = "android", target_os = "macos"))]
+const SET_RLIMIT_CORE_FAILED_EXIT_CODE: i32 = 7;
+
+/// Hardens the current process on Linux and Android.
+///
+/// Marks the process non-dumpable, sets the core file size limit to zero, and
+/// removes every `LD_*` environment variable. Exits the process if `prctl` or
+/// `setrlimit` fails.
+#[cfg(any(target_os = "linux", target_os = "android"))]
+pub(crate) fn pre_main_hardening_linux() {
+    // Disable ptrace attach / mark process non-dumpable.
+    let ret_code = unsafe { libc::prctl(libc::PR_SET_DUMPABLE, 0, 0, 0, 0) };
+    if ret_code != 0 {
+        eprintln!(
+            "ERROR: prctl(PR_SET_DUMPABLE, 0) failed: {}",
+            std::io::Error::last_os_error()
+        );
+        std::process::exit(PRCTL_FAILED_EXIT_CODE);
+    }
+
+    // For "defense in depth," set the core file size limit to 0.
+    set_core_file_size_limit_to_zero();
+
+    // Official Codex releases are MUSL-linked, which means that variables such
+    // as LD_PRELOAD are ignored anyway, but just to be sure, clear them here.
+    remove_env_vars_with_prefix("LD_");
+}
+
+/// Hardens the current process on macOS.
+///
+/// Denies debugger attachment, sets the core file size limit to zero, and
+/// removes every `DYLD_*` environment variable. Exits the process if `ptrace`
+/// or `setrlimit` fails.
+#[cfg(target_os = "macos")]
+pub(crate) fn pre_main_hardening_macos() {
+    // Prevent debuggers from attaching to this process.
+    let ret_code = unsafe { libc::ptrace(libc::PT_DENY_ATTACH, 0, std::ptr::null_mut(), 0) };
+    if ret_code == -1 {
+        eprintln!(
+            "ERROR: ptrace(PT_DENY_ATTACH) failed: {}",
+            std::io::Error::last_os_error()
+        );
+        std::process::exit(PTRACE_DENY_ATTACH_FAILED_EXIT_CODE);
+    }
+
+    // Set the core file size limit to 0 to prevent core dumps.
+    set_core_file_size_limit_to_zero();
+
+    // Remove all DYLD_ environment variables, which can be used to subvert
+    // library loading.
+    remove_env_vars_with_prefix("DYLD_");
+}
+
+/// Sets both the soft and hard `RLIMIT_CORE` limits to zero, exiting the
+/// process if `setrlimit` fails.
+// Gated on the same targets as `SET_RLIMIT_CORE_FAILED_EXIT_CODE` and the
+// `libc` dependency rather than on all of `unix`: a bare `cfg(unix)` would
+// fail to compile on any other Unix target.
+#[cfg(any(target_os = "linux", target_os = "android", target_os = "macos"))]
+fn set_core_file_size_limit_to_zero() {
+    let rlim = libc::rlimit {
+        rlim_cur: 0,
+        rlim_max: 0,
+    };
+
+    // SAFETY: `rlim` is a live local, so the pointer handed to `setrlimit`
+    // is valid for the duration of the call.
+    let ret_code = unsafe { libc::setrlimit(libc::RLIMIT_CORE, &rlim) };
+    if ret_code != 0 {
+        eprintln!(
+            "ERROR: setrlimit(RLIMIT_CORE) failed: {}",
+            std::io::Error::last_os_error()
+        );
+        std::process::exit(SET_RLIMIT_CORE_FAILED_EXIT_CODE);
+    }
+}
+
+/// Removes every environment variable whose name starts with `prefix`.
+///
+/// Reads the environment with `std::env::vars_os`, because `std::env::vars`
+/// panics when any variable name or value is not valid Unicode.
+#[cfg(any(target_os = "linux", target_os = "android", target_os = "macos"))]
+fn remove_env_vars_with_prefix(prefix: &str) {
+    let keys: Vec<std::ffi::OsString> = std::env::vars_os()
+        .map(|(key, _)| key)
+        .filter(|key| key.as_encoded_bytes().starts_with(prefix.as_bytes()))
+        .collect();
+
+    for key in keys {
+        // SAFETY: NOTE(review): `remove_var` is only sound while no other
+        // thread accesses the environment; this presumes the hardening entry
+        // points run before any threads are spawned -- confirm at the caller.
+        unsafe {
+            std::env::remove_var(key);
+        }
+    }
+}
+
+/// Placeholder for Windows hardening; currently does nothing.
+#[cfg(windows)]
+pub(crate) fn pre_main_hardening_windows() {
+    // TODO(mbolin): Perform the appropriate configuration for Windows.
+}
--- a/codex-rs/code
+++ b/codex-rs/code
--- a/codex-rs/core/Cargo.toml
+++ b/codex-rs/core/Cargo.toml
@@ -15,17 +15,20 @@ workspace = true
 anyhow = { workspace = true }
 askama = { workspace = true }
 async-channel = { workspace = true }
+async-trait = { workspace = true }
 base64 = { workspace = true }
 bytes = { workspace = true }
 chrono = { workspace = true, features = ["serde"] }
 codex-apply-patch = { workspace = true }
 codex-file-search = { workspace = true }
 codex-mcp-client = { workspace = true }
+codex-rmcp-client = { workspace = true }
 codex-protocol = { workspace = true }
 dirs = { workspace = true }
 env-flags = { workspace = true }
 eventsource-stream = { workspace = true }
 futures = { workspace = true }
+indexmap = { workspace = true }
 libc = { workspace = true }
 mcp-types = { workspace = true }
 os_info = { workspace = true }
@@ -80,6 +83,7 @@ openssl-sys = { workspace = true, features = ["vendored"] }
 [dev-dependencies]
 assert_cmd = { workspace = true }
 core_test_support = { workspace = true }
+escargot = { workspace = true }
 maplit = { workspace = true }
 predicates = { workspace = true }
 pretty_assertions = { workspace = true }
--- a/codex-rs/core/src/apply_patch.rs
+++ b/codex-rs/core/src/apply_patch.rs
@@ -1,13 +1,12 @@
 use crate::codex::Session;
 use crate::codex::TurnContext;
+use crate::function_tool::FunctionCallError;
 use crate::protocol::FileChange;
 use crate::protocol::ReviewDecision;
 use crate::safety::SafetyCheck;
 use crate::safety::assess_patch_safety;
 use codex_apply_patch::ApplyPatchAction;
 use codex_apply_patch::ApplyPatchFileChange;
-use codex_protocol::models::FunctionCallOutputPayload;
-use codex_protocol::models::ResponseInputItem;
 use std::collections::HashMap;
 use std::path::PathBuf;

@@ -17,7 +16,7 @@ pub(crate) enum InternalApplyPatchInvocation {
    /// The `apply_patch` call was handled programmatically, without any sort
    /// of sandbox, because the user explicitly approved it. This is the
    /// result to use with the `shell` function call that contained `apply_patch`.
-    Output(ResponseInputItem),
+    Output(Result<String, FunctionCallError>),

    /// The `apply_patch` call was approved, either automatically because it
    /// appears that it should be allowed based on the user's sandbox policy
@@ -33,12 +32,6 @@ pub(crate) struct ApplyPatchExec {
    pub(crate) user_explicitly_approved_this_action: bool,
 }

-impl From<ResponseInputItem> for InternalApplyPatchInvocation {
-    fn from(item: ResponseInputItem) -> Self {
-        InternalApplyPatchInvocation::Output(item)
-    }
-}
-
 pub(crate) async fn apply_patch(
    sess: &Session,
    turn_context: &TurnContext,
@@ -77,25 +70,15 @@ pub(crate) async fn apply_patch(
                    })
                }
                ReviewDecision::Denied | ReviewDecision::Abort => {
-                    ResponseInputItem::FunctionCallOutput {
-                        call_id: call_id.to_owned(),
-                        output: FunctionCallOutputPayload {
-                            content: "patch rejected by user".to_string(),
-                            success: Some(false),
-                        },
-                    }
-                    .into()
+                    InternalApplyPatchInvocation::Output(Err(FunctionCallError::RespondToModel(
+                        "patch rejected by user".to_string(),
+                    )))
                }
            }
        }
-        SafetyCheck::Reject { reason } => ResponseInputItem::FunctionCallOutput {
-            call_id: call_id.to_owned(),
-            output: FunctionCallOutputPayload {
-                content: format!("patch rejected: {reason}"),
-                success: Some(false),
-            },
-        }
-        .into(),
+        SafetyCheck::Reject { reason } => InternalApplyPatchInvocation::Output(Err(
+            FunctionCallError::RespondToModel(format!("patch rejected: {reason}")),
+        )),
    }
 }

--- a/codex-rs/core/src/auth.rs
+++ b/codex-rs/core/src/auth.rs
@@ -267,6 +267,9 @@ pub fn try_read_auth_json(auth_file: &Path) -> std::io::Result<AuthDotJson> {
 }

 pub fn write_auth_json(auth_file: &Path, auth_dot_json: &AuthDotJson) -> std::io::Result<()> {
+    if let Some(parent) = auth_file.parent() {
+        std::fs::create_dir_all(parent)?;
+    }
    let json_data = serde_json::to_string_pretty(auth_dot_json)?;
    let mut options = OpenOptions::new();
    options.truncate(true).write(true).create(true);
--- a/codex-rs/core/src/bash.rs
+++ b/codex-rs/core/src/bash.rs
@@ -88,6 +88,21 @@ pub fn try_parse_word_only_commands_sequence(tree: &Tree, src: &str) -> Option<V
    Some(commands)
 }

+/// Extracts the plain commands from a `bash -lc "<script>"` invocation.
+///
+/// Yields `None` unless the script holds only word-only commands joined by safe operators.
+pub fn parse_bash_lc_plain_commands(command: &[String]) -> Option<Vec<Vec<String>>> {
+    // Accept only the exact three-element form `["bash", "-lc", <script>]`.
+    let script = match command {
+        [program, option, script] if program == "bash" && option == "-lc" => script,
+        _ => return None,
+    };
+
+    // Hand the script to the bash parser; `?` propagates a `None` from it.
+    let parsed = try_parse_bash(script)?;
+    try_parse_word_only_commands_sequence(&parsed, script)
+}
+
 fn parse_plain_command_from_node(cmd: tree_sitter::Node, src: &str) -> Option<Vec<String>> {
    if cmd.kind() != "command" {
        return None;
--- a/codex-rs/core/src/chat_completions.rs
+++ b/codex-rs/core/src/chat_completions.rs
@@ -35,6 +35,12 @@ pub(crate) async fn stream_chat_completions(
    client: &reqwest::Client,
    provider: &ModelProviderInfo,
 ) -> Result<ResponseStream> {
+    if prompt.output_schema.is_some() {
+        return Err(CodexErr::UnsupportedOperation(
+            "output_schema is not supported for Chat Completions API".to_string(),
+        ));
+    }
+
    // Build messages array
    let mut messages = Vec::<serde_json::Value>::new();

--- a/codex-rs/core/src/client.rs
+++ b/codex-rs/core/src/client.rs
@@ -42,7 +42,8 @@ use crate::model_provider_info::ModelProviderInfo;
 use crate::model_provider_info::WireApi;
 use crate::openai_model_info::get_model_info;
 use crate::openai_tools::create_tools_json_for_responses_api;
-use crate::protocol::RateLimitSnapshotEvent;
+use crate::protocol::RateLimitSnapshot;
+use crate::protocol::RateLimitWindow;
 use crate::protocol::TokenUsage;
 use crate::token_data::PlanType;
 use crate::util::backoff;
@@ -183,19 +184,23 @@ impl ModelClient {

        let input_with_instructions = prompt.get_formatted_input();

-        // Only include `text.verbosity` for GPT-5 family models
-        let text = if self.config.model_family.family == "gpt-5" {
-            create_text_param_for_request(self.config.model_verbosity)
-        } else {
-            if self.config.model_verbosity.is_some() {
-                warn!(
-                    "model_verbosity is set but ignored for non-gpt-5 model family: {}",
-                    self.config.model_family.family
-                );
+        let verbosity = match &self.config.model_family.family {
+            family if family == "gpt-5" => self.config.model_verbosity,
+            _ => {
+                if self.config.model_verbosity.is_some() {
+                    warn!(
+                        "model_verbosity is set but ignored for non-gpt-5 model family: {}",
+                        self.config.model_family.family
+                    );
+                }
+
+                None
            }
-            None
        };

+        // Only include `text.verbosity` for GPT-5 family models
+        let text = create_text_param_for_request(verbosity, &prompt.output_schema);
+
        // In general, we want to explicitly send `store: false` when using the Responses API,
        // but in practice, the Azure Responses API rejects `store: false`:
        //
@@ -224,153 +229,169 @@ impl ModelClient {
        if azure_workaround {
            attach_item_ids(&mut payload_json, &input_with_instructions);
        }
-        let payload_body = serde_json::to_string(&payload_json)?;

-        let mut attempt = 0;
-        let max_retries = self.provider.request_max_retries();
-
-        loop {
-            attempt += 1;
-
-            // Always fetch the latest auth in case a prior attempt refreshed the token.
-            let auth = auth_manager.as_ref().and_then(|m| m.auth());
-
-            trace!(
-                "POST to {}: {}",
-                self.provider.get_full_url(&auth),
-                payload_body.as_str()
-            );
-
-            let mut req_builder = self
-                .provider
-                .create_request_builder(&self.client, &auth)
-                .await?;
-
-            req_builder = req_builder
-                .header("OpenAI-Beta", "responses=experimental")
-                // Send session_id for compatibility.
-                .header("conversation_id", self.conversation_id.to_string())
-                .header("session_id", self.conversation_id.to_string())
-                .header(reqwest::header::ACCEPT, "text/event-stream")
-                .json(&payload_json);
-
-            if let Some(auth) = auth.as_ref()
-                && auth.mode == AuthMode::ChatGPT
-                && let Some(account_id) = auth.get_account_id()
+        let max_attempts = self.provider.request_max_retries();
+        for attempt in 0..=max_attempts {
+            match self
+                .attempt_stream_responses(&payload_json, &auth_manager)
+                .await
            {
-                req_builder = req_builder.header("chatgpt-account-id", account_id);
-            }
-
-            let res = req_builder.send().await;
-            if let Ok(resp) = &res {
-                trace!(
-                    "Response status: {}, cf-ray: {}",
-                    resp.status(),
-                    resp.headers()
-                        .get("cf-ray")
-                        .map(|v| v.to_str().unwrap_or_default())
-                        .unwrap_or_default()
-                );
-            }
-
-            match res {
-                Ok(resp) if resp.status().is_success() => {
-                    let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(1600);
-
-                    if let Some(snapshot) = parse_rate_limit_snapshot(resp.headers())
-                        && tx_event
-                            .send(Ok(ResponseEvent::RateLimits(snapshot)))
-                            .await
-                            .is_err()
-                    {
-                        debug!("receiver dropped rate limit snapshot event");
-                    }
-
-                    // spawn task to process SSE
-                    let stream = resp.bytes_stream().map_err(CodexErr::Reqwest);
-                    tokio::spawn(process_sse(
-                        stream,
-                        tx_event,
-                        self.provider.stream_idle_timeout(),
-                    ));
-
-                    return Ok(ResponseStream { rx_event });
+                Ok(stream) => {
+                    return Ok(stream);
                }
-                Ok(res) => {
-                    let status = res.status();
-
-                    // Pull out Retry‑After header if present.
-                    let retry_after_secs = res
-                        .headers()
-                        .get(reqwest::header::RETRY_AFTER)
-                        .and_then(|v| v.to_str().ok())
-                        .and_then(|s| s.parse::<u64>().ok());
-
-                    if status == StatusCode::UNAUTHORIZED
-                        && let Some(manager) = auth_manager.as_ref()
-                        && manager.auth().is_some()
-                    {
-                        let _ = manager.refresh_token().await;
+                Err(StreamAttemptError::Fatal(e)) => {
+                    return Err(e);
+                }
+                Err(retryable_attempt_error) => {
+                    if attempt == max_attempts {
+                        return Err(retryable_attempt_error.into_error());
                    }

-                    // The OpenAI Responses endpoint returns structured JSON bodies even for 4xx/5xx
-                    // errors. When we bubble early with only the HTTP status the caller sees an opaque
-                    // "unexpected status 400 Bad Request" which makes debugging nearly impossible.
-                    // Instead, read (and include) the response text so higher layers and users see the
-                    // exact error message (e.g. "Unknown parameter: 'input[0].metadata'"). The body is
-                    // small and this branch only runs on error paths so the extra allocation is
-                    // negligible.
-                    if !(status == StatusCode::TOO_MANY_REQUESTS
-                        || status == StatusCode::UNAUTHORIZED
-                        || status.is_server_error())
-                    {
-                        // Surface the error body to callers. Use `unwrap_or_default` per Clippy.
-                        let body = res.text().await.unwrap_or_default();
-                        return Err(CodexErr::UnexpectedStatus(status, body));
-                    }
+                    tokio::time::sleep(retryable_attempt_error.delay(attempt)).await;
+                }
+            }
+        }

-                    if status == StatusCode::TOO_MANY_REQUESTS {
-                        let body = res.json::<ErrorResponse>().await.ok();
-                        if let Some(ErrorResponse { error }) = body {
-                            if error.r#type.as_deref() == Some("usage_limit_reached") {
-                                // Prefer the plan_type provided in the error message if present
-                                // because it's more up to date than the one encoded in the auth
-                                // token.
-                                let plan_type = error
-                                    .plan_type
-                                    .or_else(|| auth.as_ref().and_then(CodexAuth::get_plan_type));
-                                let resets_in_seconds = error.resets_in_seconds;
-                                return Err(CodexErr::UsageLimitReached(UsageLimitReachedError {
-                                    plan_type,
-                                    resets_in_seconds,
-                                }));
-                            } else if error.r#type.as_deref() == Some("usage_not_included") {
-                                return Err(CodexErr::UsageNotIncluded);
-                            }
+        unreachable!("stream_responses_attempt should always return");
+    }
+
+    /// Single attempt to start a streaming Responses API call.
+    async fn attempt_stream_responses(
+        &self,
+        payload_json: &Value,
+        auth_manager: &Option<Arc<AuthManager>>,
+    ) -> std::result::Result<ResponseStream, StreamAttemptError> {
+        // Always fetch the latest auth in case a prior attempt refreshed the token.
+        let auth = auth_manager.as_ref().and_then(|m| m.auth());
+
+        trace!(
+            "POST to {}: {:?}",
+            self.provider.get_full_url(&auth),
+            serde_json::to_string(payload_json)
+        );
+
+        let mut req_builder = self
+            .provider
+            .create_request_builder(&self.client, &auth)
+            .await
+            .map_err(StreamAttemptError::Fatal)?;
+
+        req_builder = req_builder
+            .header("OpenAI-Beta", "responses=experimental")
+            // Send session_id for compatibility.
+            .header("conversation_id", self.conversation_id.to_string())
+            .header("session_id", self.conversation_id.to_string())
+            .header(reqwest::header::ACCEPT, "text/event-stream")
+            .json(payload_json);
+
+        if let Some(auth) = auth.as_ref()
+            && auth.mode == AuthMode::ChatGPT
+            && let Some(account_id) = auth.get_account_id()
+        {
+            req_builder = req_builder.header("chatgpt-account-id", account_id);
+        }
+
+        let res = req_builder.send().await;
+        if let Ok(resp) = &res {
+            trace!(
+                "Response status: {}, cf-ray: {}",
+                resp.status(),
+                resp.headers()
+                    .get("cf-ray")
+                    .map(|v| v.to_str().unwrap_or_default())
+                    .unwrap_or_default()
+            );
+        }
+
+        match res {
+            Ok(resp) if resp.status().is_success() => {
+                let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(1600);
+
+                if let Some(snapshot) = parse_rate_limit_snapshot(resp.headers())
+                    && tx_event
+                        .send(Ok(ResponseEvent::RateLimits(snapshot)))
+                        .await
+                        .is_err()
+                {
+                    debug!("receiver dropped rate limit snapshot event");
+                }
+
+                // spawn task to process SSE
+                let stream = resp.bytes_stream().map_err(CodexErr::Reqwest);
+                tokio::spawn(process_sse(
+                    stream,
+                    tx_event,
+                    self.provider.stream_idle_timeout(),
+                ));
+
+                Ok(ResponseStream { rx_event })
+            }
+            Ok(res) => {
+                let status = res.status();
+
+                // Pull out Retry‑After header if present.
+                let retry_after_secs = res
+                    .headers()
+                    .get(reqwest::header::RETRY_AFTER)
+                    .and_then(|v| v.to_str().ok())
+                    .and_then(|s| s.parse::<u64>().ok());
+                let retry_after = retry_after_secs.map(|s| Duration::from_millis(s * 1_000));
+
+                if status == StatusCode::UNAUTHORIZED
+                    && let Some(manager) = auth_manager.as_ref()
+                    && manager.auth().is_some()
+                {
+                    let _ = manager.refresh_token().await;
+                }
+
+                // The OpenAI Responses endpoint returns structured JSON bodies even for 4xx/5xx
+                // errors. When we bubble early with only the HTTP status the caller sees an opaque
+                // "unexpected status 400 Bad Request" which makes debugging nearly impossible.
+                // Instead, read (and include) the response text so higher layers and users see the
+                // exact error message (e.g. "Unknown parameter: 'input[0].metadata'"). The body is
+                // small and this branch only runs on error paths so the extra allocation is
+                // negligible.
+                if !(status == StatusCode::TOO_MANY_REQUESTS
+                    || status == StatusCode::UNAUTHORIZED
+                    || status.is_server_error())
+                {
+                    // Surface the error body to callers. Use `unwrap_or_default` per Clippy.
+                    let body = res.text().await.unwrap_or_default();
+                    return Err(StreamAttemptError::Fatal(CodexErr::UnexpectedStatus(
+                        status, body,
+                    )));
+                }
+
+                if status == StatusCode::TOO_MANY_REQUESTS {
+                    let rate_limit_snapshot = parse_rate_limit_snapshot(res.headers());
+                    let body = res.json::<ErrorResponse>().await.ok();
+                    if let Some(ErrorResponse { error }) = body {
+                        if error.r#type.as_deref() == Some("usage_limit_reached") {
+                            // Prefer the plan_type provided in the error message if present
+                            // because it's more up to date than the one encoded in the auth
+                            // token.
+                            let plan_type = error
+                                .plan_type
+                                .or_else(|| auth.as_ref().and_then(CodexAuth::get_plan_type));
+                            let resets_in_seconds = error.resets_in_seconds;
+                            let codex_err = CodexErr::UsageLimitReached(UsageLimitReachedError {
+                                plan_type,
+                                resets_in_seconds,
+                                rate_limits: rate_limit_snapshot,
+                            });
+                            return Err(StreamAttemptError::Fatal(codex_err));
+                        } else if error.r#type.as_deref() == Some("usage_not_included") {
+                            return Err(StreamAttemptError::Fatal(CodexErr::UsageNotIncluded));
                        }
                    }
-
-                    if attempt > max_retries {
-                        if status == StatusCode::INTERNAL_SERVER_ERROR {
-                            return Err(CodexErr::InternalServerError);
-                        }
-
-                        return Err(CodexErr::RetryLimit(status));
-                    }
-
-                    let delay = retry_after_secs
-                        .map(|s| Duration::from_millis(s * 1_000))
-                        .unwrap_or_else(|| backoff(attempt));
-                    tokio::time::sleep(delay).await;
-                }
-                Err(e) => {
-                    if attempt > max_retries {
-                        return Err(e.into());
-                    }
-                    let delay = backoff(attempt);
-                    tokio::time::sleep(delay).await;
                }
+
+                Err(StreamAttemptError::RetryableHttpError {
+                    status,
+                    retry_after,
+                })
            }
+            Err(e) => Err(StreamAttemptError::RetryableTransportError(e.into())),
        }
    }

@@ -403,6 +424,47 @@ impl ModelClient {
    }
 }

+enum StreamAttemptError {
+    RetryableHttpError {
+        status: StatusCode,
+        retry_after: Option<Duration>,
+    },
+    RetryableTransportError(CodexErr),
+    Fatal(CodexErr),
+}
+
+impl StreamAttemptError {
+    /// attempt is 0-based.
+    fn delay(&self, attempt: u64) -> Duration {
+        // backoff() uses 1-based attempts.
+        let backoff_attempt = attempt + 1;
+        match self {
+            Self::RetryableHttpError { retry_after, .. } => {
+                retry_after.unwrap_or_else(|| backoff(backoff_attempt))
+            }
+            Self::RetryableTransportError { .. } => backoff(backoff_attempt),
+            Self::Fatal(_) => {
+                // Should not be called on Fatal errors.
+                Duration::from_secs(0)
+            }
+        }
+    }
+
+    fn into_error(self) -> CodexErr {
+        match self {
+            Self::RetryableHttpError { status, .. } => {
+                if status == StatusCode::INTERNAL_SERVER_ERROR {
+                    CodexErr::InternalServerError
+                } else {
+                    CodexErr::RetryLimit(status)
+                }
+            }
+            Self::RetryableTransportError(error) => error,
+            Self::Fatal(error) => error,
+        }
+    }
+}
+
 #[derive(Debug, Deserialize, Serialize)]
 struct SseEvent {
    #[serde(rename = "type")]
@@ -412,9 +474,6 @@ struct SseEvent {
    delta: Option<String>,
 }

-#[derive(Debug, Deserialize)]
-struct ResponseCreated {}
-
 #[derive(Debug, Deserialize)]
 struct ResponseCompleted {
    id: String,
@@ -485,20 +544,45 @@ fn attach_item_ids(payload_json: &mut Value, original_items: &[ResponseItem]) {
    }
 }

-fn parse_rate_limit_snapshot(headers: &HeaderMap) -> Option<RateLimitSnapshotEvent> {
-    let primary_used_percent = parse_header_f64(headers, "x-codex-primary-used-percent")?;
-    let secondary_used_percent = parse_header_f64(headers, "x-codex-secondary-used-percent")?;
-    let primary_to_secondary_ratio_percent =
-        parse_header_f64(headers, "x-codex-primary-over-secondary-limit-percent")?;
-    let primary_window_minutes = parse_header_u64(headers, "x-codex-primary-window-minutes")?;
-    let secondary_window_minutes = parse_header_u64(headers, "x-codex-secondary-window-minutes")?;
+fn parse_rate_limit_snapshot(headers: &HeaderMap) -> Option<RateLimitSnapshot> {
+    let primary = parse_rate_limit_window(
+        headers,
+        "x-codex-primary-used-percent",
+        "x-codex-primary-window-minutes",
+        "x-codex-primary-reset-after-seconds",
+    );

-    Some(RateLimitSnapshotEvent {
-        primary_used_percent,
-        secondary_used_percent,
-        primary_to_secondary_ratio_percent,
-        primary_window_minutes,
-        secondary_window_minutes,
+    let secondary = parse_rate_limit_window(
+        headers,
+        "x-codex-secondary-used-percent",
+        "x-codex-secondary-window-minutes",
+        "x-codex-secondary-reset-after-seconds",
+    );
+
+    Some(RateLimitSnapshot { primary, secondary })
+}
+
+fn parse_rate_limit_window(
+    headers: &HeaderMap,
+    used_percent_header: &str,
+    window_minutes_header: &str,
+    resets_header: &str,
+) -> Option<RateLimitWindow> {
+    let used_percent: Option<f64> = parse_header_f64(headers, used_percent_header);
+
+    used_percent.and_then(|used_percent| {
+        let window_minutes = parse_header_u64(headers, window_minutes_header);
+        let resets_in_seconds = parse_header_u64(headers, resets_header);
+
+        let has_data = used_percent != 0.0
+            || window_minutes.is_some_and(|minutes| minutes != 0)
+            || resets_in_seconds.is_some_and(|seconds| seconds != 0);
+
+        has_data.then_some(RateLimitWindow {
+            used_percent,
+            window_minutes,
+            resets_in_seconds,
+        })
    })
 }

--- a/codex-rs/core/src/client_common.rs
+++ b/codex-rs/core/src/client_common.rs
@@ -1,7 +1,7 @@
 use crate::error::Result;
 use crate::model_family::ModelFamily;
 use crate::openai_tools::OpenAiTool;
-use crate::protocol::RateLimitSnapshotEvent;
+use crate::protocol::RateLimitSnapshot;
 use crate::protocol::TokenUsage;
 use codex_apply_patch::APPLY_PATCH_TOOL_INSTRUCTIONS;
 use codex_protocol::config_types::ReasoningEffort as ReasoningEffortConfig;
@@ -10,6 +10,7 @@ use codex_protocol::config_types::Verbosity as VerbosityConfig;
 use codex_protocol::models::ResponseItem;
 use futures::Stream;
 use serde::Serialize;
+use serde_json::Value;
 use std::borrow::Cow;
 use std::ops::Deref;
 use std::pin::Pin;
@@ -32,6 +33,9 @@ pub struct Prompt {

    /// Optional override for the built-in BASE_INSTRUCTIONS.
    pub base_instructions_override: Option<String>,
+
+    /// Optional output schema for the model's response.
+    pub output_schema: Option<Value>,
 }

 impl Prompt {
@@ -79,7 +83,7 @@ pub enum ResponseEvent {
    WebSearchCallBegin {
        call_id: String,
    },
-    RateLimits(RateLimitSnapshotEvent),
+    RateLimits(RateLimitSnapshot),
 }

 #[derive(Debug, Serialize)]
@@ -90,14 +94,31 @@ pub(crate) struct Reasoning {
    pub(crate) summary: Option<ReasoningSummaryConfig>,
 }

+#[derive(Debug, Serialize, Default, Clone)]
+#[serde(rename_all = "snake_case")]
+pub(crate) enum TextFormatType {
+    #[default]
+    JsonSchema,
+}
+
+#[derive(Debug, Serialize, Default, Clone)]
+pub(crate) struct TextFormat {
+    pub(crate) r#type: TextFormatType,
+    pub(crate) strict: bool,
+    pub(crate) schema: Value,
+    pub(crate) name: String,
+}
+
 /// Controls under the `text` field in the Responses API for GPT-5.
-#[derive(Debug, Serialize, Default, Clone, Copy)]
+#[derive(Debug, Serialize, Default, Clone)]
 pub(crate) struct TextControls {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub(crate) verbosity: Option<OpenAiVerbosity>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub(crate) format: Option<TextFormat>,
 }

-#[derive(Debug, Serialize, Default, Clone, Copy)]
+#[derive(Debug, Serialize, Default, Clone)]
 #[serde(rename_all = "lowercase")]
 pub(crate) enum OpenAiVerbosity {
    Low,
@@ -156,9 +177,20 @@ pub(crate) fn create_reasoning_param_for_request(

 pub(crate) fn create_text_param_for_request(
    verbosity: Option<VerbosityConfig>,
+    output_schema: &Option<Value>,
 ) -> Option<TextControls> {
-    verbosity.map(|v| TextControls {
-        verbosity: Some(v.into()),
+    if verbosity.is_none() && output_schema.is_none() {
+        return None;
+    }
+
+    Some(TextControls {
+        verbosity: verbosity.map(std::convert::Into::into),
+        format: output_schema.as_ref().map(|schema| TextFormat {
+            r#type: TextFormatType::JsonSchema,
+            strict: true,
+            schema: schema.clone(),
+            name: "codex_output_schema".to_string(),
+        }),
    })
 }

@@ -255,6 +287,7 @@ mod tests {
            prompt_cache_key: None,
            text: Some(TextControls {
                verbosity: Some(OpenAiVerbosity::Low),
+                format: None,
            }),
        };

@@ -267,6 +300,52 @@ mod tests {
        );
    }

+    #[test]
+    fn serializes_text_schema_with_strict_format() {
+        let input: Vec<ResponseItem> = vec![];
+        let tools: Vec<serde_json::Value> = vec![];
+        let schema = serde_json::json!({
+            "type": "object",
+            "properties": {
+                "answer": {"type": "string"}
+            },
+            "required": ["answer"],
+        });
+        let text_controls =
+            create_text_param_for_request(None, &Some(schema.clone())).expect("text controls");
+
+        let req = ResponsesApiRequest {
+            model: "gpt-5",
+            instructions: "i",
+            input: &input,
+            tools: &tools,
+            tool_choice: "auto",
+            parallel_tool_calls: false,
+            reasoning: None,
+            store: false,
+            stream: true,
+            include: vec![],
+            prompt_cache_key: None,
+            text: Some(text_controls),
+        };
+
+        let v = serde_json::to_value(&req).expect("json");
+        let text = v.get("text").expect("text field");
+        assert!(text.get("verbosity").is_none());
+        let format = text.get("format").expect("format field");
+
+        assert_eq!(
+            format.get("name"),
+            Some(&serde_json::Value::String("codex_output_schema".into()))
+        );
+        assert_eq!(
+            format.get("type"),
+            Some(&serde_json::Value::String("json_schema".into()))
+        );
+        assert_eq!(format.get("strict"), Some(&serde_json::Value::Bool(true)));
+        assert_eq!(format.get("schema"), Some(&schema));
+    }
+
    #[test]
    fn omits_text_when_not_set() {
        let input: Vec<ResponseItem> = vec![];
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
--- a/codex-rs/core/src/codex/compact.rs
+++ b/codex-rs/core/src/codex/compact.rs
@@ -1,6 +1,5 @@
 use std::sync::Arc;

-use super::AgentTask;
 use super::Session;
 use super::TurnContext;
 use super::get_last_assistant_message_from_turn;
@@ -15,7 +14,6 @@ use crate::protocol::Event;
 use crate::protocol::EventMsg;
 use crate::protocol::InputItem;
 use crate::protocol::InputMessageKind;
-use crate::protocol::TaskCompleteEvent;
 use crate::protocol::TaskStartedEvent;
 use crate::protocol::TurnContextItem;
 use crate::truncate::truncate_middle;
@@ -27,8 +25,7 @@ use codex_protocol::models::ResponseItem;
 use codex_protocol::protocol::RolloutItem;
 use futures::prelude::*;

-pub(super) const COMPACT_TRIGGER_TEXT: &str = "Start Summarization";
-const SUMMARIZATION_PROMPT: &str = include_str!("../../templates/compact/prompt.md");
+pub const SUMMARIZATION_PROMPT: &str = include_str!("../../templates/compact/prompt.md");
 const COMPACT_USER_MESSAGE_MAX_TOKENS: usize = 20_000;

 #[derive(Template)]
@@ -38,48 +35,23 @@ struct HistoryBridgeTemplate<'a> {
    summary_text: &'a str,
 }

-pub(super) async fn spawn_compact_task(
-    sess: Arc<Session>,
-    turn_context: Arc<TurnContext>,
-    sub_id: String,
-    input: Vec<InputItem>,
-) {
-    let task = AgentTask::compact(
-        sess.clone(),
-        turn_context,
-        sub_id,
-        input,
-        SUMMARIZATION_PROMPT.to_string(),
-    );
-    sess.set_task(task).await;
-}
-
-pub(super) async fn run_inline_auto_compact_task(
+pub(crate) async fn run_inline_auto_compact_task(
    sess: Arc<Session>,
    turn_context: Arc<TurnContext>,
 ) {
    let sub_id = sess.next_internal_sub_id();
    let input = vec![InputItem::Text {
-        text: COMPACT_TRIGGER_TEXT.to_string(),
+        text: SUMMARIZATION_PROMPT.to_string(),
    }];
-    run_compact_task_inner(
-        sess,
-        turn_context,
-        sub_id,
-        input,
-        SUMMARIZATION_PROMPT.to_string(),
-        false,
-    )
-    .await;
+    run_compact_task_inner(sess, turn_context, sub_id, input).await;
 }

-pub(super) async fn run_compact_task(
+pub(crate) async fn run_compact_task(
    sess: Arc<Session>,
    turn_context: Arc<TurnContext>,
    sub_id: String,
    input: Vec<InputItem>,
-    compact_instructions: String,
-) {
+) -> Option<String> {
    let start_event = Event {
        id: sub_id.clone(),
        msg: EventMsg::TaskStarted(TaskStartedEvent {
@@ -87,22 +59,8 @@ pub(super) async fn run_compact_task(
        }),
    };
    sess.send_event(start_event).await;
-    run_compact_task_inner(
-        sess.clone(),
-        turn_context,
-        sub_id.clone(),
-        input,
-        compact_instructions,
-        true,
-    )
-    .await;
-    let event = Event {
-        id: sub_id,
-        msg: EventMsg::TaskComplete(TaskCompleteEvent {
-            last_agent_message: None,
-        }),
-    };
-    sess.send_event(event).await;
+    run_compact_task_inner(sess.clone(), turn_context, sub_id.clone(), input).await;
+    None
 }

 async fn run_compact_task_inner(
@@ -110,19 +68,15 @@ async fn run_compact_task_inner(
    turn_context: Arc<TurnContext>,
    sub_id: String,
    input: Vec<InputItem>,
-    compact_instructions: String,
-    remove_task_on_completion: bool,
 ) {
    let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input);
-    let instructions_override = compact_instructions;
    let turn_input = sess
        .turn_input_with_history(vec![initial_input_for_turn.clone().into()])
        .await;

    let prompt = Prompt {
        input: turn_input,
-        tools: Vec::new(),
-        base_instructions_override: Some(instructions_override),
+        ..Default::default()
    };

    let max_retries = turn_context.client.get_provider().stream_max_retries();
@@ -139,7 +93,8 @@ async fn run_compact_task_inner(
    sess.persist_rollout_items(&[rollout_item]).await;

    loop {
-        let attempt_result = drain_to_completed(&sess, turn_context.as_ref(), &prompt).await;
+        let attempt_result =
+            drain_to_completed(&sess, turn_context.as_ref(), &sub_id, &prompt).await;

        match attempt_result {
            Ok(()) => {
@@ -175,21 +130,12 @@ async fn run_compact_task_inner(
        }
    }

-    if remove_task_on_completion {
-        sess.remove_task(&sub_id).await;
-    }
-    let history_snapshot = {
-        let state = sess.state.lock().await;
-        state.history.contents()
-    };
+    let history_snapshot = sess.history_snapshot().await;
    let summary_text = get_last_assistant_message_from_turn(&history_snapshot).unwrap_or_default();
    let user_messages = collect_user_messages(&history_snapshot);
    let initial_context = sess.build_initial_context(turn_context.as_ref());
    let new_history = build_compacted_history(initial_context, &user_messages, &summary_text);
-    {
-        let mut state = sess.state.lock().await;
-        state.history.replace(new_history);
-    }
+    sess.replace_history(new_history).await;

    let rollout_item = RolloutItem::Compacted(CompactedItem {
        message: summary_text.clone(),
@@ -284,6 +230,7 @@ pub(crate) fn build_compacted_history(
 async fn drain_to_completed(
    sess: &Session,
    turn_context: &TurnContext,
+    sub_id: &str,
    prompt: &Prompt,
 ) -> CodexResult<()> {
    let mut stream = turn_context.client.clone().stream(prompt).await?;
@@ -297,10 +244,14 @@ async fn drain_to_completed(
        };
        match event {
            Ok(ResponseEvent::OutputItemDone(item)) => {
-                let mut state = sess.state.lock().await;
-                state.history.record_items(std::slice::from_ref(&item));
+                sess.record_into_history(std::slice::from_ref(&item)).await;
            }
-            Ok(ResponseEvent::Completed { .. }) => {
+            Ok(ResponseEvent::RateLimits(snapshot)) => {
+                sess.update_rate_limits(sub_id, snapshot).await;
+            }
+            Ok(ResponseEvent::Completed { token_usage, .. }) => {
+                sess.update_token_usage_info(sub_id, turn_context, token_usage.as_ref())
+                    .await;
                return Ok(());
            }
            Ok(_) => continue,
--- a/codex-rs/core/src/command_safety/is_dangerous_command.rs
+++ b/codex-rs/core/src/command_safety/is_dangerous_command.rs
@@ -0,0 +1,99 @@
+use crate::bash::parse_bash_lc_plain_commands;
+
+pub fn command_might_be_dangerous(command: &[String]) -> bool {
+    if is_dangerous_to_call_with_exec(command) {
+        return true;
+    }
+
+    // Support `bash -lc "<script>"` where any part of the script might contain a dangerous command.
+    if let Some(all_commands) = parse_bash_lc_plain_commands(command)
+        && all_commands
+            .iter()
+            .any(|cmd| is_dangerous_to_call_with_exec(cmd))
+    {
+        return true;
+    }
+
+    false
+}
+
+fn is_dangerous_to_call_with_exec(command: &[String]) -> bool {
+    let cmd0 = command.first().map(String::as_str);
+
+    match cmd0 {
+        Some(cmd) if cmd.ends_with("git") || cmd.ends_with("/git") => {
+            matches!(command.get(1).map(String::as_str), Some("reset" | "rm"))
+        }
+
+        Some("rm") => matches!(command.get(1).map(String::as_str), Some("-f" | "-rf")),
+
+        // for sudo <cmd> simply do the check for <cmd>
+        Some("sudo") => is_dangerous_to_call_with_exec(&command[1..]),
+
+        // ── anything else ─────────────────────────────────────────────────
+        _ => false,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn vec_str(items: &[&str]) -> Vec<String> {
+        items.iter().map(std::string::ToString::to_string).collect()
+    }
+
+    #[test]
+    fn git_reset_is_dangerous() {
+        assert!(command_might_be_dangerous(&vec_str(&["git", "reset"])));
+    }
+
+    #[test]
+    fn bash_git_reset_is_dangerous() {
+        assert!(command_might_be_dangerous(&vec_str(&[
+            "bash",
+            "-lc",
+            "git reset --hard"
+        ])));
+    }
+
+    #[test]
+    fn git_status_is_not_dangerous() {
+        assert!(!command_might_be_dangerous(&vec_str(&["git", "status"])));
+    }
+
+    #[test]
+    fn bash_git_status_is_not_dangerous() {
+        assert!(!command_might_be_dangerous(&vec_str(&[
+            "bash",
+            "-lc",
+            "git status"
+        ])));
+    }
+
+    #[test]
+    fn sudo_git_reset_is_dangerous() {
+        assert!(command_might_be_dangerous(&vec_str(&[
+            "sudo", "git", "reset", "--hard"
+        ])));
+    }
+
+    #[test]
+    fn usr_bin_git_is_dangerous() {
+        assert!(command_might_be_dangerous(&vec_str(&[
+            "/usr/bin/git",
+            "reset",
+            "--hard"
+        ])));
+    }
+
+    #[test]
+    fn rm_rf_is_dangerous() {
+        assert!(command_might_be_dangerous(&vec_str(&["rm", "-rf", "/"])));
+    }
+
+    #[test]
+    fn rm_f_is_dangerous() {
+        assert!(command_might_be_dangerous(&vec_str(&["rm", "-f", "/"])));
+    }
+}
--- a/codex-rs/core/src/command_safety/is_safe_command.rs
+++ b/codex-rs/core/src/command_safety/is_safe_command.rs
@@ -1,7 +1,14 @@
-use crate::bash::try_parse_bash;
-use crate::bash::try_parse_word_only_commands_sequence;
+use crate::bash::parse_bash_lc_plain_commands;

 pub fn is_known_safe_command(command: &[String]) -> bool {
+    #[cfg(target_os = "windows")]
+    {
+        use super::windows_safe_commands::is_safe_command_windows;
+        if is_safe_command_windows(command) {
+            return true;
+        }
+    }
+
    if is_safe_to_call_with_exec(command) {
        return true;
    }
@@ -12,11 +19,7 @@ pub fn is_known_safe_command(command: &[String]) -> bool {
    // introduce side effects ( "&&", "||", ";", and "|" ). If every
    // individual command in the script is itself a known‑safe command, then
    // the composite expression is considered safe.
-    if let [bash, flag, script] = command
-        && bash == "bash"
-        && flag == "-lc"
-        && let Some(tree) = try_parse_bash(script)
-        && let Some(all_commands) = try_parse_word_only_commands_sequence(&tree, script)
+    if let Some(all_commands) = parse_bash_lc_plain_commands(command)
        && !all_commands.is_empty()
        && all_commands
            .iter()
@@ -24,7 +27,6 @@ pub fn is_known_safe_command(command: &[String]) -> bool {
    {
        return true;
    }
-
    false
 }

--- a/codex-rs/core/src/command_safety/mod.rs
+++ b/codex-rs/core/src/command_safety/mod.rs
@@ -0,0 +1,4 @@
+pub mod is_dangerous_command;
+pub mod is_safe_command;
+#[cfg(target_os = "windows")]
+pub mod windows_safe_commands;
--- a/codex-rs/core/src/command_safety/windows_safe_commands.rs
+++ b/codex-rs/core/src/command_safety/windows_safe_commands.rs
@@ -0,0 +1,25 @@
+/// Reports whether `_command` is a known-safe Windows command.
+///
+/// This is a WIP: the argument is ignored and `false` is returned for every input.
+/// This will eventually contain a real list of common safe Windows commands.
+pub fn is_safe_command_windows(_command: &[String]) -> bool {
+    false
+}
+
+#[cfg(test)]
+mod tests {
+    use super::is_safe_command_windows;
+
+    /// Converts string literals into the owned argv form the checker takes.
+    fn to_argv(parts: &[&str]) -> Vec<String> {
+        parts.iter().map(|part| (*part).to_string()).collect()
+    }
+
+    /// While the checker is a stub, none of the sample commands may be reported as safe.
+    #[test]
+    fn everything_is_unsafe() {
+        let samples: [&[&str]; 4] = [
+            &["powershell.exe", "-NoLogo", "-Command", "echo hello"],
+            &["copy", "foo", "bar"],
+            &["del", "file.txt"],
+            &["powershell.exe", "Get-ChildItem"],
+        ];
+        for sample in samples {
+            let argv = to_argv(sample);
+            assert!(!is_safe_command_windows(&argv));
+        }
+    }
+}
--- a/codex-rs/core/src/config.rs
+++ b/codex-rs/core/src/config.rs
@@ -184,6 +184,10 @@ pub struct Config {
    /// If set to `true`, used only the experimental unified exec tool.
    pub use_experimental_unified_exec_tool: bool,

+    /// If set to `true`, use the experimental official Rust MCP client.
+    /// https://github.com/modelcontextprotocol/rust-sdk
+    pub use_experimental_use_rmcp_client: bool,
+
    /// Include the `view_image` tool that lets the agent attach a local image path to context.
    pub include_view_image_tool: bool,

@@ -693,6 +697,7 @@ pub struct ConfigToml {

    pub experimental_use_exec_command_tool: Option<bool>,
    pub experimental_use_unified_exec_tool: Option<bool>,
+    pub experimental_use_rmcp_client: Option<bool>,

    pub projects: Option<HashMap<String, ProjectConfig>>,

@@ -1043,6 +1048,7 @@ impl Config {
            use_experimental_unified_exec_tool: cfg
                .experimental_use_unified_exec_tool
                .unwrap_or(false),
+            use_experimental_use_rmcp_client: cfg.experimental_use_rmcp_client.unwrap_or(false),
            include_view_image_tool,
            active_profile: active_profile_name,
            disable_paste_burst: cfg.disable_paste_burst.unwrap_or(false),
@@ -1651,6 +1657,7 @@ model_verbosity = "high"
                tools_web_search_request: false,
                use_experimental_streamable_shell_tool: false,
                use_experimental_unified_exec_tool: false,
+                use_experimental_use_rmcp_client: false,
                include_view_image_tool: true,
                active_profile: Some("o3".to_string()),
                disable_paste_burst: false,
@@ -1709,6 +1716,7 @@ model_verbosity = "high"
            tools_web_search_request: false,
            use_experimental_streamable_shell_tool: false,
            use_experimental_unified_exec_tool: false,
+            use_experimental_use_rmcp_client: false,
            include_view_image_tool: true,
            active_profile: Some("gpt3".to_string()),
            disable_paste_burst: false,
@@ -1782,6 +1790,7 @@ model_verbosity = "high"
            tools_web_search_request: false,
            use_experimental_streamable_shell_tool: false,
            use_experimental_unified_exec_tool: false,
+            use_experimental_use_rmcp_client: false,
            include_view_image_tool: true,
            active_profile: Some("zdr".to_string()),
            disable_paste_burst: false,
@@ -1841,6 +1850,7 @@ model_verbosity = "high"
            tools_web_search_request: false,
            use_experimental_streamable_shell_tool: false,
            use_experimental_unified_exec_tool: false,
+            use_experimental_use_rmcp_client: false,
            include_view_image_tool: true,
            active_profile: Some("gpt5".to_string()),
            disable_paste_burst: false,
--- a/codex-rs/core/src/default_client.rs
+++ b/codex-rs/core/src/default_client.rs
@@ -1,3 +1,4 @@
+use crate::spawn::CODEX_SANDBOX_ENV_VAR;
 use reqwest::header::HeaderValue;
 use std::sync::LazyLock;
 use std::sync::Mutex;
@@ -20,7 +21,6 @@ use std::sync::Mutex;
 pub static USER_AGENT_SUFFIX: LazyLock<Mutex<Option<String>>> = LazyLock::new(|| Mutex::new(None));

 pub const CODEX_INTERNAL_ORIGINATOR_OVERRIDE_ENV_VAR: &str = "CODEX_INTERNAL_ORIGINATOR_OVERRIDE";
-
 #[derive(Debug, Clone)]
 pub struct Originator {
    pub value: String,
@@ -112,17 +112,25 @@ pub fn create_client() -> reqwest::Client {
    headers.insert("originator", ORIGINATOR.header_value.clone());
    let ua = get_codex_user_agent();

-    reqwest::Client::builder()
+    let mut builder = reqwest::Client::builder()
        // Set UA via dedicated helper to avoid header validation pitfalls
        .user_agent(ua)
-        .default_headers(headers)
-        .build()
-        .unwrap_or_else(|_| reqwest::Client::new())
+        .default_headers(headers);
+    if is_sandboxed() {
+        builder = builder.no_proxy();
+    }
+
+    builder.build().unwrap_or_else(|_| reqwest::Client::new())
+}
+
+/// Returns `true` only when the `CODEX_SANDBOX_ENV_VAR` environment variable is set to
+/// exactly `"seatbelt"`; an unset, non-Unicode, or different value yields `false`.
+fn is_sandboxed() -> bool {
+    matches!(
+        std::env::var(CODEX_SANDBOX_ENV_VAR).as_deref(),
+        Ok("seatbelt")
+    )
 }

 #[cfg(test)]
 mod tests {
    use super::*;
+    use core_test_support::skip_if_no_network;

    #[test]
    fn test_get_codex_user_agent() {
@@ -132,6 +140,8 @@ mod tests {

    #[tokio::test]
    async fn test_create_client_sets_default_headers() {
+        skip_if_no_network!();
+
        use wiremock::Mock;
        use wiremock::MockServer;
        use wiremock::ResponseTemplate;
--- a/codex-rs/core/src/error.rs
+++ b/codex-rs/core/src/error.rs
@@ -2,6 +2,7 @@ use crate::exec::ExecToolCallOutput;
 use crate::token_data::KnownPlan;
 use crate::token_data::PlanType;
 use codex_protocol::mcp_protocol::ConversationId;
+use codex_protocol::protocol::RateLimitSnapshot;
 use reqwest::StatusCode;
 use serde_json;
 use std::io;
@@ -104,6 +105,9 @@ pub enum CodexErr {
    #[error("codex-linux-sandbox was required but not provided")]
    LandlockSandboxExecutableNotProvided,

+    #[error("unsupported operation: {0}")]
+    UnsupportedOperation(String),
+
    // -----------------------------------------------------------------
    // Automatic conversions for common external error types
    // -----------------------------------------------------------------
@@ -135,6 +139,7 @@ pub enum CodexErr {
 pub struct UsageLimitReachedError {
    pub(crate) plan_type: Option<PlanType>,
    pub(crate) resets_in_seconds: Option<u64>,
+    pub(crate) rate_limits: Option<RateLimitSnapshot>,
 }

 impl std::fmt::Display for UsageLimitReachedError {
@@ -151,7 +156,7 @@ impl std::fmt::Display for UsageLimitReachedError {
                )
            }
            Some(PlanType::Known(KnownPlan::Free)) => {
-                "To use Codex with your ChatGPT plan, upgrade to Plus: https://openai.com/chatgpt/pricing."
+                "You've hit your usage limit. Upgrade to Plus to continue using Codex (https://openai.com/chatgpt/pricing)."
                    .to_string()
            }
            Some(PlanType::Known(KnownPlan::Pro))
@@ -262,12 +267,29 @@ pub fn get_error_message_ui(e: &CodexErr) -> String {
 #[cfg(test)]
 mod tests {
    use super::*;
+    use codex_protocol::protocol::RateLimitWindow;
+
+    /// Test fixture: a snapshot with both windows populated (primary at 50% of a 60-minute
+    /// window, secondary at 30% of a 120-minute window).
+    fn rate_limit_snapshot() -> RateLimitSnapshot {
+        let primary = RateLimitWindow {
+            used_percent: 50.0,
+            window_minutes: Some(60),
+            resets_in_seconds: Some(3600),
+        };
+        let secondary = RateLimitWindow {
+            used_percent: 30.0,
+            window_minutes: Some(120),
+            resets_in_seconds: Some(7200),
+        };
+        RateLimitSnapshot {
+            primary: Some(primary),
+            secondary: Some(secondary),
+        }
+    }

    #[test]
    fn usage_limit_reached_error_formats_plus_plan() {
        let err = UsageLimitReachedError {
            plan_type: Some(PlanType::Known(KnownPlan::Plus)),
            resets_in_seconds: None,
+            rate_limits: Some(rate_limit_snapshot()),
        };
        assert_eq!(
            err.to_string(),
@@ -280,10 +302,11 @@ mod tests {
        let err = UsageLimitReachedError {
            plan_type: Some(PlanType::Known(KnownPlan::Free)),
            resets_in_seconds: Some(3600),
+            rate_limits: Some(rate_limit_snapshot()),
        };
        assert_eq!(
            err.to_string(),
-            "To use Codex with your ChatGPT plan, upgrade to Plus: https://openai.com/chatgpt/pricing."
+            "You've hit your usage limit. Upgrade to Plus to continue using Codex (https://openai.com/chatgpt/pricing)."
        );
    }

@@ -292,6 +315,7 @@ mod tests {
        let err = UsageLimitReachedError {
            plan_type: None,
            resets_in_seconds: None,
+            rate_limits: Some(rate_limit_snapshot()),
        };
        assert_eq!(
            err.to_string(),
@@ -304,6 +328,7 @@ mod tests {
        let err = UsageLimitReachedError {
            plan_type: Some(PlanType::Known(KnownPlan::Team)),
            resets_in_seconds: Some(3600),
+            rate_limits: Some(rate_limit_snapshot()),
        };
        assert_eq!(
            err.to_string(),
@@ -316,6 +341,7 @@ mod tests {
        let err = UsageLimitReachedError {
            plan_type: Some(PlanType::Known(KnownPlan::Business)),
            resets_in_seconds: None,
+            rate_limits: Some(rate_limit_snapshot()),
        };
        assert_eq!(
            err.to_string(),
@@ -328,6 +354,7 @@ mod tests {
        let err = UsageLimitReachedError {
            plan_type: Some(PlanType::Known(KnownPlan::Pro)),
            resets_in_seconds: None,
+            rate_limits: Some(rate_limit_snapshot()),
        };
        assert_eq!(
            err.to_string(),
@@ -340,6 +367,7 @@ mod tests {
        let err = UsageLimitReachedError {
            plan_type: None,
            resets_in_seconds: Some(5 * 60),
+            rate_limits: Some(rate_limit_snapshot()),
        };
        assert_eq!(
            err.to_string(),
@@ -352,6 +380,7 @@ mod tests {
        let err = UsageLimitReachedError {
            plan_type: Some(PlanType::Known(KnownPlan::Plus)),
            resets_in_seconds: Some(3 * 3600 + 32 * 60),
+            rate_limits: Some(rate_limit_snapshot()),
        };
        assert_eq!(
            err.to_string(),
@@ -364,6 +393,7 @@ mod tests {
        let err = UsageLimitReachedError {
            plan_type: None,
            resets_in_seconds: Some(2 * 86_400 + 3 * 3600 + 5 * 60),
+            rate_limits: Some(rate_limit_snapshot()),
        };
        assert_eq!(
            err.to_string(),
@@ -376,6 +406,7 @@ mod tests {
        let err = UsageLimitReachedError {
            plan_type: None,
            resets_in_seconds: Some(30),
+            rate_limits: Some(rate_limit_snapshot()),
        };
        assert_eq!(
            err.to_string(),
--- a/codex-rs/core/src/exec_command/mod.rs
+++ b/codex-rs/core/src/exec_command/mod.rs
@@ -12,4 +12,3 @@ pub use responses_api::WRITE_STDIN_TOOL_NAME;
 pub use responses_api::create_exec_command_tool_for_responses_api;
 pub use responses_api::create_write_stdin_tool_for_responses_api;
 pub use session_manager::SessionManager as ExecSessionManager;
-pub use session_manager::result_into_payload;
--- a/codex-rs/core/src/exec_command/session_manager.rs
+++ b/codex-rs/core/src/exec_command/session_manager.rs
@@ -21,7 +21,6 @@ use crate::exec_command::exec_command_params::WriteStdinParams;
 use crate::exec_command::exec_command_session::ExecCommandSession;
 use crate::exec_command::session_id::SessionId;
 use crate::truncate::truncate_middle;
-use codex_protocol::models::FunctionCallOutputPayload;

 #[derive(Debug, Default)]
 pub struct SessionManager {
@@ -38,7 +37,7 @@ pub struct ExecCommandOutput {
 }

 impl ExecCommandOutput {
-    fn to_text_output(&self) -> String {
+    pub(crate) fn to_text_output(&self) -> String {
        let wall_time_secs = self.wall_time.as_secs_f32();
        let termination_status = match self.exit_status {
            ExitStatus::Exited(code) => format!("Process exited with code {code}"),
@@ -68,19 +67,6 @@ pub enum ExitStatus {
    Ongoing(SessionId),
 }

-pub fn result_into_payload(result: Result<ExecCommandOutput, String>) -> FunctionCallOutputPayload {
-    match result {
-        Ok(output) => FunctionCallOutputPayload {
-            content: output.to_text_output(),
-            success: Some(true),
-        },
-        Err(err) => FunctionCallOutputPayload {
-            content: err,
-            success: Some(false),
-        },
-    }
-}
-
 impl SessionManager {
    /// Processes the request and is required to send a response via `outgoing`.
    pub async fn handle_exec_command_request(
--- a/codex-rs/core/src/flags.rs
+++ b/codex-rs/core/src/flags.rs
@@ -1,16 +1,6 @@
-use std::time::Duration;
-
 use env_flags::env_flags;

 env_flags! {
-    pub OPENAI_API_BASE: &str = "https://api.openai.com/v1";
-
-    /// Fallback when the provider-specific key is not set.
-    pub OPENAI_API_KEY: Option<&str> = None;
-    pub OPENAI_TIMEOUT_MS: Duration = Duration::from_millis(300_000), |value| {
-        value.parse().map(Duration::from_millis)
-    };
-
    /// Fixture path for offline tests (see client.rs).
    pub CODEX_RS_SSE_FIXTURE: Option<&str> = None;
 }
--- a/codex-rs/core/src/function_tool.rs
+++ b/codex-rs/core/src/function_tool.rs
@@ -0,0 +1,7 @@
+use thiserror::Error;
+
+/// Error type for function-tool handlers.
+#[derive(Debug, Error, PartialEq)]
+pub enum FunctionCallError {
+    /// Carries a plain message; the `Display` output is the message itself.
+    /// NOTE(review): presumably this text is relayed back to the model — confirm at call sites.
+    #[error("{0}")]
+    RespondToModel(String),
+}
--- a/codex-rs/core/src/git_info.rs
+++ b/codex-rs/core/src/git_info.rs
@@ -589,6 +589,7 @@ pub async fn current_branch_name(cwd: &Path) -> Option<String> {
 mod tests {
    use super::*;

+    use core_test_support::skip_if_sandbox;
    use std::fs;
    use std::path::PathBuf;
    use tempfile::TempDir;
@@ -660,6 +661,7 @@ mod tests {

    #[tokio::test]
    async fn test_recent_commits_orders_and_limits() {
+        skip_if_sandbox!();
        use tokio::time::Duration;
        use tokio::time::sleep;

--- a/codex-rs/core/src/lib.rs
+++ b/codex-rs/core/src/lib.rs
@@ -15,6 +15,7 @@ pub mod codex;
 mod codex_conversation;
 pub mod token_data;
 pub use codex_conversation::CodexConversation;
+mod command_safety;
 pub mod config;
 pub mod config_edit;
 pub mod config_profile;
@@ -29,7 +30,6 @@ pub mod exec_env;
 mod flags;
 pub mod git_info;
 pub mod internal_storage;
-mod is_safe_command;
 pub mod landlock;
 mod mcp_connection_manager;
 mod mcp_tool_call;
@@ -75,10 +75,14 @@ pub use rollout::find_conversation_path_by_id_str;
 pub use rollout::list::ConversationItem;
 pub use rollout::list::ConversationsPage;
 pub use rollout::list::Cursor;
+mod function_tool;
+mod state;
+mod tasks;
 mod user_notification;
 pub mod util;

 pub use apply_patch::CODEX_APPLY_PATCH_ARG1;
+pub use command_safety::is_safe_command;
 pub use safety::get_platform_sandbox;
 // Re-export the protocol types from the standalone `codex-protocol` crate so existing
 // `codex_core::protocol::...` references continue to work across the workspace.
--- a/codex-rs/core/src/mcp_connection_manager.rs
+++ b/codex-rs/core/src/mcp_connection_manager.rs
@@ -16,6 +16,7 @@ use anyhow::Context;
 use anyhow::Result;
 use anyhow::anyhow;
 use codex_mcp_client::McpClient;
+use codex_rmcp_client::RmcpClient;
 use mcp_types::ClientCapabilities;
 use mcp_types::Implementation;
 use mcp_types::Tool;
@@ -86,11 +87,64 @@ struct ToolInfo {
 }

 struct ManagedClient {
-    client: Arc<McpClient>,
+    client: McpClientAdapter,
    startup_timeout: Duration,
    tool_timeout: Option<Duration>,
 }

+/// Wraps either of the two MCP client implementations behind a single type.
+#[derive(Clone)]
+enum McpClientAdapter {
+    /// Client from the `codex_mcp_client` crate.
+    Legacy(Arc<McpClient>),
+    /// Client from the `codex_rmcp_client` crate.
+    Rmcp(Arc<RmcpClient>),
+}
+
+impl McpClientAdapter {
+    /// Creates a stdio MCP client for `program` and runs its `initialize` handshake.
+    ///
+    /// Builds an `RmcpClient` when `use_rmcp_client` is `true`, otherwise a `McpClient`.
+    /// `startup_timeout` is passed to `initialize` as its timeout.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if creating the client or initializing it fails.
+    async fn new_stdio_client(
+        use_rmcp_client: bool,
+        program: OsString,
+        args: Vec<OsString>,
+        env: Option<HashMap<String, String>>,
+        params: mcp_types::InitializeRequestParams,
+        startup_timeout: Duration,
+    ) -> Result<Self> {
+        // Routine diagnostic, so log at debug level rather than error. `env` is deliberately
+        // left out of the message: its values may contain secrets for the server.
+        tracing::debug!(
+            "new_stdio_client use_rmcp_client: {use_rmcp_client} program: {program:?} args: {args:?} params: {params:?} startup_timeout: {startup_timeout:?}"
+        );
+        if use_rmcp_client {
+            let client = Arc::new(RmcpClient::new_stdio_client(program, args, env).await?);
+            client.initialize(params, Some(startup_timeout)).await?;
+            Ok(McpClientAdapter::Rmcp(client))
+        } else {
+            let client = Arc::new(McpClient::new_stdio_client(program, args, env).await?);
+            client.initialize(params, Some(startup_timeout)).await?;
+            Ok(McpClientAdapter::Legacy(client))
+        }
+    }
+
+    /// Forwards a `list_tools` request to whichever client this adapter wraps.
+    async fn list_tools(
+        &self,
+        params: Option<mcp_types::ListToolsRequestParams>,
+        timeout: Option<Duration>,
+    ) -> Result<mcp_types::ListToolsResult> {
+        match self {
+            McpClientAdapter::Legacy(client) => client.list_tools(params, timeout).await,
+            McpClientAdapter::Rmcp(client) => client.list_tools(params, timeout).await,
+        }
+    }
+
+    /// Forwards a `call_tool` request for tool `name` to whichever client this adapter wraps.
+    async fn call_tool(
+        &self,
+        name: String,
+        arguments: Option<serde_json::Value>,
+        timeout: Option<Duration>,
+    ) -> Result<mcp_types::CallToolResult> {
+        match self {
+            McpClientAdapter::Legacy(client) => client.call_tool(name, arguments, timeout).await,
+            McpClientAdapter::Rmcp(client) => client.call_tool(name, arguments, timeout).await,
+        }
+    }
+}
+
 /// A thin wrapper around a set of running [`McpClient`] instances.
 #[derive(Default)]
 pub(crate) struct McpConnectionManager {
@@ -115,12 +169,15 @@ impl McpConnectionManager {
    /// user should be informed about these errors.
    pub async fn new(
        mcp_servers: HashMap<String, McpServerConfig>,
+        use_rmcp_client: bool,
    ) -> Result<(Self, ClientStartErrors)> {
        // Early exit if no servers are configured.
        if mcp_servers.is_empty() {
            return Ok((Self::default(), ClientStartErrors::default()));
        }

+        tracing::error!("new mcp_servers: {mcp_servers:?} use_rmcp_client: {use_rmcp_client}");
+
        // Launch all configured servers concurrently.
        let mut join_set = JoinSet::new();
        let mut errors = ClientStartErrors::new();
@@ -137,57 +194,48 @@ impl McpConnectionManager {
            }

            let startup_timeout = cfg.startup_timeout_sec.unwrap_or(DEFAULT_STARTUP_TIMEOUT);
-
            let tool_timeout = cfg.tool_timeout_sec.unwrap_or(DEFAULT_TOOL_TIMEOUT);

+            let use_rmcp_client_flag = use_rmcp_client;
            join_set.spawn(async move {
                let McpServerConfig {
                    command, args, env, ..
                } = cfg;
-                let client_res = McpClient::new_stdio_client(
-                    command.into(),
-                    args.into_iter().map(OsString::from).collect(),
+                let command_os: OsString = command.into();
+                let args_os: Vec<OsString> = args.into_iter().map(Into::into).collect();
+                let params = mcp_types::InitializeRequestParams {
+                    capabilities: ClientCapabilities {
+                        experimental: None,
+                        roots: None,
+                        sampling: None,
+                        // https://modelcontextprotocol.io/specification/2025-06-18/client/elicitation#capabilities
+                        // indicates this should be an empty object.
+                        elicitation: Some(json!({})),
+                    },
+                    client_info: Implementation {
+                        name: "codex-mcp-client".to_owned(),
+                        version: env!("CARGO_PKG_VERSION").to_owned(),
+                        title: Some("Codex".into()),
+                        // This field is used by Codex when it is an MCP
+                        // server: it should not be used when Codex is
+                        // an MCP client.
+                        user_agent: None,
+                    },
+                    protocol_version: mcp_types::MCP_SCHEMA_VERSION.to_owned(),
+                };
+
+                let client = McpClientAdapter::new_stdio_client(
+                    use_rmcp_client_flag,
+                    command_os,
+                    args_os,
                    env,
+                    params,
+                    startup_timeout,
                )
-                .await;
-                match client_res {
-                    Ok(client) => {
-                        // Initialize the client.
-                        let params = mcp_types::InitializeRequestParams {
-                            capabilities: ClientCapabilities {
-                                experimental: None,
-                                roots: None,
-                                sampling: None,
-                                // https://modelcontextprotocol.io/specification/2025-06-18/client/elicitation#capabilities
-                                // indicates this should be an empty object.
-                                elicitation: Some(json!({})),
-                            },
-                            client_info: Implementation {
-                                name: "codex-mcp-client".to_owned(),
-                                version: env!("CARGO_PKG_VERSION").to_owned(),
-                                title: Some("Codex".into()),
-                                // This field is used by Codex when it is an MCP
-                                // server: it should not be used when Codex is
-                                // an MCP client.
-                                user_agent: None,
-                            },
-                            protocol_version: mcp_types::MCP_SCHEMA_VERSION.to_owned(),
-                        };
-                        let initialize_notification_params = None;
-                        let init_result = client
-                            .initialize(
-                                params,
-                                initialize_notification_params,
-                                Some(startup_timeout),
-                            )
-                            .await;
-                        (
-                            (server_name, tool_timeout),
-                            init_result.map(|_| (client, startup_timeout)),
-                        )
-                    }
-                    Err(e) => ((server_name, tool_timeout), Err(e.into())),
-                }
+                .await
+                .map(|c| (c, startup_timeout));
+
+                ((server_name, tool_timeout), client)
            });
        }

@@ -207,7 +255,7 @@ impl McpConnectionManager {
                    clients.insert(
                        server_name,
                        ManagedClient {
-                            client: Arc::new(client),
+                            client,
                            startup_timeout,
                            tool_timeout: Some(tool_timeout),
                        },
--- a/codex-rs/core/src/openai_model_info.rs
+++ b/codex-rs/core/src/openai_model_info.rs
@@ -7,13 +7,14 @@ use crate::model_family::ModelFamily;
 /// Though this would help present more accurate pricing information in the UI.
 #[derive(Debug)]
 pub(crate) struct ModelInfo {
-    /// Size of the context window in tokens.
+    /// Size of the context window in tokens. This is the maximum size of the input context.
    pub(crate) context_window: u64,

    /// Maximum number of output tokens that can be generated for the model.
    pub(crate) max_output_tokens: u64,

-    /// Token threshold where we should automatically compact conversation history.
+    /// Token threshold where we should automatically compact conversation history. This considers
+    /// input tokens + output tokens of this turn.
    pub(crate) auto_compact_token_limit: Option<i64>,
 }

@@ -64,7 +65,7 @@ pub(crate) fn get_model_info(model_family: &ModelFamily) -> Option<ModelInfo> {
        _ if slug.starts_with("gpt-5-codex") => Some(ModelInfo {
            context_window: 272_000,
            max_output_tokens: 128_000,
-            auto_compact_token_limit: Some(220_000),
+            auto_compact_token_limit: Some(350_000),
        }),

        _ if slug.starts_with("gpt-5") => Some(ModelInfo::new(272_000, 128_000)),
--- a/codex-rs/core/src/plan_tool.rs
+++ b/codex-rs/core/src/plan_tool.rs
@@ -2,13 +2,12 @@ use std::collections::BTreeMap;
 use std::sync::LazyLock;

 use crate::codex::Session;
+use crate::function_tool::FunctionCallError;
 use crate::openai_tools::JsonSchema;
 use crate::openai_tools::OpenAiTool;
 use crate::openai_tools::ResponsesApiTool;
 use crate::protocol::Event;
 use crate::protocol::EventMsg;
-use codex_protocol::models::FunctionCallOutputPayload;
-use codex_protocol::models::ResponseInputItem;

 // Use the canonical plan tool types from the protocol crate to ensure
 // type-identity matches events transported via `codex_protocol`.
@@ -67,44 +66,20 @@ pub(crate) async fn handle_update_plan(
    session: &Session,
    arguments: String,
    sub_id: String,
-    call_id: String,
-) -> ResponseInputItem {
-    match parse_update_plan_arguments(arguments, &call_id) {
-        Ok(args) => {
-            let output = ResponseInputItem::FunctionCallOutput {
-                call_id,
-                output: FunctionCallOutputPayload {
-                    content: "Plan updated".to_string(),
-                    success: Some(true),
-                },
-            };
-            session
-                .send_event(Event {
-                    id: sub_id.to_string(),
-                    msg: EventMsg::PlanUpdate(args),
-                })
-                .await;
-            output
-        }
-        Err(output) => *output,
-    }
+    _call_id: String,
+) -> Result<String, FunctionCallError> {
+    let args = parse_update_plan_arguments(&arguments)?;
+    session
+        .send_event(Event {
+            id: sub_id.to_string(),
+            msg: EventMsg::PlanUpdate(args),
+        })
+        .await;
+    Ok("Plan updated".to_string())
 }

-fn parse_update_plan_arguments(
-    arguments: String,
-    call_id: &str,
-) -> Result<UpdatePlanArgs, Box<ResponseInputItem>> {
-    match serde_json::from_str::<UpdatePlanArgs>(&arguments) {
-        Ok(args) => Ok(args),
-        Err(e) => {
-            let output = ResponseInputItem::FunctionCallOutput {
-                call_id: call_id.to_string(),
-                output: FunctionCallOutputPayload {
-                    content: format!("failed to parse function arguments: {e}"),
-                    success: None,
-                },
-            };
-            Err(Box::new(output))
-        }
-    }
+/// Deserializes the raw JSON `arguments` string into `UpdatePlanArgs`.
+///
+/// On a parse failure the serde error text is wrapped in
+/// `FunctionCallError::RespondToModel`.
+fn parse_update_plan_arguments(arguments: &str) -> Result<UpdatePlanArgs, FunctionCallError> {
+    match serde_json::from_str::<UpdatePlanArgs>(arguments) {
+        Ok(parsed) => Ok(parsed),
+        Err(e) => {
+            let message = format!("failed to parse function arguments: {e}");
+            Err(FunctionCallError::RespondToModel(message))
+        }
+    }
 }
--- a/codex-rs/core/src/rollout/recorder.rs
+++ b/codex-rs/core/src/rollout/recorder.rs
@@ -7,8 +7,6 @@ use std::path::Path;
 use std::path::PathBuf;

 use codex_protocol::mcp_protocol::ConversationId;
-use serde::Deserialize;
-use serde::Serialize;
 use serde_json::Value;
 use time::OffsetDateTime;
 use time::format_description::FormatItem;
@@ -28,7 +26,6 @@ use super::policy::is_persisted_response_item;
 use crate::config::Config;
 use crate::default_client::ORIGINATOR;
 use crate::git_info::collect_git_info;
-use codex_protocol::models::ResponseItem;
 use codex_protocol::protocol::InitialHistory;
 use codex_protocol::protocol::ResumedHistory;
 use codex_protocol::protocol::RolloutItem;
@@ -36,19 +33,6 @@ use codex_protocol::protocol::RolloutLine;
 use codex_protocol::protocol::SessionMeta;
 use codex_protocol::protocol::SessionMetaLine;

-#[derive(Serialize, Deserialize, Default, Clone)]
-pub struct SessionStateSnapshot {}
-
-#[derive(Serialize, Deserialize, Default, Clone)]
-pub struct SavedSession {
-    pub session: SessionMeta,
-    #[serde(default)]
-    pub items: Vec<ResponseItem>,
-    #[serde(default)]
-    pub state: SessionStateSnapshot,
-    pub session_id: ConversationId,
-}
-
 /// Records all [`ResponseItem`]s for a session and flushes them to disk after
 /// every update.
 ///
--- a/codex-rs/core/src/safety.rs
+++ b/codex-rs/core/src/safety.rs
@@ -7,7 +7,9 @@ use codex_apply_patch::ApplyPatchAction;
 use codex_apply_patch::ApplyPatchFileChange;

 use crate::exec::SandboxType;
-use crate::is_safe_command::is_known_safe_command;
+
+use crate::command_safety::is_dangerous_command::command_might_be_dangerous;
+use crate::command_safety::is_safe_command::is_known_safe_command;
 use crate::protocol::AskForApproval;
 use crate::protocol::SandboxPolicy;

@@ -85,6 +87,20 @@ pub fn assess_command_safety(
    approved: &HashSet<Vec<String>>,
    with_escalated_permissions: bool,
 ) -> SafetyCheck {
+    // Some commands look dangerous. Even if they are run inside a sandbox,
+    // unless the user has explicitly approved them, we should ask,
+    // or reject if the approval_policy tells us not to ask.
+    if command_might_be_dangerous(command) && !approved.contains(command) {
+        if approval_policy == AskForApproval::Never {
+            return SafetyCheck::Reject {
+                reason: "dangerous command detected; rejected by user approval settings"
+                    .to_string(),
+            };
+        }
+
+        return SafetyCheck::AskUser;
+    }
+
    // A command is "trusted" because either:
    // - it belongs to a set of commands we consider "safe" by default, or
    // - the user has explicitly approved the command for this session
@@ -98,6 +114,7 @@ pub fn assess_command_safety(
    // would probably be fine to run the command in a sandbox, but when
    // `approved.contains(command)` is `true`, the user may have approved it for
    // the session _because_ they know it needs to run outside a sandbox.
+
    if is_known_safe_command(command) || approved.contains(command) {
        return SafetyCheck::AutoApprove {
            sandbox_type: SandboxType::None,
@@ -325,6 +342,56 @@ mod tests {
        assert_eq!(safety_check, SafetyCheck::AskUser);
    }

+    #[test]
+    fn dangerous_command_allowed_if_explicitly_approved() {
+        let command = vec!["git".to_string(), "reset".to_string(), "--hard".to_string()];
+        let approval_policy = AskForApproval::OnRequest;
+        let sandbox_policy = SandboxPolicy::ReadOnly;
+        let mut approved: HashSet<Vec<String>> = HashSet::new();
+        approved.insert(command.clone());
+        let request_escalated_privileges = false;
+
+        let safety_check = assess_command_safety(
+            &command,
+            approval_policy,
+            &sandbox_policy,
+            &approved,
+            request_escalated_privileges,
+        );
+
+        assert_eq!(
+            safety_check,
+            SafetyCheck::AutoApprove {
+                sandbox_type: SandboxType::None
+            }
+        );
+    }
+
+    #[test]
+    fn dangerous_command_not_allowed_if_not_explicitly_approved() {
+        let command = vec!["git".to_string(), "reset".to_string(), "--hard".to_string()];
+        let approval_policy = AskForApproval::Never;
+        let sandbox_policy = SandboxPolicy::ReadOnly;
+        let approved: HashSet<Vec<String>> = HashSet::new();
+        let request_escalated_privileges = false;
+
+        let safety_check = assess_command_safety(
+            &command,
+            approval_policy,
+            &sandbox_policy,
+            &approved,
+            request_escalated_privileges,
+        );
+
+        assert_eq!(
+            safety_check,
+            SafetyCheck::Reject {
+                reason: "dangerous command detected; rejected by user approval settings"
+                    .to_string(),
+            }
+        );
+    }
+
    #[test]
    fn test_request_escalated_privileges_no_sandbox_fallback() {
        let command = vec!["git".to_string(), "commit".to_string()];
--- a/codex-rs/core/src/state/mod.rs
+++ b/codex-rs/core/src/state/mod.rs
@@ -0,0 +1,9 @@
+mod service;
+mod session;
+mod turn;
+
+pub(crate) use service::SessionServices;
+pub(crate) use session::SessionState;
+pub(crate) use turn::ActiveTurn;
+pub(crate) use turn::RunningTask;
+pub(crate) use turn::TaskKind;
--- a/codex-rs/core/src/state/service.rs
+++ b/codex-rs/core/src/state/service.rs
@@ -0,0 +1,18 @@
+use crate::RolloutRecorder;
+use crate::exec_command::ExecSessionManager;
+use crate::mcp_connection_manager::McpConnectionManager;
+use crate::unified_exec::UnifiedExecSessionManager;
+use crate::user_notification::UserNotifier;
+use std::path::PathBuf;
+use tokio::sync::Mutex;
+
+pub(crate) struct SessionServices {
+    pub(crate) mcp_connection_manager: McpConnectionManager,
+    pub(crate) session_manager: ExecSessionManager,
+    pub(crate) unified_exec_manager: UnifiedExecSessionManager,
+    pub(crate) notifier: UserNotifier,
+    pub(crate) rollout: Mutex<Option<RolloutRecorder>>,
+    pub(crate) codex_linux_sandbox_exe: Option<PathBuf>,
+    pub(crate) user_shell: crate::shell::Shell,
+    pub(crate) show_raw_agent_reasoning: bool,
+}
--- a/codex-rs/core/src/state/session.rs
+++ b/codex-rs/core/src/state/session.rs
@@ -0,0 +1,80 @@
+//! Session-wide mutable state.
+
+use std::collections::HashSet;
+
+use codex_protocol::models::ResponseItem;
+
+use crate::conversation_history::ConversationHistory;
+use crate::protocol::RateLimitSnapshot;
+use crate::protocol::TokenUsage;
+use crate::protocol::TokenUsageInfo;
+
+/// Persistent, session-scoped state previously stored directly on `Session`.
+#[derive(Default)]
+pub(crate) struct SessionState {
+    pub(crate) approved_commands: HashSet<Vec<String>>,
+    pub(crate) history: ConversationHistory,
+    pub(crate) token_info: Option<TokenUsageInfo>,
+    pub(crate) latest_rate_limits: Option<RateLimitSnapshot>,
+}
+
+impl SessionState {
+    /// Creates an empty session state, as the former `State::default()` did.
+    pub(crate) fn new() -> Self {
+        Self {
+            history: ConversationHistory::new(),
+            ..Self::default()
+        }
+    }
+
+    /// Records `items` in the conversation history.
+    pub(crate) fn record_items<I>(&mut self, items: I)
+    where
+        I: IntoIterator,
+        I::Item: std::ops::Deref<Target = ResponseItem>,
+    {
+        self.history.record_items(items)
+    }
+
+    /// Returns the current contents of the conversation history.
+    pub(crate) fn history_snapshot(&self) -> Vec<ResponseItem> {
+        self.history.contents()
+    }
+
+    /// Replaces the conversation history with `items`.
+    pub(crate) fn replace_history(&mut self, items: Vec<ResponseItem>) {
+        self.history.replace(items);
+    }
+
+    /// Adds `cmd` to the set of approved commands.
+    pub(crate) fn add_approved_command(&mut self, cmd: Vec<String>) {
+        self.approved_commands.insert(cmd);
+    }
+
+    pub(crate) fn approved_commands_ref(&self) -> &HashSet<Vec<String>> {
+        &self.approved_commands
+    }
+
+    /// Combines `usage` with the stored token info via `TokenUsageInfo::new_or_append`.
+    pub(crate) fn update_token_info_from_usage(
+        &mut self,
+        usage: &TokenUsage,
+        model_context_window: Option<u64>,
+    ) {
+        let latest = Some(usage.clone());
+        self.token_info =
+            TokenUsageInfo::new_or_append(&self.token_info, &latest, model_context_window);
+    }
+
+    pub(crate) fn set_rate_limits(&mut self, snapshot: RateLimitSnapshot) {
+        self.latest_rate_limits = Some(snapshot);
+    }
+
+    pub(crate) fn token_info_and_rate_limits(
+        &self,
+    ) -> (Option<TokenUsageInfo>, Option<RateLimitSnapshot>) {
+        (self.token_info.clone(), self.latest_rate_limits.clone())
+    }
+
+    // Pending input/approval moved to TurnState.
+}
--- a/codex-rs/core/src/state/turn.rs
+++ b/codex-rs/core/src/state/turn.rs
@@ -0,0 +1,115 @@
+//! Turn-scoped state and active turn metadata scaffolding.
+
+use indexmap::IndexMap;
+use std::collections::HashMap;
+use std::sync::Arc;
+use tokio::sync::Mutex;
+use tokio::task::AbortHandle;
+
+use codex_protocol::models::ResponseInputItem;
+use tokio::sync::oneshot;
+
+use crate::protocol::ReviewDecision;
+use crate::tasks::SessionTask;
+
+/// Metadata about the currently running turn.
+pub(crate) struct ActiveTurn {
+    pub(crate) tasks: IndexMap<String, RunningTask>,
+    pub(crate) turn_state: Arc<Mutex<TurnState>>,
+}
+
+impl Default for ActiveTurn {
+    /// Starts with no tasks and a freshly created, default `TurnState`.
+    fn default() -> Self {
+        let tasks = IndexMap::new();
+        let turn_state = Arc::new(Mutex::new(TurnState::default()));
+        Self { tasks, turn_state }
+    }
+}
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub(crate) enum TaskKind {
+    Regular,
+    Review,
+    Compact,
+}
+
+#[derive(Clone)]
+pub(crate) struct RunningTask {
+    pub(crate) handle: AbortHandle,
+    pub(crate) kind: TaskKind,
+    pub(crate) task: Arc<dyn SessionTask>,
+}
+
+impl ActiveTurn {
+    pub(crate) fn add_task(&mut self, sub_id: String, task: RunningTask) {
+        self.tasks.insert(sub_id, task);
+    }
+
+    pub(crate) fn remove_task(&mut self, sub_id: &str) -> bool {
+        self.tasks.swap_remove(sub_id);
+        self.tasks.is_empty()
+    }
+
+    pub(crate) fn drain_tasks(&mut self) -> IndexMap<String, RunningTask> {
+        std::mem::take(&mut self.tasks)
+    }
+}
+
+/// Mutable state for a single turn.
+#[derive(Default)]
+pub(crate) struct TurnState {
+    pending_approvals: HashMap<String, oneshot::Sender<ReviewDecision>>,
+    pending_input: Vec<ResponseInputItem>,
+}
+
+impl TurnState {
+    /// Registers `tx` under `key`, returning any sender it displaced.
+    pub(crate) fn insert_pending_approval(
+        &mut self,
+        key: String,
+        tx: oneshot::Sender<ReviewDecision>,
+    ) -> Option<oneshot::Sender<ReviewDecision>> {
+        self.pending_approvals.insert(key, tx)
+    }
+
+    /// Removes and returns the sender registered under `key`, if any.
+    pub(crate) fn remove_pending_approval(
+        &mut self,
+        key: &str,
+    ) -> Option<oneshot::Sender<ReviewDecision>> {
+        self.pending_approvals.remove(key)
+    }
+
+    /// Drops all pending approval senders and all buffered input.
+    pub(crate) fn clear_pending(&mut self) {
+        self.pending_approvals.clear();
+        self.pending_input.clear();
+    }
+
+    /// Appends `input` to the input buffered for this turn.
+    pub(crate) fn push_pending_input(&mut self, input: ResponseInputItem) {
+        self.pending_input.push(input);
+    }
+
+    /// Returns all buffered input in insertion order, leaving the buffer empty.
+    pub(crate) fn take_pending_input(&mut self) -> Vec<ResponseInputItem> {
+        // Same idiom as `ActiveTurn::drain_tasks`; an empty `Vec` never allocates.
+        std::mem::take(&mut self.pending_input)
+    }
+}
+
+impl ActiveTurn {
+    /// Awaits the turn-state lock, then drops all pending approvals and input.
+    pub(crate) async fn clear_pending(&self) {
+        self.turn_state.lock().await.clear_pending();
+    }
+
+    /// Non-blocking counterpart for synchronous callers (Drop/interrupt):
+    /// silently does nothing when the lock cannot be taken immediately.
+    pub(crate) fn try_clear_pending_sync(&self) {
+        if let Ok(mut state) = self.turn_state.try_lock() {
+            state.clear_pending();
+        }
+    }
+}
--- a/codex-rs/core/src/tasks/compact.rs
+++ b/codex-rs/core/src/tasks/compact.rs
@@ -0,0 +1,31 @@
+use std::sync::Arc;
+
+use async_trait::async_trait;
+
+use crate::codex::TurnContext;
+use crate::codex::compact;
+use crate::protocol::InputItem;
+use crate::state::TaskKind;
+
+use super::SessionTask;
+use super::SessionTaskContext;
+
+#[derive(Clone, Copy, Default)]
+pub(crate) struct CompactTask;
+
+#[async_trait]
+impl SessionTask for CompactTask {
+    fn kind(&self) -> TaskKind {
+        TaskKind::Compact
+    }
+
+    async fn run(
+        self: Arc<Self>,
+        session: Arc<SessionTaskContext>,
+        ctx: Arc<TurnContext>,
+        sub_id: String,
+        input: Vec<InputItem>,
+    ) -> Option<String> {
+        compact::run_compact_task(session.clone_session(), ctx, sub_id, input).await
+    }
+}
--- a/codex-rs/core/src/tasks/mod.rs
+++ b/codex-rs/core/src/tasks/mod.rs
@@ -0,0 +1,166 @@
+mod compact;
+mod regular;
+mod review;
+
+use std::sync::Arc;
+
+use async_trait::async_trait;
+use tracing::trace;
+
+use crate::codex::Session;
+use crate::codex::TurnContext;
+use crate::protocol::Event;
+use crate::protocol::EventMsg;
+use crate::protocol::InputItem;
+use crate::protocol::TaskCompleteEvent;
+use crate::protocol::TurnAbortReason;
+use crate::protocol::TurnAbortedEvent;
+use crate::state::ActiveTurn;
+use crate::state::RunningTask;
+use crate::state::TaskKind;
+
+pub(crate) use compact::CompactTask;
+pub(crate) use regular::RegularTask;
+pub(crate) use review::ReviewTask;
+
+/// Thin wrapper that exposes the parts of [`Session`] task runners need.
+#[derive(Clone)]
+pub(crate) struct SessionTaskContext {
+    session: Arc<Session>,
+}
+
+impl SessionTaskContext {
+    pub(crate) fn new(session: Arc<Session>) -> Self {
+        Self { session }
+    }
+
+    pub(crate) fn clone_session(&self) -> Arc<Session> {
+        Arc::clone(&self.session)
+    }
+}
+
+#[async_trait]
+pub(crate) trait SessionTask: Send + Sync + 'static {
+    fn kind(&self) -> TaskKind;
+
+    async fn run(
+        self: Arc<Self>,
+        session: Arc<SessionTaskContext>,
+        ctx: Arc<TurnContext>,
+        sub_id: String,
+        input: Vec<InputItem>,
+    ) -> Option<String>;
+
+    async fn abort(&self, session: Arc<SessionTaskContext>, sub_id: &str) {
+        let _ = (session, sub_id);
+    }
+}
+
+impl Session {
+    /// Aborts whatever is running (reason `Replaced`), then spawns `task` as the active task.
+    pub async fn spawn_task<T: SessionTask>(
+        self: &Arc<Self>,
+        turn_context: Arc<TurnContext>,
+        sub_id: String,
+        input: Vec<InputItem>,
+        task: T,
+    ) {
+        self.abort_all_tasks(TurnAbortReason::Replaced).await;
+
+        let task: Arc<dyn SessionTask> = Arc::new(task);
+
+        // NOTE(review): the task starts running before it is registered below; confirm a task
+        // that finishes first cannot leave a stale `ActiveTurn` behind.
+        let handle = {
+            let session_ctx = Arc::new(SessionTaskContext::new(Arc::clone(self)));
+            let task_for_run = Arc::clone(&task);
+            let sub_clone = sub_id.clone();
+            tokio::spawn(async move {
+                let last_agent_message = task_for_run
+                    .run(Arc::clone(&session_ctx), turn_context, sub_clone.clone(), input)
+                    .await;
+                // Emit completion uniformly from spawn site so all tasks share the same lifecycle.
+                let sess = session_ctx.clone_session();
+                sess.on_task_finished(sub_clone, last_agent_message).await;
+            })
+            .abort_handle()
+        };
+
+        let running_task = RunningTask {
+            handle,
+            kind: task.kind(),
+            task,
+        };
+        self.register_new_active_task(sub_id, running_task).await;
+    }
+
+    pub async fn abort_all_tasks(self: &Arc<Self>, reason: TurnAbortReason) {
+        for (sub_id, task) in self.take_all_running_tasks().await {
+            self.handle_task_abort(sub_id, task, reason.clone()).await;
+        }
+    }
+
+    /// Removes `sub_id` from the active turn (clearing it once empty) and emits `TaskComplete`.
+    pub async fn on_task_finished(
+        self: &Arc<Self>,
+        sub_id: String,
+        last_agent_message: Option<String>,
+    ) {
+        let mut active = self.active_turn.lock().await;
+        if let Some(at) = active.as_mut()
+            && at.remove_task(&sub_id)
+        {
+            *active = None;
+        }
+        drop(active);
+        let event = Event {
+            id: sub_id,
+            msg: EventMsg::TaskComplete(TaskCompleteEvent { last_agent_message }),
+        };
+        self.send_event(event).await;
+    }
+
+    async fn register_new_active_task(&self, sub_id: String, task: RunningTask) {
+        let mut active = self.active_turn.lock().await;
+        let mut turn = ActiveTurn::default();
+        turn.add_task(sub_id, task);
+        *active = Some(turn);
+    }
+
+    async fn take_all_running_tasks(&self) -> Vec<(String, RunningTask)> {
+        let mut active = self.active_turn.lock().await;
+        match active.take() {
+            Some(mut at) => {
+                at.clear_pending().await;
+                at.drain_tasks().into_iter().collect()
+            }
+            None => Vec::new(),
+        }
+    }
+
+    /// Aborts `task` and emits `TurnAborted` for it; does nothing if it already finished.
+    async fn handle_task_abort(
+        self: &Arc<Self>,
+        sub_id: String,
+        task: RunningTask,
+        reason: TurnAbortReason,
+    ) {
+        if task.handle.is_finished() {
+            return;
+        }
+
+        trace!(task_kind = ?task.kind, sub_id, "aborting running task");
+        task.handle.abort();
+        let session_ctx = Arc::new(SessionTaskContext::new(Arc::clone(self)));
+        task.task.abort(session_ctx, &sub_id).await;
+
+        let event = Event {
+            id: sub_id,
+            msg: EventMsg::TurnAborted(TurnAbortedEvent { reason }),
+        };
+        self.send_event(event).await;
+    }
+}
+
+#[cfg(test)]
+mod tests {}
--- a/codex-rs/core/src/tasks/regular.rs
+++ b/codex-rs/core/src/tasks/regular.rs
@@ -0,0 +1,32 @@
+use std::sync::Arc;
+
+use async_trait::async_trait;
+
+use crate::codex::TurnContext;
+use crate::codex::run_task;
+use crate::protocol::InputItem;
+use crate::state::TaskKind;
+
+use super::SessionTask;
+use super::SessionTaskContext;
+
+#[derive(Clone, Copy, Default)]
+pub(crate) struct RegularTask;
+
+#[async_trait]
+impl SessionTask for RegularTask {
+    fn kind(&self) -> TaskKind {
+        TaskKind::Regular
+    }
+
+    /// Hands the turn to `run_task` and returns its result unchanged.
+    async fn run(
+        self: Arc<Self>,
+        session: Arc<SessionTaskContext>,
+        ctx: Arc<TurnContext>,
+        sub_id: String,
+        input: Vec<InputItem>,
+    ) -> Option<String> {
+        run_task(session.clone_session(), ctx, sub_id, input).await
+    }
+}
--- a/codex-rs/core/src/tasks/review.rs
+++ b/codex-rs/core/src/tasks/review.rs
@@ -0,0 +1,37 @@
+use std::sync::Arc;
+
+use async_trait::async_trait;
+
+use crate::codex::TurnContext;
+use crate::codex::exit_review_mode;
+use crate::codex::run_task;
+use crate::protocol::InputItem;
+use crate::state::TaskKind;
+
+use super::SessionTask;
+use super::SessionTaskContext;
+
+#[derive(Clone, Copy, Default)]
+pub(crate) struct ReviewTask;
+
+#[async_trait]
+impl SessionTask for ReviewTask {
+    fn kind(&self) -> TaskKind {
+        TaskKind::Review
+    }
+
+    async fn run(
+        self: Arc<Self>,
+        session: Arc<SessionTaskContext>,
+        ctx: Arc<TurnContext>,
+        sub_id: String,
+        input: Vec<InputItem>,
+    ) -> Option<String> {
+        run_task(session.clone_session(), ctx, sub_id, input).await
+    }
+
+    /// Leaves review mode when an in-flight review task is aborted.
+    async fn abort(&self, session: Arc<SessionTaskContext>, sub_id: &str) {
+        exit_review_mode(session.clone_session(), sub_id.to_string(), None).await;
+    }
+}
--- a/codex-rs/core/src/tool_apply_patch.rs
+++ b/codex-rs/core/src/tool_apply_patch.rs
@@ -10,11 +10,6 @@ use crate::openai_tools::ResponsesApiTool;

 const APPLY_PATCH_LARK_GRAMMAR: &str = include_str!("tool_apply_patch.lark");

-#[derive(Serialize, Deserialize)]
-pub(crate) struct ApplyPatchToolArgs {
-    pub(crate) input: String,
-}
-
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
 #[serde(rename_all = "snake_case")]
 pub enum ApplyPatchToolType {
--- a/codex-rs/core/src/unified_exec/mod.rs
+++ b/codex-rs/core/src/unified_exec/mod.rs
@@ -404,6 +404,8 @@ async fn create_unified_exec_session(
 #[cfg(test)]
 mod tests {
    use super::*;
+    #[cfg(unix)]
+    use core_test_support::skip_if_sandbox;

    #[test]
    fn push_chunk_trims_only_excess_bytes() {
@@ -425,6 +427,8 @@ mod tests {
    #[cfg(unix)]
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn unified_exec_persists_across_requests_jif() -> Result<(), UnifiedExecError> {
+        skip_if_sandbox!(Ok(()));
+
        let manager = UnifiedExecSessionManager::default();

        let open_shell = manager
@@ -462,6 +466,8 @@ mod tests {
    #[cfg(unix)]
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn multi_unified_exec_sessions() -> Result<(), UnifiedExecError> {
+        skip_if_sandbox!(Ok(()));
+
        let manager = UnifiedExecSessionManager::default();

        let shell_a = manager
@@ -508,6 +514,8 @@ mod tests {
    #[cfg(unix)]
    #[tokio::test]
    async fn unified_exec_timeouts() -> Result<(), UnifiedExecError> {
+        skip_if_sandbox!(Ok(()));
+
        let manager = UnifiedExecSessionManager::default();

        let open_shell = manager
@@ -601,6 +609,8 @@ mod tests {
    #[cfg(unix)]
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn reusing_completed_session_returns_unknown_session() -> Result<(), UnifiedExecError> {
+        skip_if_sandbox!(Ok(()));
+
        let manager = UnifiedExecSessionManager::default();

        let open_shell = manager
--- a/codex-rs/core/src/user_notification.rs
+++ b/codex-rs/core/src/user_notification.rs
@@ -62,9 +62,10 @@ pub(crate) enum UserNotification {
 #[cfg(test)]
 mod tests {
    use super::*;
+    use anyhow::Result;

    #[test]
-    fn test_user_notification() {
+    fn test_user_notification() -> Result<()> {
        let notification = UserNotification::AgentTurnComplete {
            turn_id: "12345".to_string(),
            input_messages: vec!["Rename `foo` to `bar` and update the callsites.".to_string()],
@@ -72,10 +73,11 @@ mod tests {
                "Rename complete and verified `cargo build` succeeds.".to_string(),
            ),
        };
-        let serialized = serde_json::to_string(&notification).unwrap();
+        let serialized = serde_json::to_string(&notification)?;
        assert_eq!(
            serialized,
            r#"{"type":"agent-turn-complete","turn-id":"12345","input-messages":["Rename `foo` to `bar` and update the callsites."],"last-assistant-message":"Rename complete and verified `cargo build` succeeds."}"#
        );
+        Ok(())
    }
 }
--- a/codex-rs/core/tests/common/Cargo.toml
+++ b/codex-rs/core/tests/common/Cargo.toml
@@ -8,6 +8,7 @@ path = "lib.rs"

 [dependencies]
 anyhow = { workspace = true }
+assert_cmd = { workspace = true }
 codex-core = { workspace = true }
 serde_json = { workspace = true }
 tempfile = { workspace = true }
--- a/codex-rs/core/tests/common/lib.rs
+++ b/codex-rs/core/tests/common/lib.rs
@@ -9,6 +9,7 @@ use codex_core::config::ConfigToml;

 pub mod responses;
 pub mod test_codex;
+pub mod test_codex_exec;

 /// Returns a default `Config` whose on-disk state is confined to the provided
 /// temporary directory. Using a per-test directory keeps tests hermetic and
@@ -128,20 +129,56 @@ where
    }
 }

+pub fn sandbox_env_var() -> &'static str {
+    codex_core::spawn::CODEX_SANDBOX_ENV_VAR
+}
+
+pub fn sandbox_network_env_var() -> &'static str {
+    codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR
+}
+
 #[macro_export]
-macro_rules! non_sandbox_test {
-    // For tests that return ()
+macro_rules! skip_if_sandbox {
    () => {{
-        if ::std::env::var("CODEX_SANDBOX_NETWORK_DISABLED").is_ok() {
-            println!("Skipping test because it cannot execute when network is disabled in a Codex sandbox.");
+        if ::std::env::var($crate::sandbox_env_var())
+            == ::core::result::Result::Ok("seatbelt".to_string())
+        {
+            eprintln!(
+                "{} is set to 'seatbelt', skipping test.",
+                $crate::sandbox_env_var()
+            );
            return;
        }
    }};
-    // For tests that return Result<(), _>
-    (result $(,)?) => {{
-        if ::std::env::var("CODEX_SANDBOX_NETWORK_DISABLED").is_ok() {
-            println!("Skipping test because it cannot execute when network is disabled in a Codex sandbox.");
-            return ::core::result::Result::Ok(());
+    ($return_value:expr $(,)?) => {{
+        if ::std::env::var($crate::sandbox_env_var())
+            == ::core::result::Result::Ok("seatbelt".to_string())
+        {
+            eprintln!(
+                "{} is set to 'seatbelt', skipping test.",
+                $crate::sandbox_env_var()
+            );
+            return $return_value;
+        }
+    }};
+}
+
+#[macro_export]
+macro_rules! skip_if_no_network {
+    () => {{
+        if ::std::env::var($crate::sandbox_network_env_var()).is_ok() {
+            println!(
+                "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+            );
+            return;
+        }
+    }};
+    ($return_value:expr $(,)?) => {{
+        if ::std::env::var($crate::sandbox_network_env_var()).is_ok() {
+            println!(
+                "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+            );
+            return $return_value;
        }
    }};
 }
--- a/codex-rs/core/tests/common/responses.rs
+++ b/codex-rs/core/tests/common/responses.rs
@@ -2,6 +2,7 @@ use serde_json::Value;
 use wiremock::BodyPrintLimit;
 use wiremock::Mock;
 use wiremock::MockServer;
+use wiremock::Respond;
 use wiremock::ResponseTemplate;
 use wiremock::matchers::method;
 use wiremock::matchers::path;
@@ -121,6 +122,7 @@ where
        .and(path("/v1/responses"))
        .and(matcher)
        .respond_with(sse_response(body))
+        .up_to_n_times(1)
        .mount(server)
        .await;
 }
@@ -131,3 +133,41 @@ pub async fn start_mock_server() -> MockServer {
        .start()
        .await
 }
+
+/// Mounts a mock for POST `/v1/responses` that serves `bodies` in order, one
+/// SSE body per request. The responder panics once requests outnumber
+/// `bodies`, and the mock expects exactly `bodies.len()` calls.
+pub async fn mount_sse_sequence(server: &MockServer, bodies: Vec<String>) {
+    use std::sync::atomic::AtomicUsize;
+    use std::sync::atomic::Ordering;
+
+    struct SeqResponder {
+        num_calls: AtomicUsize,
+        responses: Vec<String>,
+    }
+
+    impl Respond for SeqResponder {
+        fn respond(&self, _: &wiremock::Request) -> ResponseTemplate {
+            let call_num = self.num_calls.fetch_add(1, Ordering::SeqCst);
+            let Some(body) = self.responses.get(call_num) else {
+                panic!("no response for {call_num}");
+            };
+            ResponseTemplate::new(200)
+                .insert_header("content-type", "text/event-stream")
+                .set_body_string(body.clone())
+        }
+    }
+
+    let expected_calls = bodies.len() as u64;
+    let responder = SeqResponder {
+        num_calls: AtomicUsize::new(0),
+        responses: bodies,
+    };
+
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(responder)
+        .expect(expected_calls)
+        .mount(server)
+        .await;
+}
--- a/codex-rs/core/tests/common/test_codex_exec.rs
+++ b/codex-rs/core/tests/common/test_codex_exec.rs
@@ -0,0 +1,40 @@
+#![allow(clippy::expect_used)]
+use std::path::Path;
+use tempfile::TempDir;
+use wiremock::MockServer;
+
+pub struct TestCodexExecBuilder {
+    home: TempDir,
+    cwd: TempDir,
+}
+
+impl TestCodexExecBuilder {
+    pub fn cmd(&self) -> assert_cmd::Command {
+        let mut command = assert_cmd::Command::cargo_bin("codex-exec")
+            .expect("should find binary for codex-exec");
+        command
+            .current_dir(self.cwd.path())
+            .env("CODEX_HOME", self.home.path())
+            .env("OPENAI_API_KEY", "dummy");
+        command
+    }
+    pub fn cmd_with_server(&self, server: &MockServer) -> assert_cmd::Command {
+        // Same as `cmd`, plus `OPENAI_BASE_URL` set to `<server uri>/v1`.
+        let mut command = self.cmd();
+        command.env("OPENAI_BASE_URL", format!("{}/v1", server.uri()));
+        command
+    }
+
+    pub fn cwd_path(&self) -> &Path {
+        self.cwd.path()
+    }
+    pub fn home_path(&self) -> &Path {
+        self.home.path()
+    }
+}
+
+/// Creates a builder backed by fresh temporary home and working directories.
+pub fn test_codex_exec() -> TestCodexExecBuilder {
+    let home = TempDir::new().expect("create temp home");
+    let cwd = TempDir::new().expect("create temp cwd");
+    TestCodexExecBuilder { home, cwd }
+}
--- a/codex-rs/core/tests/suite/abort_tasks.rs
+++ b/codex-rs/core/tests/suite/abort_tasks.rs
@@ -0,0 +1,66 @@
+use std::time::Duration;
+
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::InputItem;
+use codex_core::protocol::Op;
+use core_test_support::responses::ev_function_call;
+use core_test_support::responses::mount_sse_once;
+use core_test_support::responses::sse;
+use core_test_support::responses::start_mock_server;
+use core_test_support::test_codex::test_codex;
+use core_test_support::wait_for_event_with_timeout;
+use serde_json::json;
+use wiremock::matchers::body_string_contains;
+
+/// Integration test: spawn a long-running shell tool via a mocked Responses SSE
+/// function call, then interrupt the session and expect TurnAborted.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn interrupt_long_running_tool_emits_turn_aborted() {
+    // This test talks to a local mock server, so skip it (like the other
+    // mock-server tests) when the sandbox has networking disabled.
+    core_test_support::skip_if_no_network!();
+
+    // `sleep 60` far outlives the 5s waits below, so the tool should still
+    // be running when the interrupt is submitted.
+    let args = json!({
+        "command": ["bash", "-lc", "sleep 60"],
+        "timeout_ms": 60_000
+    })
+    .to_string();
+    let body = sse(vec![ev_function_call("call_sleep", "shell", &args)]);
+
+    let server = start_mock_server().await;
+    mount_sse_once(&server, body_string_contains("start sleep"), body).await;
+
+    let codex = test_codex().build(&server).await.unwrap().codex;
+
+    let wait_timeout = Duration::from_secs(5);
+
+    // Kick off a turn that triggers the function call.
+    codex
+        .submit(Op::UserInput {
+            items: vec![InputItem::Text {
+                text: "start sleep".into(),
+            }],
+        })
+        .await
+        .unwrap();
+
+    // Wait until the exec begins to avoid a race, then interrupt.
+    wait_for_event_with_timeout(
+        &codex,
+        |ev| matches!(ev, EventMsg::ExecCommandBegin(_)),
+        wait_timeout,
+    )
+    .await;
+
+    codex.submit(Op::Interrupt).await.unwrap();
+
+    // Expect TurnAborted soon after.
+    wait_for_event_with_timeout(
+        &codex,
+        |ev| matches!(ev, EventMsg::TurnAborted(_)),
+        wait_timeout,
+    )
+    .await;
+}
--- a/codex-rs/core/tests/suite/cli_stream.rs
+++ b/codex-rs/core/tests/suite/cli_stream.rs
@@ -1,7 +1,7 @@
 use assert_cmd::Command as AssertCommand;
 use codex_core::RolloutRecorder;
 use codex_core::protocol::GitInfo;
-use core_test_support::non_sandbox_test;
+use core_test_support::skip_if_no_network;
 use std::time::Duration;
 use std::time::Instant;
 use tempfile::TempDir;
@@ -21,7 +21,7 @@ use wiremock::matchers::path;
 /// 4. Ensures the response is received exactly once and contains "hi"
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn chat_mode_stream_cli() {
-    non_sandbox_test!();
+    skip_if_no_network!();

    let server = MockServer::start().await;
    let sse = concat!(
@@ -97,7 +97,7 @@ async fn chat_mode_stream_cli() {
 /// received by a mock OpenAI Responses endpoint.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn exec_cli_applies_experimental_instructions_file() {
-    non_sandbox_test!();
+    skip_if_no_network!();

    // Start mock server which will capture the request and return a minimal
    // SSE stream for a single turn.
@@ -185,7 +185,7 @@ async fn exec_cli_applies_experimental_instructions_file() {
 /// 4. Ensures the fixture content is correctly streamed through the CLI
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn responses_api_stream_cli() {
-    non_sandbox_test!();
+    skip_if_no_network!();

    let fixture =
        std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/cli_responses_fixture.sse");
@@ -217,7 +217,7 @@ async fn responses_api_stream_cli() {
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn integration_creates_and_checks_session_file() {
    // Honor sandbox network restrictions for CI parity with the other tests.
-    non_sandbox_test!();
+    skip_if_no_network!();

    // 1. Temp home so we read/write isolated session files.
    let home = TempDir::new().unwrap();
--- a/codex-rs/core/tests/suite/client.rs
+++ b/codex-rs/core/tests/suite/client.rs
@@ -21,8 +21,9 @@ use codex_protocol::models::ReasoningItemReasoningSummary;
 use codex_protocol::models::WebSearchAction;
 use core_test_support::load_default_config_for_test;
 use core_test_support::load_sse_fixture_with_id;
-use core_test_support::non_sandbox_test;
 use core_test_support::responses;
+use core_test_support::skip_if_no_network;
+use core_test_support::test_codex::test_codex;
 use core_test_support::wait_for_event;
 use futures::StreamExt;
 use serde_json::json;
@@ -126,7 +127,7 @@ fn write_auth_json(

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn resume_includes_initial_messages_and_sends_prior_items() {
-    non_sandbox_test!();
+    skip_if_no_network!();

    // Create a fake rollout session file with prior user + system + assistant messages.
    let tmpdir = TempDir::new().unwrap();
@@ -292,7 +293,7 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn includes_conversation_id_and_model_headers_in_request() {
-    non_sandbox_test!();
+    skip_if_no_network!();

    // Mock server
    let server = MockServer::start().await;
@@ -360,6 +361,7 @@ async fn includes_conversation_id_and_model_headers_in_request() {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn includes_base_instructions_override_in_request() {
+    skip_if_no_network!();
    // Mock server
    let server = MockServer::start().await;

@@ -417,7 +419,7 @@ async fn includes_base_instructions_override_in_request() {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn chatgpt_auth_sends_correct_request() {
-    non_sandbox_test!();
+    skip_if_no_network!();

    // Mock server
    let server = MockServer::start().await;
@@ -491,7 +493,7 @@ async fn chatgpt_auth_sends_correct_request() {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn prefers_apikey_when_config_prefers_apikey_even_with_chatgpt_tokens() {
-    non_sandbox_test!();
+    skip_if_no_network!();

    // Mock server
    let server = MockServer::start().await;
@@ -557,6 +559,7 @@ async fn prefers_apikey_when_config_prefers_apikey_even_with_chatgpt_tokens() {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn includes_user_instructions_message_in_request() {
+    skip_if_no_network!();
    let server = MockServer::start().await;

    let first = ResponseTemplate::new(200)
@@ -618,7 +621,7 @@ async fn includes_user_instructions_message_in_request() {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn azure_responses_request_includes_store_and_reasoning_ids() {
-    non_sandbox_test!();
+    skip_if_no_network!();

    let server = MockServer::start().await;

@@ -754,6 +757,7 @@ async fn azure_responses_request_includes_store_and_reasoning_ids() {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn token_count_includes_rate_limits_snapshot() {
+    skip_if_no_network!();
    let server = MockServer::start().await;

    let sse_body = responses::sse(vec![responses::ev_completed_with_tokens("resp_rate", 123)]);
@@ -762,9 +766,10 @@ async fn token_count_includes_rate_limits_snapshot() {
        .insert_header("content-type", "text/event-stream")
        .insert_header("x-codex-primary-used-percent", "12.5")
        .insert_header("x-codex-secondary-used-percent", "40.0")
-        .insert_header("x-codex-primary-over-secondary-limit-percent", "75.0")
        .insert_header("x-codex-primary-window-minutes", "10")
        .insert_header("x-codex-secondary-window-minutes", "60")
+        .insert_header("x-codex-primary-reset-after-seconds", "1800")
+        .insert_header("x-codex-secondary-reset-after-seconds", "7200")
        .set_body_raw(sse_body, "text/event-stream");

    Mock::given(method("POST"))
@@ -797,7 +802,38 @@ async fn token_count_includes_rate_limits_snapshot() {
        .await
        .unwrap();

-    let token_event = wait_for_event(&codex, |msg| matches!(msg, EventMsg::TokenCount(_))).await;
+    let first_token_event =
+        wait_for_event(&codex, |msg| matches!(msg, EventMsg::TokenCount(_))).await;
+    let rate_limit_only = match first_token_event {
+        EventMsg::TokenCount(ev) => ev,
+        _ => unreachable!(),
+    };
+
+    let rate_limit_json = serde_json::to_value(&rate_limit_only).unwrap();
+    pretty_assertions::assert_eq!(
+        rate_limit_json,
+        json!({
+            "info": null,
+            "rate_limits": {
+                "primary": {
+                    "used_percent": 12.5,
+                    "window_minutes": 10,
+                    "resets_in_seconds": 1800
+                },
+                "secondary": {
+                    "used_percent": 40.0,
+                    "window_minutes": 60,
+                    "resets_in_seconds": 7200
+                }
+            }
+        })
+    );
+
+    let token_event = wait_for_event(
+        &codex,
+        |msg| matches!(msg, EventMsg::TokenCount(ev) if ev.info.is_some()),
+    )
+    .await;
    let final_payload = match token_event {
        EventMsg::TokenCount(ev) => ev,
        _ => unreachable!(),
@@ -826,11 +862,16 @@ async fn token_count_includes_rate_limits_snapshot() {
                "model_context_window": 272000
            },
            "rate_limits": {
-                "primary_used_percent": 12.5,
-                "secondary_used_percent": 40.0,
-                "primary_to_secondary_ratio_percent": 75.0,
-                "primary_window_minutes": 10,
-                "secondary_window_minutes": 60
+                "primary": {
+                    "used_percent": 12.5,
+                    "window_minutes": 10,
+                    "resets_in_seconds": 1800
+                },
+                "secondary": {
+                    "used_percent": 40.0,
+                    "window_minutes": 60,
+                    "resets_in_seconds": 7200
+                }
            }
        })
    );
@@ -841,13 +882,107 @@ async fn token_count_includes_rate_limits_snapshot() {
    let final_snapshot = final_payload
        .rate_limits
        .expect("latest rate limit snapshot should be retained");
-    assert_eq!(final_snapshot.primary_used_percent, 12.5);
+    assert_eq!(
+        final_snapshot
+            .primary
+            .as_ref()
+            .map(|window| window.used_percent),
+        Some(12.5)
+    );
+    assert_eq!(
+        final_snapshot
+            .primary
+            .as_ref()
+            .and_then(|window| window.resets_in_seconds),
+        Some(1800)
+    );

    wait_for_event(&codex, |msg| matches!(msg, EventMsg::TaskComplete(_))).await;
 }

+/// A 429 `usage_limit_reached` reply yields a rate-limit-only `TokenCount`, then an `Error`.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn usage_limit_error_emits_rate_limit_event() -> anyhow::Result<()> {
+    skip_if_no_network!(Ok(()));
+    let server = MockServer::start().await;
+
+    let response = ResponseTemplate::new(429)
+        .insert_header("x-codex-primary-used-percent", "100.0")
+        .insert_header("x-codex-secondary-used-percent", "87.5")
+        .insert_header("x-codex-primary-window-minutes", "15")
+        .insert_header("x-codex-secondary-window-minutes", "60")
+        .set_body_json(json!({
+            "error": {
+                "type": "usage_limit_reached",
+                "message": "limit reached",
+                "resets_in_seconds": 42,
+                "plan_type": "pro"
+            }
+        }));
+
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(response)
+        .expect(1)
+        .mount(&server)
+        .await;
+
+    let mut builder = test_codex();
+    let codex_fixture = builder.build(&server).await?;
+    let codex = codex_fixture.codex.clone();
+    // The mocked reply carries no `*-reset-after-seconds` headers; null resets are expected.
+    let expected_limits = json!({
+        "primary": {
+            "used_percent": 100.0,
+            "window_minutes": 15,
+            "resets_in_seconds": null
+        },
+        "secondary": {
+            "used_percent": 87.5,
+            "window_minutes": 60,
+            "resets_in_seconds": null
+        }
+    });
+
+    let submission_id = codex
+        .submit(Op::UserInput {
+            items: vec![InputItem::Text {
+                text: "hello".into(),
+            }],
+        })
+        .await
+        .expect("submission should succeed while emitting usage limit error events");
+
+    let token_event = wait_for_event(&codex, |msg| matches!(msg, EventMsg::TokenCount(_))).await;
+    let EventMsg::TokenCount(event) = token_event else {
+        unreachable!();
+    };
+
+    let event_json = serde_json::to_value(&event).expect("serialize token count event");
+    pretty_assertions::assert_eq!(
+        event_json,
+        json!({
+            "info": null,
+            "rate_limits": expected_limits
+        })
+    );
+
+    let error_event = wait_for_event(&codex, |msg| matches!(msg, EventMsg::Error(_))).await;
+    let EventMsg::Error(error_event) = error_event else {
+        unreachable!();
+    };
+    assert!(
+        error_event.message.to_lowercase().contains("usage limit"),
+        "unexpected error message for submission {submission_id}: {}",
+        error_event.message
+    );
+
+    Ok(())
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn azure_overrides_assign_properties_used_for_responses_url() {
+    skip_if_no_network!();
    let existing_env_var_with_random_value = if cfg!(windows) { "USERNAME" } else { "USER" };

    // Mock server
@@ -924,6 +1059,7 @@ async fn azure_overrides_assign_properties_used_for_responses_url() {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn env_var_overrides_loaded_auth() {
+    skip_if_no_network!();
    let existing_env_var_with_random_value = if cfg!(windows) { "USERNAME" } else { "USER" };

    // Mock server
@@ -1011,7 +1147,7 @@ fn create_dummy_codex_auth() -> CodexAuth {
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn history_dedupes_streamed_and_final_messages_across_turns() {
    // Skip under Codex sandbox network restrictions (mirrors other tests).
-    non_sandbox_test!();
+    skip_if_no_network!();

    // Mock server that will receive three sequential requests and return the same SSE stream
    // each time: a few deltas, then a final assistant message, then completed.
--- a/codex-rs/core/tests/suite/compact.rs
+++ b/codex-rs/core/tests/suite/compact.rs
@@ -10,6 +10,7 @@ use codex_core::protocol::Op;
 use codex_core::protocol::RolloutItem;
 use codex_core::protocol::RolloutLine;
 use core_test_support::load_default_config_for_test;
+use core_test_support::skip_if_no_network;
 use core_test_support::wait_for_event;
 use tempfile::TempDir;
 use wiremock::Mock;
@@ -19,7 +20,7 @@ use wiremock::ResponseTemplate;
 use wiremock::matchers::method;
 use wiremock::matchers::path;

-use core_test_support::non_sandbox_test;
+use codex_core::codex::compact::SUMMARIZATION_PROMPT;
 use core_test_support::responses::ev_assistant_message;
 use core_test_support::responses::ev_completed;
 use core_test_support::responses::ev_completed_with_tokens;
@@ -37,7 +38,6 @@ use std::sync::atomic::Ordering;

 pub(super) const FIRST_REPLY: &str = "FIRST_REPLY";
 pub(super) const SUMMARY_TEXT: &str = "SUMMARY_ONLY_CONTEXT";
-pub(super) const SUMMARIZE_TRIGGER: &str = "Start Summarization";
 const THIRD_USER_MSG: &str = "next turn";
 const AUTO_SUMMARY_TEXT: &str = "AUTO_SUMMARY";
 const FIRST_AUTO_MSG: &str = "token limit start";
@@ -53,7 +53,7 @@ const DUMMY_CALL_ID: &str = "call-multi-auto";

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn summarize_context_three_requests_and_instructions() {
-    non_sandbox_test!();
+    skip_if_no_network!();

    // Set up a mock server that we can inspect after the run.
    let server = start_mock_server().await;
@@ -77,13 +77,13 @@ async fn summarize_context_three_requests_and_instructions() {
    let first_matcher = |req: &wiremock::Request| {
        let body = std::str::from_utf8(&req.body).unwrap_or("");
        body.contains("\"text\":\"hello world\"")
-            && !body.contains(&format!("\"text\":\"{SUMMARIZE_TRIGGER}\""))
+            && !body.contains("You have exceeded the maximum number of tokens")
    };
    mount_sse_once(&server, first_matcher, sse1).await;

    let second_matcher = |req: &wiremock::Request| {
        let body = std::str::from_utf8(&req.body).unwrap_or("");
-        body.contains(&format!("\"text\":\"{SUMMARIZE_TRIGGER}\""))
+        body.contains("You have exceeded the maximum number of tokens")
    };
    mount_sse_once(&server, second_matcher, sse2).await;

@@ -121,7 +121,7 @@ async fn summarize_context_three_requests_and_instructions() {
        .unwrap();
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

-    // 2) Summarize – second hit with summarization instructions.
+    // 2) Summarize – second hit should include the summarization prompt.
    codex.submit(Op::Compact).await.unwrap();
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

@@ -148,16 +148,12 @@ async fn summarize_context_three_requests_and_instructions() {
    let body2 = req2.body_json::<serde_json::Value>().unwrap();
    let body3 = req3.body_json::<serde_json::Value>().unwrap();

-    // System instructions should change for the summarization turn.
+    // Manual compact should keep the baseline developer instructions.
    let instr1 = body1.get("instructions").and_then(|v| v.as_str()).unwrap();
    let instr2 = body2.get("instructions").and_then(|v| v.as_str()).unwrap();
-    assert_ne!(
+    assert_eq!(
        instr1, instr2,
-        "summarization should override base instructions"
-    );
-    assert!(
-        instr2.contains("You have exceeded the maximum number of tokens"),
-        "summarization instructions not applied"
+        "manual compact should keep the standard developer instructions"
    );

    // The summarization request should include the injected user input marker.
@@ -167,14 +163,14 @@ async fn summarize_context_three_requests_and_instructions() {
    assert_eq!(last2.get("type").unwrap().as_str().unwrap(), "message");
    assert_eq!(last2.get("role").unwrap().as_str().unwrap(), "user");
    let text2 = last2["content"][0]["text"].as_str().unwrap();
-    assert!(
-        text2.contains(SUMMARIZE_TRIGGER),
+    assert_eq!(
+        text2, SUMMARIZATION_PROMPT,
        "expected summarize trigger, got `{text2}`"
    );

    // Third request must contain the refreshed instructions, bridge summary message and new user msg.
    let input3 = body3.get("input").and_then(|v| v.as_array()).unwrap();
-    println!("third request body: {body3}");
+
    assert!(
        input3.len() >= 3,
        "expected refreshed context and new user message in third request"
@@ -215,13 +211,13 @@ async fn summarize_context_three_requests_and_instructions() {
        "bridge should capture earlier user messages"
    );
    assert!(
-        !bridge_text.contains(SUMMARIZE_TRIGGER),
+        !bridge_text.contains(SUMMARIZATION_PROMPT),
        "bridge text should not echo the summarize trigger"
    );
    assert!(
        !messages
            .iter()
-            .any(|(_, text)| text.contains(SUMMARIZE_TRIGGER)),
+            .any(|(_, text)| text.contains(SUMMARIZATION_PROMPT)),
        "third request should not include the summarize trigger"
    );

@@ -274,7 +270,7 @@ async fn summarize_context_three_requests_and_instructions() {
 #[cfg_attr(windows, tokio::test(flavor = "multi_thread", worker_threads = 4))]
 #[cfg_attr(not(windows), tokio::test(flavor = "multi_thread", worker_threads = 2))]
 async fn auto_compact_runs_after_token_limit_hit() {
-    non_sandbox_test!();
+    skip_if_no_network!();

    let server = start_mock_server().await;

@@ -395,6 +391,7 @@ async fn auto_compact_runs_after_token_limit_hit() {
        "auto compact should add a third request"
    );

+    let body_first = requests[0].body_json::<serde_json::Value>().unwrap();
    let body3 = requests[auto_compact_index]
        .body_json::<serde_json::Value>()
        .unwrap();
@@ -402,15 +399,38 @@ async fn auto_compact_runs_after_token_limit_hit() {
        .get("instructions")
        .and_then(|v| v.as_str())
        .unwrap_or_default();
-    assert!(
-        instructions.contains("You have exceeded the maximum number of tokens"),
-        "auto compact should reuse summarization instructions"
+    let baseline_instructions = body_first
+        .get("instructions")
+        .and_then(|v| v.as_str())
+        .unwrap_or_default()
+        .to_string();
+    assert_eq!(
+        instructions, baseline_instructions,
+        "auto compact should keep the standard developer instructions",
+    );
+
+    let input3 = body3.get("input").and_then(|v| v.as_array()).unwrap();
+    let last3 = input3
+        .last()
+        .expect("auto compact request should append a user message");
+    assert_eq!(last3.get("type").and_then(|v| v.as_str()), Some("message"));
+    assert_eq!(last3.get("role").and_then(|v| v.as_str()), Some("user"));
+    let last_text = last3
+        .get("content")
+        .and_then(|v| v.as_array())
+        .and_then(|items| items.first())
+        .and_then(|item| item.get("text"))
+        .and_then(|text| text.as_str())
+        .unwrap_or_default();
+    assert_eq!(
+        last_text, SUMMARIZATION_PROMPT,
+        "auto compact should send the summarization prompt as a user message",
    );
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn auto_compact_persists_rollout_entries() {
-    non_sandbox_test!();
+    skip_if_no_network!();

    let server = start_mock_server().await;

@@ -538,7 +558,7 @@ async fn auto_compact_persists_rollout_entries() {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn auto_compact_stops_after_failed_attempt() {
-    non_sandbox_test!();
+    skip_if_no_network!();

    let server = start_mock_server().await;

@@ -635,19 +655,31 @@ async fn auto_compact_stops_after_failed_attempt() {
    );

    let last_body = requests[2].body_json::<serde_json::Value>().unwrap();
-    let instructions = last_body
-        .get("instructions")
-        .and_then(|v| v.as_str())
-        .unwrap_or_default();
+    let input = last_body
+        .get("input")
+        .and_then(|v| v.as_array())
+        .unwrap_or_else(|| panic!("unexpected request format: {last_body}"));
+    let contains_prompt = input.iter().any(|item| {
+        item.get("type").and_then(|v| v.as_str()) == Some("message")
+            && item.get("role").and_then(|v| v.as_str()) == Some("user")
+            && item
+                .get("content")
+                .and_then(|v| v.as_array())
+                .and_then(|items| items.first())
+                .and_then(|entry| entry.get("text"))
+                .and_then(|text| text.as_str())
+                .map(|text| text == SUMMARIZATION_PROMPT)
+                .unwrap_or(false)
+    });
    assert!(
-        !instructions.contains("You have exceeded the maximum number of tokens"),
-        "third request should be the follow-up turn, not another summarization"
+        !contains_prompt,
+        "third request should be the follow-up turn, not another summarization",
    );
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_events() {
-    non_sandbox_test!();
+    skip_if_no_network!();

    let server = start_mock_server().await;

@@ -785,7 +817,7 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
    );
    assert!(
        request_bodies[1].contains("You have exceeded the maximum number of tokens"),
-        "first auto compact request should use summarization instructions"
+        "first auto compact request should include the summarization prompt"
    );
    assert!(
        request_bodies[3].contains(&format!("unsupported call: {DUMMY_FUNCTION_NAME}")),
@@ -793,6 +825,6 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
    );
    assert!(
        request_bodies[4].contains("You have exceeded the maximum number of tokens"),
-        "second auto compact request should reuse summarization instructions"
+        "second auto compact request should include the summarization prompt"
    );
 }
--- a/codex-rs/core/tests/suite/compact_resume_fork.rs
+++ b/codex-rs/core/tests/suite/compact_resume_fork.rs
@@ -8,7 +8,6 @@
 //! model-visible history matches the expected sequence of messages.

 use super::compact::FIRST_REPLY;
-use super::compact::SUMMARIZE_TRIGGER;
 use super::compact::SUMMARY_TEXT;
 use codex_core::CodexAuth;
 use codex_core::CodexConversation;
@@ -16,6 +15,7 @@ use codex_core::ConversationManager;
 use codex_core::ModelProviderInfo;
 use codex_core::NewConversation;
 use codex_core::built_in_model_providers;
+use codex_core::codex::compact::SUMMARIZATION_PROMPT;
 use codex_core::config::Config;
 use codex_core::protocol::ConversationPathResponseEvent;
 use codex_core::protocol::EventMsg;
@@ -183,11 +183,7 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
    let compact_1 = json!(
    {
      "model": "gpt-5-codex",
-      "instructions": "You have exceeded the maximum number of tokens, please stop coding and instead write a short memento message for the next agent. Your note should:
- Summarize what you finished and what still needs work. If there was a recent update_plan call, repeat its steps verbatim.
- List outstanding TODOs with file paths / line numbers so they're easy to find.
- Flag code that needs more tests (edge cases, performance, integration, etc.).
- Record any open bugs, quirks, or setup steps that will make it easier for the next agent to pick up where you left off.",
+      "instructions": prompt,
      "input": [
        {
          "type": "message",
@@ -235,7 +231,7 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
          "content": [
            {
              "type": "input_text",
-              "text": "Start Summarization"
+              "text": SUMMARIZATION_PROMPT
            }
          ]
        }
@@ -488,13 +484,14 @@ SUMMARY_ONLY_CONTEXT"
      ],
      "prompt_cache_key": fork_prompt_cache_key
    });
-    let expected = json!([
+    let mut expected = json!([
        user_turn_1,
        compact_1,
        user_turn_2_after_compact,
        usert_turn_3_after_resume,
        user_turn_3_after_fork
    ]);
+    normalize_line_endings(&mut expected);
    assert_eq!(requests.len(), 5);
    assert_eq!(json!(requests), expected);
 }
@@ -580,7 +577,7 @@ async fn compact_resume_after_second_compaction_preserves_history() {
        .unwrap_or_default()
        .to_string();

-    let expected = json!([
+    let mut expected = json!([
      {
        "instructions": prompt,
        "input": [
@@ -637,6 +634,7 @@ async fn compact_resume_after_second_compaction_preserves_history() {
        ],
      }
    ]);
+    normalize_line_endings(&mut expected);
    let last_request_after_2_compacts = json!([{
        "instructions": requests[requests.len() -1]["instructions"],
        "input": requests[requests.len() -1]["input"],
@@ -698,7 +696,8 @@ async fn mount_initial_flow(server: &MockServer) {
    let match_first = |req: &wiremock::Request| {
        let body = std::str::from_utf8(&req.body).unwrap_or("");
        body.contains("\"text\":\"hello world\"")
-            && !body.contains(&format!("\"text\":\"{SUMMARIZE_TRIGGER}\""))
+            && !body.contains("You have exceeded the maximum number of tokens")
+            && !body.contains(&format!("\"text\":\"{SUMMARY_TEXT}\""))
            && !body.contains("\"text\":\"AFTER_COMPACT\"")
            && !body.contains("\"text\":\"AFTER_RESUME\"")
            && !body.contains("\"text\":\"AFTER_FORK\"")
@@ -707,7 +706,7 @@ async fn mount_initial_flow(server: &MockServer) {

    let match_compact = |req: &wiremock::Request| {
        let body = std::str::from_utf8(&req.body).unwrap_or("");
-        body.contains(&format!("\"text\":\"{SUMMARIZE_TRIGGER}\""))
+        body.contains("You have exceeded the maximum number of tokens")
    };
    mount_sse_once(server, match_compact, sse2).await;

@@ -741,7 +740,8 @@ async fn mount_second_compact_flow(server: &MockServer) {

    let match_second_compact = |req: &wiremock::Request| {
        let body = std::str::from_utf8(&req.body).unwrap_or("");
-        body.contains(&format!("\"text\":\"{SUMMARIZE_TRIGGER}\"")) && body.contains("AFTER_FORK")
+        body.contains("You have exceeded the maximum number of tokens")
+            && body.contains("AFTER_FORK")
    };
    mount_sse_once(server, match_second_compact, sse6).await;

--- a/codex-rs/core/tests/suite/fork_conversation.rs
+++ b/codex-rs/core/tests/suite/fork_conversation.rs
@@ -14,6 +14,7 @@ use codex_core::protocol::Op;
 use codex_core::protocol::RolloutItem;
 use codex_core::protocol::RolloutLine;
 use core_test_support::load_default_config_for_test;
+use core_test_support::skip_if_no_network;
 use core_test_support::wait_for_event;
 use tempfile::TempDir;
 use wiremock::Mock;
@@ -29,6 +30,8 @@ fn sse_completed(id: &str) -> String {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn fork_conversation_twice_drops_to_first_message() {
+    skip_if_no_network!();
+
    // Start a mock server that completes three turns.
    let server = MockServer::start().await;
    let sse = sse_completed("resp");
--- a/codex-rs/core/tests/suite/json_result.rs
+++ b/codex-rs/core/tests/suite/json_result.rs
@@ -0,0 +1,102 @@
+#![cfg(not(target_os = "windows"))]
+
+use codex_core::protocol::AskForApproval;
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::InputItem;
+use codex_core::protocol::Op;
+use codex_core::protocol::SandboxPolicy;
+use codex_protocol::config_types::ReasoningSummary;
+use core_test_support::responses;
+use core_test_support::skip_if_no_network;
+use core_test_support::test_codex::TestCodex;
+use core_test_support::test_codex::test_codex;
+use core_test_support::wait_for_event;
+use pretty_assertions::assert_eq;
+use responses::ev_assistant_message;
+use responses::ev_completed;
+use responses::sse;
+use responses::start_mock_server;
+
+/// JSON Schema sent as `final_output_json_schema`; the request matcher expects an equal
+/// JSON value under `text.format.schema`.
+const SCHEMA: &str = r#"
+{
+    "type": "object",
+    "properties": {
+        "explanation": { "type": "string" },
+        "final_answer": { "type": "string" }
+    },
+    "required": ["explanation", "final_answer"],
+    "additionalProperties": false
+}
+"#;
+
+/// Runs the shared structured-output scenario with model `gpt-5`.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn codex_returns_json_result_for_gpt5() -> anyhow::Result<()> {
+    codex_returns_json_result(String::from("gpt-5")).await
+}
+
+/// Runs the shared structured-output scenario with model `gpt-5-codex`.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn codex_returns_json_result_for_gpt5_codex() -> anyhow::Result<()> {
+    codex_returns_json_result(String::from("gpt-5-codex")).await
+}
+
+/// Submits one user turn carrying [`SCHEMA`] for `model` and checks that the outgoing
+/// request advertises that schema under `text.format`, and that the resulting agent
+/// message parses as JSON with the expected `explanation` and `final_answer` values.
+async fn codex_returns_json_result(model: String) -> anyhow::Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+
+    // Canned stream: one assistant message whose text is schema-shaped JSON, then the
+    // completion event.
+    let reply_text = r#"{"explanation": "explanation", "final_answer": "final_answer"}"#;
+    let stream = sse(vec![ev_assistant_message("m2", reply_text), ev_completed("r1")]);
+
+    // The matcher accepts only requests whose `text.format` object names the schema,
+    // marks it strict, and embeds a value equal to `SCHEMA`.
+    let expected_schema: serde_json::Value = serde_json::from_str(SCHEMA)?;
+    let request_carries_schema = move |req: &wiremock::Request| {
+        let body: serde_json::Value = serde_json::from_slice(&req.body).unwrap_or_default();
+        let Some(format) = body.get("text").and_then(|text| text.get("format")) else {
+            return false;
+        };
+
+        format.get("name").and_then(|v| v.as_str()) == Some("codex_output_schema")
+            && format.get("type").and_then(|v| v.as_str()) == Some("json_schema")
+            && format.get("strict").and_then(|v| v.as_bool()) == Some(true)
+            && format.get("schema") == Some(&expected_schema)
+    };
+    responses::mount_sse_once(&server, request_carries_schema, stream).await;
+
+    let TestCodex { codex, cwd, .. } = test_codex().build(&server).await?;
+
+    // The only turn in this test: plain text input with the schema attached.
+    let turn = Op::UserTurn {
+        items: vec![InputItem::Text {
+            text: "hello world".into(),
+        }],
+        final_output_json_schema: Some(serde_json::from_str(SCHEMA)?),
+        cwd: cwd.path().to_path_buf(),
+        approval_policy: AskForApproval::Never,
+        sandbox_policy: SandboxPolicy::DangerFullAccess,
+        model,
+        effort: None,
+        summary: ReasoningSummary::Auto,
+    };
+    codex.submit(turn).await?;
+
+    let event = wait_for_event(&codex, |ev| matches!(ev, EventMsg::AgentMessage(_))).await;
+    let EventMsg::AgentMessage(agent_message) = event else {
+        anyhow::bail!("expected agent message event");
+    };
+
+    let json: serde_json::Value = serde_json::from_str(&agent_message.message)?;
+    assert_eq!(json.get("explanation").and_then(|v| v.as_str()), Some("explanation"));
+    assert_eq!(json.get("final_answer").and_then(|v| v.as_str()), Some("final_answer"));
+
+    Ok(())
+}
--- a/codex-rs/core/tests/suite/mod.rs
+++ b/codex-rs/core/tests/suite/mod.rs
@@ -1,5 +1,7 @@
 // Aggregates all former standalone integration tests as modules.

+#[cfg(not(target_os = "windows"))]
+mod abort_tasks;
 mod cli_stream;
 mod client;
 mod compact;
@@ -7,10 +9,12 @@ mod compact_resume_fork;
 mod exec;
 mod exec_stream_events;
 mod fork_conversation;
+mod json_result;
 mod live_cli;
 mod model_overrides;
 mod prompt_caching;
 mod review;
+mod rmcp_client;
 mod rollout_list_find;
 mod seatbelt;
 mod stream_error_allows_next_turn;
--- a/codex-rs/core/tests/suite/prompt_caching.rs
+++ b/codex-rs/core/tests/suite/prompt_caching.rs
@@ -16,6 +16,7 @@ use codex_core::shell::Shell;
 use codex_core::shell::default_user_shell;
 use core_test_support::load_default_config_for_test;
 use core_test_support::load_sse_fixture_with_id;
+use core_test_support::skip_if_no_network;
 use core_test_support::wait_for_event;
 use tempfile::TempDir;
 use wiremock::Mock;
@@ -67,6 +68,7 @@ fn assert_tool_names(body: &serde_json::Value, expected_names: &[&str]) {

 #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
 async fn codex_mini_latest_tools() {
+    skip_if_no_network!();
    use pretty_assertions::assert_eq;

    let server = MockServer::start().await;
@@ -151,6 +153,7 @@ async fn codex_mini_latest_tools() {

 #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
 async fn prompt_tools_are_consistent_across_requests() {
+    skip_if_no_network!();
    use pretty_assertions::assert_eq;

    let server = MockServer::start().await;
@@ -234,6 +237,7 @@ async fn prompt_tools_are_consistent_across_requests() {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn prefixes_context_and_instructions_once_and_consistently_across_requests() {
+    skip_if_no_network!();
    use pretty_assertions::assert_eq;

    let server = MockServer::start().await;
@@ -352,6 +356,7 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn overrides_turn_context_but_keeps_cached_prefix_and_key_constant() {
+    skip_if_no_network!();
    use pretty_assertions::assert_eq;

    let server = MockServer::start().await;
@@ -479,6 +484,7 @@ async fn overrides_turn_context_but_keeps_cached_prefix_and_key_constant() {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn per_turn_overrides_keep_cached_prefix_and_key_constant() {
+    skip_if_no_network!();
    use pretty_assertions::assert_eq;

    let server = MockServer::start().await;
@@ -546,6 +552,7 @@ async fn per_turn_overrides_keep_cached_prefix_and_key_constant() {
            model: "o3".to_string(),
            effort: Some(ReasoningEffort::High),
            summary: ReasoningSummary::Detailed,
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -601,6 +608,7 @@ async fn per_turn_overrides_keep_cached_prefix_and_key_constant() {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn send_user_turn_with_no_changes_does_not_send_environment_context() {
+    skip_if_no_network!();
    use pretty_assertions::assert_eq;

    let server = MockServer::start().await;
@@ -655,6 +663,7 @@ async fn send_user_turn_with_no_changes_does_not_send_environment_context() {
            model: default_model.clone(),
            effort: default_effort,
            summary: default_summary,
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -671,6 +680,7 @@ async fn send_user_turn_with_no_changes_does_not_send_environment_context() {
            model: default_model.clone(),
            effort: default_effort,
            summary: default_summary,
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -712,6 +722,7 @@ async fn send_user_turn_with_no_changes_does_not_send_environment_context() {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn send_user_turn_with_changes_sends_environment_context() {
+    skip_if_no_network!();
    use pretty_assertions::assert_eq;

    let server = MockServer::start().await;
@@ -766,6 +777,7 @@ async fn send_user_turn_with_changes_sends_environment_context() {
            model: default_model,
            effort: default_effort,
            summary: default_summary,
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -782,6 +794,7 @@ async fn send_user_turn_with_changes_sends_environment_context() {
            model: "o3".to_string(),
            effort: Some(ReasoningEffort::High),
            summary: ReasoningSummary::Detailed,
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
--- a/codex-rs/core/tests/suite/review.rs
+++ b/codex-rs/core/tests/suite/review.rs
@@ -22,7 +22,7 @@ use codex_core::protocol::RolloutItem;
 use codex_core::protocol::RolloutLine;
 use core_test_support::load_default_config_for_test;
 use core_test_support::load_sse_fixture_with_id_from_str;
-use core_test_support::non_sandbox_test;
+use core_test_support::skip_if_no_network;
 use core_test_support::wait_for_event;
 use pretty_assertions::assert_eq;
 use std::path::PathBuf;
@@ -42,7 +42,7 @@ use wiremock::matchers::path;
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn review_op_emits_lifecycle_and_review_output() {
    // Skip under Codex sandbox network restrictions.
-    non_sandbox_test!();
+    skip_if_no_network!();

    // Start mock Responses API server. Return a single assistant message whose
    // text is a JSON-encoded ReviewOutputEvent.
@@ -167,7 +167,7 @@ async fn review_op_emits_lifecycle_and_review_output() {
 #[cfg_attr(windows, tokio::test(flavor = "multi_thread", worker_threads = 4))]
 #[cfg_attr(not(windows), tokio::test(flavor = "multi_thread", worker_threads = 2))]
 async fn review_op_with_plain_text_emits_review_fallback() {
-    non_sandbox_test!();
+    skip_if_no_network!();

    let sse_raw = r#"[
        {"type":"response.output_item.done", "item":{
@@ -216,7 +216,7 @@ async fn review_op_with_plain_text_emits_review_fallback() {
 #[cfg_attr(windows, tokio::test(flavor = "multi_thread", worker_threads = 4))]
 #[cfg_attr(not(windows), tokio::test(flavor = "multi_thread", worker_threads = 2))]
 async fn review_does_not_emit_agent_message_on_structured_output() {
-    non_sandbox_test!();
+    skip_if_no_network!();

    let review_json = serde_json::json!({
        "findings": [
@@ -288,7 +288,7 @@ async fn review_does_not_emit_agent_message_on_structured_output() {
 /// request uses that model (and not the main chat model).
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn review_uses_custom_review_model_from_config() {
-    non_sandbox_test!();
+    skip_if_no_network!();

    // Minimal stream: just a completed event
    let sse_raw = r#"[
@@ -341,7 +341,7 @@ async fn review_uses_custom_review_model_from_config() {
 #[cfg_attr(windows, tokio::test(flavor = "multi_thread", worker_threads = 4))]
 #[cfg_attr(not(windows), tokio::test(flavor = "multi_thread", worker_threads = 2))]
 async fn review_input_isolated_from_parent_history() {
-    non_sandbox_test!();
+    skip_if_no_network!();

    // Mock server for the single review request
    let sse_raw = r#"[
@@ -517,7 +517,7 @@ async fn review_input_isolated_from_parent_history() {
 /// messages in its request `input`.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn review_history_does_not_leak_into_parent_session() {
-    non_sandbox_test!();
+    skip_if_no_network!();

    // Respond to both the review request and the subsequent parent request.
    let sse_raw = r#"[
--- a/codex-rs/core/tests/suite/rmcp_client.rs
+++ b/codex-rs/core/tests/suite/rmcp_client.rs
@@ -0,0 +1,162 @@
+use std::collections::HashMap;
+use std::time::Duration;
+
+use codex_core::config_types::McpServerConfig;
+use codex_core::protocol::AskForApproval;
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::InputItem;
+use codex_core::protocol::Op;
+use codex_core::protocol::SandboxPolicy;
+use codex_protocol::config_types::ReasoningSummary;
+use core_test_support::responses;
+use core_test_support::responses::mount_sse_once;
+use core_test_support::skip_if_no_network;
+use core_test_support::test_codex::test_codex;
+use core_test_support::wait_for_event;
+use core_test_support::wait_for_event_with_timeout;
+use escargot::CargoBuild;
+use serde_json::Value;
+use wiremock::matchers::any;
+
+/// End-to-end check of an MCP tool call routed through the rmcp client.
+///
+/// The mock model server first requests the `rmcp__echo` tool and then replies
+/// with a plain assistant message. The test asserts that the begin/end tool
+/// call events are emitted, that the tool echoes the message back, and that
+/// the `MCP_TEST_VALUE` environment variable configured for the server shows
+/// up in the tool's structured output.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn rmcp_tool_call_round_trip() -> anyhow::Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = responses::start_mock_server().await;
+
+    let call_id = "call-123";
+    let server_name = "rmcp";
+    let tool_name = format!("{server_name}__echo");
+
+    // First model response: a function call to `rmcp__echo` with `{"message":"ping"}`.
+    mount_sse_once(
+        &server,
+        any(),
+        responses::sse(vec![
+            serde_json::json!({
+                "type": "response.created",
+                "response": {"id": "resp-1"}
+            }),
+            responses::ev_function_call(call_id, &tool_name, "{\"message\":\"ping\"}"),
+            responses::ev_completed("resp-1"),
+        ]),
+    )
+    .await;
+    // Second model response: an assistant message followed by a completed event.
+    mount_sse_once(
+        &server,
+        any(),
+        responses::sse(vec![
+            responses::ev_assistant_message("msg-1", "rmcp echo tool completed successfully."),
+            responses::ev_completed("resp-2"),
+        ]),
+    )
+    .await;
+
+    let expected_env_value = "propagated-env";
+    // Resolve the `rmcp_test_server` binary of the `codex-rmcp-client` package;
+    // its path becomes the MCP server command below.
+    let rmcp_test_server_bin = CargoBuild::new()
+        .package("codex-rmcp-client")
+        .bin("rmcp_test_server")
+        .run()?
+        .path()
+        .to_string_lossy()
+        .into_owned();
+
+    let fixture = test_codex()
+        .with_config(move |config| {
+            config.use_experimental_use_rmcp_client = true;
+            config.mcp_servers.insert(
+                server_name.to_string(),
+                McpServerConfig {
+                    command: rmcp_test_server_bin.clone(),
+                    args: Vec::new(),
+                    env: Some(HashMap::from([(
+                        "MCP_TEST_VALUE".to_string(),
+                        expected_env_value.to_string(),
+                    )])),
+                    startup_timeout_sec: Some(Duration::from_secs(10)),
+                    tool_timeout_sec: None,
+                },
+            );
+        })
+        .build(&server)
+        .await?;
+    let session_model = fixture.session_configured.model.clone();
+
+    fixture
+        .codex
+        .submit(Op::UserTurn {
+            items: vec![InputItem::Text {
+                text: "call the rmcp echo tool".into(),
+            }],
+            final_output_json_schema: None,
+            cwd: fixture.cwd.path().to_path_buf(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy: SandboxPolicy::DangerFullAccess,
+            model: session_model,
+            effort: None,
+            summary: ReasoningSummary::Auto,
+        })
+        .await?;
+
+    eprintln!("waiting for mcp tool call begin event");
+    let begin_event = wait_for_event_with_timeout(
+        &fixture.codex,
+        |ev| {
+            eprintln!("ev: {ev:?}");
+            matches!(ev, EventMsg::McpToolCallBegin(_))
+        },
+        Duration::from_secs(10),
+    )
+    .await;
+
+    eprintln!("mcp tool call begin event: {begin_event:?}");
+    let EventMsg::McpToolCallBegin(begin) = begin_event else {
+        unreachable!("event guard guarantees McpToolCallBegin");
+    };
+    // The model called `rmcp__echo`; the invocation is expected to report the
+    // server name and the bare tool name separately.
+    assert_eq!(begin.invocation.server, server_name);
+    assert_eq!(begin.invocation.tool, "echo");
+
+    let end_event = wait_for_event(&fixture.codex, |ev| {
+        matches!(ev, EventMsg::McpToolCallEnd(_))
+    })
+    .await;
+    eprintln!("end_event: {end_event:?}");
+    let EventMsg::McpToolCallEnd(end) = end_event else {
+        unreachable!("event guard guarantees McpToolCallEnd");
+    };
+
+    let result = end
+        .result
+        .as_ref()
+        .expect("rmcp echo tool should return success");
+    assert_eq!(result.is_error, Some(false));
+    assert!(
+        result.content.is_empty(),
+        "content should default to an empty array"
+    );
+
+    // The payload is expected in `structured_content` as a JSON object with
+    // an `echo` entry (the message sent) and an `env` entry (the configured value).
+    let structured = result
+        .structured_content
+        .as_ref()
+        .expect("structured content");
+    let Value::Object(map) = structured else {
+        panic!("structured content should be an object: {structured:?}");
+    };
+    let echo_value = map
+        .get("echo")
+        .and_then(Value::as_str)
+        .expect("echo payload present");
+    assert_eq!(echo_value, "ping");
+    let env_value = map
+        .get("env")
+        .and_then(Value::as_str)
+        .expect("env snapshot inserted");
+    assert_eq!(env_value, expected_env_value);
+
+    let task_complete_event =
+        wait_for_event(&fixture.codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+    eprintln!("task_complete_event: {task_complete_event:?}");
+
+    // Check the expectations registered on the mock server.
+    server.verify().await;
+
+    Ok(())
+}
--- a/codex-rs/core/tests/suite/stream_error_allows_next_turn.rs
+++ b/codex-rs/core/tests/suite/stream_error_allows_next_turn.rs
@@ -6,7 +6,7 @@ use codex_core::protocol::EventMsg;
 use codex_core::protocol::InputItem;
 use codex_core::protocol::Op;
 use core_test_support::load_sse_fixture_with_id;
-use core_test_support::non_sandbox_test;
+use core_test_support::skip_if_no_network;
 use core_test_support::test_codex::TestCodex;
 use core_test_support::test_codex::test_codex;
 use core_test_support::wait_for_event_with_timeout;
@@ -23,7 +23,7 @@ fn sse_completed(id: &str) -> String {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn continue_after_stream_error() {
-    non_sandbox_test!();
+    skip_if_no_network!();

    let server = MockServer::start().await;

--- a/codex-rs/core/tests/suite/stream_no_completed.rs
+++ b/codex-rs/core/tests/suite/stream_no_completed.rs
@@ -10,7 +10,7 @@ use codex_core::protocol::InputItem;
 use codex_core::protocol::Op;
 use core_test_support::load_sse_fixture;
 use core_test_support::load_sse_fixture_with_id;
-use core_test_support::non_sandbox_test;
+use core_test_support::skip_if_no_network;
 use core_test_support::test_codex::TestCodex;
 use core_test_support::test_codex::test_codex;
 use tokio::time::timeout;
@@ -32,7 +32,7 @@ fn sse_completed(id: &str) -> String {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn retries_on_early_close() {
-    non_sandbox_test!();
+    skip_if_no_network!();

    let server = MockServer::start().await;

--- a/codex-rs/core/tests/suite/user_notification.rs
+++ b/codex-rs/core/tests/suite/user_notification.rs
@@ -5,8 +5,8 @@ use std::os::unix::fs::PermissionsExt;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::InputItem;
 use codex_core::protocol::Op;
-use core_test_support::non_sandbox_test;
 use core_test_support::responses;
+use core_test_support::skip_if_no_network;
 use core_test_support::test_codex::TestCodex;
 use core_test_support::test_codex::test_codex;
 use core_test_support::wait_for_event;
@@ -22,7 +22,7 @@ use tokio::time::sleep;

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn summarize_context_three_requests_and_instructions() -> anyhow::Result<()> {
-    non_sandbox_test!(result);
+    skip_if_no_network!(Ok(()));

    let server = start_mock_server().await;

--- a/codex-rs/exec/Cargo.toml
+++ b/codex-rs/exec/Cargo.toml
@@ -28,6 +28,7 @@ codex-core = { workspace = true }
 codex-ollama = { workspace = true }
 codex-protocol = { workspace = true }
 owo-colors = { workspace = true }
+serde = { workspace = true, features = ["derive"] }
 serde_json = { workspace = true }
 shlex = { workspace = true }
 tokio = { workspace = true, features = [
@@ -39,12 +40,18 @@ tokio = { workspace = true, features = [
 ] }
 tracing = { workspace = true, features = ["log"] }
 tracing-subscriber = { workspace = true, features = ["env-filter"] }
+ts-rs = { workspace = true, features = [
+    "uuid-impl",
+    "serde-json-impl",
+    "no-serde-warnings",
+] }

 [dev-dependencies]
 assert_cmd = { workspace = true }
 core_test_support = { workspace = true }
 libc = { workspace = true }
 predicates = { workspace = true }
+pretty_assertions = { workspace = true }
 tempfile = { workspace = true }
 uuid = { workspace = true }
 walkdir = { workspace = true }
--- a/codex-rs/exec/src/cli.rs
+++ b/codex-rs/exec/src/cli.rs
@@ -52,6 +52,10 @@ pub struct Cli {
    #[arg(long = "skip-git-repo-check", default_value_t = false)]
    pub skip_git_repo_check: bool,

+    /// Path to a JSON Schema file describing the model's final response shape.
+    #[arg(long = "output-schema", value_name = "FILE")]
+    pub output_schema: Option<PathBuf>,
+
    #[clap(skip)]
    pub config_overrides: CliConfigOverrides,

@@ -60,9 +64,24 @@ pub struct Cli {
    pub color: Color,

    /// Print events to stdout as JSONL.
-    #[arg(long = "json", default_value_t = false)]
+    #[arg(
+        long = "json",
+        default_value_t = false,
+        conflicts_with = "experimental_json"
+    )]
    pub json: bool,

+    #[arg(
+        long = "experimental-json",
+        default_value_t = false,
+        conflicts_with = "json"
+    )]
+    pub experimental_json: bool,
+
+    /// Whether to include the plan tool in the conversation.
+    #[arg(long = "include-plan-tool", default_value_t = false)]
+    pub include_plan_tool: bool,
+
    /// Specifies file where the last message from the agent should be written.
    #[arg(long = "output-last-message")]
    pub last_message_file: Option<PathBuf>,
--- a/codex-rs/exec/src/event_processor.rs
+++ b/codex-rs/exec/src/event_processor.rs
@@ -2,6 +2,7 @@ use std::path::Path;

 use codex_core::config::Config;
 use codex_core::protocol::Event;
+use codex_core::protocol::SessionConfiguredEvent;

 pub(crate) enum CodexStatus {
    Running,
@@ -11,7 +12,12 @@ pub(crate) enum CodexStatus {

 pub(crate) trait EventProcessor {
    /// Print summary of effective configuration and user prompt.
-    fn print_config_summary(&mut self, config: &Config, prompt: &str);
+    fn print_config_summary(
+        &mut self,
+        config: &Config,
+        prompt: &str,
+        session_configured: &SessionConfiguredEvent,
+    );

    /// Handle a single event emitted by the agent.
    fn process_event(&mut self, event: Event) -> CodexStatus;
--- a/codex-rs/exec/src/event_processor_with_human_output.rs
+++ b/codex-rs/exec/src/event_processor_with_human_output.rs
@@ -141,7 +141,7 @@ impl EventProcessor for EventProcessorWithHumanOutput {
    /// Print a concise summary of the effective configuration that will be used
    /// for the session. This mirrors the information shown in the TUI welcome
    /// screen.
-    fn print_config_summary(&mut self, config: &Config, prompt: &str) {
+    fn print_config_summary(&mut self, config: &Config, prompt: &str, _: &SessionConfiguredEvent) {
        const VERSION: &str = env!("CARGO_PKG_VERSION");
        ts_println!(
            self,
@@ -539,8 +539,37 @@ impl EventProcessor for EventProcessorWithHumanOutput {
            }
            EventMsg::PlanUpdate(plan_update_event) => {
                let UpdatePlanArgs { explanation, plan } = plan_update_event;
-                ts_println!(self, "explanation: {explanation:?}");
-                ts_println!(self, "plan: {plan:?}");
+
+                // Header
+                ts_println!(self, "{}", "Plan update".style(self.magenta));
+
+                // Optional explanation
+                if let Some(explanation) = explanation
+                    && !explanation.trim().is_empty()
+                {
+                    ts_println!(self, "{}", explanation.style(self.italic));
+                }
+
+                // Pretty-print the plan items with simple status markers.
+                for item in plan {
+                    use codex_core::plan_tool::StepStatus;
+                    match item.status {
+                        StepStatus::Completed => {
+                            ts_println!(self, "  {} {}", "✓".style(self.green), item.step);
+                        }
+                        StepStatus::InProgress => {
+                            ts_println!(self, "  {} {}", "→".style(self.cyan), item.step);
+                        }
+                        StepStatus::Pending => {
+                            ts_println!(
+                                self,
+                                "  {} {}",
+                                "•".style(self.dimmed),
+                                item.step.style(self.dimmed)
+                            );
+                        }
+                    }
+                }
            }
            EventMsg::GetHistoryEntryResponse(_) => {
                // Currently ignored in exec output.
--- a/codex-rs/exec/src/event_processor_with_json_output.rs
+++ b/codex-rs/exec/src/event_processor_with_json_output.rs
@@ -4,6 +4,7 @@ use std::path::PathBuf;
 use codex_core::config::Config;
 use codex_core::protocol::Event;
 use codex_core::protocol::EventMsg;
+use codex_core::protocol::SessionConfiguredEvent;
 use codex_core::protocol::TaskCompleteEvent;
 use serde_json::json;

@@ -23,7 +24,7 @@ impl EventProcessorWithJsonOutput {
 }

 impl EventProcessor for EventProcessorWithJsonOutput {
-    fn print_config_summary(&mut self, config: &Config, prompt: &str) {
+    fn print_config_summary(&mut self, config: &Config, prompt: &str, _: &SessionConfiguredEvent) {
        let entries = create_config_summary_entries(config)
            .into_iter()
            .map(|(key, value)| (key.to_string(), value))
--- a/codex-rs/exec/src/exec_events.rs
+++ b/codex-rs/exec/src/exec_events.rs
@@ -0,0 +1,191 @@
+use serde::Deserialize;
+use serde::Serialize;
+use ts_rs::TS;
+
+/// Top-level events emitted on the Codex Exec conversation stream.
+///
+/// Serialized as an internally tagged object: the `type` field holds the
+/// dotted name given by each variant's `rename` (for example
+/// `"item.completed"`), next to the fields of the variant's payload struct.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
+#[serde(tag = "type")]
+pub enum ConversationEvent {
+    /// A session exists; the payload carries its id.
+    #[serde(rename = "session.created")]
+    SessionCreated(SessionCreatedEvent),
+    /// A turn has started.
+    #[serde(rename = "turn.started")]
+    TurnStarted(TurnStartedEvent),
+    /// A turn has completed; the payload carries token usage.
+    #[serde(rename = "turn.completed")]
+    TurnCompleted(TurnCompletedEvent),
+    /// A conversation item has started.
+    #[serde(rename = "item.started")]
+    ItemStarted(ItemStartedEvent),
+    /// A previously announced conversation item has changed.
+    #[serde(rename = "item.updated")]
+    ItemUpdated(ItemUpdatedEvent),
+    /// A conversation item has reached its final state.
+    #[serde(rename = "item.completed")]
+    ItemCompleted(ItemCompletedEvent),
+    /// An error was reported on the stream.
+    #[serde(rename = "error")]
+    Error(ConversationErrorEvent),
+}
+
+/// Payload of the `session.created` event.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
+pub struct SessionCreatedEvent {
+    /// Identifier of the session, as a string.
+    pub session_id: String,
+}
+
+/// Payload of the `turn.started` event; it currently has no fields.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS, Default)]
+pub struct TurnStartedEvent {}
+
+/// Payload of the `turn.completed` event.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
+pub struct TurnCompletedEvent {
+    /// Token usage summary reported with the completed turn.
+    pub usage: Usage,
+}
+
+/// Minimal usage summary for a turn.
+///
+/// All counters default to zero.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS, Default)]
+pub struct Usage {
+    /// Number of input tokens.
+    pub input_tokens: u64,
+    /// Number of cached input tokens.
+    // NOTE(review): presumably the subset of input tokens served from a cache; confirm.
+    pub cached_input_tokens: u64,
+    /// Number of output tokens.
+    pub output_tokens: u64,
+}
+
+/// Payload of the `item.started` event.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
+pub struct ItemStartedEvent {
+    /// The item that has started.
+    pub item: ConversationItem,
+}
+
+/// Payload of the `item.completed` event.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
+pub struct ItemCompletedEvent {
+    /// The item in its completed form.
+    pub item: ConversationItem,
+}
+
+/// Payload of the `item.updated` event.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
+pub struct ItemUpdatedEvent {
+    /// The item with its updated contents.
+    pub item: ConversationItem,
+}
+
+/// Fatal error emitted by the stream.
+///
+/// This is the payload of the top-level `error` event.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
+pub struct ConversationErrorEvent {
+    /// Human-readable error message.
+    pub message: String,
+}
+
+/// Canonical representation of a conversation item and its domain-specific payload.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
+pub struct ConversationItem {
+    /// Identifier of the item.
+    pub id: String,
+    /// Type-specific payload. It is flattened, so its `item_type` tag and its
+    /// fields are serialized at the same level as `id`.
+    #[serde(flatten)]
+    pub details: ConversationItemDetails,
+}
+
+/// Typed payloads for each supported conversation item type.
+///
+/// Serialized as an internally tagged object: the `item_type` field holds the
+/// variant name in snake_case (for example `"command_execution"`).
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
+#[serde(tag = "item_type", rename_all = "snake_case")]
+pub enum ConversationItemDetails {
+    /// Text message from the assistant.
+    AssistantMessage(AssistantMessageItem),
+    /// Reasoning summary text from the model.
+    Reasoning(ReasoningItem),
+    /// Local shell command and its outcome.
+    CommandExecution(CommandExecutionItem),
+    /// Set of file changes from a patch and whether applying it succeeded.
+    FileChange(FileChangeItem),
+    /// Call to a tool on an MCP server.
+    McpToolCall(McpToolCallItem),
+    /// Web search query.
+    WebSearch(WebSearchItem),
+    /// List of todo entries.
+    TodoList(TodoListItem),
+    /// Error reported as a conversation item.
+    Error(ErrorItem),
+}
+
+/// Session conversation metadata.
+// NOTE(review): no variant of `ConversationItemDetails` in this file wraps this
+// type; confirm whether it is still needed.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
+pub struct SessionItem {
+    /// Identifier of the session.
+    pub session_id: String,
+}
+
+/// Assistant message payload.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
+pub struct AssistantMessageItem {
+    /// Text of the assistant's message.
+    pub text: String,
+}
+
+/// Model reasoning summary payload.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
+pub struct ReasoningItem {
+    /// Text of the reasoning summary.
+    pub text: String,
+}
+
+/// State of a command execution.
+///
+/// Serialized in snake_case (`in_progress`, `completed`, `failed`); the
+/// default value is `InProgress`.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default, TS)]
+#[serde(rename_all = "snake_case")]
+pub enum CommandExecutionStatus {
+    #[default]
+    InProgress,
+    Completed,
+    Failed,
+}
+
+/// Local shell command execution payload.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
+pub struct CommandExecutionItem {
+    /// The command, rendered as a single string.
+    pub command: String,
+    /// Output collected from the command.
+    pub aggregated_output: String,
+    /// Exit code of the command; the field is omitted from the serialized
+    /// form while it is `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub exit_code: Option<i32>,
+    /// Current state of the execution.
+    pub status: CommandExecutionStatus,
+}
+
+/// Single file change summary for a patch.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
+pub struct FileUpdateChange {
+    /// Path of the affected file, as a string.
+    pub path: String,
+    /// Whether the file was added, deleted or updated.
+    pub kind: PatchChangeKind,
+}
+
+/// Outcome of applying a patch; serialized as `completed` or `failed`.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
+#[serde(rename_all = "snake_case")]
+pub enum PatchApplyStatus {
+    Completed,
+    Failed,
+}
+
+/// Patch application payload.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
+pub struct FileChangeItem {
+    /// One entry per file touched by the patch.
+    pub changes: Vec<FileUpdateChange>,
+    /// Whether applying the patch completed or failed.
+    pub status: PatchApplyStatus,
+}
+
+/// Known change kinds for a patch.
+///
+/// Serialized in snake_case (`add`, `delete`, `update`).
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
+#[serde(rename_all = "snake_case")]
+pub enum PatchChangeKind {
+    Add,
+    Delete,
+    Update,
+}
+
+/// State of an MCP tool call.
+///
+/// Serialized in snake_case (`in_progress`, `completed`, `failed`); the
+/// default value is `InProgress`.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default, TS)]
+#[serde(rename_all = "snake_case")]
+pub enum McpToolCallStatus {
+    #[default]
+    InProgress,
+    Completed,
+    Failed,
+}
+
+/// MCP tool call payload.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
+pub struct McpToolCallItem {
+    /// Name of the MCP server.
+    pub server: String,
+    /// Name of the tool.
+    pub tool: String,
+    /// Current state of the call.
+    pub status: McpToolCallStatus,
+}
+
+/// Web search payload.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
+pub struct WebSearchItem {
+    /// The search query.
+    pub query: String,
+}
+
+/// Error payload carried inside a conversation item (as opposed to the
+/// top-level `error` event).
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
+pub struct ErrorItem {
+    /// Human-readable error message.
+    pub message: String,
+}
+
+/// Single entry of a todo list.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
+pub struct TodoItem {
+    /// Description of the entry.
+    pub text: String,
+    /// Whether the entry is done.
+    pub completed: bool,
+}
+
+/// Todo list payload.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
+pub struct TodoListItem {
+    /// Entries of the list, in order.
+    pub items: Vec<TodoItem>,
+}
--- a/codex-rs/exec/src/experimental_event_processor_with_json_output.rs
+++ b/codex-rs/exec/src/experimental_event_processor_with_json_output.rs
@@ -0,0 +1,368 @@
+use std::collections::HashMap;
+use std::path::PathBuf;
+use std::sync::atomic::AtomicU64;
+
+use crate::event_processor::CodexStatus;
+use crate::event_processor::EventProcessor;
+use crate::event_processor::handle_last_message;
+use crate::exec_events::AssistantMessageItem;
+use crate::exec_events::CommandExecutionItem;
+use crate::exec_events::CommandExecutionStatus;
+use crate::exec_events::ConversationErrorEvent;
+use crate::exec_events::ConversationEvent;
+use crate::exec_events::ConversationItem;
+use crate::exec_events::ConversationItemDetails;
+use crate::exec_events::FileChangeItem;
+use crate::exec_events::FileUpdateChange;
+use crate::exec_events::ItemCompletedEvent;
+use crate::exec_events::ItemStartedEvent;
+use crate::exec_events::ItemUpdatedEvent;
+use crate::exec_events::PatchApplyStatus;
+use crate::exec_events::PatchChangeKind;
+use crate::exec_events::ReasoningItem;
+use crate::exec_events::SessionCreatedEvent;
+use crate::exec_events::TodoItem;
+use crate::exec_events::TodoListItem;
+use crate::exec_events::TurnCompletedEvent;
+use crate::exec_events::TurnStartedEvent;
+use crate::exec_events::Usage;
+use codex_core::config::Config;
+use codex_core::plan_tool::StepStatus;
+use codex_core::plan_tool::UpdatePlanArgs;
+use codex_core::protocol::AgentMessageEvent;
+use codex_core::protocol::AgentReasoningEvent;
+use codex_core::protocol::Event;
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::ExecCommandBeginEvent;
+use codex_core::protocol::ExecCommandEndEvent;
+use codex_core::protocol::FileChange;
+use codex_core::protocol::PatchApplyBeginEvent;
+use codex_core::protocol::PatchApplyEndEvent;
+use codex_core::protocol::SessionConfiguredEvent;
+use codex_core::protocol::TaskCompleteEvent;
+use codex_core::protocol::TaskStartedEvent;
+use tracing::error;
+use tracing::warn;
+
+/// Event processor that translates core `Event`s into `ConversationEvent`s.
+///
+/// It keeps the state needed to pair begin/end events and to assign item ids.
+pub struct ExperimentalEventProcessorWithJsonOutput {
+    // Value passed to `new`.
+    // NOTE(review): presumably the file that receives the final agent message; its use is not visible here.
+    last_message_path: Option<PathBuf>,
+    // Counter behind `get_next_item_id`; starts at 0 and is incremented per id.
+    next_event_id: AtomicU64,
+    // Tracks running commands by call_id, including the associated item id.
+    running_commands: HashMap<String, RunningCommand>,
+    // Patch applications that have begun but not yet ended, keyed by call_id.
+    running_patch_applies: HashMap<String, PatchApplyBeginEvent>,
+    // Tracks the todo list for the current turn (at most one per turn).
+    running_todo_list: Option<RunningTodoList>,
+    // Total token usage from the most recent `TokenCount` event that carried usage info.
+    last_total_token_usage: Option<codex_core::protocol::TokenUsage>,
+}
+
+/// Bookkeeping for a command between its begin and end events.
+#[derive(Debug, Clone)]
+struct RunningCommand {
+    // Command string reported in `item.started`; reused for `item.completed`.
+    command: String,
+    // Item id assigned at begin, so the completed item carries the same id.
+    item_id: String,
+}
+
+/// Bookkeeping for the todo list item that is currently open.
+#[derive(Debug, Clone)]
+struct RunningTodoList {
+    // Item id reused for every update of the same list.
+    item_id: String,
+    // Most recent entries of the list.
+    items: Vec<TodoItem>,
+}
+
+impl ExperimentalEventProcessorWithJsonOutput {
+    /// Creates a processor with the item counter at zero and nothing in flight.
+    pub fn new(last_message_path: Option<PathBuf>) -> Self {
+        Self {
+            last_message_path,
+            next_event_id: AtomicU64::default(),
+            running_commands: HashMap::default(),
+            running_patch_applies: HashMap::default(),
+            running_todo_list: None,
+            last_total_token_usage: None,
+        }
+    }
+
+    /// Maps one core `Event` to zero or more `ConversationEvent`s.
+    ///
+    /// `TokenCount` events produce no output; they only record the latest total
+    /// token usage. `Error` and `StreamError` both become an `error` event.
+    /// Event kinds without a dedicated arm are ignored.
+    pub fn collect_conversation_events(&mut self, event: &Event) -> Vec<ConversationEvent> {
+        // Shared by the two error-like arms below.
+        let error_events = |message: &str| {
+            vec![ConversationEvent::Error(ConversationErrorEvent {
+                message: message.to_owned(),
+            })]
+        };
+
+        match &event.msg {
+            EventMsg::SessionConfigured(ev) => self.handle_session_configured(ev),
+            EventMsg::AgentMessage(ev) => self.handle_agent_message(ev),
+            EventMsg::AgentReasoning(ev) => self.handle_reasoning_event(ev),
+            EventMsg::ExecCommandBegin(ev) => self.handle_exec_command_begin(ev),
+            EventMsg::ExecCommandEnd(ev) => self.handle_exec_command_end(ev),
+            EventMsg::PatchApplyBegin(ev) => self.handle_patch_apply_begin(ev),
+            EventMsg::PatchApplyEnd(ev) => self.handle_patch_apply_end(ev),
+            EventMsg::TokenCount(ev) => {
+                if let Some(info) = &ev.info {
+                    self.last_total_token_usage = Some(info.total_token_usage.clone());
+                }
+                Vec::new()
+            }
+            EventMsg::TaskStarted(ev) => self.handle_task_started(ev),
+            EventMsg::TaskComplete(_) => self.handle_task_complete(),
+            EventMsg::Error(ev) => error_events(ev.message.as_str()),
+            EventMsg::StreamError(ev) => error_events(ev.message.as_str()),
+            EventMsg::PlanUpdate(ev) => self.handle_plan_update(ev),
+            _ => Vec::new(),
+        }
+    }
+
+    /// Returns a fresh item id of the form `item_<n>`, where `<n>` is the value
+    /// the counter held before this call incremented it.
+    fn get_next_item_id(&self) -> String {
+        use std::sync::atomic::Ordering;
+
+        let sequence = self.next_event_id.fetch_add(1, Ordering::SeqCst);
+        format!("item_{sequence}")
+    }
+
+    /// Emits a single `session.created` event carrying the session id as a string.
+    fn handle_session_configured(
+        &self,
+        payload: &SessionConfiguredEvent,
+    ) -> Vec<ConversationEvent> {
+        let session_id = payload.session_id.to_string();
+        let created = SessionCreatedEvent { session_id };
+        vec![ConversationEvent::SessionCreated(created)]
+    }
+
+    /// Emits `item.completed` with an assistant message item under a fresh item id.
+    fn handle_agent_message(&self, payload: &AgentMessageEvent) -> Vec<ConversationEvent> {
+        let id = self.get_next_item_id();
+        let details = ConversationItemDetails::AssistantMessage(AssistantMessageItem {
+            text: payload.message.clone(),
+        });
+        let completed = ItemCompletedEvent {
+            item: ConversationItem { id, details },
+        };
+
+        vec![ConversationEvent::ItemCompleted(completed)]
+    }
+
+    /// Emits `item.completed` with a reasoning item under a fresh item id.
+    fn handle_reasoning_event(&self, ev: &AgentReasoningEvent) -> Vec<ConversationEvent> {
+        let id = self.get_next_item_id();
+        let details = ConversationItemDetails::Reasoning(ReasoningItem {
+            text: ev.text.clone(),
+        });
+        let completed = ItemCompletedEvent {
+            item: ConversationItem { id, details },
+        };
+
+        vec![ConversationEvent::ItemCompleted(completed)]
+    }
+    /// Emits `item.started` for a command that has begun executing.
+    ///
+    /// A fresh item id is assigned and remembered, together with the command
+    /// string, under the event's `call_id` so that the matching
+    /// `ExecCommandEnd` can complete the same item.
+    fn handle_exec_command_begin(&mut self, ev: &ExecCommandBeginEvent) -> Vec<ConversationEvent> {
+        let item_id = self.get_next_item_id();
+
+        let command_string = match shlex::try_join(ev.command.iter().map(String::as_str)) {
+            Ok(command_string) => command_string,
+            Err(e) => {
+                // The item is still emitted; only the quoting is degraded.
+                warn!(
+                    call_id = ev.call_id,
+                    "Failed to stringify command: {e:?}; falling back to space-joined arguments"
+                );
+                ev.command.join(" ")
+            }
+        };
+
+        self.running_commands.insert(
+            ev.call_id.clone(),
+            RunningCommand {
+                command: command_string.clone(),
+                item_id: item_id.clone(),
+            },
+        );
+
+        let item = ConversationItem {
+            id: item_id,
+            details: ConversationItemDetails::CommandExecution(CommandExecutionItem {
+                command: command_string,
+                aggregated_output: String::new(),
+                exit_code: None,
+                status: CommandExecutionStatus::InProgress,
+            }),
+        };
+
+        vec![ConversationEvent::ItemStarted(ItemStartedEvent { item })]
+    }
+
+    /// Remembers the begin event under its `call_id`; emits nothing until the
+    /// matching end event arrives.
+    fn handle_patch_apply_begin(&mut self, ev: &PatchApplyBeginEvent) -> Vec<ConversationEvent> {
+        let call_id = ev.call_id.clone();
+        self.running_patch_applies.insert(call_id, ev.clone());
+
+        vec![]
+    }
+
+    /// Converts a core `FileChange` into the exec-stream `PatchChangeKind`,
+    /// discarding the payload of the change.
+    fn map_change_kind(&self, kind: &FileChange) -> PatchChangeKind {
+        // Kept exhaustive (no catch-all arm).
+        match kind {
+            FileChange::Add { .. } => PatchChangeKind::Add,
+            FileChange::Delete { .. } => PatchChangeKind::Delete,
+            FileChange::Update { .. } => PatchChangeKind::Update,
+        }
+    }
+
+    /// Emits `item.completed` with a `FileChange` item for a finished patch application.
+    ///
+    /// The change list comes from the matching `PatchApplyBegin` event, which is
+    /// looked up and removed by `call_id`; `ev.success` selects `Completed` or
+    /// `Failed`. When no matching begin event was recorded, a warning is logged
+    /// and no events are returned.
+    fn handle_patch_apply_end(&mut self, ev: &PatchApplyEndEvent) -> Vec<ConversationEvent> {
+        let Some(running_patch_apply) = self.running_patch_applies.remove(&ev.call_id) else {
+            warn!(
+                call_id = ev.call_id,
+                "PatchApplyEnd without matching PatchApplyBegin; skipping item.completed"
+            );
+            return Vec::new();
+        };
+        let status = if ev.success {
+            PatchApplyStatus::Completed
+        } else {
+            PatchApplyStatus::Failed
+        };
+        let item = ConversationItem {
+            id: self.get_next_item_id(),
+
+            details: ConversationItemDetails::FileChange(FileChangeItem {
+                changes: running_patch_apply
+                    .changes
+                    .iter()
+                    .map(|(path, change)| FileUpdateChange {
+                        // Lossy conversion keeps non-UTF-8 paths recognizable
+                        // instead of collapsing them to an empty string.
+                        path: path.to_string_lossy().into_owned(),
+                        kind: self.map_change_kind(change),
+                    })
+                    .collect(),
+                status,
+            }),
+        };
+
+        vec![ConversationEvent::ItemCompleted(ItemCompletedEvent {
+            item,
+        })]
+    }
+
+    /// Emits the `ItemCompleted` event for a command whose execution has ended.
+    ///
+    /// The command string and item id are taken from the entry stored in
+    /// `running_commands` under `ev.call_id`, so the completed item reuses that
+    /// id. An exit code of zero maps to `Completed`, anything else to `Failed`.
+    /// An end event with no stored entry is logged and produces no events.
+    fn handle_exec_command_end(&mut self, ev: &ExecCommandEndEvent) -> Vec<ConversationEvent> {
+        let RunningCommand { command, item_id } = match self.running_commands.remove(&ev.call_id) {
+            Some(running) => running,
+            None => {
+                warn!(
+                    call_id = ev.call_id,
+                    "ExecCommandEnd without matching ExecCommandBegin; skipping item.completed"
+                );
+                return Vec::new();
+            }
+        };
+
+        let status = match ev.exit_code {
+            0 => CommandExecutionStatus::Completed,
+            _ => CommandExecutionStatus::Failed,
+        };
+        let details = ConversationItemDetails::CommandExecution(CommandExecutionItem {
+            command,
+            aggregated_output: ev.aggregated_output.clone(),
+            exit_code: Some(ev.exit_code),
+            status,
+        });
+        let completed = ItemCompletedEvent {
+            item: ConversationItem {
+                id: item_id,
+                details,
+            },
+        };
+
+        vec![ConversationEvent::ItemCompleted(completed)]
+    }
+
+    /// Flattens the plan's steps into todo entries, preserving their order.
+    ///
+    /// An entry is marked completed only when its step status is
+    /// `StepStatus::Completed`; every other status yields `completed: false`.
+    fn todo_items_from_plan(&self, args: &UpdatePlanArgs) -> Vec<TodoItem> {
+        let mut todos = Vec::with_capacity(args.plan.len());
+        for step in &args.plan {
+            let completed = matches!(step.status, StepStatus::Completed);
+            todos.push(TodoItem {
+                text: step.step.clone(),
+                completed,
+            });
+        }
+        todos
+    }
+
+    /// Turns a plan update into a todo-list item event.
+    ///
+    /// If a todo list is already being tracked, its stored items are replaced
+    /// and an `ItemUpdated` event with the existing item id is returned.
+    /// Otherwise a fresh item id is allocated, the list starts being tracked,
+    /// and an `ItemStarted` event is returned.
+    fn handle_plan_update(&mut self, args: &UpdatePlanArgs) -> Vec<ConversationEvent> {
+        let items = self.todo_items_from_plan(args);
+        let details = ConversationItemDetails::TodoList(TodoListItem {
+            items: items.clone(),
+        });
+
+        // Existing list: refresh the tracked state and report an update.
+        if let Some(running) = self.running_todo_list.as_mut() {
+            running.items = items;
+            let updated = ItemUpdatedEvent {
+                item: ConversationItem {
+                    id: running.item_id.clone(),
+                    details,
+                },
+            };
+            return vec![ConversationEvent::ItemUpdated(updated)];
+        }
+
+        // First plan update: start tracking under a newly allocated id.
+        let item_id = self.get_next_item_id();
+        self.running_todo_list = Some(RunningTodoList {
+            item_id: item_id.clone(),
+            items,
+        });
+        let started = ItemStartedEvent {
+            item: ConversationItem {
+                id: item_id,
+                details,
+            },
+        };
+        vec![ConversationEvent::ItemStarted(started)]
+    }
+
+    /// Maps a task-start notification to a single `TurnStarted` event; the
+    /// payload of the incoming event is not used.
+    fn handle_task_started(&self, _: &TaskStartedEvent) -> Vec<ConversationEvent> {
+        let turn_started = ConversationEvent::TurnStarted(TurnStartedEvent {});
+        vec![turn_started]
+    }
+
+    /// Builds the events that close a turn.
+    ///
+    /// If a todo list is still being tracked it is taken out of the processor
+    /// and reported first as `ItemCompleted` with its latest items. The final
+    /// event is always `TurnCompleted`, carrying the last recorded total token
+    /// usage, or `Usage::default()` when none has been recorded.
+    fn handle_task_complete(&mut self) -> Vec<ConversationEvent> {
+        let usage = self
+            .last_total_token_usage
+            .as_ref()
+            .map(|totals| Usage {
+                input_tokens: totals.input_tokens,
+                cached_input_tokens: totals.cached_input_tokens,
+                output_tokens: totals.output_tokens,
+            })
+            .unwrap_or_else(Usage::default);
+
+        // `take()` clears the tracked list so a later plan update starts a new item.
+        let finished_todo_list = self.running_todo_list.take().map(|running| {
+            ConversationEvent::ItemCompleted(ItemCompletedEvent {
+                item: ConversationItem {
+                    id: running.item_id,
+                    details: ConversationItemDetails::TodoList(TodoListItem {
+                        items: running.items,
+                    }),
+                },
+            })
+        });
+
+        let mut events: Vec<ConversationEvent> = finished_todo_list.into_iter().collect();
+        events.push(ConversationEvent::TurnCompleted(TurnCompletedEvent {
+            usage,
+        }));
+        events
+    }
+}
+
+impl EventProcessor for ExperimentalEventProcessorWithJsonOutput {
+    /// Reports the session configuration by feeding a synthetic
+    /// `SessionConfigured` event (with an empty id) through `process_event`.
+    /// The config and prompt arguments are not used.
+    fn print_config_summary(&mut self, _: &Config, _: &str, ev: &SessionConfiguredEvent) {
+        let session_configured = Event {
+            id: String::new(),
+            msg: EventMsg::SessionConfigured(ev.clone()),
+        };
+        self.process_event(session_configured);
+    }
+
+    /// Converts `event` into conversation events and prints each one as a JSON
+    /// line on stdout; a serialization failure is logged and that event skipped.
+    ///
+    /// Returns `CodexStatus::InitiateShutdown` for `TaskComplete` (after passing
+    /// the last agent message to `handle_last_message` when a last-message path
+    /// is configured) and `CodexStatus::Running` for every other event.
+    fn process_event(&mut self, event: Event) -> CodexStatus {
+        for conv_event in self.collect_conversation_events(&event) {
+            match serde_json::to_string(&conv_event) {
+                Err(e) => {
+                    error!("Failed to serialize event: {e:?}");
+                }
+                Ok(line) => {
+                    println!("{line}");
+                }
+            }
+        }
+
+        match event.msg {
+            EventMsg::TaskComplete(TaskCompleteEvent { last_agent_message }) => {
+                if let Some(output_file) = self.last_message_path.as_deref() {
+                    handle_last_message(last_agent_message.as_deref(), output_file);
+                }
+                CodexStatus::InitiateShutdown
+            }
+            _ => CodexStatus::Running,
+        }
+    }
+}
--- a/codex-rs/exec/src/lib.rs
+++ b/codex-rs/exec/src/lib.rs
@@ -1,7 +1,9 @@
 mod cli;
 mod event_processor;
 mod event_processor_with_human_output;
-mod event_processor_with_json_output;
+pub mod event_processor_with_json_output;
+pub mod exec_events;
+pub mod experimental_event_processor_with_json_output;

 use std::io::IsTerminal;
 use std::io::Read;
@@ -24,7 +26,8 @@ use codex_core::protocol::TaskCompleteEvent;
 use codex_ollama::DEFAULT_OSS_MODEL;
 use codex_protocol::config_types::SandboxMode;
 use event_processor_with_human_output::EventProcessorWithHumanOutput;
-use event_processor_with_json_output::EventProcessorWithJsonOutput;
+use experimental_event_processor_with_json_output::ExperimentalEventProcessorWithJsonOutput;
+use serde_json::Value;
 use tracing::debug;
 use tracing::error;
 use tracing::info;
@@ -33,6 +36,7 @@ use tracing_subscriber::EnvFilter;
 use crate::cli::Command as ExecCommand;
 use crate::event_processor::CodexStatus;
 use crate::event_processor::EventProcessor;
+use crate::event_processor_with_json_output::EventProcessorWithJsonOutput;
 use codex_core::find_conversation_path_by_id_str;

 pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()> {
@@ -49,8 +53,11 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
        color,
        last_message_file,
        json: json_mode,
+        experimental_json,
        sandbox_mode: sandbox_mode_cli_arg,
        prompt,
+        output_schema: output_schema_path,
+        include_plan_tool,
        config_overrides,
    } = cli;

@@ -96,6 +103,8 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
        }
    };

+    let output_schema = load_output_schema(output_schema_path);
+
    let (stdout_with_ansi, stderr_with_ansi) = match color {
        cli::Color::Always => (true, true),
        cli::Color::Never => (false, false),
@@ -157,7 +166,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
        model_provider,
        codex_linux_sandbox_exe,
        base_instructions: None,
-        include_plan_tool: None,
+        include_plan_tool: Some(include_plan_tool),
        include_apply_patch_tool: None,
        include_view_image_tool: None,
        show_raw_agent_reasoning: oss.then_some(true),
@@ -173,14 +182,22 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
    };

    let config = Config::load_with_cli_overrides(cli_kv_overrides, overrides)?;
-    let mut event_processor: Box<dyn EventProcessor> = if json_mode {
-        Box::new(EventProcessorWithJsonOutput::new(last_message_file.clone()))
-    } else {
-        Box::new(EventProcessorWithHumanOutput::create_with_ansi(
+    let mut event_processor: Box<dyn EventProcessor> = match (json_mode, experimental_json) {
+        (_, true) => Box::new(ExperimentalEventProcessorWithJsonOutput::new(
+            last_message_file.clone(),
+        )),
+        (true, _) => {
+            eprintln!(
+                "The existing `--json` output format is being deprecated. Please try the new format using `--experimental-json`."
+            );
+
+            Box::new(EventProcessorWithJsonOutput::new(last_message_file.clone()))
+        }
+        _ => Box::new(EventProcessorWithHumanOutput::create_with_ansi(
            stdout_with_ansi,
            &config,
            last_message_file.clone(),
-        ))
+        )),
    };

    if oss {
@@ -189,11 +206,14 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
            .map_err(|e| anyhow::anyhow!("OSS setup failed: {e}"))?;
    }

-    // Print the effective configuration and prompt so users can see what Codex
-    // is using.
-    event_processor.print_config_summary(&config, &prompt);
+    let default_cwd = config.cwd.to_path_buf();
+    let default_approval_policy = config.approval_policy;
+    let default_sandbox_policy = config.sandbox_policy.clone();
+    let default_model = config.model.clone();
+    let default_effort = config.model_reasoning_effort;
+    let default_summary = config.model_reasoning_summary;

-    if !skip_git_repo_check && get_git_repo_root(&config.cwd.to_path_buf()).is_none() {
+    if !skip_git_repo_check && get_git_repo_root(&default_cwd).is_none() {
        eprintln!("Not inside a trusted directory and --skip-git-repo-check was not specified.");
        std::process::exit(1);
    }
@@ -218,11 +238,19 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
                )
                .await?
        } else {
-            conversation_manager.new_conversation(config).await?
+            conversation_manager
+                .new_conversation(config.clone())
+                .await?
        }
    } else {
-        conversation_manager.new_conversation(config).await?
+        conversation_manager
+            .new_conversation(config.clone())
+            .await?
    };
+    // Print the effective configuration and prompt so users can see what Codex
+    // is using.
+    event_processor.print_config_summary(&config, &prompt, &session_configured);
+
    info!("Codex initialized with event: {session_configured:?}");

    let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel::<Event>();
@@ -288,11 +316,28 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any

    // Send the prompt.
    let items: Vec<InputItem> = vec![InputItem::Text { text: prompt }];
-    let initial_prompt_task_id = conversation.submit(Op::UserInput { items }).await?;
+    let initial_prompt_task_id = conversation
+        .submit(Op::UserTurn {
+            items,
+            cwd: default_cwd,
+            approval_policy: default_approval_policy,
+            sandbox_policy: default_sandbox_policy,
+            model: default_model,
+            effort: default_effort,
+            summary: default_summary,
+            final_output_json_schema: output_schema,
+        })
+        .await?;
    info!("Sent prompt with event ID: {initial_prompt_task_id}");

    // Run the loop until the task is complete.
+    // Track whether a fatal error was reported by the server so we can
+    // exit with a non-zero status for automation-friendly signaling.
+    let mut error_seen = false;
    while let Some(event) = rx.recv().await {
+        if matches!(event.msg, EventMsg::Error(_)) {
+            error_seen = true;
+        }
        let shutdown: CodexStatus = event_processor.process_event(event);
        match shutdown {
            CodexStatus::Running => continue,
@@ -304,6 +349,9 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
            }
        }
    }
+    if error_seen {
+        std::process::exit(1);
+    }

    Ok(())
 }
@@ -327,3 +375,29 @@ async fn resolve_resume_path(
        Ok(None)
    }
 }
+
+/// Loads the optional output schema from disk.
+///
+/// Returns `None` when no path was supplied. If the file cannot be read or
+/// does not contain valid JSON, an error is printed to stderr and the process
+/// exits with status 1.
+fn load_output_schema(path: Option<PathBuf>) -> Option<Value> {
+    let schema_path = path?;
+
+    let raw = std::fs::read_to_string(&schema_path).unwrap_or_else(|err| {
+        eprintln!(
+            "Failed to read output schema file {}: {err}",
+            schema_path.display()
+        );
+        std::process::exit(1)
+    });
+
+    let schema = serde_json::from_str::<Value>(&raw).unwrap_or_else(|err| {
+        eprintln!(
+            "Output schema file {} is not valid JSON: {err}",
+            schema_path.display()
+        );
+        std::process::exit(1)
+    });
+
+    Some(schema)
+}
--- a/codex-rs/exec/tests/all.rs
+++ b/codex-rs/exec/tests/all.rs
@@ -1,3 +1,5 @@
 // Single integration test binary that aggregates all test modules.
 // The submodules live in `tests/suite/`.
 mod suite;
+
+mod event_processor_with_json_output;
--- a/codex-rs/exec/tests/event_processor_with_json_output.rs
+++ b/codex-rs/exec/tests/event_processor_with_json_output.rs
@@ -0,0 +1,660 @@
+use codex_core::protocol::AgentMessageEvent;
+use codex_core::protocol::AgentReasoningEvent;
+use codex_core::protocol::Event;
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::ExecCommandBeginEvent;
+use codex_core::protocol::ExecCommandEndEvent;
+use codex_core::protocol::FileChange;
+use codex_core::protocol::PatchApplyBeginEvent;
+use codex_core::protocol::PatchApplyEndEvent;
+use codex_core::protocol::SessionConfiguredEvent;
+use codex_exec::exec_events::AssistantMessageItem;
+use codex_exec::exec_events::CommandExecutionItem;
+use codex_exec::exec_events::CommandExecutionStatus;
+use codex_exec::exec_events::ConversationErrorEvent;
+use codex_exec::exec_events::ConversationEvent;
+use codex_exec::exec_events::ConversationItem;
+use codex_exec::exec_events::ConversationItemDetails;
+use codex_exec::exec_events::ItemCompletedEvent;
+use codex_exec::exec_events::ItemStartedEvent;
+use codex_exec::exec_events::ItemUpdatedEvent;
+use codex_exec::exec_events::PatchApplyStatus;
+use codex_exec::exec_events::PatchChangeKind;
+use codex_exec::exec_events::ReasoningItem;
+use codex_exec::exec_events::SessionCreatedEvent;
+use codex_exec::exec_events::TodoItem as ExecTodoItem;
+use codex_exec::exec_events::TodoListItem as ExecTodoListItem;
+use codex_exec::exec_events::TurnCompletedEvent;
+use codex_exec::exec_events::TurnStartedEvent;
+use codex_exec::exec_events::Usage;
+use codex_exec::experimental_event_processor_with_json_output::ExperimentalEventProcessorWithJsonOutput;
+use pretty_assertions::assert_eq;
+use std::path::PathBuf;
+use std::time::Duration;
+
+/// Test helper: wraps `msg` in an `Event` carrying the given id.
+fn event(id: &str, msg: EventMsg) -> Event {
+    let id = id.to_owned();
+    Event { id, msg }
+}
+
+#[test]
+fn session_configured_produces_session_created_event() {
+    // The same id string is parsed for the input and expected verbatim in the output.
+    const SESSION_UUID: &str = "67e55044-10b1-426f-9247-bb680e5fe0c8";
+
+    let mut ep = ExperimentalEventProcessorWithJsonOutput::new(None);
+    let configured = SessionConfiguredEvent {
+        session_id: codex_protocol::mcp_protocol::ConversationId::from_string(SESSION_UUID)
+            .unwrap(),
+        model: "codex-mini-latest".to_string(),
+        reasoning_effort: None,
+        history_log_id: 0,
+        history_entry_count: 0,
+        initial_messages: None,
+        rollout_path: PathBuf::from("/tmp/rollout.json"),
+    };
+
+    let out = ep.collect_conversation_events(&event(
+        "e1",
+        EventMsg::SessionConfigured(configured),
+    ));
+
+    let expected = vec![ConversationEvent::SessionCreated(SessionCreatedEvent {
+        session_id: SESSION_UUID.to_string(),
+    })];
+    assert_eq!(out, expected);
+}
+
+#[test]
+fn task_started_produces_turn_started_event() {
+    let mut ep = ExperimentalEventProcessorWithJsonOutput::new(None);
+    let started = codex_core::protocol::TaskStartedEvent {
+        model_context_window: Some(32_000),
+    };
+
+    let out = ep.collect_conversation_events(&event("t1", EventMsg::TaskStarted(started)));
+
+    let expected = vec![ConversationEvent::TurnStarted(TurnStartedEvent {})];
+    assert_eq!(out, expected);
+}
+
+#[test]
+fn plan_update_emits_todo_list_started_updated_and_completed() {
+    use codex_core::plan_tool::PlanItemArg;
+    use codex_core::plan_tool::StepStatus;
+    use codex_core::plan_tool::UpdatePlanArgs;
+
+    // Two-step plan update; only the status of the first step varies.
+    let plan_update = |id: &str, first_step_status: StepStatus| {
+        event(
+            id,
+            EventMsg::PlanUpdate(UpdatePlanArgs {
+                explanation: None,
+                plan: vec![
+                    PlanItemArg {
+                        step: "step one".to_string(),
+                        status: first_step_status,
+                    },
+                    PlanItemArg {
+                        step: "step two".to_string(),
+                        status: StepStatus::InProgress,
+                    },
+                ],
+            }),
+        )
+    };
+    // Expected todo-list item; "step two" is never completed in this test.
+    let todo_list_item = |first_step_done: bool| ConversationItem {
+        id: "item_0".to_string(),
+        details: ConversationItemDetails::TodoList(ExecTodoListItem {
+            items: vec![
+                ExecTodoItem {
+                    text: "step one".to_string(),
+                    completed: first_step_done,
+                },
+                ExecTodoItem {
+                    text: "step two".to_string(),
+                    completed: false,
+                },
+            ],
+        }),
+    };
+
+    let mut ep = ExperimentalEventProcessorWithJsonOutput::new(None);
+
+    // First plan update => item.started (todo_list)
+    let out_first = ep.collect_conversation_events(&plan_update("p1", StepStatus::Pending));
+    assert_eq!(
+        out_first,
+        vec![ConversationEvent::ItemStarted(ItemStartedEvent {
+            item: todo_list_item(false),
+        })]
+    );
+
+    // Second plan update in the same turn => item.updated (same id)
+    let out_second = ep.collect_conversation_events(&plan_update("p2", StepStatus::Completed));
+    assert_eq!(
+        out_second,
+        vec![ConversationEvent::ItemUpdated(ItemUpdatedEvent {
+            item: todo_list_item(true),
+        })]
+    );
+
+    // Task completes => item.completed (same id, latest state), then turn.completed
+    let complete = event(
+        "p3",
+        EventMsg::TaskComplete(codex_core::protocol::TaskCompleteEvent {
+            last_agent_message: None,
+        }),
+    );
+    let out_complete = ep.collect_conversation_events(&complete);
+    assert_eq!(
+        out_complete,
+        vec![
+            ConversationEvent::ItemCompleted(ItemCompletedEvent {
+                item: todo_list_item(true),
+            }),
+            ConversationEvent::TurnCompleted(TurnCompletedEvent {
+                usage: Usage::default(),
+            }),
+        ]
+    );
+}
+
+#[test]
+fn plan_update_after_complete_starts_new_todo_list_with_new_id() {
+    use codex_core::plan_tool::PlanItemArg;
+    use codex_core::plan_tool::StepStatus;
+    use codex_core::plan_tool::UpdatePlanArgs;
+
+    // Plan update containing a single pending step.
+    let single_step_plan = |id: &str, step: &str| {
+        event(
+            id,
+            EventMsg::PlanUpdate(UpdatePlanArgs {
+                explanation: None,
+                plan: vec![PlanItemArg {
+                    step: step.to_string(),
+                    status: StepStatus::Pending,
+                }],
+            }),
+        )
+    };
+
+    let mut ep = ExperimentalEventProcessorWithJsonOutput::new(None);
+
+    // First turn: start a todo list, then complete the task.
+    let _ = ep.collect_conversation_events(&single_step_plan("t1", "only"));
+    let task_complete = event(
+        "t2",
+        EventMsg::TaskComplete(codex_core::protocol::TaskCompleteEvent {
+            last_agent_message: None,
+        }),
+    );
+    let _ = ep.collect_conversation_events(&task_complete);
+
+    // Second turn: the new todo list must be started under a new id.
+    let out = ep.collect_conversation_events(&single_step_plan("t3", "again"));
+
+    match &out[0] {
+        ConversationEvent::ItemStarted(ItemStartedEvent { item }) => {
+            assert_eq!(&item.id, "item_1");
+        }
+        other => panic!("unexpected event: {other:?}"),
+    }
+}
+
+#[test]
+fn agent_reasoning_produces_item_completed_reasoning() {
+    let mut ep = ExperimentalEventProcessorWithJsonOutput::new(None);
+    let reasoning = AgentReasoningEvent {
+        text: "thinking...".to_string(),
+    };
+
+    let out = ep.collect_conversation_events(&event("e1", EventMsg::AgentReasoning(reasoning)));
+
+    // A reasoning event is reported directly as a completed item.
+    let expected_item = ConversationItem {
+        id: "item_0".to_string(),
+        details: ConversationItemDetails::Reasoning(ReasoningItem {
+            text: "thinking...".to_string(),
+        }),
+    };
+    assert_eq!(
+        out,
+        vec![ConversationEvent::ItemCompleted(ItemCompletedEvent {
+            item: expected_item,
+        })]
+    );
+}
+
+#[test]
+fn agent_message_produces_item_completed_assistant_message() {
+    let mut ep = ExperimentalEventProcessorWithJsonOutput::new(None);
+    let message = AgentMessageEvent {
+        message: "hello".to_string(),
+    };
+
+    let out = ep.collect_conversation_events(&event("e1", EventMsg::AgentMessage(message)));
+
+    // An agent message is reported directly as a completed assistant-message item.
+    let expected_item = ConversationItem {
+        id: "item_0".to_string(),
+        details: ConversationItemDetails::AssistantMessage(AssistantMessageItem {
+            text: "hello".to_string(),
+        }),
+    };
+    assert_eq!(
+        out,
+        vec![ConversationEvent::ItemCompleted(ItemCompletedEvent {
+            item: expected_item,
+        })]
+    );
+}
+
+#[test]
+fn error_event_produces_error() {
+    let mut ep = ExperimentalEventProcessorWithJsonOutput::new(None);
+    let failure = codex_core::protocol::ErrorEvent {
+        message: "boom".to_string(),
+    };
+
+    let out = ep.collect_conversation_events(&event("e1", EventMsg::Error(failure)));
+
+    // The error message is forwarded unchanged.
+    let expected = vec![ConversationEvent::Error(ConversationErrorEvent {
+        message: "boom".to_string(),
+    })];
+    assert_eq!(out, expected);
+}
+
+#[test]
+fn stream_error_event_produces_error() {
+    let mut ep = ExperimentalEventProcessorWithJsonOutput::new(None);
+    let stream_failure = codex_core::protocol::StreamErrorEvent {
+        message: "retrying".to_string(),
+    };
+
+    let out = ep.collect_conversation_events(&event("e1", EventMsg::StreamError(stream_failure)));
+
+    // Stream errors surface through the same error event as regular errors.
+    let expected = vec![ConversationEvent::Error(ConversationErrorEvent {
+        message: "retrying".to_string(),
+    })];
+    assert_eq!(out, expected);
+}
+
+#[test]
+fn exec_command_end_success_produces_completed_command_item() {
+    let mut ep = ExperimentalEventProcessorWithJsonOutput::new(None);
+
+    // Both expected events describe the same item id.
+    let item_0 = |details: CommandExecutionItem| ConversationItem {
+        id: "item_0".to_string(),
+        details: ConversationItemDetails::CommandExecution(details),
+    };
+
+    // Begin -> item.started (item_0, in progress, no output yet)
+    let begin = event(
+        "c1",
+        EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
+            call_id: "1".to_string(),
+            command: vec!["bash".to_string(), "-lc".to_string(), "echo hi".to_string()],
+            cwd: std::env::current_dir().unwrap(),
+            parsed_cmd: Vec::new(),
+        }),
+    );
+    let out_begin = ep.collect_conversation_events(&begin);
+    assert_eq!(
+        out_begin,
+        vec![ConversationEvent::ItemStarted(ItemStartedEvent {
+            item: item_0(CommandExecutionItem {
+                command: "bash -lc 'echo hi'".to_string(),
+                aggregated_output: String::new(),
+                exit_code: None,
+                status: CommandExecutionStatus::InProgress,
+            }),
+        })]
+    );
+
+    // End (success) -> item.completed (item_0)
+    let end_ok = event(
+        "c2",
+        EventMsg::ExecCommandEnd(ExecCommandEndEvent {
+            call_id: "1".to_string(),
+            stdout: String::new(),
+            stderr: String::new(),
+            aggregated_output: "hi\n".to_string(),
+            exit_code: 0,
+            duration: Duration::from_millis(5),
+            formatted_output: String::new(),
+        }),
+    );
+    let out_ok = ep.collect_conversation_events(&end_ok);
+    assert_eq!(
+        out_ok,
+        vec![ConversationEvent::ItemCompleted(ItemCompletedEvent {
+            item: item_0(CommandExecutionItem {
+                command: "bash -lc 'echo hi'".to_string(),
+                aggregated_output: "hi\n".to_string(),
+                exit_code: Some(0),
+                status: CommandExecutionStatus::Completed,
+            }),
+        })]
+    );
+}
+
+#[test]
+fn exec_command_end_failure_produces_failed_command_item() {
+    let mut ep = ExperimentalEventProcessorWithJsonOutput::new(None);
+
+    // Both expected events describe the same item id.
+    let item_0 = |details: CommandExecutionItem| ConversationItem {
+        id: "item_0".to_string(),
+        details: ConversationItemDetails::CommandExecution(details),
+    };
+
+    // Begin -> item.started (item_0, in progress, no output yet)
+    let begin = event(
+        "c1",
+        EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
+            call_id: "2".to_string(),
+            command: vec!["sh".to_string(), "-c".to_string(), "exit 1".to_string()],
+            cwd: std::env::current_dir().unwrap(),
+            parsed_cmd: Vec::new(),
+        }),
+    );
+    let out_begin = ep.collect_conversation_events(&begin);
+    assert_eq!(
+        out_begin,
+        vec![ConversationEvent::ItemStarted(ItemStartedEvent {
+            item: item_0(CommandExecutionItem {
+                command: "sh -c 'exit 1'".to_string(),
+                aggregated_output: String::new(),
+                exit_code: None,
+                status: CommandExecutionStatus::InProgress,
+            }),
+        })]
+    );
+
+    // End (failure) -> item.completed (item_0) with Failed status
+    let end_fail = event(
+        "c2",
+        EventMsg::ExecCommandEnd(ExecCommandEndEvent {
+            call_id: "2".to_string(),
+            stdout: String::new(),
+            stderr: String::new(),
+            aggregated_output: String::new(),
+            exit_code: 1,
+            duration: Duration::from_millis(2),
+            formatted_output: String::new(),
+        }),
+    );
+    let out_fail = ep.collect_conversation_events(&end_fail);
+    assert_eq!(
+        out_fail,
+        vec![ConversationEvent::ItemCompleted(ItemCompletedEvent {
+            item: item_0(CommandExecutionItem {
+                command: "sh -c 'exit 1'".to_string(),
+                aggregated_output: String::new(),
+                exit_code: Some(1),
+                status: CommandExecutionStatus::Failed,
+            }),
+        })]
+    );
+}
+
+#[test]
+fn exec_command_end_without_begin_is_ignored() {
+    let mut ep = ExperimentalEventProcessorWithJsonOutput::new(None);
+
+    // No Begin was sent for this call id, so the End must not yield any events.
+    let orphan_end = ExecCommandEndEvent {
+        call_id: "no-begin".to_string(),
+        stdout: String::new(),
+        stderr: String::new(),
+        aggregated_output: String::new(),
+        exit_code: 0,
+        duration: Duration::from_millis(1),
+        formatted_output: String::new(),
+    };
+
+    let out = ep.collect_conversation_events(&event("c1", EventMsg::ExecCommandEnd(orphan_end)));
+    assert!(out.is_empty());
+}
+
+#[test]
+fn patch_apply_success_produces_item_completed_patchapply() {
+    let mut ep = ExperimentalEventProcessorWithJsonOutput::new(None);
+
+    // One change of each kind: add, delete, and update (with a rename target).
+    let changes = std::collections::HashMap::from([
+        (
+            PathBuf::from("a/added.txt"),
+            FileChange::Add {
+                content: "+hello".to_string(),
+            },
+        ),
+        (
+            PathBuf::from("b/deleted.txt"),
+            FileChange::Delete {
+                content: "-goodbye".to_string(),
+            },
+        ),
+        (
+            PathBuf::from("c/modified.txt"),
+            FileChange::Update {
+                unified_diff: "--- c/modified.txt\n+++ c/modified.txt\n@@\n-old\n+new\n"
+                    .to_string(),
+                move_path: Some(PathBuf::from("c/renamed.txt")),
+            },
+        ),
+    ]);
+
+    // Begin -> no output; the changes are only remembered.
+    let begin = event(
+        "p1",
+        EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
+            call_id: "call-1".to_string(),
+            auto_approved: true,
+            changes,
+        }),
+    );
+    assert!(ep.collect_conversation_events(&begin).is_empty());
+
+    // End (success) -> item.completed (item_0)
+    let end = event(
+        "p2",
+        EventMsg::PatchApplyEnd(PatchApplyEndEvent {
+            call_id: "call-1".to_string(),
+            stdout: "applied 3 changes".to_string(),
+            stderr: String::new(),
+            success: true,
+        }),
+    );
+    let out_end = ep.collect_conversation_events(&end);
+    assert_eq!(out_end.len(), 1);
+
+    let item = match &out_end[0] {
+        ConversationEvent::ItemCompleted(ItemCompletedEvent { item }) => item,
+        other => panic!("unexpected event: {other:?}"),
+    };
+    assert_eq!(&item.id, "item_0");
+
+    let file_update = match &item.details {
+        ConversationItemDetails::FileChange(file_update) => file_update,
+        other => panic!("unexpected details: {other:?}"),
+    };
+    assert_eq!(file_update.status, PatchApplyStatus::Completed);
+
+    // Compare sorted by path so the check does not depend on HashMap iteration order.
+    let mut actual: Vec<(String, PatchChangeKind)> = file_update
+        .changes
+        .iter()
+        .map(|c| (c.path.clone(), c.kind.clone()))
+        .collect();
+    actual.sort_by(|a, b| a.0.cmp(&b.0));
+
+    let mut expected = vec![
+        ("a/added.txt".to_string(), PatchChangeKind::Add),
+        ("b/deleted.txt".to_string(), PatchChangeKind::Delete),
+        ("c/modified.txt".to_string(), PatchChangeKind::Update),
+    ];
+    expected.sort_by(|a, b| a.0.cmp(&b.0));
+
+    assert_eq!(actual, expected);
+}
+
+#[test]
+fn patch_apply_failure_produces_item_completed_patchapply_failed() {
+    let mut ep = ExperimentalEventProcessorWithJsonOutput::new(None);
+
+    // A single in-place update with no rename target.
+    let changes = std::collections::HashMap::from([(
+        PathBuf::from("file.txt"),
+        FileChange::Update {
+            unified_diff: "--- file.txt\n+++ file.txt\n@@\n-old\n+new\n".to_string(),
+            move_path: None,
+        },
+    )]);
+
+    // Begin -> no output; the changes are only remembered.
+    let begin = event(
+        "p1",
+        EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
+            call_id: "call-2".to_string(),
+            auto_approved: false,
+            changes,
+        }),
+    );
+    assert!(ep.collect_conversation_events(&begin).is_empty());
+
+    // End (failure) -> item.completed (item_0) with Failed status
+    let end = event(
+        "p2",
+        EventMsg::PatchApplyEnd(PatchApplyEndEvent {
+            call_id: "call-2".to_string(),
+            stdout: String::new(),
+            stderr: "failed to apply".to_string(),
+            success: false,
+        }),
+    );
+    let out_end = ep.collect_conversation_events(&end);
+    assert_eq!(out_end.len(), 1);
+
+    let item = match &out_end[0] {
+        ConversationEvent::ItemCompleted(ItemCompletedEvent { item }) => item,
+        other => panic!("unexpected event: {other:?}"),
+    };
+    assert_eq!(&item.id, "item_0");
+
+    let file_update = match &item.details {
+        ConversationItemDetails::FileChange(file_update) => file_update,
+        other => panic!("unexpected details: {other:?}"),
+    };
+    assert_eq!(file_update.status, PatchApplyStatus::Failed);
+    assert_eq!(file_update.changes.len(), 1);
+    assert_eq!(file_update.changes[0].path, "file.txt".to_string());
+    assert_eq!(file_update.changes[0].kind, PatchChangeKind::Update);
+}
+
+#[test]
+fn task_complete_produces_turn_completed_with_usage() {
+    let mut ep = ExperimentalEventProcessorWithJsonOutput::new(None);
+
+    // Seed the processor with known token totals; TokenCount itself emits nothing.
+    let totals = codex_core::protocol::TokenUsage {
+        input_tokens: 1200,
+        cached_input_tokens: 200,
+        output_tokens: 345,
+        reasoning_output_tokens: 0,
+        total_tokens: 0,
+    };
+    let token_count = codex_core::protocol::TokenCountEvent {
+        info: Some(codex_core::protocol::TokenUsageInfo {
+            total_token_usage: totals.clone(),
+            last_token_usage: totals,
+            model_context_window: None,
+        }),
+        rate_limits: None,
+    };
+    let seeded = ep.collect_conversation_events(&event("e1", EventMsg::TokenCount(token_count)));
+    assert!(seeded.is_empty());
+
+    // TaskComplete should then produce turn.completed carrying the captured usage.
+    let task_complete = codex_core::protocol::TaskCompleteEvent {
+        last_agent_message: Some("done".to_string()),
+    };
+    let out = ep.collect_conversation_events(&event("e2", EventMsg::TaskComplete(task_complete)));
+
+    let expected = vec![ConversationEvent::TurnCompleted(TurnCompletedEvent {
+        usage: Usage {
+            input_tokens: 1200,
+            cached_input_tokens: 200,
+            output_tokens: 345,
+        },
+    })];
+    assert_eq!(out, expected);
+}
--- a/codex-rs/exec/tests/suite/apply_patch.rs
+++ b/codex-rs/exec/tests/suite/apply_patch.rs
@@ -6,7 +6,9 @@ use codex_core::CODEX_APPLY_PATCH_ARG1;
 use core_test_support::responses::ev_apply_patch_custom_tool_call;
 use core_test_support::responses::ev_apply_patch_function_call;
 use core_test_support::responses::ev_completed;
+use core_test_support::responses::mount_sse_sequence;
 use core_test_support::responses::sse;
+use core_test_support::responses::start_mock_server;
 use std::fs;
 use std::process::Command;
 use tempfile::tempdir;
@@ -47,13 +49,13 @@ fn test_standalone_exec_cli_can_use_apply_patch() -> anyhow::Result<()> {
 #[cfg(not(target_os = "windows"))]
 #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
 async fn test_apply_patch_tool() -> anyhow::Result<()> {
-    use crate::suite::common::run_e2e_exec_test;
-    use core_test_support::non_sandbox_test;
+    use core_test_support::skip_if_no_network;
+    use core_test_support::test_codex_exec::test_codex_exec;

-    non_sandbox_test!(result);
+    skip_if_no_network!(Ok(()));

-    let tmp_cwd = tempdir().expect("failed to create temp dir");
-    let tmp_path = tmp_cwd.path().to_path_buf();
+    let test = test_codex_exec();
+    let tmp_path = test.cwd_path().to_path_buf();
    let add_patch = r#"*** Begin Patch
 *** Add File: test.md
 +Hello world
@@ -75,7 +77,16 @@ async fn test_apply_patch_tool() -> anyhow::Result<()> {
        ]),
        sse(vec![ev_completed("request_2")]),
    ];
-    run_e2e_exec_test(tmp_cwd.path(), response_streams).await;
+    let server = start_mock_server().await;
+    mount_sse_sequence(&server, response_streams).await;
+
+    test.cmd_with_server(&server)
+        .arg("--skip-git-repo-check")
+        .arg("-s")
+        .arg("danger-full-access")
+        .arg("foo")
+        .assert()
+        .success();

    let final_path = tmp_path.join("test.md");
    let contents = std::fs::read_to_string(&final_path)
@@ -87,12 +98,12 @@ async fn test_apply_patch_tool() -> anyhow::Result<()> {
 #[cfg(not(target_os = "windows"))]
 #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
 async fn test_apply_patch_freeform_tool() -> anyhow::Result<()> {
-    use crate::suite::common::run_e2e_exec_test;
-    use core_test_support::non_sandbox_test;
+    use core_test_support::skip_if_no_network;
+    use core_test_support::test_codex_exec::test_codex_exec;

-    non_sandbox_test!(result);
+    skip_if_no_network!(Ok(()));

-    let tmp_cwd = tempdir().expect("failed to create temp dir");
+    let test = test_codex_exec();
    let freeform_add_patch = r#"*** Begin Patch
 *** Add File: app.py
 +class BaseClass:
@@ -117,10 +128,19 @@ async fn test_apply_patch_freeform_tool() -> anyhow::Result<()> {
        ]),
        sse(vec![ev_completed("request_2")]),
    ];
-    run_e2e_exec_test(tmp_cwd.path(), response_streams).await;
+    let server = start_mock_server().await;
+    mount_sse_sequence(&server, response_streams).await;
+
+    test.cmd_with_server(&server)
+        .arg("--skip-git-repo-check")
+        .arg("-s")
+        .arg("danger-full-access")
+        .arg("foo")
+        .assert()
+        .success();

    // Verify final file contents
-    let final_path = tmp_cwd.path().join("app.py");
+    let final_path = test.cwd_path().join("app.py");
    let contents = std::fs::read_to_string(&final_path)
        .unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display()));
    assert_eq!(
--- a/codex-rs/exec/tests/suite/common.rs
+++ b/codex-rs/exec/tests/suite/common.rs
@@ -1,69 +0,0 @@
-// this file is only used for e2e tests which are currently disabled on windows
-#![cfg(not(target_os = "windows"))]
-#![allow(clippy::expect_used)]
-
-use anyhow::Context;
-use assert_cmd::prelude::*;
-use std::path::Path;
-use std::process::Command;
-use std::sync::atomic::AtomicUsize;
-use std::sync::atomic::Ordering;
-use wiremock::Mock;
-use wiremock::MockServer;
-use wiremock::matchers::method;
-use wiremock::matchers::path;
-
-use wiremock::Respond;
-
-struct SeqResponder {
-    num_calls: AtomicUsize,
-    responses: Vec<String>,
-}
-
-impl Respond for SeqResponder {
-    fn respond(&self, _: &wiremock::Request) -> wiremock::ResponseTemplate {
-        let call_num = self.num_calls.fetch_add(1, Ordering::SeqCst);
-        match self.responses.get(call_num) {
-            Some(body) => wiremock::ResponseTemplate::new(200)
-                .insert_header("content-type", "text/event-stream")
-                .set_body_string(body.clone()),
-            None => panic!("no response for {call_num}"),
-        }
-    }
-}
-
-/// Helper function to run an E2E test of a codex-exec call. Starts a wiremock
-/// server, and returns the response_streams in order for each api call. Runs
-/// the codex-exec command with the wiremock server as the model server.
-pub(crate) async fn run_e2e_exec_test(cwd: &Path, response_streams: Vec<String>) {
-    let server = MockServer::start().await;
-
-    let num_calls = response_streams.len();
-    let seq_responder = SeqResponder {
-        num_calls: AtomicUsize::new(0),
-        responses: response_streams,
-    };
-
-    Mock::given(method("POST"))
-        .and(path("/v1/responses"))
-        .respond_with(seq_responder)
-        .expect(num_calls as u64)
-        .mount(&server)
-        .await;
-
-    let cwd = cwd.to_path_buf();
-    let uri = server.uri();
-    Command::cargo_bin("codex-exec")
-        .context("should find binary for codex-exec")
-        .expect("should find binary for codex-exec")
-        .current_dir(cwd.clone())
-        .env("CODEX_HOME", cwd)
-        .env("OPENAI_API_KEY", "dummy")
-        .env("OPENAI_BASE_URL", format!("{uri}/v1"))
-        .arg("--skip-git-repo-check")
-        .arg("-s")
-        .arg("danger-full-access")
-        .arg("foo")
-        .assert()
-        .success();
-}
--- a/codex-rs/exec/tests/suite/mod.rs
+++ b/codex-rs/exec/tests/suite/mod.rs
@@ -1,5 +1,6 @@
 // Aggregates all former standalone integration tests as modules.
 mod apply_patch;
-mod common;
+mod output_schema;
 mod resume;
 mod sandbox;
+mod server_error_exit;
--- a/codex-rs/exec/tests/suite/output_schema.rs
+++ b/codex-rs/exec/tests/suite/output_schema.rs
@@ -0,0 +1,70 @@
+#![cfg(not(target_os = "windows"))]
+#![allow(clippy::expect_used, clippy::unwrap_used)]
+
+use core_test_support::responses;
+use core_test_support::test_codex_exec::test_codex_exec;
+use serde_json::Value;
+use wiremock::matchers::any;
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn exec_includes_output_schema_in_request() -> anyhow::Result<()> {
+    let test = test_codex_exec();
+
+    let schema_contents = serde_json::json!({
+        "type": "object",
+        "properties": {
+            "answer": { "type": "string" }
+        },
+        "required": ["answer"],
+        "additionalProperties": false
+    });
+    let schema_path = test.cwd_path().join("schema.json");
+    std::fs::write(&schema_path, serde_json::to_vec_pretty(&schema_contents)?)?;
+    let expected_schema: Value = schema_contents;
+
+    let server = responses::start_mock_server().await;
+    let body = responses::sse(vec![
+        serde_json::json!({
+            "type": "response.created",
+            "response": {"id": "resp1"}
+        }),
+        responses::ev_assistant_message("m1", "fixture hello"),
+        responses::ev_completed("resp1"),
+    ]);
+    responses::mount_sse_once(&server, any(), body).await;
+
+    test.cmd_with_server(&server)
+        .arg("--skip-git-repo-check")
+        // keep using -C in the test to exercise the flag as well
+        .arg("-C")
+        .arg(test.cwd_path())
+        .arg("--output-schema")
+        .arg(&schema_path)
+        .arg("-m")
+        .arg("gpt-5")
+        .arg("tell me a joke")
+        .assert()
+        .success();
+
+    let requests = server
+        .received_requests()
+        .await
+        .expect("failed to capture requests");
+    assert_eq!(requests.len(), 1, "expected exactly one request");
+    let payload: Value = serde_json::from_slice(&requests[0].body)?;
+    let text = payload.get("text").expect("request missing text field");
+    let format = text
+        .get("format")
+        .expect("request missing text.format field");
+    assert_eq!(
+        format,
+        &serde_json::json!({
+            "name": "codex_output_schema",
+            "type": "json_schema",
+            "strict": true,
+            "schema": expected_schema,
+        })
+    );
+
+    Ok(())
+}
--- a/codex-rs/exec/tests/suite/sandbox.rs
+++ b/codex-rs/exec/tests/suite/sandbox.rs
@@ -56,6 +56,7 @@ async fn spawn_command_under_sandbox(

 #[tokio::test]
 async fn python_multiprocessing_lock_works_under_sandbox() {
+    core_test_support::skip_if_sandbox!();
    #[cfg(target_os = "macos")]
    let writable_roots = Vec::<PathBuf>::new();

@@ -110,6 +111,7 @@ if __name__ == '__main__':

 #[tokio::test]
 async fn sandbox_distinguishes_command_and_policy_cwds() {
+    core_test_support::skip_if_sandbox!();
    let temp = tempfile::tempdir().expect("should be able to create temp dir");
    let sandbox_root = temp.path().join("sandbox");
    let command_root = temp.path().join("command");
--- a/codex-rs/exec/tests/suite/server_error_exit.rs
+++ b/codex-rs/exec/tests/suite/server_error_exit.rs
@@ -0,0 +1,34 @@
+#![cfg(not(target_os = "windows"))]
+#![allow(clippy::expect_used, clippy::unwrap_used)]
+
+use core_test_support::responses;
+use core_test_support::test_codex_exec::test_codex_exec;
+use wiremock::matchers::any;
+
+/// Verify that when the server reports an error, `codex-exec` exits with a
+/// non-zero status code so automation can detect failures.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn exits_non_zero_when_server_reports_error() -> anyhow::Result<()> {
+    let test = test_codex_exec();
+
+    // Mock a simple Responses API SSE stream that immediately reports a
+    // `response.failed` event with an error message.
+    let server = responses::start_mock_server().await;
+    let body = responses::sse(vec![serde_json::json!({
+        "type": "response.failed",
+        "response": {
+            "id": "resp_err_1",
+            "error": {"code": "rate_limit_exceeded", "message": "synthetic server error"}
+        }
+    })]);
+    responses::mount_sse_once(&server, any(), body).await;
+
+    test.cmd_with_server(&server)
+        .arg("--skip-git-repo-check")
+        .arg("tell me something")
+        .arg("--experimental-json")
+        .assert()
+        .code(1);
+
+    Ok(())
+}
--- a/codex-rs/execpolicy/src/execv_checker.rs
+++ b/codex-rs/execpolicy/src/execv_checker.rs
@@ -146,6 +146,8 @@ mod tests {
    use super::*;
    use crate::MatchedArg;
    use crate::PolicyParser;
+    use anyhow::Result;
+    use anyhow::anyhow;

    fn setup(fake_cp: &Path) -> ExecvChecker {
        let source = format!(
@@ -164,7 +166,7 @@ system_path=[{fake_cp:?}]

    #[test]
    fn test_check_valid_input_files() -> Result<()> {
-        let temp_dir = TempDir::new().unwrap();
+        let temp_dir = TempDir::new()?;

        // Create an executable file that can be used with the system_path arg.
        let fake_cp = temp_dir.path().join("cp");
@@ -172,14 +174,14 @@ system_path=[{fake_cp:?}]
        {
            use std::os::unix::fs::PermissionsExt;

-            let fake_cp_file = std::fs::File::create(&fake_cp).unwrap();
-            let mut permissions = fake_cp_file.metadata().unwrap().permissions();
+            let fake_cp_file = std::fs::File::create(&fake_cp)?;
+            let mut permissions = fake_cp_file.metadata()?.permissions();
            permissions.set_mode(0o755);
-            std::fs::set_permissions(&fake_cp, permissions).unwrap();
+            std::fs::set_permissions(&fake_cp, permissions)?;
        }
        #[cfg(windows)]
        {
-            std::fs::File::create(&fake_cp).unwrap();
+            std::fs::File::create(&fake_cp)?;
        }

        // Create root_path and reference to files under the root.
@@ -199,7 +201,7 @@ system_path=[{fake_cp:?}]
            program: "cp".into(),
            args: vec![source, dest.clone()],
        };
-        let valid_exec = match checker.r#match(&exec_call)? {
+        let valid_exec = match checker.r#match(&exec_call).map_err(|e| anyhow!("{e:?}"))? {
            MatchedExec::Match { exec } => exec,
            unexpected => panic!("Expected a safe exec but got {unexpected:?}"),
        };
@@ -244,7 +246,10 @@ system_path=[{fake_cp:?}]
            program: "cp".into(),
            args: vec![root.clone(), root],
        };
-        let valid_exec_call_folders_as_args = match checker.r#match(&exec_call_folders_as_args)? {
+        let valid_exec_call_folders_as_args = match checker
+            .r#match(&exec_call_folders_as_args)
+            .map_err(|e| anyhow!("{e:?}"))?
+        {
            MatchedExec::Match { exec } => exec,
            _ => panic!("Expected a safe exec"),
        };
@@ -266,8 +271,9 @@ system_path=[{fake_cp:?}]
                    0,
                    ArgType::ReadableFile,
                    root_path.parent().unwrap().to_str().unwrap(),
-                )?,
-                MatchedArg::new(1, ArgType::WriteableFile, &dest)?,
+                )
+                .map_err(|e| anyhow!("{e:?}"))?,
+                MatchedArg::new(1, ArgType::WriteableFile, &dest).map_err(|e| anyhow!("{e:?}"))?,
            ],
            ..Default::default()
        };
--- a/codex-rs/login/Cargo.toml
+++ b/codex-rs/login/Cargo.toml
@@ -30,5 +30,6 @@ urlencoding = { workspace = true }
 webbrowser = { workspace = true }

 [dev-dependencies]
+anyhow = { workspace = true }
 core_test_support = { workspace = true }
 tempfile = { workspace = true }
--- a/codex-rs/login/tests/suite/login_server_e2e.rs
+++ b/codex-rs/login/tests/suite/login_server_e2e.rs
@@ -5,10 +5,11 @@ use std::net::TcpListener;
 use std::thread;
 use std::time::Duration;

+use anyhow::Result;
 use base64::Engine;
 use codex_login::ServerOptions;
 use codex_login::run_login_server;
-use core_test_support::non_sandbox_test;
+use core_test_support::skip_if_no_network;
 use tempfile::tempdir;

 // See spawn.rs for details
@@ -76,13 +77,13 @@ fn start_mock_issuer() -> (SocketAddr, thread::JoinHandle<()>) {
 }

 #[tokio::test]
-async fn end_to_end_login_flow_persists_auth_json() {
-    non_sandbox_test!();
+async fn end_to_end_login_flow_persists_auth_json() -> Result<()> {
+    skip_if_no_network!(Ok(()));

    let (issuer_addr, issuer_handle) = start_mock_issuer();
    let issuer = format!("http://{}:{}", issuer_addr.ip(), issuer_addr.port());

-    let tmp = tempdir().unwrap();
+    let tmp = tempdir()?;
    let codex_home = tmp.path().to_path_buf();

    // Seed auth.json with stale API key + tokens that should be overwritten.
@@ -97,9 +98,8 @@ async fn end_to_end_login_flow_persists_auth_json() {
    });
    std::fs::write(
        codex_home.join("auth.json"),
-        serde_json::to_string_pretty(&stale_auth).unwrap(),
-    )
-    .unwrap();
+        serde_json::to_string_pretty(&stale_auth)?,
+    )?;

    let state = "test_state_123".to_string();

@@ -114,25 +114,24 @@ async fn end_to_end_login_flow_persists_auth_json() {
        open_browser: false,
        force_state: Some(state),
    };
-    let server = run_login_server(opts).unwrap();
+    let server = run_login_server(opts)?;
    let login_port = server.actual_port;

    // Simulate browser callback, and follow redirect to /success
    let client = reqwest::Client::builder()
        .redirect(reqwest::redirect::Policy::limited(5))
-        .build()
-        .unwrap();
+        .build()?;
    let url = format!("http://127.0.0.1:{login_port}/auth/callback?code=abc&state=test_state_123");
-    let resp = client.get(&url).send().await.unwrap();
+    let resp = client.get(&url).send().await?;
    assert!(resp.status().is_success());

    // Wait for server shutdown
-    server.block_until_done().await.unwrap();
+    server.block_until_done().await?;

    // Validate auth.json
    let auth_path = codex_home.join("auth.json");
-    let data = std::fs::read_to_string(&auth_path).unwrap();
-    let json: serde_json::Value = serde_json::from_str(&data).unwrap();
+    let data = std::fs::read_to_string(&auth_path)?;
+    let json: serde_json::Value = serde_json::from_str(&data)?;
    // The following assert is here because of the old oauth flow that exchanges tokens for an
    // API key. See obtain_api_key in server.rs for details. Once we remove this old mechanism
    // from the code, this test should be updated to expect that the API key is no longer present.
@@ -143,16 +142,17 @@ async fn end_to_end_login_flow_persists_auth_json() {

    // Stop mock issuer
    drop(issuer_handle);
+    Ok(())
 }

 #[tokio::test]
-async fn creates_missing_codex_home_dir() {
-    non_sandbox_test!();
+async fn creates_missing_codex_home_dir() -> Result<()> {
+    skip_if_no_network!(Ok(()));

    let (issuer_addr, _issuer_handle) = start_mock_issuer();
    let issuer = format!("http://{}:{}", issuer_addr.ip(), issuer_addr.port());

-    let tmp = tempdir().unwrap();
+    let tmp = tempdir()?;
    let codex_home = tmp.path().join("missing-subdir"); // does not exist

    let state = "state2".to_string();
@@ -167,31 +167,32 @@ async fn creates_missing_codex_home_dir() {
        open_browser: false,
        force_state: Some(state),
    };
-    let server = run_login_server(opts).unwrap();
+    let server = run_login_server(opts)?;
    let login_port = server.actual_port;

    let client = reqwest::Client::new();
    let url = format!("http://127.0.0.1:{login_port}/auth/callback?code=abc&state=state2");
-    let resp = client.get(&url).send().await.unwrap();
+    let resp = client.get(&url).send().await?;
    assert!(resp.status().is_success());

-    server.block_until_done().await.unwrap();
+    server.block_until_done().await?;

    let auth_path = codex_home.join("auth.json");
    assert!(
        auth_path.exists(),
        "auth.json should be created even if parent dir was missing"
    );
+    Ok(())
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
-async fn cancels_previous_login_server_when_port_is_in_use() {
-    non_sandbox_test!();
+async fn cancels_previous_login_server_when_port_is_in_use() -> Result<()> {
+    skip_if_no_network!(Ok(()));

    let (issuer_addr, _issuer_handle) = start_mock_issuer();
    let issuer = format!("http://{}:{}", issuer_addr.ip(), issuer_addr.port());

-    let first_tmp = tempdir().unwrap();
+    let first_tmp = tempdir()?;
    let first_codex_home = first_tmp.path().to_path_buf();

    let first_opts = ServerOptions {
@@ -203,13 +204,13 @@ async fn cancels_previous_login_server_when_port_is_in_use() {
        force_state: Some("cancel_state".to_string()),
    };

-    let first_server = run_login_server(first_opts).unwrap();
+    let first_server = run_login_server(first_opts)?;
    let login_port = first_server.actual_port;
    let first_server_task = tokio::spawn(async move { first_server.block_until_done().await });

    tokio::time::sleep(Duration::from_millis(100)).await;

-    let second_tmp = tempdir().unwrap();
+    let second_tmp = tempdir()?;
    let second_codex_home = second_tmp.path().to_path_buf();

    let second_opts = ServerOptions {
@@ -221,7 +222,7 @@ async fn cancels_previous_login_server_when_port_is_in_use() {
        force_state: Some("cancel_state_2".to_string()),
    };

-    let second_server = run_login_server(second_opts).unwrap();
+    let second_server = run_login_server(second_opts)?;
    assert_eq!(second_server.actual_port, login_port);

    let cancel_result = first_server_task
@@ -232,11 +233,12 @@ async fn cancels_previous_login_server_when_port_is_in_use() {

    let client = reqwest::Client::new();
    let cancel_url = format!("http://127.0.0.1:{login_port}/cancel");
-    let resp = client.get(cancel_url).send().await.unwrap();
+    let resp = client.get(cancel_url).send().await?;
    assert!(resp.status().is_success());

    second_server
        .block_until_done()
        .await
        .expect_err("second login server should report cancellation");
+    Ok(())
 }
--- a/codex-rs/mcp-client/src/main.rs
+++ b/codex-rs/mcp-client/src/main.rs
@@ -70,11 +70,8 @@ async fn main() -> Result<()> {
        },
        protocol_version: MCP_SCHEMA_VERSION.to_owned(),
    };
-    let initialize_notification_params = None;
    let timeout = Some(Duration::from_secs(10));
-    let response = client
-        .initialize(params, initialize_notification_params, timeout)
-        .await?;
+    let response = client.initialize(params, timeout).await?;
    eprintln!("initialize response: {response:?}");

    // Issue `tools/list` request (no params).
--- a/Show More
+++ b/Show More