Merge branch 'main' into patch-guard

[MCP] Prompt mcp login when adding a streamable HTTP server that supports oauth (#5193)
1. If Codex detects that a `codex mcp add --url …` server supports OAuth, it will auto-initiate the login flow. 2. If the TUI starts and an MCP server supports OAuth but isn't logged in, it will give the user an explicit warning telling them to log in.
2026-02-07 01:13:40 +00:00 · 2025-10-15 09:38:48 -07:00 · 2025-10-15 12:27:40 -04:00 · 2025-10-15 07:27:24 -07:00 · 2025-10-15 09:56:59 +01:00 · 2025-10-14 16:29:02 -07:00
242 changed files with 12518 additions and 4077 deletions
--- a/.github/ISSUE_TEMPLATE/2-bug-report.yml
+++ b/.github/ISSUE_TEMPLATE/2-bug-report.yml
@@ -20,6 +20,14 @@ body:
    attributes:
      label: What version of Codex is running?
      description: Copy the output of `codex --version`
+    validations:
+      required: true
+  - type: input
+    id: plan
+    attributes:
+      label: What subscription do you have?
+    validations:
+      required: true
  - type: input
    id: model
    attributes:
@@ -32,11 +40,18 @@ body:
      description: |
        For MacOS and Linux: copy the output of `uname -mprs`
        For Windows: copy the output of `"$([Environment]::OSVersion | ForEach-Object VersionString) $(if ([Environment]::Is64BitOperatingSystem) { "x64" } else { "x86" })"` in the PowerShell console
+  - type: textarea
+    id: actual
+    attributes:
+      label: What issue are you seeing?
+      description: Please include the full error messages and prompts with PII redacted. If possible, please provide text instead of a screenshot. 
+    validations:
+      required: true
  - type: textarea
    id: steps
    attributes:
      label: What steps can reproduce the bug?
-      description: Explain the bug and provide a code snippet that can reproduce it.
+      description: Explain the bug and provide a code snippet that can reproduce it. Please include the session ID, token limit usage, and context window usage, if applicable.
    validations:
      required: true
  - type: textarea
@@ -44,11 +59,6 @@ body:
    attributes:
      label: What is the expected behavior?
      description: If possible, please provide text instead of a screenshot.
-  - type: textarea
-    id: actual
-    attributes:
-      label: What do you see instead?
-      description: If possible, please provide text instead of a screenshot.
  - type: textarea
    id: notes
    attributes:
--- a/.github/ISSUE_TEMPLATE/4-feature-request.yml
+++ b/.github/ISSUE_TEMPLATE/4-feature-request.yml
@@ -2,7 +2,6 @@ name: 🎁 Feature Request
 description: Propose a new feature for Codex
 labels:
  - enhancement
-  - needs triage
 body:
  - type: markdown
    attributes:
@@ -19,11 +18,6 @@ body:
      label: What feature would you like to see?
    validations:
      required: true
-  - type: textarea
-    id: author
-    attributes:
-      label: Are you interested in implementing this feature?
-      description: Please wait for acknowledgement before implementing or opening a PR.
  - type: textarea
    id: notes
    attributes:
--- a/.github/ISSUE_TEMPLATE/5-vs-code-extension.yml
+++ b/.github/ISSUE_TEMPLATE/5-vs-code-extension.yml
@@ -14,11 +14,21 @@ body:
    id: version
    attributes:
      label: What version of the VS Code extension are you using?
+    validations:
+      required: true
+  - type: input
+    id: plan
+    attributes:
+      label: What subscription do you have?
+    validations:
+      required: true
  - type: input
    id: ide
    attributes:
      label: Which IDE are you using?
      description: Like `VS Code`, `Cursor`, `Windsurf`, etc.
+    validations:
+      required: true
  - type: input
    id: platform
    attributes:
@@ -26,11 +36,18 @@ body:
      description: |
        For MacOS and Linux: copy the output of `uname -mprs`
        For Windows: copy the output of `"$([Environment]::OSVersion | ForEach-Object VersionString) $(if ([Environment]::Is64BitOperatingSystem) { "x64" } else { "x86" })"` in the PowerShell console
+  - type: textarea
+    id: actual
+    attributes:
+      label: What issue are you seeing?
+      description: Please include the full error messages and prompts with PII redacted. If possible, please provide text instead of a screenshot. 
+    validations:
+      required: true
  - type: textarea
    id: steps
    attributes:
      label: What steps can reproduce the bug?
-      description: Explain the bug and provide a code snippet that can reproduce it.
+      description: Explain the bug and provide a code snippet that can reproduce it. Please include the session ID, token limit usage, and context window usage, if applicable.
    validations:
      required: true
  - type: textarea
@@ -38,11 +55,6 @@ body:
    attributes:
      label: What is the expected behavior?
      description: If possible, please provide text instead of a screenshot.
-  - type: textarea
-    id: actual
-    attributes:
-      label: What do you see instead?
-      description: If possible, please provide text instead of a screenshot.
  - type: textarea
    id: notes
    attributes:
--- a/.github/workflows/issue-deduplicator.yml
+++ b/.github/workflows/issue-deduplicator.yml
@@ -14,7 +14,7 @@ jobs:
    permissions:
      contents: read
    outputs:
-      codex_output: ${{ steps.codex.outputs.final_message }}
+      codex_output: ${{ steps.codex.outputs.final-message }}
    steps:
      - uses: actions/checkout@v4

@@ -44,8 +44,8 @@ jobs:
      - id: codex
        uses: openai/codex-action@main
        with:
-          openai_api_key: ${{ secrets.CODEX_OPENAI_API_KEY }}
-          require_repo_write: false
+          openai-api-key: ${{ secrets.CODEX_OPENAI_API_KEY }}
+          allow-users: "*"
          model: gpt-5
          prompt: |
            You are an assistant that triages new GitHub issues by identifying potential duplicates.
@@ -55,12 +55,13 @@ jobs:
            - `codex-existing-issues.json`: JSON array of recent issues (each element includes number, title, body, createdAt).

            Instructions:
-            - Load both files as JSON and review their contents carefully. The codex-existing-issues.json file is large, ensure you explore all of it.
            - Compare the current issue against the existing issues to find up to five that appear to describe the same underlying problem or request.
+            - Focus on the underlying intent and context of each issue—such as reported symptoms, feature requests, reproduction steps, or error messages—rather than relying solely on string similarity or synthetic metrics.
+            - After your analysis, validate your results in 1-2 lines explaining your decision to return the selected matches.
            - When unsure, prefer returning fewer matches.
            - Include at most five numbers.

-          output_schema: |
+          output-schema: |
            {
              "type": "object",
              "properties": {
@@ -69,9 +70,10 @@ jobs:
                  "items": {
                    "type": "string"
                  }
-                }
+                },
+                "reason": { "type": "string" }
              },
-              "required": ["issues"],
+              "required": ["issues", "reason"],
              "additionalProperties": false
            }

@@ -102,14 +104,22 @@ jobs:
            }

            const issues = Array.isArray(parsed?.issues) ? parsed.issues : [];
-            if (issues.length === 0) {
+            const currentIssueNumber = String(context.payload.issue.number);
+
+            console.log(`Current issue number: ${currentIssueNumber}`);
+            console.log(issues);
+
+            const filteredIssues = issues.filter((value) => String(value) !== currentIssueNumber);
+
+            if (filteredIssues.length === 0) {
              core.info('Codex reported no potential duplicates.');
              return;
            }

            const lines = [
-              'Potential duplicates detected:',
-              ...issues.map((value) => `- #${String(value)}`),
+              'Potential duplicates detected. Please review them and close your issue if it is a duplicate.',
+              '',
+              ...filteredIssues.map((value) => `- #${String(value)}`),
              '',
              '*Powered by [Codex Action](https://github.com/openai/codex-action)*'];

--- a/.github/workflows/issue-labeler.yml
+++ b/.github/workflows/issue-labeler.yml
@@ -14,15 +14,15 @@ jobs:
    permissions:
      contents: read
    outputs:
-      codex_output: ${{ steps.codex.outputs.final_message }}
+      codex_output: ${{ steps.codex.outputs.final-message }}
    steps:
      - uses: actions/checkout@v4

      - id: codex
        uses: openai/codex-action@main
        with:
-          openai_api_key: ${{ secrets.CODEX_OPENAI_API_KEY }}
-          require_repo_write: false
+          openai-api-key: ${{ secrets.CODEX_OPENAI_API_KEY }}
+          allow-users: "*"
          prompt: |
            You are an assistant that reviews GitHub issues for the repository.

@@ -53,7 +53,7 @@ jobs:
            Repository full name:
            ${{ github.repository }}

-          output_schema: |
+          output-schema: |
            {
              "type": "object",
              "properties": {
--- a/.github/workflows/rust-ci.yml
+++ b/.github/workflows/rust-ci.yml
@@ -148,15 +148,26 @@ jobs:
          targets: ${{ matrix.target }}
          components: clippy

-      - uses: actions/cache@v4
+      # Explicit cache restore: split cargo home vs target, so we can
+      # avoid caching the large target dir on the gnu-dev job.
+      - name: Restore cargo home cache
+        id: cache_cargo_home_restore
+        uses: actions/cache/restore@v4
        with:
          path: |
            ~/.cargo/bin/
            ~/.cargo/registry/index/
            ~/.cargo/registry/cache/
            ~/.cargo/git/db/
-            ${{ github.workspace }}/codex-rs/target/
-          key: cargo-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}
+          key: cargo-home-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}
+
+      - name: Restore target cache (except gnu-dev)
+        id: cache_target_restore
+        if: ${{ !(matrix.target == 'x86_64-unknown-linux-gnu' && matrix.profile != 'release') }}
+        uses: actions/cache/restore@v4
+        with:
+          path: ${{ github.workspace }}/codex-rs/target/
+          key: cargo-target-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}

      - if: ${{ matrix.target == 'x86_64-unknown-linux-musl' || matrix.target == 'aarch64-unknown-linux-musl'}}
        name: Install musl build tools
@@ -194,6 +205,31 @@ jobs:
        env:
          RUST_BACKTRACE: 1

+      # Save caches explicitly; make non-fatal so cache packaging
+      # never fails the overall job. Only save when key wasn't hit.
+      - name: Save cargo home cache
+        if: always() && !cancelled() && steps.cache_cargo_home_restore.outputs.cache-hit != 'true'
+        continue-on-error: true
+        uses: actions/cache/save@v4
+        with:
+          path: |
+            ~/.cargo/bin/
+            ~/.cargo/registry/index/
+            ~/.cargo/registry/cache/
+            ~/.cargo/git/db/
+          key: cargo-home-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}
+
+      - name: Save target cache (except gnu-dev)
+        if: >-
+          always() && !cancelled() &&
+          (steps.cache_target_restore.outputs.cache-hit != 'true') &&
+          !(matrix.target == 'x86_64-unknown-linux-gnu' && matrix.profile != 'release')
+        continue-on-error: true
+        uses: actions/cache/save@v4
+        with:
+          path: ${{ github.workspace }}/codex-rs/target/
+          key: cargo-target-${{ matrix.runner }}-${{ matrix.target }}-${{ matrix.profile }}-${{ hashFiles('**/Cargo.lock') }}
+
      # Fail the job if any of the previous steps failed.
      - name: verify all steps passed
        if: |
--- a/.github/workflows/rust-release.yml
+++ b/.github/workflows/rust-release.yml
@@ -47,7 +47,7 @@ jobs:

  build:
    needs: tag-check
-    name: ${{ matrix.runner }} - ${{ matrix.target }}
+    name: Build - ${{ matrix.runner }} - ${{ matrix.target }}
    runs-on: ${{ matrix.runner }}
    timeout-minutes: 30
    defaults:
@@ -94,11 +94,118 @@ jobs:
      - if: ${{ matrix.target == 'x86_64-unknown-linux-musl' || matrix.target == 'aarch64-unknown-linux-musl'}}
        name: Install musl build tools
        run: |
-          sudo apt install -y musl-tools pkg-config
+          sudo apt-get update
+          sudo apt-get install -y musl-tools pkg-config

      - name: Cargo build
        run: cargo build --target ${{ matrix.target }} --release --bin codex --bin codex-responses-api-proxy

+      - if: ${{ matrix.runner == 'macos-14' }}
+        name: Configure Apple code signing
+        shell: bash
+        env:
+          KEYCHAIN_PASSWORD: actions
+          APPLE_CERTIFICATE: ${{ secrets.APPLE_CERTIFICATE_P12 }}
+          APPLE_CERTIFICATE_PASSWORD: ${{ secrets.APPLE_CERTIFICATE_PASSWORD }}
+        run: |
+          set -euo pipefail
+
+          if [[ -z "${APPLE_CERTIFICATE:-}" ]]; then
+            echo "APPLE_CERTIFICATE is required for macOS signing"
+            exit 1
+          fi
+
+          if [[ -z "${APPLE_CERTIFICATE_PASSWORD:-}" ]]; then
+            echo "APPLE_CERTIFICATE_PASSWORD is required for macOS signing"
+            exit 1
+          fi
+
+          cert_path="${RUNNER_TEMP}/apple_signing_certificate.p12"
+          echo "$APPLE_CERTIFICATE" | base64 -d > "$cert_path"
+
+          keychain_path="${RUNNER_TEMP}/codex-signing.keychain-db"
+          security create-keychain -p "$KEYCHAIN_PASSWORD" "$keychain_path"
+          security set-keychain-settings -lut 21600 "$keychain_path"
+          security unlock-keychain -p "$KEYCHAIN_PASSWORD" "$keychain_path"
+
+          keychain_args=()
+          cleanup_keychain() {
+            if ((${#keychain_args[@]} > 0)); then
+              security list-keychains -s "${keychain_args[@]}" || true
+              security default-keychain -s "${keychain_args[0]}" || true
+            else
+              security list-keychains -s || true
+            fi
+            if [[ -f "$keychain_path" ]]; then
+              security delete-keychain "$keychain_path" || true
+            fi
+          }
+
+          while IFS= read -r keychain; do
+            [[ -n "$keychain" ]] && keychain_args+=("$keychain")
+          done < <(security list-keychains | sed 's/^[[:space:]]*//;s/[[:space:]]*$//;s/"//g')
+
+          if ((${#keychain_args[@]} > 0)); then
+            security list-keychains -s "$keychain_path" "${keychain_args[@]}"
+          else
+            security list-keychains -s "$keychain_path"
+          fi
+
+          security default-keychain -s "$keychain_path"
+          security import "$cert_path" -k "$keychain_path" -P "$APPLE_CERTIFICATE_PASSWORD" -T /usr/bin/codesign -T /usr/bin/security
+          security set-key-partition-list -S apple-tool:,apple: -s -k "$KEYCHAIN_PASSWORD" "$keychain_path" > /dev/null
+
+          codesign_hashes=()
+          while IFS= read -r hash; do
+            [[ -n "$hash" ]] && codesign_hashes+=("$hash")
+          done < <(security find-identity -v -p codesigning "$keychain_path" \
+            | sed -n 's/.*\([0-9A-F]\{40\}\).*/\1/p' \
+            | sort -u)
+
+          if ((${#codesign_hashes[@]} == 0)); then
+            echo "No signing identities found in $keychain_path"
+            cleanup_keychain
+            rm -f "$cert_path"
+            exit 1
+          fi
+
+          if ((${#codesign_hashes[@]} > 1)); then
+            echo "Multiple signing identities found in $keychain_path:"
+            printf '  %s\n' "${codesign_hashes[@]}"
+            cleanup_keychain
+            rm -f "$cert_path"
+            exit 1
+          fi
+
+          APPLE_CODESIGN_IDENTITY="${codesign_hashes[0]}"
+
+          rm -f "$cert_path"
+
+          echo "APPLE_CODESIGN_IDENTITY=$APPLE_CODESIGN_IDENTITY" >> "$GITHUB_ENV"
+          echo "APPLE_CODESIGN_KEYCHAIN=$keychain_path" >> "$GITHUB_ENV"
+          echo "::add-mask::$APPLE_CODESIGN_IDENTITY"
+
+      - if: ${{ matrix.runner == 'macos-14' }}
+        name: Sign macOS binaries
+        shell: bash
+        run: |
+          set -euo pipefail
+
+          if [[ -z "${APPLE_CODESIGN_IDENTITY:-}" ]]; then
+            echo "APPLE_CODESIGN_IDENTITY is required for macOS signing"
+            exit 1
+          fi
+
+          keychain_args=()
+          if [[ -n "${APPLE_CODESIGN_KEYCHAIN:-}" && -f "${APPLE_CODESIGN_KEYCHAIN}" ]]; then
+            keychain_args+=(--keychain "${APPLE_CODESIGN_KEYCHAIN}")
+          fi
+
+          for binary in codex codex-responses-api-proxy; do
+            path="target/${{ matrix.target }}/release/${binary}"
+            codesign --force --options runtime --timestamp --sign "$APPLE_CODESIGN_IDENTITY" "${keychain_args[@]}" "$path"
+          done
+
      - name: Stage artifacts
        shell: bash
        run: |
@@ -157,6 +264,29 @@ jobs:
            zstd -T0 -19 --rm "$dest/$base"
          done

+      - name: Remove signing keychain
+        if: ${{ always() && matrix.runner == 'macos-14' }}
+        shell: bash
+        env:
+          APPLE_CODESIGN_KEYCHAIN: ${{ env.APPLE_CODESIGN_KEYCHAIN }}
+        run: |
+          set -euo pipefail
+          if [[ -n "${APPLE_CODESIGN_KEYCHAIN:-}" ]]; then
+            keychain_args=()
+            while IFS= read -r keychain; do
+              [[ "$keychain" == "$APPLE_CODESIGN_KEYCHAIN" ]] && continue
+              [[ -n "$keychain" ]] && keychain_args+=("$keychain")
+            done < <(security list-keychains | sed 's/^[[:space:]]*//;s/[[:space:]]*$//;s/"//g')
+            if ((${#keychain_args[@]} > 0)); then
+              security list-keychains -s "${keychain_args[@]}"
+              security default-keychain -s "${keychain_args[0]}"
+            fi
+
+            if [[ -f "$APPLE_CODESIGN_KEYCHAIN" ]]; then
+              security delete-keychain "$APPLE_CODESIGN_KEYCHAIN"
+            fi
+          fi
+
      - uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.target }}
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -73,3 +73,28 @@ If you don’t have the tool:
 ### Test assertions

 - Tests should use pretty_assertions::assert_eq for clearer diffs. Import this at the top of the test module if it isn't already.
+
+### Integration tests (core)
+
+- Prefer the utilities in `core_test_support::responses` when writing end-to-end Codex tests.
+
+- All `mount_sse*` helpers return a `ResponseMock`; hold onto it so you can assert against outbound `/responses` POST bodies.
+- Use `ResponseMock::single_request()` when a test should only issue one POST, or `ResponseMock::requests()` to inspect every captured `ResponsesRequest`.
+- `ResponsesRequest` exposes helpers (`body_json`, `input`, `function_call_output`, `custom_tool_call_output`, `call_output`, `header`, `path`, `query_param`) so assertions can target structured payloads instead of manual JSON digging.
+- Build SSE payloads with the provided `ev_*` constructors and the `sse(...)` helper.
+
+- Typical pattern:
+
+  ```rust
+  let mock = responses::mount_sse_once(&server, responses::sse(vec![
+      responses::ev_response_created("resp-1"),
+      responses::ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
+      responses::ev_completed("resp-1"),
+  ])).await;
+
+  codex.submit(Op::UserTurn { ... }).await?;
+
+  // Assert request body if needed.
+  let request = mock.single_request();
+  // assert using request.function_call_output(call_id) or request.body_json() or other helpers.
+  ```
--- a/README.md
+++ b/README.md
@@ -61,7 +61,7 @@ You can also use Codex with an API key, but this requires [additional setup](./d

 ### Model Context Protocol (MCP)

-Codex CLI supports [MCP servers](./docs/advanced.md#model-context-protocol-mcp). Enable by adding an `mcp_servers` section to your `~/.codex/config.toml`.
+Codex can access MCP servers. To configure them, refer to the [config docs](./docs/config.md#mcp_servers).

 ### Configuration

@@ -81,9 +81,11 @@ Codex CLI supports a rich set of configuration options, with preferences stored
 - [**Authentication**](./docs/authentication.md)
  - [Auth methods](./docs/authentication.md#forcing-a-specific-auth-method-advanced)
  - [Login on a "Headless" machine](./docs/authentication.md#connecting-on-a-headless-machine)
-- [**Non-interactive mode**](./docs/exec.md)
+- **Automating Codex**
+  - [GitHub Action](https://github.com/openai/codex-action)
+  - [TypeScript SDK](./sdk/typescript/README.md)
+  - [Non-interactive mode (`codex exec`)](./docs/exec.md)
 - [**Advanced**](./docs/advanced.md)
-  - [Non-interactive / CI mode](./docs/advanced.md#non-interactive--ci-mode)
  - [Tracing / verbose logging](./docs/advanced.md#tracing--verbose-logging)
  - [Model Context Protocol (MCP)](./docs/advanced.md#model-context-protocol-mcp)
 - [**Zero data retention (ZDR)**](./docs/zdr.md)
--- a/codex-cli/bin/codex.js
+++ b/codex-cli/bin/codex.js
@@ -80,6 +80,32 @@ function getUpdatedPath(newDirs) {
  return updatedPath;
 }

+/**
+ * Use heuristics to detect the package manager that was used to install Codex
+ * in order to give the user a hint about how to update it.
+ */
+function detectPackageManager() {
+  const userAgent = process.env.npm_config_user_agent || "";
+  if (/\bbun\//.test(userAgent)) {
+    return "bun";
+  }
+
+  const execPath = process.env.npm_execpath || "";
+  if (execPath.includes("bun")) {
+    return "bun";
+  }
+
+  if (
+    process.env.BUN_INSTALL ||
+    process.env.BUN_INSTALL_GLOBAL_DIR ||
+    process.env.BUN_INSTALL_BIN_DIR
+  ) {
+    return "bun";
+  }
+
+  return userAgent ? "npm" : null;
+}
+
 const additionalDirs = [];
 const pathDir = path.join(archRoot, "path");
 if (existsSync(pathDir)) {
@@ -87,9 +113,16 @@ if (existsSync(pathDir)) {
 }
 const updatedPath = getUpdatedPath(additionalDirs);

+const env = { ...process.env, PATH: updatedPath };
+const packageManagerEnvVar =
+  detectPackageManager() === "bun"
+    ? "CODEX_MANAGED_BY_BUN"
+    : "CODEX_MANAGED_BY_NPM";
+env[packageManagerEnvVar] = "1";
+
 const child = spawn(binaryPath, process.argv.slice(2), {
  stdio: "inherit",
-  env: { ...process.env, PATH: updatedPath, CODEX_MANAGED_BY_NPM: "1" },
+  env,
 });

 child.on("error", (err) => {
--- a/codex-rs/Cargo.lock
+++ b/codex-rs/Cargo.lock
@@ -300,6 +300,12 @@ dependencies = [
 "wait-timeout",
 ]

+[[package]]
+name = "assert_matches"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b34d609dfbaf33d6889b2b7106d3ca345eacad44200913df5ba02bfd31d2ba9"
+
 [[package]]
 name = "async-broadcast"
 version = "0.7.2"
@@ -871,6 +877,7 @@ version = "0.0.0"
 dependencies = [
 "anyhow",
 "assert_cmd",
+ "assert_matches",
 "pretty_assertions",
 "similar",
 "tempfile",
@@ -933,6 +940,7 @@ version = "0.0.0"
 dependencies = [
 "anyhow",
 "assert_cmd",
+ "assert_matches",
 "clap",
 "clap_complete",
 "codex-app-server",
@@ -980,12 +988,11 @@ dependencies = [
 "reqwest",
 "serde",
 "serde_json",
- "throbber-widgets-tui",
 "tokio",
 "tokio-stream",
 "tracing",
 "tracing-subscriber",
- "unicode-width 0.1.14",
+ "unicode-width 0.2.1",
 ]

 [[package]]
@@ -1022,6 +1029,7 @@ dependencies = [
 "anyhow",
 "askama",
 "assert_cmd",
+ "assert_matches",
 "async-channel",
 "async-trait",
 "base64",
@@ -1042,9 +1050,8 @@ dependencies = [
 "env-flags",
 "escargot",
 "eventsource-stream",
- "fd-lock",
 "futures",
- "gethostname",
+ "ignore",
 "indexmap 2.10.0",
 "landlock",
 "libc",
@@ -1063,7 +1070,7 @@ dependencies = [
 "serde_json",
 "serial_test",
 "sha1",
- "shellexpand",
+ "sha2",
 "shlex",
 "similar",
 "strum_macros 0.27.2",
@@ -1165,6 +1172,7 @@ dependencies = [
 name = "codex-git-tooling"
 version = "0.0.0"
 dependencies = [
+ "assert_matches",
 "pretty_assertions",
 "tempfile",
 "thiserror 2.0.16",
@@ -1252,6 +1260,7 @@ dependencies = [
 name = "codex-ollama"
 version = "0.0.0"
 dependencies = [
+ "assert_matches",
 "async-stream",
 "bytes",
 "codex-core",
@@ -1345,6 +1354,7 @@ version = "0.0.0"
 dependencies = [
 "anyhow",
 "axum",
+ "codex-protocol",
 "dirs",
 "futures",
 "keyring",
@@ -1370,6 +1380,7 @@ version = "0.0.0"
 dependencies = [
 "anyhow",
 "arboard",
+ "assert_matches",
 "async-stream",
 "base64",
 "chrono",
@@ -1416,6 +1427,8 @@ dependencies = [
 "tracing",
 "tracing-appender",
 "tracing-subscriber",
+ "tree-sitter-bash",
+ "tree-sitter-highlight",
 "unicode-segmentation",
 "unicode-width 0.2.1",
 "url",
@@ -1435,6 +1448,7 @@ dependencies = [
 name = "codex-utils-readiness"
 version = "0.0.0"
 dependencies = [
+ "assert_matches",
 "async-trait",
 "thiserror 2.0.16",
 "time",
@@ -1565,9 +1579,12 @@ dependencies = [
 "anyhow",
 "assert_cmd",
 "codex-core",
+ "notify",
+ "regex-lite",
 "serde_json",
 "tempfile",
 "tokio",
+ "walkdir",
 "wiremock",
 ]

@@ -2358,6 +2375,15 @@ dependencies = [
 "percent-encoding",
 ]

+[[package]]
+name = "fsevent-sys"
+version = "4.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "76ee7a02da4d231650c7cea31349b889be2f45ddb3ef3032d2ec8185f6313fd2"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "futures"
 version = "0.3.31"
@@ -3044,6 +3070,26 @@ version = "2.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd"

+[[package]]
+name = "inotify"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f37dccff2791ab604f9babef0ba14fbe0be30bd368dc541e2b08d07c8aa908f3"
+dependencies = [
+ "bitflags 2.9.1",
+ "inotify-sys",
+ "libc",
+]
+
+[[package]]
+name = "inotify-sys"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e05c02b5e89bff3b946cedeca278abc628fe811e604f027c45a8aa3cf793d0eb"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "inout"
 version = "0.1.4"
@@ -3244,6 +3290,26 @@ dependencies = [
 "zeroize",
 ]

+[[package]]
+name = "kqueue"
+version = "1.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eac30106d7dce88daf4a3fcb4879ea939476d5074a9b7ddd0fb97fa4bed5596a"
+dependencies = [
+ "kqueue-sys",
+ "libc",
+]
+
+[[package]]
+name = "kqueue-sys"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed9625ffda8729b85e45cf04090035ac368927b8cebc34898e7c120f52e4838b"
+dependencies = [
+ "bitflags 1.3.2",
+ "libc",
+]
+
 [[package]]
 name = "lalrpop"
 version = "0.19.12"
@@ -3643,6 +3709,30 @@ version = "0.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be"

+[[package]]
+name = "notify"
+version = "8.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4d3d07927151ff8575b7087f245456e549fea62edf0ec4e565a5ee50c8402bc3"
+dependencies = [
+ "bitflags 2.9.1",
+ "fsevent-sys",
+ "inotify",
+ "kqueue",
+ "libc",
+ "log",
+ "mio",
+ "notify-types",
+ "walkdir",
+ "windows-sys 0.60.2",
+]
+
+[[package]]
+name = "notify-types"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e0826a989adedc2a244799e823aece04662b66609d96af8dff7ac6df9a8925d"
+
 [[package]]
 name = "nu-ansi-term"
 version = "0.50.1"
@@ -4760,9 +4850,9 @@ dependencies = [

 [[package]]
 name = "rmcp"
-version = "0.7.0"
+version = "0.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "534fd1cd0601e798ac30545ff2b7f4a62c6f14edd4aaed1cc5eb1e85f69f09af"
+checksum = "6f35acda8f89fca5fd8c96cae3c6d5b4c38ea0072df4c8030915f3b5ff469c1c"
 dependencies = [
 "base64",
 "bytes",
@@ -4794,9 +4884,9 @@ dependencies = [

 [[package]]
 name = "rmcp-macros"
-version = "0.7.0"
+version = "0.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9ba777eb0e5f53a757e36f0e287441da0ab766564ba7201600eeb92a4753022e"
+checksum = "c9f1d5220aaa23b79c3d02e18f7a554403b3ccea544bbb6c69d6bcb3e854a274"
 dependencies = [
 "darling 0.21.3",
 "proc-macro2",
@@ -5344,15 +5434,6 @@ version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde"

-[[package]]
-name = "shellexpand"
-version = "3.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8b1fdf65dd6331831494dd616b30351c38e96e45921a27745cf98490458b90bb"
-dependencies = [
- "dirs",
-]
-
 [[package]]
 name = "shlex"
 version = "1.3.0"
@@ -5841,16 +5922,6 @@ dependencies = [
 "cfg-if",
 ]

-[[package]]
-name = "throbber-widgets-tui"
-version = "0.8.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1d36b5738d666a2b4c91b7c24998a8588db724b3107258343ebf8824bf55b06d"
-dependencies = [
- "rand 0.8.5",
- "ratatui",
-]
-
 [[package]]
 name = "tiff"
 version = "0.10.3"
@@ -6028,6 +6099,7 @@ dependencies = [
 "bytes",
 "futures-core",
 "futures-sink",
+ "futures-util",
 "pin-project-lite",
 "tokio",
 ]
@@ -6269,9 +6341,9 @@ dependencies = [

 [[package]]
 name = "tree-sitter"
-version = "0.25.9"
+version = "0.25.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ccd2a058a86cfece0bf96f7cce1021efef9c8ed0e892ab74639173e5ed7a34fa"
+checksum = "78f873475d258561b06f1c595d93308a7ed124d9977cb26b148c2084a4a3cc87"
 dependencies = [
 "cc",
 "regex",
@@ -6291,6 +6363,18 @@ dependencies = [
 "tree-sitter-language",
 ]

+[[package]]
+name = "tree-sitter-highlight"
+version = "0.25.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "adc5f880ad8d8f94e88cb81c3557024cf1a8b75e3b504c50481ed4f5a6006ff3"
+dependencies = [
+ "regex",
+ "streaming-iterator",
+ "thiserror 2.0.16",
+ "tree-sitter",
+]
+
 [[package]]
 name = "tree-sitter-language"
 version = "0.1.5"
--- a/codex-rs/Cargo.toml
+++ b/codex-rs/Cargo.toml
@@ -83,6 +83,7 @@ ansi-to-tui = "7.0.0"
 anyhow = "1"
 arboard = "3"
 askama = "0.12"
+assert_matches = "1.5.0"
 assert_cmd = "2"
 async-channel = "2.3.1"
 async-stream = "0.3.6"
@@ -106,7 +107,6 @@ env_logger = "0.11.5"
 escargot = "0.5"
 eventsource-stream = "0.2.3"
 futures = { version = "0.3", default-features = false }
-fd-lock = "4.0.4"
 icu_decimal = "2.0.0"
 icu_locale_core = "2.0.0"
 ignore = "0.4.23"
@@ -122,6 +122,7 @@ log = "0.4"
 maplit = "1.0.2"
 mime_guess = "2.0.5"
 multimap = "0.10.0"
+notify = "8.2.0"
 nucleo-matcher = "0.3.1"
 openssl-sys = "*"
 opentelemetry = "0.30.0"
@@ -143,7 +144,7 @@ rand = "0.9"
 ratatui = "0.29.0"
 regex-lite = "0.1.7"
 reqwest = "0.12"
-rmcp = { version = "0.7.0", default-features = false }
+rmcp = { version = "0.8.0", default-features = false }
 schemars = "0.8.22"
 seccompiler = "0.5.0"
 serde = "1"
@@ -152,7 +153,6 @@ serde_with = "3.14"
 serial_test = "3.2.0"
 sha1 = "0.10.6"
 sha2 = "0.10"
-shellexpand = "3.1.0"
 shlex = "1.3.0"
 similar = "2.7.0"
 starlark = "0.13.0"
@@ -176,8 +176,9 @@ tracing = "0.1.41"
 tracing-appender = "0.2.3"
 tracing-subscriber = "0.3.20"
 tracing-test = "0.2.5"
-tree-sitter = "0.25.9"
-tree-sitter-bash = "0.25.0"
+tree-sitter = "0.25.10"
+tree-sitter-bash = "0.25"
+tree-sitter-highlight = "0.25.10"
 ts-rs = "11"
 unicode-segmentation = "1.12.0"
 unicode-width = "0.2"
@@ -245,5 +246,9 @@ strip = "symbols"
 codegen-units = 1

 [patch.crates-io]
+# Uncomment to debug local changes.
 # ratatui = { path = "../../ratatui" }
 ratatui = { git = "https://github.com/nornagon/ratatui", branch = "nornagon-v0.29.0-patch" }
+
+# Uncomment to debug local changes.
+# rmcp = { path = "../../rust-sdk/crates/rmcp" }
--- a/codex-rs/README.md
+++ b/codex-rs/README.md
@@ -23,9 +23,15 @@ Codex supports a rich set of configuration options. Note that the Rust CLI uses

 ### Model Context Protocol Support

-Codex CLI functions as an MCP client that can connect to MCP servers on startup. See the [`mcp_servers`](../docs/config.md#mcp_servers) section in the configuration documentation for details.
+#### MCP client

-It is still experimental, but you can also launch Codex as an MCP _server_ by running `codex mcp-server`. Use the [`@modelcontextprotocol/inspector`](https://github.com/modelcontextprotocol/inspector) to try it out:
+Codex functions as an MCP client, which allows the Codex CLI and IDE extension to connect to MCP servers on startup. See the [configuration documentation](../docs/config.md#mcp_servers) for details.
+
+#### MCP server (experimental)
+
+Codex can be launched as an MCP _server_ by running `codex mcp-server`. This allows _other_ MCP clients to use Codex as a tool for another agent.
+
+Use the [`@modelcontextprotocol/inspector`](https://github.com/modelcontextprotocol/inspector) to try it out:

 ```shell
 npx @modelcontextprotocol/inspector codex mcp-server
@@ -71,9 +77,13 @@ To test to see what happens when a command is run under the sandbox provided by

 ```
 # macOS
-codex debug seatbelt [--full-auto] [COMMAND]...
+codex sandbox macos [--full-auto] [COMMAND]...

 # Linux
+codex sandbox linux [--full-auto] [COMMAND]...
+
+# Legacy aliases
+codex debug seatbelt [--full-auto] [COMMAND]...
 codex debug landlock [--full-auto] [COMMAND]...
 ```

--- a/codex-rs/ansi-escape/src/lib.rs
+++ b/codex-rs/ansi-escape/src/lib.rs
@@ -3,11 +3,30 @@ use ansi_to_tui::IntoText;
 use ratatui::text::Line;
 use ratatui::text::Text;

+/// Replaces every tab in `s` with four spaces, borrowing `s` unchanged when it
+/// contains no tab. Tabs can interact poorly with left-gutter prefixes in our
+/// TUI and CLI transcript views (e.g., `nl` separates line numbers from content
+/// with a tab); a fixed substitution avoids odd visual artifacts without
+/// changing semantics for our use cases.
+fn expand_tabs(s: &str) -> std::borrow::Cow<'_, str> {
+    if s.contains('\t') {
+        // Keep it simple: replace each tab with 4 spaces rather than aligning
+        // to tab stops. Most usages (like `nl`) look acceptable with a fixed
+        // substitution, and this avoids stateful column math across spans.
+        // Only this branch allocates; tab-free input is returned borrowed.
+        std::borrow::Cow::Owned(s.replace('\t', "    "))
+    } else {
+        std::borrow::Cow::Borrowed(s)
+    }
+}
+
 /// This function should be used when the contents of `s` are expected to match
 /// a single line. If multiple lines are found, a warning is logged and only the
 /// first line is returned.
 pub fn ansi_escape_line(s: &str) -> Line<'static> {
-    let text = ansi_escape(s);
+    // Normalize tabs to spaces to avoid odd gutter collisions in transcript mode.
+    let s = expand_tabs(s);
+    let text = ansi_escape(&s);
    match text.lines.as_slice() {
        [] => "".into(),
        [only] => only.clone(),
--- a/codex-rs/apply-patch/Cargo.toml
+++ b/codex-rs/apply-patch/Cargo.toml
@@ -23,5 +23,6 @@ tree-sitter-bash = { workspace = true }

 [dev-dependencies]
 assert_cmd = { workspace = true }
+assert_matches = { workspace = true }
 pretty_assertions = { workspace = true }
 tempfile = { workspace = true }
--- a/codex-rs/apply-patch/src/lib.rs
+++ b/codex-rs/apply-patch/src/lib.rs
@@ -843,6 +843,7 @@ pub fn print_summary(
 #[cfg(test)]
 mod tests {
    use super::*;
+    use assert_matches::assert_matches;
    use pretty_assertions::assert_eq;
    use std::fs;
    use std::string::ToString;
@@ -894,10 +895,10 @@ mod tests {

    fn assert_not_match(script: &str) {
        let args = args_bash(script);
-        assert!(matches!(
+        assert_matches!(
            maybe_parse_apply_patch(&args),
            MaybeApplyPatch::NotApplyPatch
-        ));
+        );
    }

    #[test]
@@ -905,10 +906,10 @@ mod tests {
        let patch = "*** Begin Patch\n*** Add File: foo\n+hi\n*** End Patch".to_string();
        let args = vec![patch];
        let dir = tempdir().unwrap();
-        assert!(matches!(
+        assert_matches!(
            maybe_parse_apply_patch_verified(&args, dir.path()),
            MaybeApplyPatchVerified::CorrectnessError(ApplyPatchError::ImplicitInvocation)
-        ));
+        );
    }

    #[test]
@@ -916,10 +917,10 @@ mod tests {
        let script = "*** Begin Patch\n*** Add File: foo\n+hi\n*** End Patch";
        let args = args_bash(script);
        let dir = tempdir().unwrap();
-        assert!(matches!(
+        assert_matches!(
            maybe_parse_apply_patch_verified(&args, dir.path()),
            MaybeApplyPatchVerified::CorrectnessError(ApplyPatchError::ImplicitInvocation)
-        ));
+        );
    }

    #[test]
--- a/codex-rs/cli/Cargo.toml
+++ b/codex-rs/cli/Cargo.toml
@@ -47,6 +47,7 @@ tokio = { workspace = true, features = [
 ] }

 [dev-dependencies]
+assert_matches = { workspace = true }
 assert_cmd = { workspace = true }
 predicates = { workspace = true }
 pretty_assertions = { workspace = true }
--- a/codex-rs/cli/src/main.rs
+++ b/codex-rs/cli/src/main.rs
@@ -26,6 +26,8 @@ use supports_color::Stream;
 mod mcp_cmd;

 use crate::mcp_cmd::McpCli;
+use codex_core::config::Config;
+use codex_core::config::ConfigOverrides;

 /// Codex CLI
 ///
@@ -45,6 +47,9 @@ struct MultitoolCli {
    #[clap(flatten)]
    pub config_overrides: CliConfigOverrides,

+    #[clap(flatten)]
+    pub feature_toggles: FeatureToggles,
+
    #[clap(flatten)]
    interactive: TuiCli,

@@ -76,8 +81,9 @@ enum Subcommand {
    /// Generate shell completion scripts.
    Completion(CompletionCommand),

-    /// Internal debugging commands.
-    Debug(DebugArgs),
+    /// Run commands within a Codex-provided sandbox.
+    #[clap(visible_alias = "debug")]
+    Sandbox(SandboxArgs),

    /// Apply the latest diff produced by Codex agent as a `git apply` to your local working tree.
    #[clap(visible_alias = "a")]
@@ -96,6 +102,9 @@ enum Subcommand {
    /// Internal: run the responses API proxy.
    #[clap(hide = true)]
    ResponsesApiProxy(ResponsesApiProxyArgs),
+
+    /// Inspect feature flags.
+    Features(FeaturesCli),
 }

 #[derive(Debug, Parser)]
@@ -121,18 +130,20 @@ struct ResumeCommand {
 }

 #[derive(Debug, Parser)]
-struct DebugArgs {
+struct SandboxArgs {
    #[command(subcommand)]
-    cmd: DebugCommand,
+    cmd: SandboxCommand,
 }

 #[derive(Debug, clap::Subcommand)]
-enum DebugCommand {
+enum SandboxCommand {
    /// Run a command under Seatbelt (macOS only).
-    Seatbelt(SeatbeltCommand),
+    #[clap(visible_alias = "seatbelt")]
+    Macos(SeatbeltCommand),

    /// Run a command under Landlock+seccomp (Linux only).
-    Landlock(LandlockCommand),
+    #[clap(visible_alias = "landlock")]
+    Linux(LandlockCommand),
 }

 #[derive(Debug, Parser)]
@@ -154,9 +165,7 @@ struct LoginCommand {
    )]
    api_key: Option<String>,

-    /// EXPERIMENTAL: Use device code flow (not yet supported)
-    /// This feature is experimental and may changed in future releases.
-    #[arg(long = "experimental_use-device-code", hide = true)]
+    #[arg(long = "device-auth")]
    use_device_code: bool,

    /// EXPERIMENTAL: Use custom OAuth issuer base URL (advanced)
@@ -230,6 +239,53 @@ fn print_exit_messages(exit_info: AppExitInfo) {
    }
 }

+#[derive(Debug, Default, Parser, Clone)]
+struct FeatureToggles {
+    /// Enable a feature (repeatable). Equivalent to `-c features.<name>=true`.
+    #[arg(long = "enable", value_name = "FEATURE", action = clap::ArgAction::Append, global = true)]
+    enable: Vec<String>,
+
+    /// Disable a feature (repeatable). Equivalent to `-c features.<name>=false`.
+    #[arg(long = "disable", value_name = "FEATURE", action = clap::ArgAction::Append, global = true)]
+    disable: Vec<String>,
+}
+
+impl FeatureToggles {
+    /// Converts the `--enable`/`--disable` flags into raw config overrides.
+    ///
+    /// Yields `features.<name>=true` for each enabled feature, followed by
+    /// `features.<name>=false` for each disabled one, in the order given.
+    fn to_overrides(&self) -> Vec<String> {
+        let enabled = self.enable.iter().map(|name| format!("features.{name}=true"));
+        let disabled = self.disable.iter().map(|name| format!("features.{name}=false"));
+        let overrides = enabled.chain(disabled);
+        overrides.collect()
+    }
+}
+
+#[derive(Debug, Parser)]
+struct FeaturesCli {
+    #[command(subcommand)]
+    sub: FeaturesSubcommand,
+}
+
+#[derive(Debug, Parser)]
+enum FeaturesSubcommand {
+    /// List known features with their stage and effective state.
+    List,
+}
+
+/// Returns the lowercase label printed for a feature's lifecycle stage.
+fn stage_str(stage: codex_core::features::Stage) -> &'static str {
+    match stage {
+        codex_core::features::Stage::Experimental => "experimental",
+        codex_core::features::Stage::Beta => "beta",
+        codex_core::features::Stage::Stable => "stable",
+        codex_core::features::Stage::Deprecated => "deprecated",
+        codex_core::features::Stage::Removed => "removed",
+    }
+}
+
 /// As early as possible in the process lifecycle, apply hardening measures. We
 /// skip this in debug builds to avoid interfering with debugging.
 #[ctor::ctor]
@@ -247,11 +303,17 @@ fn main() -> anyhow::Result<()> {

 async fn cli_main(codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()> {
    let MultitoolCli {
-        config_overrides: root_config_overrides,
+        config_overrides: mut root_config_overrides,
+        feature_toggles,
        mut interactive,
        subcommand,
    } = MultitoolCli::parse();

+    // Fold --enable/--disable into config overrides so they flow to all subcommands.
+    root_config_overrides
+        .raw_overrides
+        .extend(feature_toggles.to_overrides());
+
    match subcommand {
        None => {
            prepend_config_flags(
@@ -291,7 +353,8 @@ async fn cli_main(codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()
                last,
                config_overrides,
            );
-            codex_tui::run_main(interactive, codex_linux_sandbox_exe).await?;
+            let exit_info = codex_tui::run_main(interactive, codex_linux_sandbox_exe).await?;
+            print_exit_messages(exit_info);
        }
        Some(Subcommand::Login(mut login_cli)) => {
            prepend_config_flags(
@@ -341,8 +404,8 @@ async fn cli_main(codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()
            );
            codex_cloud_tasks::run_main(cloud_cli, codex_linux_sandbox_exe).await?;
        }
-        Some(Subcommand::Debug(debug_args)) => match debug_args.cmd {
-            DebugCommand::Seatbelt(mut seatbelt_cli) => {
+        Some(Subcommand::Sandbox(sandbox_args)) => match sandbox_args.cmd {
+            SandboxCommand::Macos(mut seatbelt_cli) => {
                prepend_config_flags(
                    &mut seatbelt_cli.config_overrides,
                    root_config_overrides.clone(),
@@ -353,7 +416,7 @@ async fn cli_main(codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()
                )
                .await?;
            }
-            DebugCommand::Landlock(mut landlock_cli) => {
+            SandboxCommand::Linux(mut landlock_cli) => {
                prepend_config_flags(
                    &mut landlock_cli.config_overrides,
                    root_config_overrides.clone(),
@@ -379,6 +442,30 @@ async fn cli_main(codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()
        Some(Subcommand::GenerateTs(gen_cli)) => {
            codex_protocol_ts::generate_ts(&gen_cli.out_dir, gen_cli.prettier.as_deref())?;
        }
+        Some(Subcommand::Features(FeaturesCli { sub })) => match sub {
+            FeaturesSubcommand::List => {
+                // Respect root-level `-c` overrides plus top-level flags like `--profile`.
+                let cli_kv_overrides = root_config_overrides
+                    .parse_overrides()
+                    .map_err(|e| anyhow::anyhow!(e))?;
+
+                // Thread through relevant top-level flags (at minimum, `--profile`).
+                // Also honor `--search` since it maps to a feature toggle.
+                let overrides = ConfigOverrides {
+                    config_profile: interactive.config_profile.clone(),
+                    tools_web_search_request: interactive.web_search.then_some(true),
+                    ..Default::default()
+                };
+
+                let config = Config::load_with_cli_overrides(cli_kv_overrides, overrides).await?;
+                for def in codex_core::features::FEATURES.iter() {
+                    let name = def.key;
+                    let stage = stage_str(def.stage);
+                    let enabled = config.features.enabled(def.id);
+                    println!("{name}\t{stage}\t{enabled}");
+                }
+            }
+        },
    }

    Ok(())
@@ -472,6 +559,7 @@ fn print_completion(cmd: CompletionCommand) {
 #[cfg(test)]
 mod tests {
    use super::*;
+    use assert_matches::assert_matches;
    use codex_core::protocol::TokenUsage;
    use codex_protocol::ConversationId;

@@ -481,6 +569,7 @@ mod tests {
            interactive,
            config_overrides: root_overrides,
            subcommand,
+            feature_toggles: _,
        } = cli;

        let Subcommand::Resume(ResumeCommand {
@@ -604,14 +693,14 @@ mod tests {
        assert_eq!(interactive.model.as_deref(), Some("gpt-5-test"));
        assert!(interactive.oss);
        assert_eq!(interactive.config_profile.as_deref(), Some("my-profile"));
-        assert!(matches!(
+        assert_matches!(
            interactive.sandbox_mode,
            Some(codex_common::SandboxModeCliArg::WorkspaceWrite)
-        ));
-        assert!(matches!(
+        );
+        assert_matches!(
            interactive.approval_policy,
            Some(codex_common::ApprovalModeCliArg::OnRequest)
-        ));
+        );
        assert!(interactive.full_auto);
        assert_eq!(
            interactive.cwd.as_deref(),
--- a/codex-rs/cli/src/mcp_cmd.rs
+++ b/codex-rs/cli/src/mcp_cmd.rs
@@ -4,6 +4,7 @@ use anyhow::Context;
 use anyhow::Result;
 use anyhow::anyhow;
 use anyhow::bail;
+use clap::ArgGroup;
 use codex_common::CliConfigOverrides;
 use codex_core::config::Config;
 use codex_core::config::ConfigOverrides;
@@ -12,8 +13,12 @@ use codex_core::config::load_global_mcp_servers;
 use codex_core::config::write_global_mcp_servers;
 use codex_core::config_types::McpServerConfig;
 use codex_core::config_types::McpServerTransportConfig;
+use codex_core::features::Feature;
+use codex_core::mcp::auth::compute_auth_statuses;
+use codex_core::protocol::McpAuthStatus;
 use codex_rmcp_client::delete_oauth_tokens;
 use codex_rmcp_client::perform_oauth_login;
+use codex_rmcp_client::supports_oauth_login;

 /// [experimental] Launch Codex as an MCP server or manage configured MCP servers.
 ///
@@ -77,13 +82,61 @@ pub struct AddArgs {
    /// Name for the MCP server configuration.
    pub name: String,

-    /// Environment variables to set when launching the server.
-    #[arg(long, value_parser = parse_env_pair, value_name = "KEY=VALUE")]
-    pub env: Vec<(String, String)>,
+    #[command(flatten)]
+    pub transport_args: AddMcpTransportArgs,
+}

+#[derive(Debug, clap::Args)]
+#[command(
+    group(
+        ArgGroup::new("transport")
+            .args(["command", "url"])
+            .required(true)
+            .multiple(false)
+    )
+)]
+pub struct AddMcpTransportArgs {
+    #[command(flatten)]
+    pub stdio: Option<AddMcpStdioArgs>,
+
+    #[command(flatten)]
+    pub streamable_http: Option<AddMcpStreamableHttpArgs>,
+}
+
+#[derive(Debug, clap::Args)]
+pub struct AddMcpStdioArgs {
    /// Command to launch the MCP server.
-    #[arg(trailing_var_arg = true, num_args = 1..)]
+    /// Use --url for a streamable HTTP server.
+    #[arg(
+            trailing_var_arg = true,
+            num_args = 0..,
+        )]
    pub command: Vec<String>,
+
+    /// Environment variables to set when launching the server.
+    /// Only valid with stdio servers.
+    #[arg(
+        long,
+        value_parser = parse_env_pair,
+        value_name = "KEY=VALUE",
+    )]
+    pub env: Vec<(String, String)>,
+}
+
+#[derive(Debug, clap::Args)]
+pub struct AddMcpStreamableHttpArgs {
+    /// URL for a streamable HTTP MCP server.
+    #[arg(long)]
+    pub url: String,
+
+    /// Optional environment variable to read for a bearer token.
+    /// Only valid with streamable HTTP servers.
+    #[arg(
+        long = "bearer-token-env-var",
+        value_name = "ENV_VAR",
+        requires = "url"
+    )]
+    pub bearer_token_env_var: Option<String>,
 }

 #[derive(Debug, clap::Parser)]
@@ -138,39 +191,61 @@ impl McpCli {

 async fn run_add(config_overrides: &CliConfigOverrides, add_args: AddArgs) -> Result<()> {
    // Validate any provided overrides even though they are not currently applied.
-    config_overrides.parse_overrides().map_err(|e| anyhow!(e))?;
+    let overrides = config_overrides.parse_overrides().map_err(|e| anyhow!(e))?;
+    let config = Config::load_with_cli_overrides(overrides, ConfigOverrides::default())
+        .await
+        .context("failed to load configuration")?;

-    let AddArgs { name, env, command } = add_args;
+    let AddArgs {
+        name,
+        transport_args,
+    } = add_args;

    validate_server_name(&name)?;

-    let mut command_parts = command.into_iter();
-    let command_bin = command_parts
-        .next()
-        .ok_or_else(|| anyhow!("command is required"))?;
-    let command_args: Vec<String> = command_parts.collect();
-
-    let env_map = if env.is_empty() {
-        None
-    } else {
-        let mut map = HashMap::new();
-        for (key, value) in env {
-            map.insert(key, value);
-        }
-        Some(map)
-    };
-
    let codex_home = find_codex_home().context("failed to resolve CODEX_HOME")?;
    let mut servers = load_global_mcp_servers(&codex_home)
        .await
        .with_context(|| format!("failed to load MCP servers from {}", codex_home.display()))?;

-    let new_entry = McpServerConfig {
-        transport: McpServerTransportConfig::Stdio {
-            command: command_bin,
-            args: command_args,
-            env: env_map,
+    let transport = match transport_args {
+        AddMcpTransportArgs {
+            stdio: Some(stdio), ..
+        } => {
+            let mut command_parts = stdio.command.into_iter();
+            let command_bin = command_parts
+                .next()
+                .ok_or_else(|| anyhow!("command is required"))?;
+            let command_args: Vec<String> = command_parts.collect();
+
+            let env_map = if stdio.env.is_empty() {
+                None
+            } else {
+                Some(stdio.env.into_iter().collect::<HashMap<_, _>>())
+            };
+            McpServerTransportConfig::Stdio {
+                command: command_bin,
+                args: command_args,
+                env: env_map,
+            }
+        }
+        AddMcpTransportArgs {
+            streamable_http:
+                Some(AddMcpStreamableHttpArgs {
+                    url,
+                    bearer_token_env_var,
+                }),
+            ..
+        } => McpServerTransportConfig::StreamableHttp {
+            url,
+            bearer_token_env_var,
        },
+        AddMcpTransportArgs { .. } => bail!("exactly one of --command or --url must be provided"),
+    };
+
+    let new_entry = McpServerConfig {
+        transport: transport.clone(),
+        enabled: true,
        startup_timeout_sec: None,
        tool_timeout_sec: None,
    };
@@ -182,6 +257,17 @@ async fn run_add(config_overrides: &CliConfigOverrides, add_args: AddArgs) -> Re

    println!("Added global MCP server '{name}'.");

+    if let McpServerTransportConfig::StreamableHttp {
+        url,
+        bearer_token_env_var: None,
+    } = transport
+        && matches!(supports_oauth_login(&url).await, Ok(true))
+    {
+        println!("Detected OAuth support. Starting OAuth flow…");
+        perform_oauth_login(&name, &url, config.mcp_oauth_credentials_store_mode).await?;
+        println!("Successfully logged in.");
+    }
+
    Ok(())
 }

@@ -219,7 +305,7 @@ async fn run_login(config_overrides: &CliConfigOverrides, login_args: LoginArgs)
        .await
        .context("failed to load configuration")?;

-    if !config.use_experimental_use_rmcp_client {
+    if !config.features.enabled(Feature::RmcpClient) {
        bail!(
            "OAuth login is only supported when experimental_use_rmcp_client is true in config.toml."
        );
@@ -236,7 +322,7 @@ async fn run_login(config_overrides: &CliConfigOverrides, login_args: LoginArgs)
        _ => bail!("OAuth login is only supported for streamable HTTP servers."),
    };

-    perform_oauth_login(&name, &url).await?;
+    perform_oauth_login(&name, &url, config.mcp_oauth_credentials_store_mode).await?;
    println!("Successfully logged in to MCP server '{name}'.");
    Ok(())
 }
@@ -259,7 +345,7 @@ async fn run_logout(config_overrides: &CliConfigOverrides, logout_args: LogoutAr
        _ => bail!("OAuth logout is only supported for streamable_http transports."),
    };

-    match delete_oauth_tokens(&name, &url) {
+    match delete_oauth_tokens(&name, &url, config.mcp_oauth_credentials_store_mode) {
        Ok(true) => println!("Removed OAuth credentials for '{name}'."),
        Ok(false) => println!("No OAuth credentials stored for '{name}'."),
        Err(err) => return Err(anyhow!("failed to delete OAuth credentials: {err}")),
@@ -276,11 +362,20 @@ async fn run_list(config_overrides: &CliConfigOverrides, list_args: ListArgs) ->

    let mut entries: Vec<_> = config.mcp_servers.iter().collect();
    entries.sort_by(|(a, _), (b, _)| a.cmp(b));
+    let auth_statuses = compute_auth_statuses(
+        config.mcp_servers.iter(),
+        config.mcp_oauth_credentials_store_mode,
+    )
+    .await;

    if list_args.json {
        let json_entries: Vec<_> = entries
            .into_iter()
            .map(|(name, cfg)| {
+                let auth_status = auth_statuses
+                    .get(name.as_str())
+                    .copied()
+                    .unwrap_or(McpAuthStatus::Unsupported);
                let transport = match &cfg.transport {
                    McpServerTransportConfig::Stdio { command, args, env } => serde_json::json!({
                        "type": "stdio",
@@ -288,17 +383,21 @@ async fn run_list(config_overrides: &CliConfigOverrides, list_args: ListArgs) ->
                        "args": args,
                        "env": env,
                    }),
-                    McpServerTransportConfig::StreamableHttp { url, bearer_token } => {
+                    McpServerTransportConfig::StreamableHttp {
+                        url,
+                        bearer_token_env_var,
+                    } => {
                        serde_json::json!({
                            "type": "streamable_http",
                            "url": url,
-                            "bearer_token": bearer_token,
+                            "bearer_token_env_var": bearer_token_env_var,
                        })
                    }
                };

                serde_json::json!({
                    "name": name,
+                    "enabled": cfg.enabled,
                    "transport": transport,
                    "startup_timeout_sec": cfg
                        .startup_timeout_sec
@@ -306,6 +405,7 @@ async fn run_list(config_overrides: &CliConfigOverrides, list_args: ListArgs) ->
                    "tool_timeout_sec": cfg
                        .tool_timeout_sec
                        .map(|timeout| timeout.as_secs_f64()),
+                    "auth_status": auth_status,
                })
            })
            .collect();
@@ -319,8 +419,8 @@ async fn run_list(config_overrides: &CliConfigOverrides, list_args: ListArgs) ->
        return Ok(());
    }

-    let mut stdio_rows: Vec<[String; 4]> = Vec::new();
-    let mut http_rows: Vec<[String; 3]> = Vec::new();
+    let mut stdio_rows: Vec<[String; 6]> = Vec::new();
+    let mut http_rows: Vec<[String; 5]> = Vec::new();

    for (name, cfg) in entries {
        match &cfg.transport {
@@ -343,21 +443,59 @@ async fn run_list(config_overrides: &CliConfigOverrides, list_args: ListArgs) ->
                            .join(", ")
                    }
                };
-                stdio_rows.push([name.clone(), command.clone(), args_display, env_display]);
-            }
-            McpServerTransportConfig::StreamableHttp { url, bearer_token } => {
-                let has_bearer = if bearer_token.is_some() {
-                    "True"
+                let status = if cfg.enabled {
+                    "enabled".to_string()
                } else {
-                    "False"
+                    "disabled".to_string()
                };
-                http_rows.push([name.clone(), url.clone(), has_bearer.into()]);
+                let auth_status = auth_statuses
+                    .get(name.as_str())
+                    .copied()
+                    .unwrap_or(McpAuthStatus::Unsupported)
+                    .to_string();
+                stdio_rows.push([
+                    name.clone(),
+                    command.clone(),
+                    args_display,
+                    env_display,
+                    status,
+                    auth_status,
+                ]);
+            }
+            McpServerTransportConfig::StreamableHttp {
+                url,
+                bearer_token_env_var,
+            } => {
+                let status = if cfg.enabled {
+                    "enabled".to_string()
+                } else {
+                    "disabled".to_string()
+                };
+                let auth_status = auth_statuses
+                    .get(name.as_str())
+                    .copied()
+                    .unwrap_or(McpAuthStatus::Unsupported)
+                    .to_string();
+                http_rows.push([
+                    name.clone(),
+                    url.clone(),
+                    bearer_token_env_var.clone().unwrap_or("-".to_string()),
+                    status,
+                    auth_status,
+                ]);
            }
        }
    }

    if !stdio_rows.is_empty() {
-        let mut widths = ["Name".len(), "Command".len(), "Args".len(), "Env".len()];
+        let mut widths = [
+            "Name".len(),
+            "Command".len(),
+            "Args".len(),
+            "Env".len(),
+            "Status".len(),
+            "Auth".len(),
+        ];
        for row in &stdio_rows {
            for (i, cell) in row.iter().enumerate() {
                widths[i] = widths[i].max(cell.len());
@@ -365,28 +503,36 @@ async fn run_list(config_overrides: &CliConfigOverrides, list_args: ListArgs) ->
        }

        println!(
-            "{:<name_w$}  {:<cmd_w$}  {:<args_w$}  {:<env_w$}",
-            "Name",
-            "Command",
-            "Args",
-            "Env",
+            "{name:<name_w$}  {command:<cmd_w$}  {args:<args_w$}  {env:<env_w$}  {status:<status_w$}  {auth:<auth_w$}",
+            name = "Name",
+            command = "Command",
+            args = "Args",
+            env = "Env",
+            status = "Status",
+            auth = "Auth",
            name_w = widths[0],
            cmd_w = widths[1],
            args_w = widths[2],
            env_w = widths[3],
+            status_w = widths[4],
+            auth_w = widths[5],
        );

        for row in &stdio_rows {
            println!(
-                "{:<name_w$}  {:<cmd_w$}  {:<args_w$}  {:<env_w$}",
-                row[0],
-                row[1],
-                row[2],
-                row[3],
+                "{name:<name_w$}  {command:<cmd_w$}  {args:<args_w$}  {env:<env_w$}  {status:<status_w$}  {auth:<auth_w$}",
+                name = row[0].as_str(),
+                command = row[1].as_str(),
+                args = row[2].as_str(),
+                env = row[3].as_str(),
+                status = row[4].as_str(),
+                auth = row[5].as_str(),
                name_w = widths[0],
                cmd_w = widths[1],
                args_w = widths[2],
                env_w = widths[3],
+                status_w = widths[4],
+                auth_w = widths[5],
            );
        }
    }
@@ -396,7 +542,13 @@ async fn run_list(config_overrides: &CliConfigOverrides, list_args: ListArgs) ->
    }

    if !http_rows.is_empty() {
-        let mut widths = ["Name".len(), "Url".len(), "Has Bearer Token".len()];
+        let mut widths = [
+            "Name".len(),
+            "Url".len(),
+            "Bearer Token Env Var".len(),
+            "Status".len(),
+            "Auth".len(),
+        ];
        for row in &http_rows {
            for (i, cell) in row.iter().enumerate() {
                widths[i] = widths[i].max(cell.len());
@@ -404,24 +556,32 @@ async fn run_list(config_overrides: &CliConfigOverrides, list_args: ListArgs) ->
        }

        println!(
-            "{:<name_w$}  {:<url_w$}  {:<token_w$}",
-            "Name",
-            "Url",
-            "Has Bearer Token",
+            "{name:<name_w$}  {url:<url_w$}  {token:<token_w$}  {status:<status_w$}  {auth:<auth_w$}",
+            name = "Name",
+            url = "Url",
+            token = "Bearer Token Env Var",
+            status = "Status",
+            auth = "Auth",
            name_w = widths[0],
            url_w = widths[1],
            token_w = widths[2],
+            status_w = widths[3],
+            auth_w = widths[4],
        );

        for row in &http_rows {
            println!(
-                "{:<name_w$}  {:<url_w$}  {:<token_w$}",
-                row[0],
-                row[1],
-                row[2],
+                "{name:<name_w$}  {url:<url_w$}  {token:<token_w$}  {status:<status_w$}  {auth:<auth_w$}",
+                name = row[0].as_str(),
+                url = row[1].as_str(),
+                token = row[2].as_str(),
+                status = row[3].as_str(),
+                auth = row[4].as_str(),
                name_w = widths[0],
                url_w = widths[1],
                token_w = widths[2],
+                status_w = widths[3],
+                auth_w = widths[4],
            );
        }
    }
@@ -447,14 +607,18 @@ async fn run_get(config_overrides: &CliConfigOverrides, get_args: GetArgs) -> Re
                "args": args,
                "env": env,
            }),
-            McpServerTransportConfig::StreamableHttp { url, bearer_token } => serde_json::json!({
+            McpServerTransportConfig::StreamableHttp {
+                url,
+                bearer_token_env_var,
+            } => serde_json::json!({
                "type": "streamable_http",
                "url": url,
-                "bearer_token": bearer_token,
+                "bearer_token_env_var": bearer_token_env_var,
            }),
        };
        let output = serde_json::to_string_pretty(&serde_json::json!({
            "name": get_args.name,
+            "enabled": server.enabled,
            "transport": transport,
            "startup_timeout_sec": server
                .startup_timeout_sec
@@ -468,6 +632,7 @@ async fn run_get(config_overrides: &CliConfigOverrides, get_args: GetArgs) -> Re
    }

    println!("{}", get_args.name);
+    println!("  enabled: {}", server.enabled);
    match &server.transport {
        McpServerTransportConfig::Stdio { command, args, env } => {
            println!("  transport: stdio");
@@ -493,11 +658,14 @@ async fn run_get(config_overrides: &CliConfigOverrides, get_args: GetArgs) -> Re
            };
            println!("  env: {env_display}");
        }
-        McpServerTransportConfig::StreamableHttp { url, bearer_token } => {
+        McpServerTransportConfig::StreamableHttp {
+            url,
+            bearer_token_env_var,
+        } => {
            println!("  transport: streamable_http");
            println!("  url: {url}");
-            let bearer = bearer_token.as_deref().unwrap_or("-");
-            println!("  bearer_token: {bearer}");
+            let env_var = bearer_token_env_var.as_deref().unwrap_or("-");
+            println!("  bearer_token_env_var: {env_var}");
        }
    }
    if let Some(timeout) = server.startup_timeout_sec {
--- a/codex-rs/cli/tests/mcp_add_remove.rs
+++ b/codex-rs/cli/tests/mcp_add_remove.rs
@@ -35,6 +35,7 @@ async fn add_and_remove_server_updates_global_config() -> Result<()> {
        }
        other => panic!("unexpected transport: {other:?}"),
    }
+    assert!(docs.enabled);

    let mut remove_cmd = codex_command(codex_home.path())?;
    remove_cmd
@@ -90,6 +91,122 @@ async fn add_with_env_preserves_key_order_and_values() -> Result<()> {
    assert_eq!(env.len(), 2);
    assert_eq!(env.get("FOO"), Some(&"bar".to_string()));
    assert_eq!(env.get("ALPHA"), Some(&"beta".to_string()));
+    assert!(envy.enabled);
+
+    Ok(())
+}
+
+// Adding a streamable HTTP server with only `--url` must succeed and persist an
+// enabled entry whose transport has the given URL and no bearer-token env var.
+// The command must also leave no `.credentials.json` or `.env` file behind in
+// the temporary Codex home.
+#[tokio::test]
+async fn add_streamable_http_without_manual_token() -> Result<()> {
+    let codex_home = TempDir::new()?;
+
+    let mut add_cmd = codex_command(codex_home.path())?;
+    add_cmd
+        .args(["mcp", "add", "github", "--url", "https://example.com/mcp"])
+        .assert()
+        .success();
+
+    let servers = load_global_mcp_servers(codex_home.path()).await?;
+    let github = servers.get("github").expect("github server should exist");
+    match &github.transport {
+        McpServerTransportConfig::StreamableHttp {
+            url,
+            bearer_token_env_var,
+        } => {
+            assert_eq!(url, "https://example.com/mcp");
+            assert!(bearer_token_env_var.is_none());
+        }
+        other => panic!("unexpected transport: {other:?}"),
+    }
+    assert!(github.enabled);
+
+    // No secrets or env files should have been written as a side effect.
+    assert!(!codex_home.path().join(".credentials.json").exists());
+    assert!(!codex_home.path().join(".env").exists());
+
+    Ok(())
+}
+
+// Passing `--bearer-token-env-var NAME` alongside `--url` must store the
+// environment variable *name* in the transport config and leave the server
+// enabled.
+#[tokio::test]
+async fn add_streamable_http_with_custom_env_var() -> Result<()> {
+    let codex_home = TempDir::new()?;
+
+    let mut add_cmd = codex_command(codex_home.path())?;
+    add_cmd
+        .args([
+            "mcp",
+            "add",
+            "issues",
+            "--url",
+            "https://example.com/issues",
+            "--bearer-token-env-var",
+            "GITHUB_TOKEN",
+        ])
+        .assert()
+        .success();
+
+    let servers = load_global_mcp_servers(codex_home.path()).await?;
+    let issues = servers.get("issues").expect("issues server should exist");
+    match &issues.transport {
+        McpServerTransportConfig::StreamableHttp {
+            url,
+            bearer_token_env_var,
+        } => {
+            assert_eq!(url, "https://example.com/issues");
+            assert_eq!(bearer_token_env_var.as_deref(), Some("GITHUB_TOKEN"));
+        }
+        other => panic!("unexpected transport: {other:?}"),
+    }
+    assert!(issues.enabled);
+    Ok(())
+}
+
+// `--with-bearer-token` is expected to be rejected: the command must fail,
+// mention the flag on stderr, and leave the global server list empty.
+#[tokio::test]
+async fn add_streamable_http_rejects_removed_flag() -> Result<()> {
+    let codex_home = TempDir::new()?;
+
+    let mut add_cmd = codex_command(codex_home.path())?;
+    add_cmd
+        .args([
+            "mcp",
+            "add",
+            "github",
+            "--url",
+            "https://example.com/mcp",
+            "--with-bearer-token",
+        ])
+        .assert()
+        .failure()
+        .stderr(contains("--with-bearer-token"));
+
+    // A failed `add` must not have persisted anything.
+    let servers = load_global_mcp_servers(codex_home.path()).await?;
+    assert!(servers.is_empty());
+
+    Ok(())
+}
+
+// Combining `--url` with `--command` must be refused by argument parsing:
+// the command fails with clap's "unexpected argument" message and nothing
+// is written to the global server list.
+#[tokio::test]
+async fn add_cant_add_command_and_url() -> Result<()> {
+    let codex_home = TempDir::new()?;
+
+    let mut add_cmd = codex_command(codex_home.path())?;
+    add_cmd
+        .args([
+            "mcp",
+            "add",
+            "github",
+            "--url",
+            "https://example.com/mcp",
+            "--command",
+            "--",
+            "echo",
+            "hello",
+        ])
+        .assert()
+        .failure()
+        .stderr(contains("unexpected argument '--command' found"));
+
+    // A failed `add` must not have persisted anything.
+    let servers = load_global_mcp_servers(codex_home.path()).await?;
+    assert!(servers.is_empty());

    Ok(())
 }
--- a/codex-rs/cli/tests/mcp_list.rs
+++ b/codex-rs/cli/tests/mcp_list.rs
@@ -1,6 +1,7 @@
 use std::path::Path;

 use anyhow::Result;
+use predicates::prelude::PredicateBooleanExt;
 use predicates::str::contains;
 use pretty_assertions::assert_eq;
 use serde_json::Value as JsonValue;
@@ -53,6 +54,10 @@ fn list_and_get_render_expected_output() -> Result<()> {
    assert!(stdout.contains("docs"));
    assert!(stdout.contains("docs-server"));
    assert!(stdout.contains("TOKEN=secret"));
+    assert!(stdout.contains("Status"));
+    assert!(stdout.contains("Auth"));
+    assert!(stdout.contains("enabled"));
+    assert!(stdout.contains("Unsupported"));

    let mut list_json_cmd = codex_command(codex_home.path())?;
    let json_output = list_json_cmd.args(["mcp", "list", "--json"]).output()?;
@@ -64,6 +69,7 @@ fn list_and_get_render_expected_output() -> Result<()> {
        json!([
          {
            "name": "docs",
+            "enabled": true,
            "transport": {
              "type": "stdio",
              "command": "docs-server",
@@ -76,7 +82,8 @@ fn list_and_get_render_expected_output() -> Result<()> {
              }
            },
            "startup_timeout_sec": null,
-            "tool_timeout_sec": null
+            "tool_timeout_sec": null,
+            "auth_status": "unsupported"
          }
        ]
        )
@@ -91,6 +98,7 @@ fn list_and_get_render_expected_output() -> Result<()> {
    assert!(stdout.contains("command: docs-server"));
    assert!(stdout.contains("args: --port 4000"));
    assert!(stdout.contains("env: TOKEN=secret"));
+    assert!(stdout.contains("enabled: true"));
    assert!(stdout.contains("remove: codex mcp remove docs"));

    let mut get_json_cmd = codex_command(codex_home.path())?;
@@ -98,7 +106,7 @@ fn list_and_get_render_expected_output() -> Result<()> {
        .args(["mcp", "get", "docs", "--json"])
        .assert()
        .success()
-        .stdout(contains("\"name\": \"docs\""));
+        .stdout(contains("\"name\": \"docs\"").and(contains("\"enabled\": true")));

    Ok(())
 }
--- a/codex-rs/cloud-tasks/Cargo.toml
+++ b/codex-rs/cloud-tasks/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
+edition = "2024"
 name = "codex-cloud-tasks"
 version = { workspace = true }
-edition = "2024"

 [lib]
 name = "codex_cloud_tasks"
@@ -11,26 +11,28 @@ path = "src/lib.rs"
 workspace = true

 [dependencies]
-anyhow = "1"
-clap = { version = "4", features = ["derive"] }
+anyhow = { workspace = true }
+base64 = { workspace = true }
+chrono = { workspace = true, features = ["serde"] }
+clap = { workspace = true, features = ["derive"] }
+codex-cloud-tasks-client = { path = "../cloud-tasks-client", features = [
+    "mock",
+    "online",
+] }
 codex-common = { path = "../common", features = ["cli"] }
-tokio = { version = "1", features = ["macros", "rt-multi-thread"] }
-tracing = { version = "0.1.41", features = ["log"] }
-tracing-subscriber = { version = "0.3.19", features = ["env-filter"] }
-codex-cloud-tasks-client = { path = "../cloud-tasks-client", features = ["mock", "online"] }
-ratatui = { version = "0.29.0" }
-crossterm = { version = "0.28.1", features = ["event-stream"] }
-tokio-stream = "0.1.17"
-chrono = { version = "0.4", features = ["serde"] }
-codex-login = { path = "../login" }
 codex-core = { path = "../core" }
-throbber-widgets-tui = "0.8.0"
-base64 = "0.22"
-serde_json = "1"
-reqwest = { version = "0.12", features = ["json"] }
-serde = { version = "1", features = ["derive"] }
-unicode-width = "0.1"
+codex-login = { path = "../login" }
 codex-tui = { path = "../tui" }
+crossterm = { workspace = true, features = ["event-stream"] }
+ratatui = { workspace = true }
+reqwest = { workspace = true, features = ["json"] }
+serde = { workspace = true, features = ["derive"] }
+serde_json = { workspace = true }
+tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
+tokio-stream = { workspace = true }
+tracing = { workspace = true, features = ["log"] }
+tracing-subscriber = { workspace = true, features = ["env-filter"] }
+unicode-width = { workspace = true }

 [dev-dependencies]
-async-trait = "0.1"
+async-trait = { workspace = true }
--- a/codex-rs/cloud-tasks/src/app.rs
+++ b/codex-rs/cloud-tasks/src/app.rs
@@ -1,4 +1,5 @@
 use std::time::Duration;
+use std::time::Instant;

 // Environment filter data models for the TUI
 #[derive(Clone, Debug, Default)]
@@ -42,15 +43,13 @@ use crate::scrollable_diff::ScrollableDiff;
 use codex_cloud_tasks_client::CloudBackend;
 use codex_cloud_tasks_client::TaskId;
 use codex_cloud_tasks_client::TaskSummary;
-use throbber_widgets_tui::ThrobberState;
-
 #[derive(Default)]
 pub struct App {
    pub tasks: Vec<TaskSummary>,
    pub selected: usize,
    pub status: String,
    pub diff_overlay: Option<DiffOverlay>,
-    pub throbber: ThrobberState,
+    pub spinner_start: Option<Instant>,
    pub refresh_inflight: bool,
    pub details_inflight: bool,
    // Environment filter state
@@ -82,7 +81,7 @@ impl App {
            selected: 0,
            status: "Press r to refresh".to_string(),
            diff_overlay: None,
-            throbber: ThrobberState::default(),
+            spinner_start: None,
            refresh_inflight: false,
            details_inflight: false,
            env_filter: None,
--- a/codex-rs/cloud-tasks/src/cli.rs
+++ b/codex-rs/cloud-tasks/src/cli.rs
@@ -1,3 +1,4 @@
+use clap::Args;
 use clap::Parser;
 use codex_common::CliConfigOverrides;

@@ -6,4 +7,43 @@ use codex_common::CliConfigOverrides;
 pub struct Cli {
    #[clap(skip)]
    pub config_overrides: CliConfigOverrides,
+
+    #[command(subcommand)]
+    pub command: Option<Command>,
+}
+
+// Subcommands accepted by the cloud-tasks CLI. `Cli::command` is optional, and
+// `run_main` only dispatches here when a subcommand was given.
+// NOTE: the `///` comment on each variant is consumed by clap's derive as the
+// subcommand's help text, so rewording it changes user-visible output.
+#[derive(Debug, clap::Subcommand)]
+pub enum Command {
+    /// Submit a new Codex Cloud task without launching the TUI.
+    Exec(ExecCommand),
+}
+
+// Arguments of the `exec` subcommand.
+// NOTE: the `///` comments on the fields are consumed by clap's derive as help
+// text, so rewording them changes user-visible output.
+#[derive(Debug, Args)]
+pub struct ExecCommand {
+    /// Task prompt to run in Codex Cloud.
+    // Optional positional: when absent or `-`, `resolve_query_input` reads stdin.
+    #[arg(value_name = "QUERY")]
+    pub query: Option<String>,
+
+    /// Target environment identifier (see `codex cloud` to browse).
+    // Required; `resolve_environment_id` accepts either an id or a label here.
+    #[arg(long = "env", value_name = "ENV_ID")]
+    pub environment: String,
+
+    /// Number of assistant attempts (best-of-N).
+    // Defaults to 1; `parse_attempts` restricts the value to 1 through 4.
+    #[arg(
+        long = "attempts",
+        default_value_t = 1usize,
+        value_parser = parse_attempts
+    )]
+    pub attempts: usize,
+}
+
+/// Clap value parser for `--attempts`: accepts only whole numbers from 1 through 4.
+///
+/// Returns the parsed count, or a human-readable message when the input does not
+/// parse as a `usize` or lies outside the allowed range.
+fn parse_attempts(input: &str) -> Result<usize, String> {
+    match input.parse::<usize>() {
+        Ok(count @ 1..=4) => Ok(count),
+        Ok(_) => Err(String::from("attempts must be between 1 and 4")),
+        Err(_) => Err(String::from("attempts must be an integer between 1 and 4")),
+    }
 }
--- a/codex-rs/cloud-tasks/src/lib.rs
+++ b/codex-rs/cloud-tasks/src/lib.rs
@@ -7,7 +7,9 @@ mod ui;
 pub mod util;
 pub use cli::Cli;

+use anyhow::anyhow;
 use std::io::IsTerminal;
+use std::io::Read;
 use std::path::PathBuf;
 use std::sync::Arc;
 use std::time::Duration;
@@ -23,6 +25,175 @@ struct ApplyJob {
    diff_override: Option<String>,
 }

+/// Result of `init_backend`: the backend handle together with the base URL that
+/// was resolved while creating it.
+struct BackendContext {
+    /// Shared backend client; either the mock client or the HTTP client.
+    backend: Arc<dyn codex_cloud_tasks_client::CloudBackend>,
+    /// Value of `CODEX_CLOUD_TASKS_BASE_URL`, or the built-in default when unset.
+    base_url: String,
+}
+
+/// Build the cloud-tasks backend shared by the TUI and `exec` entry points.
+///
+/// `user_agent_suffix` is registered through `set_user_agent_suffix` before any
+/// client is created. When `CODEX_CLOUD_TASKS_MODE` is `mock` or `MOCK`, a
+/// `MockClient` is returned without consulting auth. Otherwise an `HttpClient` is
+/// built against `CODEX_CLOUD_TASKS_BASE_URL` (default
+/// `https://chatgpt.com/backend-api`) and given the bearer token and, when one is
+/// known, the ChatGPT account id.
+///
+/// Returns an error if the HTTP client cannot be constructed. If no login or no
+/// non-empty token is available, a sign-in hint is printed to stderr and the
+/// process exits with status 1 instead of returning.
+async fn init_backend(user_agent_suffix: &str) -> anyhow::Result<BackendContext> {
+    /// Report the missing login on stderr and terminate the process.
+    fn exit_not_signed_in() -> ! {
+        eprintln!(
+            "Not signed in. Please run 'codex login' to sign in with ChatGPT, then re-run 'codex cloud'."
+        );
+        std::process::exit(1)
+    }
+
+    let mode = std::env::var("CODEX_CLOUD_TASKS_MODE").ok();
+    let use_mock = matches!(mode.as_deref(), Some("mock" | "MOCK"));
+    let base_url = match std::env::var("CODEX_CLOUD_TASKS_BASE_URL") {
+        Ok(configured) => configured,
+        Err(_) => "https://chatgpt.com/backend-api".to_string(),
+    };
+
+    set_user_agent_suffix(user_agent_suffix);
+
+    // Mock mode needs neither an HTTP client nor credentials.
+    if use_mock {
+        return Ok(BackendContext {
+            backend: Arc::new(codex_cloud_tasks_client::MockClient),
+            base_url,
+        });
+    }
+
+    let ua = codex_core::default_client::get_codex_user_agent();
+    let http = codex_cloud_tasks_client::HttpClient::new(base_url.clone())?.with_user_agent(ua);
+    // Logged for diagnostics only; the style label is derived from the URL shape.
+    let style = match base_url.contains("/backend-api") {
+        true => "wham",
+        false => "codex-api",
+    };
+    append_error_log(format!("startup: base_url={base_url} path_style={style}"));
+
+    let maybe_auth = codex_core::config::find_codex_home()
+        .ok()
+        .map(|home| codex_login::AuthManager::new(home, false))
+        .and_then(|am| am.auth());
+    let Some(auth) = maybe_auth else {
+        exit_not_signed_in()
+    };
+
+    if let Some(acc) = auth.get_account_id() {
+        append_error_log(format!("auth: mode=ChatGPT account_id={acc}"));
+    }
+
+    // A failed lookup and an empty token are both treated as "not signed in".
+    let token = match auth.get_token().await {
+        Ok(t) if !t.is_empty() => t,
+        _ => exit_not_signed_in(),
+    };
+
+    let http = http.with_bearer_token(token.clone());
+    // Prefer the account id from auth; otherwise try to extract it from the token.
+    let account_id = auth
+        .get_account_id()
+        .or_else(|| util::extract_chatgpt_account_id(&token));
+    let http = match account_id {
+        Some(acc) => {
+            append_error_log(format!("auth: set ChatGPT-Account-Id header: {acc}"));
+            http.with_chatgpt_account_id(acc)
+        }
+        None => http,
+    };
+
+    Ok(BackendContext {
+        backend: Arc::new(http),
+        base_url,
+    })
+}
+
+/// Run the `exec` subcommand: create a cloud task and print its URL to stdout.
+///
+/// The backend is initialized first, then the prompt is resolved (argument or
+/// stdin), then the `--env` value is resolved to an environment id. Errors from
+/// these steps and from task creation are propagated with `?`.
+async fn run_exec_command(args: crate::cli::ExecCommand) -> anyhow::Result<()> {
+    let ctx = init_backend("codex_cloud_tasks_exec").await?;
+    let prompt = resolve_query_input(args.query)?;
+    let env_id = resolve_environment_id(&ctx, &args.environment).await?;
+
+    // NOTE(review): `"main"` and `false` are hard-coded; presumably the git ref and
+    // a QA-mode flag — confirm against `CloudBackend::create_task`.
+    let backend = &*ctx.backend;
+    let created = codex_cloud_tasks_client::CloudBackend::create_task(
+        backend,
+        &env_id,
+        &prompt,
+        "main",
+        false,
+        args.attempts,
+    )
+    .await?;
+
+    let url = util::task_url(&ctx.base_url, &created.id.0);
+    println!("{url}");
+    Ok(())
+}
+
+/// Translate the user-supplied `--env` value into a concrete environment id.
+///
+/// The value is trimmed and must not be empty. The available environments are
+/// listed from the backend; an exact id match wins. Otherwise labels are compared
+/// ASCII-case-insensitively, and a label is accepted only when every row carrying
+/// it refers to the same id.
+///
+/// Errors when the value is empty, listing fails, no environments exist, nothing
+/// matches, or a label maps to more than one distinct id.
+async fn resolve_environment_id(ctx: &BackendContext, requested: &str) -> anyhow::Result<String> {
+    let trimmed = requested.trim();
+    if trimmed.is_empty() {
+        return Err(anyhow!("environment id must not be empty"));
+    }
+
+    let api_base = util::normalize_base_url(&ctx.base_url);
+    let headers = util::build_chatgpt_headers().await;
+    let environments = crate::env_detect::list_environments(&api_base, &headers).await?;
+    if environments.is_empty() {
+        return Err(anyhow!(
+            "no cloud environments are available for this workspace"
+        ));
+    }
+
+    // An exact id match always takes precedence over label matching.
+    if let Some(exact) = environments.iter().find(|env| env.id == trimmed) {
+        return Ok(exact.id.clone());
+    }
+
+    // Fall back to comparing labels, ignoring ASCII case.
+    let mut by_label = environments.iter().filter(|env| {
+        env.label
+            .as_deref()
+            .is_some_and(|label| label.eq_ignore_ascii_case(trimmed))
+    });
+    let Some(first) = by_label.next() else {
+        return Err(anyhow!(
+            "environment '{trimmed}' not found; run `codex cloud` to list available environments"
+        ));
+    };
+
+    // Several rows may share a label; that is fine only if they share one id.
+    if by_label.all(|env| env.id == first.id) {
+        Ok(first.id.clone())
+    } else {
+        Err(anyhow!(
+            "environment label '{trimmed}' is ambiguous; run `codex cloud` to pick the desired environment id"
+        ))
+    }
+}
+
+/// Determine the task prompt from the optional positional argument or from stdin.
+///
+/// Any argument other than `-` is returned unchanged. With `-`, stdin is always
+/// read; with no argument, stdin is read only when it is not a terminal, and a
+/// notice is printed to stderr first. Input that is empty after trimming is
+/// rejected, but the returned text itself is not trimmed.
+fn resolve_query_input(query_arg: Option<String>) -> anyhow::Result<String> {
+    let force_stdin = match query_arg {
+        Some(text) if text != "-" => return Ok(text),
+        Some(_) => true,
+        None => false,
+    };
+
+    if !force_stdin {
+        // Nothing was passed on the command line, so stdin is usable only when piped.
+        if std::io::stdin().is_terminal() {
+            return Err(anyhow!(
+                "no query provided. Pass one as an argument or pipe it via stdin."
+            ));
+        }
+        eprintln!("Reading query from stdin...");
+    }
+
+    let mut piped = String::new();
+    if let Err(e) = std::io::stdin().read_to_string(&mut piped) {
+        return Err(anyhow!("failed to read query from stdin: {e}"));
+    }
+    if piped.trim().is_empty() {
+        return Err(anyhow!(
+            "no query provided via stdin (received empty input)."
+        ));
+    }
+    Ok(piped)
+}
+
 fn level_from_status(status: codex_cloud_tasks_client::ApplyStatus) -> app::ApplyResultLevel {
    match status {
        codex_cloud_tasks_client::ApplyStatus::Success => app::ApplyResultLevel::Success,
@@ -148,7 +319,14 @@ fn spawn_apply(
 // (no standalone patch summarizer needed – UI displays raw diffs)

 /// Entry point for the `codex cloud` subcommand.
-pub async fn run_main(_cli: Cli, _codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()> {
+pub async fn run_main(cli: Cli, _codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()> {
+    if let Some(command) = cli.command {
+        return match command {
+            crate::cli::Command::Exec(args) => run_exec_command(args).await,
+        };
+    }
+    let Cli { .. } = cli;
+
    // Very minimal logging setup; mirrors other crates' pattern.
    let default_level = "error";
    let _ = tracing_subscriber::fmt()
@@ -162,72 +340,8 @@ pub async fn run_main(_cli: Cli, _codex_linux_sandbox_exe: Option<PathBuf>) -> a
        .try_init();

    info!("Launching Cloud Tasks list UI");
-    set_user_agent_suffix("codex_cloud_tasks_tui");
-
-    // Default to online unless explicitly configured to use mock.
-    let use_mock = matches!(
-        std::env::var("CODEX_CLOUD_TASKS_MODE").ok().as_deref(),
-        Some("mock") | Some("MOCK")
-    );
-
-    let backend: Arc<dyn codex_cloud_tasks_client::CloudBackend> = if use_mock {
-        Arc::new(codex_cloud_tasks_client::MockClient)
-    } else {
-        // Build an HTTP client against the configured (or default) base URL.
-        let base_url = std::env::var("CODEX_CLOUD_TASKS_BASE_URL")
-            .unwrap_or_else(|_| "https://chatgpt.com/backend-api".to_string());
-        let ua = codex_core::default_client::get_codex_user_agent();
-        let mut http =
-            codex_cloud_tasks_client::HttpClient::new(base_url.clone())?.with_user_agent(ua);
-        // Log which base URL and path style we're going to use.
-        let style = if base_url.contains("/backend-api") {
-            "wham"
-        } else {
-            "codex-api"
-        };
-        append_error_log(format!("startup: base_url={base_url} path_style={style}"));
-
-        // Require ChatGPT login (SWIC). Exit with a clear message if missing.
-        let _token = match codex_core::config::find_codex_home()
-            .ok()
-            .map(|home| codex_login::AuthManager::new(home, false))
-            .and_then(|am| am.auth())
-        {
-            Some(auth) => {
-                // Log account context for debugging workspace selection.
-                if let Some(acc) = auth.get_account_id() {
-                    append_error_log(format!("auth: mode=ChatGPT account_id={acc}"));
-                }
-                match auth.get_token().await {
-                    Ok(t) if !t.is_empty() => {
-                        // Attach token and ChatGPT-Account-Id header if available
-                        http = http.with_bearer_token(t.clone());
-                        if let Some(acc) = auth
-                            .get_account_id()
-                            .or_else(|| util::extract_chatgpt_account_id(&t))
-                        {
-                            append_error_log(format!("auth: set ChatGPT-Account-Id header: {acc}"));
-                            http = http.with_chatgpt_account_id(acc);
-                        }
-                        t
-                    }
-                    _ => {
-                        eprintln!(
-                            "Not signed in. Please run 'codex login' to sign in with ChatGPT, then re-run 'codex cloud'."
-                        );
-                        std::process::exit(1);
-                    }
-                }
-            }
-            None => {
-                eprintln!(
-                    "Not signed in. Please run 'codex login' to sign in with ChatGPT, then re-run 'codex cloud'."
-                );
-                std::process::exit(1);
-            }
-        };
-        Arc::new(http)
-    };
+    let BackendContext { backend, .. } = init_backend("codex_cloud_tasks_tui").await?;
+    let backend = backend;

    // Terminal setup
    use crossterm::ExecutableCommand;
@@ -400,16 +514,20 @@ pub async fn run_main(_cli: Cli, _codex_linux_sandbox_exe: Option<PathBuf>) -> a
                        let _ = frame_tx.send(Instant::now() + codex_tui::ComposerInput::recommended_flush_delay());
                    }
                }
-                // Advance throbber only while loading.
+                // Keep spinner pulsing only while loading.
                if app.refresh_inflight
                    || app.details_inflight
                    || app.env_loading
                    || app.apply_preflight_inflight
                    || app.apply_inflight
                {
-                    app.throbber.calc_next();
+                    if app.spinner_start.is_none() {
+                        app.spinner_start = Some(Instant::now());
+                    }
                    needs_redraw = true;
-                    let _ = frame_tx.send(Instant::now() + Duration::from_millis(100));
+                    let _ = frame_tx.send(Instant::now() + Duration::from_millis(600));
+                } else {
+                    app.spinner_start = None;
                }
                render_if_needed(&mut terminal, &mut app, &mut needs_redraw)?;
            }
--- a/codex-rs/cloud-tasks/src/ui.rs
+++ b/codex-rs/cloud-tasks/src/ui.rs
@@ -16,6 +16,7 @@ use ratatui::widgets::ListState;
 use ratatui::widgets::Padding;
 use ratatui::widgets::Paragraph;
 use std::sync::OnceLock;
+use std::time::Instant;

 use crate::app::App;
 use crate::app::AttemptView;
@@ -229,7 +230,7 @@ fn draw_list(frame: &mut Frame, area: Rect, app: &mut App) {

    // In-box spinner during initial/refresh loads
    if app.refresh_inflight {
-        draw_centered_spinner(frame, inner, &mut app.throbber, "Loading tasks…");
+        draw_centered_spinner(frame, inner, &mut app.spinner_start, "Loading tasks…");
    }
 }

@@ -291,7 +292,7 @@ fn draw_footer(frame: &mut Frame, area: Rect, app: &mut App) {
        || app.apply_preflight_inflight
        || app.apply_inflight
    {
-        draw_inline_spinner(frame, top[1], &mut app.throbber, "Loading…");
+        draw_inline_spinner(frame, top[1], &mut app.spinner_start, "Loading…");
    } else {
        frame.render_widget(Clear, top[1]);
    }
@@ -449,7 +450,12 @@ fn draw_diff_overlay(frame: &mut Frame, area: Rect, app: &mut App) {
        .map(|o| o.sd.wrapped_lines().is_empty())
        .unwrap_or(true);
    if app.details_inflight && raw_empty {
-        draw_centered_spinner(frame, content_area, &mut app.throbber, "Loading details…");
+        draw_centered_spinner(
+            frame,
+            content_area,
+            &mut app.spinner_start,
+            "Loading details…",
+        );
    } else {
        let scroll = app
            .diff_overlay
@@ -494,11 +500,11 @@ pub fn draw_apply_modal(frame: &mut Frame, area: Rect, app: &mut App) {
        frame.render_widget(header, rows[0]);
        // Body: spinner while preflight/apply runs; otherwise show result message and path lists
        if app.apply_preflight_inflight {
-            draw_centered_spinner(frame, rows[1], &mut app.throbber, "Checking…");
+            draw_centered_spinner(frame, rows[1], &mut app.spinner_start, "Checking…");
        } else if app.apply_inflight {
-            draw_centered_spinner(frame, rows[1], &mut app.throbber, "Applying…");
+            draw_centered_spinner(frame, rows[1], &mut app.spinner_start, "Applying…");
        } else if m.result_message.is_none() {
-            draw_centered_spinner(frame, rows[1], &mut app.throbber, "Loading…");
+            draw_centered_spinner(frame, rows[1], &mut app.spinner_start, "Loading…");
        } else if let Some(msg) = &m.result_message {
            let mut body_lines: Vec<Line> = Vec::new();
            let first = match m.result_level {
@@ -859,29 +865,29 @@ fn format_relative_time(ts: chrono::DateTime<Utc>) -> String {
 fn draw_inline_spinner(
    frame: &mut Frame,
    area: Rect,
-    state: &mut throbber_widgets_tui::ThrobberState,
+    spinner_start: &mut Option<Instant>,
    label: &str,
 ) {
-    use ratatui::style::Style;
-    use throbber_widgets_tui::BRAILLE_EIGHT;
-    use throbber_widgets_tui::Throbber;
-    use throbber_widgets_tui::WhichUse;
-    let w = Throbber::default()
-        .label(label)
-        .style(Style::default().cyan())
-        .throbber_style(Style::default().magenta().bold())
-        .throbber_set(BRAILLE_EIGHT)
-        .use_type(WhichUse::Spin);
-    frame.render_stateful_widget(w, area, state);
+    use ratatui::widgets::Paragraph;
+    let start = spinner_start.get_or_insert_with(Instant::now);
+    let blink_on = (start.elapsed().as_millis() / 600).is_multiple_of(2);
+    let dot = if blink_on {
+        "• ".into()
+    } else {
+        "◦ ".dim()
+    };
+    let label = label.cyan();
+    let line = Line::from(vec![dot, label]);
+    frame.render_widget(Paragraph::new(line), area);
 }

 fn draw_centered_spinner(
    frame: &mut Frame,
    area: Rect,
-    state: &mut throbber_widgets_tui::ThrobberState,
+    spinner_start: &mut Option<Instant>,
    label: &str,
 ) {
-    // Center a 1xN throbber within the given rect
+    // Center a 1xN spinner within the given rect
    let rows = Layout::default()
        .direction(Direction::Vertical)
        .constraints([
@@ -898,7 +904,7 @@ fn draw_centered_spinner(
            Constraint::Percentage(50),
        ])
        .split(rows[1]);
-    draw_inline_spinner(frame, cols[1], state, label);
+    draw_inline_spinner(frame, cols[1], spinner_start, label);
 }

 // Styling helpers for diff rendering live inline where used.
@@ -918,7 +924,12 @@ pub fn draw_env_modal(frame: &mut Frame, area: Rect, app: &mut App) {
    let content = overlay_content(inner);

    if app.env_loading {
-        draw_centered_spinner(frame, content, &mut app.throbber, "Loading environments…");
+        draw_centered_spinner(
+            frame,
+            content,
+            &mut app.spinner_start,
+            "Loading environments…",
+        );
        return;
    }

--- a/codex-rs/cloud-tasks/src/util.rs
+++ b/codex-rs/cloud-tasks/src/util.rs
@@ -91,3 +91,18 @@ pub async fn build_chatgpt_headers() -> HeaderMap {
    }
    headers
 }
+
+/// Build the web URL at which a task can be opened, starting from the backend base URL.
+///
+/// The base URL is normalized first. A trailing `/backend-api` or `/api/codex`
+/// segment is dropped before `/codex/tasks/{task_id}` is appended. A base that
+/// already ends in `/codex` only gets `/tasks/{task_id}` appended. Any other base
+/// gets the full `/codex/tasks/{task_id}` suffix.
+pub fn task_url(base_url: &str, task_id: &str) -> String {
+    let normalized = normalize_base_url(base_url);
+    // Both API-style suffixes map to the same site root, so try them in order.
+    let site_root = normalized
+        .strip_suffix("/backend-api")
+        .or_else(|| normalized.strip_suffix("/api/codex"));
+    match site_root {
+        Some(root) => format!("{root}/codex/tasks/{task_id}"),
+        None if normalized.ends_with("/codex") => format!("{normalized}/tasks/{task_id}"),
+        None => format!("{normalized}/codex/tasks/{task_id}"),
+    }
+}
--- a/codex-rs/core/Cargo.toml
+++ b/codex-rs/core/Cargo.toml
@@ -31,10 +31,9 @@ dirs = { workspace = true }
 dunce = { workspace = true }
 env-flags = { workspace = true }
 eventsource-stream = { workspace = true }
-fd-lock = { workspace = true }
 futures = { workspace = true }
-gethostname = "0.4"
 indexmap = { workspace = true }
+ignore = { workspace = true }
 libc = { workspace = true }
 mcp-types = { workspace = true }
 os_info = { workspace = true }
@@ -42,10 +41,10 @@ portable-pty = { workspace = true }
 rand = { workspace = true }
 regex-lite = { workspace = true }
 reqwest = { workspace = true, features = ["json", "stream"] }
-shellexpand = { workspace = true }
 serde = { workspace = true, features = ["derive"] }
 serde_json = { workspace = true }
 sha1 = { workspace = true }
+sha2 = { workspace = true }
 shlex = { workspace = true }
 similar = { workspace = true }
 strum_macros = { workspace = true }
@@ -64,7 +63,7 @@ tokio = { workspace = true, features = [
    "rt-multi-thread",
    "signal",
 ] }
-tokio-util = { workspace = true }
+tokio-util = { workspace = true, features = ["rt"] }
 toml = { workspace = true }
 toml_edit = { workspace = true }
 tracing = { workspace = true, features = ["log"] }
@@ -92,6 +91,7 @@ openssl-sys = { workspace = true, features = ["vendored"] }

 [dev-dependencies]
 assert_cmd = { workspace = true }
+assert_matches = { workspace = true }
 core_test_support = { workspace = true }
 escargot = { workspace = true }
 maplit = { workspace = true }
--- a/codex-rs/core/README.md
+++ b/codex-rs/core/README.md
@@ -12,7 +12,7 @@ Expects `/usr/bin/sandbox-exec` to be present.

 ### Linux

-Expects the binary containing `codex-core` to run the equivalent of `codex debug landlock` when `arg0` is `codex-linux-sandbox`. See the `codex-arg0` crate for details.
+Expects the binary containing `codex-core` to run the equivalent of `codex sandbox linux` (legacy alias: `codex debug landlock`) when `arg0` is `codex-linux-sandbox`. See the `codex-arg0` crate for details.

 ### All Platforms

--- a/codex-rs/core/gpt_5_codex_prompt.md
+++ b/codex-rs/core/gpt_5_codex_prompt.md
@@ -10,12 +10,14 @@ You are Codex, based on GPT-5. You are running as a coding agent in the Codex CL

 - Default to ASCII when editing or creating files. Only introduce non-ASCII or other Unicode characters when there is a clear justification and the file already uses them.
 - Add succinct code comments that explain what is going on if code is not self-explanatory. You should not add comments like "Assigns the value to the variable", but a brief comment might be useful ahead of a complex code block that the user would otherwise have to spend time parsing out. Usage of these comments should be rare.
+- Try to use apply_patch for single file edits, but it is fine to explore other options to make the edit if it does not work well. Do not use apply_patch for changes that are auto-generated (e.g. generating package.json or running a lint or format command like gofmt) or when scripting is more efficient (such as search and replacing a string across a codebase).
 - You may be in a dirty git worktree.
    * NEVER revert existing changes you did not make unless explicitly requested, since these changes were made by the user.
    * If asked to make a commit or code edits and there are unrelated changes to your work or changes that you didn't make in those files, don't revert those changes.
    * If the changes are in files you've touched recently, you should read carefully and understand how you can work with the changes rather than reverting them.
    * If the changes are in unrelated files, just ignore them and don't revert them.
 - While you are working, you might notice unexpected changes that you didn't make. If this happens, STOP IMMEDIATELY and ask the user how they would like to proceed.
+- **NEVER** use destructive commands like `git reset --hard` or `git checkout --` unless specifically requested or approved by the user.

 ## Plan tool

--- a/codex-rs/core/src/admin_controls.rs
+++ b/codex-rs/core/src/admin_controls.rs
@@ -1,448 +0,0 @@
-use crate::config_types::AdminAuditEventKind;
-use crate::config_types::AdminAuditToml;
-use crate::config_types::AdminConfigToml;
-use crate::exec::ExecParams;
-use crate::exec::SandboxType;
-use crate::path_utils::expand_tilde;
-use crate::protocol::AskForApproval;
-use crate::protocol::SandboxPolicy;
-use chrono::DateTime;
-use chrono::Utc;
-use fd_lock::RwLock;
-use gethostname::gethostname;
-use reqwest::Client;
-use serde::Serialize;
-use std::collections::HashSet;
-use std::fs;
-use std::fs::OpenOptions;
-use std::io::Write;
-use std::io::{self};
-use std::path::Path;
-use std::path::PathBuf;
-use tokio::runtime::Handle;
-use tracing::warn;
-
-#[cfg(unix)]
-use std::os::unix::fs::OpenOptionsExt;
-
-#[derive(Debug, Clone, PartialEq, Default)]
-pub struct AdminControls {
-    pub danger: DangerControls,
-    pub audit: Option<AdminAuditConfig>,
-    pub pending: Vec<PendingAdminAction>,
-}
-
-#[derive(Debug, Clone, PartialEq, Default)]
-pub struct DangerControls {
-    pub disallow_full_access: bool,
-    pub allow_with_reason: bool,
-}
-
-#[derive(Debug, Clone, PartialEq)]
-pub struct AdminAuditConfig {
-    pub log_file: Option<PathBuf>,
-    pub log_endpoint: Option<String>,
-    pub log_events: HashSet<AdminAuditEventKind>,
-}
-
-#[derive(Debug, Clone, PartialEq)]
-pub enum PendingAdminAction {
-    Danger(DangerPending),
-}
-
-#[derive(Debug, Clone, PartialEq)]
-pub struct DangerPending {
-    pub source: DangerRequestSource,
-    pub requested_sandbox: SandboxPolicy,
-    pub requested_approval: AskForApproval,
-}
-
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
-#[serde(rename_all = "snake_case")]
-pub enum DangerRequestSource {
-    Startup,
-    Resume,
-    Approvals,
-    ExecCli,
-}
-
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum DangerDecision {
-    Allowed,
-    RequiresJustification,
-    Denied,
-}
-
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
-#[serde(rename_all = "snake_case")]
-pub enum DangerAuditAction {
-    Requested,
-    Approved,
-    Cancelled,
-    Denied,
-}
-
-#[derive(Debug, Clone, Serialize)]
-#[serde(tag = "audit_kind", rename_all = "snake_case")]
-pub enum AdminAuditPayload {
-    Danger {
-        action: DangerAuditAction,
-        justification: Option<String>,
-        requested_by: DangerRequestSource,
-        sandbox_policy: SandboxPolicy,
-        approval_policy: AskForApproval,
-    },
-    Command {
-        command: Vec<String>,
-        command_cwd: PathBuf,
-        cli_cwd: PathBuf,
-        sandbox_type: SandboxType,
-        sandbox_policy: SandboxPolicy,
-        escalated: bool,
-        justification: Option<String>,
-    },
-}
-
-#[derive(Debug, Clone, Serialize)]
-pub struct AdminAuditRecord {
-    timestamp: DateTime<Utc>,
-    username: String,
-    hostname: String,
-    #[serde(flatten)]
-    payload: AdminAuditPayload,
-}
-
-impl AdminControls {
-    pub fn from_toml(raw: Option<AdminConfigToml>) -> io::Result<Self> {
-        let raw = raw.unwrap_or_default();
-        let danger = DangerControls {
-            disallow_full_access: raw.disallow_danger_full_access.unwrap_or(false),
-            allow_with_reason: raw.allow_danger_with_reason.unwrap_or(false),
-        };
-        let audit = match raw.audit {
-            Some(audit_raw) => AdminAuditConfig::from_toml(audit_raw)?,
-            None => None,
-        };
-
-        Ok(Self {
-            danger,
-            audit,
-            pending: Vec::new(),
-        })
-    }
-
-    pub fn decision_for_danger(&self) -> DangerDecision {
-        if !self.danger.disallow_full_access {
-            DangerDecision::Allowed
-        } else if self.danger.allow_with_reason {
-            DangerDecision::RequiresJustification
-        } else {
-            DangerDecision::Denied
-        }
-    }
-
-    pub fn has_pending_danger(&self) -> bool {
-        self.pending
-            .iter()
-            .any(|action| matches!(action, PendingAdminAction::Danger(_)))
-    }
-
-    pub fn take_pending_danger(&mut self) -> Option<DangerPending> {
-        self.pending
-            .extract_if(.., |action| matches!(action, PendingAdminAction::Danger(_)))
-            .next()
-            .map(|action| match action {
-                PendingAdminAction::Danger(pending) => pending,
-            })
-    }
-
-    pub fn peek_pending_danger(&self) -> Option<&DangerPending> {
-        self.pending
-            .iter()
-            .map(|action| match action {
-                PendingAdminAction::Danger(pending) => pending,
-            })
-            .next()
-    }
-}
-
-impl AdminAuditConfig {
-    pub fn from_toml(raw: AdminAuditToml) -> io::Result<Option<Self>> {
-        let AdminAuditToml {
-            log_file,
-            log_endpoint,
-            log_events,
-        } = raw;
-
-        let log_file = match log_file {
-            Some(path) => {
-                let trimmed = path.trim();
-                if trimmed.is_empty() {
-                    None
-                } else {
-                    Some(expand_tilde(trimmed)?)
-                }
-            }
-            None => None,
-        };
-
-        let log_endpoint = log_endpoint
-            .map(|endpoint| endpoint.trim().to_string())
-            .filter(|s| !s.is_empty());
-
-        if log_file.is_none() && log_endpoint.is_none() {
-            return Ok(None);
-        }
-
-        let log_events = log_events.into_iter().collect();
-
-        Ok(Some(Self {
-            log_file,
-            log_endpoint,
-            log_events,
-        }))
-    }
-
-    pub fn should_log(&self, kind: AdminAuditEventKind) -> bool {
-        self.log_events.is_empty() || self.log_events.contains(&kind)
-    }
-}
-
-impl AdminAuditPayload {
-    pub fn kind(&self) -> AdminAuditEventKind {
-        match self {
-            AdminAuditPayload::Danger { .. } => AdminAuditEventKind::Danger,
-            AdminAuditPayload::Command { .. } => AdminAuditEventKind::Command,
-        }
-    }
-}
-
-impl AdminAuditRecord {
-    fn new(payload: AdminAuditPayload) -> Self {
-        Self {
-            timestamp: Utc::now(),
-            username: current_username(),
-            hostname: current_hostname(),
-            payload,
-        }
-    }
-}
-
-pub fn log_admin_event(config: &AdminAuditConfig, payload: AdminAuditPayload) {
-    let kind = payload.kind();
-    if !config.should_log(kind) {
-        return;
-    }
-
-    let record = AdminAuditRecord::new(payload);
-
-    if let Some(path) = &config.log_file
-        && let Err(err) = append_record_to_file(path, &record)
-    {
-        warn!(
-            "failed to write admin audit event to {}: {err:?}",
-            path.display()
-        );
-    }
-
-    if let Some(endpoint) = &config.log_endpoint {
-        if Handle::try_current().is_ok() {
-            let endpoint = endpoint.clone();
-            tokio::spawn(async move {
-                if let Err(err) = send_record_to_endpoint(&endpoint, record).await {
-                    warn!("failed to post admin audit event to {endpoint}: {err:?}");
-                }
-            });
-        } else {
-            warn!(
-                "admin audit HTTP logging requested for {endpoint}, but no async runtime is available",
-            );
-        }
-    }
-}
-
-fn append_record_to_file(path: &Path, record: &AdminAuditRecord) -> io::Result<()> {
-    if let Some(parent) = path.parent() {
-        fs::create_dir_all(parent)?;
-    }
-
-    let mut options = OpenOptions::new();
-    options.create(true).append(true).write(true);
-    #[cfg(unix)]
-    {
-        options.mode(0o600);
-    }
-
-    let file = options.open(path)?;
-    let mut lock = RwLock::new(file);
-    let mut guard = lock.write()?;
-    let line = serde_json::to_string(record).map_err(io::Error::other)?;
-    guard.write_all(line.as_bytes())?;
-    guard.write_all(b"\n")?;
-    guard.flush()?;
-    Ok(())
-}
-
-async fn send_record_to_endpoint(
-    endpoint: &str,
-    record: AdminAuditRecord,
-) -> Result<(), reqwest::Error> {
-    Client::new().post(endpoint).json(&record).send().await?;
-    Ok(())
-}
-
-fn current_username() -> String {
-    env_var("USER")
-        .or_else(|| env_var("USERNAME"))
-        .unwrap_or_else(|| "unknown".to_string())
-}
-
-fn current_hostname() -> String {
-    gethostname()
-        .into_string()
-        .ok()
-        .filter(|value| !value.is_empty())
-        .or_else(|| env_var("HOSTNAME"))
-        .or_else(|| env_var("COMPUTERNAME"))
-        .unwrap_or_else(|| "unknown".to_string())
-}
-
-fn env_var(key: &str) -> Option<String> {
-    std::env::var(key).ok().filter(|value| !value.is_empty())
-}
-
-pub fn build_danger_audit_payload(
-    pending: &DangerPending,
-    action: DangerAuditAction,
-    justification: Option<String>,
-) -> AdminAuditPayload {
-    AdminAuditPayload::Danger {
-        action,
-        justification,
-        requested_by: pending.source,
-        sandbox_policy: pending.requested_sandbox.clone(),
-        approval_policy: pending.requested_approval,
-    }
-}
-
-pub fn build_command_audit_payload(
-    params: &ExecParams,
-    sandbox_type: SandboxType,
-    sandbox_policy: &SandboxPolicy,
-    cli_cwd: &Path,
-) -> AdminAuditPayload {
-    AdminAuditPayload::Command {
-        command: params.command.clone(),
-        command_cwd: params.cwd.clone(),
-        cli_cwd: cli_cwd.to_path_buf(),
-        sandbox_type,
-        sandbox_policy: sandbox_policy.clone(),
-        escalated: params.with_escalated_permissions.unwrap_or(false),
-        justification: params.justification.clone(),
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use serde_json::Value;
-    use std::collections::HashMap;
-    use std::path::Path;
-    use std::path::PathBuf;
-
-    #[test]
-    fn danger_payload_serializes_expected_fields() {
-        let pending = DangerPending {
-            source: DangerRequestSource::Approvals,
-            requested_sandbox: SandboxPolicy::DangerFullAccess,
-            requested_approval: AskForApproval::Never,
-        };
-
-        let payload = build_danger_audit_payload(
-            &pending,
-            DangerAuditAction::Requested,
-            Some("reason".to_string()),
-        );
-        let record = AdminAuditRecord::new(payload);
-        let value = serde_json::to_value(record).expect("serialize record");
-
-        assert_eq!(
-            value.get("audit_kind"),
-            Some(&Value::String("danger".to_string()))
-        );
-        assert_eq!(
-            value.get("action"),
-            Some(&Value::String("requested".to_string()))
-        );
-        assert_eq!(
-            value.get("requested_by"),
-            Some(&Value::String("approvals".to_string()))
-        );
-        assert_eq!(
-            value.get("approval_policy"),
-            Some(&Value::String("never".to_string()))
-        );
-        assert_eq!(
-            value.get("sandbox_policy").and_then(|sp| sp.get("mode")),
-            Some(&Value::String("danger-full-access".to_string()))
-        );
-        assert_eq!(
-            value.get("justification"),
-            Some(&Value::String("reason".to_string()))
-        );
-    }
-
-    #[test]
-    fn command_payload_serializes_expected_fields() {
-        let mut env = HashMap::new();
-        env.insert("PATH".to_string(), "/usr/bin".to_string());
-        let params = ExecParams {
-            command: vec!["echo".to_string(), "hello".to_string()],
-            cwd: PathBuf::from("/tmp"),
-            timeout_ms: Some(1000),
-            env,
-            with_escalated_permissions: Some(true),
-            justification: Some("investigation".to_string()),
-        };
-
-        let sandbox_policy = SandboxPolicy::new_workspace_write_policy();
-        let payload = build_command_audit_payload(
-            &params,
-            SandboxType::MacosSeatbelt,
-            &sandbox_policy,
-            Path::new("/workspace"),
-        );
-        let record = AdminAuditRecord::new(payload);
-        let value = serde_json::to_value(record).expect("serialize record");
-
-        assert_eq!(
-            value.get("audit_kind"),
-            Some(&Value::String("command".to_string()))
-        );
-        assert_eq!(
-            value.get("command"),
-            Some(&serde_json::json!(["echo", "hello"]))
-        );
-        assert_eq!(
-            value.get("command_cwd"),
-            Some(&Value::String("/tmp".to_string()))
-        );
-        assert_eq!(
-            value.get("cli_cwd"),
-            Some(&Value::String("/workspace".to_string()))
-        );
-        assert_eq!(
-            value.get("sandbox_type"),
-            Some(&Value::String("macos-seatbelt".to_string()))
-        );
-        assert_eq!(
-            value.get("sandbox_policy").and_then(|sp| sp.get("mode")),
-            Some(&Value::String("workspace-write".to_string()))
-        );
-        assert_eq!(value.get("escalated"), Some(&Value::Bool(true)));
-        assert_eq!(
-            value.get("justification"),
-            Some(&Value::String("investigation".to_string()))
-        );
-    }
-}
--- a/codex-rs/core/src/chat_completions.rs
+++ b/codex-rs/core/src/chat_completions.rs
@@ -389,10 +389,12 @@ async fn process_chat_sse<S>(
    let mut reasoning_text = String::new();

    loop {
-        let sse = match otel_event_manager
-            .log_sse_event(|| timeout(idle_timeout, stream.next()))
-            .await
-        {
+        let start = std::time::Instant::now();
+        let response = timeout(idle_timeout, stream.next()).await;
+        let duration = start.elapsed();
+        otel_event_manager.log_sse_event(&response, duration);
+
+        let sse = match response {
            Ok(Some(Ok(ev))) => ev,
            Ok(Some(Err(e))) => {
                let _ = tx_event
--- a/codex-rs/core/src/client.rs
+++ b/codex-rs/core/src/client.rs
@@ -47,6 +47,7 @@ use crate::openai_tools::create_tools_json_for_responses_api;
 use crate::protocol::RateLimitSnapshot;
 use crate::protocol::RateLimitWindow;
 use crate::protocol::TokenUsage;
+use crate::state::TaskKind;
 use crate::token_data::PlanType;
 use crate::util::backoff;
 use codex_otel::otel_event_manager::OtelEventManager;
@@ -63,7 +64,6 @@ struct ErrorResponse {
 #[derive(Debug, Deserialize)]
 struct Error {
    r#type: Option<String>,
-    #[allow(dead_code)]
    code: Option<String>,
    message: Option<String>,

@@ -124,8 +124,16 @@ impl ModelClient {
    /// the provider config.  Public callers always invoke `stream()` – the
    /// specialised helpers are private to avoid accidental misuse.
    pub async fn stream(&self, prompt: &Prompt) -> Result<ResponseStream> {
+        self.stream_with_task_kind(prompt, TaskKind::Regular).await
+    }
+
+    pub(crate) async fn stream_with_task_kind(
+        &self,
+        prompt: &Prompt,
+        task_kind: TaskKind,
+    ) -> Result<ResponseStream> {
        match self.provider.wire_api {
-            WireApi::Responses => self.stream_responses(prompt).await,
+            WireApi::Responses => self.stream_responses(prompt, task_kind).await,
            WireApi::Chat => {
                // Create the raw streaming connection first.
                let response_stream = stream_chat_completions(
@@ -166,7 +174,11 @@ impl ModelClient {
    }

    /// Implementation for the OpenAI *Responses* experimental API.
-    async fn stream_responses(&self, prompt: &Prompt) -> Result<ResponseStream> {
+    async fn stream_responses(
+        &self,
+        prompt: &Prompt,
+        task_kind: TaskKind,
+    ) -> Result<ResponseStream> {
        if let Some(path) = &*CODEX_RS_SSE_FIXTURE {
            // short circuit for tests
            warn!(path, "Streaming from fixture");
@@ -228,7 +240,7 @@ impl ModelClient {
            input: &input_with_instructions,
            tools: &tools_json,
            tool_choice: "auto",
-            parallel_tool_calls: false,
+            parallel_tool_calls: prompt.parallel_tool_calls,
            reasoning,
            store: azure_workaround,
            stream: true,
@@ -245,7 +257,7 @@ impl ModelClient {
        let max_attempts = self.provider.request_max_retries();
        for attempt in 0..=max_attempts {
            match self
-                .attempt_stream_responses(attempt, &payload_json, &auth_manager)
+                .attempt_stream_responses(attempt, &payload_json, &auth_manager, task_kind)
                .await
            {
                Ok(stream) => {
@@ -273,6 +285,7 @@ impl ModelClient {
        attempt: u64,
        payload_json: &Value,
        auth_manager: &Option<Arc<AuthManager>>,
+        task_kind: TaskKind,
    ) -> std::result::Result<ResponseStream, StreamAttemptError> {
        // Always fetch the latest auth in case a prior attempt refreshed the token.
        let auth = auth_manager.as_ref().and_then(|m| m.auth());
@@ -295,6 +308,7 @@ impl ModelClient {
            .header("conversation_id", self.conversation_id.to_string())
            .header("session_id", self.conversation_id.to_string())
            .header(reqwest::header::ACCEPT, "text/event-stream")
+            .header("Codex-Task-Type", task_kind.header_value())
            .json(payload_json);

        if let Some(auth) = auth.as_ref()
@@ -650,10 +664,12 @@ async fn process_sse<S>(
    let mut response_error: Option<CodexErr> = None;

    loop {
-        let sse = match otel_event_manager
-            .log_sse_event(|| timeout(idle_timeout, stream.next()))
-            .await
-        {
+        let start = std::time::Instant::now();
+        let response = timeout(idle_timeout, stream.next()).await;
+        let duration = start.elapsed();
+        otel_event_manager.log_sse_event(&response, duration);
+
+        let sse = match response {
            Ok(Some(Ok(sse))) => sse,
            Ok(Some(Err(e))) => {
                debug!("SSE Error: {e:#}");
@@ -794,9 +810,13 @@ async fn process_sse<S>(
                    if let Some(error) = error {
                        match serde_json::from_value::<Error>(error.clone()) {
                            Ok(error) => {
-                                let delay = try_parse_retry_after(&error);
-                                let message = error.message.unwrap_or_default();
-                                response_error = Some(CodexErr::Stream(message, delay));
+                                if is_context_window_error(&error) {
+                                    response_error = Some(CodexErr::ContextWindowExceeded);
+                                } else {
+                                    let delay = try_parse_retry_after(&error);
+                                    let message = error.message.clone().unwrap_or_default();
+                                    response_error = Some(CodexErr::Stream(message, delay));
+                                }
                            }
                            Err(e) => {
                                let error = format!("failed to parse ErrorResponse: {e}");
@@ -922,9 +942,14 @@ fn try_parse_retry_after(err: &Error) -> Option<Duration> {
    None
 }

+fn is_context_window_error(error: &Error) -> bool {
+    error.code.as_deref() == Some("context_length_exceeded")
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
+    use assert_matches::assert_matches;
    use serde_json::json;
    use tokio::sync::mpsc;
    use tokio_test::io::Builder as IoBuilder;
@@ -1179,6 +1204,74 @@ mod tests {
        }
    }

+    #[tokio::test]
+    async fn context_window_error_is_fatal() {
+        let raw_error = r#"{"type":"response.failed","sequence_number":3,"response":{"id":"resp_5c66275b97b9baef1ed95550adb3b7ec13b17aafd1d2f11b","object":"response","created_at":1759510079,"status":"failed","background":false,"error":{"code":"context_length_exceeded","message":"Your input exceeds the context window of this model. Please adjust your input and try again."},"usage":null,"user":null,"metadata":{}}}"#;
+
+        let sse1 = format!("event: response.failed\ndata: {raw_error}\n\n");
+        let provider = ModelProviderInfo {
+            name: "test".to_string(),
+            base_url: Some("https://test.com".to_string()),
+            env_key: Some("TEST_API_KEY".to_string()),
+            env_key_instructions: None,
+            wire_api: WireApi::Responses,
+            query_params: None,
+            http_headers: None,
+            env_http_headers: None,
+            request_max_retries: Some(0),
+            stream_max_retries: Some(0),
+            stream_idle_timeout_ms: Some(1000),
+            requires_openai_auth: false,
+        };
+
+        let otel_event_manager = otel_event_manager();
+
+        let events = collect_events(&[sse1.as_bytes()], provider, otel_event_manager).await;
+
+        assert_eq!(events.len(), 1);
+
+        match &events[0] {
+            Err(err @ CodexErr::ContextWindowExceeded) => {
+                assert_eq!(err.to_string(), CodexErr::ContextWindowExceeded.to_string());
+            }
+            other => panic!("unexpected context window event: {other:?}"),
+        }
+    }
+
+    #[tokio::test]
+    async fn context_window_error_with_newline_is_fatal() {
+        let raw_error = r#"{"type":"response.failed","sequence_number":4,"response":{"id":"resp_fatal_newline","object":"response","created_at":1759510080,"status":"failed","background":false,"error":{"code":"context_length_exceeded","message":"Your input exceeds the context window of this model. Please adjust your input and try\nagain."},"usage":null,"user":null,"metadata":{}}}"#;
+
+        let sse1 = format!("event: response.failed\ndata: {raw_error}\n\n");
+        let provider = ModelProviderInfo {
+            name: "test".to_string(),
+            base_url: Some("https://test.com".to_string()),
+            env_key: Some("TEST_API_KEY".to_string()),
+            env_key_instructions: None,
+            wire_api: WireApi::Responses,
+            query_params: None,
+            http_headers: None,
+            env_http_headers: None,
+            request_max_retries: Some(0),
+            stream_max_retries: Some(0),
+            stream_idle_timeout_ms: Some(1000),
+            requires_openai_auth: false,
+        };
+
+        let otel_event_manager = otel_event_manager();
+
+        let events = collect_events(&[sse1.as_bytes()], provider, otel_event_manager).await;
+
+        assert_eq!(events.len(), 1);
+
+        match &events[0] {
+            Err(err @ CodexErr::ContextWindowExceeded) => {
+                assert_eq!(err.to_string(), CodexErr::ContextWindowExceeded.to_string());
+            }
+            other => panic!("unexpected context window event: {other:?}"),
+        }
+    }
+
    // ────────────────────────────
    // Table-driven test from `main`
    // ────────────────────────────
@@ -1316,10 +1409,7 @@ mod tests {
        let resp: ErrorResponse =
            serde_json::from_str(json).expect("should deserialize old schema");

-        assert!(matches!(
-            resp.error.plan_type,
-            Some(PlanType::Known(KnownPlan::Pro))
-        ));
+        assert_matches!(resp.error.plan_type, Some(PlanType::Known(KnownPlan::Pro)));

        let plan_json = serde_json::to_string(&resp.error.plan_type).expect("serialize plan_type");
        assert_eq!(plan_json, "\"pro\"");
@@ -1334,7 +1424,7 @@ mod tests {
        let resp: ErrorResponse =
            serde_json::from_str(json).expect("should deserialize old schema");

-        assert!(matches!(resp.error.plan_type, Some(PlanType::Unknown(ref s)) if s == "vip"));
+        assert_matches!(resp.error.plan_type, Some(PlanType::Unknown(ref s)) if s == "vip");

        let plan_json = serde_json::to_string(&resp.error.plan_type).expect("serialize plan_type");
        assert_eq!(plan_json, "\"vip\"");
--- a/codex-rs/core/src/client_common.rs
+++ b/codex-rs/core/src/client_common.rs
@@ -9,9 +9,11 @@ use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
 use codex_protocol::config_types::Verbosity as VerbosityConfig;
 use codex_protocol::models::ResponseItem;
 use futures::Stream;
+use serde::Deserialize;
 use serde::Serialize;
 use serde_json::Value;
 use std::borrow::Cow;
+use std::collections::HashSet;
 use std::ops::Deref;
 use std::pin::Pin;
 use std::task::Context;
@@ -31,6 +33,9 @@ pub struct Prompt {
    /// external MCP servers.
    pub(crate) tools: Vec<ToolSpec>,

+    /// Whether parallel tool calls are permitted for this prompt.
+    pub(crate) parallel_tool_calls: bool,
+
    /// Optional override for the built-in BASE_INSTRUCTIONS.
    pub base_instructions_override: Option<String>,

@@ -64,10 +69,125 @@ impl Prompt {
    }

    pub(crate) fn get_formatted_input(&self) -> Vec<ResponseItem> {
-        self.input.clone()
+        let mut input = self.input.clone();
+
+        // When the *Freeform* apply_patch tool is present, rewrite shell and
+        // apply_patch tool outputs from JSON into structured plain text. The
+        // Function variant of apply_patch does NOT trigger the rewrite - note
+        // that this differs from the check above for instructions.
+        let is_freeform_apply_patch_tool_present = self.tools.iter().any(|tool| match tool {
+            ToolSpec::Freeform(f) => f.name == "apply_patch",
+            _ => false,
+        });
+        if is_freeform_apply_patch_tool_present {
+            reserialize_shell_outputs(&mut input);
+        }
+
+        input
    }
 }

+fn reserialize_shell_outputs(items: &mut [ResponseItem]) {
+    let mut shell_call_ids: HashSet<String> = HashSet::new(); // calls still awaiting an output
+
+    items.iter_mut().for_each(|item| match item {
+        ResponseItem::LocalShellCall { call_id, id, .. } => {
+            if let Some(identifier) = call_id.clone().or_else(|| id.clone()) {
+                shell_call_ids.insert(identifier); // call_id if set, otherwise id
+            }
+        }
+        ResponseItem::CustomToolCall {
+            id: _,
+            status: _,
+            call_id,
+            name,
+            input: _,
+        } => {
+            if name == "apply_patch" {
+                shell_call_ids.insert(call_id.clone());
+            }
+        }
+        ResponseItem::CustomToolCallOutput { call_id, output } => {
+            if shell_call_ids.remove(call_id) // true only for a tracked call; the id is consumed
+                && let Some(structured) = parse_structured_shell_output(output)
+            {
+                *output = structured // swap the JSON payload for its text form
+            }
+        }
+        ResponseItem::FunctionCall { name, call_id, .. }
+            if is_shell_tool_name(name) || name == "apply_patch" =>
+        {
+            shell_call_ids.insert(call_id.clone());
+        }
+        ResponseItem::FunctionCallOutput { call_id, output } => {
+            if shell_call_ids.remove(call_id) // true only for a tracked call; the id is consumed
+                && let Some(structured) = parse_structured_shell_output(&output.content)
+            {
+                output.content = structured // swap the JSON payload for its text form
+            }
+        }
+        _ => {} // every other item is left unchanged
+    })
+}
+
+fn is_shell_tool_name(name: &str) -> bool {
+    matches!(name, "shell" | "container.exec")
+}
+
+#[derive(Deserialize)]
+struct ExecOutputJson {
+    output: String,
+    metadata: ExecOutputMetadataJson,
+}
+
+#[derive(Deserialize)]
+struct ExecOutputMetadataJson {
+    exit_code: i32,
+    duration_seconds: f32,
+}
+
+fn parse_structured_shell_output(raw: &str) -> Option<String> {
+    let parsed: ExecOutputJson = serde_json::from_str(raw).ok()?;
+    Some(build_structured_output(&parsed))
+}
+
+fn build_structured_output(parsed: &ExecOutputJson) -> String {
+    let mut sections = Vec::new(); // joined with '\n' at the end
+    sections.push(format!("Exit code: {}", parsed.metadata.exit_code));
+    sections.push(format!(
+        "Wall time: {} seconds",
+        parsed.metadata.duration_seconds
+    ));
+
+    let mut output = parsed.output.clone(); // replaced below if a header is stripped
+    if let Some(total_lines) = extract_total_output_lines(&parsed.output) {
+        sections.push(format!("Total output lines: {total_lines}")); // from the marker
+        if let Some(stripped) = strip_total_output_header(&output) {
+            output = stripped.to_string(); // avoid repeating the header in the body
+        }
+    }
+
+    sections.push("Output:".to_string());
+    sections.push(output);
+
+    sections.join("\n")
+}
+
+fn extract_total_output_lines(output: &str) -> Option<u32> {
+    let marker_start = output.find("[... omitted ")?; // None if the marker is absent
+    let marker = &output[marker_start..];
+    let (_, after_of) = marker.split_once(" of ")?; // text after the first " of "
+    let (total_segment, _) = after_of.split_once(' ')?; // token up to the next space
+    total_segment.parse::<u32>().ok() // None if the token is not a valid u32
+}
+
+fn strip_total_output_header(output: &str) -> Option<&str> {
+    let after_prefix = output.strip_prefix("Total output lines: ")?; // must be first
+    let (_, remainder) = after_prefix.split_once('\n')?; // drop the rest of the header line
+    let remainder = remainder.strip_prefix('\n').unwrap_or(remainder); // skip one blank line
+    Some(remainder)
+}
+
 #[derive(Debug)]
 pub enum ResponseEvent {
    Created,
@@ -182,6 +302,17 @@ pub(crate) mod tools {
        Freeform(FreeformTool),
    }

+    impl ToolSpec {
+        pub(crate) fn name(&self) -> &str {
+            match self {
+                ToolSpec::Function(tool) => tool.name.as_str(),
+                ToolSpec::LocalShell {} => "local_shell",
+                ToolSpec::WebSearch {} => "web_search",
+                ToolSpec::Freeform(tool) => tool.name.as_str(),
+            }
+        }
+    }
+
    #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
    pub struct FreeformTool {
        pub(crate) name: String,
@@ -327,7 +458,7 @@ mod tests {
            input: &input,
            tools: &tools,
            tool_choice: "auto",
-            parallel_tool_calls: false,
+            parallel_tool_calls: true,
            reasoning: None,
            store: false,
            stream: true,
@@ -368,7 +499,7 @@ mod tests {
            input: &input,
            tools: &tools,
            tool_choice: "auto",
-            parallel_tool_calls: false,
+            parallel_tool_calls: true,
            reasoning: None,
            store: false,
            stream: true,
@@ -404,7 +535,7 @@ mod tests {
            input: &input,
            tools: &tools,
            tool_choice: "auto",
-            parallel_tool_calls: false,
+            parallel_tool_calls: true,
            reasoning: None,
            store: false,
            stream: true,
--- a/codex-rs/core/src/codebase_change_notice.rs
+++ b/codex-rs/core/src/codebase_change_notice.rs
@@ -0,0 +1,168 @@
+use std::fmt::Write;
+
+use codex_protocol::models::ContentItem;
+use codex_protocol::models::ResponseItem;
+
+use crate::codebase_snapshot::SnapshotDiff;
+
+/// Path budget intended to be passed as the `limit` of [`CodebaseChangeNotice::new`].
+pub(crate) const CODEBASE_CHANGE_NOTICE_MAX_PATHS: usize = 40;
+
+/// A capped list of changed paths that can be rendered as a
+/// `<codebase_changes>` XML fragment.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub(crate) struct CodebaseChangeNotice {
+    /// Paths kept from `SnapshotDiff::added`.
+    added: Vec<String>,
+    /// Paths kept from `SnapshotDiff::removed`.
+    removed: Vec<String>,
+    /// Paths kept from `SnapshotDiff::modified`.
+    modified: Vec<String>,
+    /// Set when at least one path was dropped to stay within the limit given to `new`.
+    truncated: bool,
+}
+
+impl CodebaseChangeNotice {
+    /// Builds a notice from `diff`, keeping at most `limit` paths in total.
+    ///
+    /// The budget is spent on added paths first, then removed, then modified;
+    /// `truncated` records whether any path had to be dropped.
+    pub(crate) fn new(diff: SnapshotDiff, limit: usize) -> Self {
+        let SnapshotDiff {
+            added,
+            removed,
+            modified,
+        } = diff;
+        let mut budget = limit;
+        let mut truncated = false;
+
+        let added = take_paths(added, &mut budget, &mut truncated);
+        let removed = take_paths(removed, &mut budget, &mut truncated);
+        let modified = take_paths(modified, &mut budget, &mut truncated);
+
+        Self {
+            added,
+            removed,
+            modified,
+            truncated,
+        }
+    }
+
+    /// Reports whether the notice lists no paths in any category.
+    pub(crate) fn is_empty(&self) -> bool {
+        [&self.added, &self.removed, &self.modified]
+            .iter()
+            .all(|paths| paths.is_empty())
+    }
+
+    /// Renders the notice as a `<codebase_changes>` XML fragment.
+    ///
+    /// The fragment holds a `<summary>` line with per-category counts of the
+    /// listed paths, one section per non-empty category, and a `<note>` when
+    /// paths were omitted.
+    pub(crate) fn serialize_to_xml(&self) -> String {
+        let mut xml = String::new();
+        let opening_tag = if self.truncated {
+            "<codebase_changes truncated=\"true\">"
+        } else {
+            "<codebase_changes>"
+        };
+        let _ = writeln!(xml, "{opening_tag}");
+
+        // One table drives both the summary and the per-category sections so
+        // the two always appear in the same order.
+        let sections = [
+            ("added", &self.added),
+            ("removed", &self.removed),
+            ("modified", &self.modified),
+        ];
+
+        let summary = sections
+            .iter()
+            .filter(|(_, paths)| !paths.is_empty())
+            .map(|(label, paths)| format!("{label} {}", paths.len()))
+            .collect::<Vec<_>>()
+            .join(", ");
+        if summary.is_empty() {
+            let _ = writeln!(xml, "  <summary>no changes</summary>");
+        } else {
+            let _ = writeln!(xml, "  <summary>{summary}</summary>");
+        }
+
+        for (tag, paths) in sections {
+            serialize_section(&mut xml, tag, paths);
+        }
+        if self.truncated {
+            let _ = writeln!(xml, "  <note>additional paths omitted</note>");
+        }
+
+        let _ = writeln!(xml, "</codebase_changes>");
+        xml
+    }
+}
+
+/// Takes as many of `paths` as the shared budget in `remaining` allows.
+///
+/// The returned vector holds the leading `*remaining` paths (or all of them if
+/// fewer); `remaining` is reduced by the number returned, and `truncated` is
+/// set to `true` whenever at least one path had to be dropped. `truncated` is
+/// never reset to `false` here.
+fn take_paths(mut paths: Vec<String>, remaining: &mut usize, truncated: &mut bool) -> Vec<String> {
+    let budget = *remaining;
+    if paths.len() > budget {
+        // Covers the exhausted-budget case too: truncating to zero leaves an
+        // empty list.
+        *truncated = true;
+        paths.truncate(budget);
+    }
+    *remaining = budget - paths.len();
+    paths
+}
+
+/// Appends a `<tag>` element with one XML-escaped `<path>` child per entry.
+///
+/// Nothing is written when `paths` is empty.
+fn serialize_section(output: &mut String, tag: &str, paths: &[String]) {
+    if !paths.is_empty() {
+        let _ = writeln!(output, "  <{tag}>");
+        paths
+            .iter()
+            .map(|path| escape_xml(path))
+            .for_each(|escaped| {
+                let _ = writeln!(output, "    <path>{escaped}</path>");
+            });
+        let _ = writeln!(output, "  </{tag}>");
+    }
+}
+
+/// Returns `value` with the five XML special characters (`&`, `<`, `>`, `"`,
+/// `'`) replaced by their predefined entities; all other characters are
+/// copied through unchanged.
+fn escape_xml(value: &str) -> String {
+    let mut out = String::with_capacity(value.len());
+    for ch in value.chars() {
+        let entity = match ch {
+            '&' => "&amp;",
+            '<' => "&lt;",
+            '>' => "&gt;",
+            '"' => "&quot;",
+            '\'' => "&apos;",
+            _ => {
+                out.push(ch);
+                continue;
+            }
+        };
+        out.push_str(entity);
+    }
+    out
+}
+
+impl From<CodebaseChangeNotice> for ResponseItem {
+    /// Wraps the notice's XML rendering in a `user` message with a single
+    /// input-text content item.
+    fn from(notice: CodebaseChangeNotice) -> Self {
+        let text = notice.serialize_to_xml();
+        let content = vec![ContentItem::InputText { text }];
+        ResponseItem::Message {
+            id: None,
+            role: "user".to_string(),
+            content,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use pretty_assertions::assert_eq;
+
+    // Five changed paths against a limit of three: the notice must keep
+    // exactly three paths in total and flag itself as truncated.
+    #[test]
+    fn constructs_notice_with_limit() {
+        let diff = SnapshotDiff {
+            added: vec!["a.rs".to_string(), "b.rs".to_string()],
+            removed: vec!["c.rs".to_string()],
+            modified: vec!["d.rs".to_string(), "e.rs".to_string()],
+        };
+
+        let notice = CodebaseChangeNotice::new(diff, 3);
+        assert!(notice.truncated);
+        assert_eq!(
+            notice.added.len() + notice.removed.len() + notice.modified.len(),
+            3
+        );
+    }
+
+    // With the default limit nothing is dropped; the XML must contain a
+    // section for each non-empty category along with the paths themselves.
+    #[test]
+    fn serializes_notice() {
+        let diff = SnapshotDiff {
+            added: vec!["src/lib.rs".to_string()],
+            removed: Vec::new(),
+            modified: vec!["src/main.rs".to_string()],
+        };
+        let notice = CodebaseChangeNotice::new(diff, CODEBASE_CHANGE_NOTICE_MAX_PATHS);
+        let xml = notice.serialize_to_xml();
+        assert!(xml.contains("<added>"));
+        assert!(xml.contains("<modified>"));
+        assert!(xml.contains("src/lib.rs"));
+        assert!(xml.contains("src/main.rs"));
+    }
+}
--- a/codex-rs/core/src/codebase_snapshot.rs
+++ b/codex-rs/core/src/codebase_snapshot.rs
@@ -0,0 +1,278 @@
+use std::borrow::Cow;
+use std::collections::BTreeMap;
+use std::fs::File;
+use std::io::Read;
+use std::path::Path;
+use std::path::PathBuf;
+use std::time::SystemTime;
+
+use anyhow::Context;
+use anyhow::Result;
+use ignore::WalkBuilder;
+use sha2::Digest;
+use sha2::Sha256;
+use tokio::task;
+use tracing::warn;
+
+/// Fingerprints of the files and symlinks found under a root directory.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub(crate) struct CodebaseSnapshot {
+    /// Directory the snapshot was taken from.
+    root: PathBuf,
+    /// Fingerprint per entry, keyed by its path relative to `root` with `\`
+    /// rewritten to `/`.
+    entries: BTreeMap<String, EntryFingerprint>,
+    /// Digest computed over all of `entries` (SHA-256 of empty input for an
+    /// empty snapshot created for a missing root).
+    root_digest: DigestBytes,
+}
+
+/// Identity of a single snapshot entry; two fingerprints are equal only when
+/// every field matches.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub(crate) struct EntryFingerprint {
+    /// Whether the entry is a regular file or a symlink.
+    pub kind: EntryKind,
+    /// SHA-256 of the file contents, or of the normalized link target for symlinks.
+    pub digest: DigestBytes,
+    /// File length reported by its metadata; `0` for symlinks.
+    pub size: u64,
+    /// Modification time in milliseconds since the Unix epoch; `None` for
+    /// symlinks or when the time is unavailable or precedes the epoch.
+    pub modified_millis: Option<u128>,
+}
+
+/// Kind of filesystem entry recorded in a snapshot.
+///
+/// The `u8` representation is what gets mixed into the root digest.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+#[repr(u8)]
+pub(crate) enum EntryKind {
+    /// A regular file.
+    File,
+    /// A symbolic link (recorded by its target, not followed).
+    Symlink,
+}
+
+/// Paths that differ between an older and a newer [`CodebaseSnapshot`].
+#[derive(Clone, Debug, PartialEq, Eq, Default)]
+pub(crate) struct SnapshotDiff {
+    /// Paths present only in the newer snapshot.
+    pub added: Vec<String>,
+    /// Paths present only in the older snapshot.
+    pub removed: Vec<String>,
+    /// Paths present in both snapshots whose fingerprints differ.
+    pub modified: Vec<String>,
+}
+
+impl SnapshotDiff {
+    /// Reports whether the diff lists no added, removed, or modified paths.
+    pub fn is_empty(&self) -> bool {
+        [&self.added, &self.removed, &self.modified]
+            .iter()
+            .all(|paths| paths.is_empty())
+    }
+}
+
+/// Raw 32-byte digest, filled from SHA-256 output throughout this module.
+pub(crate) type DigestBytes = [u8; 32];
+
+impl CodebaseSnapshot {
+    /// Captures a snapshot of `root` from async code.
+    ///
+    /// The walk and hashing in [`Self::from_disk`] run inside
+    /// `tokio::task::spawn_blocking`. A failure to join that task is reported
+    /// as an error, as is any error returned by `from_disk` itself.
+    pub(crate) async fn capture(root: PathBuf) -> Result<Self> {
+        task::spawn_blocking(move || Self::from_disk(&root))
+            .await
+            .map_err(|e| anyhow::anyhow!("codebase snapshot task failed: {e}"))?
+    }
+
+    /// Walks `root` synchronously and fingerprints every file and symlink found.
+    ///
+    /// A missing `root` yields an empty snapshot rather than an error. Entries
+    /// that cannot be read or hashed are logged with `warn!` and left out of
+    /// the snapshot, so this function itself only returns `Ok` as written.
+    pub(crate) fn from_disk(root: &Path) -> Result<Self> {
+        if !root.exists() {
+            return Ok(Self::empty(root));
+        }
+
+        let mut entries: BTreeMap<String, EntryFingerprint> = BTreeMap::new();
+
+        // NOTE(review): per the `ignore` crate docs, `hidden(false)` turns off
+        // the hidden-file filter while the remaining switches keep .gitignore /
+        // .ignore / git-exclude rules (including those of parent directories)
+        // active — confirm against the crate version in use.
+        let mut walker = WalkBuilder::new(root);
+        walker
+            .hidden(false)
+            .git_ignore(true)
+            .git_exclude(true)
+            .parents(true)
+            .ignore(true)
+            .follow_links(false);
+
+        for result in walker.build() {
+            let entry = match result {
+                Ok(entry) => entry,
+                Err(err) => {
+                    warn!("codebase snapshot failed to read entry: {err}");
+                    continue;
+                }
+            };
+
+            let path = entry.path();
+            // Depth 0 is the root itself, which is never recorded as an entry.
+            if entry.depth() == 0 {
+                continue;
+            }
+
+            let relative = match path.strip_prefix(root) {
+                Ok(rel) => rel,
+                Err(_) => continue,
+            };
+            if relative.as_os_str().is_empty() {
+                continue;
+            }
+            let rel_string = normalize_rel_path(relative);
+
+            let file_type = match entry.file_type() {
+                Some(file_type) => file_type,
+                None => continue,
+            };
+
+            // Directories carry no fingerprint of their own; only their
+            // contents are recorded.
+            if file_type.is_dir() {
+                continue;
+            }
+
+            if file_type.is_file() {
+                match fingerprint_file(path) {
+                    Ok(fp) => {
+                        entries.insert(rel_string, fp);
+                    }
+                    Err(err) => {
+                        warn!(
+                            "codebase snapshot failed to hash file {}: {err}",
+                            path.display()
+                        );
+                    }
+                }
+                continue;
+            }
+
+            if file_type.is_symlink() {
+                match fingerprint_symlink(path) {
+                    Ok(fp) => {
+                        entries.insert(rel_string, fp);
+                    }
+                    Err(err) => {
+                        warn!(
+                            "codebase snapshot failed to hash symlink {}: {err}",
+                            path.display()
+                        );
+                    }
+                }
+                continue;
+            }
+            // Any other file type falls through and is ignored.
+        }
+
+        let root_digest = compute_root_digest(&entries);
+
+        Ok(Self {
+            root: root.to_path_buf(),
+            entries,
+            root_digest,
+        })
+    }
+
+    /// Compares this (older) snapshot against `newer`.
+    ///
+    /// A path is "added" when only `newer` has it, "removed" when only `self`
+    /// has it, and "modified" when both have it with unequal fingerprints.
+    /// Fingerprint equality covers every field, so a change in size or
+    /// modification time alone also counts as modified.
+    pub(crate) fn diff(&self, newer: &CodebaseSnapshot) -> SnapshotDiff {
+        let mut diff = SnapshotDiff::default();
+
+        for (path, fingerprint) in &newer.entries {
+            match self.entries.get(path) {
+                None => diff.added.push(path.clone()),
+                Some(existing) if existing != fingerprint => diff.modified.push(path.clone()),
+                _ => {}
+            }
+        }
+
+        for path in self.entries.keys() {
+            if !newer.entries.contains_key(path) {
+                diff.removed.push(path.clone());
+            }
+        }
+
+        diff
+    }
+
+    /// Returns the directory this snapshot was taken from.
+    pub(crate) fn root(&self) -> &Path {
+        &self.root
+    }
+
+    /// Builds a snapshot for `root` with no entries; its root digest is the
+    /// SHA-256 of empty input.
+    fn empty(root: &Path) -> Self {
+        Self {
+            root: root.to_path_buf(),
+            entries: BTreeMap::new(),
+            root_digest: Sha256::digest(b"").into(),
+        }
+    }
+}
+
+/// Fingerprints the regular file at `path`.
+///
+/// The digest is the SHA-256 of the file contents, streamed through a 64 KiB
+/// buffer. `size` comes from the file metadata, and `modified_millis` is the
+/// modification time when it is available and not before the Unix epoch.
+///
+/// # Errors
+///
+/// Fails, with the path attached as context, when the metadata cannot be
+/// read, the file cannot be opened, or a read fails for a reason other than
+/// `ErrorKind::Interrupted`.
+fn fingerprint_file(path: &Path) -> Result<EntryFingerprint> {
+    let metadata = path
+        .metadata()
+        .with_context(|| format!("metadata {}", path.display()))?;
+    let mut file = File::open(path).with_context(|| format!("open {}", path.display()))?;
+
+    let mut hasher = Sha256::new();
+    let mut buf = [0u8; 64 * 1024];
+    loop {
+        let read = match file.read(&mut buf) {
+            Ok(0) => break,
+            Ok(read) => read,
+            // `Read::read` documents `Interrupted` as non-fatal. Retrying keeps
+            // a stray signal from dropping the file out of the snapshot.
+            Err(err) if err.kind() == std::io::ErrorKind::Interrupted => continue,
+            Err(err) => {
+                return Err(err).with_context(|| format!("read {}", path.display()));
+            }
+        };
+        hasher.update(&buf[..read]);
+    }
+
+    Ok(EntryFingerprint {
+        kind: EntryKind::File,
+        digest: hasher.finalize().into(),
+        size: metadata.len(),
+        modified_millis: metadata.modified().ok().and_then(system_time_to_millis),
+    })
+}
+
+/// Fingerprints the symlink at `path` by hashing its target rather than
+/// following it.
+///
+/// The digest is the SHA-256 of the link target after path normalization;
+/// `size` is always `0` and no modification time is recorded.
+///
+/// # Errors
+///
+/// Fails, with the path attached as context, when the link cannot be read.
+fn fingerprint_symlink(path: &Path) -> Result<EntryFingerprint> {
+    let link_target =
+        std::fs::read_link(path).with_context(|| format!("read_link {}", path.display()))?;
+    let normalized_target = normalize_rel_path(&link_target);
+    let digest: DigestBytes = Sha256::digest(normalized_target.as_bytes()).into();
+
+    Ok(EntryFingerprint {
+        kind: EntryKind::Symlink,
+        digest,
+        size: 0,
+        modified_millis: None,
+    })
+}
+
+/// Folds every entry into a single SHA-256 digest.
+///
+/// Entries are visited in the map's key order. For each one the path bytes,
+/// content digest, kind byte, little-endian size, and (when present)
+/// little-endian modification time are fed to the hasher in that order.
+fn compute_root_digest(entries: &BTreeMap<String, EntryFingerprint>) -> DigestBytes {
+    let mut hasher = Sha256::new();
+    for (rel_path, entry) in entries {
+        let EntryFingerprint {
+            kind,
+            digest,
+            size,
+            modified_millis,
+        } = entry;
+        hasher.update(rel_path.as_bytes());
+        hasher.update(digest);
+        hasher.update([*kind as u8]);
+        hasher.update(size.to_le_bytes());
+        if let Some(millis) = modified_millis {
+            hasher.update(millis.to_le_bytes());
+        }
+    }
+    hasher.finalize().into()
+}
+
+/// Renders `path` as a string with every `\` replaced by `/`.
+///
+/// The conversion is lossy for non-UTF-8 paths; an empty path yields an empty
+/// string.
+fn normalize_rel_path(path: &Path) -> String {
+    match path_to_cow(path) {
+        text if text.is_empty() => String::new(),
+        text => text.replace('\\', "/"),
+    }
+}
+
+/// Converts `path` to text via `Path::to_string_lossy`, borrowing when the
+/// path is already valid UTF-8.
+fn path_to_cow(path: &Path) -> Cow<'_, str> {
+    path.to_string_lossy()
+}
+
+/// Converts `ts` to whole milliseconds since the Unix epoch.
+///
+/// Returns `None` when `ts` lies before the epoch.
+fn system_time_to_millis(ts: SystemTime) -> Option<u128> {
+    match ts.duration_since(SystemTime::UNIX_EPOCH) {
+        Ok(since_epoch) => Some(since_epoch.as_millis()),
+        Err(_) => None,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use pretty_assertions::assert_eq;
+    use tempfile::tempdir;
+
+    // Takes two on-disk snapshots of a temp directory around one rewrite, one
+    // deletion and one creation, and checks each path lands in the matching
+    // diff category.
+    #[test]
+    fn diff_tracks_added_modified_removed() {
+        let dir = tempdir().unwrap();
+        let root = dir.path();
+
+        std::fs::write(root.join("file_a.txt"), "alpha").unwrap();
+        std::fs::write(root.join("file_b.txt"), "bravo").unwrap();
+        let snapshot_one = CodebaseSnapshot::from_disk(root).unwrap();
+
+        // The rewrite changes both content and length, so file_a differs
+        // regardless of timestamp resolution.
+        std::fs::write(root.join("file_a.txt"), "alpha-updated").unwrap();
+        std::fs::remove_file(root.join("file_b.txt")).unwrap();
+        std::fs::write(root.join("file_c.txt"), "charlie").unwrap();
+        let snapshot_two = CodebaseSnapshot::from_disk(root).unwrap();
+
+        let diff = snapshot_one.diff(&snapshot_two);
+        assert_eq!(diff.added, vec!["file_c.txt".to_string()]);
+        assert_eq!(diff.modified, vec!["file_a.txt".to_string()]);
+        assert_eq!(diff.removed, vec!["file_b.txt".to_string()]);
+    }
+}
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -1,5 +1,6 @@
 use std::borrow::Cow;
 use std::fmt::Debug;
+use std::path::Path;
 use std::path::PathBuf;
 use std::sync::Arc;
 use std::sync::atomic::AtomicU64;
@@ -17,13 +18,16 @@ use codex_apply_patch::ApplyPatchAction;
 use codex_protocol::ConversationId;
 use codex_protocol::protocol::ConversationPathResponseEvent;
 use codex_protocol::protocol::ExitedReviewModeEvent;
+use codex_protocol::protocol::McpAuthStatus;
 use codex_protocol::protocol::ReviewRequest;
 use codex_protocol::protocol::RolloutItem;
 use codex_protocol::protocol::SessionSource;
 use codex_protocol::protocol::TaskStartedEvent;
 use codex_protocol::protocol::TurnAbortReason;
 use codex_protocol::protocol::TurnContextItem;
+use futures::future::BoxFuture;
 use futures::prelude::*;
+use futures::stream::FuturesOrdered;
 use mcp_types::CallToolResult;
 use serde_json;
 use serde_json::Value;
@@ -40,6 +44,9 @@ use crate::apply_patch::convert_apply_patch_to_protocol;
 use crate::client::ModelClient;
 use crate::client_common::Prompt;
 use crate::client_common::ResponseEvent;
+use crate::codebase_change_notice::CODEBASE_CHANGE_NOTICE_MAX_PATHS;
+use crate::codebase_change_notice::CodebaseChangeNotice;
+use crate::codebase_snapshot::CodebaseSnapshot;
 use crate::config::Config;
 use crate::config_types::ShellEnvironmentPolicy;
 use crate::conversation_history::ConversationHistory;
@@ -55,6 +62,7 @@ use crate::exec_command::WriteStdinParams;
 use crate::executor::Executor;
 use crate::executor::ExecutorConfig;
 use crate::executor::normalize_exec_result;
+use crate::mcp::auth::compute_auth_statuses;
 use crate::mcp_connection_manager::McpConnectionManager;
 use crate::model_family::find_family_for_model;
 use crate::openai_model_info::get_model_info;
@@ -96,11 +104,14 @@ use crate::rollout::RolloutRecorderParams;
 use crate::shell;
 use crate::state::ActiveTurn;
 use crate::state::SessionServices;
+use crate::state::TaskKind;
 use crate::tasks::CompactTask;
 use crate::tasks::RegularTask;
 use crate::tasks::ReviewTask;
 use crate::tools::ToolRouter;
+use crate::tools::context::SharedTurnDiffTracker;
 use crate::tools::format_exec_output_str;
+use crate::tools::parallel::ToolCallRuntime;
 use crate::turn_diff_tracker::TurnDiffTracker;
 use crate::unified_exec::UnifiedExecSessionManager;
 use crate::user_instructions::UserInstructions;
@@ -359,14 +370,32 @@ impl Session {

        let mcp_fut = McpConnectionManager::new(
            config.mcp_servers.clone(),
-            config.use_experimental_use_rmcp_client,
+            config
+                .features
+                .enabled(crate::features::Feature::RmcpClient),
+            config.mcp_oauth_credentials_store_mode,
        );
        let default_shell_fut = shell::default_user_shell();
        let history_meta_fut = crate::message_history::history_metadata(&config);
+        let auth_statuses_fut = compute_auth_statuses(
+            config.mcp_servers.iter(),
+            config.mcp_oauth_credentials_store_mode,
+        );

        // Join all independent futures.
-        let (rollout_recorder, mcp_res, default_shell, (history_log_id, history_entry_count)) =
-            tokio::join!(rollout_fut, mcp_fut, default_shell_fut, history_meta_fut);
+        let (
+            rollout_recorder,
+            mcp_res,
+            default_shell,
+            (history_log_id, history_entry_count),
+            auth_statuses,
+        ) = tokio::join!(
+            rollout_fut,
+            mcp_fut,
+            default_shell_fut,
+            history_meta_fut,
+            auth_statuses_fut
+        );

        let rollout_recorder = rollout_recorder.map_err(|e| {
            error!("failed to initialize rollout recorder: {e:#}");
@@ -393,11 +422,24 @@ impl Session {
        // Surface individual client start-up failures to the user.
        if !failed_clients.is_empty() {
            for (server_name, err) in failed_clients {
-                let message = format!("MCP client for `{server_name}` failed to start: {err:#}");
-                error!("{message}");
+                let log_message =
+                    format!("MCP client for `{server_name}` failed to start: {err:#}");
+                error!("{log_message}");
+                let display_message = if matches!(
+                    auth_statuses.get(&server_name),
+                    Some(McpAuthStatus::NotLoggedIn)
+                ) {
+                    format!(
+                        "The {server_name} MCP server is not logged in. Run `codex mcp login {server_name}` to log in."
+                    )
+                } else {
+                    log_message
+                };
                post_session_configured_error_events.push(Event {
                    id: INITIAL_SUBMIT_ID.to_owned(),
-                    msg: EventMsg::Error(ErrorEvent { message }),
+                    msg: EventMsg::Error(ErrorEvent {
+                        message: display_message,
+                    }),
                });
            }
        }
@@ -440,12 +482,7 @@ impl Session {
            client,
            tools_config: ToolsConfig::new(&ToolsConfigParams {
                model_family: &config.model_family,
-                include_plan_tool: config.include_plan_tool,
-                include_apply_patch_tool: config.include_apply_patch_tool,
-                include_web_search_request: config.tools_web_search_request,
-                use_streamable_shell_tool: config.use_experimental_streamable_shell_tool,
-                include_view_image_tool: config.include_view_image_tool,
-                experimental_unified_exec_tool: config.use_experimental_unified_exec_tool,
+                features: &config.features,
            }),
            user_instructions,
            base_instructions,
@@ -468,7 +505,6 @@ impl Session {
                turn_context.sandbox_policy.clone(),
                turn_context.cwd.clone(),
                config.codex_linux_sandbox_exe.clone(),
-                config.admin.audit.clone(),
            )),
        };

@@ -714,6 +750,73 @@ impl Session {
        self.persist_rollout_items(&rollout_items).await;
    }

+    /// Returns a clone of the stored codebase snapshot if it was taken for `root`.
+    ///
+    /// Yields `None` when no snapshot is stored or the stored one belongs to a
+    /// different root.
+    async fn stored_snapshot_for_root(&self, root: &Path) -> Option<CodebaseSnapshot> {
+        let state = self.state.lock().await;
+        match state.codebase_snapshot.as_ref() {
+            Some(snapshot) if snapshot.root() == root => Some(snapshot.clone()),
+            _ => None,
+        }
+    }
+
+    /// Replaces the stored codebase snapshot with `snapshot`.
+    async fn set_codebase_snapshot(&self, snapshot: CodebaseSnapshot) {
+        self.state.lock().await.codebase_snapshot = Some(snapshot);
+    }
+
+    /// Re-snapshots the turn's working directory and, if it differs from the
+    /// snapshot stored for that directory, records and emits a change notice.
+    ///
+    /// When a non-empty notice results, it is recorded as a conversation item
+    /// and the event messages derived from it are sent under `sub_id`. In every
+    /// successful case the fresh snapshot then replaces the stored one.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error when capturing the fresh snapshot fails; the stored
+    /// snapshot is left untouched in that case.
+    pub(crate) async fn emit_codebase_delta_if_changed(
+        &self,
+        turn_context: &TurnContext,
+        sub_id: &str,
+    ) -> anyhow::Result<()> {
+        let root = turn_context.cwd.clone();
+        let baseline = self.stored_snapshot_for_root(&root).await;
+        let current = CodebaseSnapshot::capture(root).await?;
+
+        // No baseline, an empty diff, or an empty notice all mean there is
+        // nothing to announce.
+        let notice = baseline
+            .map(|older| older.diff(&current))
+            .filter(|diff| !diff.is_empty())
+            .map(|diff| CodebaseChangeNotice::new(diff, CODEBASE_CHANGE_NOTICE_MAX_PATHS))
+            .filter(|notice| !notice.is_empty());
+
+        if let Some(notice) = notice {
+            let response_item = ResponseItem::from(notice);
+            self.record_conversation_items(std::slice::from_ref(&response_item))
+                .await;
+
+            let show_raw_reasoning = self.show_raw_agent_reasoning();
+            for msg in map_response_item_to_event_messages(&response_item, show_raw_reasoning) {
+                self.send_event(Event {
+                    id: sub_id.to_string(),
+                    msg,
+                })
+                .await;
+            }
+        }
+
+        self.set_codebase_snapshot(current).await;
+        Ok(())
+    }
+
+    /// Captures a fresh snapshot of the turn's working directory and stores it,
+    /// without emitting any change notice.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error when the capture fails; the stored snapshot is then
+    /// left as it was.
+    pub(crate) async fn refresh_codebase_snapshot(
+        &self,
+        turn_context: &TurnContext,
+    ) -> anyhow::Result<()> {
+        let cwd = turn_context.cwd.clone();
+        let fresh = CodebaseSnapshot::capture(cwd).await?;
+        self.set_codebase_snapshot(fresh).await;
+        Ok(())
+    }
+
    pub(crate) fn build_initial_context(&self, turn_context: &TurnContext) -> Vec<ResponseItem> {
        let mut items = Vec::<ResponseItem>::with_capacity(2);
        if let Some(user_instructions) = turn_context.user_instructions.as_deref() {
@@ -783,6 +886,17 @@ impl Session {
        self.send_event(event).await;
    }

+    /// Marks token usage as filling the model's context window and sends a
+    /// token-count event for `sub_id`.
+    ///
+    /// Does nothing when the client reports no context window.
+    async fn set_total_tokens_full(&self, sub_id: &str, turn_context: &TurnContext) {
+        let Some(context_window) = turn_context.client.get_model_context_window() else {
+            return;
+        };
+
+        // The lock guard is a temporary dropped at the end of this statement,
+        // so the state lock is released before the event is sent.
+        self.state
+            .lock()
+            .await
+            .set_token_usage_full(context_window);
+        self.send_token_count_event(sub_id).await;
+    }
+
    /// Record a user input item to conversation history and also persist a
    /// corresponding UserMessage EventMsg to rollout.
    async fn record_input_and_rollout_usermsg(&self, response_input: &ResponseInputItem) {
@@ -808,7 +922,7 @@ impl Session {

    async fn on_exec_command_begin(
        &self,
-        turn_diff_tracker: &mut TurnDiffTracker,
+        turn_diff_tracker: SharedTurnDiffTracker,
        exec_command_context: ExecCommandContext,
    ) {
        let ExecCommandContext {
@@ -824,7 +938,10 @@ impl Session {
                user_explicitly_approved_this_action,
                changes,
            }) => {
-                turn_diff_tracker.on_patch_begin(&changes);
+                {
+                    let mut tracker = turn_diff_tracker.lock().await;
+                    tracker.on_patch_begin(&changes);
+                }

                EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
                    call_id,
@@ -851,7 +968,7 @@ impl Session {

    async fn on_exec_command_end(
        &self,
-        turn_diff_tracker: &mut TurnDiffTracker,
+        turn_diff_tracker: SharedTurnDiffTracker,
        sub_id: &str,
        call_id: &str,
        output: &ExecToolCallOutput,
@@ -899,7 +1016,10 @@ impl Session {
        // If this is an apply_patch, after we emit the end patch, emit a second event
        // with the full turn diff if there is one.
        if is_apply_patch {
-            let unified_diff = turn_diff_tracker.get_unified_diff();
+            let unified_diff = {
+                let mut tracker = turn_diff_tracker.lock().await;
+                tracker.get_unified_diff()
+            };
            if let Ok(Some(unified_diff)) = unified_diff {
                let msg = EventMsg::TurnDiff(TurnDiffEvent { unified_diff });
                let event = Event {
@@ -916,7 +1036,7 @@ impl Session {
    /// Returns the output of the exec tool call.
    pub(crate) async fn run_exec_with_events(
        &self,
-        turn_diff_tracker: &mut TurnDiffTracker,
+        turn_diff_tracker: SharedTurnDiffTracker,
        prepared: PreparedExec,
        approval_policy: AskForApproval,
    ) -> Result<ExecToolCallOutput, ExecError> {
@@ -925,7 +1045,7 @@ impl Session {
        let sub_id = context.sub_id.clone();
        let call_id = context.call_id.clone();

-        self.on_exec_command_begin(turn_diff_tracker, context.clone())
+        self.on_exec_command_begin(turn_diff_tracker.clone(), context.clone())
            .await;

        let result = self
@@ -1173,12 +1293,7 @@ async fn submission_loop(

                let tools_config = ToolsConfig::new(&ToolsConfigParams {
                    model_family: &effective_family,
-                    include_plan_tool: config.include_plan_tool,
-                    include_apply_patch_tool: config.include_apply_patch_tool,
-                    include_web_search_request: config.tools_web_search_request,
-                    use_streamable_shell_tool: config.use_experimental_streamable_shell_tool,
-                    include_view_image_tool: config.include_view_image_tool,
-                    experimental_unified_exec_tool: config.use_experimental_unified_exec_tool,
+                    features: &config.features,
                });

                let new_turn_context = TurnContext {
@@ -1275,14 +1390,7 @@ async fn submission_loop(
                        client,
                        tools_config: ToolsConfig::new(&ToolsConfigParams {
                            model_family: &model_family,
-                            include_plan_tool: config.include_plan_tool,
-                            include_apply_patch_tool: config.include_apply_patch_tool,
-                            include_web_search_request: config.tools_web_search_request,
-                            use_streamable_shell_tool: config
-                                .use_experimental_streamable_shell_tool,
-                            include_view_image_tool: config.include_view_image_tool,
-                            experimental_unified_exec_tool: config
-                                .use_experimental_unified_exec_tool,
+                            features: &config.features,
                        }),
                        user_instructions: turn_context.user_instructions.clone(),
                        base_instructions: turn_context.base_instructions.clone(),
@@ -1382,10 +1490,18 @@ async fn submission_loop(

                // This is a cheap lookup from the connection manager's cache.
                let tools = sess.services.mcp_connection_manager.list_all_tools();
+                let auth_statuses = compute_auth_statuses(
+                    config.mcp_servers.iter(),
+                    config.mcp_oauth_credentials_store_mode,
+                )
+                .await;
                let event = Event {
                    id: sub_id,
                    msg: EventMsg::McpListToolsResponse(
-                        crate::protocol::McpListToolsResponseEvent { tools },
+                        crate::protocol::McpListToolsResponseEvent {
+                            tools,
+                            auth_statuses,
+                        },
                    ),
                };
                sess.send_event(event).await;
@@ -1506,14 +1622,15 @@ async fn spawn_review_thread(
    let model = config.review_model.clone();
    let review_model_family = find_family_for_model(&model)
        .unwrap_or_else(|| parent_turn_context.client.get_model_family());
+    // For reviews, disable the plan, web_search, view_image, and streamable shell
+    // tools regardless of global settings.
+    let mut review_features = config.features.clone();
+    review_features.disable(crate::features::Feature::PlanTool);
+    review_features.disable(crate::features::Feature::WebSearchRequest);
+    review_features.disable(crate::features::Feature::ViewImageTool);
+    review_features.disable(crate::features::Feature::StreamableShell);
    let tools_config = ToolsConfig::new(&ToolsConfigParams {
        model_family: &review_model_family,
-        include_plan_tool: false,
-        include_apply_patch_tool: config.include_apply_patch_tool,
-        include_web_search_request: false,
-        use_streamable_shell_tool: false,
-        include_view_image_tool: false,
-        experimental_unified_exec_tool: config.use_experimental_unified_exec_tool,
+        features: &review_features,
    });

    let base_instructions = REVIEW_PROMPT.to_string();
@@ -1566,7 +1683,7 @@ async fn spawn_review_thread(

    // Seed the child task with the review prompt as the initial user message.
    let input: Vec<InputItem> = vec![InputItem::Text {
-        text: format!("{base_instructions}\n\n---\n\nNow, here's your task: {review_prompt}"),
+        text: review_prompt,
    }];
    let tc = Arc::new(review_turn_context);

@@ -1604,6 +1721,7 @@ pub(crate) async fn run_task(
    turn_context: Arc<TurnContext>,
    sub_id: String,
    input: Vec<InputItem>,
+    task_kind: TaskKind,
 ) -> Option<String> {
    if input.is_empty() {
        return None;
@@ -1631,10 +1749,18 @@ pub(crate) async fn run_task(
            .await;
    }

+    if !is_review_mode
+        && let Err(err) = sess
+            .emit_codebase_delta_if_changed(turn_context.as_ref(), &sub_id)
+            .await
+    {
+        warn!(error = ?err, "failed to compute codebase changes");
+    }
+
    let mut last_agent_message: Option<String> = None;
    // Although from the perspective of codex.rs, TurnDiffTracker has the lifecycle of a Task which contains
    // many turns, from the perspective of the user, it is a single turn.
-    let mut turn_diff_tracker = TurnDiffTracker::new();
+    let turn_diff_tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
    let mut auto_compact_recently_attempted = false;

    loop {
@@ -1682,11 +1808,12 @@ pub(crate) async fn run_task(
            })
            .collect();
        match run_turn(
-            &sess,
-            turn_context.as_ref(),
-            &mut turn_diff_tracker,
+            Arc::clone(&sess),
+            Arc::clone(&turn_context),
+            Arc::clone(&turn_diff_tracker),
            sub_id.clone(),
            turn_input,
+            task_kind,
        )
        .await
        {
@@ -1839,6 +1966,7 @@ pub(crate) async fn run_task(
                    );
                    sess.notifier()
                        .notify(&UserNotification::AgentTurnComplete {
+                            thread_id: sess.conversation_id.to_string(),
                            turn_id: sub_id.clone(),
                            input_messages: turn_input_messages,
                            last_assistant_message: last_agent_message.clone(),
@@ -1862,6 +1990,11 @@ pub(crate) async fn run_task(
        }
    }

+    if !is_review_mode && let Err(err) = sess.refresh_codebase_snapshot(turn_context.as_ref()).await
+    {
+        warn!(error = ?err, "failed to refresh codebase snapshot");
+    }
+
    // If this was a review thread and we have a final assistant message,
    // try to parse it as a ReviewOutput.
    //
@@ -1907,18 +2040,28 @@ fn parse_review_output_event(text: &str) -> ReviewOutputEvent {
 }

 async fn run_turn(
-    sess: &Session,
-    turn_context: &TurnContext,
-    turn_diff_tracker: &mut TurnDiffTracker,
+    sess: Arc<Session>,
+    turn_context: Arc<TurnContext>,
+    turn_diff_tracker: SharedTurnDiffTracker,
    sub_id: String,
    input: Vec<ResponseItem>,
+    task_kind: TaskKind,
 ) -> CodexResult<TurnRunResult> {
    let mcp_tools = sess.services.mcp_connection_manager.list_all_tools();
-    let router = ToolRouter::from_config(&turn_context.tools_config, Some(mcp_tools));
+    let router = Arc::new(ToolRouter::from_config(
+        &turn_context.tools_config,
+        Some(mcp_tools),
+    ));

+    let model_supports_parallel = turn_context
+        .client
+        .get_model_family()
+        .supports_parallel_tool_calls;
+    let parallel_tool_calls = model_supports_parallel;
    let prompt = Prompt {
        input,
-        tools: router.specs().to_vec(),
+        tools: router.specs(),
+        parallel_tool_calls,
        base_instructions_override: turn_context.base_instructions.clone(),
        output_schema: turn_context.final_output_json_schema.clone(),
    };
@@ -1926,12 +2069,13 @@ async fn run_turn(
    let mut retries = 0;
    loop {
        match try_run_turn(
-            &router,
-            sess,
-            turn_context,
-            turn_diff_tracker,
+            Arc::clone(&router),
+            Arc::clone(&sess),
+            Arc::clone(&turn_context),
+            Arc::clone(&turn_diff_tracker),
            &sub_id,
            &prompt,
+            task_kind,
        )
        .await
        {
@@ -1939,6 +2083,10 @@ async fn run_turn(
            Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted),
            Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)),
            Err(e @ CodexErr::Fatal(_)) => return Err(e),
+            Err(e @ CodexErr::ContextWindowExceeded) => {
+                sess.set_total_tokens_full(&sub_id, &turn_context).await;
+                return Err(e);
+            }
            Err(CodexErr::UsageLimitReached(e)) => {
                let rate_limits = e.rate_limits.clone();
                if let Some(rate_limits) = rate_limits {
@@ -1965,9 +2113,7 @@ async fn run_turn(
                    // at a seemingly frozen screen.
                    sess.notify_stream_error(
                        &sub_id,
-                        format!(
-                            "stream error: {e}; retrying {retries}/{max_retries} in {delay:?}…"
-                        ),
+                        format!("Re-connecting... {retries}/{max_retries}"),
                    )
                    .await;

@@ -1985,9 +2131,9 @@ async fn run_turn(
 /// "handled" such that it produces a `ResponseInputItem` that needs to be
 /// sent back to the model on the next turn.
 #[derive(Debug)]
-struct ProcessedResponseItem {
-    item: ResponseItem,
-    response: Option<ResponseInputItem>,
+pub(crate) struct ProcessedResponseItem {
+    pub(crate) item: ResponseItem,
+    pub(crate) response: Option<ResponseInputItem>,
 }

 #[derive(Debug)]
@@ -1997,12 +2143,13 @@ struct TurnRunResult {
 }

 async fn try_run_turn(
-    router: &crate::tools::ToolRouter,
-    sess: &Session,
-    turn_context: &TurnContext,
-    turn_diff_tracker: &mut TurnDiffTracker,
+    router: Arc<ToolRouter>,
+    sess: Arc<Session>,
+    turn_context: Arc<TurnContext>,
+    turn_diff_tracker: SharedTurnDiffTracker,
    sub_id: &str,
    prompt: &Prompt,
+    task_kind: TaskKind,
 ) -> CodexResult<TurnRunResult> {
    // call_ids that are part of this response.
    let completed_call_ids = prompt
@@ -2068,46 +2215,108 @@ async fn try_run_turn(
        summary: turn_context.client.get_reasoning_summary(),
    });
    sess.persist_rollout_items(&[rollout_item]).await;
-    let mut stream = turn_context.client.clone().stream(&prompt).await?;
+    let mut stream = turn_context
+        .client
+        .clone()
+        .stream_with_task_kind(prompt.as_ref(), task_kind)
+        .await?;

-    let mut output = Vec::new();
+    let tool_runtime = ToolCallRuntime::new(
+        Arc::clone(&router),
+        Arc::clone(&sess),
+        Arc::clone(&turn_context),
+        Arc::clone(&turn_diff_tracker),
+        sub_id.to_string(),
+    );
+    let mut output: FuturesOrdered<BoxFuture<CodexResult<ProcessedResponseItem>>> =
+        FuturesOrdered::new();

    loop {
        // Poll the next item from the model stream. We must inspect *both* Ok and Err
        // cases so that transient stream failures (e.g., dropped SSE connection before
        // `response.completed`) bubble up and trigger the caller's retry logic.
        let event = stream.next().await;
-        let Some(event) = event else {
-            // Channel closed without yielding a final Completed event or explicit error.
-            // Treat as a disconnected stream so the caller can retry.
-            return Err(CodexErr::Stream(
-                "stream closed before response.completed".into(),
-                None,
-            ));
+        let event = match event {
+            Some(res) => res?,
+            None => {
+                return Err(CodexErr::Stream(
+                    "stream closed before response.completed".into(),
+                    None,
+                ));
+            }
        };

-        let event = match event {
-            Ok(ev) => ev,
-            Err(e) => {
-                // Propagate the underlying stream error to the caller (run_turn), which
-                // will apply the configured `stream_max_retries` policy.
-                return Err(e);
-            }
+        let add_completed = &mut |response_item: ProcessedResponseItem| {
+            output.push_back(future::ready(Ok(response_item)).boxed());
        };

        match event {
            ResponseEvent::Created => {}
            ResponseEvent::OutputItemDone(item) => {
-                let response = handle_response_item(
-                    router,
-                    sess,
-                    turn_context,
-                    turn_diff_tracker,
-                    sub_id,
-                    item.clone(),
-                )
-                .await?;
-                output.push(ProcessedResponseItem { item, response });
+                match ToolRouter::build_tool_call(sess.as_ref(), item.clone()) {
+                    Ok(Some(call)) => {
+                        let payload_preview = call.payload.log_payload().into_owned();
+                        tracing::info!("ToolCall: {} {}", call.tool_name, payload_preview);
+
+                        let response = tool_runtime.handle_tool_call(call);
+
+                        output.push_back(
+                            async move {
+                                Ok(ProcessedResponseItem {
+                                    item,
+                                    response: Some(response.await?),
+                                })
+                            }
+                            .boxed(),
+                        );
+                    }
+                    Ok(None) => {
+                        let response = handle_non_tool_response_item(
+                            Arc::clone(&sess),
+                            Arc::clone(&turn_context),
+                            sub_id,
+                            item.clone(),
+                        )
+                        .await?;
+                        add_completed(ProcessedResponseItem { item, response });
+                    }
+                    Err(FunctionCallError::MissingLocalShellCallId) => {
+                        let msg = "LocalShellCall without call_id or id";
+                        turn_context
+                            .client
+                            .get_otel_event_manager()
+                            .log_tool_failed("local_shell", msg);
+                        error!(msg);
+
+                        let response = ResponseInputItem::FunctionCallOutput {
+                            call_id: String::new(),
+                            output: FunctionCallOutputPayload {
+                                content: msg.to_string(),
+                                success: None,
+                            },
+                        };
+                        add_completed(ProcessedResponseItem {
+                            item,
+                            response: Some(response),
+                        });
+                    }
+                    Err(FunctionCallError::RespondToModel(message)) => {
+                        let response = ResponseInputItem::FunctionCallOutput {
+                            call_id: String::new(),
+                            output: FunctionCallOutputPayload {
+                                content: message,
+                                success: None,
+                            },
+                        };
+                        add_completed(ProcessedResponseItem {
+                            item,
+                            response: Some(response),
+                        });
+                    }
+                    Err(FunctionCallError::Fatal(message)) => {
+                        return Err(CodexErr::Fatal(message));
+                    }
+                }
            }
            ResponseEvent::WebSearchCallBegin { call_id } => {
                let _ = sess
@@ -2127,10 +2336,15 @@ async fn try_run_turn(
                response_id: _,
                token_usage,
            } => {
-                sess.update_token_usage_info(sub_id, turn_context, token_usage.as_ref())
+                sess.update_token_usage_info(sub_id, turn_context.as_ref(), token_usage.as_ref())
                    .await;

-                let unified_diff = turn_diff_tracker.get_unified_diff();
+                let processed_items: Vec<ProcessedResponseItem> = output.try_collect().await?;
+
+                let unified_diff = {
+                    let mut tracker = turn_diff_tracker.lock().await;
+                    tracker.get_unified_diff()
+                };
                if let Ok(Some(unified_diff)) = unified_diff {
                    let msg = EventMsg::TurnDiff(TurnDiffEvent { unified_diff });
                    let event = Event {
@@ -2141,7 +2355,7 @@ async fn try_run_turn(
                }

                let result = TurnRunResult {
-                    processed_items: output,
+                    processed_items,
                    total_token_usage: token_usage.clone(),
                };

@@ -2189,88 +2403,40 @@ async fn try_run_turn(
    }
 }

-async fn handle_response_item(
-    router: &crate::tools::ToolRouter,
-    sess: &Session,
-    turn_context: &TurnContext,
-    turn_diff_tracker: &mut TurnDiffTracker,
+async fn handle_non_tool_response_item(
+    sess: Arc<Session>,
+    turn_context: Arc<TurnContext>,
    sub_id: &str,
    item: ResponseItem,
 ) -> CodexResult<Option<ResponseInputItem>> {
    debug!(?item, "Output item");

-    match ToolRouter::build_tool_call(sess, item.clone()) {
-        Ok(Some(call)) => {
-            let payload_preview = call.payload.log_payload().into_owned();
-            tracing::info!("ToolCall: {} {}", call.tool_name, payload_preview);
-            match router
-                .dispatch_tool_call(sess, turn_context, turn_diff_tracker, sub_id, call)
-                .await
-            {
-                Ok(response) => Ok(Some(response)),
-                Err(FunctionCallError::Fatal(message)) => Err(CodexErr::Fatal(message)),
-                Err(other) => unreachable!("non-fatal tool error returned: {other:?}"),
+    match &item {
+        ResponseItem::Message { .. }
+        | ResponseItem::Reasoning { .. }
+        | ResponseItem::WebSearchCall { .. } => {
+            let msgs = match &item {
+                ResponseItem::Message { .. } if turn_context.is_review_mode => {
+                    trace!("suppressing assistant Message in review mode");
+                    Vec::new()
+                }
+                _ => map_response_item_to_event_messages(&item, sess.show_raw_agent_reasoning()),
+            };
+            for msg in msgs {
+                let event = Event {
+                    id: sub_id.to_string(),
+                    msg,
+                };
+                sess.send_event(event).await;
            }
        }
-        Ok(None) => {
-            match &item {
-                ResponseItem::Message { .. }
-                | ResponseItem::Reasoning { .. }
-                | ResponseItem::WebSearchCall { .. } => {
-                    let msgs = match &item {
-                        ResponseItem::Message { .. } if turn_context.is_review_mode => {
-                            trace!("suppressing assistant Message in review mode");
-                            Vec::new()
-                        }
-                        _ => map_response_item_to_event_messages(
-                            &item,
-                            sess.show_raw_agent_reasoning(),
-                        ),
-                    };
-                    for msg in msgs {
-                        let event = Event {
-                            id: sub_id.to_string(),
-                            msg,
-                        };
-                        sess.send_event(event).await;
-                    }
-                }
-                ResponseItem::FunctionCallOutput { .. }
-                | ResponseItem::CustomToolCallOutput { .. } => {
-                    debug!("unexpected tool output from stream");
-                }
-                _ => {}
-            }
-
-            Ok(None)
+        ResponseItem::FunctionCallOutput { .. } | ResponseItem::CustomToolCallOutput { .. } => {
+            debug!("unexpected tool output from stream");
        }
-        Err(FunctionCallError::MissingLocalShellCallId) => {
-            let msg = "LocalShellCall without call_id or id";
-            turn_context
-                .client
-                .get_otel_event_manager()
-                .log_tool_failed("local_shell", msg);
-            error!(msg);
-
-            Ok(Some(ResponseInputItem::FunctionCallOutput {
-                call_id: String::new(),
-                output: FunctionCallOutputPayload {
-                    content: msg.to_string(),
-                    success: None,
-                },
-            }))
-        }
-        Err(FunctionCallError::RespondToModel(msg)) => {
-            Ok(Some(ResponseInputItem::FunctionCallOutput {
-                call_id: String::new(),
-                output: FunctionCallOutputPayload {
-                    content: msg,
-                    success: None,
-                },
-            }))
-        }
-        Err(FunctionCallError::Fatal(message)) => Err(CodexErr::Fatal(message)),
+        _ => {}
    }
+
+    Ok(None)
 }

 pub(super) fn get_last_assistant_message_from_turn(responses: &[ResponseItem]) -> Option<String> {
@@ -2506,13 +2672,19 @@ mod tests {

        let out = format_exec_output_str(&exec);

+        // Strip truncation header if present for subsequent assertions
+        let body = out
+            .strip_prefix("Total output lines: ")
+            .and_then(|rest| rest.split_once("\n\n").map(|x| x.1))
+            .unwrap_or(out.as_str());
+
        // Expect elision marker with correct counts
        let omitted = 400 - MODEL_FORMAT_MAX_LINES; // 144
        let marker = format!("\n[... omitted {omitted} of 400 lines ...]\n\n");
        assert!(out.contains(&marker), "missing marker: {out}");

        // Validate head and tail
-        let parts: Vec<&str> = out.split(&marker).collect();
+        let parts: Vec<&str> = body.split(&marker).collect();
        assert_eq!(parts.len(), 2, "expected one marker split");
        let head = parts[0];
        let tail = parts[1];
@@ -2548,14 +2720,19 @@ mod tests {
        };

        let out = format_exec_output_str(&exec);
-        assert!(out.len() <= MODEL_FORMAT_MAX_BYTES, "exceeds byte budget");
+        // Keep strict budget on the truncated body (excluding header)
+        let body = out
+            .strip_prefix("Total output lines: ")
+            .and_then(|rest| rest.split_once("\n\n").map(|x| x.1))
+            .unwrap_or(out.as_str());
+        assert!(body.len() <= MODEL_FORMAT_MAX_BYTES, "exceeds byte budget");
        assert!(out.contains("omitted"), "should contain elision marker");

        // Ensure head and tail are drawn from the original
-        assert!(full.starts_with(out.chars().take(8).collect::<String>().as_str()));
+        assert!(full.starts_with(body.chars().take(8).collect::<String>().as_str()));
        assert!(
            full.ends_with(
-                out.chars()
+                body.chars()
                    .rev()
                    .take(8)
                    .collect::<String>()
@@ -2681,12 +2858,7 @@ mod tests {
        );
        let tools_config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &config.model_family,
-            include_plan_tool: config.include_plan_tool,
-            include_apply_patch_tool: config.include_apply_patch_tool,
-            include_web_search_request: config.tools_web_search_request,
-            use_streamable_shell_tool: config.use_experimental_streamable_shell_tool,
-            include_view_image_tool: config.include_view_image_tool,
-            experimental_unified_exec_tool: config.use_experimental_unified_exec_tool,
+            features: &config.features,
        });
        let turn_context = TurnContext {
            client,
@@ -2712,7 +2884,6 @@ mod tests {
                turn_context.sandbox_policy.clone(),
                turn_context.cwd.clone(),
                None,
-                config.admin.audit.clone(),
            )),
        };
        let session = Session {
@@ -2755,12 +2926,7 @@ mod tests {
        );
        let tools_config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &config.model_family,
-            include_plan_tool: config.include_plan_tool,
-            include_apply_patch_tool: config.include_apply_patch_tool,
-            include_web_search_request: config.tools_web_search_request,
-            use_streamable_shell_tool: config.use_experimental_streamable_shell_tool,
-            include_view_image_tool: config.include_view_image_tool,
-            experimental_unified_exec_tool: config.use_experimental_unified_exec_tool,
+            features: &config.features,
        });
        let turn_context = Arc::new(TurnContext {
            client,
@@ -2786,7 +2952,6 @@ mod tests {
                config.sandbox_policy.clone(),
                config.cwd.clone(),
                None,
-                config.admin.audit.clone(),
            )),
        };
        let session = Arc::new(Session {
@@ -2904,13 +3069,10 @@ mod tests {
    #[tokio::test]
    async fn fatal_tool_error_stops_turn_and_reports_error() {
        let (session, turn_context, _rx) = make_session_and_context_with_rx();
-        let session_ref = session.as_ref();
-        let turn_context_ref = turn_context.as_ref();
        let router = ToolRouter::from_config(
-            &turn_context_ref.tools_config,
-            Some(session_ref.services.mcp_connection_manager.list_all_tools()),
+            &turn_context.tools_config,
+            Some(session.services.mcp_connection_manager.list_all_tools()),
        );
-        let mut tracker = TurnDiffTracker::new();
        let item = ResponseItem::CustomToolCall {
            id: None,
            status: None,
@@ -2919,22 +3081,26 @@ mod tests {
            input: "{}".to_string(),
        };

-        let err = handle_response_item(
-            &router,
-            session_ref,
-            turn_context_ref,
-            &mut tracker,
-            "sub-id",
-            item,
-        )
-        .await
-        .expect_err("expected fatal error");
+        let call = ToolRouter::build_tool_call(session.as_ref(), item.clone())
+            .expect("build tool call")
+            .expect("tool call present");
+        let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
+        let err = router
+            .dispatch_tool_call(
+                Arc::clone(&session),
+                Arc::clone(&turn_context),
+                tracker,
+                "sub-id".to_string(),
+                call,
+            )
+            .await
+            .expect_err("expected fatal error");

        match err {
-            CodexErr::Fatal(message) => {
+            FunctionCallError::Fatal(message) => {
                assert_eq!(message, "tool shell invoked with incompatible payload");
            }
-            other => panic!("expected CodexErr::Fatal, got {other:?}"),
+            other => panic!("expected FunctionCallError::Fatal, got {other:?}"),
        }
    }

@@ -3048,9 +3214,11 @@ mod tests {
        use crate::turn_diff_tracker::TurnDiffTracker;
        use std::collections::HashMap;

-        let (session, mut turn_context) = make_session_and_context();
+        let (session, mut turn_context_raw) = make_session_and_context();
        // Ensure policy is NOT OnRequest so the early rejection path triggers
-        turn_context.approval_policy = AskForApproval::OnFailure;
+        turn_context_raw.approval_policy = AskForApproval::OnFailure;
+        let session = Arc::new(session);
+        let mut turn_context = Arc::new(turn_context_raw);

        let params = ExecParams {
            command: if cfg!(windows) {
@@ -3078,7 +3246,7 @@ mod tests {
            ..params.clone()
        };

-        let mut turn_diff_tracker = TurnDiffTracker::new();
+        let turn_diff_tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));

        let tool_name = "shell";
        let sub_id = "test-sub".to_string();
@@ -3087,9 +3255,9 @@ mod tests {
        let resp = handle_container_exec_with_params(
            tool_name,
            params,
-            &session,
-            &turn_context,
-            &mut turn_diff_tracker,
+            Arc::clone(&session),
+            Arc::clone(&turn_context),
+            Arc::clone(&turn_diff_tracker),
            sub_id,
            call_id,
        )
@@ -3108,14 +3276,16 @@ mod tests {

        // Now retry the same command WITHOUT escalated permissions; should succeed.
        // Force DangerFullAccess to avoid platform sandbox dependencies in tests.
-        turn_context.sandbox_policy = SandboxPolicy::DangerFullAccess;
+        Arc::get_mut(&mut turn_context)
+            .expect("unique turn context Arc")
+            .sandbox_policy = SandboxPolicy::DangerFullAccess;

        let resp2 = handle_container_exec_with_params(
            tool_name,
            params2,
-            &session,
-            &turn_context,
-            &mut turn_diff_tracker,
+            Arc::clone(&session),
+            Arc::clone(&turn_context),
+            Arc::clone(&turn_diff_tracker),
            "test-sub".to_string(),
            "test-call-2".to_string(),
        )
--- a/codex-rs/core/src/codex/compact.rs
+++ b/codex-rs/core/src/codex/compact.rs
@@ -16,6 +16,7 @@ use crate::protocol::InputItem;
 use crate::protocol::InputMessageKind;
 use crate::protocol::TaskStartedEvent;
 use crate::protocol::TurnContextItem;
+use crate::state::TaskKind;
 use crate::truncate::truncate_middle;
 use crate::util::backoff;
 use askama::Template;
@@ -70,14 +71,10 @@ async fn run_compact_task_inner(
    input: Vec<InputItem>,
 ) {
    let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input);
-    let turn_input = sess
+    let mut turn_input = sess
        .turn_input_with_history(vec![initial_input_for_turn.clone().into()])
        .await;
-
-    let prompt = Prompt {
-        input: turn_input,
-        ..Default::default()
-    };
+    let mut truncated_count = 0usize;

    let max_retries = turn_context.client.get_provider().stream_max_retries();
    let mut retries = 0;
@@ -93,25 +90,54 @@ async fn run_compact_task_inner(
    sess.persist_rollout_items(&[rollout_item]).await;

    loop {
+        let prompt = Prompt {
+            input: turn_input.clone(),
+            ..Default::default()
+        };
        let attempt_result =
            drain_to_completed(&sess, turn_context.as_ref(), &sub_id, &prompt).await;

        match attempt_result {
            Ok(()) => {
+                if truncated_count > 0 {
+                    sess.notify_background_event(
+                        &sub_id,
+                        format!(
+                            "Trimmed {truncated_count} older conversation item(s) before compacting so the prompt fits the model context window."
+                        ),
+                    )
+                    .await;
+                }
                break;
            }
            Err(CodexErr::Interrupted) => {
                return;
            }
+            Err(e @ CodexErr::ContextWindowExceeded) => {
+                if turn_input.len() > 1 {
+                    turn_input.remove(0);
+                    truncated_count += 1;
+                    retries = 0;
+                    continue;
+                }
+                sess.set_total_tokens_full(&sub_id, turn_context.as_ref())
+                    .await;
+                let event = Event {
+                    id: sub_id.clone(),
+                    msg: EventMsg::Error(ErrorEvent {
+                        message: e.to_string(),
+                    }),
+                };
+                sess.send_event(event).await;
+                return;
+            }
            Err(e) => {
                if retries < max_retries {
                    retries += 1;
                    let delay = backoff(retries);
                    sess.notify_stream_error(
                        &sub_id,
-                        format!(
-                            "stream error: {e}; retrying {retries}/{max_retries} in {delay:?}…"
-                        ),
+                        format!("Re-connecting... {retries}/{max_retries}"),
                    )
                    .await;
                    tokio::time::sleep(delay).await;
@@ -233,7 +259,11 @@ async fn drain_to_completed(
    sub_id: &str,
    prompt: &Prompt,
 ) -> CodexResult<()> {
-    let mut stream = turn_context.client.clone().stream(prompt).await?;
+    let mut stream = turn_context
+        .client
+        .clone()
+        .stream_with_task_kind(prompt, TaskKind::Compact)
+        .await?;
    loop {
        let maybe_event = stream.next().await;
        let Some(event) = maybe_event else {
--- a/codex-rs/core/src/config.rs
+++ b/codex-rs/core/src/config.rs
@@ -1,17 +1,8 @@
-use crate::admin_controls::AdminControls;
-use crate::admin_controls::DangerAuditAction;
-use crate::admin_controls::DangerDecision;
-use crate::admin_controls::DangerPending;
-use crate::admin_controls::DangerRequestSource;
-use crate::admin_controls::PendingAdminAction;
-use crate::admin_controls::build_danger_audit_payload;
-use crate::admin_controls::log_admin_event;
 use crate::config_loader::LoadedConfigLayers;
 pub use crate::config_loader::load_config_as_toml;
 use crate::config_loader::load_config_layers_with_overrides;
 use crate::config_loader::merge_toml_values;
 use crate::config_profile::ConfigProfile;
-use crate::config_types::AdminConfigToml;
 use crate::config_types::DEFAULT_OTEL_ENVIRONMENT;
 use crate::config_types::History;
 use crate::config_types::McpServerConfig;
@@ -26,6 +17,10 @@ use crate::config_types::ShellEnvironmentPolicy;
 use crate::config_types::ShellEnvironmentPolicyToml;
 use crate::config_types::Tui;
 use crate::config_types::UriBasedFileOpener;
+use crate::features::Feature;
+use crate::features::FeatureOverrides;
+use crate::features::Features;
+use crate::features::FeaturesToml;
 use crate::git_info::resolve_root_git_project_for_trust;
 use crate::model_family::ModelFamily;
 use crate::model_family::derive_default_model_family;
@@ -42,12 +37,15 @@ use codex_protocol::config_types::ReasoningEffort;
 use codex_protocol::config_types::ReasoningSummary;
 use codex_protocol::config_types::SandboxMode;
 use codex_protocol::config_types::Verbosity;
+use codex_rmcp_client::OAuthCredentialsStoreMode;
 use dirs::home_dir;
 use serde::Deserialize;
 use std::collections::BTreeMap;
 use std::collections::HashMap;
+use std::io::ErrorKind;
 use std::path::Path;
 use std::path::PathBuf;
+
 use tempfile::NamedTempFile;
 use toml::Value as TomlValue;
 use toml_edit::Array as TomlArray;
@@ -55,7 +53,10 @@ use toml_edit::DocumentMut;
 use toml_edit::Item as TomlItem;
 use toml_edit::Table as TomlTable;

-const OPENAI_DEFAULT_MODEL: &str = "gpt-5-codex";
+#[cfg(target_os = "windows")]
+pub const OPENAI_DEFAULT_MODEL: &str = "gpt-5";
+#[cfg(not(target_os = "windows"))]
+pub const OPENAI_DEFAULT_MODEL: &str = "gpt-5-codex";
 const OPENAI_DEFAULT_REVIEW_MODEL: &str = "gpt-5-codex";
 pub const GPT_5_CODEX_MEDIUM_MODEL: &str = "gpt-5-codex";

@@ -148,6 +149,15 @@ pub struct Config {
    /// Definition for MCP servers that Codex can reach out to for tool calls.
    pub mcp_servers: HashMap<String, McpServerConfig>,

+    /// Preferred store for MCP OAuth credentials.
+    /// keyring: Use an OS-specific keyring service.
+    ///          Credentials stored in the keyring will only be readable by Codex unless the user explicitly grants access via OS-level keyring access.
+    ///          https://github.com/openai/codex/blob/main/codex-rs/rmcp-client/src/oauth.rs#L2
+    /// file: CODEX_HOME/.credentials.json
+    ///       This file will be readable to Codex and other applications running as the same user.
+    /// auto (default): keyring if available, otherwise file.
+    pub mcp_oauth_credentials_store_mode: OAuthCredentialsStoreMode,
+
    /// Combined provider map (defaults merged with user-defined overrides).
    pub model_providers: HashMap<String, ModelProviderInfo>,

@@ -212,9 +222,15 @@ pub struct Config {
    /// Include the `view_image` tool that lets the agent attach a local image path to context.
    pub include_view_image_tool: bool,

+    /// Centralized feature flags; source of truth for feature gating.
+    pub features: Features,
+
    /// The active profile name used to derive this `Config` (if any).
    pub active_profile: Option<String>,

+    /// Tracks whether the Windows onboarding screen has been acknowledged.
+    pub windows_wsl_setup_acknowledged: bool,
+
    /// When true, disables burst-paste detection for typed input entirely.
    /// All characters are inserted as they are received, and no buffering
    /// or placeholder replacement will occur for fast keypress bursts.
@@ -222,9 +238,6 @@ pub struct Config {

    /// OTEL configuration (exporter type, endpoint, headers, etc.).
    pub otel: crate::config_types::OtelConfig,
-
-    /// Administrator-controlled options and audit configuration.
-    pub admin: AdminControls,
 }

 impl Config {
@@ -307,12 +320,35 @@ pub async fn load_global_mcp_servers(
        return Ok(BTreeMap::new());
    };

+    ensure_no_inline_bearer_tokens(servers_value)?;
+
    servers_value
        .clone()
        .try_into()
        .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))
 }

+/// We briefly allowed plain-text `bearer_token` fields in MCP server configs.
+/// Loading now fails with an error pointing at `bearer_token_env_var` so people who recently added one know to migrate; this check can be removed after a few months.
+fn ensure_no_inline_bearer_tokens(value: &TomlValue) -> std::io::Result<()> {
+    let Some(servers_table) = value.as_table() else {
+        return Ok(());
+    };
+
+    for (server_name, server_value) in servers_table {
+        if let Some(server_table) = server_value.as_table()
+            && server_table.contains_key("bearer_token")
+        {
+            let message = format!(
+                "mcp_servers.{server_name} uses unsupported `bearer_token`; set `bearer_token_env_var`."
+            );
+            return Err(std::io::Error::new(ErrorKind::InvalidData, message));
+        }
+    }
+
+    Ok(())
+}
+
 pub fn write_global_mcp_servers(
    codex_home: &Path,
    servers: &BTreeMap<String, McpServerConfig>,
@@ -361,14 +397,21 @@ pub fn write_global_mcp_servers(
                        entry["env"] = TomlItem::Table(env_table);
                    }
                }
-                McpServerTransportConfig::StreamableHttp { url, bearer_token } => {
+                McpServerTransportConfig::StreamableHttp {
+                    url,
+                    bearer_token_env_var,
+                } => {
                    entry["url"] = toml_edit::value(url.clone());
-                    if let Some(token) = bearer_token {
-                        entry["bearer_token"] = toml_edit::value(token.clone());
+                    if let Some(env_var) = bearer_token_env_var {
+                        entry["bearer_token_env_var"] = toml_edit::value(env_var.clone());
                    }
                }
            }

+            if !config.enabled {
+                entry["enabled"] = toml_edit::value(false);
+            }
+
            if let Some(timeout) = config.startup_timeout_sec {
                entry["startup_timeout_sec"] = toml_edit::value(timeout.as_secs_f64());
            }
@@ -480,6 +523,29 @@ pub fn set_project_trusted(codex_home: &Path, project_path: &Path) -> anyhow::Re
    Ok(())
 }

+/// Persist the Windows onboarding acknowledgement as a top-level key in `CONFIG_TOML_FILE`.
+pub fn set_windows_wsl_setup_acknowledged(
+    codex_home: &Path,
+    acknowledged: bool,
+) -> anyhow::Result<()> {
+    let config_path = codex_home.join(CONFIG_TOML_FILE);
+    let mut doc = match std::fs::read_to_string(config_path.clone()) {
+        Ok(s) => s.parse::<DocumentMut>()?,
+        Err(e) if e.kind() == std::io::ErrorKind::NotFound => DocumentMut::new(),
+        Err(e) => return Err(e.into()),
+    };
+    // Set (or overwrite) the flag; no other key of the parsed document is touched here.
+    doc["windows_wsl_setup_acknowledged"] = toml_edit::value(acknowledged);
+    // The home directory may not exist yet, e.g. when the config file was not found above.
+    std::fs::create_dir_all(codex_home)?;
+    // Write to a temp file inside `codex_home`, then move it over the config path.
+    let tmp_file = NamedTempFile::new_in(codex_home)?;
+    std::fs::write(tmp_file.path(), doc.to_string())?;
+    tmp_file.persist(config_path)?;
+
+    Ok(())
+}
+
 fn ensure_profile_table<'a>(
    doc: &'a mut DocumentMut,
    profile_name: &str,
@@ -677,6 +743,14 @@ pub struct ConfigToml {
    #[serde(default)]
    pub mcp_servers: HashMap<String, McpServerConfig>,

+    /// Preferred backend for storing MCP OAuth credentials.
+    /// keyring: Use an OS-specific keyring service.
+    ///          https://github.com/openai/codex/blob/main/codex-rs/rmcp-client/src/oauth.rs#L2
+    /// file: Use a file in the Codex home directory.
+    /// auto (default): Use the OS-specific keyring service if available, otherwise use a file.
+    #[serde(default)]
+    pub mcp_oauth_credentials_store: Option<OAuthCredentialsStoreMode>,
+
    /// User-defined provider entries that extend/override the built-in list.
    #[serde(default)]
    pub model_providers: HashMap<String, ModelProviderInfo>,
@@ -727,18 +801,15 @@ pub struct ConfigToml {
    /// Base URL for requests to ChatGPT (as opposed to the OpenAI API).
    pub chatgpt_base_url: Option<String>,

-    /// Experimental path to a file whose contents replace the built-in BASE_INSTRUCTIONS.
-    pub experimental_instructions_file: Option<PathBuf>,
-
-    pub experimental_use_exec_command_tool: Option<bool>,
-    pub experimental_use_unified_exec_tool: Option<bool>,
-    pub experimental_use_rmcp_client: Option<bool>,
-
    pub projects: Option<HashMap<String, ProjectConfig>>,

    /// Nested tools section for feature toggles
    pub tools: Option<ToolsToml>,

+    /// Centralized feature flags (new). Prefer this over individual toggles.
+    #[serde(default)]
+    pub features: Option<FeaturesToml>,
+
    /// When true, disables burst-paste detection for typed input entirely.
    /// All characters are inserted as they are received, and no buffering
    /// or placeholder replacement will occur for fast keypress bursts.
@@ -747,9 +818,15 @@ pub struct ConfigToml {
    /// OTEL configuration.
    pub otel: Option<crate::config_types::OtelConfigToml>,

-    /// Administrator-level controls applied to all users on this host.
-    #[serde(default)]
-    pub admin: Option<AdminConfigToml>,
+    /// Tracks whether the Windows onboarding screen has been acknowledged.
+    pub windows_wsl_setup_acknowledged: Option<bool>,
+
+    /// Legacy toggles that are still read; prefer `features` for new configs.
+    pub experimental_instructions_file: Option<PathBuf>,
+    pub experimental_use_exec_command_tool: Option<bool>,
+    pub experimental_use_unified_exec_tool: Option<bool>,
+    pub experimental_use_rmcp_client: Option<bool>,
+    pub experimental_use_freeform_apply_patch: Option<bool>,
 }

 impl From<ConfigToml> for UserSavedConfig {
@@ -913,9 +990,9 @@ impl Config {
            config_profile: config_profile_key,
            codex_linux_sandbox_exe,
            base_instructions,
-            include_plan_tool,
-            include_apply_patch_tool,
-            include_view_image_tool,
+            include_plan_tool: include_plan_tool_override,
+            include_apply_patch_tool: include_apply_patch_tool_override,
+            include_view_image_tool: include_view_image_tool_override,
            show_raw_agent_reasoning,
            tools_web_search_request: override_tools_web_search_request,
        } = overrides;
@@ -938,68 +1015,16 @@ impl Config {
            None => ConfigProfile::default(),
        };

-        let resolved_approval_policy = approval_policy
-            .or(config_profile.approval_policy)
-            .or(cfg.approval_policy)
-            .unwrap_or_else(AskForApproval::default);
+        let feature_overrides = FeatureOverrides {
+            include_plan_tool: include_plan_tool_override,
+            include_apply_patch_tool: include_apply_patch_tool_override,
+            include_view_image_tool: include_view_image_tool_override,
+            web_search_request: override_tools_web_search_request,
+        };

-        let mut admin = AdminControls::from_toml(cfg.admin.clone())?;
+        let features = Features::from_config(&cfg, &config_profile, feature_overrides);

-        let mut sandbox_policy = cfg.derive_sandbox_policy(sandbox_mode);
-
-        if matches!(sandbox_policy, SandboxPolicy::DangerFullAccess) {
-            match admin.decision_for_danger() {
-                DangerDecision::Allowed => {
-                    if let Some(audit) = admin.audit.as_ref() {
-                        let pending = DangerPending {
-                            source: DangerRequestSource::Startup,
-                            requested_sandbox: SandboxPolicy::DangerFullAccess,
-                            requested_approval: resolved_approval_policy,
-                        };
-                        log_admin_event(
-                            audit,
-                            build_danger_audit_payload(&pending, DangerAuditAction::Approved, None),
-                        );
-                    }
-                }
-                DangerDecision::RequiresJustification => {
-                    let pending = DangerPending {
-                        source: DangerRequestSource::Startup,
-                        requested_sandbox: SandboxPolicy::DangerFullAccess,
-                        requested_approval: resolved_approval_policy,
-                    };
-                    if let Some(audit) = admin.audit.as_ref() {
-                        log_admin_event(
-                            audit,
-                            build_danger_audit_payload(
-                                &pending,
-                                DangerAuditAction::Requested,
-                                None,
-                            ),
-                        );
-                    }
-                    admin.pending.push(PendingAdminAction::Danger(pending));
-                    sandbox_policy = SandboxPolicy::new_workspace_write_policy();
-                }
-                DangerDecision::Denied => {
-                    if let Some(audit) = admin.audit.as_ref() {
-                        let pending = DangerPending {
-                            source: DangerRequestSource::Startup,
-                            requested_sandbox: SandboxPolicy::DangerFullAccess,
-                            requested_approval: resolved_approval_policy,
-                        };
-                        log_admin_event(
-                            audit,
-                            build_danger_audit_payload(&pending, DangerAuditAction::Denied, None),
-                        );
-                    }
-                    return Err(std::io::Error::new(
-                        std::io::ErrorKind::PermissionDenied,
-                        "danger-full-access is disabled by administrator policy",
-                    ));
-                }
-            }
-        }
+        let sandbox_policy = cfg.derive_sandbox_policy(sandbox_mode);

        let mut model_providers = built_in_model_providers();
        // Merge user-defined providers into the built-in list.
@@ -1044,13 +1069,13 @@ impl Config {

        let history = cfg.history.unwrap_or_default();

-        let tools_web_search_request = override_tools_web_search_request
-            .or(cfg.tools.as_ref().and_then(|t| t.web_search))
-            .unwrap_or(false);
-
-        let include_view_image_tool = include_view_image_tool
-            .or(cfg.tools.as_ref().and_then(|t| t.view_image))
-            .unwrap_or(true);
+        let include_plan_tool_flag = features.enabled(Feature::PlanTool);
+        let include_apply_patch_tool_flag = features.enabled(Feature::ApplyPatchFreeform);
+        let include_view_image_tool_flag = features.enabled(Feature::ViewImageTool);
+        let tools_web_search_request = features.enabled(Feature::WebSearchRequest);
+        let use_experimental_streamable_shell_tool = features.enabled(Feature::StreamableShell);
+        let use_experimental_unified_exec_tool = features.enabled(Feature::UnifiedExec);
+        let use_experimental_use_rmcp_client = features.enabled(Feature::RmcpClient);

        let model = model
            .or(config_profile.model)
@@ -1108,13 +1133,19 @@ impl Config {
            model_provider_id,
            model_provider,
            cwd: resolved_cwd,
-            approval_policy: resolved_approval_policy,
+            approval_policy: approval_policy
+                .or(config_profile.approval_policy)
+                .or(cfg.approval_policy)
+                .unwrap_or_else(AskForApproval::default),
            sandbox_policy,
            shell_environment_policy,
            notify: cfg.notify,
            user_instructions,
            base_instructions,
            mcp_servers: cfg.mcp_servers,
+            // The config.toml omits "_mode" because it's a config file. However, "_mode"
+            // is important in code to differentiate the mode from the store implementation.
+            mcp_oauth_credentials_store_mode: cfg.mcp_oauth_credentials_store.unwrap_or_default(),
            model_providers,
            project_doc_max_bytes: cfg.project_doc_max_bytes.unwrap_or(PROJECT_DOC_MAX_BYTES),
            project_doc_fallback_filenames: cfg
@@ -1152,18 +1183,16 @@ impl Config {
                .chatgpt_base_url
                .or(cfg.chatgpt_base_url)
                .unwrap_or("https://chatgpt.com/backend-api/".to_string()),
-            include_plan_tool: include_plan_tool.unwrap_or(false),
-            include_apply_patch_tool: include_apply_patch_tool.unwrap_or(false),
+            include_plan_tool: include_plan_tool_flag,
+            include_apply_patch_tool: include_apply_patch_tool_flag,
            tools_web_search_request,
-            use_experimental_streamable_shell_tool: cfg
-                .experimental_use_exec_command_tool
-                .unwrap_or(false),
-            use_experimental_unified_exec_tool: cfg
-                .experimental_use_unified_exec_tool
-                .unwrap_or(false),
-            use_experimental_use_rmcp_client: cfg.experimental_use_rmcp_client.unwrap_or(false),
-            include_view_image_tool,
+            use_experimental_streamable_shell_tool,
+            use_experimental_unified_exec_tool,
+            use_experimental_use_rmcp_client,
+            include_view_image_tool: include_view_image_tool_flag,
+            features,
            active_profile: active_profile_name,
+            windows_wsl_setup_acknowledged: cfg.windows_wsl_setup_acknowledged.unwrap_or(false),
            disable_paste_burst: cfg.disable_paste_burst.unwrap_or(false),
            tui_notifications: cfg
                .tui
@@ -1183,7 +1212,6 @@ impl Config {
                    exporter,
                }
            },
-            admin,
        };
        Ok(config)
    }
@@ -1293,9 +1321,9 @@ pub fn log_dir(cfg: &Config) -> std::io::Result<PathBuf> {

 #[cfg(test)]
 mod tests {
-    use crate::admin_controls::AdminControls;
    use crate::config_types::HistoryPersistence;
    use crate::config_types::Notifications;
+    use crate::features::Feature;

    use super::*;
    use pretty_assertions::assert_eq;
@@ -1404,6 +1432,172 @@ exclude_slash_tmp = true
        );
    }

+    #[test]
+    fn config_defaults_to_auto_oauth_store_mode() -> std::io::Result<()> {
+        let codex_home = TempDir::new()?;
+        let cfg = ConfigToml::default();
+        // `mcp_oauth_credentials_store` is left unset, so loading must fall back to the default.
+        let config = Config::load_from_base_config_with_overrides(
+            cfg,
+            ConfigOverrides::default(),
+            codex_home.path().to_path_buf(),
+        )?;
+
+        assert_eq!(
+            config.mcp_oauth_credentials_store_mode,
+            OAuthCredentialsStoreMode::Auto,
+        );
+
+        Ok(())
+    }
+
+    #[test]
+    fn profile_legacy_toggles_override_base() -> std::io::Result<()> {
+        let codex_home = TempDir::new()?;
+        let mut profiles = HashMap::new();
+        profiles.insert(
+            "work".to_string(),
+            ConfigProfile {
+                include_plan_tool: Some(true),
+                include_view_image_tool: Some(false),
+                ..Default::default()
+            },
+        );
+        let cfg = ConfigToml {
+            profiles,
+            profile: Some("work".to_string()),
+            ..Default::default()
+        };
+        // The base config sets neither toggle; both values come from the selected "work" profile.
+        let config = Config::load_from_base_config_with_overrides(
+            cfg,
+            ConfigOverrides::default(),
+            codex_home.path().to_path_buf(),
+        )?;
+        // Both the feature set and the legacy boolean fields must reflect the profile.
+        assert!(config.features.enabled(Feature::PlanTool));
+        assert!(!config.features.enabled(Feature::ViewImageTool));
+        assert!(config.include_plan_tool);
+        assert!(!config.include_view_image_tool);
+
+        Ok(())
+    }
+
+    #[test]
+    fn feature_table_overrides_legacy_flags() -> std::io::Result<()> {
+        let codex_home = TempDir::new()?;
+        let mut entries = BTreeMap::new();
+        entries.insert("plan_tool".to_string(), false);
+        entries.insert("apply_patch_freeform".to_string(), false);
+        let cfg = ConfigToml {
+            features: Some(crate::features::FeaturesToml { entries }),
+            ..Default::default()
+        };
+        // Only the features table is populated; no legacy toggle or override is set here.
+        let config = Config::load_from_base_config_with_overrides(
+            cfg,
+            ConfigOverrides::default(),
+            codex_home.path().to_path_buf(),
+        )?;
+        // Entries set to `false` must read as disabled via `features` and the boolean fields.
+        assert!(!config.features.enabled(Feature::PlanTool));
+        assert!(!config.features.enabled(Feature::ApplyPatchFreeform));
+        assert!(!config.include_plan_tool);
+        assert!(!config.include_apply_patch_tool);
+
+        Ok(())
+    }
+
+    #[test]
+    fn legacy_toggles_map_to_features() -> std::io::Result<()> {
+        let codex_home = TempDir::new()?;
+        let cfg = ConfigToml {
+            experimental_use_exec_command_tool: Some(true),
+            experimental_use_unified_exec_tool: Some(true),
+            experimental_use_rmcp_client: Some(true),
+            experimental_use_freeform_apply_patch: Some(true),
+            ..Default::default()
+        };
+        // No features table here: only the legacy `experimental_*` keys are set.
+        let config = Config::load_from_base_config_with_overrides(
+            cfg,
+            ConfigOverrides::default(),
+            codex_home.path().to_path_buf(),
+        )?;
+        // Each legacy key must enable its `Feature` counterpart...
+        assert!(config.features.enabled(Feature::ApplyPatchFreeform));
+        assert!(config.features.enabled(Feature::StreamableShell));
+        assert!(config.features.enabled(Feature::UnifiedExec));
+        assert!(config.features.enabled(Feature::RmcpClient));
+        // ...and the derived boolean fields on `Config` must agree.
+        assert!(config.include_apply_patch_tool);
+        assert!(config.use_experimental_streamable_shell_tool);
+        assert!(config.use_experimental_unified_exec_tool);
+        assert!(config.use_experimental_use_rmcp_client);
+
+        Ok(())
+    }
+
+    #[test]
+    fn config_honors_explicit_file_oauth_store_mode() -> std::io::Result<()> {
+        let codex_home = TempDir::new()?;
+        let cfg = ConfigToml {
+            mcp_oauth_credentials_store: Some(OAuthCredentialsStoreMode::File),
+            ..Default::default()
+        };
+        // An explicit `File` setting must survive loading instead of falling back to the default.
+        let config = Config::load_from_base_config_with_overrides(
+            cfg,
+            ConfigOverrides::default(),
+            codex_home.path().to_path_buf(),
+        )?;
+
+        assert_eq!(
+            config.mcp_oauth_credentials_store_mode,
+            OAuthCredentialsStoreMode::File,
+        );
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn managed_config_overrides_oauth_store_mode() -> anyhow::Result<()> {
+        let codex_home = TempDir::new()?;
+        let managed_path = codex_home.path().join("managed_config.toml");
+        let config_path = codex_home.path().join(CONFIG_TOML_FILE);
+        // The user config asks for `file` while the managed config asks for `keyring`.
+        std::fs::write(&config_path, "mcp_oauth_credentials_store = \"file\"\n")?;
+        std::fs::write(&managed_path, "mcp_oauth_credentials_store = \"keyring\"\n")?;
+        // Point the loader at the managed file written above.
+        let overrides = crate::config_loader::LoaderOverrides {
+            managed_config_path: Some(managed_path.clone()),
+            #[cfg(target_os = "macos")]
+            managed_preferences_base64: None,
+        };
+        // The managed value must win in the resolved TOML...
+        let root_value = load_resolved_config(codex_home.path(), Vec::new(), overrides).await?;
+        let cfg: ConfigToml = root_value.try_into().map_err(|e| {
+            tracing::error!("Failed to deserialize overridden config: {e}");
+            std::io::Error::new(std::io::ErrorKind::InvalidData, e)
+        })?;
+        assert_eq!(
+            cfg.mcp_oauth_credentials_store,
+            Some(OAuthCredentialsStoreMode::Keyring),
+        );
+        // ...and carry through to the final `Config`.
+        let final_config = Config::load_from_base_config_with_overrides(
+            cfg,
+            ConfigOverrides::default(),
+            codex_home.path().to_path_buf(),
+        )?;
+        assert_eq!(
+            final_config.mcp_oauth_credentials_store_mode,
+            OAuthCredentialsStoreMode::Keyring,
+        );
+
+        Ok(())
+    }
+
    #[tokio::test]
    async fn load_global_mcp_servers_returns_empty_if_missing() -> anyhow::Result<()> {
        let codex_home = TempDir::new()?;
@@ -1427,6 +1621,7 @@ exclude_slash_tmp = true
                    args: vec!["hello".to_string()],
                    env: None,
                },
+                enabled: true,
                startup_timeout_sec: Some(Duration::from_secs(3)),
                tool_timeout_sec: Some(Duration::from_secs(5)),
            },
@@ -1447,6 +1642,7 @@ exclude_slash_tmp = true
        }
        assert_eq!(docs.startup_timeout_sec, Some(Duration::from_secs(3)));
        assert_eq!(docs.tool_timeout_sec, Some(Duration::from_secs(5)));
+        assert!(docs.enabled);

        let empty = BTreeMap::new();
        write_global_mcp_servers(codex_home.path(), &empty)?;
@@ -1511,6 +1707,31 @@ startup_timeout_ms = 2500
        Ok(())
    }

+    #[tokio::test]
+    async fn load_global_mcp_servers_rejects_inline_bearer_token() -> anyhow::Result<()> {
+        let codex_home = TempDir::new()?;
+        let config_path = codex_home.path().join(CONFIG_TOML_FILE);
+        // A server entry with a `url` that still uses the legacy inline `bearer_token` key.
+        std::fs::write(
+            &config_path,
+            r#"
+[mcp_servers.docs]
+url = "https://example.com/mcp"
+bearer_token = "secret"
+"#,
+        )?;
+
+        let err = load_global_mcp_servers(codex_home.path())
+            .await
+            .expect_err("bearer_token entries should be rejected");
+        // The error must name both the rejected key and its replacement.
+        assert_eq!(err.kind(), std::io::ErrorKind::InvalidData);
+        assert!(err.to_string().contains("bearer_token"));
+        assert!(err.to_string().contains("bearer_token_env_var"));
+
+        Ok(())
+    }
+
    #[tokio::test]
    async fn write_global_mcp_servers_serializes_env_sorted() -> anyhow::Result<()> {
        let codex_home = TempDir::new()?;
@@ -1526,6 +1747,7 @@ startup_timeout_ms = 2500
                        ("ALPHA_VAR".to_string(), "1".to_string()),
                    ])),
                },
+                enabled: true,
                startup_timeout_sec: None,
                tool_timeout_sec: None,
            },
@@ -1574,8 +1796,9 @@ ZIG_VAR = "3"
            McpServerConfig {
                transport: McpServerTransportConfig::StreamableHttp {
                    url: "https://example.com/mcp".to_string(),
-                    bearer_token: Some("secret-token".to_string()),
+                    bearer_token_env_var: Some("MCP_TOKEN".to_string()),
                },
+                enabled: true,
                startup_timeout_sec: Some(Duration::from_secs(2)),
                tool_timeout_sec: None,
            },
@@ -1589,7 +1812,7 @@ ZIG_VAR = "3"
            serialized,
            r#"[mcp_servers.docs]
 url = "https://example.com/mcp"
-bearer_token = "secret-token"
+bearer_token_env_var = "MCP_TOKEN"
 startup_timeout_sec = 2.0
 "#
        );
@@ -1597,9 +1820,12 @@ startup_timeout_sec = 2.0
        let loaded = load_global_mcp_servers(codex_home.path()).await?;
        let docs = loaded.get("docs").expect("docs entry");
        match &docs.transport {
-            McpServerTransportConfig::StreamableHttp { url, bearer_token } => {
+            McpServerTransportConfig::StreamableHttp {
+                url,
+                bearer_token_env_var,
+            } => {
                assert_eq!(url, "https://example.com/mcp");
-                assert_eq!(bearer_token.as_deref(), Some("secret-token"));
+                assert_eq!(bearer_token_env_var.as_deref(), Some("MCP_TOKEN"));
            }
            other => panic!("unexpected transport {other:?}"),
        }
@@ -1610,8 +1836,9 @@ startup_timeout_sec = 2.0
            McpServerConfig {
                transport: McpServerTransportConfig::StreamableHttp {
                    url: "https://example.com/mcp".to_string(),
-                    bearer_token: None,
+                    bearer_token_env_var: None,
                },
+                enabled: true,
                startup_timeout_sec: None,
                tool_timeout_sec: None,
            },
@@ -1629,9 +1856,12 @@ url = "https://example.com/mcp"
        let loaded = load_global_mcp_servers(codex_home.path()).await?;
        let docs = loaded.get("docs").expect("docs entry");
        match &docs.transport {
-            McpServerTransportConfig::StreamableHttp { url, bearer_token } => {
+            McpServerTransportConfig::StreamableHttp {
+                url,
+                bearer_token_env_var,
+            } => {
                assert_eq!(url, "https://example.com/mcp");
-                assert!(bearer_token.is_none());
+                assert!(bearer_token_env_var.is_none());
            }
            other => panic!("unexpected transport {other:?}"),
        }
@@ -1639,6 +1869,40 @@ url = "https://example.com/mcp"
        Ok(())
    }

+    #[tokio::test]
+    async fn write_global_mcp_servers_serializes_disabled_flag() -> anyhow::Result<()> {
+        let codex_home = TempDir::new()?;
+
+        let servers = BTreeMap::from([(
+            "docs".to_string(),
+            McpServerConfig {
+                transport: McpServerTransportConfig::Stdio {
+                    command: "docs-server".to_string(),
+                    args: Vec::new(),
+                    env: None,
+                },
+                enabled: false,
+                startup_timeout_sec: None,
+                tool_timeout_sec: None,
+            },
+        )]);
+
+        write_global_mcp_servers(codex_home.path(), &servers)?;
+        // A disabled server must be written out with an explicit `enabled = false`.
+        let config_path = codex_home.path().join(CONFIG_TOML_FILE);
+        let serialized = std::fs::read_to_string(&config_path)?;
+        assert!(
+            serialized.contains("enabled = false"),
+            "serialized config missing disabled flag:\n{serialized}"
+        );
+        // Round-trip: loading the file back must yield a disabled server.
+        let loaded = load_global_mcp_servers(codex_home.path()).await?;
+        let docs = loaded.get("docs").expect("docs entry");
+        assert!(!docs.enabled);
+
+        Ok(())
+    }
+
    #[tokio::test]
    async fn persist_model_selection_updates_defaults() -> anyhow::Result<()> {
        let codex_home = TempDir::new()?;
@@ -1936,6 +2200,7 @@ model_verbosity = "high"
                notify: None,
                cwd: fixture.cwd(),
                mcp_servers: HashMap::new(),
+                mcp_oauth_credentials_store_mode: Default::default(),
                model_providers: fixture.model_provider_map.clone(),
                project_doc_max_bytes: PROJECT_DOC_MAX_BYTES,
                project_doc_fallback_filenames: Vec::new(),
@@ -1957,11 +2222,12 @@ model_verbosity = "high"
                use_experimental_unified_exec_tool: false,
                use_experimental_use_rmcp_client: false,
                include_view_image_tool: true,
+                features: Features::with_defaults(),
                active_profile: Some("o3".to_string()),
+                windows_wsl_setup_acknowledged: false,
                disable_paste_burst: false,
                tui_notifications: Default::default(),
                otel: OtelConfig::default(),
-                admin: AdminControls::default(),
            },
            o3_profile_config
        );
@@ -1998,6 +2264,7 @@ model_verbosity = "high"
            notify: None,
            cwd: fixture.cwd(),
            mcp_servers: HashMap::new(),
+            mcp_oauth_credentials_store_mode: Default::default(),
            model_providers: fixture.model_provider_map.clone(),
            project_doc_max_bytes: PROJECT_DOC_MAX_BYTES,
            project_doc_fallback_filenames: Vec::new(),
@@ -2019,11 +2286,12 @@ model_verbosity = "high"
            use_experimental_unified_exec_tool: false,
            use_experimental_use_rmcp_client: false,
            include_view_image_tool: true,
+            features: Features::with_defaults(),
            active_profile: Some("gpt3".to_string()),
+            windows_wsl_setup_acknowledged: false,
            disable_paste_burst: false,
            tui_notifications: Default::default(),
            otel: OtelConfig::default(),
-            admin: AdminControls::default(),
        };

        assert_eq!(expected_gpt3_profile_config, gpt3_profile_config);
@@ -2075,6 +2343,7 @@ model_verbosity = "high"
            notify: None,
            cwd: fixture.cwd(),
            mcp_servers: HashMap::new(),
+            mcp_oauth_credentials_store_mode: Default::default(),
            model_providers: fixture.model_provider_map.clone(),
            project_doc_max_bytes: PROJECT_DOC_MAX_BYTES,
            project_doc_fallback_filenames: Vec::new(),
@@ -2096,11 +2365,12 @@ model_verbosity = "high"
            use_experimental_unified_exec_tool: false,
            use_experimental_use_rmcp_client: false,
            include_view_image_tool: true,
+            features: Features::with_defaults(),
            active_profile: Some("zdr".to_string()),
+            windows_wsl_setup_acknowledged: false,
            disable_paste_burst: false,
            tui_notifications: Default::default(),
            otel: OtelConfig::default(),
-            admin: AdminControls::default(),
        };

        assert_eq!(expected_zdr_profile_config, zdr_profile_config);
@@ -2138,6 +2408,7 @@ model_verbosity = "high"
            notify: None,
            cwd: fixture.cwd(),
            mcp_servers: HashMap::new(),
+            mcp_oauth_credentials_store_mode: Default::default(),
            model_providers: fixture.model_provider_map.clone(),
            project_doc_max_bytes: PROJECT_DOC_MAX_BYTES,
            project_doc_fallback_filenames: Vec::new(),
@@ -2159,11 +2430,12 @@ model_verbosity = "high"
            use_experimental_unified_exec_tool: false,
            use_experimental_use_rmcp_client: false,
            include_view_image_tool: true,
+            features: Features::with_defaults(),
            active_profile: Some("gpt5".to_string()),
+            windows_wsl_setup_acknowledged: false,
            disable_paste_burst: false,
            tui_notifications: Default::default(),
            otel: OtelConfig::default(),
-            admin: AdminControls::default(),
        };

        assert_eq!(expected_gpt5_profile_config, gpt5_profile_config);
@@ -2271,6 +2543,7 @@ trust_level = "trusted"
 #[cfg(test)]
 mod notifications_tests {
    use crate::config_types::Notifications;
+    use assert_matches::assert_matches;
    use serde::Deserialize;

    #[derive(Deserialize, Debug, PartialEq)]
@@ -2290,10 +2563,7 @@ mod notifications_tests {
            notifications = true
        "#;
        let parsed: RootTomlTest = toml::from_str(toml).expect("deserialize notifications=true");
-        assert!(matches!(
-            parsed.tui.notifications,
-            Notifications::Enabled(true)
-        ));
+        assert_matches!(parsed.tui.notifications, Notifications::Enabled(true));
    }

    #[test]
@@ -2304,9 +2574,9 @@ mod notifications_tests {
        "#;
        let parsed: RootTomlTest =
            toml::from_str(toml).expect("deserialize notifications=[\"foo\"]");
-        assert!(matches!(
+        assert_matches!(
            parsed.tui.notifications,
            Notifications::Custom(ref v) if v == &vec!["foo".to_string()]
-        ));
+        );
    }
 }
--- a/codex-rs/core/src/config_profile.rs
+++ b/codex-rs/core/src/config_profile.rs
@@ -20,6 +20,18 @@ pub struct ConfigProfile {
    pub model_verbosity: Option<Verbosity>,
    pub chatgpt_base_url: Option<String>,
    pub experimental_instructions_file: Option<PathBuf>,
+    pub include_plan_tool: Option<bool>,
+    pub include_apply_patch_tool: Option<bool>,
+    pub include_view_image_tool: Option<bool>,
+    pub experimental_use_unified_exec_tool: Option<bool>,
+    pub experimental_use_exec_command_tool: Option<bool>,
+    pub experimental_use_rmcp_client: Option<bool>,
+    pub experimental_use_freeform_apply_patch: Option<bool>,
+    pub tools_web_search: Option<bool>,
+    pub tools_view_image: Option<bool>,
+    /// Optional feature toggles scoped to this profile.
+    #[serde(default)]
+    pub features: Option<crate::features::FeaturesToml>,
 }

 impl From<ConfigProfile> for codex_app_server_protocol::Profile {
--- a/codex-rs/core/src/config_types.rs
+++ b/codex-rs/core/src/config_types.rs
@@ -20,6 +20,10 @@ pub struct McpServerConfig {
    #[serde(flatten)]
    pub transport: McpServerTransportConfig,

+    /// When `false`, Codex skips initializing this MCP server.
+    #[serde(default = "default_enabled")]
+    pub enabled: bool,
+
    /// Startup timeout in seconds for initializing MCP server & initially listing tools.
    #[serde(
        default,
@@ -48,6 +52,7 @@ impl<'de> Deserialize<'de> for McpServerConfig {

            url: Option<String>,
            bearer_token: Option<String>,
+            bearer_token_env_var: Option<String>,

            #[serde(default)]
            startup_timeout_sec: Option<f64>,
@@ -55,6 +60,8 @@ impl<'de> Deserialize<'de> for McpServerConfig {
            startup_timeout_ms: Option<u64>,
            #[serde(default, with = "option_duration_secs")]
            tool_timeout_sec: Option<Duration>,
+            #[serde(default)]
+            enabled: Option<bool>,
        }

        let raw = RawMcpServerConfig::deserialize(deserializer)?;
@@ -86,11 +93,15 @@ impl<'de> Deserialize<'de> for McpServerConfig {
                args,
                env,
                url,
-                bearer_token,
+                bearer_token_env_var,
                ..
            } => {
                throw_if_set("stdio", "url", url.as_ref())?;
-                throw_if_set("stdio", "bearer_token", bearer_token.as_ref())?;
+                throw_if_set(
+                    "stdio",
+                    "bearer_token_env_var",
+                    bearer_token_env_var.as_ref(),
+                )?;
                McpServerTransportConfig::Stdio {
                    command,
                    args: args.unwrap_or_default(),
@@ -100,6 +111,7 @@ impl<'de> Deserialize<'de> for McpServerConfig {
            RawMcpServerConfig {
                url: Some(url),
                bearer_token,
+                bearer_token_env_var,
                command,
                args,
                env,
@@ -108,7 +120,11 @@ impl<'de> Deserialize<'de> for McpServerConfig {
                throw_if_set("streamable_http", "command", command.as_ref())?;
                throw_if_set("streamable_http", "args", args.as_ref())?;
                throw_if_set("streamable_http", "env", env.as_ref())?;
-                McpServerTransportConfig::StreamableHttp { url, bearer_token }
+                throw_if_set("streamable_http", "bearer_token", bearer_token.as_ref())?;
+                McpServerTransportConfig::StreamableHttp {
+                    url,
+                    bearer_token_env_var,
+                }
            }
            _ => return Err(SerdeError::custom("invalid transport")),
        };
@@ -117,10 +133,15 @@ impl<'de> Deserialize<'de> for McpServerConfig {
            transport,
            startup_timeout_sec,
            tool_timeout_sec: raw.tool_timeout_sec,
+            enabled: raw.enabled.unwrap_or_else(default_enabled),
        })
    }
 }

+const fn default_enabled() -> bool {
+    true
+}
+
 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
 #[serde(untagged, deny_unknown_fields, rename_all = "snake_case")]
 pub enum McpServerTransportConfig {
@@ -135,11 +156,11 @@ pub enum McpServerTransportConfig {
    /// https://modelcontextprotocol.io/specification/2025-06-18/basic/transports#streamable-http
    StreamableHttp {
        url: String,
-        /// A plain text bearer token to use for authentication.
-        /// This bearer token will be included in the HTTP request header as an `Authorization: Bearer <token>` header.
-        /// This should be used with caution because it lives on disk in clear text.
+        /// Name of the environment variable to read for an HTTP bearer token.
+        /// When set, requests will include the token via `Authorization: Bearer <token>`.
+        /// The actual secret value must be provided via the environment.
        #[serde(default, skip_serializing_if = "Option::is_none")]
-        bearer_token: Option<String>,
+        bearer_token_env_var: Option<String>,
    },
 }

@@ -450,6 +471,7 @@ mod tests {
                env: None
            }
        );
+        assert!(cfg.enabled);
    }

    #[test]
@@ -470,6 +492,7 @@ mod tests {
                env: None
            }
        );
+        assert!(cfg.enabled);
    }

    #[test]
@@ -491,6 +514,20 @@ mod tests {
                env: Some(HashMap::from([("FOO".to_string(), "BAR".to_string())]))
            }
        );
+        assert!(cfg.enabled);
+    }
+
+    #[test]
+    fn deserialize_disabled_server_config() {
+        let cfg: McpServerConfig = toml::from_str(
+            r#"
+            command = "echo"
+            enabled = false
+        "#,
+        )
+        .expect("should deserialize disabled server config");
+
+        assert!(!cfg.enabled);
    }

    #[test]
@@ -506,17 +543,18 @@ mod tests {
            cfg.transport,
            McpServerTransportConfig::StreamableHttp {
                url: "https://example.com/mcp".to_string(),
-                bearer_token: None
+                bearer_token_env_var: None
            }
        );
+        assert!(cfg.enabled);
    }

    #[test]
-    fn deserialize_streamable_http_server_config_with_bearer_token() {
+    fn deserialize_streamable_http_server_config_with_env_var() {
        let cfg: McpServerConfig = toml::from_str(
            r#"
            url = "https://example.com/mcp"
-            bearer_token = "secret"
+            bearer_token_env_var = "GITHUB_TOKEN"
        "#,
        )
        .expect("should deserialize http config");
@@ -525,9 +563,10 @@ mod tests {
            cfg.transport,
            McpServerTransportConfig::StreamableHttp {
                url: "https://example.com/mcp".to_string(),
-                bearer_token: Some("secret".to_string())
+                bearer_token_env_var: Some("GITHUB_TOKEN".to_string())
            }
        );
+        assert!(cfg.enabled);
    }

    #[test]
@@ -553,44 +592,18 @@ mod tests {
    }

    #[test]
-    fn deserialize_rejects_bearer_token_for_stdio_transport() {
-        toml::from_str::<McpServerConfig>(
+    fn deserialize_rejects_inline_bearer_token_field() {
+        let err = toml::from_str::<McpServerConfig>(
            r#"
-            command = "echo"
+            url = "https://example.com"
            bearer_token = "secret"
        "#,
        )
-        .expect_err("should reject bearer token for stdio transport");
+        .expect_err("should reject bearer_token field");
+
+        assert!(
+            err.to_string().contains("bearer_token is not supported"),
+            "unexpected error: {err}"
+        );
    }
 }
-
-#[derive(Deserialize, Debug, Clone, Default, PartialEq)]
-pub struct AdminConfigToml {
-    #[serde(default)]
-    pub disallow_danger_full_access: Option<bool>,
-
-    #[serde(default)]
-    pub allow_danger_with_reason: Option<bool>,
-
-    #[serde(default)]
-    pub audit: Option<AdminAuditToml>,
-}
-
-#[derive(Deserialize, Debug, Clone, Default, PartialEq)]
-pub struct AdminAuditToml {
-    #[serde(default)]
-    pub log_file: Option<String>,
-
-    #[serde(default)]
-    pub log_endpoint: Option<String>,
-
-    #[serde(default)]
-    pub log_events: Vec<AdminAuditEventKind>,
-}
-
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
-#[serde(rename_all = "lowercase")]
-pub enum AdminAuditEventKind {
-    Danger,
-    Command,
-}
--- a/codex-rs/core/src/conversation_manager.rs
+++ b/codex-rs/core/src/conversation_manager.rs
@@ -210,6 +210,7 @@ fn truncate_before_nth_user_message(history: InitialHistory, n: usize) -> Initia
 mod tests {
    use super::*;
    use crate::codex::make_session_and_context;
+    use assert_matches::assert_matches;
    use codex_protocol::models::ContentItem;
    use codex_protocol::models::ReasoningItemReasoningSummary;
    use codex_protocol::models::ResponseItem;
@@ -236,7 +237,7 @@ mod tests {

    #[test]
    fn drops_from_last_user_only() {
-        let items = vec![
+        let items = [
            user_msg("u1"),
            assistant_msg("a1"),
            assistant_msg("a2"),
@@ -283,7 +284,7 @@ mod tests {
            .map(RolloutItem::ResponseItem)
            .collect();
        let truncated2 = truncate_before_nth_user_message(InitialHistory::Forked(initial2), 2);
-        assert!(matches!(truncated2, InitialHistory::New));
+        assert_matches!(truncated2, InitialHistory::New);
    }

    #[test]
--- a/codex-rs/core/src/default_client.rs
+++ b/codex-rs/core/src/default_client.rs
@@ -20,7 +20,7 @@ use std::sync::OnceLock;
 /// The full user agent string is returned from the mcp initialize response.
 /// Parenthesis will be added by Codex. This should only specify what goes inside of the parenthesis.
 pub static USER_AGENT_SUFFIX: LazyLock<Mutex<Option<String>>> = LazyLock::new(|| Mutex::new(None));
-
+pub const DEFAULT_ORIGINATOR: &str = "codex_cli_rs";
 pub const CODEX_INTERNAL_ORIGINATOR_OVERRIDE_ENV_VAR: &str = "CODEX_INTERNAL_ORIGINATOR_OVERRIDE";
 #[derive(Debug, Clone)]
 pub struct Originator {
@@ -35,10 +35,11 @@ pub enum SetOriginatorError {
    AlreadyInitialized,
 }

-fn init_originator_from_env() -> Originator {
-    let default = "codex_cli_rs";
+fn get_originator_value(provided: Option<String>) -> Originator {
    let value = std::env::var(CODEX_INTERNAL_ORIGINATOR_OVERRIDE_ENV_VAR)
-        .unwrap_or_else(|_| default.to_string());
+        .ok()
+        .or(provided)
+        .unwrap_or(DEFAULT_ORIGINATOR.to_string());

    match HeaderValue::from_str(&value) {
        Ok(header_value) => Originator {
@@ -48,31 +49,22 @@ fn init_originator_from_env() -> Originator {
        Err(e) => {
            tracing::error!("Unable to turn originator override {value} into header value: {e}");
            Originator {
-                value: default.to_string(),
-                header_value: HeaderValue::from_static(default),
+                value: DEFAULT_ORIGINATOR.to_string(),
+                header_value: HeaderValue::from_static(DEFAULT_ORIGINATOR),
            }
        }
    }
 }

-fn build_originator(value: String) -> Result<Originator, SetOriginatorError> {
-    let header_value =
-        HeaderValue::from_str(&value).map_err(|_| SetOriginatorError::InvalidHeaderValue)?;
-    Ok(Originator {
-        value,
-        header_value,
-    })
-}
-
-pub fn set_default_originator(value: &str) -> Result<(), SetOriginatorError> {
-    let originator = build_originator(value.to_string())?;
+pub fn set_default_originator(value: String) -> Result<(), SetOriginatorError> {
+    let originator = get_originator_value(Some(value));
    ORIGINATOR
        .set(originator)
        .map_err(|_| SetOriginatorError::AlreadyInitialized)
 }

 pub fn originator() -> &'static Originator {
-    ORIGINATOR.get_or_init(init_originator_from_env)
+    ORIGINATOR.get_or_init(|| get_originator_value(None))
 }

 pub fn get_codex_user_agent() -> String {
--- a/codex-rs/core/src/error.rs
+++ b/codex-rs/core/src/error.rs
@@ -1,6 +1,7 @@
 use crate::exec::ExecToolCallOutput;
 use crate::token_data::KnownPlan;
 use crate::token_data::PlanType;
+use crate::truncate::truncate_middle;
 use codex_protocol::ConversationId;
 use codex_protocol::protocol::RateLimitSnapshot;
 use reqwest::StatusCode;
@@ -12,6 +13,9 @@ use tokio::task::JoinError;

 pub type Result<T> = std::result::Result<T, CodexErr>;

+/// Limit UI error messages to a reasonable size while keeping useful context.
+const ERROR_MESSAGE_UI_MAX_BYTES: usize = 2 * 1024; // 2 KiB
+
 #[derive(Error, Debug)]
 pub enum SandboxErr {
    /// Error from sandbox execution
@@ -55,6 +59,11 @@ pub enum CodexErr {
    #[error("stream disconnected before completion: {0}")]
    Stream(String, Option<Duration>),

+    #[error(
+        "Codex ran out of room in the model's context window. Start a new conversation or clear earlier history before retrying."
+    )]
+    ContextWindowExceeded,
+
    #[error("no conversation with id: {0}")]
    ConversationNotFound(ConversationId),

@@ -299,21 +308,44 @@ impl CodexErr {
 }

 pub fn get_error_message_ui(e: &CodexErr) -> String {
-    match e {
-        CodexErr::Sandbox(SandboxErr::Denied { output }) => output.stderr.text.clone(),
+    let message = match e {
+        CodexErr::Sandbox(SandboxErr::Denied { output }) => {
+            let aggregated = output.aggregated_output.text.trim();
+            if !aggregated.is_empty() {
+                output.aggregated_output.text.clone()
+            } else {
+                let stderr = output.stderr.text.trim();
+                let stdout = output.stdout.text.trim();
+                match (stderr.is_empty(), stdout.is_empty()) {
+                    (false, false) => format!("{stderr}\n{stdout}"),
+                    (false, true) => output.stderr.text.clone(),
+                    (true, false) => output.stdout.text.clone(),
+                    (true, true) => format!(
+                        "command failed inside sandbox with exit code {}",
+                        output.exit_code
+                    ),
+                }
+            }
+        }
        // Timeouts are not sandbox errors from a UX perspective; present them plainly
-        CodexErr::Sandbox(SandboxErr::Timeout { output }) => format!(
-            "error: command timed out after {} ms",
-            output.duration.as_millis()
-        ),
+        CodexErr::Sandbox(SandboxErr::Timeout { output }) => {
+            format!(
+                "error: command timed out after {} ms",
+                output.duration.as_millis()
+            )
+        }
        _ => e.to_string(),
-    }
+    };
+
+    truncate_middle(&message, ERROR_MESSAGE_UI_MAX_BYTES).0
 }

 #[cfg(test)]
 mod tests {
    use super::*;
+    use crate::exec::StreamOutput;
    use codex_protocol::protocol::RateLimitWindow;
+    use pretty_assertions::assert_eq;

    fn rate_limit_snapshot() -> RateLimitSnapshot {
        RateLimitSnapshot {
@@ -343,6 +375,73 @@ mod tests {
        );
    }

+    #[test]
+    fn sandbox_denied_uses_aggregated_output_when_stderr_empty() {
+        let output = ExecToolCallOutput {
+            exit_code: 77,
+            stdout: StreamOutput::new(String::new()),
+            stderr: StreamOutput::new(String::new()),
+            aggregated_output: StreamOutput::new("aggregate detail".to_string()),
+            duration: Duration::from_millis(10),
+            timed_out: false,
+        };
+        let err = CodexErr::Sandbox(SandboxErr::Denied {
+            output: Box::new(output),
+        });
+        assert_eq!(get_error_message_ui(&err), "aggregate detail");
+    }
+
+    #[test]
+    fn sandbox_denied_reports_both_streams_when_available() {
+        let output = ExecToolCallOutput {
+            exit_code: 9,
+            stdout: StreamOutput::new("stdout detail".to_string()),
+            stderr: StreamOutput::new("stderr detail".to_string()),
+            aggregated_output: StreamOutput::new(String::new()),
+            duration: Duration::from_millis(10),
+            timed_out: false,
+        };
+        let err = CodexErr::Sandbox(SandboxErr::Denied {
+            output: Box::new(output),
+        });
+        assert_eq!(get_error_message_ui(&err), "stderr detail\nstdout detail");
+    }
+
+    #[test]
+    fn sandbox_denied_reports_stdout_when_no_stderr() {
+        let output = ExecToolCallOutput {
+            exit_code: 11,
+            stdout: StreamOutput::new("stdout only".to_string()),
+            stderr: StreamOutput::new(String::new()),
+            aggregated_output: StreamOutput::new(String::new()),
+            duration: Duration::from_millis(8),
+            timed_out: false,
+        };
+        let err = CodexErr::Sandbox(SandboxErr::Denied {
+            output: Box::new(output),
+        });
+        assert_eq!(get_error_message_ui(&err), "stdout only");
+    }
+
+    #[test]
+    fn sandbox_denied_reports_exit_code_when_no_output_available() {
+        let output = ExecToolCallOutput {
+            exit_code: 13,
+            stdout: StreamOutput::new(String::new()),
+            stderr: StreamOutput::new(String::new()),
+            aggregated_output: StreamOutput::new(String::new()),
+            duration: Duration::from_millis(5),
+            timed_out: false,
+        };
+        let err = CodexErr::Sandbox(SandboxErr::Denied {
+            output: Box::new(output),
+        });
+        assert_eq!(
+            get_error_message_ui(&err),
+            "command failed inside sandbox with exit code 13"
+        );
+    }
+
    #[test]
    fn usage_limit_reached_error_formats_free_plan() {
        let err = UsageLimitReachedError {
--- a/codex-rs/core/src/event_mapping.rs
+++ b/codex-rs/core/src/event_mapping.rs
@@ -127,6 +127,7 @@ mod tests {
    use super::map_response_item_to_event_messages;
    use crate::protocol::EventMsg;
    use crate::protocol::InputMessageKind;
+    use assert_matches::assert_matches;
    use codex_protocol::models::ContentItem;
    use codex_protocol::models::ResponseItem;
    use pretty_assertions::assert_eq;
@@ -158,7 +159,7 @@ mod tests {
        match &events[0] {
            EventMsg::UserMessage(user) => {
                assert_eq!(user.message, "Hello world");
-                assert!(matches!(user.kind, Some(InputMessageKind::Plain)));
+                assert_matches!(user.kind, Some(InputMessageKind::Plain));
                assert_eq!(user.images, Some(vec![img1, img2]));
            }
            other => panic!("expected UserMessage, got {other:?}"),
--- a/codex-rs/core/src/exec.rs
+++ b/codex-rs/core/src/exec.rs
@@ -27,7 +27,6 @@ use crate::protocol::SandboxPolicy;
 use crate::seatbelt::spawn_command_under_seatbelt;
 use crate::spawn::StdioPolicy;
 use crate::spawn::spawn_child_async;
-use serde::Serialize;

 const DEFAULT_TIMEOUT_MS: u64 = 10_000;

@@ -62,8 +61,7 @@ impl ExecParams {
    }
 }

-#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize)]
-#[serde(rename_all = "kebab-case")]
+#[derive(Clone, Copy, Debug, PartialEq)]
 pub enum SandboxType {
    None,

@@ -179,7 +177,7 @@ pub async fn process_exec_tool_call(
                }));
            }

-            if exit_code != 0 && is_likely_sandbox_denied(sandbox_type, exit_code) {
+            if is_likely_sandbox_denied(sandbox_type, &exec_output) {
                return Err(CodexErr::Sandbox(SandboxErr::Denied {
                    output: Box::new(exec_output),
                }));
@@ -197,21 +195,57 @@ pub async fn process_exec_tool_call(
 /// We don't have a fully deterministic way to tell if our command failed
 /// because of the sandbox - a command in the user's zshrc file might hit an
 /// error, but the command itself might fail or succeed for other reasons.
-/// For now, we conservatively check for 'command not found' (exit code 127),
-/// and can add additional cases as necessary.
-fn is_likely_sandbox_denied(sandbox_type: SandboxType, exit_code: i32) -> bool {
-    if sandbox_type == SandboxType::None {
+/// For now, we conservatively check for well-known command failure exit codes and
+/// also look for common sandbox denial keywords in the command output.
+fn is_likely_sandbox_denied(sandbox_type: SandboxType, exec_output: &ExecToolCallOutput) -> bool {
+    if sandbox_type == SandboxType::None || exec_output.exit_code == 0 {
        return false;
    }

    // Quick rejects: well-known non-sandbox shell exit codes
-    // 127: command not found, 2: misuse of shell builtins
-    if exit_code == 127 {
+    // 2: misuse of shell builtins
+    // 126: permission denied
+    // 127: command not found
+    const QUICK_REJECT_EXIT_CODES: [i32; 3] = [2, 126, 127];
+    if QUICK_REJECT_EXIT_CODES.contains(&exec_output.exit_code) {
        return false;
    }

-    // For all other cases, we assume the sandbox is the cause
-    true
+    const SANDBOX_DENIED_KEYWORDS: [&str; 6] = [
+        "operation not permitted",
+        "permission denied",
+        "read-only file system",
+        "seccomp",
+        "sandbox",
+        "landlock",
+    ];
+
+    if [
+        &exec_output.stderr.text,
+        &exec_output.stdout.text,
+        &exec_output.aggregated_output.text,
+    ]
+    .into_iter()
+    .any(|section| {
+        let lower = section.to_lowercase();
+        SANDBOX_DENIED_KEYWORDS
+            .iter()
+            .any(|needle| lower.contains(needle))
+    }) {
+        return true;
+    }
+
+    #[cfg(unix)]
+    {
+        const SIGSYS_CODE: i32 = libc::SIGSYS;
+        if sandbox_type == SandboxType::LinuxSeccomp
+            && exec_output.exit_code == EXIT_CODE_SIGNAL_BASE + SIGSYS_CODE
+        {
+            return true;
+        }
+    }
+
+    false
 }

 #[derive(Debug)]
@@ -438,3 +472,77 @@ fn synthetic_exit_status(code: i32) -> ExitStatus {
    #[expect(clippy::unwrap_used)]
    std::process::ExitStatus::from_raw(code.try_into().unwrap())
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::time::Duration;
+
+    fn make_exec_output(
+        exit_code: i32,
+        stdout: &str,
+        stderr: &str,
+        aggregated: &str,
+    ) -> ExecToolCallOutput {
+        ExecToolCallOutput {
+            exit_code,
+            stdout: StreamOutput::new(stdout.to_string()),
+            stderr: StreamOutput::new(stderr.to_string()),
+            aggregated_output: StreamOutput::new(aggregated.to_string()),
+            duration: Duration::from_millis(1),
+            timed_out: false,
+        }
+    }
+
+    #[test]
+    fn sandbox_detection_requires_keywords() {
+        let output = make_exec_output(1, "", "", "");
+        assert!(!is_likely_sandbox_denied(
+            SandboxType::LinuxSeccomp,
+            &output
+        ));
+    }
+
+    #[test]
+    fn sandbox_detection_identifies_keyword_in_stderr() {
+        let output = make_exec_output(1, "", "Operation not permitted", "");
+        assert!(is_likely_sandbox_denied(SandboxType::LinuxSeccomp, &output));
+    }
+
+    #[test]
+    fn sandbox_detection_respects_quick_reject_exit_codes() {
+        let output = make_exec_output(127, "", "command not found", "");
+        assert!(!is_likely_sandbox_denied(
+            SandboxType::LinuxSeccomp,
+            &output
+        ));
+    }
+
+    #[test]
+    fn sandbox_detection_ignores_non_sandbox_mode() {
+        let output = make_exec_output(1, "", "Operation not permitted", "");
+        assert!(!is_likely_sandbox_denied(SandboxType::None, &output));
+    }
+
+    #[test]
+    fn sandbox_detection_uses_aggregated_output() {
+        let output = make_exec_output(
+            101,
+            "",
+            "",
+            "cargo failed: Read-only file system when writing target",
+        );
+        assert!(is_likely_sandbox_denied(
+            SandboxType::MacosSeatbelt,
+            &output
+        ));
+    }
+
+    #[cfg(unix)]
+    #[test]
+    fn sandbox_detection_flags_sigsys_exit_code() {
+        let exit_code = EXIT_CODE_SIGNAL_BASE + libc::SIGSYS;
+        let output = make_exec_output(exit_code, "", "", "");
+        assert!(is_likely_sandbox_denied(SandboxType::LinuxSeccomp, &output));
+    }
+}
--- a/codex-rs/core/src/executor/backends.rs
+++ b/codex-rs/core/src/executor/backends.rs
@@ -6,6 +6,7 @@ use async_trait::async_trait;
 use crate::CODEX_APPLY_PATCH_ARG1;
 use crate::apply_patch::ApplyPatchExec;
 use crate::exec::ExecParams;
+use crate::executor::ExecutorConfig;
 use crate::function_tool::FunctionCallError;

 pub(crate) enum ExecutionMode {
@@ -22,6 +23,7 @@ pub(crate) trait ExecutionBackend: Send + Sync {
        params: ExecParams,
        // Required for downcasting the apply_patch.
        mode: &ExecutionMode,
+        config: &ExecutorConfig,
    ) -> Result<ExecParams, FunctionCallError>;

    fn stream_stdout(&self, _mode: &ExecutionMode) -> bool {
@@ -47,6 +49,7 @@ impl ExecutionBackend for ShellBackend {
        &self,
        params: ExecParams,
        mode: &ExecutionMode,
+        _config: &ExecutorConfig,
    ) -> Result<ExecParams, FunctionCallError> {
        match mode {
            ExecutionMode::Shell => Ok(params),
@@ -65,17 +68,22 @@ impl ExecutionBackend for ApplyPatchBackend {
        &self,
        params: ExecParams,
        mode: &ExecutionMode,
+        config: &ExecutorConfig,
    ) -> Result<ExecParams, FunctionCallError> {
        match mode {
            ExecutionMode::ApplyPatch(exec) => {
-                let path_to_codex = env::current_exe()
-                    .ok()
-                    .map(|p| p.to_string_lossy().to_string())
-                    .ok_or_else(|| {
-                        FunctionCallError::RespondToModel(
-                            "failed to determine path to codex executable".to_string(),
-                        )
-                    })?;
+                let path_to_codex = if let Some(exe_path) = &config.codex_exe {
+                    exe_path.to_string_lossy().to_string()
+                } else {
+                    env::current_exe()
+                        .ok()
+                        .map(|p| p.to_string_lossy().to_string())
+                        .ok_or_else(|| {
+                            FunctionCallError::RespondToModel(
+                                "failed to determine path to codex executable".to_string(),
+                            )
+                        })?
+                };

                let patch = exec.action.patch.clone();
                Ok(ExecParams {
--- a/codex-rs/core/src/executor/runner.rs
+++ b/codex-rs/core/src/executor/runner.rs
@@ -6,11 +6,7 @@ use std::time::Duration;
 use super::backends::ExecutionMode;
 use super::backends::backend_for_mode;
 use super::cache::ApprovalCache;
-use crate::admin_controls::AdminAuditConfig;
-use crate::admin_controls::build_command_audit_payload;
-use crate::admin_controls::log_admin_event;
 use crate::codex::Session;
-use crate::config_types::AdminAuditEventKind;
 use crate::error::CodexErr;
 use crate::error::SandboxErr;
 use crate::error::get_error_message_ui;
@@ -34,22 +30,19 @@ use codex_otel::otel_event_manager::ToolDecisionSource;
 pub(crate) struct ExecutorConfig {
    pub(crate) sandbox_policy: SandboxPolicy,
    pub(crate) sandbox_cwd: PathBuf,
-    codex_linux_sandbox_exe: Option<PathBuf>,
-    pub(crate) admin_audit: Option<AdminAuditConfig>,
+    pub(crate) codex_exe: Option<PathBuf>,
 }

 impl ExecutorConfig {
    pub(crate) fn new(
        sandbox_policy: SandboxPolicy,
        sandbox_cwd: PathBuf,
-        codex_linux_sandbox_exe: Option<PathBuf>,
-        admin_audit: Option<AdminAuditConfig>,
+        codex_exe: Option<PathBuf>,
    ) -> Self {
        Self {
            sandbox_policy,
            sandbox_cwd,
-            codex_linux_sandbox_exe,
-            admin_audit,
+            codex_exe,
        }
    }
 }
@@ -93,7 +86,14 @@ impl Executor {
                maybe_translate_shell_command(request.params, session, request.use_shell_profile);
        }

-        // Step 1: Normalise parameters via the selected backend.
+        // Step 1: Snapshot sandbox configuration so it stays stable for this run.
+        let config = self
+            .config
+            .read()
+            .map_err(|_| ExecError::rejection("executor config poisoned"))?
+            .clone();
+
+        // Step 2: Normalise parameters via the selected backend.
        let backend = backend_for_mode(&request.mode);
        let stdout_stream = if backend.stream_stdout(&request.mode) {
            request.stdout_stream.clone()
@@ -101,16 +101,9 @@ impl Executor {
            None
        };
        request.params = backend
-            .prepare(request.params, &request.mode)
+            .prepare(request.params, &request.mode, &config)
            .map_err(ExecError::from)?;

-        // Step 2: Snapshot sandbox configuration so it stays stable for this run.
-        let config = self
-            .config
-            .read()
-            .map_err(|_| ExecError::rejection("executor config poisoned"))?
-            .clone();
-
        // Step 3: Decide sandbox placement, prompting for approval when needed.
        let sandbox_decision = select_sandbox(
            &request,
@@ -229,23 +222,12 @@ impl Executor {
        config: &ExecutorConfig,
        stdout_stream: Option<StdoutStream>,
    ) -> Result<ExecToolCallOutput, CodexErr> {
-        if let Some(admin_audit) = config.admin_audit.as_ref()
-            && admin_audit.should_log(AdminAuditEventKind::Command)
-        {
-            let payload = build_command_audit_payload(
-                &params,
-                sandbox,
-                &config.sandbox_policy,
-                &config.sandbox_cwd,
-            );
-            log_admin_event(admin_audit, payload);
-        }
        process_exec_tool_call(
            params,
            sandbox,
            &config.sandbox_policy,
            &config.sandbox_cwd,
-            &config.codex_linux_sandbox_exe,
+            &config.codex_exe,
            stdout_stream,
        )
        .await
@@ -398,6 +380,23 @@ mod tests {
        assert_eq!(message, "failed in sandbox: sandbox stderr");
    }

+    #[test]
+    fn sandbox_failure_message_falls_back_to_aggregated_output() {
+        let output = ExecToolCallOutput {
+            exit_code: 101,
+            stdout: StreamOutput::new(String::new()),
+            stderr: StreamOutput::new(String::new()),
+            aggregated_output: StreamOutput::new("aggregate text".to_string()),
+            duration: Duration::from_millis(10),
+            timed_out: false,
+        };
+        let err = SandboxErr::Denied {
+            output: Box::new(output),
+        };
+        let message = sandbox_failure_message(err);
+        assert_eq!(message, "failed in sandbox: aggregate text");
+    }
+
    #[test]
    fn normalize_function_error_synthesizes_payload() {
        let err = FunctionCallError::RespondToModel("boom".to_string());
--- a/codex-rs/core/src/executor/sandbox.rs
+++ b/codex-rs/core/src/executor/sandbox.rs
@@ -207,7 +207,7 @@ mod tests {
            action,
            user_explicitly_approved_this_action: true,
        };
-        let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None, None);
+        let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None);
        let request = ExecutionRequest {
            params: ExecParams {
                command: vec!["apply_patch".into()],
@@ -250,12 +250,7 @@ mod tests {
            action,
            user_explicitly_approved_this_action: false,
        };
-        let cfg = ExecutorConfig::new(
-            SandboxPolicy::DangerFullAccess,
-            std::env::temp_dir(),
-            None,
-            None,
-        );
+        let cfg = ExecutorConfig::new(SandboxPolicy::DangerFullAccess, std::env::temp_dir(), None);
        let request = ExecutionRequest {
            params: ExecParams {
                command: vec!["apply_patch".into()],
@@ -299,7 +294,7 @@ mod tests {
            action,
            user_explicitly_approved_this_action: false,
        };
-        let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None, None);
+        let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None);
        let request = ExecutionRequest {
            params: ExecParams {
                command: vec!["apply_patch".into()],
@@ -338,12 +333,7 @@ mod tests {
    #[tokio::test]
    async fn select_shell_autoapprove_in_danger_mode() {
        let (session, ctx) = make_session_and_context();
-        let cfg = ExecutorConfig::new(
-            SandboxPolicy::DangerFullAccess,
-            std::env::temp_dir(),
-            None,
-            None,
-        );
+        let cfg = ExecutorConfig::new(SandboxPolicy::DangerFullAccess, std::env::temp_dir(), None);
        let request = ExecutionRequest {
            params: ExecParams {
                command: vec!["some-unknown".into()],
@@ -379,7 +369,7 @@ mod tests {
    #[tokio::test]
    async fn select_shell_escalates_on_failure_with_platform_sandbox() {
        let (session, ctx) = make_session_and_context();
-        let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None, None);
+        let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None);
        let request = ExecutionRequest {
            params: ExecParams {
                // Unknown command => untrusted but not flagged dangerous
--- a/codex-rs/core/src/features.rs
+++ b/codex-rs/core/src/features.rs
@@ -0,0 +1,250 @@
+//! Centralized feature flags and metadata.
+//!
+//! This module defines a small set of toggles that gate experimental and
+//! optional behavior across the codebase. Instead of wiring individual
+//! booleans through multiple types, call sites consult a single `Features`
+//! container attached to `Config`.
+
+use crate::config::ConfigToml;
+use crate::config_profile::ConfigProfile;
+use serde::Deserialize;
+use std::collections::BTreeMap;
+use std::collections::BTreeSet;
+
+mod legacy;
+pub(crate) use legacy::LegacyFeatureToggles;
+
+/// Lifecycle stage recorded for each feature in its `FeatureSpec` (see `FEATURES`).
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum Stage {
+    Experimental,
+    Beta,
+    Stable,
+    Deprecated,
+    Removed,
+}
+
+/// Features that configuration can toggle; each variant has one `FeatureSpec` in `FEATURES`.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum Feature {
+    /// Use the single unified PTY-backed exec tool.
+    UnifiedExec,
+    /// Use the streamable exec-command/write-stdin tool pair.
+    StreamableShell,
+    /// Use the official Rust MCP client (rmcp).
+    RmcpClient,
+    /// Include the plan tool.
+    PlanTool,
+    /// Include the freeform apply_patch tool.
+    ApplyPatchFreeform,
+    /// Include the view_image tool.
+    ViewImageTool,
+    /// Allow the model to request web searches.
+    WebSearchRequest,
+}
+
+impl Feature {
+    /// Canonical `[features]` key registered for this feature in `FEATURES`.
+    pub fn key(self) -> &'static str {
+        self.info().key
+    }
+    /// Lifecycle stage registered for this feature in `FEATURES`.
+    pub fn stage(self) -> Stage {
+        self.info().stage
+    }
+    /// Whether `Features::with_defaults` turns this feature on.
+    pub fn default_enabled(self) -> bool {
+        self.info().default_enabled
+    }
+
+    /// Looks up the registry entry; a variant without one is a programming error.
+    fn info(self) -> &'static FeatureSpec {
+        let found = FEATURES.iter().find(|spec| spec.id == self);
+        found.unwrap_or_else(|| unreachable!("missing FeatureSpec for {:?}", self))
+    }
+}
+
+/// Effective set of enabled features; the derived `Default` is empty, unlike `with_defaults()`.
+#[derive(Debug, Clone, Default, PartialEq)]
+pub struct Features {
+    enabled: BTreeSet<Feature>, // a feature is on iff it is a member of this set
+}
+
+#[derive(Debug, Clone, Default)]
+pub struct FeatureOverrides { // applied last in `Features::from_config`; `None` = unchanged
+    pub include_plan_tool: Option<bool>, // sets `Feature::PlanTool`
+    pub include_apply_patch_tool: Option<bool>, // sets `Feature::ApplyPatchFreeform`
+    pub include_view_image_tool: Option<bool>, // sets `Feature::ViewImageTool`
+    pub web_search_request: Option<bool>, // sets `Feature::WebSearchRequest`
+}
+
+impl FeatureOverrides {
+    fn apply(self, features: &mut Features) {
+        let toggles = LegacyFeatureToggles {
+            include_plan_tool: self.include_plan_tool,
+            include_apply_patch_tool: self.include_apply_patch_tool,
+            include_view_image_tool: self.include_view_image_tool,
+            tools_web_search: self.web_search_request,
+            ..Default::default()
+        };
+        toggles.apply(features); // reuses the legacy-toggle path, so applied values are logged
+    }
+}
+
+impl Features {
+    /// Builds the baseline set: every `FEATURES` entry flagged `default_enabled`.
+    pub fn with_defaults() -> Self {
+        let enabled = FEATURES
+            .iter()
+            .filter(|spec| spec.default_enabled)
+            .map(|spec| spec.id)
+            .collect();
+        Self { enabled }
+    }
+
+    /// Reports whether `f` is currently switched on.
+    pub fn enabled(&self, f: Feature) -> bool {
+        self.enabled.contains(&f)
+    }
+
+    /// Switches `f` on; a no-op if it is already on.
+    pub fn enable(&mut self, f: Feature) {
+        self.enabled.insert(f);
+    }
+
+    /// Switches `f` off; a no-op if it is already off.
+    pub fn disable(&mut self, f: Feature) {
+        self.enabled.remove(&f);
+    }
+
+    /// Applies `key -> bool` toggles (e.g. from TOML); unknown keys are logged and skipped.
+    pub fn apply_map(&mut self, m: &BTreeMap<String, bool>) {
+        for (k, &on) in m {
+            let Some(feature) = feature_for_key(k) else {
+                tracing::warn!("unknown feature key in config: {k}");
+                continue;
+            };
+            if on {
+                self.enable(feature);
+            } else {
+                self.disable(feature);
+            }
+        }
+    }
+
+    /// Resolves the effective features. Layers apply in order, later ones winning: built-in
+    /// defaults, base config, the profile, then `overrides`.
+    pub fn from_config(
+        cfg: &ConfigToml,
+        config_profile: &ConfigProfile,
+        overrides: FeatureOverrides,
+    ) -> Self {
+        let mut features = Features::with_defaults();
+        // Base config: legacy toggles first, then the `[features]` table.
+        let tools = cfg.tools.as_ref();
+        LegacyFeatureToggles {
+            experimental_use_freeform_apply_patch: cfg.experimental_use_freeform_apply_patch,
+            experimental_use_exec_command_tool: cfg.experimental_use_exec_command_tool,
+            experimental_use_unified_exec_tool: cfg.experimental_use_unified_exec_tool,
+            experimental_use_rmcp_client: cfg.experimental_use_rmcp_client,
+            tools_web_search: tools.and_then(|t| t.web_search),
+            tools_view_image: tools.and_then(|t| t.view_image),
+            ..Default::default()
+        }
+        .apply(&mut features);
+        if let Some(table) = &cfg.features {
+            features.apply_map(&table.entries);
+        }
+
+        // Active profile: the same two steps, layered over the base config.
+        LegacyFeatureToggles {
+            include_plan_tool: config_profile.include_plan_tool,
+            include_apply_patch_tool: config_profile.include_apply_patch_tool,
+            include_view_image_tool: config_profile.include_view_image_tool,
+            experimental_use_freeform_apply_patch: config_profile
+                .experimental_use_freeform_apply_patch,
+            experimental_use_exec_command_tool: config_profile.experimental_use_exec_command_tool,
+            experimental_use_unified_exec_tool: config_profile.experimental_use_unified_exec_tool,
+            experimental_use_rmcp_client: config_profile.experimental_use_rmcp_client,
+            tools_web_search: config_profile.tools_web_search,
+            tools_view_image: config_profile.tools_view_image,
+        }
+        .apply(&mut features);
+        if let Some(table) = &config_profile.features {
+            features.apply_map(&table.entries);
+        }
+        overrides.apply(&mut features);
+        features
+    }
+}
+
+/// Resolves a key accepted in `[features]` tables: canonical keys from `FEATURES` are
+/// tried first, then the legacy aliases.
+fn feature_for_key(key: &str) -> Option<Feature> {
+    FEATURES
+        .iter()
+        .find(|spec| spec.key == key)
+        .map(|spec| spec.id)
+        .or_else(|| legacy::feature_for_key(key))
+}
+
+/// Deserializable `[features]` table for TOML; all of its keys are collected into `entries`.
+#[derive(Deserialize, Debug, Clone, Default, PartialEq)]
+pub struct FeaturesToml {
+    #[serde(flatten)]
+    pub entries: BTreeMap<String, bool>, // raw key -> bool pairs, fed to `Features::apply_map`
+}
+
+/// One entry of the feature registry; the registry itself is the `FEATURES` table.
+#[derive(Debug, Clone, Copy)]
+pub struct FeatureSpec {
+    pub id: Feature, // the feature this entry describes
+    pub key: &'static str, // canonical key matched by `feature_for_key`
+    pub stage: Stage,
+    pub default_enabled: bool, // whether `Features::with_defaults` enables it
+}
+
+pub const FEATURES: &[FeatureSpec] = &[ // one entry per `Feature` variant, or `Feature::info` panics
+    FeatureSpec {
+        id: Feature::UnifiedExec,
+        key: "unified_exec",
+        stage: Stage::Experimental,
+        default_enabled: false,
+    },
+    FeatureSpec {
+        id: Feature::StreamableShell,
+        key: "streamable_shell",
+        stage: Stage::Experimental,
+        default_enabled: false,
+    },
+    FeatureSpec {
+        id: Feature::RmcpClient,
+        key: "rmcp_client",
+        stage: Stage::Experimental,
+        default_enabled: false,
+    },
+    FeatureSpec {
+        id: Feature::PlanTool,
+        key: "plan_tool",
+        stage: Stage::Stable,
+        default_enabled: false,
+    },
+    FeatureSpec {
+        id: Feature::ApplyPatchFreeform,
+        key: "apply_patch_freeform",
+        stage: Stage::Beta,
+        default_enabled: false,
+    },
+    FeatureSpec {
+        id: Feature::ViewImageTool,
+        key: "view_image_tool",
+        stage: Stage::Stable,
+        default_enabled: true, // the only feature enabled by default
+    },
+    FeatureSpec {
+        id: Feature::WebSearchRequest,
+        key: "web_search_request",
+        stage: Stage::Stable,
+        default_enabled: false,
+    },
+];
--- a/codex-rs/core/src/features/legacy.rs
+++ b/codex-rs/core/src/features/legacy.rs
@@ -0,0 +1,158 @@
+use super::Feature;
+use super::Features;
+use tracing::info;
+
+#[derive(Clone, Copy)]
+struct Alias { // maps one legacy configuration key onto the feature it toggles
+    legacy_key: &'static str,
+    feature: Feature,
+}
+
+const ALIASES: &[Alias] = &[ // legacy keys still resolved by `feature_for_key`
+    Alias {
+        legacy_key: "experimental_use_unified_exec_tool",
+        feature: Feature::UnifiedExec,
+    },
+    Alias {
+        legacy_key: "experimental_use_exec_command_tool",
+        feature: Feature::StreamableShell,
+    },
+    Alias {
+        legacy_key: "experimental_use_rmcp_client",
+        feature: Feature::RmcpClient,
+    },
+    Alias {
+        legacy_key: "experimental_use_freeform_apply_patch",
+        feature: Feature::ApplyPatchFreeform,
+    },
+    Alias {
+        legacy_key: "include_apply_patch_tool",
+        feature: Feature::ApplyPatchFreeform, // second alias for the same feature
+    },
+    Alias {
+        legacy_key: "include_plan_tool",
+        feature: Feature::PlanTool,
+    },
+    Alias {
+        legacy_key: "include_view_image_tool",
+        feature: Feature::ViewImageTool,
+    },
+    Alias {
+        legacy_key: "web_search",
+        feature: Feature::WebSearchRequest,
+    },
+];
+
+/// Resolves a legacy configuration key to the feature it toggles.
+///
+/// Returns `None` when `key` is not listed in `ALIASES`. On a match, `log_alias` is called
+/// to point at the canonical `[features]` key.
+pub(crate) fn feature_for_key(key: &str) -> Option<Feature> {
+    let alias = ALIASES.iter().find(|alias| alias.legacy_key == key)?;
+    log_alias(alias.legacy_key, alias.feature);
+    Some(alias.feature)
+}
+
+#[derive(Debug, Default)]
+pub struct LegacyFeatureToggles { // `None` fields are skipped by `apply`
+    pub include_plan_tool: Option<bool>, // -> `Feature::PlanTool`
+    pub include_apply_patch_tool: Option<bool>, // -> `Feature::ApplyPatchFreeform`
+    pub include_view_image_tool: Option<bool>, // -> `Feature::ViewImageTool`
+    pub experimental_use_freeform_apply_patch: Option<bool>, // -> `Feature::ApplyPatchFreeform`
+    pub experimental_use_exec_command_tool: Option<bool>, // -> `Feature::StreamableShell`
+    pub experimental_use_unified_exec_tool: Option<bool>, // -> `Feature::UnifiedExec`
+    pub experimental_use_rmcp_client: Option<bool>, // -> `Feature::RmcpClient`
+    pub tools_web_search: Option<bool>, // -> `Feature::WebSearchRequest`
+    pub tools_view_image: Option<bool>, // -> `Feature::ViewImageTool`
+}
+
+impl LegacyFeatureToggles {
+    /// Applies every toggle that is `Some` to `features`; `None` toggles are skipped.
+    ///
+    /// Toggles run in the fixed order listed below, so when two toggles that target the
+    /// same feature are both set, the later one wins:
+    ///
+    /// - `experimental_use_freeform_apply_patch` runs after `include_apply_patch_tool`
+    ///   (both drive `Feature::ApplyPatchFreeform`);
+    /// - `tools_view_image` runs after `include_view_image_tool`
+    ///   (both drive `Feature::ViewImageTool`).
+    ///
+    /// `set_if_some` also reports each applied toggle through `log_alias`.
+    pub fn apply(self, features: &mut Features) {
+        // Each entry: (target feature, configured value, key name passed to `log_alias`).
+        let toggles = [
+            // Plan tool.
+            (Feature::PlanTool, self.include_plan_tool, "include_plan_tool"),
+            // Freeform apply_patch: two spellings, the experimental one applied last.
+            (
+                Feature::ApplyPatchFreeform,
+                self.include_apply_patch_tool,
+                "include_apply_patch_tool",
+            ),
+            (
+                Feature::ApplyPatchFreeform,
+                self.experimental_use_freeform_apply_patch,
+                "experimental_use_freeform_apply_patch",
+            ),
+            // Streamable shell.
+            (
+                Feature::StreamableShell,
+                self.experimental_use_exec_command_tool,
+                "experimental_use_exec_command_tool",
+            ),
+            // Unified exec.
+            (
+                Feature::UnifiedExec,
+                self.experimental_use_unified_exec_tool,
+                "experimental_use_unified_exec_tool",
+            ),
+            // rmcp client.
+            (
+                Feature::RmcpClient,
+                self.experimental_use_rmcp_client,
+                "experimental_use_rmcp_client",
+            ),
+            // Web search.
+            (Feature::WebSearchRequest, self.tools_web_search, "tools.web_search"),
+            // View-image tool: two spellings, the `tools.view_image` one applied last.
+            (Feature::ViewImageTool, self.include_view_image_tool, "include_view_image_tool"),
+            (Feature::ViewImageTool, self.tools_view_image, "tools.view_image"),
+        ];
+
+        for (feature, maybe_value, alias_key) in toggles {
+            set_if_some(features, feature, maybe_value, alias_key);
+        }
+    }
+}
+
+/// Applies `maybe_value` to `feature` when it is set, then reports `alias_key` via `log_alias`.
+fn set_if_some(
+    features: &mut Features,
+    feature: Feature,
+    maybe_value: Option<bool>,
+    alias_key: &'static str,
+) {
+    let Some(enabled) = maybe_value else { return };
+    set_feature(features, feature, enabled);
+    log_alias(alias_key, feature);
+}
+
+/// Switches `feature` on or off in `features` according to `enabled`.
+fn set_feature(features: &mut Features, feature: Feature, enabled: bool) {
+    match enabled {
+        true => features.enable(feature),
+        false => features.disable(feature),
+    }
+}
+
+/// Logs a hint to use the canonical key, unless `alias` already is the canonical key.
+fn log_alias(alias: &str, feature: Feature) {
+    let canonical = feature.key();
+    if alias != canonical {
+        info!(
+            %alias,
+            canonical,
+            "legacy feature toggle detected; prefer `[features].{canonical}`"
+        );
+    }
+}
--- a/codex-rs/core/src/lib.rs
+++ b/codex-rs/core/src/lib.rs
@@ -5,13 +5,14 @@
 // the TUI or the tracing stack).
 #![deny(clippy::print_stdout, clippy::print_stderr)]

-pub mod admin_controls;
 mod apply_patch;
 pub mod auth;
 pub mod bash;
 mod chat_completions;
 mod client;
 mod client_common;
+mod codebase_change_notice;
+mod codebase_snapshot;
 pub mod codex;
 mod codex_conversation;
 pub mod token_data;
@@ -30,15 +31,16 @@ pub mod exec;
 mod exec_command;
 pub mod exec_env;
 pub mod executor;
+pub mod features;
 mod flags;
 pub mod git_info;
 pub mod landlock;
+pub mod mcp;
 mod mcp_connection_manager;
 mod mcp_tool_call;
 mod message_history;
 mod model_provider_info;
 pub mod parse_command;
-mod path_utils;
 mod truncate;
 mod unified_exec;
 mod user_instructions;
--- a/codex-rs/core/src/mcp/auth.rs
+++ b/codex-rs/core/src/mcp/auth.rs
@@ -0,0 +1,58 @@
+use std::collections::HashMap;
+
+use anyhow::Result;
+use codex_protocol::protocol::McpAuthStatus;
+use codex_rmcp_client::OAuthCredentialsStoreMode;
+use codex_rmcp_client::determine_streamable_http_auth_status;
+use futures::future::join_all;
+use tracing::warn;
+
+use crate::config_types::McpServerConfig;
+use crate::config_types::McpServerTransportConfig;
+
+/// Determines the auth status of every given MCP server, awaiting all checks via `join_all`.
+/// Returns a map keyed by server name; a failed check is logged and reported as
+/// `McpAuthStatus::Unsupported`.
+pub async fn compute_auth_statuses<'a, I>(
+    servers: I,
+    store_mode: OAuthCredentialsStoreMode,
+) -> HashMap<String, McpAuthStatus>
+where
+    I: IntoIterator<Item = (&'a String, &'a McpServerConfig)>,
+{
+    let checks = servers.into_iter().map(|(name, config)| {
+        let (name, config) = (name.clone(), config.clone());
+        async move {
+            let status = compute_auth_status(&name, &config, store_mode)
+                .await
+                .unwrap_or_else(|error| {
+                    warn!("failed to determine auth status for MCP server `{name}`: {error:?}");
+                    McpAuthStatus::Unsupported
+                });
+            (name, status)
+        }
+    });
+    join_all(checks).await.into_iter().collect()
+}
+
+/// Determines the auth status of a single MCP server from its transport.
+///
+/// - Stdio transports always yield `McpAuthStatus::Unsupported`.
+/// - Streamable HTTP transports are delegated to `determine_streamable_http_auth_status`,
+///   whose result (including any error) is returned unchanged.
+async fn compute_auth_status(
+    server_name: &str,
+    config: &McpServerConfig,
+    store_mode: OAuthCredentialsStoreMode,
+) -> Result<McpAuthStatus> {
+    // Stdio servers short-circuit here; only streamable HTTP reaches the check below.
+    let (url, token_env_var) = match &config.transport {
+        McpServerTransportConfig::Stdio { .. } => return Ok(McpAuthStatus::Unsupported),
+        McpServerTransportConfig::StreamableHttp {
+            url,
+            bearer_token_env_var,
+        } => (url, bearer_token_env_var.as_deref()),
+    };
+
+    determine_streamable_http_auth_status(server_name, url, token_env_var, store_mode).await
+}
--- a/codex-rs/core/src/mcp/mod.rs
+++ b/codex-rs/core/src/mcp/mod.rs
@@ -0,0 +1 @@
+pub mod auth;
--- a/codex-rs/core/src/mcp_connection_manager.rs
+++ b/codex-rs/core/src/mcp_connection_manager.rs
@@ -8,6 +8,7 @@

 use std::collections::HashMap;
 use std::collections::HashSet;
+use std::env;
 use std::ffi::OsString;
 use std::sync::Arc;
 use std::time::Duration;
@@ -16,6 +17,7 @@ use anyhow::Context;
 use anyhow::Result;
 use anyhow::anyhow;
 use codex_mcp_client::McpClient;
+use codex_rmcp_client::OAuthCredentialsStoreMode;
 use codex_rmcp_client::RmcpClient;
 use mcp_types::ClientCapabilities;
 use mcp_types::Implementation;
@@ -108,9 +110,6 @@ impl McpClientAdapter {
        params: mcp_types::InitializeRequestParams,
        startup_timeout: Duration,
    ) -> Result<Self> {
-        info!(
-            "new_stdio_client use_rmcp_client: {use_rmcp_client} program: {program:?} args: {args:?} env: {env:?} params: {params:?} startup_timeout: {startup_timeout:?}"
-        );
        if use_rmcp_client {
            let client = Arc::new(RmcpClient::new_stdio_client(program, args, env).await?);
            client.initialize(params, Some(startup_timeout)).await?;
@@ -128,9 +127,11 @@ impl McpClientAdapter {
        bearer_token: Option<String>,
        params: mcp_types::InitializeRequestParams,
        startup_timeout: Duration,
+        store_mode: OAuthCredentialsStoreMode,
    ) -> Result<Self> {
        let client = Arc::new(
-            RmcpClient::new_streamable_http_client(&server_name, &url, bearer_token).await?,
+            RmcpClient::new_streamable_http_client(&server_name, &url, bearer_token, store_mode)
+                .await?,
        );
        client.initialize(params, Some(startup_timeout)).await?;
        Ok(McpClientAdapter::Rmcp(client))
@@ -185,6 +186,7 @@ impl McpConnectionManager {
    pub async fn new(
        mcp_servers: HashMap<String, McpServerConfig>,
        use_rmcp_client: bool,
+        store_mode: OAuthCredentialsStoreMode,
    ) -> Result<(Self, ClientStartErrors)> {
        // Early exit if no servers are configured.
        if mcp_servers.is_empty() {
@@ -205,20 +207,21 @@ impl McpConnectionManager {
                continue;
            }

-            if matches!(
-                cfg.transport,
-                McpServerTransportConfig::StreamableHttp { .. }
-            ) && !use_rmcp_client
-            {
-                info!(
-                    "skipping MCP server `{server_name}` because the legacy MCP client only supports stdio servers",
-                );
+            if !cfg.enabled {
                continue;
            }

            let startup_timeout = cfg.startup_timeout_sec.unwrap_or(DEFAULT_STARTUP_TIMEOUT);
            let tool_timeout = cfg.tool_timeout_sec.unwrap_or(DEFAULT_TOOL_TIMEOUT);

+            let resolved_bearer_token = match &cfg.transport {
+                McpServerTransportConfig::StreamableHttp {
+                    bearer_token_env_var,
+                    ..
+                } => resolve_bearer_token(&server_name, bearer_token_env_var.as_deref()),
+                _ => Ok(None),
+            };
+
            join_set.spawn(async move {
                let McpServerConfig { transport, .. } = cfg;
                let params = mcp_types::InitializeRequestParams {
@@ -256,13 +259,14 @@ impl McpConnectionManager {
                        )
                        .await
                    }
-                    McpServerTransportConfig::StreamableHttp { url, bearer_token } => {
+                    McpServerTransportConfig::StreamableHttp { url, .. } => {
                        McpClientAdapter::new_streamable_http_client(
                            server_name.clone(),
                            url,
-                            bearer_token,
+                            resolved_bearer_token.unwrap_or_default(),
                            params,
                            startup_timeout,
+                            store_mode,
                        )
                        .await
                    }
@@ -350,6 +354,33 @@ impl McpConnectionManager {
    }
 }

+/// Reads the bearer token for `server_name` from the environment variable named by
+/// `bearer_token_env_var`.
+///
+/// Returns `Ok(None)` when no variable name is configured, and an error when the named
+/// variable is unset, empty, or not valid Unicode.
+fn resolve_bearer_token(
+    server_name: &str,
+    bearer_token_env_var: Option<&str>,
+) -> Result<Option<String>> {
+    let Some(env_var) = bearer_token_env_var else {
+        return Ok(None);
+    };
+
+    match env::var(env_var) {
+        Ok(value) if value.is_empty() => Err(anyhow!(
+            "Environment variable {env_var} for MCP server '{server_name}' is empty"
+        )),
+        Ok(value) => Ok(Some(value)),
+        Err(env::VarError::NotPresent) => Err(anyhow!(
+            "Environment variable {env_var} for MCP server '{server_name}' is not set"
+        )),
+        Err(env::VarError::NotUnicode(_)) => Err(anyhow!(
+            "Environment variable {env_var} for MCP server '{server_name}' contains invalid Unicode"
+        )),
+    }
+}
+
 /// Query every server for its available tools and return a single map that
 /// contains **all** tools. Each key is the fully-qualified name for the tool.
 async fn list_all_tools(clients: &HashMap<String, ManagedClient>) -> Result<Vec<ToolInfo>> {
--- a/codex-rs/core/src/model_family.rs
+++ b/codex-rs/core/src/model_family.rs
@@ -35,6 +35,10 @@ pub struct ModelFamily {
    // See https://platform.openai.com/docs/guides/tools-local-shell
    pub uses_local_shell_tool: bool,

+    /// Whether this model supports parallel tool calls when using the
+    /// Responses API.
+    pub supports_parallel_tool_calls: bool,
+
    /// Present if the model performs better when `apply_patch` is provided as
    /// a tool call instead of just a bash command
    pub apply_patch_tool_type: Option<ApplyPatchToolType>,
@@ -58,6 +62,7 @@ macro_rules! model_family {
            supports_reasoning_summaries: false,
            reasoning_summary_format: ReasoningSummaryFormat::None,
            uses_local_shell_tool: false,
+            supports_parallel_tool_calls: false,
            apply_patch_tool_type: None,
            base_instructions: BASE_INSTRUCTIONS.to_string(),
            experimental_supported_tools: Vec::new(),
@@ -72,7 +77,11 @@ macro_rules! model_family {

 /// Returns a `ModelFamily` for the given model slug, or `None` if the slug
 /// does not match any known model family.
-pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
+pub fn find_family_for_model(mut slug: &str) -> Option<ModelFamily> {
+    // TODO(jif) clean once we have proper feature flags
+    if matches!(std::env::var("CODEX_EXPERIMENTAL").as_deref(), Ok("1")) {
+        slug = "codex-experimental";
+    }
    if slug.starts_with("o3") {
        model_family!(
            slug, "o3",
@@ -103,13 +112,45 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
        model_family!(slug, "gpt-4o", needs_special_apply_patch_instructions: true)
    } else if slug.starts_with("gpt-3.5") {
        model_family!(slug, "gpt-3.5", needs_special_apply_patch_instructions: true)
-    } else if slug.starts_with("codex-") || slug.starts_with("gpt-5-codex") {
+    } else if slug.starts_with("test-gpt-5-codex") {
        model_family!(
            slug, slug,
            supports_reasoning_summaries: true,
            reasoning_summary_format: ReasoningSummaryFormat::Experimental,
            base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(),
-            experimental_supported_tools: vec!["read_file".to_string()],
+            experimental_supported_tools: vec![
+                "grep_files".to_string(),
+                "list_dir".to_string(),
+                "read_file".to_string(),
+                "test_sync_tool".to_string(),
+            ],
+            supports_parallel_tool_calls: true,
+        )
+
+    // Internal models.
+    } else if slug.starts_with("codex-") {
+        model_family!(
+            slug, slug,
+            supports_reasoning_summaries: true,
+            reasoning_summary_format: ReasoningSummaryFormat::Experimental,
+            base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(),
+            apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
+            experimental_supported_tools: vec![
+                "grep_files".to_string(),
+                "list_dir".to_string(),
+                "read_file".to_string(),
+            ],
+            supports_parallel_tool_calls: true,
+        )
+
+    // Production models.
+    } else if slug.starts_with("gpt-5-codex") {
+        model_family!(
+            slug, slug,
+            supports_reasoning_summaries: true,
+            reasoning_summary_format: ReasoningSummaryFormat::Experimental,
+            base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(),
+            apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
        )
    } else if slug.starts_with("gpt-5") {
        model_family!(
@@ -130,6 +171,7 @@ pub fn derive_default_model_family(model: &str) -> ModelFamily {
        supports_reasoning_summaries: false,
        reasoning_summary_format: ReasoningSummaryFormat::None,
        uses_local_shell_tool: false,
+        supports_parallel_tool_calls: false,
        apply_patch_tool_type: None,
        base_instructions: BASE_INSTRUCTIONS.to_string(),
        experimental_supported_tools: Vec::new(),
--- a/codex-rs/core/src/path_utils.rs
+++ b/codex-rs/core/src/path_utils.rs
@@ -1,19 +0,0 @@
-use std::io;
-use std::path::PathBuf;
-
-pub(crate) fn expand_tilde(raw: &str) -> io::Result<PathBuf> {
-    if raw.starts_with('~') {
-        // `shellexpand::tilde` falls back to returning the input when the home directory
-        // cannot be resolved; mirror the previous error semantics in that case.
-        let expanded = shellexpand::tilde(raw);
-        if expanded.starts_with('~') {
-            return Err(io::Error::new(
-                io::ErrorKind::NotFound,
-                "could not resolve home directory while expanding path",
-            ));
-        }
-        return Ok(PathBuf::from(expanded.as_ref()));
-    }
-
-    Ok(PathBuf::from(raw))
-}
--- a/codex-rs/core/src/state/session.rs
+++ b/codex-rs/core/src/state/session.rs
@@ -2,6 +2,7 @@

 use codex_protocol::models::ResponseItem;

+use crate::codebase_snapshot::CodebaseSnapshot;
 use crate::conversation_history::ConversationHistory;
 use crate::protocol::RateLimitSnapshot;
 use crate::protocol::TokenUsage;
@@ -13,6 +14,7 @@ pub(crate) struct SessionState {
    pub(crate) history: ConversationHistory,
    pub(crate) token_info: Option<TokenUsageInfo>,
    pub(crate) latest_rate_limits: Option<RateLimitSnapshot>,
+    pub(crate) codebase_snapshot: Option<CodebaseSnapshot>,
 }

 impl SessionState {
@@ -64,5 +66,14 @@ impl SessionState {
        (self.token_info.clone(), self.latest_rate_limits.clone())
    }

+    /// Calls `fill_to_context_window` on existing token info, else stores a full-window one.
+    pub(crate) fn set_token_usage_full(&mut self, context_window: u64) {
+        if let Some(info) = self.token_info.as_mut() {
+            info.fill_to_context_window(context_window);
+        } else {
+            self.token_info = Some(TokenUsageInfo::full_context_window(context_window));
+        }
+    }
+
    // Pending input/approval moved to TurnState.
 }
--- a/codex-rs/core/src/state/turn.rs
+++ b/codex-rs/core/src/state/turn.rs
@@ -34,6 +34,16 @@ pub(crate) enum TaskKind {
    Compact,
 }

+impl TaskKind {
+    pub(crate) fn header_value(self) -> &'static str {
+        match self {
+            Self::Regular => "standard", // label differs from the variant name
+            Self::Review => "review",
+            Self::Compact => "compact",
+        }
+    }
+}
+
 #[derive(Clone)]
 pub(crate) struct RunningTask {
    pub(crate) handle: AbortHandle,
@@ -113,3 +123,15 @@ impl ActiveTurn {
        }
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::TaskKind;
+
+    #[test]
+    fn header_value_matches_expected_labels() { // pins the exact label of each variant
+        assert_eq!(TaskKind::Regular.header_value(), "standard");
+        assert_eq!(TaskKind::Review.header_value(), "review");
+        assert_eq!(TaskKind::Compact.header_value(), "compact");
+    }
+}
--- a/codex-rs/core/src/tasks/regular.rs
+++ b/codex-rs/core/src/tasks/regular.rs
@@ -27,6 +27,6 @@ impl SessionTask for RegularTask {
        input: Vec<InputItem>,
    ) -> Option<String> {
        let sess = session.clone_session();
-        run_task(sess, ctx, sub_id, input).await
+        run_task(sess, ctx, sub_id, input, TaskKind::Regular).await
    }
 }
--- a/codex-rs/core/src/tasks/review.rs
+++ b/codex-rs/core/src/tasks/review.rs
@@ -28,7 +28,7 @@ impl SessionTask for ReviewTask {
        input: Vec<InputItem>,
    ) -> Option<String> {
        let sess = session.clone_session();
-        run_task(sess, ctx, sub_id, input).await
+        run_task(sess, ctx, sub_id, input, TaskKind::Review).await
    }

    async fn abort(&self, session: Arc<SessionTaskContext>, sub_id: &str) {
--- a/codex-rs/core/src/tools/context.rs
+++ b/codex-rs/core/src/tools/context.rs
@@ -14,12 +14,17 @@ use mcp_types::CallToolResult;
 use std::borrow::Cow;
 use std::collections::HashMap;
 use std::path::PathBuf;
+use std::sync::Arc;
+use tokio::sync::Mutex;

-pub struct ToolInvocation<'a> {
-    pub session: &'a Session,
-    pub turn: &'a TurnContext,
-    pub tracker: &'a mut TurnDiffTracker,
-    pub sub_id: &'a str,
+pub type SharedTurnDiffTracker = Arc<Mutex<TurnDiffTracker>>;
+
+#[derive(Clone)]
+pub struct ToolInvocation {
+    pub session: Arc<Session>,
+    pub turn: Arc<TurnContext>,
+    pub tracker: SharedTurnDiffTracker,
+    pub sub_id: String,
    pub call_id: String,
    pub tool_name: String,
    pub payload: ToolPayload,
--- a/codex-rs/core/src/tools/handlers/apply_patch.rs
+++ b/codex-rs/core/src/tools/handlers/apply_patch.rs
@@ -1,5 +1,6 @@
 use std::collections::BTreeMap;
 use std::collections::HashMap;
+use std::sync::Arc;

 use crate::client_common::tools::FreeformTool;
 use crate::client_common::tools::FreeformToolFormat;
@@ -36,10 +37,7 @@ impl ToolHandler for ApplyPatchHandler {
        )
    }

-    async fn handle(
-        &self,
-        invocation: ToolInvocation<'_>,
-    ) -> Result<ToolOutput, FunctionCallError> {
+    async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> {
        let ToolInvocation {
            session,
            turn,
@@ -79,10 +77,10 @@ impl ToolHandler for ApplyPatchHandler {
        let content = handle_container_exec_with_params(
            tool_name.as_str(),
            exec_params,
-            session,
-            turn,
-            tracker,
-            sub_id.to_string(),
+            Arc::clone(&session),
+            Arc::clone(&turn),
+            Arc::clone(&tracker),
+            sub_id.clone(),
            call_id.clone(),
        )
        .await?;
@@ -106,7 +104,7 @@ pub enum ApplyPatchToolType {
 pub(crate) fn create_apply_patch_freeform_tool() -> ToolSpec {
    ToolSpec::Freeform(FreeformTool {
        name: "apply_patch".to_string(),
-        description: "Use the `apply_patch` tool to edit files".to_string(),
+        description: "Use the `apply_patch` tool to edit files. This is a FREEFORM tool, so do not wrap the patch in JSON.".to_string(),
        format: FreeformToolFormat {
            r#type: "grammar".to_string(),
            syntax: "lark".to_string(),
--- a/codex-rs/core/src/tools/handlers/exec_stream.rs
+++ b/codex-rs/core/src/tools/handlers/exec_stream.rs
@@ -19,10 +19,7 @@ impl ToolHandler for ExecStreamHandler {
        ToolKind::Function
    }

-    async fn handle(
-        &self,
-        invocation: ToolInvocation<'_>,
-    ) -> Result<ToolOutput, FunctionCallError> {
+    async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> {
        let ToolInvocation {
            session,
            tool_name,
--- a/codex-rs/core/src/tools/handlers/grep_files.rs
+++ b/codex-rs/core/src/tools/handlers/grep_files.rs
@@ -0,0 +1,272 @@
+use std::path::Path;
+use std::time::Duration;
+
+use async_trait::async_trait;
+use serde::Deserialize;
+use tokio::process::Command;
+use tokio::time::timeout;
+
+use crate::function_tool::FunctionCallError;
+use crate::tools::context::ToolInvocation;
+use crate::tools::context::ToolOutput;
+use crate::tools::context::ToolPayload;
+use crate::tools::registry::ToolHandler;
+use crate::tools::registry::ToolKind;
+
+/// Tool handler for `grep_files`: shells out to `rg` and reports the paths of
+/// files whose contents match a pattern.
+pub struct GrepFilesHandler;
+
+/// Number of results returned when the caller does not supply `limit`.
+const DEFAULT_LIMIT: usize = 100;
+/// Hard ceiling applied to any caller-supplied `limit`.
+const MAX_LIMIT: usize = 2000;
+/// Maximum time to wait for the `rg` process to finish.
+const COMMAND_TIMEOUT: Duration = Duration::from_secs(30);
+
+/// Serde default for [`GrepFilesArgs::limit`].
+fn default_limit() -> usize {
+    DEFAULT_LIMIT
+}
+
+/// JSON arguments accepted by the `grep_files` tool.
+#[derive(Deserialize)]
+struct GrepFilesArgs {
+    /// Pattern handed to `rg --regexp`; must be non-empty after trimming.
+    pattern: String,
+    /// Optional glob handed to `rg --glob`; blank values are ignored.
+    #[serde(default)]
+    include: Option<String>,
+    /// Optional search root, resolved through the turn's `resolve_path`.
+    #[serde(default)]
+    path: Option<String>,
+    /// Maximum number of paths to return; must be non-zero and is capped at `MAX_LIMIT`.
+    #[serde(default = "default_limit")]
+    limit: usize,
+}
+
+#[async_trait]
+impl ToolHandler for GrepFilesHandler {
+    /// This handler is exposed as a regular function tool.
+    fn kind(&self) -> ToolKind {
+        ToolKind::Function
+    }
+
+    /// Validates the JSON arguments, runs the ripgrep search and renders the
+    /// matching paths one per line.
+    ///
+    /// Every validation or search failure is reported as
+    /// `FunctionCallError::RespondToModel`. An empty result set is reported as
+    /// `"No matches found."` with `success: Some(false)`.
+    async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> {
+        let ToolInvocation { payload, turn, .. } = invocation;
+
+        // Only function-style payloads carry the JSON argument string.
+        let ToolPayload::Function { arguments } = payload else {
+            return Err(FunctionCallError::RespondToModel(
+                "grep_files handler received unsupported payload".to_string(),
+            ));
+        };
+
+        let parsed: GrepFilesArgs = match serde_json::from_str(&arguments) {
+            Ok(parsed) => parsed,
+            Err(err) => {
+                return Err(FunctionCallError::RespondToModel(format!(
+                    "failed to parse function arguments: {err:?}"
+                )));
+            }
+        };
+
+        let needle = parsed.pattern.trim();
+        if needle.is_empty() {
+            return Err(FunctionCallError::RespondToModel(
+                "pattern must not be empty".to_string(),
+            ));
+        }
+        if parsed.limit == 0 {
+            return Err(FunctionCallError::RespondToModel(
+                "limit must be greater than zero".to_string(),
+            ));
+        }
+
+        let capped_limit = parsed.limit.min(MAX_LIMIT);
+        let root = turn.resolve_path(parsed.path.clone());
+        verify_path_exists(&root).await?;
+
+        // A glob that is blank after trimming is treated as "no filter".
+        let glob = parsed
+            .include
+            .as_deref()
+            .map(str::trim)
+            .filter(|candidate| !candidate.is_empty());
+
+        let hits = run_rg_search(needle, glob, &root, capped_limit, &turn.cwd).await?;
+
+        let (content, matched) = if hits.is_empty() {
+            ("No matches found.".to_string(), false)
+        } else {
+            (hits.join("\n"), true)
+        };
+        Ok(ToolOutput::Function {
+            content,
+            success: Some(matched),
+        })
+    }
+}
+
+async fn verify_path_exists(path: &Path) -> Result<(), FunctionCallError> {
+    tokio::fs::metadata(path).await.map_err(|err| {
+        FunctionCallError::RespondToModel(format!("unable to access `{}`: {err}", path.display()))
+    })?;
+    Ok(())
+}
+
+/// Runs `rg --files-with-matches` for `pattern` under `search_path` and returns
+/// at most `limit` matching file paths, most recently modified first.
+///
+/// * `include` - optional glob forwarded to `rg --glob`.
+/// * `cwd` - working directory for the `rg` process.
+///
+/// An `rg` exit status of `0` yields the parsed paths and `1` (no matches)
+/// yields an empty list. Any other status, a launch failure, or exceeding
+/// `COMMAND_TIMEOUT` is reported as `FunctionCallError::RespondToModel`.
+async fn run_rg_search(
+    pattern: &str,
+    include: Option<&str>,
+    search_path: &Path,
+    limit: usize,
+    cwd: &Path,
+) -> Result<Vec<String>, FunctionCallError> {
+    let mut command = Command::new("rg");
+    command
+        .current_dir(cwd)
+        // If the timeout below fires, the pending `output()` future is dropped;
+        // without this the `rg` child would keep running in the background.
+        .kill_on_drop(true)
+        .arg("--files-with-matches")
+        .arg("--sortr=modified")
+        .arg("--regexp")
+        .arg(pattern)
+        .arg("--no-messages");
+
+    if let Some(glob) = include {
+        command.arg("--glob").arg(glob);
+    }
+
+    // `--` ends option parsing so a path starting with `-` is not read as a flag.
+    command.arg("--").arg(search_path);
+
+    let output = timeout(COMMAND_TIMEOUT, command.output())
+        .await
+        .map_err(|_| {
+            // Derive the figure from the constant so the message cannot drift.
+            FunctionCallError::RespondToModel(format!(
+                "rg timed out after {} seconds",
+                COMMAND_TIMEOUT.as_secs()
+            ))
+        })?
+        .map_err(|err| {
+            FunctionCallError::RespondToModel(format!(
+                "failed to launch rg: {err}. Ensure ripgrep is installed and on PATH."
+            ))
+        })?;
+
+    match output.status.code() {
+        Some(0) => Ok(parse_results(&output.stdout, limit)),
+        // Exit status 1 means the search ran but nothing matched.
+        Some(1) => Ok(Vec::new()),
+        _ => {
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            Err(FunctionCallError::RespondToModel(format!(
+                "rg failed: {stderr}"
+            )))
+        }
+    }
+}
+
+/// Splits `rg` stdout into one path per line and returns at most `limit` of them.
+///
+/// Empty lines and lines that are not valid UTF-8 are skipped and do not count
+/// towards `limit`. A `limit` of zero yields an empty list.
+fn parse_results(stdout: &[u8], limit: usize) -> Vec<String> {
+    stdout
+        .split(|byte| *byte == b'\n')
+        .filter(|line| !line.is_empty())
+        .filter_map(|line| std::str::from_utf8(line).ok())
+        .map(str::to_owned)
+        // `take` bounds the result for every `limit`, including zero.
+        .take(limit)
+        .collect()
+}
+
+// Unit tests for output parsing plus end-to-end searches against a real `rg`
+// binary; the latter return early when `rg` is not available.
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::process::Command as StdCommand;
+    use tempfile::tempdir;
+
+    // Each newline-terminated path becomes one result entry.
+    #[test]
+    fn parses_basic_results() {
+        let stdout = b"/tmp/file_a.rs\n/tmp/file_b.rs\n";
+        let parsed = parse_results(stdout, 10);
+        assert_eq!(
+            parsed,
+            vec!["/tmp/file_a.rs".to_string(), "/tmp/file_b.rs".to_string()]
+        );
+    }
+
+    // Parsing stops once `limit` entries have been collected.
+    #[test]
+    fn parse_truncates_after_limit() {
+        let stdout = b"/tmp/file_a.rs\n/tmp/file_b.rs\n/tmp/file_c.rs\n";
+        let parsed = parse_results(stdout, 2);
+        assert_eq!(
+            parsed,
+            vec!["/tmp/file_a.rs".to_string(), "/tmp/file_b.rs".to_string()]
+        );
+    }
+
+    // Only the two files containing "alpha" are reported.
+    #[tokio::test]
+    async fn run_search_returns_results() -> anyhow::Result<()> {
+        if !rg_available() {
+            return Ok(());
+        }
+        let temp = tempdir().expect("create temp dir");
+        let dir = temp.path();
+        std::fs::write(dir.join("match_one.txt"), "alpha beta gamma").unwrap();
+        std::fs::write(dir.join("match_two.txt"), "alpha delta").unwrap();
+        std::fs::write(dir.join("other.txt"), "omega").unwrap();
+
+        let results = run_rg_search("alpha", None, dir, 10, dir).await?;
+        assert_eq!(results.len(), 2);
+        assert!(results.iter().any(|path| path.ends_with("match_one.txt")));
+        assert!(results.iter().any(|path| path.ends_with("match_two.txt")));
+        Ok(())
+    }
+
+    // The `*.rs` glob excludes the matching `.txt` file.
+    #[tokio::test]
+    async fn run_search_with_glob_filter() -> anyhow::Result<()> {
+        if !rg_available() {
+            return Ok(());
+        }
+        let temp = tempdir().expect("create temp dir");
+        let dir = temp.path();
+        std::fs::write(dir.join("match_one.rs"), "alpha beta gamma").unwrap();
+        std::fs::write(dir.join("match_two.txt"), "alpha delta").unwrap();
+
+        let results = run_rg_search("alpha", Some("*.rs"), dir, 10, dir).await?;
+        assert_eq!(results.len(), 1);
+        assert!(results.iter().all(|path| path.ends_with("match_one.rs")));
+        Ok(())
+    }
+
+    // Three files match but only `limit` (2) paths are returned.
+    #[tokio::test]
+    async fn run_search_respects_limit() -> anyhow::Result<()> {
+        if !rg_available() {
+            return Ok(());
+        }
+        let temp = tempdir().expect("create temp dir");
+        let dir = temp.path();
+        std::fs::write(dir.join("one.txt"), "alpha one").unwrap();
+        std::fs::write(dir.join("two.txt"), "alpha two").unwrap();
+        std::fs::write(dir.join("three.txt"), "alpha three").unwrap();
+
+        let results = run_rg_search("alpha", None, dir, 2, dir).await?;
+        assert_eq!(results.len(), 2);
+        Ok(())
+    }
+
+    // A search with no hits yields an empty list rather than an error.
+    #[tokio::test]
+    async fn run_search_handles_no_matches() -> anyhow::Result<()> {
+        if !rg_available() {
+            return Ok(());
+        }
+        let temp = tempdir().expect("create temp dir");
+        let dir = temp.path();
+        std::fs::write(dir.join("one.txt"), "omega").unwrap();
+
+        let results = run_rg_search("alpha", None, dir, 5, dir).await?;
+        assert!(results.is_empty());
+        Ok(())
+    }
+
+    // True when `rg --version` can be spawned and exits successfully.
+    fn rg_available() -> bool {
+        StdCommand::new("rg")
+            .arg("--version")
+            .output()
+            .map(|output| output.status.success())
+            .unwrap_or(false)
+    }
+}
--- a/codex-rs/core/src/tools/handlers/list_dir.rs
+++ b/codex-rs/core/src/tools/handlers/list_dir.rs
@@ -0,0 +1,476 @@
+use std::collections::VecDeque;
+use std::ffi::OsStr;
+use std::fs::FileType;
+use std::path::Path;
+use std::path::PathBuf;
+
+use async_trait::async_trait;
+use codex_utils_string::take_bytes_at_char_boundary;
+use serde::Deserialize;
+use tokio::fs;
+
+use crate::function_tool::FunctionCallError;
+use crate::tools::context::ToolInvocation;
+use crate::tools::context::ToolOutput;
+use crate::tools::context::ToolPayload;
+use crate::tools::registry::ToolHandler;
+use crate::tools::registry::ToolKind;
+
+/// Tool handler for `list_dir`: lists the entries below an absolute directory
+/// path as an indented tree, with 1-indexed paging.
+pub struct ListDirHandler;
+
+/// Maximum number of bytes kept from an entry name or relative path.
+const MAX_ENTRY_LENGTH: usize = 500;
+/// Number of spaces of indentation added per nesting level in the output.
+const INDENTATION_SPACES: usize = 2;
+
+/// Serde default for `ListDirArgs::offset` (the first entry).
+fn default_offset() -> usize {
+    1
+}
+
+/// Serde default for `ListDirArgs::limit`.
+fn default_limit() -> usize {
+    25
+}
+
+/// Serde default for `ListDirArgs::depth`.
+fn default_depth() -> usize {
+    2
+}
+
+/// JSON arguments accepted by the `list_dir` tool.
+#[derive(Deserialize)]
+struct ListDirArgs {
+    /// Directory to list; the handler rejects non-absolute paths.
+    dir_path: String,
+    /// 1-indexed position of the first entry to return; zero is rejected.
+    #[serde(default = "default_offset")]
+    offset: usize,
+    /// Maximum number of entries to return; zero is rejected.
+    #[serde(default = "default_limit")]
+    limit: usize,
+    /// Number of directory levels to descend; zero is rejected.
+    #[serde(default = "default_depth")]
+    depth: usize,
+}
+
+#[async_trait]
+impl ToolHandler for ListDirHandler {
+    /// This handler is exposed as a regular function tool.
+    fn kind(&self) -> ToolKind {
+        ToolKind::Function
+    }
+
+    /// Validates the JSON arguments, lists the requested slice of the
+    /// directory tree and prefixes it with an `Absolute path:` header line.
+    ///
+    /// Every validation or listing failure is reported as
+    /// `FunctionCallError::RespondToModel`.
+    async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> {
+        let ToolInvocation { payload, .. } = invocation;
+
+        // Only function-style payloads carry the JSON argument string.
+        let ToolPayload::Function { arguments } = payload else {
+            return Err(FunctionCallError::RespondToModel(
+                "list_dir handler received unsupported payload".to_string(),
+            ));
+        };
+
+        let ListDirArgs {
+            dir_path,
+            offset,
+            limit,
+            depth,
+        } = match serde_json::from_str::<ListDirArgs>(&arguments) {
+            Ok(parsed) => parsed,
+            Err(err) => {
+                return Err(FunctionCallError::RespondToModel(format!(
+                    "failed to parse function arguments: {err:?}"
+                )));
+            }
+        };
+
+        // The numeric arguments are checked in a fixed order: offset, limit, depth.
+        let rejection = if offset == 0 {
+            Some("offset must be a 1-indexed entry number")
+        } else if limit == 0 {
+            Some("limit must be greater than zero")
+        } else if depth == 0 {
+            Some("depth must be greater than zero")
+        } else {
+            None
+        };
+        if let Some(message) = rejection {
+            return Err(FunctionCallError::RespondToModel(message.to_string()));
+        }
+
+        let path = PathBuf::from(dir_path);
+        if !path.is_absolute() {
+            return Err(FunctionCallError::RespondToModel(
+                "dir_path must be an absolute path".to_string(),
+            ));
+        }
+
+        let listing = list_dir_slice(&path, offset, limit, depth).await?;
+        let header = format!("Absolute path: {}", path.display());
+        let content = std::iter::once(header)
+            .chain(listing)
+            .collect::<Vec<_>>()
+            .join("\n");
+
+        Ok(ToolOutput::Function {
+            content,
+            success: Some(true),
+        })
+    }
+}
+
+/// Collects the entries below `path` (descending `depth` levels) and returns
+/// the formatted lines for the window that starts at the 1-indexed `offset`
+/// and holds at most `limit` entries.
+///
+/// The window is cut from the collected entries and then sorted by relative
+/// path. When entries remain beyond the window, a trailing
+/// `More than N entries found` line is appended. An empty directory yields an
+/// empty list; an `offset` past the last entry is an error.
+async fn list_dir_slice(
+    path: &Path,
+    offset: usize,
+    limit: usize,
+    depth: usize,
+) -> Result<Vec<String>, FunctionCallError> {
+    let mut collected = Vec::new();
+    collect_entries(path, Path::new(""), depth, &mut collected).await?;
+
+    let total = collected.len();
+    if total == 0 {
+        return Ok(Vec::new());
+    }
+
+    let first = offset - 1;
+    if first >= total {
+        return Err(FunctionCallError::RespondToModel(
+            "offset exceeds directory entry count".to_string(),
+        ));
+    }
+
+    // Clamp to what is left so `first + window_len` cannot overflow for huge limits.
+    let window_len = limit.min(total - first);
+    let last = first + window_len;
+
+    let mut window = collected[first..last].to_vec();
+    window.sort_unstable_by(|left, right| left.name.cmp(&right.name));
+
+    let mut lines: Vec<String> = window.iter().map(format_entry_line).collect();
+    if last < total {
+        lines.push(format!("More than {window_len} entries found"));
+    }
+
+    Ok(lines)
+}
+
+/// Walks the tree rooted at `dir_path` breadth-first and appends one
+/// `DirEntry` per directory entry to `entries`.
+///
+/// * `relative_prefix` - path prepended to entry names to form their relative
+///   path (empty for the root).
+/// * `depth` - number of levels to read; subdirectories are only queued while
+///   the remaining depth is greater than one.
+///
+/// Entries of each directory are appended sorted by relative path. Any I/O
+/// failure while reading a directory or inspecting an entry aborts the whole
+/// walk with `FunctionCallError::RespondToModel`.
+async fn collect_entries(
+    dir_path: &Path,
+    relative_prefix: &Path,
+    depth: usize,
+    entries: &mut Vec<DirEntry>,
+) -> Result<(), FunctionCallError> {
+    // FIFO work list of (absolute dir, relative prefix, remaining depth).
+    let mut queue = VecDeque::new();
+    queue.push_back((dir_path.to_path_buf(), relative_prefix.to_path_buf(), depth));
+
+    while let Some((current_dir, prefix, remaining_depth)) = queue.pop_front() {
+        let mut read_dir = fs::read_dir(&current_dir).await.map_err(|err| {
+            FunctionCallError::RespondToModel(format!("failed to read directory: {err}"))
+        })?;
+
+        // Buffer this directory's entries so they can be sorted before being emitted.
+        let mut dir_entries = Vec::new();
+
+        while let Some(entry) = read_dir.next_entry().await.map_err(|err| {
+            FunctionCallError::RespondToModel(format!("failed to read directory: {err}"))
+        })? {
+            let file_type = entry.file_type().await.map_err(|err| {
+                FunctionCallError::RespondToModel(format!("failed to inspect entry: {err}"))
+            })?;
+
+            let file_name = entry.file_name();
+            let relative_path = if prefix.as_os_str().is_empty() {
+                PathBuf::from(&file_name)
+            } else {
+                prefix.join(&file_name)
+            };
+
+            let display_name = format_entry_component(&file_name);
+            // Nesting level equals the number of components in the parent's relative path.
+            let display_depth = prefix.components().count();
+            let sort_key = format_entry_name(&relative_path);
+            let kind = DirEntryKind::from(&file_type);
+            dir_entries.push((
+                entry.path(),
+                relative_path,
+                kind,
+                DirEntry {
+                    name: sort_key,
+                    display_name,
+                    depth: display_depth,
+                    kind,
+                },
+            ));
+        }
+
+        dir_entries.sort_unstable_by(|a, b| a.3.name.cmp(&b.3.name));
+
+        for (entry_path, relative_path, kind, dir_entry) in dir_entries {
+            // Only entries classified as directories are descended into; symlinks are not.
+            if kind == DirEntryKind::Directory && remaining_depth > 1 {
+                queue.push_back((entry_path, relative_path, remaining_depth - 1));
+            }
+            entries.push(dir_entry);
+        }
+    }
+
+    Ok(())
+}
+
+/// Renders a relative path with forward slashes, truncated to at most
+/// `MAX_ENTRY_LENGTH` bytes on a character boundary.
+fn format_entry_name(path: &Path) -> String {
+    let slashed = path.to_string_lossy().replace("\\", "/");
+    if slashed.len() <= MAX_ENTRY_LENGTH {
+        return slashed;
+    }
+    take_bytes_at_char_boundary(&slashed, MAX_ENTRY_LENGTH).to_string()
+}
+
+/// Renders a single file name (lossily converted to UTF-8), truncated to at
+/// most `MAX_ENTRY_LENGTH` bytes on a character boundary.
+fn format_entry_component(name: &OsStr) -> String {
+    let lossy = name.to_string_lossy();
+    if lossy.len() <= MAX_ENTRY_LENGTH {
+        return lossy.into_owned();
+    }
+    take_bytes_at_char_boundary(&lossy, MAX_ENTRY_LENGTH).to_string()
+}
+
+/// Formats one output line: indentation proportional to the entry's depth,
+/// its display name, and a kind marker (`/` directory, `@` symlink, `?` other,
+/// nothing for regular files).
+fn format_entry_line(entry: &DirEntry) -> String {
+    let marker = match entry.kind {
+        DirEntryKind::Directory => "/",
+        DirEntryKind::Symlink => "@",
+        DirEntryKind::Other => "?",
+        DirEntryKind::File => "",
+    };
+    let padding = " ".repeat(entry.depth * INDENTATION_SPACES);
+    format!("{padding}{}{marker}", entry.display_name)
+}
+
+/// One collected directory entry, ready for sorting and formatting.
+#[derive(Clone)]
+struct DirEntry {
+    /// Sort key: the entry's normalized (and possibly truncated) relative path.
+    name: String,
+    /// File name shown in the output, without any parent components.
+    display_name: String,
+    /// Nesting level below the listed root (0 for direct children).
+    depth: usize,
+    /// Entry classification; selects the suffix appended when formatting.
+    kind: DirEntryKind,
+}
+
+/// Coarse classification of a directory entry derived from its `FileType`.
+#[derive(Clone, Copy, PartialEq, Eq)]
+enum DirEntryKind {
+    Directory,
+    File,
+    Symlink,
+    /// Anything that is not a symlink, directory or regular file.
+    Other,
+}
+
+impl From<&FileType> for DirEntryKind {
+    /// Classifies a `FileType`, giving precedence to symlink, then directory,
+    /// then regular file; anything else is `Other`.
+    fn from(file_type: &FileType) -> Self {
+        match (
+            file_type.is_symlink(),
+            file_type.is_dir(),
+            file_type.is_file(),
+        ) {
+            (true, _, _) => Self::Symlink,
+            (false, true, _) => Self::Directory,
+            (false, false, true) => Self::File,
+            (false, false, false) => Self::Other,
+        }
+    }
+}
+
+// Tests for `list_dir_slice`, run against temporary directory trees.
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::tempdir;
+
+    // Full listing of a three-level tree; on Unix it also contains a symlink,
+    // which is rendered with a trailing `@`.
+    #[tokio::test]
+    async fn lists_directory_entries() {
+        let temp = tempdir().expect("create tempdir");
+        let dir_path = temp.path();
+
+        let sub_dir = dir_path.join("nested");
+        tokio::fs::create_dir(&sub_dir)
+            .await
+            .expect("create sub dir");
+
+        let deeper_dir = sub_dir.join("deeper");
+        tokio::fs::create_dir(&deeper_dir)
+            .await
+            .expect("create deeper dir");
+
+        tokio::fs::write(dir_path.join("entry.txt"), b"content")
+            .await
+            .expect("write file");
+        tokio::fs::write(sub_dir.join("child.txt"), b"child")
+            .await
+            .expect("write child");
+        tokio::fs::write(deeper_dir.join("grandchild.txt"), b"grandchild")
+            .await
+            .expect("write grandchild");
+
+        #[cfg(unix)]
+        {
+            use std::os::unix::fs::symlink;
+            let link_path = dir_path.join("link");
+            symlink(dir_path.join("entry.txt"), &link_path).expect("create symlink");
+        }
+
+        let entries = list_dir_slice(dir_path, 1, 20, 3)
+            .await
+            .expect("list directory");
+
+        // Lines are ordered by relative path, so children follow their parent directory.
+        #[cfg(unix)]
+        let expected = vec![
+            "entry.txt".to_string(),
+            "link@".to_string(),
+            "nested/".to_string(),
+            "  child.txt".to_string(),
+            "  deeper/".to_string(),
+            "    grandchild.txt".to_string(),
+        ];
+
+        #[cfg(not(unix))]
+        let expected = vec![
+            "entry.txt".to_string(),
+            "nested/".to_string(),
+            "  child.txt".to_string(),
+            "  deeper/".to_string(),
+            "    grandchild.txt".to_string(),
+        ];
+
+        assert_eq!(entries, expected);
+    }
+
+    // An offset beyond the number of collected entries is an error.
+    #[tokio::test]
+    async fn errors_when_offset_exceeds_entries() {
+        let temp = tempdir().expect("create tempdir");
+        let dir_path = temp.path();
+        tokio::fs::create_dir(dir_path.join("nested"))
+            .await
+            .expect("create sub dir");
+
+        let err = list_dir_slice(dir_path, 10, 1, 2)
+            .await
+            .expect_err("offset exceeds entries");
+        assert_eq!(
+            err,
+            FunctionCallError::RespondToModel("offset exceeds directory entry count".to_string())
+        );
+    }
+
+    // Each additional depth level reveals one more layer of the tree.
+    #[tokio::test]
+    async fn respects_depth_parameter() {
+        let temp = tempdir().expect("create tempdir");
+        let dir_path = temp.path();
+        let nested = dir_path.join("nested");
+        let deeper = nested.join("deeper");
+        tokio::fs::create_dir(&nested).await.expect("create nested");
+        tokio::fs::create_dir(&deeper).await.expect("create deeper");
+        tokio::fs::write(dir_path.join("root.txt"), b"root")
+            .await
+            .expect("write root");
+        tokio::fs::write(nested.join("child.txt"), b"child")
+            .await
+            .expect("write nested");
+        tokio::fs::write(deeper.join("grandchild.txt"), b"deep")
+            .await
+            .expect("write deeper");
+
+        let entries_depth_one = list_dir_slice(dir_path, 1, 10, 1)
+            .await
+            .expect("list depth 1");
+        assert_eq!(
+            entries_depth_one,
+            vec!["nested/".to_string(), "root.txt".to_string(),]
+        );
+
+        let entries_depth_two = list_dir_slice(dir_path, 1, 20, 2)
+            .await
+            .expect("list depth 2");
+        assert_eq!(
+            entries_depth_two,
+            vec![
+                "nested/".to_string(),
+                "  child.txt".to_string(),
+                "  deeper/".to_string(),
+                "root.txt".to_string(),
+            ]
+        );
+
+        let entries_depth_three = list_dir_slice(dir_path, 1, 30, 3)
+            .await
+            .expect("list depth 3");
+        assert_eq!(
+            entries_depth_three,
+            vec![
+                "nested/".to_string(),
+                "  child.txt".to_string(),
+                "  deeper/".to_string(),
+                "    grandchild.txt".to_string(),
+                "root.txt".to_string(),
+            ]
+        );
+    }
+
+    // `usize::MAX` as limit must not overflow when the window end is computed.
+    #[tokio::test]
+    async fn handles_large_limit_without_overflow() {
+        let temp = tempdir().expect("create tempdir");
+        let dir_path = temp.path();
+        tokio::fs::write(dir_path.join("alpha.txt"), b"alpha")
+            .await
+            .expect("write alpha");
+        tokio::fs::write(dir_path.join("beta.txt"), b"beta")
+            .await
+            .expect("write beta");
+        tokio::fs::write(dir_path.join("gamma.txt"), b"gamma")
+            .await
+            .expect("write gamma");
+
+        let entries = list_dir_slice(dir_path, 2, usize::MAX, 1)
+            .await
+            .expect("list without overflow");
+        assert_eq!(
+            entries,
+            vec!["beta.txt".to_string(), "gamma.txt".to_string(),]
+        );
+    }
+
+    // With 40 files and a limit of 25, a trailing notice line is appended.
+    #[tokio::test]
+    async fn indicates_truncated_results() {
+        let temp = tempdir().expect("create tempdir");
+        let dir_path = temp.path();
+
+        for idx in 0..40 {
+            let file = dir_path.join(format!("file_{idx:02}.txt"));
+            tokio::fs::write(file, b"content")
+                .await
+                .expect("write file");
+        }
+
+        let entries = list_dir_slice(dir_path, 1, 25, 1)
+            .await
+            .expect("list directory");
+        assert_eq!(entries.len(), 26);
+        assert_eq!(
+            entries.last(),
+            Some(&"More than 25 entries found".to_string())
+        );
+    }
+
+    // The limit is applied to the breadth-first collection order, so the two
+    // root entries and the first nested child are kept before the window is sorted.
+    #[tokio::test]
+    async fn bfs_truncation() -> anyhow::Result<()> {
+        let temp = tempdir()?;
+        let dir_path = temp.path();
+        let nested = dir_path.join("nested");
+        let deeper = nested.join("deeper");
+        tokio::fs::create_dir(&nested).await?;
+        tokio::fs::create_dir(&deeper).await?;
+        tokio::fs::write(dir_path.join("root.txt"), b"root").await?;
+        tokio::fs::write(nested.join("child.txt"), b"child").await?;
+        tokio::fs::write(deeper.join("grandchild.txt"), b"deep").await?;
+
+        let entries_depth_three = list_dir_slice(dir_path, 1, 3, 3).await?;
+        assert_eq!(
+            entries_depth_three,
+            vec![
+                "nested/".to_string(),
+                "  child.txt".to_string(),
+                "root.txt".to_string(),
+                "More than 3 entries found".to_string()
+            ]
+        );
+
+        Ok(())
+    }
+}
--- a/codex-rs/core/src/tools/handlers/mcp.rs
+++ b/codex-rs/core/src/tools/handlers/mcp.rs
@@ -16,10 +16,7 @@ impl ToolHandler for McpHandler {
        ToolKind::Mcp
    }

-    async fn handle(
-        &self,
-        invocation: ToolInvocation<'_>,
-    ) -> Result<ToolOutput, FunctionCallError> {
+    async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> {
        let ToolInvocation {
            session,
            sub_id,
@@ -45,8 +42,8 @@ impl ToolHandler for McpHandler {
        let arguments_str = raw_arguments;

        let response = handle_mcp_tool_call(
-            session,
-            sub_id,
+            session.as_ref(),
+            &sub_id,
            call_id.clone(),
            server,
            tool,
--- a/codex-rs/core/src/tools/handlers/mod.rs
+++ b/codex-rs/core/src/tools/handlers/mod.rs
@@ -1,9 +1,12 @@
 pub mod apply_patch;
 mod exec_stream;
+mod grep_files;
+mod list_dir;
 mod mcp;
 mod plan;
 mod read_file;
 mod shell;
+mod test_sync;
 mod unified_exec;
 mod view_image;

@@ -11,9 +14,12 @@ pub use plan::PLAN_TOOL;

 pub use apply_patch::ApplyPatchHandler;
 pub use exec_stream::ExecStreamHandler;
+pub use grep_files::GrepFilesHandler;
+pub use list_dir::ListDirHandler;
 pub use mcp::McpHandler;
 pub use plan::PlanHandler;
 pub use read_file::ReadFileHandler;
 pub use shell::ShellHandler;
+pub use test_sync::TestSyncHandler;
 pub use unified_exec::UnifiedExecHandler;
 pub use view_image::ViewImageHandler;
--- a/codex-rs/core/src/tools/handlers/plan.rs
+++ b/codex-rs/core/src/tools/handlers/plan.rs
@@ -65,10 +65,7 @@ impl ToolHandler for PlanHandler {
        ToolKind::Function
    }

-    async fn handle(
-        &self,
-        invocation: ToolInvocation<'_>,
-    ) -> Result<ToolOutput, FunctionCallError> {
+    async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> {
        let ToolInvocation {
            session,
            sub_id,
@@ -86,7 +83,8 @@ impl ToolHandler for PlanHandler {
            }
        };

-        let content = handle_update_plan(session, arguments, sub_id.to_string(), call_id).await?;
+        let content =
+            handle_update_plan(session.as_ref(), arguments, sub_id.clone(), call_id).await?;

        Ok(ToolOutput::Function {
            content,
--- a/codex-rs/core/src/tools/handlers/read_file.rs
+++ b/codex-rs/core/src/tools/handlers/read_file.rs
--- a/codex-rs/core/src/tools/handlers/shell.rs
+++ b/codex-rs/core/src/tools/handlers/shell.rs
@@ -1,5 +1,6 @@
 use async_trait::async_trait;
 use codex_protocol::models::ShellToolCallParams;
+use std::sync::Arc;

 use crate::codex::TurnContext;
 use crate::exec::ExecParams;
@@ -40,10 +41,7 @@ impl ToolHandler for ShellHandler {
        )
    }

-    async fn handle(
-        &self,
-        invocation: ToolInvocation<'_>,
-    ) -> Result<ToolOutput, FunctionCallError> {
+    async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> {
        let ToolInvocation {
            session,
            turn,
@@ -62,14 +60,14 @@ impl ToolHandler for ShellHandler {
                            "failed to parse function arguments: {e:?}"
                        ))
                    })?;
-                let exec_params = Self::to_exec_params(params, turn);
+                let exec_params = Self::to_exec_params(params, turn.as_ref());
                let content = handle_container_exec_with_params(
                    tool_name.as_str(),
                    exec_params,
-                    session,
-                    turn,
-                    tracker,
-                    sub_id.to_string(),
+                    Arc::clone(&session),
+                    Arc::clone(&turn),
+                    Arc::clone(&tracker),
+                    sub_id.clone(),
                    call_id.clone(),
                )
                .await?;
@@ -79,14 +77,14 @@ impl ToolHandler for ShellHandler {
                })
            }
            ToolPayload::LocalShell { params } => {
-                let exec_params = Self::to_exec_params(params, turn);
+                let exec_params = Self::to_exec_params(params, turn.as_ref());
                let content = handle_container_exec_with_params(
                    tool_name.as_str(),
                    exec_params,
-                    session,
-                    turn,
-                    tracker,
-                    sub_id.to_string(),
+                    Arc::clone(&session),
+                    Arc::clone(&turn),
+                    Arc::clone(&tracker),
+                    sub_id.clone(),
                    call_id.clone(),
                )
                .await?;
--- a/codex-rs/core/src/tools/handlers/test_sync.rs
+++ b/codex-rs/core/src/tools/handlers/test_sync.rs
@@ -0,0 +1,158 @@
+use std::collections::HashMap;
+use std::collections::hash_map::Entry;
+use std::sync::Arc;
+use std::sync::OnceLock;
+use std::time::Duration;
+
+use async_trait::async_trait;
+use serde::Deserialize;
+use tokio::sync::Barrier;
+use tokio::time::sleep;
+
+use crate::function_tool::FunctionCallError;
+use crate::tools::context::ToolInvocation;
+use crate::tools::context::ToolOutput;
+use crate::tools::context::ToolPayload;
+use crate::tools::registry::ToolHandler;
+use crate::tools::registry::ToolKind;
+
+/// Tool handler for `test_sync_tool`: optionally sleeps and/or waits on a
+/// named barrier shared between concurrent invocations.
+pub struct TestSyncHandler;
+
+/// Barrier wait timeout, in milliseconds, used when `timeout_ms` is omitted.
+const DEFAULT_TIMEOUT_MS: u64 = 1_000;
+
+/// Lazily initialised, process-wide registry of barriers keyed by barrier id.
+static BARRIERS: OnceLock<tokio::sync::Mutex<HashMap<String, BarrierState>>> = OnceLock::new();
+
+/// A registered barrier together with the participant count it was created with.
+struct BarrierState {
+    barrier: Arc<Barrier>,
+    participants: usize,
+}
+
+/// Barrier parameters supplied by the caller.
+#[derive(Debug, Deserialize)]
+struct BarrierArgs {
+    /// Key under which the barrier is registered.
+    id: String,
+    /// Number of callers that must arrive before the barrier releases; must be non-zero.
+    participants: usize,
+    /// Maximum time to wait at the barrier, in milliseconds; must be non-zero.
+    #[serde(default = "default_timeout_ms")]
+    timeout_ms: u64,
+}
+
+/// JSON arguments accepted by the `test_sync_tool` tool; every field is optional.
+#[derive(Debug, Deserialize)]
+struct TestSyncArgs {
+    /// Milliseconds to sleep before the barrier step.
+    #[serde(default)]
+    sleep_before_ms: Option<u64>,
+    /// Milliseconds to sleep after the barrier step.
+    #[serde(default)]
+    sleep_after_ms: Option<u64>,
+    /// Barrier to wait on, if any.
+    #[serde(default)]
+    barrier: Option<BarrierArgs>,
+}
+
+/// Serde default for [`BarrierArgs::timeout_ms`].
+fn default_timeout_ms() -> u64 {
+    DEFAULT_TIMEOUT_MS
+}
+
+/// Returns the global barrier registry, creating it on first use.
+fn barrier_map() -> &'static tokio::sync::Mutex<HashMap<String, BarrierState>> {
+    BARRIERS.get_or_init(|| tokio::sync::Mutex::new(HashMap::new()))
+}
+
+#[async_trait]
+impl ToolHandler for TestSyncHandler {
+    /// This handler is exposed as a regular function tool.
+    fn kind(&self) -> ToolKind {
+        ToolKind::Function
+    }
+
+    /// Runs the requested steps in order: optional pre-sleep, optional barrier
+    /// wait, optional post-sleep. Returns `"ok"` on success.
+    ///
+    /// Payload, parsing and barrier failures are reported as
+    /// `FunctionCallError::RespondToModel`.
+    async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> {
+        let ToolInvocation { payload, .. } = invocation;
+
+        // Only function-style payloads carry the JSON argument string.
+        let ToolPayload::Function { arguments } = payload else {
+            return Err(FunctionCallError::RespondToModel(
+                "test_sync_tool handler received unsupported payload".to_string(),
+            ));
+        };
+
+        let TestSyncArgs {
+            sleep_before_ms,
+            sleep_after_ms,
+            barrier,
+        } = match serde_json::from_str::<TestSyncArgs>(&arguments) {
+            Ok(parsed) => parsed,
+            Err(err) => {
+                return Err(FunctionCallError::RespondToModel(format!(
+                    "failed to parse function arguments: {err:?}"
+                )));
+            }
+        };
+
+        // A missing or zero delay skips the sleep entirely.
+        match sleep_before_ms {
+            Some(millis) if millis > 0 => sleep(Duration::from_millis(millis)).await,
+            _ => {}
+        }
+
+        if let Some(barrier_args) = barrier {
+            wait_on_barrier(barrier_args).await?;
+        }
+
+        match sleep_after_ms {
+            Some(millis) if millis > 0 => sleep(Duration::from_millis(millis)).await,
+            _ => {}
+        }
+
+        Ok(ToolOutput::Function {
+            content: "ok".to_string(),
+            success: Some(true),
+        })
+    }
+}
+
+/// Waits on the barrier registered under `args.id`, creating it with
+/// `args.participants` parties if it is not registered yet.
+///
+/// Returns a `FunctionCallError::RespondToModel` when `participants` or
+/// `timeout_ms` is zero, when the id is already registered with a different
+/// participant count, or when the wait exceeds `timeout_ms`.
+async fn wait_on_barrier(args: BarrierArgs) -> Result<(), FunctionCallError> {
+    if args.participants == 0 {
+        return Err(FunctionCallError::RespondToModel(
+            "barrier participants must be greater than zero".to_string(),
+        ));
+    }
+
+    if args.timeout_ms == 0 {
+        return Err(FunctionCallError::RespondToModel(
+            "barrier timeout must be greater than zero".to_string(),
+        ));
+    }
+
+    let barrier_id = args.id.clone();
+    // Look up or register the barrier inside a block so the registry lock is
+    // released before waiting.
+    let barrier = {
+        let mut map = barrier_map().lock().await;
+        match map.entry(barrier_id.clone()) {
+            Entry::Occupied(entry) => {
+                let state = entry.get();
+                // All callers sharing an id must agree on the participant count.
+                if state.participants != args.participants {
+                    let existing = state.participants;
+                    return Err(FunctionCallError::RespondToModel(format!(
+                        "barrier {barrier_id} already registered with {existing} participants"
+                    )));
+                }
+                state.barrier.clone()
+            }
+            Entry::Vacant(entry) => {
+                let barrier = Arc::new(Barrier::new(args.participants));
+                entry.insert(BarrierState {
+                    barrier: barrier.clone(),
+                    participants: args.participants,
+                });
+                barrier
+            }
+        }
+    };
+
+    let timeout = Duration::from_millis(args.timeout_ms);
+    // NOTE(review): on timeout this returns early via `?`, before the cleanup
+    // below, so the entry stays registered under `barrier_id` — confirm that
+    // is acceptable for callers that reuse ids.
+    let wait_result = tokio::time::timeout(timeout, barrier.wait())
+        .await
+        .map_err(|_| {
+            FunctionCallError::RespondToModel("test_sync_tool barrier wait timed out".to_string())
+        })?;
+
+    // Only the leader unregisters the barrier, and only if the registry still
+    // holds this exact barrier instance.
+    if wait_result.is_leader() {
+        let mut map = barrier_map().lock().await;
+        if let Some(state) = map.get(&barrier_id)
+            && Arc::ptr_eq(&state.barrier, &barrier)
+        {
+            map.remove(&barrier_id);
+        }
+    }
+
+    Ok(())
+}
--- a/codex-rs/core/src/tools/handlers/unified_exec.rs
+++ b/codex-rs/core/src/tools/handlers/unified_exec.rs
@@ -33,10 +33,7 @@ impl ToolHandler for UnifiedExecHandler {
        )
    }

-    async fn handle(
-        &self,
-        invocation: ToolInvocation<'_>,
-    ) -> Result<ToolOutput, FunctionCallError> {
+    async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> {
        let ToolInvocation {
            session, payload, ..
        } = invocation;
--- a/codex-rs/core/src/tools/handlers/view_image.rs
+++ b/codex-rs/core/src/tools/handlers/view_image.rs
@@ -26,10 +26,7 @@ impl ToolHandler for ViewImageHandler {
        ToolKind::Function
    }

-    async fn handle(
-        &self,
-        invocation: ToolInvocation<'_>,
-    ) -> Result<ToolOutput, FunctionCallError> {
+    async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> {
        let ToolInvocation {
            session,
            turn,
--- a/codex-rs/core/src/tools/mod.rs
+++ b/codex-rs/core/src/tools/mod.rs
@@ -1,5 +1,6 @@
 pub mod context;
 pub(crate) mod handlers;
+pub mod parallel;
 pub mod registry;
 pub mod router;
 pub mod spec;
@@ -21,7 +22,7 @@ use crate::executor::linkers::PreparedExec;
 use crate::function_tool::FunctionCallError;
 use crate::tools::context::ApplyPatchCommandContext;
 use crate::tools::context::ExecCommandContext;
-use crate::turn_diff_tracker::TurnDiffTracker;
+use crate::tools::context::SharedTurnDiffTracker;
 use codex_apply_patch::MaybeApplyPatchVerified;
 use codex_apply_patch::maybe_parse_apply_patch_verified;
 use codex_protocol::protocol::AskForApproval;
@@ -29,6 +30,7 @@ use codex_utils_string::take_bytes_at_char_boundary;
 use codex_utils_string::take_last_bytes_at_char_boundary;
 pub use router::ToolRouter;
 use serde::Serialize;
+use std::sync::Arc;
 use tracing::trace;

 // Model-formatting limits: clients get full streams; only content sent to the model is truncated.
@@ -48,9 +50,9 @@ pub(crate) const TELEMETRY_PREVIEW_TRUNCATION_NOTICE: &str =
 pub(crate) async fn handle_container_exec_with_params(
    tool_name: &str,
    params: ExecParams,
-    sess: &Session,
-    turn_context: &TurnContext,
-    turn_diff_tracker: &mut TurnDiffTracker,
+    sess: Arc<Session>,
+    turn_context: Arc<TurnContext>,
+    turn_diff_tracker: SharedTurnDiffTracker,
    sub_id: String,
    call_id: String,
 ) -> Result<String, FunctionCallError> {
@@ -68,7 +70,15 @@ pub(crate) async fn handle_container_exec_with_params(
    // check if this was a patch, and apply it if so
    let apply_patch_exec = match maybe_parse_apply_patch_verified(&params.command, &params.cwd) {
        MaybeApplyPatchVerified::Body(changes) => {
-            match apply_patch::apply_patch(sess, turn_context, &sub_id, &call_id, changes).await {
+            match apply_patch::apply_patch(
+                sess.as_ref(),
+                turn_context.as_ref(),
+                &sub_id,
+                &call_id,
+                changes,
+            )
+            .await
+            {
                InternalApplyPatchInvocation::Output(item) => return item,
                InternalApplyPatchInvocation::DelegateToExec(apply_patch_exec) => {
                    Some(apply_patch_exec)
@@ -139,12 +149,13 @@ pub(crate) async fn handle_container_exec_with_params(

    let output_result = sess
        .run_exec_with_events(
-            turn_diff_tracker,
+            turn_diff_tracker.clone(),
            prepared_exec,
            turn_context.approval_policy,
        )
        .await;

+    // always make sure to truncate the output if its length isn't controlled.
    match output_result {
        Ok(output) => {
            let ExecToolCallOutput { exit_code, .. } = &output;
@@ -155,13 +166,16 @@ pub(crate) async fn handle_container_exec_with_params(
                Err(FunctionCallError::RespondToModel(content))
            }
        }
-        Err(ExecError::Function(err)) => Err(err),
+        Err(ExecError::Function(err)) => Err(truncate_function_error(err)),
        Err(ExecError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output }))) => Err(
            FunctionCallError::RespondToModel(format_exec_output_apply_patch(&output)),
        ),
-        Err(ExecError::Codex(err)) => Err(FunctionCallError::RespondToModel(format!(
-            "execution error: {err:?}"
-        ))),
+        Err(ExecError::Codex(err)) => {
+            let message = format!("execution error: {err:?}");
+            Err(FunctionCallError::RespondToModel(format_exec_output(
+                &message,
+            )))
+        }
    }
 }

@@ -206,26 +220,42 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
        aggregated_output, ..
    } = exec_output;

-    // Head+tail truncation for the model: show the beginning and end with an elision.
-    // Clients still receive full streams; only this formatted summary is capped.
-
-    let mut s = &aggregated_output.text;
-    let prefixed_str: String;
+    let content = aggregated_output.text.as_str();

    if exec_output.timed_out {
-        prefixed_str = format!(
-            "command timed out after {} milliseconds\n",
+        let prefixed = format!(
+            "command timed out after {} milliseconds\n{content}",
            exec_output.duration.as_millis()
-        ) + s;
-        s = &prefixed_str;
+        );
+        return format_exec_output(&prefixed);
    }

-    let total_lines = s.lines().count();
-    if s.len() <= MODEL_FORMAT_MAX_BYTES && total_lines <= MODEL_FORMAT_MAX_LINES {
-        return s.to_string();
-    }
+    format_exec_output(content)
+}

-    let segments: Vec<&str> = s.split_inclusive('\n').collect();
+/// Runs the message carried by a `RespondToModel` or `Fatal` error through
+/// `format_exec_output`; every other variant is returned unchanged.
+fn truncate_function_error(err: FunctionCallError) -> FunctionCallError {
+    match err {
+        FunctionCallError::RespondToModel(text) => {
+            let formatted = format_exec_output(&text);
+            FunctionCallError::RespondToModel(formatted)
+        }
+        FunctionCallError::Fatal(text) => {
+            let formatted = format_exec_output(&text);
+            FunctionCallError::Fatal(formatted)
+        }
+        unchanged => unchanged,
+    }
+}
+
+/// Formats `content` for the model, truncating it when it is too large.
+///
+/// Content that fits within both `MODEL_FORMAT_MAX_BYTES` and
+/// `MODEL_FORMAT_MAX_LINES` is returned as-is. Otherwise the result is a
+/// `Total output lines: N` header, a blank line, and the output of
+/// `truncate_formatted_exec_output`.
+fn format_exec_output(content: &str) -> String {
+    // Head+tail truncation for the model: show the beginning and end with an elision.
+    // Clients still receive full streams; only this formatted summary is capped.
+    let total_lines = content.lines().count();
+    let too_many_bytes = content.len() > MODEL_FORMAT_MAX_BYTES;
+    let too_many_lines = total_lines > MODEL_FORMAT_MAX_LINES;
+    if too_many_bytes || too_many_lines {
+        let output = truncate_formatted_exec_output(content, total_lines);
+        format!("Total output lines: {total_lines}\n\n{output}")
+    } else {
+        content.to_string()
+    }
+}
+
+fn truncate_formatted_exec_output(content: &str, total_lines: usize) -> String {
+    let segments: Vec<&str> = content.split_inclusive('\n').collect();
    let head_take = MODEL_FORMAT_HEAD_LINES.min(segments.len());
    let tail_take = MODEL_FORMAT_TAIL_LINES.min(segments.len().saturating_sub(head_take));
    let omitted = segments.len().saturating_sub(head_take + tail_take);
@@ -236,9 +266,9 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
        .map(|segment| segment.len())
        .sum();
    let tail_slice_start: usize = if tail_take == 0 {
-        s.len()
+        content.len()
    } else {
-        s.len()
+        content.len()
            - segments
                .iter()
                .rev()
@@ -260,9 +290,9 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
        head_budget = MODEL_FORMAT_MAX_BYTES.saturating_sub(marker.len());
    }

-    let head_slice = &s[..head_slice_end];
+    let head_slice = &content[..head_slice_end];
    let head_part = take_bytes_at_char_boundary(head_slice, head_budget);
-    let mut result = String::with_capacity(MODEL_FORMAT_MAX_BYTES.min(s.len()));
+    let mut result = String::with_capacity(MODEL_FORMAT_MAX_BYTES.min(content.len()));

    result.push_str(head_part);
    result.push_str(&marker);
@@ -272,9 +302,86 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
        return result;
    }

-    let tail_slice = &s[tail_slice_start..];
+    let tail_slice = &content[tail_slice_start..];
    let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining);
    result.push_str(tail_part);

    result
 }
+
+// Tests for the model-facing truncation helpers (`format_exec_output` and
+// `truncate_function_error`).
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use regex_lite::Regex;
+
+    // Asserts that `message` matches the truncated-output pattern built from `line` and
+    // `total_lines`, and that the captured body is at most `MODEL_FORMAT_MAX_BYTES` bytes.
+    // Panics with the offending pattern/message when the regex fails to compile or match.
+    fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) {
+        let pattern = truncated_message_pattern(line, total_lines);
+        let regex = Regex::new(&pattern).unwrap_or_else(|err| {
+            panic!("failed to compile regex {pattern}: {err}");
+        });
+        let captures = regex
+            .captures(message)
+            .unwrap_or_else(|| panic!("message failed to match pattern {pattern}: {message}"));
+        let body = captures
+            .name("body")
+            .expect("missing body capture")
+            .as_str();
+        assert!(
+            body.len() <= MODEL_FORMAT_MAX_BYTES,
+            "body exceeds byte limit: {} bytes",
+            body.len()
+        );
+    }
+
+    // Builds the regex a truncated message is expected to match: a
+    // `Total output lines: N` header, a blank line, then a `body` capture that starts with
+    // `line` (regex-escaped) and contains the `[... omitted X of N lines ...]` marker.
+    // The omitted count is derived from `MODEL_FORMAT_HEAD_LINES` / `MODEL_FORMAT_TAIL_LINES`.
+    fn truncated_message_pattern(line: &str, total_lines: usize) -> String {
+        let head_take = MODEL_FORMAT_HEAD_LINES.min(total_lines);
+        let tail_take = MODEL_FORMAT_TAIL_LINES.min(total_lines.saturating_sub(head_take));
+        let omitted = total_lines.saturating_sub(head_take + tail_take);
+        let escaped_line = regex_lite::escape(line);
+        // `(?s)` lets `.` match newlines; doubled braces are literal braces in `format!`.
+        format!(
+            r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} omitted {omitted} of {total_lines} lines \.{{3}}]\n\n.*)$",
+        )
+    }
+
+    // `format_exec_output` on 2,500 repeated lines must produce the truncated layout and
+    // differ from its input.
+    #[test]
+    fn truncate_formatted_exec_output_truncates_large_error() {
+        let line = "very long execution error line that should trigger truncation\n";
+        let large_error = line.repeat(2_500); // way beyond both byte and line limits
+
+        let truncated = format_exec_output(&large_error);
+
+        let total_lines = large_error.lines().count();
+        assert_truncated_message_matches(&truncated, line, total_lines);
+        assert_ne!(truncated, large_error);
+    }
+
+    // A large `RespondToModel` message is truncated and the variant is preserved.
+    #[test]
+    fn truncate_function_error_trims_respond_to_model() {
+        let line = "respond-to-model error that should be truncated\n";
+        let huge = line.repeat(3_000);
+        let total_lines = huge.lines().count();
+
+        let err = truncate_function_error(FunctionCallError::RespondToModel(huge));
+        match err {
+            FunctionCallError::RespondToModel(message) => {
+                assert_truncated_message_matches(&message, line, total_lines);
+            }
+            other => panic!("unexpected error variant: {other:?}"),
+        }
+    }
+
+    // A large `Fatal` message is truncated and the variant is preserved.
+    #[test]
+    fn truncate_function_error_trims_fatal() {
+        let line = "fatal error output that should be truncated\n";
+        let huge = line.repeat(3_000);
+        let total_lines = huge.lines().count();
+
+        let err = truncate_function_error(FunctionCallError::Fatal(huge));
+        match err {
+            FunctionCallError::Fatal(message) => {
+                assert_truncated_message_matches(&message, line, total_lines);
+            }
+            other => panic!("unexpected error variant: {other:?}"),
+        }
+    }
+}
--- a/codex-rs/core/src/tools/parallel.rs
+++ b/codex-rs/core/src/tools/parallel.rs
@@ -0,0 +1,80 @@
+use std::sync::Arc;
+
+use tokio::sync::RwLock;
+use tokio_util::either::Either;
+use tokio_util::task::AbortOnDropHandle;
+
+use crate::codex::Session;
+use crate::codex::TurnContext;
+use crate::error::CodexErr;
+use crate::function_tool::FunctionCallError;
+use crate::tools::context::SharedTurnDiffTracker;
+use crate::tools::router::ToolCall;
+use crate::tools::router::ToolRouter;
+use codex_protocol::models::ResponseInputItem;
+
+/// Shared handles and identifiers that `handle_tool_call` hands to
+/// `ToolRouter::dispatch_tool_call` for each tool call.
+pub(crate) struct ToolCallRuntime {
+    /// Router used to look up parallel support and to dispatch the call.
+    router: Arc<ToolRouter>,
+    /// Session handle forwarded to the dispatched call.
+    session: Arc<Session>,
+    /// Turn context forwarded to the dispatched call.
+    turn_context: Arc<TurnContext>,
+    /// Shared turn diff tracker forwarded to the dispatched call.
+    tracker: SharedTurnDiffTracker,
+    /// Submission id forwarded to the dispatched call.
+    sub_id: String,
+    /// Gate acquired by every spawned tool call: a read guard when the tool
+    /// supports parallel calls, a write guard otherwise.
+    parallel_execution: Arc<RwLock<()>>,
+}
+
+impl ToolCallRuntime {
+    /// Creates a runtime around the given shared handles, with a fresh
+    /// parallel-execution lock.
+    pub(crate) fn new(
+        router: Arc<ToolRouter>,
+        session: Arc<Session>,
+        turn_context: Arc<TurnContext>,
+        tracker: SharedTurnDiffTracker,
+        sub_id: String,
+    ) -> Self {
+        let parallel_execution = Arc::new(RwLock::new(()));
+        Self {
+            router,
+            session,
+            turn_context,
+            tracker,
+            sub_id,
+            parallel_execution,
+        }
+    }
+
+    /// Spawns `call` on a Tokio task and returns a future resolving to its result.
+    ///
+    /// The task first acquires the shared lock (a read guard when the router reports
+    /// that the tool supports parallel calls, a write guard otherwise) and keeps it
+    /// while the router dispatches the call. Any failure, whether a
+    /// `FunctionCallError` from the dispatch or an error from awaiting the task, is
+    /// reported as `CodexErr::Fatal`.
+    pub(crate) fn handle_tool_call(
+        &self,
+        call: ToolCall,
+    ) -> impl std::future::Future<Output = Result<ResponseInputItem, CodexErr>> {
+        let supports_parallel = self.router.tool_supports_parallel(&call.tool_name);
+
+        // Owned copies moved into the spawned task.
+        let lock = self.parallel_execution.clone();
+        let router = self.router.clone();
+        let session = self.session.clone();
+        let turn = self.turn_context.clone();
+        let tracker = self.tracker.clone();
+        let sub_id = self.sub_id.clone();
+
+        let task = tokio::spawn(async move {
+            // The guard lives until the dispatch below has finished.
+            let _guard = if supports_parallel {
+                Either::Left(lock.read().await)
+            } else {
+                Either::Right(lock.write().await)
+            };
+
+            router
+                .dispatch_tool_call(session, turn, tracker, sub_id, call)
+                .await
+        });
+        let handle: AbortOnDropHandle<Result<ResponseInputItem, FunctionCallError>> =
+            AbortOnDropHandle::new(task);
+
+        async move {
+            let outcome = match handle.await {
+                Ok(outcome) => outcome,
+                Err(err) => {
+                    return Err(CodexErr::Fatal(format!(
+                        "tool task failed to receive: {err:?}"
+                    )));
+                }
+            };
+
+            outcome.map_err(|failure| match failure {
+                FunctionCallError::Fatal(message) => CodexErr::Fatal(message),
+                other => CodexErr::Fatal(other.to_string()),
+            })
+        }
+    }
+}
--- a/codex-rs/core/src/tools/registry.rs
+++ b/codex-rs/core/src/tools/registry.rs
@@ -32,8 +32,7 @@ pub trait ToolHandler: Send + Sync {
        )
    }

-    async fn handle(&self, invocation: ToolInvocation<'_>)
-    -> Result<ToolOutput, FunctionCallError>;
+    async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError>;
 }

 pub struct ToolRegistry {
@@ -57,9 +56,9 @@ impl ToolRegistry {
    //     }
    // }

-    pub async fn dispatch<'a>(
+    pub async fn dispatch(
        &self,
-        invocation: ToolInvocation<'a>,
+        invocation: ToolInvocation,
    ) -> Result<ResponseInputItem, FunctionCallError> {
        let tool_name = invocation.tool_name.clone();
        let call_id_owned = invocation.call_id.clone();
@@ -137,9 +136,24 @@ impl ToolRegistry {
    }
 }

+/// A `ToolSpec` paired with a flag recording whether the tool supports
+/// parallel tool calls.
+#[derive(Debug, Clone)]
+pub struct ConfiguredToolSpec {
+    /// The tool specification itself.
+    pub spec: ToolSpec,
+    /// Whether this tool supports parallel tool calls.
+    pub supports_parallel_tool_calls: bool,
+}
+
+impl ConfiguredToolSpec {
+    /// Pairs `spec` with its `supports_parallel_tool_calls` flag.
+    pub fn new(spec: ToolSpec, supports_parallel_tool_calls: bool) -> Self {
+        Self {
+            spec,
+            supports_parallel_tool_calls,
+        }
+    }
+}
+
 pub struct ToolRegistryBuilder {
    handlers: HashMap<String, Arc<dyn ToolHandler>>,
-    specs: Vec<ToolSpec>,
+    specs: Vec<ConfiguredToolSpec>,
 }

 impl ToolRegistryBuilder {
@@ -151,7 +165,16 @@ impl ToolRegistryBuilder {
    }

    pub fn push_spec(&mut self, spec: ToolSpec) {
-        self.specs.push(spec);
+        self.push_spec_with_parallel_support(spec, false);
+    }
+
+    pub fn push_spec_with_parallel_support(
+        &mut self,
+        spec: ToolSpec,
+        supports_parallel_tool_calls: bool,
+    ) {
+        self.specs
+            .push(ConfiguredToolSpec::new(spec, supports_parallel_tool_calls));
    }

    pub fn register_handler(&mut self, name: impl Into<String>, handler: Arc<dyn ToolHandler>) {
@@ -183,7 +206,7 @@ impl ToolRegistryBuilder {
    //     }
    // }

-    pub fn build(self) -> (Vec<ToolSpec>, ToolRegistry) {
+    pub fn build(self) -> (Vec<ConfiguredToolSpec>, ToolRegistry) {
        let registry = ToolRegistry::new(self.handlers);
        (self.specs, registry)
    }
--- a/codex-rs/core/src/tools/router.rs
+++ b/codex-rs/core/src/tools/router.rs
@@ -1,15 +1,17 @@
 use std::collections::HashMap;
+use std::sync::Arc;

 use crate::client_common::tools::ToolSpec;
 use crate::codex::Session;
 use crate::codex::TurnContext;
 use crate::function_tool::FunctionCallError;
+use crate::tools::context::SharedTurnDiffTracker;
 use crate::tools::context::ToolInvocation;
 use crate::tools::context::ToolPayload;
+use crate::tools::registry::ConfiguredToolSpec;
 use crate::tools::registry::ToolRegistry;
 use crate::tools::spec::ToolsConfig;
 use crate::tools::spec::build_specs;
-use crate::turn_diff_tracker::TurnDiffTracker;
 use codex_protocol::models::LocalShellAction;
 use codex_protocol::models::ResponseInputItem;
 use codex_protocol::models::ResponseItem;
@@ -24,7 +26,7 @@ pub struct ToolCall {

 pub struct ToolRouter {
    registry: ToolRegistry,
-    specs: Vec<ToolSpec>,
+    specs: Vec<ConfiguredToolSpec>,
 }

 impl ToolRouter {
@@ -34,11 +36,22 @@ impl ToolRouter {
    ) -> Self {
        let builder = build_specs(config, mcp_tools);
        let (specs, registry) = builder.build();
+
        Self { registry, specs }
    }

-    pub fn specs(&self) -> &[ToolSpec] {
-        &self.specs
+    /// Returns clones of the bare `ToolSpec`s, without their parallel-support flags.
+    pub fn specs(&self) -> Vec<ToolSpec> {
+        let bare_specs = self.specs.iter().map(|configured| &configured.spec);
+        bare_specs.cloned().collect()
+    }
+
+    /// Reports whether a spec named `tool_name` is registered with parallel support enabled.
+    pub fn tool_supports_parallel(&self, tool_name: &str) -> bool {
+        self.specs.iter().any(|configured| {
+            configured.supports_parallel_tool_calls && configured.spec.name() == tool_name
+        })
+    }

    pub fn build_tool_call(
@@ -118,10 +131,10 @@ impl ToolRouter {

    pub async fn dispatch_tool_call(
        &self,
-        session: &Session,
-        turn: &TurnContext,
-        tracker: &mut TurnDiffTracker,
-        sub_id: &str,
+        session: Arc<Session>,
+        turn: Arc<TurnContext>,
+        tracker: SharedTurnDiffTracker,
+        sub_id: String,
        call: ToolCall,
    ) -> Result<ResponseInputItem, FunctionCallError> {
        let ToolCall {
--- a/codex-rs/core/src/tools/spec.rs
+++ b/codex-rs/core/src/tools/spec.rs
@@ -1,5 +1,7 @@
 use crate::client_common::tools::ResponsesApiTool;
 use crate::client_common::tools::ToolSpec;
+use crate::features::Feature;
+use crate::features::Features;
 use crate::model_family::ModelFamily;
 use crate::tools::handlers::PLAN_TOOL;
 use crate::tools::handlers::apply_patch::ApplyPatchToolType;
@@ -33,26 +35,23 @@ pub(crate) struct ToolsConfig {

 pub(crate) struct ToolsConfigParams<'a> {
    pub(crate) model_family: &'a ModelFamily,
-    pub(crate) include_plan_tool: bool,
-    pub(crate) include_apply_patch_tool: bool,
-    pub(crate) include_web_search_request: bool,
-    pub(crate) use_streamable_shell_tool: bool,
-    pub(crate) include_view_image_tool: bool,
-    pub(crate) experimental_unified_exec_tool: bool,
+    pub(crate) features: &'a Features,
 }

 impl ToolsConfig {
    pub fn new(params: &ToolsConfigParams) -> Self {
        let ToolsConfigParams {
            model_family,
-            include_plan_tool,
-            include_apply_patch_tool,
-            include_web_search_request,
-            use_streamable_shell_tool,
-            include_view_image_tool,
-            experimental_unified_exec_tool,
+            features,
        } = params;
-        let shell_type = if *use_streamable_shell_tool {
+        let use_streamable_shell_tool = features.enabled(Feature::StreamableShell);
+        let experimental_unified_exec_tool = features.enabled(Feature::UnifiedExec);
+        let include_plan_tool = features.enabled(Feature::PlanTool);
+        let include_apply_patch_tool = features.enabled(Feature::ApplyPatchFreeform);
+        let include_web_search_request = features.enabled(Feature::WebSearchRequest);
+        let include_view_image_tool = features.enabled(Feature::ViewImageTool);
+
+        let shell_type = if use_streamable_shell_tool {
            ConfigShellToolType::Streamable
        } else if model_family.uses_local_shell_tool {
            ConfigShellToolType::Local
@@ -64,7 +63,7 @@ impl ToolsConfig {
            Some(ApplyPatchToolType::Freeform) => Some(ApplyPatchToolType::Freeform),
            Some(ApplyPatchToolType::Function) => Some(ApplyPatchToolType::Function),
            None => {
-                if *include_apply_patch_tool {
+                if include_apply_patch_tool {
                    Some(ApplyPatchToolType::Freeform)
                } else {
                    None
@@ -74,11 +73,11 @@ impl ToolsConfig {

        Self {
            shell_type,
-            plan_tool: *include_plan_tool,
+            plan_tool: include_plan_tool,
            apply_patch_tool_type,
-            web_search_request: *include_web_search_request,
-            include_view_image_tool: *include_view_image_tool,
-            experimental_unified_exec_tool: *experimental_unified_exec_tool,
+            web_search_request: include_web_search_request,
+            include_view_image_tool,
+            experimental_unified_exec_tool,
            experimental_supported_tools: model_family.experimental_supported_tools.clone(),
        }
    }
@@ -258,6 +257,118 @@ fn create_view_image_tool() -> ToolSpec {
    })
 }

+/// Builds the spec for `test_sync_tool`, described as an internal synchronization
+/// helper for Codex integration tests.
+///
+/// The parameters are the `sleep_before_ms` / `sleep_after_ms` delays and a `barrier`
+/// object (`id`, `participants`, `timeout_ms`) whose `id` and `participants` are
+/// required. No top-level parameter is required.
+fn create_test_sync_tool() -> ToolSpec {
+    // Shorthand for a number-typed property with the given description.
+    let number = |description: &str| JsonSchema::Number {
+        description: Some(description.to_string()),
+    };
+
+    let barrier_properties = BTreeMap::from([
+        (
+            "id".to_string(),
+            JsonSchema::String {
+                description: Some(
+                    "Identifier shared by concurrent calls that should rendezvous".to_string(),
+                ),
+            },
+        ),
+        (
+            "participants".to_string(),
+            number("Number of tool calls that must arrive before the barrier opens"),
+        ),
+        (
+            "timeout_ms".to_string(),
+            number("Maximum time in milliseconds to wait at the barrier"),
+        ),
+    ]);
+    let barrier = JsonSchema::Object {
+        properties: barrier_properties,
+        required: Some(vec!["id".to_string(), "participants".to_string()]),
+        additional_properties: Some(false.into()),
+    };
+
+    let properties = BTreeMap::from([
+        (
+            "sleep_before_ms".to_string(),
+            number("Optional delay in milliseconds before any other action"),
+        ),
+        (
+            "sleep_after_ms".to_string(),
+            number("Optional delay in milliseconds after completing the barrier"),
+        ),
+        ("barrier".to_string(), barrier),
+    ]);
+
+    ToolSpec::Function(ResponsesApiTool {
+        name: "test_sync_tool".to_string(),
+        description: "Internal synchronization helper used by Codex integration tests.".to_string(),
+        strict: false,
+        parameters: JsonSchema::Object {
+            properties,
+            required: None,
+            additional_properties: Some(false.into()),
+        },
+    })
+}
+
+/// Builds the spec for the `grep_files` tool.
+///
+/// The parameters are `pattern` (required), `include`, `path` and `limit`;
+/// additional properties are disallowed.
+fn create_grep_files_tool() -> ToolSpec {
+    // Shorthand for a string-typed property with the given description.
+    let string = |description: &str| JsonSchema::String {
+        description: Some(description.to_string()),
+    };
+
+    let properties = BTreeMap::from([
+        (
+            "pattern".to_string(),
+            string("Regular expression pattern to search for."),
+        ),
+        (
+            "include".to_string(),
+            string(
+                "Optional glob that limits which files are searched (e.g. \"*.rs\" or \
+                 \"*.{ts,tsx}\").",
+            ),
+        ),
+        (
+            "path".to_string(),
+            string(
+                "Directory or file path to search. Defaults to the session's working directory.",
+            ),
+        ),
+        (
+            "limit".to_string(),
+            JsonSchema::Number {
+                description: Some(
+                    "Maximum number of file paths to return (defaults to 100).".to_string(),
+                ),
+            },
+        ),
+    ]);
+
+    let parameters = JsonSchema::Object {
+        properties,
+        required: Some(vec!["pattern".to_string()]),
+        additional_properties: Some(false.into()),
+    };
+
+    ToolSpec::Function(ResponsesApiTool {
+        name: "grep_files".to_string(),
+        description: "Finds files whose contents match the pattern and lists them by modification \
+                      time."
+            .to_string(),
+        strict: false,
+        parameters,
+    })
+}
+
 fn create_read_file_tool() -> ToolSpec {
    let mut properties = BTreeMap::new();
    properties.insert(
@@ -280,11 +391,72 @@ fn create_read_file_tool() -> ToolSpec {
            description: Some("The maximum number of lines to return.".to_string()),
        },
    );
+    properties.insert(
+        "mode".to_string(),
+        JsonSchema::String {
+            description: Some(
+                "Optional mode selector: \"slice\" for simple ranges (default) or \"indentation\" \
+                 to expand around an anchor line."
+                    .to_string(),
+            ),
+        },
+    );
+
+    let mut indentation_properties = BTreeMap::new();
+    indentation_properties.insert(
+        "anchor_line".to_string(),
+        JsonSchema::Number {
+            description: Some(
+                "Anchor line to center the indentation lookup on (defaults to offset).".to_string(),
+            ),
+        },
+    );
+    indentation_properties.insert(
+        "max_levels".to_string(),
+        JsonSchema::Number {
+            description: Some(
+                "How many parent indentation levels (smaller indents) to include.".to_string(),
+            ),
+        },
+    );
+    indentation_properties.insert(
+        "include_siblings".to_string(),
+        JsonSchema::Boolean {
+            description: Some(
+                "When true, include additional blocks that share the anchor indentation."
+                    .to_string(),
+            ),
+        },
+    );
+    indentation_properties.insert(
+        "include_header".to_string(),
+        JsonSchema::Boolean {
+            description: Some(
+                "Include doc comments or attributes directly above the selected block.".to_string(),
+            ),
+        },
+    );
+    indentation_properties.insert(
+        "max_lines".to_string(),
+        JsonSchema::Number {
+            description: Some(
+                "Hard cap on the number of lines returned when using indentation mode.".to_string(),
+            ),
+        },
+    );
+    properties.insert(
+        "indentation".to_string(),
+        JsonSchema::Object {
+            properties: indentation_properties,
+            required: None,
+            additional_properties: Some(false.into()),
+        },
+    );

    ToolSpec::Function(ResponsesApiTool {
        name: "read_file".to_string(),
        description:
-            "Reads a local file with 1-indexed line numbers and returns up to the requested number of lines."
+            "Reads a local file with 1-indexed line numbers, supporting slice and indentation-aware block modes."
                .to_string(),
        strict: false,
        parameters: JsonSchema::Object {
@@ -294,6 +466,51 @@ fn create_read_file_tool() -> ToolSpec {
        },
    })
 }
+
+/// Builds the spec for the `list_dir` tool.
+///
+/// The parameters are `dir_path` (required), `offset`, `limit` and `depth`;
+/// additional properties are disallowed.
+fn create_list_dir_tool() -> ToolSpec {
+    // Shorthand for a number-typed property with the given description.
+    let number = |description: &str| JsonSchema::Number {
+        description: Some(description.to_string()),
+    };
+
+    let properties = BTreeMap::from([
+        (
+            "dir_path".to_string(),
+            JsonSchema::String {
+                description: Some("Absolute path to the directory to list.".to_string()),
+            },
+        ),
+        (
+            "offset".to_string(),
+            number("The entry number to start listing from. Must be 1 or greater."),
+        ),
+        (
+            "limit".to_string(),
+            number("The maximum number of entries to return."),
+        ),
+        (
+            "depth".to_string(),
+            number("The maximum directory depth to traverse. Must be 1 or greater."),
+        ),
+    ]);
+
+    let parameters = JsonSchema::Object {
+        properties,
+        required: Some(vec!["dir_path".to_string()]),
+        additional_properties: Some(false.into()),
+    };
+
+    ToolSpec::Function(ResponsesApiTool {
+        name: "list_dir".to_string(),
+        description:
+            "Lists entries in a local directory with 1-indexed entry numbers and simple type labels."
+                .to_string(),
+        strict: false,
+        parameters,
+    })
+}
 /// TODO(dylan): deprecate once we get rid of json tool
 #[derive(Serialize, Deserialize)]
 pub(crate) struct ApplyPatchToolArgs {
@@ -503,10 +720,13 @@ pub(crate) fn build_specs(
    use crate::exec_command::create_write_stdin_tool_for_responses_api;
    use crate::tools::handlers::ApplyPatchHandler;
    use crate::tools::handlers::ExecStreamHandler;
+    use crate::tools::handlers::GrepFilesHandler;
+    use crate::tools::handlers::ListDirHandler;
    use crate::tools::handlers::McpHandler;
    use crate::tools::handlers::PlanHandler;
    use crate::tools::handlers::ReadFileHandler;
    use crate::tools::handlers::ShellHandler;
+    use crate::tools::handlers::TestSyncHandler;
    use crate::tools::handlers::UnifiedExecHandler;
    use crate::tools::handlers::ViewImageHandler;
    use std::sync::Arc;
@@ -569,20 +789,47 @@ pub(crate) fn build_specs(

    if config
        .experimental_supported_tools
-        .iter()
-        .any(|tool| tool == "read_file")
+        .contains(&"grep_files".to_string())
+    {
+        let grep_files_handler = Arc::new(GrepFilesHandler);
+        builder.push_spec_with_parallel_support(create_grep_files_tool(), true);
+        builder.register_handler("grep_files", grep_files_handler);
+    }
+
+    if config
+        .experimental_supported_tools
+        .contains(&"read_file".to_string())
    {
        let read_file_handler = Arc::new(ReadFileHandler);
-        builder.push_spec(create_read_file_tool());
+        builder.push_spec_with_parallel_support(create_read_file_tool(), true);
        builder.register_handler("read_file", read_file_handler);
    }

+    if config
+        .experimental_supported_tools
+        .iter()
+        .any(|tool| tool == "list_dir")
+    {
+        let list_dir_handler = Arc::new(ListDirHandler);
+        builder.push_spec_with_parallel_support(create_list_dir_tool(), true);
+        builder.register_handler("list_dir", list_dir_handler);
+    }
+
+    if config
+        .experimental_supported_tools
+        .contains(&"test_sync_tool".to_string())
+    {
+        let test_sync_handler = Arc::new(TestSyncHandler);
+        builder.push_spec_with_parallel_support(create_test_sync_tool(), true);
+        builder.register_handler("test_sync_tool", test_sync_handler);
+    }
+
    if config.web_search_request {
        builder.push_spec(ToolSpec::WebSearch {});
    }

    if config.include_view_image_tool {
-        builder.push_spec(create_view_image_tool());
+        builder.push_spec_with_parallel_support(create_view_image_tool(), true);
        builder.register_handler("view_image", view_image_handler);
    }

@@ -610,20 +857,25 @@ pub(crate) fn build_specs(
 mod tests {
    use crate::client_common::tools::FreeformTool;
    use crate::model_family::find_family_for_model;
+    use crate::tools::registry::ConfiguredToolSpec;
    use mcp_types::ToolInputSchema;
    use pretty_assertions::assert_eq;

    use super::*;

-    fn assert_eq_tool_names(tools: &[ToolSpec], expected_names: &[&str]) {
+    /// Returns the name a `ToolSpec` is identified by: the embedded `name`
+    /// for function and freeform tools, or a fixed label for the built-in
+    /// local shell and web search tools.
+    fn tool_name(tool: &ToolSpec) -> &str {
+        match tool {
+            ToolSpec::Function(ResponsesApiTool { name, .. })
+            | ToolSpec::Freeform(FreeformTool { name, .. }) => name,
+            ToolSpec::WebSearch {} => "web_search",
+            ToolSpec::LocalShell {} => "local_shell",
+        }
+    }
+
+    fn assert_eq_tool_names(tools: &[ConfiguredToolSpec], expected_names: &[&str]) {
        let tool_names = tools
            .iter()
-            .map(|tool| match tool {
-                ToolSpec::Function(ResponsesApiTool { name, .. }) => name,
-                ToolSpec::LocalShell {} => "local_shell",
-                ToolSpec::WebSearch {} => "web_search",
-                ToolSpec::Freeform(FreeformTool { name, .. }) => name,
-            })
+            .map(|tool| tool_name(&tool.spec))
            .collect::<Vec<_>>();

        assert_eq!(
@@ -639,18 +891,27 @@ mod tests {
        }
    }

+    /// Looks up the configured tool whose spec is named `expected_name`.
+    ///
+    /// Panics with `expected tool {expected_name}` when no such tool exists.
+    fn find_tool<'a>(
+        tools: &'a [ConfiguredToolSpec],
+        expected_name: &str,
+    ) -> &'a ConfiguredToolSpec {
+        for candidate in tools {
+            if tool_name(&candidate.spec) == expected_name {
+                return candidate;
+            }
+        }
+        panic!("expected tool {expected_name}")
+    }
+
    #[test]
    fn test_build_specs() {
        let model_family = find_family_for_model("codex-mini-latest")
            .expect("codex-mini-latest should be a valid model family");
+        let mut features = Features::with_defaults();
+        features.enable(Feature::PlanTool);
+        features.enable(Feature::WebSearchRequest);
+        features.enable(Feature::UnifiedExec);
        let config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &model_family,
-            include_plan_tool: true,
-            include_apply_patch_tool: false,
-            include_web_search_request: true,
-            use_streamable_shell_tool: false,
-            include_view_image_tool: true,
-            experimental_unified_exec_tool: true,
+            features: &features,
        });
        let (tools, _) = build_specs(&config, Some(HashMap::new())).build();

@@ -663,14 +924,13 @@ mod tests {
    #[test]
    fn test_build_specs_default_shell() {
        let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
+        let mut features = Features::with_defaults();
+        features.enable(Feature::PlanTool);
+        features.enable(Feature::WebSearchRequest);
+        features.enable(Feature::UnifiedExec);
        let config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &model_family,
-            include_plan_tool: true,
-            include_apply_patch_tool: false,
-            include_web_search_request: true,
-            use_streamable_shell_tool: false,
-            include_view_image_tool: true,
-            experimental_unified_exec_tool: true,
+            features: &features,
        });
        let (tools, _) = build_specs(&config, Some(HashMap::new())).build();

@@ -681,34 +941,64 @@ mod tests {
    }

    #[test]
-    fn test_build_specs_includes_beta_read_file_tool() {
+    #[ignore]
+    fn test_parallel_support_flags() {
+        // `unified_exec` must not advertise parallel tool calls, while the
+        // `grep_files`, `list_dir`, and `read_file` tools must.
        let model_family = find_family_for_model("gpt-5-codex")
            .expect("gpt-5-codex should be a valid model family");
+        let mut features = Features::with_defaults();
+        features.disable(Feature::ViewImageTool);
+        features.enable(Feature::UnifiedExec);
        let config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &model_family,
-            include_plan_tool: false,
-            include_apply_patch_tool: false,
-            include_web_search_request: false,
-            use_streamable_shell_tool: false,
-            include_view_image_tool: false,
-            experimental_unified_exec_tool: true,
+            features: &features,
        });
-        let (tools, _) = build_specs(&config, Some(HashMap::new())).build();
+        let (tools, _) = build_specs(&config, None).build();

-        assert_eq_tool_names(&tools, &["unified_exec", "read_file"]);
+        assert!(!find_tool(&tools, "unified_exec").supports_parallel_tool_calls);
+        assert!(find_tool(&tools, "grep_files").supports_parallel_tool_calls);
+        assert!(find_tool(&tools, "list_dir").supports_parallel_tool_calls);
+        assert!(find_tool(&tools, "read_file").supports_parallel_tool_calls);
+    }
+
+    /// The `test-gpt-5-codex` model family should expose `test_sync_tool`
+    /// alongside the `read_file`, `grep_files`, and `list_dir` tools.
+    #[test]
+    fn test_test_model_family_includes_sync_tool() {
+        let mut features = Features::with_defaults();
+        features.disable(Feature::ViewImageTool);
+        let model_family = find_family_for_model("test-gpt-5-codex")
+            .expect("test-gpt-5-codex should be a valid model family");
+        let params = ToolsConfigParams {
+            model_family: &model_family,
+            features: &features,
+        };
+        let config = ToolsConfig::new(&params);
+        let (tools, _) = build_specs(&config, None).build();
+
+        // Presence check shared by all four assertions below.
+        let has_tool = |name: &str| tools.iter().any(|tool| tool_name(&tool.spec) == name);
+        assert!(has_tool("test_sync_tool"));
+        assert!(has_tool("read_file"));
+        assert!(has_tool("grep_files"));
+        assert!(has_tool("list_dir"));
+    }

    #[test]
    fn test_build_specs_mcp_tools() {
        let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
+        let mut features = Features::with_defaults();
+        features.enable(Feature::UnifiedExec);
+        features.enable(Feature::WebSearchRequest);
        let config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &model_family,
-            include_plan_tool: false,
-            include_apply_patch_tool: false,
-            include_web_search_request: true,
-            use_streamable_shell_tool: false,
-            include_view_image_tool: true,
-            experimental_unified_exec_tool: true,
+            features: &features,
        });
        let (tools, _) = build_specs(
            &config,
@@ -760,7 +1050,7 @@ mod tests {
        );

        assert_eq!(
-            tools[3],
+            tools[3].spec,
            ToolSpec::Function(ResponsesApiTool {
                name: "test_server/do_something_cool".to_string(),
                parameters: JsonSchema::Object {
@@ -806,14 +1096,11 @@ mod tests {
    #[test]
    fn test_build_specs_mcp_tools_sorted_by_name() {
        let model_family = find_family_for_model("o3").expect("o3 should be a valid model family");
+        let mut features = Features::with_defaults();
+        features.enable(Feature::UnifiedExec);
        let config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &model_family,
-            include_plan_tool: false,
-            include_apply_patch_tool: false,
-            include_web_search_request: false,
-            use_streamable_shell_tool: false,
-            include_view_image_tool: true,
-            experimental_unified_exec_tool: true,
+            features: &features,
        });

        // Intentionally construct a map with keys that would sort alphabetically.
@@ -883,14 +1170,12 @@ mod tests {
    fn test_mcp_tool_property_missing_type_defaults_to_string() {
        let model_family = find_family_for_model("gpt-5-codex")
            .expect("gpt-5-codex should be a valid model family");
+        let mut features = Features::with_defaults();
+        features.enable(Feature::UnifiedExec);
+        features.enable(Feature::WebSearchRequest);
        let config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &model_family,
-            include_plan_tool: false,
-            include_apply_patch_tool: false,
-            include_web_search_request: true,
-            use_streamable_shell_tool: false,
-            include_view_image_tool: true,
-            experimental_unified_exec_tool: true,
+            features: &features,
        });

        let (tools, _) = build_specs(
@@ -921,7 +1206,7 @@ mod tests {
            &tools,
            &[
                "unified_exec",
-                "read_file",
+                "apply_patch",
                "web_search",
                "view_image",
                "dash/search",
@@ -929,7 +1214,7 @@ mod tests {
        );

        assert_eq!(
-            tools[4],
+            tools[4].spec,
            ToolSpec::Function(ResponsesApiTool {
                name: "dash/search".to_string(),
                parameters: JsonSchema::Object {
@@ -952,14 +1237,12 @@ mod tests {
    fn test_mcp_tool_integer_normalized_to_number() {
        let model_family = find_family_for_model("gpt-5-codex")
            .expect("gpt-5-codex should be a valid model family");
+        let mut features = Features::with_defaults();
+        features.enable(Feature::UnifiedExec);
+        features.enable(Feature::WebSearchRequest);
        let config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &model_family,
-            include_plan_tool: false,
-            include_apply_patch_tool: false,
-            include_web_search_request: true,
-            use_streamable_shell_tool: false,
-            include_view_image_tool: true,
-            experimental_unified_exec_tool: true,
+            features: &features,
        });

        let (tools, _) = build_specs(
@@ -988,14 +1271,14 @@ mod tests {
            &tools,
            &[
                "unified_exec",
-                "read_file",
+                "apply_patch",
                "web_search",
                "view_image",
                "dash/paginate",
            ],
        );
        assert_eq!(
-            tools[4],
+            tools[4].spec,
            ToolSpec::Function(ResponsesApiTool {
                name: "dash/paginate".to_string(),
                parameters: JsonSchema::Object {
@@ -1016,14 +1299,13 @@ mod tests {
    fn test_mcp_tool_array_without_items_gets_default_string_items() {
        let model_family = find_family_for_model("gpt-5-codex")
            .expect("gpt-5-codex should be a valid model family");
+        let mut features = Features::with_defaults();
+        features.enable(Feature::UnifiedExec);
+        features.enable(Feature::WebSearchRequest);
+        features.enable(Feature::ApplyPatchFreeform);
        let config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &model_family,
-            include_plan_tool: false,
-            include_apply_patch_tool: false,
-            include_web_search_request: true,
-            use_streamable_shell_tool: false,
-            include_view_image_tool: true,
-            experimental_unified_exec_tool: true,
+            features: &features,
        });

        let (tools, _) = build_specs(
@@ -1052,14 +1334,14 @@ mod tests {
            &tools,
            &[
                "unified_exec",
-                "read_file",
+                "apply_patch",
                "web_search",
                "view_image",
                "dash/tags",
            ],
        );
        assert_eq!(
-            tools[4],
+            tools[4].spec,
            ToolSpec::Function(ResponsesApiTool {
                name: "dash/tags".to_string(),
                parameters: JsonSchema::Object {
@@ -1083,14 +1365,12 @@ mod tests {
    fn test_mcp_tool_anyof_defaults_to_string() {
        let model_family = find_family_for_model("gpt-5-codex")
            .expect("gpt-5-codex should be a valid model family");
+        let mut features = Features::with_defaults();
+        features.enable(Feature::UnifiedExec);
+        features.enable(Feature::WebSearchRequest);
        let config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &model_family,
-            include_plan_tool: false,
-            include_apply_patch_tool: false,
-            include_web_search_request: true,
-            use_streamable_shell_tool: false,
-            include_view_image_tool: true,
-            experimental_unified_exec_tool: true,
+            features: &features,
        });

        let (tools, _) = build_specs(
@@ -1119,14 +1399,14 @@ mod tests {
            &tools,
            &[
                "unified_exec",
-                "read_file",
+                "apply_patch",
                "web_search",
                "view_image",
                "dash/value",
            ],
        );
        assert_eq!(
-            tools[4],
+            tools[4].spec,
            ToolSpec::Function(ResponsesApiTool {
                name: "dash/value".to_string(),
                parameters: JsonSchema::Object {
@@ -1162,14 +1442,12 @@ mod tests {
    fn test_get_openai_tools_mcp_tools_with_additional_properties_schema() {
        let model_family = find_family_for_model("gpt-5-codex")
            .expect("gpt-5-codex should be a valid model family");
+        let mut features = Features::with_defaults();
+        features.enable(Feature::UnifiedExec);
+        features.enable(Feature::WebSearchRequest);
        let config = ToolsConfig::new(&ToolsConfigParams {
            model_family: &model_family,
-            include_plan_tool: false,
-            include_apply_patch_tool: false,
-            include_web_search_request: true,
-            use_streamable_shell_tool: false,
-            include_view_image_tool: true,
-            experimental_unified_exec_tool: true,
+            features: &features,
        });
        let (tools, _) = build_specs(
            &config,
@@ -1223,7 +1501,7 @@ mod tests {
            &tools,
            &[
                "unified_exec",
-                "read_file",
+                "apply_patch",
                "web_search",
                "view_image",
                "test_server/do_something_cool",
@@ -1231,7 +1509,7 @@ mod tests {
        );

        assert_eq!(
-            tools[4],
+            tools[4].spec,
            ToolSpec::Function(ResponsesApiTool {
                name: "test_server/do_something_cool".to_string(),
                parameters: JsonSchema::Object {
--- a/codex-rs/core/src/unified_exec/mod.rs
+++ b/codex-rs/core/src/unified_exec/mod.rs
@@ -110,11 +110,22 @@ impl ManagedUnifiedExecSession {
        let buffer_clone = Arc::clone(&output_buffer);
        let notify_clone = Arc::clone(&output_notify);
        let output_task = tokio::spawn(async move {
-            while let Ok(chunk) = receiver.recv().await {
-                let mut guard = buffer_clone.lock().await;
-                guard.push_chunk(chunk);
-                drop(guard);
-                notify_clone.notify_waiters();
+            loop {
+                match receiver.recv().await {
+                    Ok(chunk) => {
+                        let mut guard = buffer_clone.lock().await;
+                        guard.push_chunk(chunk);
+                        drop(guard);
+                        notify_clone.notify_waiters();
+                    }
+                    // If we lag behind the broadcast buffer, skip missed
+                    // messages but keep the task alive to continue streaming.
+                    Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => {
+                        continue;
+                    }
+                    // When the sender closes, exit the task.
+                    Err(tokio::sync::broadcast::error::RecvError::Closed) => break,
+                }
            }
        });

--- a/codex-rs/core/src/user_notification.rs
+++ b/codex-rs/core/src/user_notification.rs
@@ -49,6 +49,7 @@ impl UserNotifier {
 pub(crate) enum UserNotification {
    #[serde(rename_all = "kebab-case")]
    AgentTurnComplete {
+        thread_id: String,
        turn_id: String,

        /// Messages that the user sent to the agent to initiate the turn.
@@ -67,6 +68,7 @@ mod tests {
    #[test]
    fn test_user_notification() -> Result<()> {
        let notification = UserNotification::AgentTurnComplete {
+            thread_id: "b5f6c1c2-1111-2222-3333-444455556666".to_string(),
            turn_id: "12345".to_string(),
            input_messages: vec!["Rename `foo` to `bar` and update the callsites.".to_string()],
            last_assistant_message: Some(
@@ -76,7 +78,7 @@ mod tests {
        let serialized = serde_json::to_string(&notification)?;
        assert_eq!(
            serialized,
-            r#"{"type":"agent-turn-complete","turn-id":"12345","input-messages":["Rename `foo` to `bar` and update the callsites."],"last-assistant-message":"Rename complete and verified `cargo build` succeeds."}"#
+            r#"{"type":"agent-turn-complete","thread-id":"b5f6c1c2-1111-2222-3333-444455556666","turn-id":"12345","input-messages":["Rename `foo` to `bar` and update the callsites."],"last-assistant-message":"Rename complete and verified `cargo build` succeeds."}"#
        );
        Ok(())
    }
--- a/codex-rs/core/tests/chat_completions_sse.rs
+++ b/codex-rs/core/tests/chat_completions_sse.rs
@@ -1,3 +1,4 @@
+use assert_matches::assert_matches;
 use std::sync::Arc;
 use tracing_test::traced_test;

@@ -178,7 +179,7 @@ async fn streams_text_without_reasoning() {
        other => panic!("expected terminal message, got {other:?}"),
    }

-    assert!(matches!(events[2], ResponseEvent::Completed { .. }));
+    assert_matches!(events[2], ResponseEvent::Completed { .. });
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
@@ -219,7 +220,7 @@ async fn streams_reasoning_from_string_delta() {
        other => panic!("expected message item, got {other:?}"),
    }

-    assert!(matches!(events[4], ResponseEvent::Completed { .. }));
+    assert_matches!(events[4], ResponseEvent::Completed { .. });
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
@@ -266,7 +267,7 @@ async fn streams_reasoning_from_object_delta() {
        other => panic!("expected message item, got {other:?}"),
    }

-    assert!(matches!(events[5], ResponseEvent::Completed { .. }));
+    assert_matches!(events[5], ResponseEvent::Completed { .. });
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
@@ -293,7 +294,7 @@ async fn streams_reasoning_from_final_message() {
        other => panic!("expected reasoning item, got {other:?}"),
    }

-    assert!(matches!(events[2], ResponseEvent::Completed { .. }));
+    assert_matches!(events[2], ResponseEvent::Completed { .. });
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
@@ -337,7 +338,7 @@ async fn streams_reasoning_before_tool_call() {
        other => panic!("expected function call, got {other:?}"),
    }

-    assert!(matches!(events[3], ResponseEvent::Completed { .. }));
+    assert_matches!(events[3], ResponseEvent::Completed { .. });
 }

 #[tokio::test]
--- a/codex-rs/core/tests/common/Cargo.toml
+++ b/codex-rs/core/tests/common/Cargo.toml
@@ -10,7 +10,10 @@ path = "lib.rs"
 anyhow = { workspace = true }
 assert_cmd = { workspace = true }
 codex-core = { workspace = true }
+notify = { workspace = true }
+regex-lite = { workspace = true }
 serde_json = { workspace = true }
 tempfile = { workspace = true }
 tokio = { workspace = true, features = ["time"] }
+walkdir = { workspace = true }
 wiremock = { workspace = true }
--- a/codex-rs/core/tests/common/lib.rs
+++ b/codex-rs/core/tests/common/lib.rs
@@ -6,6 +6,7 @@ use codex_core::CodexConversation;
 use codex_core::config::Config;
 use codex_core::config::ConfigOverrides;
 use codex_core::config::ConfigToml;
+use regex_lite::Regex;

 #[cfg(target_os = "linux")]
 use assert_cmd::cargo::cargo_bin;
@@ -14,6 +15,16 @@ pub mod responses;
 pub mod test_codex;
 pub mod test_codex_exec;

+/// Asserts that `pattern` matches `actual` and returns the resulting captures.
+///
+/// # Panics
+///
+/// Panics if `pattern` fails to compile or does not match `actual`. The
+/// panics are raised directly in this function body (not inside closures,
+/// which do not inherit `#[track_caller]`) so the reported location is the
+/// calling test rather than this helper.
+#[track_caller]
+pub fn assert_regex_match<'s>(pattern: &str, actual: &'s str) -> regex_lite::Captures<'s> {
+    let regex = match Regex::new(pattern) {
+        Ok(compiled) => compiled,
+        Err(err) => panic!("failed to compile regex {pattern:?}: {err}"),
+    };
+    match regex.captures(actual) {
+        Some(captures) => captures,
+        None => panic!("regex {pattern:?} did not match {actual:?}"),
+    }
+}
+
 /// Returns a default `Config` whose on-disk state is confined to the provided
 /// temporary directory. Using a per-test directory keeps tests hermetic and
 /// avoids clobbering a developer’s real `~/.codex`.
@@ -153,6 +164,149 @@ pub fn sandbox_network_env_var() -> &'static str {
    codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR
 }

+pub mod fs_wait {
+    use anyhow::Result;
+    use anyhow::anyhow;
+    use notify::RecursiveMode;
+    use notify::Watcher;
+    use std::path::Path;
+    use std::path::PathBuf;
+    use std::sync::mpsc;
+    use std::sync::mpsc::RecvTimeoutError;
+    use std::time::Duration;
+    use std::time::Instant;
+    use tokio::task;
+    use walkdir::WalkDir;
+
+    /// Waits until `path` exists, running the blocking wait on a
+    /// `spawn_blocking` task.
+    ///
+    /// Returns the path once it exists. Fails if the path is still missing
+    /// after `timeout`, if the filesystem watcher reports an error, or if the
+    /// blocking task cannot be joined.
+    pub async fn wait_for_path_exists(
+        path: impl Into<PathBuf>,
+        timeout: Duration,
+    ) -> Result<PathBuf> {
+        let target: PathBuf = path.into();
+        let worker = task::spawn_blocking(move || wait_for_path_exists_blocking(target, timeout));
+        worker.await?
+    }
+
+    /// Waits until some regular file under `root` satisfies `predicate`,
+    /// running the blocking search on a `spawn_blocking` task.
+    ///
+    /// Returns the first matching path found, or an error if none appears in
+    /// time, the watcher fails, or the blocking task cannot be joined.
+    pub async fn wait_for_matching_file(
+        root: impl Into<PathBuf>,
+        timeout: Duration,
+        mut predicate: impl FnMut(&Path) -> bool + Send + 'static,
+    ) -> Result<PathBuf> {
+        let search_root: PathBuf = root.into();
+        let worker = task::spawn_blocking(move || {
+            blocking_find_matching_file(search_root, timeout, &mut predicate)
+        });
+        worker.await?
+    }
+
+    /// Blocks the current thread until `path` exists or `timeout` elapses.
+    ///
+    /// Watches the nearest existing ancestor of `path` recursively and
+    /// re-checks for the path after every filesystem event. Returns an error
+    /// if the watcher cannot be set up, if the watcher reports an error, or if
+    /// the path is still missing once the deadline has passed.
+    fn wait_for_path_exists_blocking(path: PathBuf, timeout: Duration) -> Result<PathBuf> {
+        if path.exists() {
+            return Ok(path);
+        }
+
+        let watch_root = nearest_existing_ancestor(&path);
+        let (event_tx, event_rx) = mpsc::channel();
+        let mut watcher = notify::recommended_watcher(move |event| {
+            // The send result is deliberately ignored.
+            let _ = event_tx.send(event);
+        })?;
+        watcher.watch(&watch_root, RecursiveMode::Recursive)?;
+
+        let deadline = Instant::now() + timeout;
+        loop {
+            if path.exists() {
+                return Ok(path);
+            }
+            let remaining = deadline.saturating_duration_since(Instant::now());
+            if remaining.is_zero() {
+                break;
+            }
+            match event_rx.recv_timeout(remaining) {
+                // Any event may mean the path appeared; the next iteration re-checks.
+                Ok(Ok(_event)) => continue,
+                Ok(Err(err)) => return Err(err.into()),
+                Err(RecvTimeoutError::Timeout | RecvTimeoutError::Disconnected) => break,
+            }
+        }
+
+        // One last check so a path created right at the deadline still counts.
+        if !path.exists() {
+            return Err(anyhow!("timed out waiting for {:?}", path));
+        }
+        Ok(path)
+    }
+
+    /// Blocks until a regular file under `root` satisfies `predicate`, or
+    /// until `timeout` elapses.
+    ///
+    /// `timeout` bounds the whole call: time spent waiting for `root` itself
+    /// to appear is deducted from the time left to find a matching file.
+    ///
+    /// Returns the first matching path reported by `scan_for_match`. Fails if
+    /// `root` never appears, the watcher cannot be created or reports an
+    /// error, or no match is found before the deadline.
+    fn blocking_find_matching_file(
+        root: PathBuf,
+        timeout: Duration,
+        predicate: &mut impl FnMut(&Path) -> bool,
+    ) -> Result<PathBuf> {
+        // A single deadline covers both waiting for `root` and the search below.
+        let deadline = Instant::now() + timeout;
+        let root = wait_for_path_exists_blocking(root, timeout)?;
+
+        // Fast path: skip creating a watcher when a match is already present.
+        if let Some(found) = scan_for_match(&root, predicate) {
+            return Ok(found);
+        }
+
+        let (tx, rx) = mpsc::channel();
+        let mut watcher = notify::recommended_watcher(move |res| {
+            let _ = tx.send(res);
+        })?;
+        watcher.watch(&root, RecursiveMode::Recursive)?;
+
+        loop {
+            // Scanning at the top of every iteration, including the first one
+            // right after the watch is registered, catches files created
+            // between the fast-path scan and the watch registration.
+            if let Some(found) = scan_for_match(&root, predicate) {
+                return Ok(found);
+            }
+            let remaining = deadline.saturating_duration_since(Instant::now());
+            if remaining.is_zero() {
+                break;
+            }
+            match rx.recv_timeout(remaining) {
+                Ok(Ok(_event)) => continue,
+                Ok(Err(err)) => return Err(err.into()),
+                Err(RecvTimeoutError::Timeout | RecvTimeoutError::Disconnected) => break,
+            }
+        }
+
+        // Final scan in case a file appeared without a delivered event.
+        scan_for_match(&root, predicate)
+            .ok_or_else(|| anyhow!("timed out waiting for matching file in {:?}", root))
+    }
+
+    /// Walks `root` and returns the first regular file accepted by
+    /// `predicate`, or `None` when no file matches. Entries that fail to be
+    /// read during the walk are skipped.
+    fn scan_for_match(root: &Path, predicate: &mut impl FnMut(&Path) -> bool) -> Option<PathBuf> {
+        WalkDir::new(root)
+            .into_iter()
+            .filter_map(Result::ok)
+            .filter(|entry| entry.file_type().is_file())
+            .find(|entry| predicate(entry.path()))
+            .map(|entry| entry.path().to_path_buf())
+    }
+
+    /// Returns `path` itself or its closest ancestor that currently exists,
+    /// falling back to `"."` when none of them exist.
+    fn nearest_existing_ancestor(path: &Path) -> PathBuf {
+        path.ancestors()
+            .find(|candidate| candidate.exists())
+            .map(Path::to_path_buf)
+            .unwrap_or_else(|| PathBuf::from("."))
+    }
+}
+
 #[macro_export]
 macro_rules! skip_if_sandbox {
    () => {{
--- a/codex-rs/core/tests/common/responses.rs
+++ b/codex-rs/core/tests/common/responses.rs
@@ -1,11 +1,105 @@
+use std::sync::Arc;
+use std::sync::Mutex;
+
 use serde_json::Value;
 use wiremock::BodyPrintLimit;
+use wiremock::Match;
 use wiremock::Mock;
+use wiremock::MockBuilder;
 use wiremock::MockServer;
 use wiremock::Respond;
 use wiremock::ResponseTemplate;
 use wiremock::matchers::method;
-use wiremock::matchers::path;
+use wiremock::matchers::path_regex;
+
+/// Collects the requests passed to its `Match` implementation so tests can
+/// inspect them afterwards.
+#[derive(Debug, Clone)]
+pub struct ResponseMock {
+    // Behind `Arc<Mutex<..>>` so every clone shares one list of requests.
+    requests: Arc<Mutex<Vec<ResponsesRequest>>>,
+}
+
+impl ResponseMock {
+    /// Creates a recorder with no captured requests.
+    fn new() -> Self {
+        let requests = Arc::default();
+        Self { requests }
+    }
+
+    /// Returns the only recorded request.
+    ///
+    /// Panics unless exactly one request has been recorded.
+    pub fn single_request(&self) -> ResponsesRequest {
+        let recorded = self.requests.lock().unwrap();
+        match recorded.as_slice() {
+            [only] => only.clone(),
+            _ => panic!("expected 1 request, got {}", recorded.len()),
+        }
+    }
+
+    /// Returns a copy of every request recorded so far.
+    pub fn requests(&self) -> Vec<ResponsesRequest> {
+        let recorded = self.requests.lock().unwrap();
+        recorded.to_vec()
+    }
+}
+
+/// A request captured by `ResponseMock`, with helpers for inspecting its
+/// body, headers, path, and query string.
+#[derive(Debug, Clone)]
+pub struct ResponsesRequest(wiremock::Request);
+
+impl ResponsesRequest {
+    /// Parses the request body as JSON; panics if parsing fails.
+    pub fn body_json(&self) -> Value {
+        self.0.body_json().unwrap()
+    }
+
+    /// Returns a copy of the `input` array from the JSON body.
+    ///
+    /// Panics if the body is not JSON or `input` is missing or not an array.
+    pub fn input(&self) -> Vec<Value> {
+        self.0.body_json::<Value>().unwrap()["input"]
+            .as_array()
+            .expect("input array not found in request")
+            .clone()
+    }
+
+    /// Returns the `function_call_output` input item for `call_id`.
+    pub fn function_call_output(&self, call_id: &str) -> Value {
+        self.call_output(call_id, "function_call_output")
+    }
+
+    /// Returns the `custom_tool_call_output` input item for `call_id`.
+    pub fn custom_tool_call_output(&self, call_id: &str) -> Value {
+        self.call_output(call_id, "custom_tool_call_output")
+    }
+
+    /// Returns the first input item whose `type` equals `call_type` and whose
+    /// `call_id` equals `call_id`.
+    ///
+    /// Panics if no such item is present in the request.
+    pub fn call_output(&self, call_id: &str, call_type: &str) -> Value {
+        self.input()
+            .into_iter()
+            .find(|item| {
+                // Compare through `Option` so items lacking `type` or
+                // `call_id` are skipped instead of panicking on `unwrap`.
+                item.get("type").and_then(Value::as_str) == Some(call_type)
+                    && item.get("call_id").and_then(Value::as_str) == Some(call_id)
+            })
+            .unwrap_or_else(|| panic!("function call output {call_id} item not found in request"))
+    }
+
+    /// Returns the value of header `name`, or `None` if it is absent or
+    /// cannot be converted to a string.
+    pub fn header(&self, name: &str) -> Option<String> {
+        self.0
+            .headers
+            .get(name)
+            .and_then(|v| v.to_str().ok())
+            .map(str::to_string)
+    }
+
+    /// Returns the path component of the request URL.
+    pub fn path(&self) -> String {
+        self.0.url.path().to_string()
+    }
+
+    /// Returns the value of the first query parameter named `name`, if any.
+    pub fn query_param(&self, name: &str) -> Option<String> {
+        self.0
+            .url
+            .query_pairs()
+            .find(|(k, _)| k == name)
+            .map(|(_, v)| v.to_string())
+    }
+}
+
+impl Match for ResponseMock {
+    /// Stores a copy of `request` and always reports a match, so this
+    /// matcher observes requests without filtering any of them out.
+    fn matches(&self, request: &wiremock::Request) -> bool {
+        let captured = ResponsesRequest(request.clone());
+        let mut recorded = self.requests.lock().unwrap();
+        recorded.push(captured);
+        true
+    }
+}

 /// Build an SSE stream body from a list of JSON events.
 pub fn sse(events: Vec<Value>) -> String {
@@ -34,6 +128,16 @@ pub fn ev_completed(id: &str) -> Value {
    })
 }

+/// Builds a `response.created` SSE event whose `response` object carries
+/// the given `id`.
+pub fn ev_response_created(id: &str) -> Value {
+    let response = serde_json::json!({ "id": id });
+    serde_json::json!({
+        "type": "response.created",
+        "response": response,
+    })
+}
+
 pub fn ev_completed_with_tokens(id: &str, total_tokens: u64) -> Value {
    serde_json::json!({
        "type": "response.completed",
@@ -135,40 +239,56 @@ pub fn ev_apply_patch_function_call(call_id: &str, patch: &str) -> Value {
    })
 }

+/// Builds an SSE body holding a single `response.failed` event for response
+/// `id`, with the given error `code` and `message`.
+pub fn sse_failed(id: &str, code: &str, message: &str) -> String {
+    let failed_event = serde_json::json!({
+        "type": "response.failed",
+        "response": {
+            "id": id,
+            "error": {"code": code, "message": message}
+        }
+    });
+    sse(vec![failed_event])
+}
+
+/// Wraps `body` in a `200` response template whose content type is
+/// `text/event-stream`.
 pub fn sse_response(body: String) -> ResponseTemplate {
    ResponseTemplate::new(200)
        .insert_header("content-type", "text/event-stream")
        .set_body_raw(body, "text/event-stream")
 }

-pub async fn mount_sse_once_match<M>(server: &MockServer, matcher: M, body: String)
+/// Creates a mock builder for `POST` requests whose path ends in
+/// `/responses`, paired with the `ResponseMock` attached to it as a matcher.
+fn base_mock() -> (MockBuilder, ResponseMock) {
+    let recorder = ResponseMock::new();
+    let builder = Mock::given(method("POST"))
+        .and(path_regex(".*/responses$"))
+        .and(recorder.clone());
+    (builder, recorder)
+}
+
+/// Mounts a mock that answers at most one `POST .../responses` request
+/// accepted by `matcher` with `body` as an SSE stream, and returns the
+/// `ResponseMock` attached to that mock.
+///
+/// NOTE(review): `base_mock` attaches the recording matcher before `matcher`
+/// is added here, so requests that `matcher` rejects may still be recorded —
+/// confirm against wiremock's matcher evaluation order.
+pub async fn mount_sse_once_match<M>(server: &MockServer, matcher: M, body: String) -> ResponseMock
 where
    M: wiremock::Match + Send + Sync + 'static,
 {
-    Mock::given(method("POST"))
-        .and(path("/v1/responses"))
-        .and(matcher)
+    let (mock, response_mock) = base_mock();
+    mock.and(matcher)
        .respond_with(sse_response(body))
        .up_to_n_times(1)
        .mount(server)
        .await;
+    response_mock
 }

-pub async fn mount_sse_once(server: &MockServer, body: String) {
-    Mock::given(method("POST"))
-        .and(path("/v1/responses"))
-        .respond_with(sse_response(body))
-        .expect(1)
+/// Mounts a mock that answers at most one `POST .../responses` request with
+/// `body` as an SSE stream, and returns the `ResponseMock` attached to it.
+///
+/// Unlike the previous `.expect(1)` version, `up_to_n_times(1)` only caps the
+/// number of responses; this function sets no call-count expectation.
+pub async fn mount_sse_once(server: &MockServer, body: String) -> ResponseMock {
+    let (mock, response_mock) = base_mock();
+    mock.respond_with(sse_response(body))
+        .up_to_n_times(1)
        .mount(server)
        .await;
+    response_mock
 }

-pub async fn mount_sse(server: &MockServer, body: String) {
-    Mock::given(method("POST"))
-        .and(path("/v1/responses"))
-        .respond_with(sse_response(body))
-        .mount(server)
-        .await;
+/// Mounts a mock that answers `POST .../responses` requests with `body` as an
+/// SSE stream, with no limit on the number of matches configured here, and
+/// returns the `ResponseMock` attached to it.
+pub async fn mount_sse(server: &MockServer, body: String) -> ResponseMock {
+    let (mock, response_mock) = base_mock();
+    mock.respond_with(sse_response(body)).mount(server).await;
+    response_mock
 }

 pub async fn start_mock_server() -> MockServer {
@@ -181,7 +301,7 @@ pub async fn start_mock_server() -> MockServer {
 /// Mounts a sequence of SSE response bodies and serves them in order for each
 /// POST to `/v1/responses`. Panics if more requests are received than bodies
 /// provided. Also asserts the exact number of expected calls.
-pub async fn mount_sse_sequence(server: &MockServer, bodies: Vec<String>) {
+pub async fn mount_sse_sequence(server: &MockServer, bodies: Vec<String>) -> ResponseMock {
    use std::sync::atomic::AtomicUsize;
    use std::sync::atomic::Ordering;

@@ -208,10 +328,11 @@ pub async fn mount_sse_sequence(server: &MockServer, bodies: Vec<String>) {
        responses: bodies,
    };

-    Mock::given(method("POST"))
-        .and(path("/v1/responses"))
-        .respond_with(responder)
+    let (mock, response_mock) = base_mock();
+    mock.respond_with(responder)
        .expect(num_calls as u64)
        .mount(server)
        .await;
+
+    response_mock
 }
--- a/codex-rs/core/tests/common/test_codex.rs
+++ b/codex-rs/core/tests/common/test_codex.rs
@@ -1,4 +1,5 @@
 use std::mem::swap;
+use std::path::PathBuf;
 use std::sync::Arc;

 use codex_core::CodexAuth;
@@ -39,6 +40,12 @@ impl TestCodexBuilder {
        let mut config = load_default_config_for_test(&home);
        config.cwd = cwd.path().to_path_buf();
        config.model_provider = model_provider;
+        config.codex_linux_sandbox_exe = Some(PathBuf::from(
+            assert_cmd::Command::cargo_bin("codex")?
+                .get_program()
+                .to_os_string(),
+        ));
+
        let mut mutators = vec![];
        swap(&mut self.config_mutators, &mut mutators);

--- a/codex-rs/core/tests/responses_headers.rs
+++ b/codex-rs/core/tests/responses_headers.rs
@@ -0,0 +1,102 @@
+use std::sync::Arc;
+
+use codex_app_server_protocol::AuthMode;
+use codex_core::ContentItem;
+use codex_core::ModelClient;
+use codex_core::ModelProviderInfo;
+use codex_core::Prompt;
+use codex_core::ResponseEvent;
+use codex_core::ResponseItem;
+use codex_core::WireApi;
+use codex_otel::otel_event_manager::OtelEventManager;
+use codex_protocol::ConversationId;
+use core_test_support::load_default_config_for_test;
+use core_test_support::responses;
+use futures::StreamExt;
+use tempfile::TempDir;
+use wiremock::matchers::header;
+
+#[tokio::test]
+async fn responses_stream_includes_task_type_header() {
+    core_test_support::skip_if_no_network!();
+    // Mock server plus a minimal SSE body: a created event followed by a completed event.
+    let server = responses::start_mock_server().await;
+    let response_body = responses::sse(vec![
+        responses::ev_response_created("resp-1"),
+        responses::ev_completed("resp-1"),
+    ]);
+    // Mount the body behind a matcher that requires `Codex-Task-Type: standard`.
+    let request_recorder = responses::mount_sse_once_match(
+        &server,
+        header("Codex-Task-Type", "standard"),
+        response_body,
+    )
+    .await;
+    // Responses-API provider pointed at the mock server, with retries set to zero.
+    let provider = ModelProviderInfo {
+        name: "mock".into(),
+        base_url: Some(format!("{}/v1", server.uri())),
+        env_key: None,
+        env_key_instructions: None,
+        wire_api: WireApi::Responses,
+        query_params: None,
+        http_headers: None,
+        env_http_headers: None,
+        request_max_retries: Some(0),
+        stream_max_retries: Some(0),
+        stream_idle_timeout_ms: Some(5_000),
+        requires_openai_auth: false,
+    };
+    // Default test config built from a fresh temp dir, switched to the mock provider.
+    let codex_home = TempDir::new().expect("failed to create TempDir");
+    let mut config = load_default_config_for_test(&codex_home);
+    config.model_provider_id = provider.name.clone();
+    config.model_provider = provider.clone();
+    let effort = config.model_reasoning_effort;
+    let summary = config.model_reasoning_summary;
+    let config = Arc::new(config);
+
+    let conversation_id = ConversationId::new();
+
+    let otel_event_manager = OtelEventManager::new(
+        conversation_id,
+        config.model.as_str(),
+        config.model_family.slug.as_str(),
+        None,
+        Some(AuthMode::ChatGPT),
+        false,
+        "test".to_string(),
+    );
+
+    let client = ModelClient::new(
+        Arc::clone(&config),
+        None,
+        otel_event_manager,
+        provider,
+        effort,
+        summary,
+        conversation_id,
+    );
+    // The prompt input is a single user message with the text "hello".
+    let mut prompt = Prompt::default();
+    prompt.input = vec![ResponseItem::Message {
+        id: None,
+        role: "user".into(),
+        content: vec![ContentItem::InputText {
+            text: "hello".into(),
+        }],
+    }];
+    // Drain the stream until a Completed event arrives or the stream ends.
+    let mut stream = client.stream(&prompt).await.expect("stream failed");
+    while let Some(event) = stream.next().await {
+        if matches!(event, Ok(ResponseEvent::Completed { .. })) {
+            break;
+        }
+    }
+    // The recorded request must carry the expected task-type header value.
+    let request = request_recorder.single_request();
+    assert_eq!(
+        request.header("Codex-Task-Type").as_deref(),
+        Some("standard")
+    );
+}
--- a/codex-rs/core/tests/suite/abort_tasks.rs
+++ b/codex-rs/core/tests/suite/abort_tasks.rs
@@ -3,14 +3,14 @@ use std::time::Duration;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::InputItem;
 use codex_core::protocol::Op;
+use core_test_support::responses::ev_completed;
 use core_test_support::responses::ev_function_call;
-use core_test_support::responses::mount_sse_once_match;
+use core_test_support::responses::mount_sse_once;
 use core_test_support::responses::sse;
 use core_test_support::responses::start_mock_server;
 use core_test_support::test_codex::test_codex;
 use core_test_support::wait_for_event_with_timeout;
 use serde_json::json;
-use wiremock::matchers::body_string_contains;

 /// Integration test: spawn a long‑running shell tool via a mocked Responses SSE
 /// function call, then interrupt the session and expect TurnAborted.
@@ -27,10 +27,13 @@ async fn interrupt_long_running_tool_emits_turn_aborted() {
        "timeout_ms": 60_000
    })
    .to_string();
-    let body = sse(vec![ev_function_call("call_sleep", "shell", &args)]);
+    let body = sse(vec![
+        ev_function_call("call_sleep", "shell", &args),
+        ev_completed("done"),
+    ]);

    let server = start_mock_server().await;
-    mount_sse_once_match(&server, body_string_contains("start sleep"), body).await;
+    mount_sse_once(&server, body).await;

    let codex = test_codex().build(&server).await.unwrap().codex;

--- a/codex-rs/core/tests/suite/cli_stream.rs
+++ b/codex-rs/core/tests/suite/cli_stream.rs
@@ -1,12 +1,11 @@
 use assert_cmd::Command as AssertCommand;
 use codex_core::RolloutRecorder;
 use codex_core::protocol::GitInfo;
+use core_test_support::fs_wait;
 use core_test_support::skip_if_no_network;
 use std::time::Duration;
-use std::time::Instant;
 use tempfile::TempDir;
 use uuid::Uuid;
-use walkdir::WalkDir;
 use wiremock::Mock;
 use wiremock::MockServer;
 use wiremock::ResponseTemplate;
@@ -106,16 +105,12 @@ async fn exec_cli_applies_experimental_instructions_file() {
        "data: {\"type\":\"response.created\",\"response\":{}}\n\n",
        "data: {\"type\":\"response.completed\",\"response\":{\"id\":\"r1\"}}\n\n"
    );
-    Mock::given(method("POST"))
-        .and(path("/v1/responses"))
-        .respond_with(
-            ResponseTemplate::new(200)
-                .insert_header("content-type", "text/event-stream")
-                .set_body_raw(sse, "text/event-stream"),
-        )
-        .expect(1)
-        .mount(&server)
-        .await;
+    let resp_mock = core_test_support::responses::mount_sse_once_match(
+        &server,
+        path("/v1/responses"),
+        sse.to_string(),
+    )
+    .await;

    // Create a temporary instructions file with a unique marker we can assert
    // appears in the outbound request payload.
@@ -164,8 +159,8 @@ async fn exec_cli_applies_experimental_instructions_file() {

    // Inspect the captured request and verify our custom base instructions were
    // included in the `instructions` field.
-    let request = &server.received_requests().await.unwrap()[0];
-    let body = request.body_json::<serde_json::Value>().unwrap();
+    let request = resp_mock.single_request();
+    let body = request.body_json();
    let instructions = body
        .get("instructions")
        .and_then(|v| v.as_str())
@@ -215,12 +210,12 @@ async fn responses_api_stream_cli() {

 /// End-to-end: create a session (writes rollout), verify the file, then resume and confirm append.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn integration_creates_and_checks_session_file() {
+async fn integration_creates_and_checks_session_file() -> anyhow::Result<()> {
    // Honor sandbox network restrictions for CI parity with the other tests.
-    skip_if_no_network!();
+    skip_if_no_network!(Ok(()));

    // 1. Temp home so we read/write isolated session files.
-    let home = TempDir::new().unwrap();
+    let home = TempDir::new()?;

    // 2. Unique marker we'll look for in the session log.
    let marker = format!("integration-test-{}", Uuid::new_v4());
@@ -258,63 +253,20 @@ async fn integration_creates_and_checks_session_file() {

    // Wait for sessions dir to appear.
    let sessions_dir = home.path().join("sessions");
-    let dir_deadline = Instant::now() + Duration::from_secs(5);
-    while !sessions_dir.exists() && Instant::now() < dir_deadline {
-        std::thread::sleep(Duration::from_millis(50));
-    }
-    assert!(sessions_dir.exists(), "sessions directory never appeared");
+    fs_wait::wait_for_path_exists(&sessions_dir, Duration::from_secs(5)).await?;

    // Find the session file that contains `marker`.
-    let deadline = Instant::now() + Duration::from_secs(10);
-    let mut matching_path: Option<std::path::PathBuf> = None;
-    while Instant::now() < deadline && matching_path.is_none() {
-        for entry in WalkDir::new(&sessions_dir) {
-            let entry = match entry {
-                Ok(e) => e,
-                Err(_) => continue,
-            };
-            if !entry.file_type().is_file() {
-                continue;
-            }
-            if !entry.file_name().to_string_lossy().ends_with(".jsonl") {
-                continue;
-            }
-            let path = entry.path();
-            let Ok(content) = std::fs::read_to_string(path) else {
-                continue;
-            };
-            let mut lines = content.lines();
-            if lines.next().is_none() {
-                continue;
-            }
-            for line in lines {
-                if line.trim().is_empty() {
-                    continue;
-                }
-                let item: serde_json::Value = match serde_json::from_str(line) {
-                    Ok(v) => v,
-                    Err(_) => continue,
-                };
-                if item.get("type").and_then(|t| t.as_str()) == Some("response_item")
-                    && let Some(payload) = item.get("payload")
-                    && payload.get("type").and_then(|t| t.as_str()) == Some("message")
-                    && let Some(c) = payload.get("content")
-                    && c.to_string().contains(&marker)
-                {
-                    matching_path = Some(path.to_path_buf());
-                    break;
-                }
-            }
+    let marker_clone = marker.clone();
+    let path = fs_wait::wait_for_matching_file(&sessions_dir, Duration::from_secs(10), move |p| {
+        if p.extension().and_then(|ext| ext.to_str()) != Some("jsonl") {
+            return false;
        }
-        if matching_path.is_none() {
-            std::thread::sleep(Duration::from_millis(50));
-        }
-    }
-
-    let path = match matching_path {
-        Some(p) => p,
-        None => panic!("No session file containing the marker was found"),
-    };
+        let Ok(content) = std::fs::read_to_string(p) else {
+            return false;
+        };
+        content.contains(&marker_clone)
+    })
+    .await?;

    // Basic sanity checks on location and metadata.
    let rel = match path.strip_prefix(&sessions_dir) {
@@ -422,42 +374,25 @@ async fn integration_creates_and_checks_session_file() {
    assert!(output2.status.success(), "resume codex-cli run failed");

    // Find the new session file containing the resumed marker.
-    let deadline = Instant::now() + Duration::from_secs(10);
-    let mut resumed_path: Option<std::path::PathBuf> = None;
-    while Instant::now() < deadline && resumed_path.is_none() {
-        for entry in WalkDir::new(&sessions_dir) {
-            let entry = match entry {
-                Ok(e) => e,
-                Err(_) => continue,
-            };
-            if !entry.file_type().is_file() {
-                continue;
+    let marker2_clone = marker2.clone();
+    let resumed_path =
+        fs_wait::wait_for_matching_file(&sessions_dir, Duration::from_secs(10), move |p| {
+            if p.extension().and_then(|ext| ext.to_str()) != Some("jsonl") {
+                return false;
            }
-            if !entry.file_name().to_string_lossy().ends_with(".jsonl") {
-                continue;
-            }
-            let p = entry.path();
-            let Ok(c) = std::fs::read_to_string(p) else {
-                continue;
-            };
-            if c.contains(&marker2) {
-                resumed_path = Some(p.to_path_buf());
-                break;
-            }
-        }
-        if resumed_path.is_none() {
-            std::thread::sleep(Duration::from_millis(50));
-        }
-    }
+            std::fs::read_to_string(p)
+                .map(|content| content.contains(&marker2_clone))
+                .unwrap_or(false)
+        })
+        .await?;

-    let resumed_path = resumed_path.expect("No resumed session file found containing the marker2");
    // Resume should write to the existing log file.
    assert_eq!(
        resumed_path, path,
        "resume should create a new session file"
    );

-    let resumed_content = std::fs::read_to_string(&resumed_path).unwrap();
+    let resumed_content = std::fs::read_to_string(&resumed_path)?;
    assert!(
        resumed_content.contains(&marker),
        "resumed file missing original marker"
@@ -466,6 +401,7 @@ async fn integration_creates_and_checks_session_file() {
        resumed_content.contains(&marker2),
        "resumed file missing resumed marker"
    );
+    Ok(())
 }

 /// Integration test to verify git info is collected and recorded in session files.
--- a/codex-rs/core/tests/suite/client.rs
+++ b/codex-rs/core/tests/suite/client.rs
@@ -14,6 +14,8 @@ use codex_core::ResponseEvent;
 use codex_core::ResponseItem;
 use codex_core::WireApi;
 use codex_core::built_in_model_providers;
+use codex_core::error::CodexErr;
+use codex_core::model_family::find_family_for_model;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::InputItem;
 use codex_core::protocol::Op;
@@ -26,8 +28,10 @@ use core_test_support::load_default_config_for_test;
 use core_test_support::load_sse_fixture_with_id;
 use core_test_support::responses;
 use core_test_support::skip_if_no_network;
+use core_test_support::test_codex::TestCodex;
 use core_test_support::test_codex::test_codex;
 use core_test_support::wait_for_event;
+use core_test_support::wait_for_event_with_timeout;
 use futures::StreamExt;
 use serde_json::json;
 use std::io::Write;
@@ -37,6 +41,7 @@ use uuid::Uuid;
 use wiremock::Mock;
 use wiremock::MockServer;
 use wiremock::ResponseTemplate;
+use wiremock::matchers::body_string_contains;
 use wiremock::matchers::header_regex;
 use wiremock::matchers::method;
 use wiremock::matchers::path;
@@ -218,15 +223,9 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {

    // Mock server that will receive the resumed request
    let server = MockServer::start().await;
-    let first = ResponseTemplate::new(200)
-        .insert_header("content-type", "text/event-stream")
-        .set_body_raw(sse_completed("resp1"), "text/event-stream");
-    Mock::given(method("POST"))
-        .and(path("/v1/responses"))
-        .respond_with(first)
-        .expect(1)
-        .mount(&server)
-        .await;
+    let resp_mock =
+        responses::mount_sse_once_match(&server, path("/v1/responses"), sse_completed("resp1"))
+            .await;

    // Configure Codex to resume from our file
    let model_provider = ModelProviderInfo {
@@ -272,8 +271,8 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
        .unwrap();
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

-    let request = &server.received_requests().await.unwrap()[0];
-    let request_body = request.body_json::<serde_json::Value>().unwrap();
+    let request = resp_mock.single_request();
+    let request_body = request.body_json();
    let expected_input = json!([
        {
            "type": "message",
@@ -367,18 +366,9 @@ async fn includes_base_instructions_override_in_request() {
    skip_if_no_network!();
    // Mock server
    let server = MockServer::start().await;
-
-    // First request – must NOT include `previous_response_id`.
-    let first = ResponseTemplate::new(200)
-        .insert_header("content-type", "text/event-stream")
-        .set_body_raw(sse_completed("resp1"), "text/event-stream");
-
-    Mock::given(method("POST"))
-        .and(path("/v1/responses"))
-        .respond_with(first)
-        .expect(1)
-        .mount(&server)
-        .await;
+    let resp_mock =
+        responses::mount_sse_once_match(&server, path("/v1/responses"), sse_completed("resp1"))
+            .await;

    let model_provider = ModelProviderInfo {
        base_url: Some(format!("{}/v1", server.uri())),
@@ -409,8 +399,8 @@ async fn includes_base_instructions_override_in_request() {

    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

-    let request = &server.received_requests().await.unwrap()[0];
-    let request_body = request.body_json::<serde_json::Value>().unwrap();
+    let request = resp_mock.single_request();
+    let request_body = request.body_json();

    assert!(
        request_body["instructions"]
@@ -565,16 +555,9 @@ async fn includes_user_instructions_message_in_request() {
    skip_if_no_network!();
    let server = MockServer::start().await;

-    let first = ResponseTemplate::new(200)
-        .insert_header("content-type", "text/event-stream")
-        .set_body_raw(sse_completed("resp1"), "text/event-stream");
-
-    Mock::given(method("POST"))
-        .and(path("/v1/responses"))
-        .respond_with(first)
-        .expect(1)
-        .mount(&server)
-        .await;
+    let resp_mock =
+        responses::mount_sse_once_match(&server, path("/v1/responses"), sse_completed("resp1"))
+            .await;

    let model_provider = ModelProviderInfo {
        base_url: Some(format!("{}/v1", server.uri())),
@@ -605,8 +588,8 @@ async fn includes_user_instructions_message_in_request() {

    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

-    let request = &server.received_requests().await.unwrap()[0];
-    let request_body = request.body_json::<serde_json::Value>().unwrap();
+    let request = resp_mock.single_request();
+    let request_body = request.body_json();

    assert!(
        !request_body["instructions"]
@@ -996,6 +979,100 @@ async fn usage_limit_error_emits_rate_limit_event() -> anyhow::Result<()> {
    Ok(())
 }

+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn context_window_error_sets_total_tokens_to_model_window() -> anyhow::Result<()> {
+    skip_if_no_network!(Ok(()));
+    let server = MockServer::start().await;
+    // Bodies containing "trigger context window" get a `context_length_exceeded` failure.
+    responses::mount_sse_once_match(
+        &server,
+        body_string_contains("trigger context window"),
+        responses::sse_failed(
+            "resp_context_window",
+            "context_length_exceeded",
+            "Your input exceeds the context window of this model. Please adjust your input and try again.",
+        ),
+    )
+    .await;
+    // Bodies containing "seed turn" get the `sse_completed` response instead.
+    responses::mount_sse_once_match(
+        &server,
+        body_string_contains("seed turn"),
+        sse_completed("resp_seed"),
+    )
+    .await;
+    // Pin the model to gpt-5 and set its context window to 272_000.
+    let TestCodex { codex, .. } = test_codex()
+        .with_config(|config| {
+            config.model = "gpt-5".to_string();
+            config.model_family = find_family_for_model("gpt-5").expect("known gpt-5 model family");
+            config.model_context_window = Some(272_000);
+        })
+        .build(&server)
+        .await?;
+    // Turn 1: send the seed input and wait for that turn to complete.
+    codex
+        .submit(Op::UserInput {
+            items: vec![InputItem::Text {
+                text: "seed turn".into(),
+            }],
+        })
+        .await?;
+
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+    // Turn 2: send the input that the first mock answers with the failure.
+    codex
+        .submit(Op::UserInput {
+            items: vec![InputItem::Text {
+                text: "trigger context window".into(),
+            }],
+        })
+        .await?;
+
+    use std::time::Duration;
+    // Wait up to 5s for a TokenCount whose non-zero total equals the context window.
+    let token_event = wait_for_event_with_timeout(
+        &codex,
+        |event| {
+            matches!(
+                event,
+                EventMsg::TokenCount(payload)
+                    if payload.info.as_ref().is_some_and(|info| {
+                        info.model_context_window == Some(info.total_token_usage.total_tokens)
+                            && info.total_token_usage.total_tokens > 0
+                    })
+            )
+        },
+        Duration::from_secs(5),
+    )
+    .await;
+
+    let EventMsg::TokenCount(token_payload) = token_event else {
+        unreachable!("wait_for_event_with_timeout returned unexpected event");
+    };
+
+    let info = token_payload
+        .info
+        .expect("token usage info present when context window is exceeded");
+
+    assert_eq!(info.model_context_window, Some(272_000));
+    assert_eq!(info.total_token_usage.total_tokens, 272_000);
+    // An Error event must follow, carrying the ContextWindowExceeded display text.
+    let error_event = wait_for_event(&codex, |ev| matches!(ev, EventMsg::Error(_))).await;
+    let expected_context_window_message = CodexErr::ContextWindowExceeded.to_string();
+    assert!(
+        matches!(
+            error_event,
+            EventMsg::Error(ref err) if err.message == expected_context_window_message
+        ),
+        "expected context window error; got {error_event:?}"
+    );
+    // Finally, wait for a TaskComplete event after the error.
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    Ok(())
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn azure_overrides_assign_properties_used_for_responses_url() {
    skip_if_no_network!();
--- a/codex-rs/core/tests/suite/compact.rs
+++ b/codex-rs/core/tests/suite/compact.rs
@@ -13,12 +13,6 @@ use core_test_support::load_default_config_for_test;
 use core_test_support::skip_if_no_network;
 use core_test_support::wait_for_event;
 use tempfile::TempDir;
-use wiremock::Mock;
-use wiremock::Request;
-use wiremock::Respond;
-use wiremock::ResponseTemplate;
-use wiremock::matchers::method;
-use wiremock::matchers::path;

 use codex_core::codex::compact::SUMMARIZATION_PROMPT;
 use core_test_support::responses::ev_assistant_message;
@@ -26,14 +20,11 @@ use core_test_support::responses::ev_completed;
 use core_test_support::responses::ev_completed_with_tokens;
 use core_test_support::responses::ev_function_call;
 use core_test_support::responses::mount_sse_once_match;
+use core_test_support::responses::mount_sse_sequence;
 use core_test_support::responses::sse;
-use core_test_support::responses::sse_response;
+use core_test_support::responses::sse_failed;
 use core_test_support::responses::start_mock_server;
 use pretty_assertions::assert_eq;
-use std::sync::Arc;
-use std::sync::Mutex;
-use std::sync::atomic::AtomicUsize;
-use std::sync::atomic::Ordering;
 // --- Test helpers -----------------------------------------------------------

 pub(super) const FIRST_REPLY: &str = "FIRST_REPLY";
@@ -48,6 +39,8 @@ const SECOND_LARGE_REPLY: &str = "SECOND_LARGE_REPLY";
 const FIRST_AUTO_SUMMARY: &str = "FIRST_AUTO_SUMMARY";
 const SECOND_AUTO_SUMMARY: &str = "SECOND_AUTO_SUMMARY";
 const FINAL_REPLY: &str = "FINAL_REPLY";
+const CONTEXT_LIMIT_MESSAGE: &str =
+    "Your input exceeds the context window of this model. Please adjust your input and try again.";
 const DUMMY_FUNCTION_NAME: &str = "unsupported_tool";
 const DUMMY_CALL_ID: &str = "call-multi-auto";

@@ -295,12 +288,7 @@ async fn auto_compact_runs_after_token_limit_hit() {
            && !body.contains(SECOND_AUTO_MSG)
            && !body.contains("You have exceeded the maximum number of tokens")
    };
-    Mock::given(method("POST"))
-        .and(path("/v1/responses"))
-        .and(first_matcher)
-        .respond_with(sse_response(sse1))
-        .mount(&server)
-        .await;
+    mount_sse_once_match(&server, first_matcher, sse1).await;

    let second_matcher = |req: &wiremock::Request| {
        let body = std::str::from_utf8(&req.body).unwrap_or("");
@@ -308,23 +296,13 @@ async fn auto_compact_runs_after_token_limit_hit() {
            && body.contains(FIRST_AUTO_MSG)
            && !body.contains("You have exceeded the maximum number of tokens")
    };
-    Mock::given(method("POST"))
-        .and(path("/v1/responses"))
-        .and(second_matcher)
-        .respond_with(sse_response(sse2))
-        .mount(&server)
-        .await;
+    mount_sse_once_match(&server, second_matcher, sse2).await;

    let third_matcher = |req: &wiremock::Request| {
        let body = std::str::from_utf8(&req.body).unwrap_or("");
        body.contains("You have exceeded the maximum number of tokens")
    };
-    Mock::given(method("POST"))
-        .and(path("/v1/responses"))
-        .and(third_matcher)
-        .respond_with(sse_response(sse3))
-        .mount(&server)
-        .await;
+    mount_sse_once_match(&server, third_matcher, sse3).await;

    let model_provider = ModelProviderInfo {
        base_url: Some(format!("{}/v1", server.uri())),
@@ -455,12 +433,7 @@ async fn auto_compact_persists_rollout_entries() {
            && !body.contains(SECOND_AUTO_MSG)
            && !body.contains("You have exceeded the maximum number of tokens")
    };
-    Mock::given(method("POST"))
-        .and(path("/v1/responses"))
-        .and(first_matcher)
-        .respond_with(sse_response(sse1))
-        .mount(&server)
-        .await;
+    mount_sse_once_match(&server, first_matcher, sse1).await;

    let second_matcher = |req: &wiremock::Request| {
        let body = std::str::from_utf8(&req.body).unwrap_or("");
@@ -468,23 +441,13 @@ async fn auto_compact_persists_rollout_entries() {
            && body.contains(FIRST_AUTO_MSG)
            && !body.contains("You have exceeded the maximum number of tokens")
    };
-    Mock::given(method("POST"))
-        .and(path("/v1/responses"))
-        .and(second_matcher)
-        .respond_with(sse_response(sse2))
-        .mount(&server)
-        .await;
+    mount_sse_once_match(&server, second_matcher, sse2).await;

    let third_matcher = |req: &wiremock::Request| {
        let body = std::str::from_utf8(&req.body).unwrap_or("");
        body.contains("You have exceeded the maximum number of tokens")
    };
-    Mock::given(method("POST"))
-        .and(path("/v1/responses"))
-        .and(third_matcher)
-        .respond_with(sse_response(sse3))
-        .mount(&server)
-        .await;
+    mount_sse_once_match(&server, third_matcher, sse3).await;

    let model_provider = ModelProviderInfo {
        base_url: Some(format!("{}/v1", server.uri())),
@@ -582,35 +545,20 @@ async fn auto_compact_stops_after_failed_attempt() {
        body.contains(FIRST_AUTO_MSG)
            && !body.contains("You have exceeded the maximum number of tokens")
    };
-    Mock::given(method("POST"))
-        .and(path("/v1/responses"))
-        .and(first_matcher)
-        .respond_with(sse_response(sse1.clone()))
-        .mount(&server)
-        .await;
+    mount_sse_once_match(&server, first_matcher, sse1.clone()).await;

    let second_matcher = |req: &wiremock::Request| {
        let body = std::str::from_utf8(&req.body).unwrap_or("");
        body.contains("You have exceeded the maximum number of tokens")
    };
-    Mock::given(method("POST"))
-        .and(path("/v1/responses"))
-        .and(second_matcher)
-        .respond_with(sse_response(sse2.clone()))
-        .mount(&server)
-        .await;
+    mount_sse_once_match(&server, second_matcher, sse2.clone()).await;

    let third_matcher = |req: &wiremock::Request| {
        let body = std::str::from_utf8(&req.body).unwrap_or("");
        !body.contains("You have exceeded the maximum number of tokens")
            && body.contains(SUMMARY_TEXT)
    };
-    Mock::given(method("POST"))
-        .and(path("/v1/responses"))
-        .and(third_matcher)
-        .respond_with(sse_response(sse3.clone()))
-        .mount(&server)
-        .await;
+    mount_sse_once_match(&server, third_matcher, sse3.clone()).await;

    let model_provider = ModelProviderInfo {
        base_url: Some(format!("{}/v1", server.uri())),
@@ -677,6 +625,130 @@ async fn auto_compact_stops_after_failed_attempt() {
    );
 }

+/// A manual compact that fails with `context_length_exceeded` must be retried with one
+/// fewer input item, and a background event must report the trimmed item.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn manual_compact_retries_after_context_window_error() {
+    skip_if_no_network!();
+
+    let server = start_mock_server().await;
+
+    let user_turn = sse(vec![
+        ev_assistant_message("m1", FIRST_REPLY),
+        ev_completed("r1"),
+    ]);
+    let compact_failed = sse_failed(
+        "resp-fail",
+        "context_length_exceeded",
+        CONTEXT_LIMIT_MESSAGE,
+    );
+    let compact_succeeds = sse(vec![
+        ev_assistant_message("m2", SUMMARY_TEXT),
+        ev_completed("r2"),
+    ]);
+
+    // Bodies are served in order, one per request; each is moved in since it is not reused.
+    let request_log =
+        mount_sse_sequence(&server, vec![user_turn, compact_failed, compact_succeeds]).await;
+
+    let model_provider = ModelProviderInfo {
+        base_url: Some(format!("{}/v1", server.uri())),
+        ..built_in_model_providers()["openai"].clone()
+    };
+
+    let home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&home);
+    config.model_provider = model_provider;
+    config.model_auto_compact_token_limit = Some(200_000);
+    let codex = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"))
+        .new_conversation(config)
+        .await
+        .unwrap()
+        .conversation;
+
+    codex
+        .submit(Op::UserInput {
+            items: vec![InputItem::Text {
+                text: "first turn".into(),
+            }],
+        })
+        .await
+        .unwrap();
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    // Trigger a manual compaction; the second scripted body is the failure response.
+    codex.submit(Op::Compact).await.unwrap();
+
+    let EventMsg::BackgroundEvent(event) =
+        wait_for_event(&codex, |ev| matches!(ev, EventMsg::BackgroundEvent(_))).await
+    else {
+        panic!("expected background event after compact retry");
+    };
+    assert!(
+        event.message.contains("Trimmed 1 older conversation item"),
+        "background event should mention trimmed item count: {}",
+        event.message
+    );
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    let requests = request_log.requests();
+    assert_eq!(
+        requests.len(),
+        3,
+        "expected user turn and two compact attempts"
+    );
+
+    // requests[0] is the user turn; [1] and [2] are the two compact attempts.
+    let compact_attempt = requests[1].body_json();
+    let retry_attempt = requests[2].body_json();
+
+    let compact_input = compact_attempt["input"]
+        .as_array()
+        .unwrap_or_else(|| panic!("compact attempt missing input array: {compact_attempt}"));
+    let retry_input = retry_attempt["input"]
+        .as_array()
+        .unwrap_or_else(|| panic!("retry attempt missing input array: {retry_attempt}"));
+    // Both attempts must end with the summarization prompt as the last input item.
+    assert_eq!(
+        compact_input
+            .last()
+            .and_then(|item| item.get("content"))
+            .and_then(|v| v.as_array())
+            .and_then(|items| items.first())
+            .and_then(|entry| entry.get("text"))
+            .and_then(|text| text.as_str()),
+        Some(SUMMARIZATION_PROMPT),
+        "compact attempt should include summarization prompt"
+    );
+    assert_eq!(
+        retry_input
+            .last()
+            .and_then(|item| item.get("content"))
+            .and_then(|v| v.as_array())
+            .and_then(|items| items.first())
+            .and_then(|entry| entry.get("text"))
+            .and_then(|text| text.as_str()),
+        Some(SUMMARIZATION_PROMPT),
+        "retry attempt should include summarization prompt"
+    );
+    assert_eq!(
+        retry_input.len(),
+        compact_input.len().saturating_sub(1),
+        "retry should drop exactly one history item (before {} vs after {})",
+        compact_input.len(),
+        retry_input.len()
+    );
+    // The trimmed item must come from the front, so the first entries must differ.
+    if let (Some(first_before), Some(first_after)) = (compact_input.first(), retry_input.first()) {
+        assert_ne!(
+            first_before, first_after,
+            "retry should drop the oldest conversation item"
+        );
+    } else {
+        panic!("expected non-empty compact inputs");
+    }
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_events() {
    skip_if_no_network!();
@@ -708,49 +780,7 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
        ev_completed_with_tokens("r6", 120),
    ]);

-    #[derive(Clone)]
-    struct SeqResponder {
-        bodies: Arc<Vec<String>>,
-        calls: Arc<AtomicUsize>,
-        requests: Arc<Mutex<Vec<Vec<u8>>>>,
-    }
-
-    impl SeqResponder {
-        fn new(bodies: Vec<String>) -> Self {
-            Self {
-                bodies: Arc::new(bodies),
-                calls: Arc::new(AtomicUsize::new(0)),
-                requests: Arc::new(Mutex::new(Vec::new())),
-            }
-        }
-
-        fn recorded_requests(&self) -> Vec<Vec<u8>> {
-            self.requests.lock().unwrap().clone()
-        }
-    }
-
-    impl Respond for SeqResponder {
-        fn respond(&self, req: &Request) -> ResponseTemplate {
-            let idx = self.calls.fetch_add(1, Ordering::SeqCst);
-            self.requests.lock().unwrap().push(req.body.clone());
-            let body = self
-                .bodies
-                .get(idx)
-                .unwrap_or_else(|| panic!("unexpected request index {idx}"))
-                .clone();
-            ResponseTemplate::new(200)
-                .insert_header("content-type", "text/event-stream")
-                .set_body_raw(body, "text/event-stream")
-        }
-    }
-
-    let responder = SeqResponder::new(vec![sse1, sse2, sse3, sse4, sse5, sse6]);
-    Mock::given(method("POST"))
-        .and(path("/v1/responses"))
-        .respond_with(responder.clone())
-        .expect(6)
-        .mount(&server)
-        .await;
+    mount_sse_sequence(&server, vec![sse1, sse2, sse3, sse4, sse5, sse6]).await;

    let model_provider = ModelProviderInfo {
        base_url: Some(format!("{}/v1", server.uri())),
@@ -801,10 +831,12 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
        "auto compact should not emit task lifecycle events"
    );

-    let request_bodies: Vec<String> = responder
-        .recorded_requests()
+    let request_bodies: Vec<String> = server
+        .received_requests()
+        .await
+        .unwrap()
        .into_iter()
-        .map(|body| String::from_utf8(body).unwrap_or_default())
+        .map(|request| String::from_utf8(request.body).unwrap_or_default())
        .collect();
    assert_eq!(
        request_bodies.len(),
--- a/codex-rs/core/tests/suite/compact_resume_fork.rs
+++ b/codex-rs/core/tests/suite/compact_resume_fork.rs
@@ -17,6 +17,7 @@ use codex_core::NewConversation;
 use codex_core::built_in_model_providers;
 use codex_core::codex::compact::SUMMARIZATION_PROMPT;
 use codex_core::config::Config;
+use codex_core::config::OPENAI_DEFAULT_MODEL;
 use codex_core::protocol::ConversationPathResponseEvent;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::InputItem;
@@ -131,9 +132,10 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
        .as_str()
        .unwrap_or_default()
        .to_string();
+    let expected_model = OPENAI_DEFAULT_MODEL;
    let user_turn_1 = json!(
    {
-      "model": "gpt-5-codex",
+      "model": expected_model,
      "instructions": prompt,
      "input": [
        {
@@ -182,7 +184,7 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
    });
    let compact_1 = json!(
    {
-      "model": "gpt-5-codex",
+      "model": expected_model,
      "instructions": prompt,
      "input": [
        {
@@ -251,7 +253,7 @@ async fn compact_resume_and_fork_preserve_model_history_view() {
    });
    let user_turn_2_after_compact = json!(
    {
-      "model": "gpt-5-codex",
+      "model": expected_model,
      "instructions": prompt,
      "input": [
        {
@@ -316,7 +318,7 @@ SUMMARY_ONLY_CONTEXT"
    });
    let usert_turn_3_after_resume = json!(
    {
-      "model": "gpt-5-codex",
+      "model": expected_model,
      "instructions": prompt,
      "input": [
        {
@@ -401,7 +403,7 @@ SUMMARY_ONLY_CONTEXT"
    });
    let user_turn_3_after_fork = json!(
    {
-      "model": "gpt-5-codex",
+      "model": expected_model,
      "instructions": prompt,
      "input": [
        {
--- a/codex-rs/core/tests/suite/grep_files.rs
+++ b/codex-rs/core/tests/suite/grep_files.rs
@@ -0,0 +1,237 @@
+#![cfg(not(target_os = "windows"))]
+
+use anyhow::Result;
+use codex_core::model_family::find_family_for_model;
+use codex_core::protocol::AskForApproval;
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::InputItem;
+use codex_core::protocol::Op;
+use codex_core::protocol::SandboxPolicy;
+use codex_protocol::config_types::ReasoningSummary;
+use core_test_support::responses;
+use core_test_support::responses::ev_assistant_message;
+use core_test_support::responses::ev_completed;
+use core_test_support::responses::ev_function_call;
+use core_test_support::responses::ev_response_created;
+use core_test_support::responses::sse;
+use core_test_support::responses::start_mock_server;
+use core_test_support::skip_if_no_network;
+use core_test_support::test_codex::TestCodex;
+use core_test_support::test_codex::test_codex;
+use core_test_support::wait_for_event;
+use serde_json::Value;
+use std::collections::HashSet;
+use std::path::Path;
+use std::process::Command as StdCommand;
+use wiremock::matchers::any;
+
+const MODEL_WITH_TOOL: &str = "test-gpt-5-codex";
+
+fn ripgrep_available() -> bool { // true only when `rg --version` can be spawned and exits successfully
+    StdCommand::new("rg")
+        .arg("--version")
+        .output()
+        .map(|output| output.status.success())
+        .unwrap_or(false) // failure to run the command at all is reported as "not available"
+}
+
+macro_rules! skip_if_ripgrep_missing { // returns `$ret` from the enclosing function when `ripgrep_available()` is false
+    ($ret:expr $(,)?) => {{ // takes the value to return; a trailing comma is optional
+        if !ripgrep_available() {
+            eprintln!("rg not available in PATH; skipping test");
+            return $ret; // expands inside the caller, so this leaves the calling function
+        }
+    }};
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn grep_files_tool_collects_matches() -> Result<()> { // grep_files with an `include` glob: only the two *.rs files are expected in the output
+    skip_if_no_network!(Ok(()));
+    skip_if_ripgrep_missing!(Ok(()));
+
+    let server = start_mock_server().await;
+    let test = build_test_codex(&server).await?;
+
+    let search_dir = test.cwd.path().join("src"); // fixture directory inside the test's cwd
+    std::fs::create_dir_all(&search_dir)?;
+    let alpha = search_dir.join("alpha.rs");
+    let beta = search_dir.join("beta.rs");
+    let gamma = search_dir.join("gamma.txt");
+    std::fs::write(&alpha, "alpha needle\n")?;
+    std::fs::write(&beta, "beta needle\n")?;
+    std::fs::write(&gamma, "needle in text but excluded\n")?; // contains the pattern but is not a *.rs file
+
+    let call_id = "grep-files-collect";
+    let arguments = serde_json::json!({
+        "pattern": "needle",
+        "path": search_dir.to_string_lossy(),
+        "include": "*.rs",
+    })
+    .to_string(); // the tool-call arguments are passed on as a JSON string
+
+    mount_tool_sequence(&server, call_id, &arguments, "grep_files").await;
+    submit_turn(&test, "please find uses of needle").await?;
+
+    let bodies = recorded_bodies(&server).await?; // inspect what was sent back to the mock server
+    let tool_output = find_tool_output(&bodies, call_id).expect("tool output present");
+    let payload = tool_output.get("output").expect("output field present");
+    let (content_opt, success_opt) = extract_content_and_success(payload);
+    let content = content_opt.expect("content present");
+    let success = success_opt.unwrap_or(true); // a payload without a `success` flag is treated as success
+    assert!(success, "expected success for matches, got {payload:?}");
+
+    let entries = collect_file_names(content); // set of final path components, one per non-blank output line
+    assert_eq!(entries.len(), 2, "content: {content}");
+    assert!(
+        entries.contains("alpha.rs"),
+        "missing alpha.rs in {entries:?}"
+    );
+    assert!(
+        entries.contains("beta.rs"),
+        "missing beta.rs in {entries:?}"
+    );
+    assert!(
+        !entries.contains("gamma.txt"),
+        "txt file should be filtered out: {entries:?}"
+    );
+
+    Ok(())
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn grep_files_tool_reports_empty_results() -> Result<()> { // grep_files with no hits: output text must be "No matches found."
+    skip_if_no_network!(Ok(()));
+    skip_if_ripgrep_missing!(Ok(()));
+
+    let server = start_mock_server().await;
+    let test = build_test_codex(&server).await?;
+
+    let search_dir = test.cwd.path().join("logs");
+    std::fs::create_dir_all(&search_dir)?;
+    std::fs::write(search_dir.join("output.txt"), "no hits here")?; // the only file; it does not contain "needle"
+
+    let call_id = "grep-files-empty";
+    let arguments = serde_json::json!({
+        "pattern": "needle",
+        "path": search_dir.to_string_lossy(),
+        "limit": 5,
+    })
+    .to_string();
+
+    mount_tool_sequence(&server, call_id, &arguments, "grep_files").await;
+    submit_turn(&test, "search again").await?;
+
+    let bodies = recorded_bodies(&server).await?;
+    let tool_output = find_tool_output(&bodies, call_id).expect("tool output present");
+    let payload = tool_output.get("output").expect("output field present");
+    let (content_opt, success_opt) = extract_content_and_success(payload);
+    let content = content_opt.expect("content present");
+    if let Some(success) = success_opt { // the flag is optional; it is only checked when the payload carries one
+        assert!(!success, "expected success=false payload: {payload:?}");
+    }
+    assert_eq!(content, "No matches found.");
+
+    Ok(())
+}
+
+#[allow(clippy::expect_used)] // the `expect` below is a deliberate test-setup failure
+async fn build_test_codex(server: &wiremock::MockServer) -> Result<TestCodex> { // builds a TestCodex against `server` with the model forced to MODEL_WITH_TOOL
+    let mut builder = test_codex().with_config(|config| {
+        config.model = MODEL_WITH_TOOL.to_string();
+        config.model_family = // keep the model family in step with the overridden model name
+            find_family_for_model(MODEL_WITH_TOOL).expect("model family for test model");
+    });
+    builder.build(server).await
+}
+
+async fn submit_turn(test: &TestCodex, prompt: &str) -> Result<()> { // submits `prompt` as one user turn and waits for a TaskComplete event
+    let session_model = test.session_configured.model.clone(); // reuse the model reported by the configured session
+
+    test.codex
+        .submit(Op::UserTurn {
+            items: vec![InputItem::Text {
+                text: prompt.into(),
+            }],
+            final_output_json_schema: None,
+            cwd: test.cwd.path().to_path_buf(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy: SandboxPolicy::DangerFullAccess,
+            model: session_model,
+            effort: None,
+            summary: ReasoningSummary::Auto,
+        })
+        .await?; // a submit error is propagated to the caller
+
+    wait_for_event(&test.codex, |event| {
+        matches!(event, EventMsg::TaskComplete(_))
+    })
+    .await; // NOTE(review): timeout behaviour of wait_for_event is not visible here — confirm in core_test_support
+    Ok(())
+}
+
+async fn mount_tool_sequence( // mounts two SSE responses on `server`: a function call, then a closing assistant message
+    server: &wiremock::MockServer,
+    call_id: &str,
+    arguments: &str, // JSON-encoded arguments passed to the function-call event
+    tool_name: &str,
+) {
+    let first_response = sse(vec![ // first stream: the model asks for `tool_name` to be called
+        ev_response_created("resp-1"),
+        ev_function_call(call_id, tool_name, arguments),
+        ev_completed("resp-1"),
+    ]);
+    responses::mount_sse_once_match(server, any(), first_response).await; // presumably serves a single request — confirm in core_test_support::responses
+
+    let second_response = sse(vec![ // second stream: the assistant message "done", then completion
+        ev_assistant_message("msg-1", "done"),
+        ev_completed("resp-2"),
+    ]);
+    responses::mount_sse_once_match(server, any(), second_response).await;
+}
+
+#[allow(clippy::expect_used)] // missing recordings or non-JSON bodies should fail the test immediately
+async fn recorded_bodies(server: &wiremock::MockServer) -> Result<Vec<Value>> { // every request the mock server received, parsed as JSON
+    let requests = server.received_requests().await.expect("requests recorded");
+    Ok(requests // this function only ever returns Ok; failures surface as panics from the `expect` calls
+        .iter()
+        .map(|req| req.body_json::<Value>().expect("request json"))
+        .collect())
+}
+
+fn find_tool_output<'a>(requests: &'a [Value], call_id: &str) -> Option<&'a Value> { // first `function_call_output` input item whose `call_id` matches, or None
+    requests.iter().find_map(|body| { // request bodies are scanned in order; the first hit wins
+        body.get("input")
+            .and_then(Value::as_array) // bodies without an `input` array are skipped
+            .and_then(|items| {
+                items.iter().find(|item| {
+                    item.get("type").and_then(Value::as_str) == Some("function_call_output")
+                        && item.get("call_id").and_then(Value::as_str) == Some(call_id)
+                })
+            })
+    })
+}
+
+fn collect_file_names(content: &str) -> HashSet<String> { // treats each non-blank line as a path and collects the final components into a set
+    content
+        .lines()
+        .filter_map(|line| {
+            if line.trim().is_empty() { // skip blank and whitespace-only lines
+                return None;
+            }
+            Path::new(line) // the untrimmed line is used as the path
+                .file_name() // None here drops the line from the result
+                .map(|name| name.to_string_lossy().into_owned())
+        })
+        .collect()
+}
+
+fn extract_content_and_success(value: &Value) -> (Option<&str>, Option<bool>) { // returns (content text, success flag) from a tool `output` value
+    match value {
+        Value::String(text) => (Some(text.as_str()), None), // a bare string is the content; no success flag exists
+        Value::Object(obj) => ( // an object may carry a string `content` and a boolean `success`
+            obj.get("content").and_then(Value::as_str),
+            obj.get("success").and_then(Value::as_bool),
+        ),
+        _ => (None, None), // any other JSON type yields neither field
+    }
+}
--- a/codex-rs/core/tests/suite/list_dir.rs
+++ b/codex-rs/core/tests/suite/list_dir.rs
@@ -0,0 +1,460 @@
+#![cfg(not(target_os = "windows"))]
+
+use codex_core::protocol::AskForApproval;
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::InputItem;
+use codex_core::protocol::Op;
+use codex_core::protocol::SandboxPolicy;
+use codex_protocol::config_types::ReasoningSummary;
+use core_test_support::responses;
+use core_test_support::responses::ev_assistant_message;
+use core_test_support::responses::ev_completed;
+use core_test_support::responses::ev_function_call;
+use core_test_support::responses::ev_response_created;
+use core_test_support::responses::sse;
+use core_test_support::responses::start_mock_server;
+use core_test_support::skip_if_no_network;
+use core_test_support::test_codex::TestCodex;
+use core_test_support::test_codex::test_codex;
+use core_test_support::wait_for_event;
+use pretty_assertions::assert_eq;
+use serde_json::Value;
+use wiremock::matchers::any;
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[ignore = "disabled until we enable list_dir tool"]
+async fn list_dir_tool_returns_entries() -> anyhow::Result<()> { // list_dir on a directory with one file and one subdirectory
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+
+    let TestCodex {
+        codex,
+        cwd,
+        session_configured,
+        ..
+    } = test_codex().build(&server).await?;
+
+    let dir_path = cwd.path().join("sample_dir"); // fixture: sample_dir/{alpha.txt, nested/}
+    std::fs::create_dir(&dir_path)?;
+    std::fs::write(dir_path.join("alpha.txt"), "first file")?;
+    std::fs::create_dir(dir_path.join("nested"))?;
+    let dir_path = dir_path.to_string_lossy().to_string(); // shadowed as a String for the JSON arguments
+
+    let call_id = "list-dir-call";
+    let arguments = serde_json::json!({
+        "dir_path": dir_path,
+        "offset": 1,
+        "limit": 2,
+    })
+    .to_string();
+
+    let first_response = sse(vec![ // first stream: the model requests the list_dir call
+        ev_response_created("resp-1"),
+        ev_function_call(call_id, "list_dir", &arguments),
+        ev_completed("resp-1"),
+    ]);
+    responses::mount_sse_once_match(&server, any(), first_response).await;
+
+    let second_response = sse(vec![ // second stream: the assistant message "done", then completion
+        ev_assistant_message("msg-1", "done"),
+        ev_completed("resp-2"),
+    ]);
+    responses::mount_sse_once_match(&server, any(), second_response).await;
+
+    let session_model = session_configured.model.clone();
+
+    codex
+        .submit(Op::UserTurn {
+            items: vec![InputItem::Text {
+                text: "list directory contents".into(),
+            }],
+            final_output_json_schema: None,
+            cwd: cwd.path().to_path_buf(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy: SandboxPolicy::DangerFullAccess,
+            model: session_model,
+            effort: None,
+            summary: ReasoningSummary::Auto,
+        })
+        .await?;
+
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    let requests = server.received_requests().await.expect("recorded requests");
+    let request_bodies = requests
+        .iter()
+        .map(|req| req.body_json::<Value>().unwrap())
+        .collect::<Vec<_>>();
+    assert!(
+        !request_bodies.is_empty(),
+        "expected at least one request body"
+    );
+
+    let tool_output_item = request_bodies // first function_call_output item in any request; call_id is checked afterwards
+        .iter()
+        .find_map(|body| {
+            body.get("input")
+                .and_then(Value::as_array)
+                .and_then(|items| {
+                    items.iter().find(|item| {
+                        item.get("type").and_then(Value::as_str) == Some("function_call_output")
+                    })
+                })
+        })
+        .unwrap_or_else(|| {
+            panic!("function_call_output item not found in requests: {request_bodies:#?}")
+        });
+
+    assert_eq!(
+        tool_output_item.get("call_id").and_then(Value::as_str),
+        Some(call_id)
+    );
+
+    let output_text = tool_output_item
+        .get("output")
+        .and_then(|value| match value { // output may be a bare string or an object with a string `content`
+            Value::String(text) => Some(text.as_str()),
+            Value::Object(obj) => obj.get("content").and_then(Value::as_str),
+            _ => None,
+        })
+        .expect("output text present");
+    assert_eq!(output_text, "E1: [file] alpha.txt\nE2: [dir] nested");
+
+    Ok(())
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[ignore = "disabled until we enable list_dir tool"]
+async fn list_dir_tool_depth_one_omits_children() -> anyhow::Result<()> { // with "depth": 1 the file inside nested/ must not be listed
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+
+    let TestCodex {
+        codex,
+        cwd,
+        session_configured,
+        ..
+    } = test_codex().build(&server).await?;
+
+    let dir_path = cwd.path().join("depth_one"); // fixture: depth_one/{alpha.txt, nested/beta.txt}
+    std::fs::create_dir(&dir_path)?;
+    std::fs::write(dir_path.join("alpha.txt"), "alpha")?;
+    std::fs::create_dir(dir_path.join("nested"))?;
+    std::fs::write(dir_path.join("nested").join("beta.txt"), "beta")?; // one level down; absent from the expected output
+    let dir_path = dir_path.to_string_lossy().to_string();
+
+    let call_id = "list-dir-depth1";
+    let arguments = serde_json::json!({
+        "dir_path": dir_path,
+        "offset": 1,
+        "limit": 10,
+        "depth": 1,
+    })
+    .to_string();
+
+    let first_response = sse(vec![ // first stream: the model requests the list_dir call
+        ev_response_created("resp-1"),
+        ev_function_call(call_id, "list_dir", &arguments),
+        ev_completed("resp-1"),
+    ]);
+    responses::mount_sse_once_match(&server, any(), first_response).await;
+
+    let second_response = sse(vec![ // second stream: the assistant message "done", then completion
+        ev_assistant_message("msg-1", "done"),
+        ev_completed("resp-2"),
+    ]);
+    responses::mount_sse_once_match(&server, any(), second_response).await;
+
+    let session_model = session_configured.model.clone();
+
+    codex
+        .submit(Op::UserTurn {
+            items: vec![InputItem::Text {
+                text: "list directory contents depth one".into(),
+            }],
+            final_output_json_schema: None,
+            cwd: cwd.path().to_path_buf(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy: SandboxPolicy::DangerFullAccess,
+            model: session_model,
+            effort: None,
+            summary: ReasoningSummary::Auto,
+        })
+        .await?;
+
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    let requests = server.received_requests().await.expect("recorded requests");
+    let request_bodies = requests
+        .iter()
+        .map(|req| req.body_json::<Value>().unwrap())
+        .collect::<Vec<_>>();
+    assert!(
+        !request_bodies.is_empty(),
+        "expected at least one request body"
+    );
+
+    let tool_output_item = request_bodies // first function_call_output item in any request; call_id is checked afterwards
+        .iter()
+        .find_map(|body| {
+            body.get("input")
+                .and_then(Value::as_array)
+                .and_then(|items| {
+                    items.iter().find(|item| {
+                        item.get("type").and_then(Value::as_str) == Some("function_call_output")
+                    })
+                })
+        })
+        .unwrap_or_else(|| {
+            panic!("function_call_output item not found in requests: {request_bodies:#?}")
+        });
+
+    assert_eq!(
+        tool_output_item.get("call_id").and_then(Value::as_str),
+        Some(call_id)
+    );
+
+    let output_text = tool_output_item
+        .get("output")
+        .and_then(|value| match value { // output may be a bare string or an object with a string `content`
+            Value::String(text) => Some(text.as_str()),
+            Value::Object(obj) => obj.get("content").and_then(Value::as_str),
+            _ => None,
+        })
+        .expect("output text present");
+    assert_eq!(output_text, "E1: [file] alpha.txt\nE2: [dir] nested"); // only the top-level entries
+
+    Ok(())
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[ignore = "disabled until we enable list_dir tool"]
+async fn list_dir_tool_depth_two_includes_children_only() -> anyhow::Result<()> { // with "depth": 2 entries of nested/ are listed, but not the file inside nested/grand/
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+
+    let TestCodex {
+        codex,
+        cwd,
+        session_configured,
+        ..
+    } = test_codex().build(&server).await?;
+
+    let dir_path = cwd.path().join("depth_two"); // fixture: depth_two/{alpha.txt, nested/{beta.txt, grand/gamma.txt}}
+    std::fs::create_dir(&dir_path)?;
+    std::fs::write(dir_path.join("alpha.txt"), "alpha")?;
+    let nested = dir_path.join("nested");
+    std::fs::create_dir(&nested)?;
+    std::fs::write(nested.join("beta.txt"), "beta")?;
+    let deeper = nested.join("grand");
+    std::fs::create_dir(&deeper)?;
+    std::fs::write(deeper.join("gamma.txt"), "gamma")?; // two levels down; absent from the expected output
+    let dir_path_string = dir_path.to_string_lossy().to_string();
+
+    let call_id = "list-dir-depth2";
+    let arguments = serde_json::json!({
+        "dir_path": dir_path_string,
+        "offset": 1,
+        "limit": 10,
+        "depth": 2,
+    })
+    .to_string();
+
+    let first_response = sse(vec![ // NOTE(review): sibling tests build this first event with ev_response_created("resp-1") — presumably equivalent; confirm and reuse
+        serde_json::json!({
+            "type": "response.created",
+            "response": {"id": "resp-1"}
+        }),
+        ev_function_call(call_id, "list_dir", &arguments),
+        ev_completed("resp-1"),
+    ]);
+    responses::mount_sse_once_match(&server, any(), first_response).await;
+
+    let second_response = sse(vec![ // second stream: the assistant message "done", then completion
+        ev_assistant_message("msg-1", "done"),
+        ev_completed("resp-2"),
+    ]);
+    responses::mount_sse_once_match(&server, any(), second_response).await;
+
+    let session_model = session_configured.model.clone();
+
+    codex
+        .submit(Op::UserTurn {
+            items: vec![InputItem::Text {
+                text: "list directory contents depth two".into(),
+            }],
+            final_output_json_schema: None,
+            cwd: cwd.path().to_path_buf(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy: SandboxPolicy::DangerFullAccess,
+            model: session_model,
+            effort: None,
+            summary: ReasoningSummary::Auto,
+        })
+        .await?;
+
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    let requests = server.received_requests().await.expect("recorded requests");
+    let request_bodies = requests
+        .iter()
+        .map(|req| req.body_json::<Value>().unwrap())
+        .collect::<Vec<_>>();
+    assert!(
+        !request_bodies.is_empty(),
+        "expected at least one request body"
+    );
+
+    let tool_output_item = request_bodies // first function_call_output item in any request; call_id is checked afterwards
+        .iter()
+        .find_map(|body| {
+            body.get("input")
+                .and_then(Value::as_array)
+                .and_then(|items| {
+                    items.iter().find(|item| {
+                        item.get("type").and_then(Value::as_str) == Some("function_call_output")
+                    })
+                })
+        })
+        .unwrap_or_else(|| {
+            panic!("function_call_output item not found in requests: {request_bodies:#?}")
+        });
+
+    assert_eq!(
+        tool_output_item.get("call_id").and_then(Value::as_str),
+        Some(call_id)
+    );
+
+    let output_text = tool_output_item
+        .get("output")
+        .and_then(|value| match value { // output may be a bare string or an object with a string `content`
+            Value::String(text) => Some(text.as_str()),
+            Value::Object(obj) => obj.get("content").and_then(Value::as_str),
+            _ => None,
+        })
+        .expect("output text present");
+    assert_eq!( // nested/grand appears as a directory entry; nested/grand/gamma.txt does not
+        output_text,
+        "E1: [file] alpha.txt\nE2: [dir] nested\nE3: [file] nested/beta.txt\nE4: [dir] nested/grand"
+    );
+
+    Ok(())
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[ignore = "disabled until we enable list_dir tool"]
+async fn list_dir_tool_depth_three_includes_grandchildren() -> anyhow::Result<()> { // with "depth": 3 the file inside nested/grand/ is listed as well
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+
+    let TestCodex {
+        codex,
+        cwd,
+        session_configured,
+        ..
+    } = test_codex().build(&server).await?;
+
+    let dir_path = cwd.path().join("depth_three"); // fixture: depth_three/{alpha.txt, nested/{beta.txt, grand/gamma.txt}}
+    std::fs::create_dir(&dir_path)?;
+    std::fs::write(dir_path.join("alpha.txt"), "alpha")?;
+    let nested = dir_path.join("nested");
+    std::fs::create_dir(&nested)?;
+    std::fs::write(nested.join("beta.txt"), "beta")?;
+    let deeper = nested.join("grand");
+    std::fs::create_dir(&deeper)?;
+    std::fs::write(deeper.join("gamma.txt"), "gamma")?; // two levels down; expected as entry E5
+    let dir_path_string = dir_path.to_string_lossy().to_string();
+
+    let call_id = "list-dir-depth3";
+    let arguments = serde_json::json!({
+        "dir_path": dir_path_string,
+        "offset": 1,
+        "limit": 10,
+        "depth": 3,
+    })
+    .to_string();
+
+    let first_response = sse(vec![ // NOTE(review): sibling tests build this first event with ev_response_created("resp-1") — presumably equivalent; confirm and reuse
+        serde_json::json!({
+            "type": "response.created",
+            "response": {"id": "resp-1"}
+        }),
+        ev_function_call(call_id, "list_dir", &arguments),
+        ev_completed("resp-1"),
+    ]);
+    responses::mount_sse_once_match(&server, any(), first_response).await;
+
+    let second_response = sse(vec![ // second stream: the assistant message "done", then completion
+        ev_assistant_message("msg-1", "done"),
+        ev_completed("resp-2"),
+    ]);
+    responses::mount_sse_once_match(&server, any(), second_response).await;
+
+    let session_model = session_configured.model.clone();
+
+    codex
+        .submit(Op::UserTurn {
+            items: vec![InputItem::Text {
+                text: "list directory contents depth three".into(),
+            }],
+            final_output_json_schema: None,
+            cwd: cwd.path().to_path_buf(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy: SandboxPolicy::DangerFullAccess,
+            model: session_model,
+            effort: None,
+            summary: ReasoningSummary::Auto,
+        })
+        .await?;
+
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    let requests = server.received_requests().await.expect("recorded requests");
+    let request_bodies = requests
+        .iter()
+        .map(|req| req.body_json::<Value>().unwrap())
+        .collect::<Vec<_>>();
+    assert!(
+        !request_bodies.is_empty(),
+        "expected at least one request body"
+    );
+
+    let tool_output_item = request_bodies // first function_call_output item in any request; call_id is checked afterwards
+        .iter()
+        .find_map(|body| {
+            body.get("input")
+                .and_then(Value::as_array)
+                .and_then(|items| {
+                    items.iter().find(|item| {
+                        item.get("type").and_then(Value::as_str) == Some("function_call_output")
+                    })
+                })
+        })
+        .unwrap_or_else(|| {
+            panic!("function_call_output item not found in requests: {request_bodies:#?}")
+        });
+
+    assert_eq!(
+        tool_output_item.get("call_id").and_then(Value::as_str),
+        Some(call_id)
+    );
+
+    let output_text = tool_output_item
+        .get("output")
+        .and_then(|value| match value { // output may be a bare string or an object with a string `content`
+            Value::String(text) => Some(text.as_str()),
+            Value::Object(obj) => obj.get("content").and_then(Value::as_str),
+            _ => None,
+        })
+        .expect("output text present");
+    assert_eq!( // all five fixture entries are expected, including nested/grand/gamma.txt
+        output_text,
+        "E1: [file] alpha.txt\nE2: [dir] nested\nE3: [file] nested/beta.txt\nE4: [dir] nested/grand\nE5: [file] nested/grand/gamma.txt"
+    );
+
+    Ok(())
+}
--- a/codex-rs/core/tests/suite/mod.rs
+++ b/codex-rs/core/tests/suite/mod.rs
@@ -9,7 +9,9 @@ mod compact_resume_fork;
 mod exec;
 mod exec_stream_events;
 mod fork_conversation;
+mod grep_files;
 mod json_result;
+mod list_dir;
 mod live_cli;
 mod model_overrides;
 mod model_tools;
@@ -20,9 +22,11 @@ mod review;
 mod rmcp_client;
 mod rollout_list_find;
 mod seatbelt;
+mod shell_serialization;
 mod stream_error_allows_next_turn;
 mod stream_no_completed;
 mod tool_harness;
+mod tool_parallelism;
 mod tools;
 mod unified_exec;
 mod user_notification;
--- a/codex-rs/core/tests/suite/model_tools.rs
+++ b/codex-rs/core/tests/suite/model_tools.rs
@@ -4,20 +4,18 @@ use codex_core::CodexAuth;
 use codex_core::ConversationManager;
 use codex_core::ModelProviderInfo;
 use codex_core::built_in_model_providers;
+use codex_core::features::Feature;
 use codex_core::model_family::find_family_for_model;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::InputItem;
 use codex_core::protocol::Op;
 use core_test_support::load_default_config_for_test;
 use core_test_support::load_sse_fixture_with_id;
+use core_test_support::responses;
 use core_test_support::skip_if_no_network;
 use core_test_support::wait_for_event;
 use tempfile::TempDir;
-use wiremock::Mock;
 use wiremock::MockServer;
-use wiremock::ResponseTemplate;
-use wiremock::matchers::method;
-use wiremock::matchers::path;

 fn sse_completed(id: &str) -> String {
    load_sse_fixture_with_id("tests/fixtures/completed_template.json", id)
@@ -44,16 +42,7 @@ async fn collect_tool_identifiers_for_model(model: &str) -> Vec<String> {
    let server = MockServer::start().await;

    let sse = sse_completed(model);
-    let template = ResponseTemplate::new(200)
-        .insert_header("content-type", "text/event-stream")
-        .set_body_raw(sse, "text/event-stream");
-
-    Mock::given(method("POST"))
-        .and(path("/v1/responses"))
-        .respond_with(template)
-        .expect(1)
-        .mount(&server)
-        .await;
+    let resp_mock = responses::mount_sse_once_match(&server, wiremock::matchers::any(), sse).await;

    let model_provider = ModelProviderInfo {
        base_url: Some(format!("{}/v1", server.uri())),
@@ -68,12 +57,12 @@ async fn collect_tool_identifiers_for_model(model: &str) -> Vec<String> {
    config.model = model.to_string();
    config.model_family =
        find_family_for_model(model).unwrap_or_else(|| panic!("unknown model family for {model}"));
-    config.include_plan_tool = false;
-    config.include_apply_patch_tool = false;
-    config.include_view_image_tool = false;
-    config.tools_web_search_request = false;
-    config.use_experimental_streamable_shell_tool = false;
-    config.use_experimental_unified_exec_tool = false;
+    config.features.disable(Feature::PlanTool);
+    config.features.disable(Feature::ApplyPatchFreeform);
+    config.features.disable(Feature::ViewImageTool);
+    config.features.disable(Feature::WebSearchRequest);
+    config.features.disable(Feature::StreamableShell);
+    config.features.disable(Feature::UnifiedExec);

    let conversation_manager =
        ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
@@ -93,13 +82,7 @@ async fn collect_tool_identifiers_for_model(model: &str) -> Vec<String> {
        .unwrap();
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

-    let requests = server.received_requests().await.unwrap();
-    assert_eq!(
-        requests.len(),
-        1,
-        "expected a single request for model {model}"
-    );
-    let body = requests[0].body_json::<serde_json::Value>().unwrap();
+    let body = resp_mock.single_request().body_json();
    tool_identifiers(&body)
 }

@@ -125,7 +108,7 @@ async fn model_selects_expected_tools() {
    let gpt5_codex_tools = collect_tool_identifiers_for_model("gpt-5-codex").await;
    assert_eq!(
        gpt5_codex_tools,
-        vec!["shell".to_string(), "read_file".to_string()],
-        "gpt-5-codex should expose the beta read_file tool",
+        vec!["shell".to_string(), "apply_patch".to_string(),],
+        "gpt-5-codex should expose the apply_patch tool",
    );
 }
--- a/codex-rs/core/tests/suite/prompt_caching.rs
+++ b/codex-rs/core/tests/suite/prompt_caching.rs
@@ -4,6 +4,8 @@ use codex_core::CodexAuth;
 use codex_core::ConversationManager;
 use codex_core::ModelProviderInfo;
 use codex_core::built_in_model_providers;
+use codex_core::config::OPENAI_DEFAULT_MODEL;
+use codex_core::features::Feature;
 use codex_core::model_family::find_family_for_model;
 use codex_core::protocol::AskForApproval;
 use codex_core::protocol::EventMsg;
@@ -18,6 +20,7 @@ use core_test_support::load_default_config_for_test;
 use core_test_support::load_sse_fixture_with_id;
 use core_test_support::skip_if_no_network;
 use core_test_support::wait_for_event;
+use std::collections::HashMap;
 use tempfile::TempDir;
 use wiremock::Mock;
 use wiremock::MockServer;
@@ -97,10 +100,10 @@ async fn codex_mini_latest_tools() {
    config.cwd = cwd.path().to_path_buf();
    config.model_provider = model_provider;
    config.user_instructions = Some("be consistent and helpful".to_string());
+    config.features.disable(Feature::ApplyPatchFreeform);

    let conversation_manager =
        ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
-    config.include_apply_patch_tool = false;
    config.model = "codex-mini-latest".to_string();
    config.model_family = find_family_for_model("codex-mini-latest").unwrap();

@@ -178,16 +181,16 @@ async fn prompt_tools_are_consistent_across_requests() {

    let cwd = TempDir::new().unwrap();
    let codex_home = TempDir::new().unwrap();
+
    let mut config = load_default_config_for_test(&codex_home);
    config.cwd = cwd.path().to_path_buf();
    config.model_provider = model_provider;
    config.user_instructions = Some("be consistent and helpful".to_string());
-    config.include_apply_patch_tool = true;
-    config.include_plan_tool = true;
+    config.features.enable(Feature::PlanTool);

    let conversation_manager =
        ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
-    let expected_instructions = config.model_family.base_instructions.clone();
+    let base_instructions = config.model_family.base_instructions.clone();
    let codex = conversation_manager
        .new_conversation(config)
        .await
@@ -219,14 +222,29 @@ async fn prompt_tools_are_consistent_across_requests() {

    // our internal implementation is responsible for keeping tools in sync
    // with the OpenAI schema, so we just verify the tool presence here
-    let expected_tools_names: &[&str] = &[
-        "shell",
-        "update_plan",
-        "apply_patch",
-        "read_file",
-        "view_image",
-    ];
+    let tools_by_model: HashMap<&'static str, Vec<&'static str>> = HashMap::from([
+        ("gpt-5", vec!["shell", "update_plan", "view_image"]),
+        (
+            "gpt-5-codex",
+            vec!["shell", "update_plan", "apply_patch", "view_image"],
+        ),
+    ]);
+    let expected_tools_names = tools_by_model
+        .get(OPENAI_DEFAULT_MODEL)
+        .unwrap_or_else(|| panic!("expected tools to be defined for model {OPENAI_DEFAULT_MODEL}"))
+        .as_slice();
    let body0 = requests[0].body_json::<serde_json::Value>().unwrap();
+
+    let expected_instructions = if expected_tools_names.contains(&"apply_patch") {
+        base_instructions
+    } else {
+        [
+            base_instructions.clone(),
+            include_str!("../../../apply-patch/apply_patch_tool_instructions.md").to_string(),
+        ]
+        .join("\n")
+    };
+
    assert_eq!(
        body0["instructions"],
        serde_json::json!(expected_instructions),
--- a/codex-rs/core/tests/suite/read_file.rs
+++ b/codex-rs/core/tests/suite/read_file.rs
@@ -10,6 +10,7 @@ use core_test_support::responses;
 use core_test_support::responses::ev_assistant_message;
 use core_test_support::responses::ev_completed;
 use core_test_support::responses::ev_function_call;
+use core_test_support::responses::ev_response_created;
 use core_test_support::responses::sse;
 use core_test_support::responses::start_mock_server;
 use core_test_support::skip_if_no_network;
@@ -21,6 +22,7 @@ use serde_json::Value;
 use wiremock::matchers::any;

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+#[ignore = "disabled until we enable read_file tool"]
 async fn read_file_tool_returns_requested_lines() -> anyhow::Result<()> {
    skip_if_no_network!(Ok(()));

@@ -46,10 +48,7 @@ async fn read_file_tool_returns_requested_lines() -> anyhow::Result<()> {
    .to_string();

    let first_response = sse(vec![
-        serde_json::json!({
-            "type": "response.created",
-            "response": {"id": "resp-1"}
-        }),
+        ev_response_created("resp-1"),
        ev_function_call(call_id, "read_file", &arguments),
        ev_completed("resp-1"),
    ]);
@@ -59,7 +58,7 @@ async fn read_file_tool_returns_requested_lines() -> anyhow::Result<()> {
        ev_assistant_message("msg-1", "done"),
        ev_completed("resp-2"),
    ]);
-    responses::mount_sse_once_match(&server, any(), second_response).await;
+    let second_mock = responses::mount_sse_once_match(&server, any(), second_response).await;

    let session_model = session_configured.model.clone();

@@ -80,36 +79,12 @@ async fn read_file_tool_returns_requested_lines() -> anyhow::Result<()> {

    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

-    let requests = server.received_requests().await.expect("recorded requests");
-    let request_bodies = requests
-        .iter()
-        .map(|req| req.body_json::<Value>().unwrap())
-        .collect::<Vec<_>>();
-    assert!(
-        !request_bodies.is_empty(),
-        "expected at least one request body"
-    );
-
-    let tool_output_item = request_bodies
-        .iter()
-        .find_map(|body| {
-            body.get("input")
-                .and_then(Value::as_array)
-                .and_then(|items| {
-                    items.iter().find(|item| {
-                        item.get("type").and_then(Value::as_str) == Some("function_call_output")
-                    })
-                })
-        })
-        .unwrap_or_else(|| {
-            panic!("function_call_output item not found in requests: {request_bodies:#?}")
-        });
-
+    let req = second_mock.single_request();
+    let tool_output_item = req.function_call_output(call_id);
    assert_eq!(
        tool_output_item.get("call_id").and_then(Value::as_str),
        Some(call_id)
    );
-
    let output_text = tool_output_item
        .get("output")
        .and_then(|value| match value {
--- a/codex-rs/core/tests/suite/review.rs
+++ b/codex-rs/core/tests/suite/review.rs
@@ -24,6 +24,7 @@ use core_test_support::load_default_config_for_test;
 use core_test_support::load_sse_fixture_with_id_from_str;
 use core_test_support::skip_if_no_network;
 use core_test_support::wait_for_event;
+use core_test_support::wait_for_event_with_timeout;
 use pretty_assertions::assert_eq;
 use std::path::PathBuf;
 use std::sync::Arc;
@@ -260,25 +261,28 @@ async fn review_does_not_emit_agent_message_on_structured_output() {
        .unwrap();

    // Drain events until TaskComplete; ensure none are AgentMessage.
-    use tokio::time::Duration;
-    use tokio::time::timeout;
    let mut saw_entered = false;
    let mut saw_exited = false;
-    loop {
-        let ev = timeout(Duration::from_secs(5), codex.next_event())
-            .await
-            .expect("timeout waiting for event")
-            .expect("stream ended unexpectedly");
-        match ev.msg {
-            EventMsg::TaskComplete(_) => break,
+    wait_for_event_with_timeout(
+        &codex,
+        |event| match event {
+            EventMsg::TaskComplete(_) => true,
            EventMsg::AgentMessage(_) => {
                panic!("unexpected AgentMessage during review with structured output")
            }
-            EventMsg::EnteredReviewMode(_) => saw_entered = true,
-            EventMsg::ExitedReviewMode(_) => saw_exited = true,
-            _ => {}
-        }
-    }
+            EventMsg::EnteredReviewMode(_) => {
+                saw_entered = true;
+                false
+            }
+            EventMsg::ExitedReviewMode(_) => {
+                saw_exited = true;
+                false
+            }
+            _ => false,
+        },
+        tokio::time::Duration::from_secs(5),
+    )
+    .await;
    assert!(saw_entered && saw_exited, "missing review lifecycle events");

    server.verify().await;
@@ -441,7 +445,7 @@ async fn review_input_isolated_from_parent_history() {
    .await;
    let _complete = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

-    // Assert the request `input` contains the environment context followed by the review prompt.
+    // Assert the request `input` contains the environment context followed by the user review prompt.
    let request = &server.received_requests().await.unwrap()[0];
    let body = request.body_json::<serde_json::Value>().unwrap();
    let input = body["input"].as_array().expect("input array");
@@ -469,9 +473,14 @@ async fn review_input_isolated_from_parent_history() {
    assert_eq!(review_msg["role"].as_str().unwrap(), "user");
    assert_eq!(
        review_msg["content"][0]["text"].as_str().unwrap(),
-        format!("{REVIEW_PROMPT}\n\n---\n\nNow, here's your task: Please review only this",)
+        review_prompt,
+        "user message should only contain the raw review prompt"
    );

+    // Ensure the REVIEW_PROMPT rubric is sent via instructions.
+    let instructions = body["instructions"].as_str().expect("instructions string");
+    assert_eq!(instructions, REVIEW_PROMPT);
+
    // Also verify that a user interruption note was recorded in the rollout.
    codex.submit(Op::GetPath).await.unwrap();
    let history_event =
--- a/codex-rs/core/tests/suite/rmcp_client.rs
+++ b/codex-rs/core/tests/suite/rmcp_client.rs
@@ -9,6 +9,7 @@ use std::time::UNIX_EPOCH;

 use codex_core::config_types::McpServerConfig;
 use codex_core::config_types::McpServerTransportConfig;
+use codex_core::features::Feature;

 use codex_core::protocol::AskForApproval;
 use codex_core::protocol::EventMsg;
@@ -47,10 +48,7 @@ async fn stdio_server_round_trip() -> anyhow::Result<()> {
        &server,
        any(),
        responses::sse(vec![
-            serde_json::json!({
-                "type": "response.created",
-                "response": {"id": "resp-1"}
-            }),
+            responses::ev_response_created("resp-1"),
            responses::ev_function_call(call_id, &tool_name, "{\"message\":\"ping\"}"),
            responses::ev_completed("resp-1"),
        ]),
@@ -77,7 +75,7 @@ async fn stdio_server_round_trip() -> anyhow::Result<()> {

    let fixture = test_codex()
        .with_config(move |config| {
-            config.use_experimental_use_rmcp_client = true;
+            config.features.enable(Feature::RmcpClient);
            config.mcp_servers.insert(
                server_name.to_string(),
                McpServerConfig {
@@ -89,6 +87,7 @@ async fn stdio_server_round_trip() -> anyhow::Result<()> {
                            expected_env_value.to_string(),
                        )])),
                    },
+                    enabled: true,
                    startup_timeout_sec: Some(Duration::from_secs(10)),
                    tool_timeout_sec: None,
                },
@@ -184,10 +183,7 @@ async fn streamable_http_tool_call_round_trip() -> anyhow::Result<()> {
        &server,
        any(),
        responses::sse(vec![
-            serde_json::json!({
-                "type": "response.created",
-                "response": {"id": "resp-1"}
-            }),
+            responses::ev_response_created("resp-1"),
            responses::ev_function_call(call_id, &tool_name, "{\"message\":\"ping\"}"),
            responses::ev_completed("resp-1"),
        ]),
@@ -232,14 +228,15 @@ async fn streamable_http_tool_call_round_trip() -> anyhow::Result<()> {

    let fixture = test_codex()
        .with_config(move |config| {
-            config.use_experimental_use_rmcp_client = true;
+            config.features.enable(Feature::RmcpClient);
            config.mcp_servers.insert(
                server_name.to_string(),
                McpServerConfig {
                    transport: McpServerTransportConfig::StreamableHttp {
                        url: server_url,
-                        bearer_token: None,
+                        bearer_token_env_var: None,
                    },
+                    enabled: true,
                    startup_timeout_sec: Some(Duration::from_secs(10)),
                    tool_timeout_sec: None,
                },
@@ -352,10 +349,7 @@ async fn streamable_http_with_oauth_round_trip() -> anyhow::Result<()> {
        &server,
        any(),
        responses::sse(vec![
-            serde_json::json!({
-                "type": "response.created",
-                "response": {"id": "resp-1"}
-            }),
+            responses::ev_response_created("resp-1"),
            responses::ev_function_call(call_id, &tool_name, "{\"message\":\"ping\"}"),
            responses::ev_completed("resp-1"),
        ]),
@@ -415,14 +409,15 @@ async fn streamable_http_with_oauth_round_trip() -> anyhow::Result<()> {

    let fixture = test_codex()
        .with_config(move |config| {
-            config.use_experimental_use_rmcp_client = true;
+            config.features.enable(Feature::RmcpClient);
            config.mcp_servers.insert(
                server_name.to_string(),
                McpServerConfig {
                    transport: McpServerTransportConfig::StreamableHttp {
                        url: server_url,
-                        bearer_token: None,
+                        bearer_token_env_var: None,
                    },
+                    enabled: true,
                    startup_timeout_sec: Some(Duration::from_secs(10)),
                    tool_timeout_sec: None,
                },
--- a/Show More
+++ b/Show More