changes

2026-02-07 01:13:40 +00:00 · 2026-01-05 16:52:25 -08:00 · 2026-01-05 16:38:33 -08:00 · 2026-01-05 16:32:27 -08:00 · 2026-01-05 15:30:18 -08:00 · 2026-01-05 10:42:33 -08:00
230 changed files with 10879 additions and 6070 deletions
--- a/.github/codex-cli-login.png
+++ b/.github/codex-cli-login.png
--- a/.github/codex-cli-permissions.png
+++ b/.github/codex-cli-permissions.png
--- a/.github/codex-cli-splash.png
+++ b/.github/codex-cli-splash.png
--- a/.github/demo.gif
+++ b/.github/demo.gif
--- a/.github/workflows/close-stale-contributor-prs.yml
+++ b/.github/workflows/close-stale-contributor-prs.yml
@@ -12,6 +12,8 @@ permissions:

 jobs:
  close-stale-contributor-prs:
+    # Prevent scheduled runs on forks
+    if: github.repository == 'openai/codex'
    runs-on: ubuntu-latest
    steps:
      - name: Close inactive PRs from contributors
--- a/.github/workflows/issue-deduplicator.yml
+++ b/.github/workflows/issue-deduplicator.yml
@@ -9,7 +9,8 @@ on:
 jobs:
  gather-duplicates:
    name: Identify potential duplicates
-    if: ${{ github.event.action == 'opened' || (github.event.action == 'labeled' && github.event.label.name == 'codex-deduplicate') }}
+    # Prevent runs on forks (requires OpenAI API key, wastes Actions minutes)
+    if: github.repository == 'openai/codex' && (github.event.action == 'opened' || (github.event.action == 'labeled' && github.event.label.name == 'codex-deduplicate'))
    runs-on: ubuntu-latest
    permissions:
      contents: read
--- a/.github/workflows/issue-labeler.yml
+++ b/.github/workflows/issue-labeler.yml
@@ -9,7 +9,8 @@ on:
 jobs:
  gather-labels:
    name: Generate label suggestions
-    if: ${{ github.event.action == 'opened' || (github.event.action == 'labeled' && github.event.label.name == 'codex-label') }}
+    # Prevent runs on forks (requires OpenAI API key, wastes Actions minutes)
+    if: github.repository == 'openai/codex' && (github.event.action == 'opened' || (github.event.action == 'labeled' && github.event.label.name == 'codex-label'))
    runs-on: ubuntu-latest
    permissions:
      contents: read
--- a/.github/workflows/rust-release-prepare.yml
+++ b/.github/workflows/rust-release-prepare.yml
@@ -14,6 +14,8 @@ permissions:

 jobs:
  prepare:
+    # Prevent scheduled runs on forks (no secrets, wastes Actions minutes)
+    if: github.repository == 'openai/codex'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -77,6 +77,12 @@ If you don’t have the tool:
 - Prefer deep equals comparisons whenever possible. Perform `assert_eq!()` on entire objects, rather than individual fields.
 - Avoid mutating process environment in tests; prefer passing environment-derived flags or dependencies from above.

+### Spawning workspace binaries in tests (Cargo vs Buck2)
+
+- Prefer `codex_utils_cargo_bin::cargo_bin("...")` over `assert_cmd::Command::cargo_bin(...)` or `escargot` when tests need to spawn first-party binaries.
+  - Under Buck2, `CARGO_BIN_EXE_*` may be project-relative (e.g. `buck-out/...`), which breaks if a test changes its working directory. `codex_utils_cargo_bin::cargo_bin` resolves to an absolute path first.
+- When locating fixture files under Buck2, avoid `env!("CARGO_MANIFEST_DIR")` (Buck codegen sets it to `"."`). Prefer deriving paths from `codex_utils_cargo_bin::buck_project_root()` when needed.
+
 ### Integration tests (core)

 - Prefer the utilities in `core_test_support::responses` when writing end-to-end Codex tests.
--- a/README.md
+++ b/README.md
@@ -1,13 +1,11 @@
 <p align="center"><code>npm i -g @openai/codex</code><br />or <code>brew install --cask codex</code></p>
-
 <p align="center"><strong>Codex CLI</strong> is a coding agent from OpenAI that runs locally on your computer.
-</br>
-</br>If you want Codex in your code editor (VS Code, Cursor, Windsurf), <a href="https://developers.openai.com/codex/ide">install in your IDE</a>
-</br>If you are looking for the <em>cloud-based agent</em> from OpenAI, <strong>Codex Web</strong>, go to <a href="https://chatgpt.com/codex">chatgpt.com/codex</a></p>
-
 <p align="center">
  <img src="./.github/codex-cli-splash.png" alt="Codex CLI splash" width="80%" />
-  </p>
+</p>
+<p>
+If you want Codex in your code editor (VS Code, Cursor, Windsurf), <a href="https://developers.openai.com/codex/ide">install in your IDE</a>.
+<br />If you are looking for the <em>cloud-based agent</em> from OpenAI, <strong>Codex Web</strong>, go to <a href="https://chatgpt.com/codex">chatgpt.com/codex</a>.</p>

 ---

@@ -15,25 +13,19 @@

 ### Installing and running Codex CLI

-Install globally with your preferred package manager. If you use npm:
+Install globally with your preferred package manager:

 ```shell
+# Install using npm
 npm install -g @openai/codex
 ```

-Alternatively, if you use Homebrew:
-
 ```shell
+# Install using Homebrew
 brew install --cask codex
 ```

-Then simply run `codex` to get started:
-
-```shell
-codex
-```
-
-If you're running into upgrade issues with Homebrew, see the [FAQ entry on brew upgrade codex](./docs/faq.md#brew-upgrade-codex-isnt-upgrading-me).
+Then simply run `codex` to get started.

 <details>
 <summary>You can also go to the <a href="https://github.com/openai/codex/releases/latest">latest GitHub Release</a> and download the appropriate binary for your platform.</summary>
@@ -53,60 +45,15 @@ Each archive contains a single entry with the platform baked into the name (e.g.

 ### Using Codex with your ChatGPT plan

-<p align="center">
-  <img src="./.github/codex-cli-login.png" alt="Codex CLI login" width="80%" />
-  </p>
-
 Run `codex` and select **Sign in with ChatGPT**. We recommend signing into your ChatGPT account to use Codex as part of your Plus, Pro, Team, Edu, or Enterprise plan. [Learn more about what's included in your ChatGPT plan](https://help.openai.com/en/articles/11369540-codex-in-chatgpt).

-You can also use Codex with an API key, but this requires [additional setup](./docs/authentication.md#usage-based-billing-alternative-use-an-openai-api-key). If you previously used an API key for usage-based billing, see the [migration steps](./docs/authentication.md#migrating-from-usage-based-billing-api-key). If you're having trouble with login, please comment on [this issue](https://github.com/openai/codex/issues/1243).
+You can also use Codex with an API key, but this requires [additional setup](https://developers.openai.com/codex/auth#sign-in-with-an-api-key).

-### Model Context Protocol (MCP)
+## Docs

-Codex can access MCP servers. To configure them, refer to the [config docs](./docs/config.md#mcp_servers).
-
-### Configuration
-
-Codex CLI supports a rich set of configuration options, with preferences stored in `~/.codex/config.toml`. For full configuration options, see [Configuration](./docs/config.md).
-
-### Execpolicy
-
-See the [Execpolicy quickstart](./docs/execpolicy.md) to set up rules that govern what commands Codex can execute.
-
-### Docs & FAQ
-
- [**Getting started**](./docs/getting-started.md)
-  - [CLI usage](./docs/getting-started.md#cli-usage)
-  - [Slash Commands](./docs/slash_commands.md)
-  - [Running with a prompt as input](./docs/getting-started.md#running-with-a-prompt-as-input)
-  - [Example prompts](./docs/getting-started.md#example-prompts)
-  - [Custom prompts](./docs/prompts.md)
-  - [Memory with AGENTS.md](./docs/getting-started.md#memory-with-agentsmd)
- [**Configuration**](./docs/config.md)
-  - [Example config](./docs/example-config.md)
- [**Sandbox & approvals**](./docs/sandbox.md)
- [**Execpolicy quickstart**](./docs/execpolicy.md)
- [**Authentication**](./docs/authentication.md)
-  - [Auth methods](./docs/authentication.md#forcing-a-specific-auth-method-advanced)
-  - [Login on a "Headless" machine](./docs/authentication.md#connecting-on-a-headless-machine)
- **Automating Codex**
-  - [GitHub Action](https://github.com/openai/codex-action)
-  - [TypeScript SDK](./sdk/typescript/README.md)
-  - [Non-interactive mode (`codex exec`)](./docs/exec.md)
- [**Advanced**](./docs/advanced.md)
-  - [Tracing / verbose logging](./docs/advanced.md#tracing--verbose-logging)
-  - [Model Context Protocol (MCP)](./docs/advanced.md#model-context-protocol-mcp)
- [**Zero data retention (ZDR)**](./docs/zdr.md)
+- [**Codex Documentation**](https://developers.openai.com/codex)
 - [**Contributing**](./docs/contributing.md)
- [**Install & build**](./docs/install.md)
-  - [System Requirements](./docs/install.md#system-requirements)
-  - [DotSlash](./docs/install.md#dotslash)
-  - [Build from source](./docs/install.md#build-from-source)
- [**FAQ**](./docs/faq.md)
+- [**Installing & building**](./docs/install.md)
 - [**Open source fund**](./docs/open-source-fund.md)

---
-
-## License
-
 This repository is licensed under the [Apache-2.0 License](LICENSE).
--- a/codex-cli/scripts/install_native_deps.py
+++ b/codex-cli/scripts/install_native_deps.py
@@ -2,6 +2,7 @@
 """Install Codex native binaries (Rust CLI plus ripgrep helpers)."""

 import argparse
+from contextlib import contextmanager
 import json
 import os
 import shutil
@@ -12,6 +13,7 @@ import zipfile
 from dataclasses import dataclass
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
+import sys
 from typing import Iterable, Sequence
 from urllib.parse import urlparse
 from urllib.request import urlopen
@@ -77,6 +79,45 @@ RG_TARGET_PLATFORM_PAIRS: list[tuple[str, str]] = [
 RG_TARGET_TO_PLATFORM = {target: platform for target, platform in RG_TARGET_PLATFORM_PAIRS}
 DEFAULT_RG_TARGETS = [target for target, _ in RG_TARGET_PLATFORM_PAIRS]

+# urllib.request.urlopen() defaults to no timeout (can hang indefinitely), which is painful in CI.
+DOWNLOAD_TIMEOUT_SECS = 60
+
+
+def _gha_enabled() -> bool:
+    # GitHub Actions supports "workflow commands" (e.g. ::group:: / ::error::) that make logs
+    # much easier to scan: groups collapse noisy sections and error annotations surface the
+    # failure in the UI without changing the actual exception/traceback output.
+    return os.environ.get("GITHUB_ACTIONS") == "true"
+
+
+def _gha_escape(value: str) -> str:
+    # Workflow commands require percent/newline escaping.
+    return value.replace("%", "%25").replace("\r", "%0D").replace("\n", "%0A")
+
+
+def _gha_error(*, title: str, message: str) -> None:
+    # Emit a GitHub Actions error annotation. This does not replace stdout/stderr logs; it just
+    # adds a prominent summary line to the job UI so the root cause is easier to spot.
+    if not _gha_enabled():
+        return
+    print(
+        f"::error title={_gha_escape(title)}::{_gha_escape(message)}",
+        flush=True,
+    )
+
+
+@contextmanager
+def _gha_group(title: str):
+    # Wrap a block in a collapsible log group on GitHub Actions. Outside of GHA this is a no-op
+    # so local output remains unchanged.
+    if _gha_enabled():
+        print(f"::group::{_gha_escape(title)}", flush=True)
+    try:
+        yield
+    finally:
+        if _gha_enabled():
+            print("::endgroup::", flush=True)
+

 def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="Install native Codex binaries.")
@@ -131,18 +172,20 @@ def main() -> int:
    workflow_id = workflow_url.rstrip("/").split("/")[-1]
    print(f"Downloading native artifacts from workflow {workflow_id}...")

-    with tempfile.TemporaryDirectory(prefix="codex-native-artifacts-") as artifacts_dir_str:
-        artifacts_dir = Path(artifacts_dir_str)
-        _download_artifacts(workflow_id, artifacts_dir)
-        install_binary_components(
-            artifacts_dir,
-            vendor_dir,
-            [BINARY_COMPONENTS[name] for name in components if name in BINARY_COMPONENTS],
-        )
+    with _gha_group(f"Download native artifacts from workflow {workflow_id}"):
+        with tempfile.TemporaryDirectory(prefix="codex-native-artifacts-") as artifacts_dir_str:
+            artifacts_dir = Path(artifacts_dir_str)
+            _download_artifacts(workflow_id, artifacts_dir)
+            install_binary_components(
+                artifacts_dir,
+                vendor_dir,
+                [BINARY_COMPONENTS[name] for name in components if name in BINARY_COMPONENTS],
+            )

    if "rg" in components:
-        print("Fetching ripgrep binaries...")
-        fetch_rg(vendor_dir, DEFAULT_RG_TARGETS, manifest_path=RG_MANIFEST)
+        with _gha_group("Fetch ripgrep binaries"):
+            print("Fetching ripgrep binaries...")
+            fetch_rg(vendor_dir, DEFAULT_RG_TARGETS, manifest_path=RG_MANIFEST)

    print(f"Installed native dependencies into {vendor_dir}")
    return 0
@@ -203,7 +246,14 @@ def fetch_rg(

        for future in as_completed(future_map):
            target = future_map[future]
-            results[target] = future.result()
+            try:
+                results[target] = future.result()
+            except Exception as exc:
+                _gha_error(
+                    title="ripgrep install failed",
+                    message=f"target={target} error={exc!r}",
+                )
+                raise RuntimeError(f"Failed to install ripgrep for target {target}.") from exc
            print(f"  installed ripgrep for {target}")

    return [results[target] for target in targets]
@@ -301,6 +351,8 @@ def _fetch_single_rg(
    url = providers[0]["url"]
    archive_format = platform_info.get("format", "zst")
    archive_member = platform_info.get("path")
+    digest = platform_info.get("digest")
+    expected_size = platform_info.get("size")

    dest_dir = vendor_dir / target / "path"
    dest_dir.mkdir(parents=True, exist_ok=True)
@@ -313,10 +365,32 @@ def _fetch_single_rg(
        tmp_dir = Path(tmp_dir_str)
        archive_filename = os.path.basename(urlparse(url).path)
        download_path = tmp_dir / archive_filename
-        _download_file(url, download_path)
+        print(
+            f"  downloading ripgrep for {target} ({platform_key}) from {url}",
+            flush=True,
+        )
+        try:
+            _download_file(url, download_path)
+        except Exception as exc:
+            _gha_error(
+                title="ripgrep download failed",
+                message=f"target={target} platform={platform_key} url={url} error={exc!r}",
+            )
+            raise RuntimeError(
+                "Failed to download ripgrep "
+                f"(target={target}, platform={platform_key}, format={archive_format}, "
+                f"expected_size={expected_size!r}, digest={digest!r}, url={url}, dest={download_path})."
+            ) from exc

        dest.unlink(missing_ok=True)
-        extract_archive(download_path, archive_format, archive_member, dest)
+        try:
+            extract_archive(download_path, archive_format, archive_member, dest)
+        except Exception as exc:
+            raise RuntimeError(
+                "Failed to extract ripgrep "
+                f"(target={target}, platform={platform_key}, format={archive_format}, "
+                f"member={archive_member!r}, url={url}, archive={download_path})."
+            ) from exc

    if not is_windows:
        dest.chmod(0o755)
@@ -326,7 +400,9 @@ def _fetch_single_rg(

 def _download_file(url: str, dest: Path) -> None:
    dest.parent.mkdir(parents=True, exist_ok=True)
-    with urlopen(url) as response, open(dest, "wb") as out:
+    dest.unlink(missing_ok=True)
+
+    with urlopen(url, timeout=DOWNLOAD_TIMEOUT_SECS) as response, open(dest, "wb") as out:
        shutil.copyfileobj(response, out)


--- a/codex-rs/Cargo.lock
+++ b/codex-rs/Cargo.lock
@@ -42,7 +42,7 @@ dependencies = [
 "bitflags 2.10.0",
 "bytes",
 "bytestring",
- "derive_more 2.0.1",
+ "derive_more 2.1.1",
 "encoding_rs",
 "foldhash 0.1.5",
 "futures-core",
@@ -137,7 +137,7 @@ dependencies = [
 "bytes",
 "bytestring",
 "cfg-if",
- "derive_more 2.0.1",
+ "derive_more 2.1.1",
 "encoding_rs",
 "foldhash 0.1.5",
 "futures-core",
@@ -329,12 +329,12 @@ name = "app_test_support"
 version = "0.0.0"
 dependencies = [
 "anyhow",
- "assert_cmd",
 "base64",
 "chrono",
 "codex-app-server-protocol",
 "codex-core",
 "codex-protocol",
+ "codex-utils-cargo-bin",
 "core_test_support",
 "serde",
 "serde_json",
@@ -912,9 +912,9 @@ dependencies = [

 [[package]]
 name = "clap_complete"
-version = "4.5.57"
+version = "4.5.64"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4d9501bd3f5f09f7bbee01da9a511073ed30a80cd7a509f1214bb74eadea71ad"
+checksum = "4c0da80818b2d95eca9aa614a30783e42f62bf5fdfee24e68cfb960b071ba8d1"
 dependencies = [
 "clap",
 ]
@@ -993,7 +993,6 @@ version = "0.0.0"
 dependencies = [
 "anyhow",
 "app_test_support",
- "assert_cmd",
 "base64",
 "chrono",
 "codex-app-server-protocol",
@@ -1064,6 +1063,7 @@ dependencies = [
 "anyhow",
 "assert_cmd",
 "assert_matches",
+ "codex-utils-cargo-bin",
 "pretty_assertions",
 "similar",
 "tempfile",
@@ -1161,6 +1161,7 @@ dependencies = [
 "codex-tui",
 "codex-tui2",
 "codex-utils-absolute-path",
+ "codex-utils-cargo-bin",
 "codex-windows-sandbox",
 "ctor 0.5.0",
 "libc",
@@ -1285,6 +1286,7 @@ dependencies = [
 "codex-protocol",
 "codex-rmcp-client",
 "codex-utils-absolute-path",
+ "codex-utils-cargo-bin",
 "codex-utils-pty",
 "codex-utils-readiness",
 "codex-utils-string",
@@ -1334,7 +1336,7 @@ dependencies = [
 "tokio",
 "tokio-util",
 "toml 0.9.5",
- "toml_edit",
+ "toml_edit 0.24.0+spec-1.1.0",
 "tracing",
 "tracing-subscriber",
 "tracing-test",
@@ -1360,6 +1362,7 @@ dependencies = [
 "codex-core",
 "codex-protocol",
 "codex-utils-absolute-path",
+ "codex-utils-cargo-bin",
 "core_test_support",
 "libc",
 "mcp-types",
@@ -1385,11 +1388,11 @@ name = "codex-exec-server"
 version = "0.0.0"
 dependencies = [
 "anyhow",
- "assert_cmd",
 "async-trait",
 "clap",
 "codex-core",
 "codex-execpolicy",
+ "codex-utils-cargo-bin",
 "exec_server_test_support",
 "libc",
 "maplit",
@@ -1430,7 +1433,7 @@ dependencies = [
 "allocative",
 "anyhow",
 "clap",
- "derive_more 2.0.1",
+ "derive_more 2.1.1",
 "env_logger",
 "log",
 "multimap",
@@ -1451,6 +1454,7 @@ dependencies = [
 "codex-protocol",
 "pretty_assertions",
 "sentry",
+ "tracing",
 "tracing-subscriber",
 ]

@@ -1548,7 +1552,6 @@ name = "codex-mcp-server"
 version = "0.0.0"
 dependencies = [
 "anyhow",
- "assert_cmd",
 "codex-arg0",
 "codex-common",
 "codex-core",
@@ -1607,7 +1610,6 @@ dependencies = [
 "serde_json",
 "strum_macros 0.27.2",
 "tokio",
- "tonic",
 "tracing",
 "tracing-opentelemetry",
 "tracing-subscriber",
@@ -1672,8 +1674,8 @@ dependencies = [
 "axum",
 "codex-keyring-store",
 "codex-protocol",
+ "codex-utils-cargo-bin",
 "dirs",
- "escargot",
 "futures",
 "keyring",
 "mcp-types",
@@ -1700,6 +1702,7 @@ version = "0.0.0"
 dependencies = [
 "anyhow",
 "assert_cmd",
+ "codex-utils-cargo-bin",
 "pretty_assertions",
 "tempfile",
 "uds_windows",
@@ -1729,7 +1732,7 @@ dependencies = [
 "codex-windows-sandbox",
 "color-eyre",
 "crossterm",
- "derive_more 2.0.1",
+ "derive_more 2.1.1",
 "diffy",
 "dirs",
 "dunce",
@@ -1802,7 +1805,7 @@ dependencies = [
 "codex-windows-sandbox",
 "color-eyre",
 "crossterm",
- "derive_more 2.0.1",
+ "derive_more 2.1.1",
 "diffy",
 "dirs",
 "dunce",
@@ -1817,6 +1820,7 @@ dependencies = [
 "pulldown-cmark",
 "rand 0.9.2",
 "ratatui",
+ "ratatui-core",
 "ratatui-macros",
 "regex-lite",
 "reqwest",
@@ -1838,6 +1842,7 @@ dependencies = [
 "tracing-subscriber",
 "tree-sitter-bash",
 "tree-sitter-highlight",
+ "tui-scrollbar",
 "unicode-segmentation",
 "unicode-width 0.2.1",
 "url",
@@ -1866,6 +1871,14 @@ dependencies = [
 "tokio",
 ]

+[[package]]
+name = "codex-utils-cargo-bin"
+version = "0.0.0"
+dependencies = [
+ "assert_cmd",
+ "thiserror 2.0.17",
+]
+
 [[package]]
 name = "codex-utils-image"
 version = "0.0.0"
@@ -1994,6 +2007,20 @@ dependencies = [
 "static_assertions",
 ]

+[[package]]
+name = "compact_str"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a"
+dependencies = [
+ "castaway",
+ "cfg-if",
+ "itoa",
+ "rustversion",
+ "ryu",
+ "static_assertions",
+]
+
 [[package]]
 name = "concurrent-queue"
 version = "2.5.0"
@@ -2015,6 +2042,18 @@ dependencies = [
 "windows-sys 0.59.0",
 ]

+[[package]]
+name = "const-hex"
+version = "1.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3bb320cac8a0750d7f25280aa97b09c26edfe161164238ecbbb31092b079e735"
+dependencies = [
+ "cfg-if",
+ "cpufeatures",
+ "proptest",
+ "serde_core",
+]
+
 [[package]]
 name = "convert_case"
 version = "0.6.0"
@@ -2026,9 +2065,9 @@ dependencies = [

 [[package]]
 name = "convert_case"
-version = "0.7.1"
+version = "0.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bb402b8d4c85569410425650ce3eddc7d698ed96d39a73f941b08fb63082f1e7"
+checksum = "633458d4ef8c78b72454de2d54fd6ab2e60f9e02be22f3c6104cdc8a4e0fceb9"
 dependencies = [
 "unicode-segmentation",
 ]
@@ -2069,6 +2108,7 @@ dependencies = [
 "codex-core",
 "codex-protocol",
 "codex-utils-absolute-path",
+ "codex-utils-cargo-bin",
 "notify",
 "pretty_assertions",
 "regex-lite",
@@ -2414,11 +2454,11 @@ dependencies = [

 [[package]]
 name = "derive_more"
-version = "2.0.1"
+version = "2.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "093242cf7570c207c83073cf82f79706fe7b8317e98620a47d5be7c3d8497678"
+checksum = "d751e9e49156b02b44f9c1815bcb94b984cdcc4396ecc32521c739452808b134"
 dependencies = [
- "derive_more-impl 2.0.1",
+ "derive_more-impl 2.1.1",
 ]

 [[package]]
@@ -2436,13 +2476,14 @@ dependencies = [

 [[package]]
 name = "derive_more-impl"
-version = "2.0.1"
+version = "2.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bda628edc44c4bb645fbe0f758797143e4e07926f7ebf4e9bdfbd3d2ce621df3"
+checksum = "799a97264921d8623a957f6c3b9011f3b5492f557bbb7a5a19b7fa6d06ba8dcb"
 dependencies = [
- "convert_case 0.7.1",
+ "convert_case 0.10.0",
 "proc-macro2",
 "quote",
+ "rustc_version",
 "syn 2.0.104",
 "unicode-xid",
 ]
@@ -2558,6 +2599,15 @@ version = "0.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"

+[[package]]
+name = "document-features"
+version = "0.2.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d4b8a88685455ed29a21542a33abd9cb6510b6b129abadabdcef0f4c55bc8f61"
+dependencies = [
+ "litrs",
+]
+
 [[package]]
 name = "dotenvy"
 version = "0.15.7"
@@ -2788,8 +2838,8 @@ name = "exec_server_test_support"
 version = "0.0.0"
 dependencies = [
 "anyhow",
- "assert_cmd",
 "codex-core",
+ "codex-utils-cargo-bin",
 "rmcp",
 "serde_json",
 "tokio",
@@ -3724,13 +3774,14 @@ dependencies = [

 [[package]]
 name = "insta"
-version = "1.44.3"
+version = "1.46.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b5c943d4415edd8153251b6f197de5eb1640e56d84e8d9159bea190421c73698"
+checksum = "1b66886d14d18d420ab5052cbff544fc5d34d0b2cdd35eb5976aaa10a4a472e5"
 dependencies = [
 "console",
 "once_cell",
 "similar",
+ "tempfile",
 ]

 [[package]]
@@ -3755,17 +3806,6 @@ dependencies = [
 "rustversion",
 ]

-[[package]]
-name = "io-uring"
-version = "0.7.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4"
-dependencies = [
- "bitflags 2.10.0",
- "cfg-if",
- "libc",
-]
-
 [[package]]
 name = "ipnet"
 version = "2.11.0"
@@ -3894,6 +3934,16 @@ dependencies = [
 "wasm-bindgen",
 ]

+[[package]]
+name = "kasuari"
+version = "0.4.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fe90c1150662e858c7d5f945089b7517b0a80d8bf7ba4b1b5ffc984e7230a5b"
+dependencies = [
+ "hashbrown 0.16.0",
+ "thiserror 2.0.17",
+]
+
 [[package]]
 name = "keyring"
 version = "3.6.3"
@@ -4039,6 +4089,12 @@ version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956"

+[[package]]
+name = "litrs"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092"
+
 [[package]]
 name = "local-waker"
 version = "0.1.4"
@@ -4157,9 +4213,9 @@ name = "mcp_test_support"
 version = "0.0.0"
 dependencies = [
 "anyhow",
- "assert_cmd",
 "codex-core",
 "codex-mcp-server",
+ "codex-utils-cargo-bin",
 "core_test_support",
 "mcp-types",
 "os_info",
@@ -4660,9 +4716,9 @@ dependencies = [

 [[package]]
 name = "opentelemetry"
-version = "0.30.0"
+version = "0.31.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "aaf416e4cb72756655126f7dd7bb0af49c674f4c1b9903e80c009e0c37e552e6"
+checksum = "b84bcd6ae87133e903af7ef497404dda70c60d0ea14895fc8a5e6722754fc2a0"
 dependencies = [
 "futures-core",
 "futures-sink",
@@ -4674,9 +4730,9 @@ dependencies = [

 [[package]]
 name = "opentelemetry-appender-tracing"
-version = "0.30.1"
+version = "0.31.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e68f63eca5fad47e570e00e893094fc17be959c80c79a7d6ec1abdd5ae6ffc16"
+checksum = "ef6a1ac5ca3accf562b8c306fa8483c85f4390f768185ab775f242f7fe8fdcc2"
 dependencies = [
 "opentelemetry",
 "tracing",
@@ -4686,9 +4742,9 @@ dependencies = [

 [[package]]
 name = "opentelemetry-http"
-version = "0.30.0"
+version = "0.31.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "50f6639e842a97dbea8886e3439710ae463120091e2e064518ba8e716e6ac36d"
+checksum = "d7a6d09a73194e6b66df7c8f1b680f156d916a1a942abf2de06823dd02b7855d"
 dependencies = [
 "async-trait",
 "bytes",
@@ -4699,9 +4755,9 @@ dependencies = [

 [[package]]
 name = "opentelemetry-otlp"
-version = "0.30.0"
+version = "0.31.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dbee664a43e07615731afc539ca60c6d9f1a9425e25ca09c57bc36c87c55852b"
+checksum = "7a2366db2dca4d2ad033cad11e6ee42844fd727007af5ad04a1730f4cb8163bf"
 dependencies = [
 "http 1.3.1",
 "opentelemetry",
@@ -4719,30 +4775,32 @@ dependencies = [

 [[package]]
 name = "opentelemetry-proto"
-version = "0.30.0"
+version = "0.31.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2e046fd7660710fe5a05e8748e70d9058dc15c94ba914e7c4faa7c728f0e8ddc"
+checksum = "a7175df06de5eaee9909d4805a3d07e28bb752c34cab57fa9cff549da596b30f"
 dependencies = [
 "base64",
- "hex",
+ "const-hex",
 "opentelemetry",
 "opentelemetry_sdk",
 "prost",
 "serde",
+ "serde_json",
 "tonic",
+ "tonic-prost",
 ]

 [[package]]
 name = "opentelemetry-semantic-conventions"
-version = "0.30.0"
+version = "0.31.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "83d059a296a47436748557a353c5e6c5705b9470ef6c95cfc52c21a8814ddac2"
+checksum = "e62e29dfe041afb8ed2a6c9737ab57db4907285d999ef8ad3a59092a36bdc846"

 [[package]]
 name = "opentelemetry_sdk"
-version = "0.30.0"
+version = "0.31.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "11f644aa9e5e31d11896e024305d7e3c98a88884d9f8919dbf37a9991bc47a4b"
+checksum = "e14ae4f5991976fd48df6d843de219ca6d31b01daaab2dad5af2badeded372bd"
 dependencies = [
 "futures-channel",
 "futures-executor",
@@ -4750,7 +4808,6 @@ dependencies = [
 "opentelemetry",
 "percent-encoding",
 "rand 0.9.2",
- "serde_json",
 "thiserror 2.0.17",
 "tokio",
 "tokio-stream",
@@ -5106,7 +5163,7 @@ version = "3.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983"
 dependencies = [
- "toml_edit",
+ "toml_edit 0.23.10+spec-1.0.0",
 ]

 [[package]]
@@ -5133,10 +5190,25 @@ dependencies = [
 ]

 [[package]]
-name = "prost"
-version = "0.13.5"
+name = "proptest"
+version = "1.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5"
+checksum = "bee689443a2bd0a16ab0348b52ee43e3b2d1b1f931c8aa5c9f8de4c86fbe8c40"
+dependencies = [
+ "bitflags 2.10.0",
+ "num-traits",
+ "rand 0.9.2",
+ "rand_chacha 0.9.0",
+ "rand_xorshift",
+ "regex-syntax 0.8.5",
+ "unarray",
+]
+
+[[package]]
+name = "prost"
+version = "0.14.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7231bd9b3d3d33c86b58adbac74b5ec0ad9f496b19d22801d773636feaa95f3d"
 dependencies = [
 "bytes",
 "prost-derive",
@@ -5144,9 +5216,9 @@ dependencies = [

 [[package]]
 name = "prost-derive"
-version = "0.13.5"
+version = "0.14.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d"
+checksum = "9120690fafc389a67ba3803df527d0ec9cbbc9cc45e4cc20b332996dfb672425"
 dependencies = [
 "anyhow",
 "itertools 0.14.0",
@@ -5346,6 +5418,15 @@ dependencies = [
 "getrandom 0.3.3",
 ]

+[[package]]
+name = "rand_xorshift"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a"
+dependencies = [
+ "rand_core 0.9.3",
+]
+
 [[package]]
 name = "ratatui"
 version = "0.29.0"
@@ -5353,7 +5434,7 @@ source = "git+https://github.com/nornagon/ratatui?branch=nornagon-v0.29.0-patch#
 dependencies = [
 "bitflags 2.10.0",
 "cassowary",
- "compact_str",
+ "compact_str 0.8.1",
 "crossterm",
 "indoc",
 "instability",
@@ -5362,7 +5443,27 @@ dependencies = [
 "paste",
 "strum 0.26.3",
 "unicode-segmentation",
- "unicode-truncate",
+ "unicode-truncate 1.1.0",
+ "unicode-width 0.2.1",
+]
+
+[[package]]
+name = "ratatui-core"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5ef8dea09a92caaf73bff7adb70b76162e5937524058a7e5bff37869cbbec293"
+dependencies = [
+ "bitflags 2.10.0",
+ "compact_str 0.9.0",
+ "hashbrown 0.16.0",
+ "indoc",
+ "itertools 0.14.0",
+ "kasuari",
+ "lru 0.16.2",
+ "strum 0.27.2",
+ "thiserror 2.0.17",
+ "unicode-segmentation",
+ "unicode-truncate 2.0.0",
 "unicode-width 0.2.1",
 ]

@@ -5451,9 +5552,9 @@ dependencies = [

 [[package]]
 name = "regex-lite"
-version = "0.1.7"
+version = "0.1.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "943f41321c63ef1c92fd763bfe054d2668f7f225a5c29f0105903dc2fc04ba30"
+checksum = "8d942b98df5e658f56f20d592c7f868833fe38115e65c33003d8cd224b0155da"

 [[package]]
 name = "regex-syntax"
@@ -6529,6 +6630,9 @@ name = "strum"
 version = "0.27.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf"
+dependencies = [
+ "strum_macros 0.27.2",
+]

 [[package]]
 name = "strum_macros"
@@ -6920,29 +7024,26 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"

 [[package]]
 name = "tokio"
-version = "1.47.1"
+version = "1.48.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038"
+checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408"
 dependencies = [
- "backtrace",
 "bytes",
- "io-uring",
 "libc",
 "mio",
 "parking_lot",
 "pin-project-lite",
 "signal-hook-registry",
- "slab",
 "socket2 0.6.1",
 "tokio-macros",
- "windows-sys 0.59.0",
+ "windows-sys 0.61.1",
 ]

 [[package]]
 name = "tokio-macros"
-version = "2.5.0"
+version = "2.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8"
+checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5"
 dependencies = [
 "proc-macro2",
 "quote",
@@ -6971,9 +7072,9 @@ dependencies = [

 [[package]]
 name = "tokio-stream"
-version = "0.1.17"
+version = "0.1.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047"
+checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70"
 dependencies = [
 "futures-core",
 "pin-project-lite",
@@ -7035,18 +7136,30 @@ dependencies = [

 [[package]]
 name = "toml_datetime"
-version = "0.7.3"
+version = "0.7.5+spec-1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f2cdb639ebbc97961c51720f858597f7f24c4fc295327923af55b74c3c724533"
+checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347"
 dependencies = [
 "serde_core",
 ]

 [[package]]
 name = "toml_edit"
-version = "0.23.7"
+version = "0.23.10+spec-1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6485ef6d0d9b5d0ec17244ff7eb05310113c3f316f2d14200d4de56b3cb98f8d"
+checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269"
+dependencies = [
+ "indexmap 2.12.0",
+ "toml_datetime",
+ "toml_parser",
+ "winnow",
+]
+
+[[package]]
+name = "toml_edit"
+version = "0.24.0+spec-1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8c740b185920170a6d9191122cafef7010bd6270a3824594bff6784c04d7f09e"
 dependencies = [
 "indexmap 2.12.0",
 "toml_datetime",
@@ -7057,30 +7170,28 @@ dependencies = [

 [[package]]
 name = "toml_parser"
-version = "1.0.4"
+version = "1.0.6+spec-1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c0cbe268d35bdb4bb5a56a2de88d0ad0eb70af5384a99d648cd4b3d04039800e"
+checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44"
 dependencies = [
 "winnow",
 ]

 [[package]]
 name = "toml_writer"
-version = "1.0.4"
+version = "1.0.6+spec-1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "df8b2b54733674ad286d16267dcfc7a71ed5c776e4ac7aa3c3e2561f7c637bf2"
+checksum = "ab16f14aed21ee8bfd8ec22513f7287cd4a91aa92e44edfe2c17ddd004e92607"

 [[package]]
 name = "tonic"
-version = "0.13.1"
+version = "0.14.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7e581ba15a835f4d9ea06c55ab1bd4dce26fc53752c69a04aac00703bfb49ba9"
+checksum = "eb7613188ce9f7df5bfe185db26c5814347d110db17920415cf2fbcad85e7203"
 dependencies = [
 "async-trait",
- "axum",
 "base64",
 "bytes",
- "h2",
 "http 1.3.1",
 "http-body",
 "http-body-util",
@@ -7089,9 +7200,8 @@ dependencies = [
 "hyper-util",
 "percent-encoding",
 "pin-project",
- "prost",
 "rustls-native-certs",
- "socket2 0.5.10",
+ "sync_wrapper",
 "tokio",
 "tokio-rustls",
 "tokio-stream",
@@ -7101,6 +7211,17 @@ dependencies = [
 "tracing",
 ]

+[[package]]
+name = "tonic-prost"
+version = "0.14.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "66bd50ad6ce1252d87ef024b3d64fe4c3cf54a86fb9ef4c631fdd0ded7aeaa67"
+dependencies = [
+ "bytes",
+ "prost",
+ "tonic",
+]
+
 [[package]]
 name = "tower"
 version = "0.5.2"
@@ -7218,15 +7339,16 @@ dependencies = [

 [[package]]
 name = "tracing-opentelemetry"
-version = "0.31.0"
+version = "0.32.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ddcf5959f39507d0d04d6413119c04f33b623f4f951ebcbdddddfad2d0623a9c"
+checksum = "1e6e5658463dd88089aba75c7791e1d3120633b1bfde22478b28f625a9bb1b8e"
 dependencies = [
 "js-sys",
- "once_cell",
 "opentelemetry",
 "opentelemetry_sdk",
+ "rustversion",
 "smallvec",
+ "thiserror 2.0.17",
 "tracing",
 "tracing-core",
 "tracing-log",
@@ -7236,9 +7358,9 @@ dependencies = [

 [[package]]
 name = "tracing-subscriber"
-version = "0.3.20"
+version = "0.3.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5"
+checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e"
 dependencies = [
 "matchers",
 "nu-ansi-term",
@@ -7357,6 +7479,16 @@ dependencies = [
 "termcolor",
 ]

+[[package]]
+name = "tui-scrollbar"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c42613099915b2e30e9f144670666e858e2538366f77742e1cf1c2f230efcacd"
+dependencies = [
+ "document-features",
+ "ratatui-core",
+]
+
 [[package]]
 name = "typenum"
 version = "1.18.0"
@@ -7383,6 +7515,12 @@ dependencies = [
 "libc",
 ]

+[[package]]
+name = "unarray"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94"
+
 [[package]]
 name = "unicase"
 version = "2.8.1"
@@ -7418,6 +7556,17 @@ dependencies = [
 "unicode-width 0.1.14",
 ]

+[[package]]
+name = "unicode-truncate"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fbf03860ff438702f3910ca5f28f8dac63c1c11e7efb5012b8b175493606330"
+dependencies = [
+ "itertools 0.13.0",
+ "unicode-segmentation",
+ "unicode-width 0.2.1",
+]
+
 [[package]]
 name = "unicode-width"
 version = "0.1.14"
--- a/codex-rs/Cargo.toml
+++ b/codex-rs/Cargo.toml
@@ -36,6 +36,7 @@ members = [
    "tui",
    "tui2",
    "utils/absolute-path",
+    "utils/cargo-bin",
    "utils/git",
    "utils/cache",
    "utils/image",
@@ -93,6 +94,7 @@ codex-tui = { path = "tui" }
 codex-tui2 = { path = "tui2" }
 codex-utils-absolute-path = { path = "utils/absolute-path" }
 codex-utils-cache = { path = "utils/cache" }
+codex-utils-cargo-bin = { path = "utils/cargo-bin" }
 codex-utils-image = { path = "utils/image" }
 codex-utils-json-to-toml = { path = "utils/json-to-toml" }
 codex-utils-pty = { path = "utils/pty" }
@@ -143,7 +145,7 @@ ignore = "0.4.23"
 image = { version = "^0.25.9", default-features = false }
 include_dir = "0.7.4"
 indexmap = "2.12.0"
-insta = "1.44.3"
+insta = "1.46.0"
 itertools = "0.14.0"
 keyring = { version = "3.6", default-features = false }
 landlock = "0.4.4"
@@ -158,12 +160,12 @@ notify = "8.2.0"
 nucleo-matcher = "0.3.1"
 once_cell = "1.20.2"
 openssl-sys = "*"
-opentelemetry = "0.30.0"
-opentelemetry-appender-tracing = "0.30.0"
-opentelemetry-otlp = "0.30.0"
-opentelemetry-semantic-conventions = "0.30.0"
-opentelemetry_sdk = "0.30.0"
-tracing-opentelemetry = "0.31.0"
+opentelemetry = "0.31.0"
+opentelemetry-appender-tracing = "0.31.0"
+opentelemetry-otlp = "0.31.0"
+opentelemetry-semantic-conventions = "0.31.0"
+opentelemetry_sdk = "0.31.0"
+tracing-opentelemetry = "0.32.0"
 os_info = "3.12.0"
 owo-colors = "4.2.0"
 path-absolutize = "3.1.1"
@@ -174,9 +176,10 @@ pretty_assertions = "1.4.1"
 pulldown-cmark = "0.10"
 rand = "0.9"
 ratatui = "0.29.0"
+ratatui-core = "0.1.0"
 ratatui-macros = "0.6.0"
 regex = "1.12.2"
-regex-lite = "0.1.7"
+regex-lite = "0.1.8"
 reqwest = "0.12"
 rmcp = { version = "0.12.0", default-features = false }
 schemars = "0.8.22"
@@ -204,20 +207,20 @@ thiserror = "2.0.17"
 time = "0.3"
 tiny_http = "0.12"
 tokio = "1"
-tokio-stream = "0.1.17"
+tokio-stream = "0.1.18"
 tokio-test = "0.4"
 tokio-util = "0.7.16"
 toml = "0.9.5"
-toml_edit = "0.23.5"
-tonic = "0.13.1"
+toml_edit = "0.24.0"
 tracing = "0.1.43"
 tracing-appender = "0.2.3"
-tracing-subscriber = "0.3.20"
+tracing-subscriber = "0.3.22"
 tracing-test = "0.2.5"
 tree-sitter = "0.25.10"
 tree-sitter-bash = "0.25"
 tree-sitter-highlight = "0.25.10"
 ts-rs = "11"
+tui-scrollbar = "0.2.1"
 uds_windows = "1.1.0"
 unicode-segmentation = "1.12.0"
 unicode-width = "0.2"
--- a/codex-rs/README.md
+++ b/codex-rs/README.md
@@ -15,8 +15,8 @@ You can also install via Homebrew (`brew install --cask codex`) or download a pl

 ## Documentation quickstart

- First run with Codex? Follow the walkthrough in [`docs/getting-started.md`](../docs/getting-started.md) for prompts, keyboard shortcuts, and session management.
- Already shipping with Codex and want deeper control? Jump to [`docs/advanced.md`](../docs/advanced.md) and the configuration reference at [`docs/config.md`](../docs/config.md).
+- First run with Codex? Start with [`docs/getting-started.md`](../docs/getting-started.md) (links to the walkthrough for prompts, keyboard shortcuts, and session management).
+- Want deeper control? See [`docs/config.md`](../docs/config.md) and [`docs/install.md`](../docs/install.md).

 ## What's new in the Rust CLI

@@ -30,7 +30,7 @@ Codex supports a rich set of configuration options. Note that the Rust CLI uses

 #### MCP client

-Codex CLI functions as an MCP client that allows the Codex CLI and IDE extension to connect to MCP servers on startup. See the [`configuration documentation`](../docs/config.md#mcp_servers) for details.
+Codex CLI functions as an MCP client that allows the Codex CLI and IDE extension to connect to MCP servers on startup. See the [`configuration documentation`](../docs/config.md#connecting-to-mcp-servers) for details.

 #### MCP server (experimental)

--- a/codex-rs/app-server-protocol/src/protocol/v1.rs
+++ b/codex-rs/app-server-protocol/src/protocol/v1.rs
@@ -384,6 +384,8 @@ pub struct SendUserTurnParams {
    pub model: String,
    pub effort: Option<ReasoningEffort>,
    pub summary: ReasoningSummary,
+    /// Optional JSON Schema used to constrain the final assistant message for this turn.
+    pub output_schema: Option<serde_json::Value>,
 }

 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
--- a/codex-rs/app-server-protocol/src/protocol/v2.rs
+++ b/codex-rs/app-server-protocol/src/protocol/v2.rs
@@ -1274,6 +1274,8 @@ pub struct Turn {
 pub struct TurnError {
    pub message: String,
    pub codex_error_info: Option<CodexErrorInfo>,
+    #[serde(default)]
+    pub additional_details: Option<String>,
 }

 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
@@ -1317,6 +1319,8 @@ pub struct TurnStartParams {
    pub effort: Option<ReasoningEffort>,
    /// Override the reasoning summary for this turn and subsequent turns.
    pub summary: Option<ReasoningSummary>,
+    /// Optional JSON Schema used to constrain the final assistant message for this turn.
+    pub output_schema: Option<JsonValue>,
 }

 #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
--- a/codex-rs/app-server-test-client/src/main.rs
+++ b/codex-rs/app-server-test-client/src/main.rs
@@ -13,6 +13,7 @@ use std::time::Duration;
 use anyhow::Context;
 use anyhow::Result;
 use anyhow::bail;
+use clap::ArgAction;
 use clap::Parser;
 use clap::Subcommand;
 use codex_app_server_protocol::AddConversationListenerParams;
@@ -65,6 +66,19 @@ struct Cli {
    #[arg(long, env = "CODEX_BIN", default_value = "codex")]
    codex_bin: String,

+    /// Forwarded to the `codex` CLI as `--config key=value`. Repeatable.
+    ///
+    /// Example:
+    ///   `--config 'model_providers.mock.base_url="http://localhost:4010/v2"'`
+    #[arg(
+        short = 'c',
+        long = "config",
+        value_name = "key=value",
+        action = ArgAction::Append,
+        global = true
+    )]
+    config_overrides: Vec<String>,
+
    #[command(subcommand)]
    command: CliCommand,
 }
@@ -116,29 +130,42 @@ enum CliCommand {
 }

 fn main() -> Result<()> {
-    let Cli { codex_bin, command } = Cli::parse();
+    let Cli {
+        codex_bin,
+        config_overrides,
+        command,
+    } = Cli::parse();

    match command {
-        CliCommand::SendMessage { user_message } => send_message(codex_bin, user_message),
-        CliCommand::SendMessageV2 { user_message } => send_message_v2(codex_bin, user_message),
+        CliCommand::SendMessage { user_message } => {
+            send_message(&codex_bin, &config_overrides, user_message)
+        }
+        CliCommand::SendMessageV2 { user_message } => {
+            send_message_v2(&codex_bin, &config_overrides, user_message)
+        }
        CliCommand::TriggerCmdApproval { user_message } => {
-            trigger_cmd_approval(codex_bin, user_message)
+            trigger_cmd_approval(&codex_bin, &config_overrides, user_message)
        }
        CliCommand::TriggerPatchApproval { user_message } => {
-            trigger_patch_approval(codex_bin, user_message)
+            trigger_patch_approval(&codex_bin, &config_overrides, user_message)
        }
-        CliCommand::NoTriggerCmdApproval => no_trigger_cmd_approval(codex_bin),
+        CliCommand::NoTriggerCmdApproval => no_trigger_cmd_approval(&codex_bin, &config_overrides),
        CliCommand::SendFollowUpV2 {
            first_message,
            follow_up_message,
-        } => send_follow_up_v2(codex_bin, first_message, follow_up_message),
-        CliCommand::TestLogin => test_login(codex_bin),
-        CliCommand::GetAccountRateLimits => get_account_rate_limits(codex_bin),
+        } => send_follow_up_v2(
+            &codex_bin,
+            &config_overrides,
+            first_message,
+            follow_up_message,
+        ),
+        CliCommand::TestLogin => test_login(&codex_bin, &config_overrides),
+        CliCommand::GetAccountRateLimits => get_account_rate_limits(&codex_bin, &config_overrides),
    }
 }

-fn send_message(codex_bin: String, user_message: String) -> Result<()> {
-    let mut client = CodexClient::spawn(codex_bin)?;
+fn send_message(codex_bin: &str, config_overrides: &[String], user_message: String) -> Result<()> {
+    let mut client = CodexClient::spawn(codex_bin, config_overrides)?;

    let initialize = client.initialize()?;
    println!("< initialize response: {initialize:?}");
@@ -159,46 +186,61 @@ fn send_message(codex_bin: String, user_message: String) -> Result<()> {
    Ok(())
 }

-fn send_message_v2(codex_bin: String, user_message: String) -> Result<()> {
-    send_message_v2_with_policies(codex_bin, user_message, None, None)
+fn send_message_v2(
+    codex_bin: &str,
+    config_overrides: &[String],
+    user_message: String,
+) -> Result<()> {
+    send_message_v2_with_policies(codex_bin, config_overrides, user_message, None, None)
 }

-fn trigger_cmd_approval(codex_bin: String, user_message: Option<String>) -> Result<()> {
+fn trigger_cmd_approval(
+    codex_bin: &str,
+    config_overrides: &[String],
+    user_message: Option<String>,
+) -> Result<()> {
    let default_prompt =
        "Run `touch /tmp/should-trigger-approval` so I can confirm the file exists.";
    let message = user_message.unwrap_or_else(|| default_prompt.to_string());
    send_message_v2_with_policies(
        codex_bin,
+        config_overrides,
        message,
        Some(AskForApproval::OnRequest),
        Some(SandboxPolicy::ReadOnly),
    )
 }

-fn trigger_patch_approval(codex_bin: String, user_message: Option<String>) -> Result<()> {
+fn trigger_patch_approval(
+    codex_bin: &str,
+    config_overrides: &[String],
+    user_message: Option<String>,
+) -> Result<()> {
    let default_prompt =
        "Create a file named APPROVAL_DEMO.txt containing a short hello message using apply_patch.";
    let message = user_message.unwrap_or_else(|| default_prompt.to_string());
    send_message_v2_with_policies(
        codex_bin,
+        config_overrides,
        message,
        Some(AskForApproval::OnRequest),
        Some(SandboxPolicy::ReadOnly),
    )
 }

-fn no_trigger_cmd_approval(codex_bin: String) -> Result<()> {
+fn no_trigger_cmd_approval(codex_bin: &str, config_overrides: &[String]) -> Result<()> {
    let prompt = "Run `touch should_not_trigger_approval.txt`";
-    send_message_v2_with_policies(codex_bin, prompt.to_string(), None, None)
+    send_message_v2_with_policies(codex_bin, config_overrides, prompt.to_string(), None, None)
 }

 fn send_message_v2_with_policies(
-    codex_bin: String,
+    codex_bin: &str,
+    config_overrides: &[String],
    user_message: String,
    approval_policy: Option<AskForApproval>,
    sandbox_policy: Option<SandboxPolicy>,
 ) -> Result<()> {
-    let mut client = CodexClient::spawn(codex_bin)?;
+    let mut client = CodexClient::spawn(codex_bin, config_overrides)?;

    let initialize = client.initialize()?;
    println!("< initialize response: {initialize:?}");
@@ -222,11 +264,12 @@ fn send_message_v2_with_policies(
 }

 fn send_follow_up_v2(
-    codex_bin: String,
+    codex_bin: &str,
+    config_overrides: &[String],
    first_message: String,
    follow_up_message: String,
 ) -> Result<()> {
-    let mut client = CodexClient::spawn(codex_bin)?;
+    let mut client = CodexClient::spawn(codex_bin, config_overrides)?;

    let initialize = client.initialize()?;
    println!("< initialize response: {initialize:?}");
@@ -259,8 +302,8 @@ fn send_follow_up_v2(
    Ok(())
 }

-fn test_login(codex_bin: String) -> Result<()> {
-    let mut client = CodexClient::spawn(codex_bin)?;
+fn test_login(codex_bin: &str, config_overrides: &[String]) -> Result<()> {
+    let mut client = CodexClient::spawn(codex_bin, config_overrides)?;

    let initialize = client.initialize()?;
    println!("< initialize response: {initialize:?}");
@@ -289,8 +332,8 @@ fn test_login(codex_bin: String) -> Result<()> {
    }
 }

-fn get_account_rate_limits(codex_bin: String) -> Result<()> {
-    let mut client = CodexClient::spawn(codex_bin)?;
+fn get_account_rate_limits(codex_bin: &str, config_overrides: &[String]) -> Result<()> {
+    let mut client = CodexClient::spawn(codex_bin, config_overrides)?;

    let initialize = client.initialize()?;
    println!("< initialize response: {initialize:?}");
@@ -309,8 +352,12 @@ struct CodexClient {
 }

 impl CodexClient {
-    fn spawn(codex_bin: String) -> Result<Self> {
-        let mut codex_app_server = Command::new(&codex_bin)
+    fn spawn(codex_bin: &str, config_overrides: &[String]) -> Result<Self> {
+        let mut cmd = Command::new(codex_bin);
+        for override_kv in config_overrides {
+            cmd.arg("--config").arg(override_kv);
+        }
+        let mut codex_app_server = cmd
            .arg("app-server")
            .stdin(Stdio::piped())
            .stdout(Stdio::piped())
--- a/codex-rs/app-server/Cargo.toml
+++ b/codex-rs/app-server/Cargo.toml
@@ -48,7 +48,6 @@ uuid = { workspace = true, features = ["serde", "v7"] }

 [dev-dependencies]
 app_test_support = { workspace = true }
-assert_cmd = { workspace = true }
 base64 = { workspace = true }
 core_test_support = { workspace = true }
 mcp-types = { workspace = true }
--- a/codex-rs/app-server/README.md
+++ b/codex-rs/app-server/README.md
@@ -82,7 +82,7 @@ Example (from OpenAI's official VSCode extension):
 - `mcpServerStatus/list` — enumerate configured MCP servers with their tools, resources, resource templates, and auth status; supports cursor+limit pagination.
 - `feedback/upload` — submit a feedback report (classification + optional reason/logs and conversation_id); returns the tracking thread id.
 - `command/exec` — run a single command under the server sandbox without starting a thread/turn (handy for utilities and validation).
- `config/read` — fetch the effective config on disk after resolving config layering.
+- `config/read` — fetch the effective config on disk after resolving config layering (thread-agnostic; does not include in-repo `.codex/` layers).
 - `config/value/write` — write a single config key/value to the user's config.toml on disk.
 - `config/batchWrite` — apply multiple config edits atomically to the user's config.toml on disk.

@@ -162,7 +162,7 @@ Turns attach user input (text or images) to a thread and trigger Codex generatio
 - `{"type":"image","url":"https://…png"}`
 - `{"type":"localImage","path":"/tmp/screenshot.png"}`

-You can optionally specify config overrides on the new turn. If specified, these settings become the default for subsequent turns on the same thread.
+You can optionally specify config overrides on the new turn. If specified, these settings become the default for subsequent turns on the same thread. `outputSchema` applies only to the current turn.

 ```json
 { "method": "turn/start", "id": 30, "params": {
@@ -178,7 +178,14 @@ You can optionally specify config overrides on the new turn. If specified, these
    },
    "model": "gpt-5.1-codex",
    "effort": "medium",
-    "summary": "concise"
+    "summary": "concise",
+    // Optional JSON Schema to constrain the final assistant message for this turn.
+    "outputSchema": {
+        "type": "object",
+        "properties": { "answer": { "type": "string" } },
+        "required": ["answer"],
+        "additionalProperties": false
+    }
 } }
 { "id": 30, "result": { "turn": {
    "id": "turn_456",
@@ -302,7 +309,7 @@ Event notifications are the server-initiated event stream for thread lifecycles,
 The app-server streams JSON-RPC notifications while a turn is running. Each turn starts with `turn/started` (initial `turn`) and ends with `turn/completed` (final `turn` status). Token usage events stream separately via `thread/tokenUsage/updated`. Clients subscribe to the events they care about, rendering each item incrementally as updates arrive. The per-item lifecycle is always: `item/started` → zero or more item-specific deltas → `item/completed`.

 - `turn/started` — `{ turn }` with the turn id, empty `items`, and `status: "inProgress"`.
- `turn/completed` — `{ turn }` where `turn.status` is `completed`, `interrupted`, or `failed`; failures carry `{ error: { message, codexErrorInfo? } }`.
+- `turn/completed` — `{ turn }` where `turn.status` is `completed`, `interrupted`, or `failed`; failures carry `{ error: { message, codexErrorInfo?, additionalDetails? } }`.
 - `turn/diff/updated` — `{ threadId, turnId, diff }` represents the up-to-date snapshot of the turn-level unified diff, emitted after every FileChange item. `diff` is the latest aggregated unified diff across every file change in the turn. UIs can render this to show the full "what changed" view without stitching individual `fileChange` items.
 - `turn/plan/updated` — `{ turnId, explanation?, plan }` whenever the agent shares or changes its plan; each `plan` entry is `{ step, status }` with `status` in `pending`, `inProgress`, or `completed`.

@@ -352,7 +359,7 @@ There are additional item-specific events:

 ### Errors

-`error` event is emitted whenever the server hits an error mid-turn (for example, upstream model errors or quota limits). Carries the same `{ error: { message, codexErrorInfo? } }` payload as `turn.status: "failed"` and may precede that terminal notification.
+`error` event is emitted whenever the server hits an error mid-turn (for example, upstream model errors or quota limits). Carries the same `{ error: { message, codexErrorInfo?, additionalDetails? } }` payload as `turn.status: "failed"` and may precede that terminal notification.

 `codexErrorInfo` maps to the `CodexErrorInfo` enum. Common values:

--- a/codex-rs/app-server/src/bespoke_event_handling.rs
+++ b/codex-rs/app-server/src/bespoke_event_handling.rs
@@ -340,6 +340,7 @@ pub(crate) async fn apply_bespoke_event_handling(
            let turn_error = TurnError {
                message: ev.message,
                codex_error_info: ev.codex_error_info.map(V2CodexErrorInfo::from),
+                additional_details: None,
            };
            handle_error(conversation_id, turn_error.clone(), &turn_summary_store).await;
            outgoing
@@ -357,6 +358,7 @@ pub(crate) async fn apply_bespoke_event_handling(
            let turn_error = TurnError {
                message: ev.message,
                codex_error_info: ev.codex_error_info.map(V2CodexErrorInfo::from),
+                additional_details: ev.additional_details,
            };
            outgoing
                .send_server_notification(ServerNotification::Error(ErrorNotification {
@@ -1340,6 +1342,7 @@ mod tests {
            TurnError {
                message: "boom".to_string(),
                codex_error_info: Some(V2CodexErrorInfo::InternalServerError),
+                additional_details: None,
            },
            &turn_summary_store,
        )
@@ -1351,6 +1354,7 @@ mod tests {
            Some(TurnError {
                message: "boom".to_string(),
                codex_error_info: Some(V2CodexErrorInfo::InternalServerError),
+                additional_details: None,
            })
        );
        Ok(())
@@ -1398,6 +1402,7 @@ mod tests {
            TurnError {
                message: "oops".to_string(),
                codex_error_info: None,
+                additional_details: None,
            },
            &turn_summary_store,
        )
@@ -1439,6 +1444,7 @@ mod tests {
            TurnError {
                message: "bad".to_string(),
                codex_error_info: Some(V2CodexErrorInfo::Other),
+                additional_details: None,
            },
            &turn_summary_store,
        )
@@ -1467,6 +1473,7 @@ mod tests {
                    Some(TurnError {
                        message: "bad".to_string(),
                        codex_error_info: Some(V2CodexErrorInfo::Other),
+                        additional_details: None,
                    })
                );
            }
@@ -1691,6 +1698,7 @@ mod tests {
            TurnError {
                message: "a1".to_string(),
                codex_error_info: Some(V2CodexErrorInfo::BadRequest),
+                additional_details: None,
            },
            &turn_summary_store,
        )
@@ -1710,6 +1718,7 @@ mod tests {
            TurnError {
                message: "b1".to_string(),
                codex_error_info: None,
+                additional_details: None,
            },
            &turn_summary_store,
        )
@@ -1746,6 +1755,7 @@ mod tests {
                    Some(TurnError {
                        message: "a1".to_string(),
                        codex_error_info: Some(V2CodexErrorInfo::BadRequest),
+                        additional_details: None,
                    })
                );
            }
@@ -1766,6 +1776,7 @@ mod tests {
                    Some(TurnError {
                        message: "b1".to_string(),
                        codex_error_info: None,
+                        additional_details: None,
                    })
                );
            }
--- a/codex-rs/app-server/src/codex_message_processor.rs
+++ b/codex-rs/app-server/src/codex_message_processor.rs
@@ -1,4 +1,5 @@
 use crate::bespoke_event_handling::apply_bespoke_event_handling;
+use crate::config_api::ConfigApi;
 use crate::error_code::INTERNAL_ERROR_CODE;
 use crate::error_code::INVALID_REQUEST_ERROR_CODE;
 use crate::fuzzy_file_search::run_fuzzy_file_search;
@@ -155,7 +156,6 @@ use codex_protocol::protocol::SessionMetaLine;
 use codex_protocol::protocol::USER_MESSAGE_BEGIN;
 use codex_protocol::user_input::UserInput as CoreInputItem;
 use codex_rmcp_client::perform_oauth_login_return_url;
-use codex_utils_json_to_toml::json_to_toml;
 use std::collections::HashMap;
 use std::collections::HashSet;
 use std::ffi::OsStr;
@@ -215,7 +215,7 @@ pub(crate) struct CodexMessageProcessor {
    outgoing: Arc<OutgoingMessageSender>,
    codex_linux_sandbox_exe: Option<PathBuf>,
    config: Arc<Config>,
-    cli_overrides: Vec<(String, TomlValue)>,
+    config_api: ConfigApi,
    conversation_listeners: HashMap<Uuid, oneshot::Sender<()>>,
    active_login: Arc<Mutex<Option<ActiveLogin>>>,
    // Queue of pending interrupt requests per conversation. We reply when TurnAborted arrives.
@@ -265,13 +265,14 @@ impl CodexMessageProcessor {
        cli_overrides: Vec<(String, TomlValue)>,
        feedback: CodexFeedback,
    ) -> Self {
+        let config_api = ConfigApi::new(config.codex_home.clone(), cli_overrides.clone());
        Self {
            auth_manager,
            conversation_manager,
            outgoing,
            codex_linux_sandbox_exe,
            config,
-            cli_overrides,
+            config_api,
            conversation_listeners: HashMap::new(),
            active_login: Arc::new(Mutex::new(None)),
            pending_interrupts: Arc::new(Mutex::new(HashMap::new())),
@@ -282,13 +283,7 @@ impl CodexMessageProcessor {
    }

    async fn load_latest_config(&self) -> Result<Config, JSONRPCErrorError> {
-        Config::load_with_cli_overrides(self.cli_overrides.clone())
-            .await
-            .map_err(|err| JSONRPCErrorError {
-                code: INTERNAL_ERROR_CODE,
-                message: format!("failed to reload config: {err}"),
-                data: None,
-            })
+        self.config_api.load_latest_thread_agnostic_config().await
    }

    fn review_request_from_target(
@@ -1278,18 +1273,20 @@ impl CodexMessageProcessor {
            );
        }

-        let config = match derive_config_from_params(overrides, Some(cli_overrides)).await {
-            Ok(config) => config,
-            Err(err) => {
-                let error = JSONRPCErrorError {
-                    code: INVALID_REQUEST_ERROR_CODE,
-                    message: format!("error deriving config: {err}"),
-                    data: None,
-                };
-                self.outgoing.send_error(request_id, error).await;
-                return;
-            }
-        };
+        let config =
+            match derive_config_from_params(&self.config_api, overrides, Some(cli_overrides)).await
+            {
+                Ok(config) => config,
+                Err(err) => {
+                    let error = JSONRPCErrorError {
+                        code: INVALID_REQUEST_ERROR_CODE,
+                        message: format!("error deriving config: {err}"),
+                        data: None,
+                    };
+                    self.outgoing.send_error(request_id, error).await;
+                    return;
+                }
+            };

        match self.conversation_manager.new_conversation(config).await {
            Ok(conversation_id) => {
@@ -1300,7 +1297,7 @@ impl CodexMessageProcessor {
                } = conversation_id;
                let response = NewConversationResponse {
                    conversation_id,
-                    model: session_configured.model_family.slug,
+                    model: session_configured.model,
                    reasoning_effort: session_configured.reasoning_effort,
                    rollout_path: session_configured.rollout_path,
                };
@@ -1328,18 +1325,19 @@ impl CodexMessageProcessor {
            params.developer_instructions,
        );

-        let config = match derive_config_from_params(overrides, params.config).await {
-            Ok(config) => config,
-            Err(err) => {
-                let error = JSONRPCErrorError {
-                    code: INVALID_REQUEST_ERROR_CODE,
-                    message: format!("error deriving config: {err}"),
-                    data: None,
-                };
-                self.outgoing.send_error(request_id, error).await;
-                return;
-            }
-        };
+        let config =
+            match derive_config_from_params(&self.config_api, overrides, params.config).await {
+                Ok(config) => config,
+                Err(err) => {
+                    let error = JSONRPCErrorError {
+                        code: INVALID_REQUEST_ERROR_CODE,
+                        message: format!("error deriving config: {err}"),
+                        data: None,
+                    };
+                    self.outgoing.send_error(request_id, error).await;
+                    return;
+                }
+            };

        match self.conversation_manager.new_conversation(config).await {
            Ok(new_conv) => {
@@ -1374,7 +1372,7 @@ impl CodexMessageProcessor {
                };

                let SessionConfiguredEvent {
-                    model_family,
+                    model,
                    model_provider_id,
                    cwd,
                    approval_policy,
@@ -1383,7 +1381,7 @@ impl CodexMessageProcessor {
                } = session_configured;
                let response = ThreadStartResponse {
                    thread: thread.clone(),
-                    model: model_family.slug,
+                    model,
                    model_provider: model_provider_id,
                    cwd,
                    approval_policy: approval_policy.into(),
@@ -1567,7 +1565,7 @@ impl CodexMessageProcessor {
                base_instructions,
                developer_instructions,
            );
-            match derive_config_from_params(overrides, cli_overrides).await {
+            match derive_config_from_params(&self.config_api, overrides, cli_overrides).await {
                Ok(config) => config,
                Err(err) => {
                    let error = JSONRPCErrorError {
@@ -1717,7 +1715,7 @@ impl CodexMessageProcessor {

                let response = ThreadResumeResponse {
                    thread,
-                    model: session_configured.model_family.slug,
+                    model: session_configured.model,
                    model_provider: session_configured.model_provider_id,
                    cwd: session_configured.cwd,
                    approval_policy: session_configured.approval_policy.into(),
@@ -2228,7 +2226,7 @@ impl CodexMessageProcessor {
                    ..Default::default()
                };

-                derive_config_from_params(overrides, Some(cli_overrides)).await
+                derive_config_from_params(&self.config_api, overrides, Some(cli_overrides)).await
            }
            None => Ok(self.config.as_ref().clone()),
        };
@@ -2330,7 +2328,7 @@ impl CodexMessageProcessor {
                    .send_server_notification(ServerNotification::SessionConfigured(
                        SessionConfiguredNotification {
                            session_id: session_configured.session_id,
-                            model: session_configured.model_family.slug.clone(),
+                            model: session_configured.model.clone(),
                            reasoning_effort: session_configured.reasoning_effort,
                            history_log_id: session_configured.history_log_id,
                            history_entry_count: session_configured.history_entry_count,
@@ -2346,7 +2344,7 @@ impl CodexMessageProcessor {
                // Reply with conversation id + model and initial messages (when present)
                let response = ResumeConversationResponse {
                    conversation_id,
-                    model: session_configured.model_family.slug.clone(),
+                    model: session_configured.model.clone(),
                    initial_messages,
                    rollout_path: session_configured.rollout_path.clone(),
                };
@@ -2579,6 +2577,7 @@ impl CodexMessageProcessor {
        let _ = conversation
            .submit(Op::UserInput {
                items: mapped_items,
+                final_output_json_schema: None,
            })
            .await;

@@ -2598,6 +2597,7 @@ impl CodexMessageProcessor {
            model,
            effort,
            summary,
+            output_schema,
        } = params;

        let Ok(conversation) = self
@@ -2632,7 +2632,7 @@ impl CodexMessageProcessor {
                model,
                effort,
                summary,
-                final_output_json_schema: None,
+                final_output_json_schema: output_schema,
            })
            .await;

@@ -2741,6 +2741,7 @@ impl CodexMessageProcessor {
        let turn_id = conversation
            .submit(Op::UserInput {
                items: mapped_items,
+                final_output_json_schema: params.output_schema,
            })
            .await;

@@ -3341,16 +3342,13 @@ fn errors_to_info(
 }

 async fn derive_config_from_params(
+    config_api: &ConfigApi,
    overrides: ConfigOverrides,
    cli_overrides: Option<HashMap<String, serde_json::Value>>,
 ) -> std::io::Result<Config> {
-    let cli_overrides = cli_overrides
-        .unwrap_or_default()
-        .into_iter()
-        .map(|(k, v)| (k, json_to_toml(v)))
-        .collect();
-
-    Config::load_with_cli_overrides_and_harness_overrides(cli_overrides, overrides).await
+    config_api
+        .load_thread_agnostic_config(overrides, cli_overrides)
+        .await
 }

 async fn read_summary_from_rollout(
--- a/codex-rs/app-server/src/config_api.rs
+++ b/codex-rs/app-server/src/config_api.rs
@@ -7,21 +7,28 @@ use codex_app_server_protocol::ConfigValueWriteParams;
 use codex_app_server_protocol::ConfigWriteErrorCode;
 use codex_app_server_protocol::ConfigWriteResponse;
 use codex_app_server_protocol::JSONRPCErrorError;
+use codex_core::config::Config;
+use codex_core::config::ConfigBuilder;
 use codex_core::config::ConfigService;
 use codex_core::config::ConfigServiceError;
+use codex_utils_json_to_toml::json_to_toml;
 use serde_json::json;
 use std::path::PathBuf;
 use toml::Value as TomlValue;

 #[derive(Clone)]
 pub(crate) struct ConfigApi {
+    codex_home: PathBuf,
+    cli_overrides: Vec<(String, TomlValue)>,
    service: ConfigService,
 }

 impl ConfigApi {
    pub(crate) fn new(codex_home: PathBuf, cli_overrides: Vec<(String, TomlValue)>) -> Self {
        Self {
-            service: ConfigService::new(codex_home, cli_overrides),
+            service: ConfigService::new(codex_home.clone(), cli_overrides.clone()),
+            codex_home,
+            cli_overrides,
        }
    }

@@ -32,6 +39,30 @@ impl ConfigApi {
        self.service.read(params).await.map_err(map_error)
    }

+    pub(crate) async fn load_thread_agnostic_config(
+        &self,
+        overrides: codex_core::config::ConfigOverrides,
+        request_cli_overrides: Option<std::collections::HashMap<String, serde_json::Value>>,
+    ) -> std::io::Result<Config> {
+        // Apply the app server's startup `--config` overrides, then apply request-scoped overrides
+        // with higher precedence.
+        let mut merged_cli_overrides = self.cli_overrides.clone();
+        merged_cli_overrides.extend(
+            request_cli_overrides
+                .unwrap_or_default()
+                .into_iter()
+                .map(|(k, v)| (k, json_to_toml(v))),
+        );
+
+        ConfigBuilder::default()
+            .codex_home(self.codex_home.clone())
+            .cli_overrides(merged_cli_overrides)
+            .harness_overrides(overrides)
+            .thread_agnostic()
+            .build()
+            .await
+    }
+
    pub(crate) async fn write_value(
        &self,
        params: ConfigValueWriteParams,
@@ -45,6 +76,18 @@ impl ConfigApi {
    ) -> Result<ConfigWriteResponse, JSONRPCErrorError> {
        self.service.batch_write(params).await.map_err(map_error)
    }
+
+    pub(crate) async fn load_latest_thread_agnostic_config(
+        &self,
+    ) -> Result<Config, JSONRPCErrorError> {
+        self.load_thread_agnostic_config(codex_core::config::ConfigOverrides::default(), None)
+            .await
+            .map_err(|err| JSONRPCErrorError {
+                code: INTERNAL_ERROR_CODE,
+                message: format!("failed to reload config: {err}"),
+                data: None,
+            })
+    }
 }

 fn map_error(err: ConfigServiceError) -> JSONRPCErrorError {
--- a/codex-rs/app-server/src/lib.rs
+++ b/codex-rs/app-server/src/lib.rs
@@ -17,13 +17,11 @@ use tokio::io::BufReader;
 use tokio::io::{self};
 use tokio::sync::mpsc;
 use toml::Value as TomlValue;
-use tracing::Level;
 use tracing::debug;
 use tracing::error;
 use tracing::info;
 use tracing_subscriber::EnvFilter;
 use tracing_subscriber::Layer;
-use tracing_subscriber::filter::Targets;
 use tracing_subscriber::layer::SubscriberExt;
 use tracing_subscriber::util::SubscriberInitExt;

@@ -103,11 +101,8 @@ pub async fn run_main(
        .with_span_events(tracing_subscriber::fmt::format::FmtSpan::FULL)
        .with_filter(EnvFilter::from_default_env());

-    let feedback_layer = tracing_subscriber::fmt::layer()
-        .with_writer(feedback.make_writer())
-        .with_ansi(false)
-        .with_target(false)
-        .with_filter(Targets::new().with_default(Level::TRACE));
+    let feedback_layer = feedback.logger_layer();
+    let feedback_metadata_layer = feedback.metadata_layer();

    let otel_logger_layer = otel.as_ref().and_then(|o| o.logger_layer());

@@ -116,6 +111,7 @@ pub async fn run_main(
    let _ = tracing_subscriber::registry()
        .with(stderr_fmt)
        .with(feedback_layer)
+        .with(feedback_metadata_layer)
        .with(otel_logger_layer)
        .with(otel_tracing_layer)
        .try_init();
--- a/codex-rs/app-server/tests/common/Cargo.toml
+++ b/codex-rs/app-server/tests/common/Cargo.toml
@@ -9,12 +9,12 @@ path = "lib.rs"

 [dependencies]
 anyhow = { workspace = true }
-assert_cmd = { workspace = true }
 base64 = { workspace = true }
 chrono = { workspace = true }
 codex-app-server-protocol = { workspace = true }
 codex-core = { workspace = true, features = ["test-support"] }
 codex-protocol = { workspace = true }
+codex-utils-cargo-bin = { workspace = true }
 serde = { workspace = true }
 serde_json = { workspace = true }
 tokio = { workspace = true, features = [
--- a/codex-rs/app-server/tests/common/mcp_process.rs
+++ b/codex-rs/app-server/tests/common/mcp_process.rs
@@ -11,7 +11,6 @@ use tokio::process::ChildStdin;
 use tokio::process::ChildStdout;

 use anyhow::Context;
-use assert_cmd::prelude::*;
 use codex_app_server_protocol::AddConversationListenerParams;
 use codex_app_server_protocol::ArchiveConversationParams;
 use codex_app_server_protocol::CancelLoginAccountParams;
@@ -49,7 +48,6 @@ use codex_app_server_protocol::ThreadResumeParams;
 use codex_app_server_protocol::ThreadStartParams;
 use codex_app_server_protocol::TurnInterruptParams;
 use codex_app_server_protocol::TurnStartParams;
-use std::process::Command as StdCommand;
 use tokio::process::Command;

 pub struct McpProcess {
@@ -78,12 +76,8 @@ impl McpProcess {
        codex_home: &Path,
        env_overrides: &[(&str, Option<&str>)],
    ) -> anyhow::Result<Self> {
-        // Use assert_cmd to locate the binary path and then switch to tokio::process::Command
-        let std_cmd = StdCommand::cargo_bin("codex-app-server")
-            .context("should find binary for codex-mcp-server")?;
-
-        let program = std_cmd.get_program().to_owned();
-
+        let program = codex_utils_cargo_bin::cargo_bin("codex-app-server")
+            .context("should find binary for codex-app-server")?;
        let mut cmd = Command::new(program);

        cmd.stdin(Stdio::piped());
--- a/codex-rs/app-server/tests/common/models_cache.rs
+++ b/codex-rs/app-server/tests/common/models_cache.rs
@@ -5,7 +5,6 @@ use codex_protocol::openai_models::ConfigShellToolType;
 use codex_protocol::openai_models::ModelInfo;
 use codex_protocol::openai_models::ModelPreset;
 use codex_protocol::openai_models::ModelVisibility;
-use codex_protocol::openai_models::ReasoningSummaryFormat;
 use codex_protocol::openai_models::TruncationPolicyConfig;
 use serde_json::json;
 use std::path::Path;
@@ -35,7 +34,6 @@ fn preset_to_info(preset: &ModelPreset, priority: i32) -> ModelInfo {
        truncation_policy: TruncationPolicyConfig::bytes(10_000),
        supports_parallel_tool_calls: false,
        context_window: None,
-        reasoning_summary_format: ReasoningSummaryFormat::None,
        experimental_supported_tools: Vec::new(),
    }
 }
--- a/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs
+++ b/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs
@@ -305,6 +305,7 @@ async fn test_send_user_turn_changes_approval_policy_behavior() -> Result<()> {
            model: "mock-model".to_string(),
            effort: Some(ReasoningEffort::Medium),
            summary: ReasoningSummary::Auto,
+            output_schema: None,
        })
        .await?;
    // Acknowledge sendUserTurn
@@ -418,6 +419,7 @@ async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() -> Result<(
            model: model.clone(),
            effort: Some(ReasoningEffort::Medium),
            summary: ReasoningSummary::Auto,
+            output_schema: None,
        })
        .await?;
    timeout(
@@ -443,6 +445,7 @@ async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() -> Result<(
            model: model.clone(),
            effort: Some(ReasoningEffort::Medium),
            summary: ReasoningSummary::Auto,
+            output_schema: None,
        })
        .await?;
    timeout(
--- a/codex-rs/app-server/tests/suite/mod.rs
+++ b/codex-rs/app-server/tests/suite/mod.rs
@@ -7,6 +7,7 @@ mod fuzzy_file_search;
 mod interrupt;
 mod list_resume;
 mod login;
+mod output_schema;
 mod send_message;
 mod set_default_model;
 mod user_agent;
--- a/codex-rs/app-server/tests/suite/output_schema.rs
+++ b/codex-rs/app-server/tests/suite/output_schema.rs
@@ -0,0 +1,282 @@
+use anyhow::Result;
+use app_test_support::McpProcess;
+use app_test_support::to_response;
+use codex_app_server_protocol::AddConversationListenerParams;
+use codex_app_server_protocol::InputItem;
+use codex_app_server_protocol::JSONRPCResponse;
+use codex_app_server_protocol::NewConversationParams;
+use codex_app_server_protocol::NewConversationResponse;
+use codex_app_server_protocol::RequestId;
+use codex_app_server_protocol::SendUserTurnParams;
+use codex_app_server_protocol::SendUserTurnResponse;
+use codex_core::protocol::AskForApproval;
+use codex_core::protocol::SandboxPolicy;
+use codex_protocol::config_types::ReasoningSummary;
+use codex_protocol::openai_models::ReasoningEffort;
+use core_test_support::responses;
+use core_test_support::skip_if_no_network;
+use pretty_assertions::assert_eq;
+use std::path::Path;
+use tempfile::TempDir;
+use tokio::time::timeout;
+
+const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
+
+#[tokio::test]
+async fn send_user_turn_accepts_output_schema_v1() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = responses::start_mock_server().await;
+    let body = responses::sse(vec![
+        responses::ev_response_created("resp-1"),
+        responses::ev_assistant_message("msg-1", "Done"),
+        responses::ev_completed("resp-1"),
+    ]);
+    let response_mock = responses::mount_sse_once(&server, body).await;
+
+    let codex_home = TempDir::new()?;
+    create_config_toml(codex_home.path(), &server.uri())?;
+
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
+
+    let new_conv_id = mcp
+        .send_new_conversation_request(NewConversationParams {
+            ..Default::default()
+        })
+        .await?;
+    let new_conv_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(new_conv_id)),
+    )
+    .await??;
+    let NewConversationResponse {
+        conversation_id, ..
+    } = to_response::<NewConversationResponse>(new_conv_resp)?;
+
+    let listener_id = mcp
+        .send_add_conversation_listener_request(AddConversationListenerParams {
+            conversation_id,
+            experimental_raw_events: false,
+        })
+        .await?;
+    timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(listener_id)),
+    )
+    .await??;
+
+    let output_schema = serde_json::json!({
+        "type": "object",
+        "properties": {
+            "answer": { "type": "string" }
+        },
+        "required": ["answer"],
+        "additionalProperties": false
+    });
+
+    let send_turn_id = mcp
+        .send_send_user_turn_request(SendUserTurnParams {
+            conversation_id,
+            items: vec![InputItem::Text {
+                text: "Hello".to_string(),
+            }],
+            cwd: codex_home.path().to_path_buf(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy: SandboxPolicy::new_read_only_policy(),
+            model: "mock-model".to_string(),
+            effort: Some(ReasoningEffort::Medium),
+            summary: ReasoningSummary::Auto,
+            output_schema: Some(output_schema.clone()),
+        })
+        .await?;
+    let _send_turn_resp: SendUserTurnResponse = to_response::<SendUserTurnResponse>(
+        timeout(
+            DEFAULT_READ_TIMEOUT,
+            mcp.read_stream_until_response_message(RequestId::Integer(send_turn_id)),
+        )
+        .await??,
+    )?;
+
+    timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_notification_message("codex/event/task_complete"),
+    )
+    .await??;
+
+    let request = response_mock.single_request();
+    let payload = request.body_json();
+    let text = payload.get("text").expect("request missing text field");
+    let format = text
+        .get("format")
+        .expect("request missing text.format field");
+    assert_eq!(
+        format,
+        &serde_json::json!({
+            "name": "codex_output_schema",
+            "type": "json_schema",
+            "strict": true,
+            "schema": output_schema,
+        })
+    );
+
+    Ok(())
+}
+
+#[tokio::test]
+async fn send_user_turn_output_schema_is_per_turn_v1() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = responses::start_mock_server().await;
+    let body1 = responses::sse(vec![
+        responses::ev_response_created("resp-1"),
+        responses::ev_assistant_message("msg-1", "Done"),
+        responses::ev_completed("resp-1"),
+    ]);
+    let response_mock1 = responses::mount_sse_once(&server, body1).await;
+
+    let codex_home = TempDir::new()?;
+    create_config_toml(codex_home.path(), &server.uri())?;
+
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
+
+    let new_conv_id = mcp
+        .send_new_conversation_request(NewConversationParams {
+            ..Default::default()
+        })
+        .await?;
+    let new_conv_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(new_conv_id)),
+    )
+    .await??;
+    let NewConversationResponse {
+        conversation_id, ..
+    } = to_response::<NewConversationResponse>(new_conv_resp)?;
+
+    let listener_id = mcp
+        .send_add_conversation_listener_request(AddConversationListenerParams {
+            conversation_id,
+            experimental_raw_events: false,
+        })
+        .await?;
+    timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(listener_id)),
+    )
+    .await??;
+
+    let output_schema = serde_json::json!({
+        "type": "object",
+        "properties": {
+            "answer": { "type": "string" }
+        },
+        "required": ["answer"],
+        "additionalProperties": false
+    });
+
+    let send_turn_id = mcp
+        .send_send_user_turn_request(SendUserTurnParams {
+            conversation_id,
+            items: vec![InputItem::Text {
+                text: "Hello".to_string(),
+            }],
+            cwd: codex_home.path().to_path_buf(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy: SandboxPolicy::new_read_only_policy(),
+            model: "mock-model".to_string(),
+            effort: Some(ReasoningEffort::Medium),
+            summary: ReasoningSummary::Auto,
+            output_schema: Some(output_schema.clone()),
+        })
+        .await?;
+    let _send_turn_resp: SendUserTurnResponse = to_response::<SendUserTurnResponse>(
+        timeout(
+            DEFAULT_READ_TIMEOUT,
+            mcp.read_stream_until_response_message(RequestId::Integer(send_turn_id)),
+        )
+        .await??,
+    )?;
+
+    timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_notification_message("codex/event/task_complete"),
+    )
+    .await??;
+
+    let payload1 = response_mock1.single_request().body_json();
+    assert_eq!(
+        payload1.pointer("/text/format"),
+        Some(&serde_json::json!({
+            "name": "codex_output_schema",
+            "type": "json_schema",
+            "strict": true,
+            "schema": output_schema,
+        }))
+    );
+
+    let body2 = responses::sse(vec![
+        responses::ev_response_created("resp-2"),
+        responses::ev_assistant_message("msg-2", "Done"),
+        responses::ev_completed("resp-2"),
+    ]);
+    let response_mock2 = responses::mount_sse_once(&server, body2).await;
+
+    let send_turn_id_2 = mcp
+        .send_send_user_turn_request(SendUserTurnParams {
+            conversation_id,
+            items: vec![InputItem::Text {
+                text: "Hello again".to_string(),
+            }],
+            cwd: codex_home.path().to_path_buf(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy: SandboxPolicy::new_read_only_policy(),
+            model: "mock-model".to_string(),
+            effort: Some(ReasoningEffort::Medium),
+            summary: ReasoningSummary::Auto,
+            output_schema: None,
+        })
+        .await?;
+    let _send_turn_resp_2: SendUserTurnResponse = to_response::<SendUserTurnResponse>(
+        timeout(
+            DEFAULT_READ_TIMEOUT,
+            mcp.read_stream_until_response_message(RequestId::Integer(send_turn_id_2)),
+        )
+        .await??,
+    )?;
+
+    timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_notification_message("codex/event/task_complete"),
+    )
+    .await??;
+
+    let payload2 = response_mock2.single_request().body_json();
+    assert_eq!(payload2.pointer("/text/format"), None);
+
+    Ok(())
+}
+
+fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<()> {
+    let config_toml = codex_home.join("config.toml");
+    std::fs::write(
+        config_toml,
+        format!(
+            r#"
+model = "mock-model"
+approval_policy = "never"
+sandbox_mode = "read-only"
+
+model_provider = "mock_provider"
+
+[model_providers.mock_provider]
+name = "Mock provider for test"
+base_url = "{server_uri}/v1"
+wire_api = "responses"
+request_max_retries = 0
+stream_max_retries = 0
+"#
+        ),
+    )
+}
--- a/codex-rs/app-server/tests/suite/v2/mod.rs
+++ b/codex-rs/app-server/tests/suite/v2/mod.rs
@@ -1,6 +1,7 @@
 mod account;
 mod config_rpc;
 mod model_list;
+mod output_schema;
 mod rate_limits;
 mod review;
 mod thread_archive;
--- a/codex-rs/app-server/tests/suite/v2/output_schema.rs
+++ b/codex-rs/app-server/tests/suite/v2/output_schema.rs
@@ -0,0 +1,231 @@
+use anyhow::Result;
+use app_test_support::McpProcess;
+use app_test_support::to_response;
+use codex_app_server_protocol::JSONRPCResponse;
+use codex_app_server_protocol::RequestId;
+use codex_app_server_protocol::ThreadStartParams;
+use codex_app_server_protocol::ThreadStartResponse;
+use codex_app_server_protocol::TurnStartParams;
+use codex_app_server_protocol::TurnStartResponse;
+use codex_app_server_protocol::UserInput as V2UserInput;
+use core_test_support::responses;
+use core_test_support::skip_if_no_network;
+use pretty_assertions::assert_eq;
+use std::path::Path;
+use tempfile::TempDir;
+use tokio::time::timeout;
+
+const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
+
+#[tokio::test]
+async fn turn_start_accepts_output_schema_v2() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = responses::start_mock_server().await;
+    let body = responses::sse(vec![
+        responses::ev_response_created("resp-1"),
+        responses::ev_assistant_message("msg-1", "Done"),
+        responses::ev_completed("resp-1"),
+    ]);
+    let response_mock = responses::mount_sse_once(&server, body).await;
+
+    let codex_home = TempDir::new()?;
+    create_config_toml(codex_home.path(), &server.uri())?;
+
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
+
+    let thread_req = mcp
+        .send_thread_start_request(ThreadStartParams {
+            ..Default::default()
+        })
+        .await?;
+    let thread_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(thread_req)),
+    )
+    .await??;
+    let ThreadStartResponse { thread, .. } = to_response::<ThreadStartResponse>(thread_resp)?;
+
+    let output_schema = serde_json::json!({
+        "type": "object",
+        "properties": {
+            "answer": { "type": "string" }
+        },
+        "required": ["answer"],
+        "additionalProperties": false
+    });
+
+    let turn_req = mcp
+        .send_turn_start_request(TurnStartParams {
+            thread_id: thread.id.clone(),
+            input: vec![V2UserInput::Text {
+                text: "Hello".to_string(),
+            }],
+            output_schema: Some(output_schema.clone()),
+            ..Default::default()
+        })
+        .await?;
+    let turn_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(turn_req)),
+    )
+    .await??;
+    let _turn: TurnStartResponse = to_response::<TurnStartResponse>(turn_resp)?;
+
+    timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_notification_message("turn/completed"),
+    )
+    .await??;
+
+    let request = response_mock.single_request();
+    let payload = request.body_json();
+    let text = payload.get("text").expect("request missing text field");
+    let format = text
+        .get("format")
+        .expect("request missing text.format field");
+    assert_eq!(
+        format,
+        &serde_json::json!({
+            "name": "codex_output_schema",
+            "type": "json_schema",
+            "strict": true,
+            "schema": output_schema,
+        })
+    );
+
+    Ok(())
+}
+
+#[tokio::test]
+async fn turn_start_output_schema_is_per_turn_v2() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = responses::start_mock_server().await;
+    let body1 = responses::sse(vec![
+        responses::ev_response_created("resp-1"),
+        responses::ev_assistant_message("msg-1", "Done"),
+        responses::ev_completed("resp-1"),
+    ]);
+    let response_mock1 = responses::mount_sse_once(&server, body1).await;
+
+    let codex_home = TempDir::new()?;
+    create_config_toml(codex_home.path(), &server.uri())?;
+
+    let mut mcp = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
+
+    let thread_req = mcp
+        .send_thread_start_request(ThreadStartParams {
+            ..Default::default()
+        })
+        .await?;
+    let thread_resp: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(thread_req)),
+    )
+    .await??;
+    let ThreadStartResponse { thread, .. } = to_response::<ThreadStartResponse>(thread_resp)?;
+
+    let output_schema = serde_json::json!({
+        "type": "object",
+        "properties": {
+            "answer": { "type": "string" }
+        },
+        "required": ["answer"],
+        "additionalProperties": false
+    });
+
+    let turn_req_1 = mcp
+        .send_turn_start_request(TurnStartParams {
+            thread_id: thread.id.clone(),
+            input: vec![V2UserInput::Text {
+                text: "Hello".to_string(),
+            }],
+            output_schema: Some(output_schema.clone()),
+            ..Default::default()
+        })
+        .await?;
+    let turn_resp_1: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(turn_req_1)),
+    )
+    .await??;
+    let _turn: TurnStartResponse = to_response::<TurnStartResponse>(turn_resp_1)?;
+
+    timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_notification_message("turn/completed"),
+    )
+    .await??;
+
+    let payload1 = response_mock1.single_request().body_json();
+    assert_eq!(
+        payload1.pointer("/text/format"),
+        Some(&serde_json::json!({
+            "name": "codex_output_schema",
+            "type": "json_schema",
+            "strict": true,
+            "schema": output_schema,
+        }))
+    );
+
+    let body2 = responses::sse(vec![
+        responses::ev_response_created("resp-2"),
+        responses::ev_assistant_message("msg-2", "Done"),
+        responses::ev_completed("resp-2"),
+    ]);
+    let response_mock2 = responses::mount_sse_once(&server, body2).await;
+
+    let turn_req_2 = mcp
+        .send_turn_start_request(TurnStartParams {
+            thread_id: thread.id.clone(),
+            input: vec![V2UserInput::Text {
+                text: "Hello again".to_string(),
+            }],
+            output_schema: None,
+            ..Default::default()
+        })
+        .await?;
+    let turn_resp_2: JSONRPCResponse = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_response_message(RequestId::Integer(turn_req_2)),
+    )
+    .await??;
+    let _turn: TurnStartResponse = to_response::<TurnStartResponse>(turn_resp_2)?;
+
+    timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp.read_stream_until_notification_message("turn/completed"),
+    )
+    .await??;
+
+    let payload2 = response_mock2.single_request().body_json();
+    assert_eq!(payload2.pointer("/text/format"), None);
+
+    Ok(())
+}
+
+fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<()> {
+    let config_toml = codex_home.join("config.toml");
+    std::fs::write(
+        config_toml,
+        format!(
+            r#"
+model = "mock-model"
+approval_policy = "never"
+sandbox_mode = "read-only"
+
+model_provider = "mock_provider"
+
+[model_providers.mock_provider]
+name = "Mock provider for test"
+base_url = "{server_uri}/v1"
+wire_api = "responses"
+request_max_retries = 0
+stream_max_retries = 0
+"#
+        ),
+    )
+}
--- a/codex-rs/app-server/tests/suite/v2/turn_start.rs
+++ b/codex-rs/app-server/tests/suite/v2/turn_start.rs
@@ -540,6 +540,7 @@ async fn turn_start_updates_sandbox_and_cwd_between_turns_v2() -> Result<()> {
            model: Some("mock-model".to_string()),
            effort: Some(ReasoningEffort::Medium),
            summary: Some(ReasoningSummary::Auto),
+            output_schema: None,
        })
        .await?;
    timeout(
@@ -566,6 +567,7 @@ async fn turn_start_updates_sandbox_and_cwd_between_turns_v2() -> Result<()> {
            model: Some("mock-model".to_string()),
            effort: Some(ReasoningEffort::Medium),
            summary: Some(ReasoningSummary::Auto),
+            output_schema: None,
        })
        .await?;
    timeout(
--- a/codex-rs/apply-patch/Cargo.toml
+++ b/codex-rs/apply-patch/Cargo.toml
@@ -25,5 +25,6 @@ tree-sitter-bash = { workspace = true }
 [dev-dependencies]
 assert_cmd = { workspace = true }
 assert_matches = { workspace = true }
+codex-utils-cargo-bin = { workspace = true }
 pretty_assertions = { workspace = true }
 tempfile = { workspace = true }
--- a/codex-rs/apply-patch/tests/suite/cli.rs
+++ b/codex-rs/apply-patch/tests/suite/cli.rs
@@ -1,8 +1,13 @@
-use assert_cmd::prelude::*;
+use assert_cmd::Command;
 use std::fs;
-use std::process::Command;
 use tempfile::tempdir;

+fn apply_patch_command() -> anyhow::Result<Command> {
+    Ok(Command::new(codex_utils_cargo_bin::cargo_bin(
+        "apply_patch",
+    )?))
+}
+
 #[test]
 fn test_apply_patch_cli_add_and_update() -> anyhow::Result<()> {
    let tmp = tempdir()?;
@@ -16,8 +21,7 @@ fn test_apply_patch_cli_add_and_update() -> anyhow::Result<()> {
 +hello
 *** End Patch"#
    );
-    Command::cargo_bin("apply_patch")
-        .expect("should find apply_patch binary")
+    apply_patch_command()?
        .arg(add_patch)
        .current_dir(tmp.path())
        .assert()
@@ -34,8 +38,7 @@ fn test_apply_patch_cli_add_and_update() -> anyhow::Result<()> {
 +world
 *** End Patch"#
    );
-    Command::cargo_bin("apply_patch")
-        .expect("should find apply_patch binary")
+    apply_patch_command()?
        .arg(update_patch)
        .current_dir(tmp.path())
        .assert()
@@ -59,10 +62,9 @@ fn test_apply_patch_cli_stdin_add_and_update() -> anyhow::Result<()> {
 +hello
 *** End Patch"#
    );
-    let mut cmd =
-        assert_cmd::Command::cargo_bin("apply_patch").expect("should find apply_patch binary");
-    cmd.current_dir(tmp.path());
-    cmd.write_stdin(add_patch)
+    apply_patch_command()?
+        .current_dir(tmp.path())
+        .write_stdin(add_patch)
        .assert()
        .success()
        .stdout(format!("Success. Updated the following files:\nA {file}\n"));
@@ -77,10 +79,9 @@ fn test_apply_patch_cli_stdin_add_and_update() -> anyhow::Result<()> {
 +world
 *** End Patch"#
    );
-    let mut cmd =
-        assert_cmd::Command::cargo_bin("apply_patch").expect("should find apply_patch binary");
-    cmd.current_dir(tmp.path());
-    cmd.write_stdin(update_patch)
+    apply_patch_command()?
+        .current_dir(tmp.path())
+        .write_stdin(update_patch)
        .assert()
        .success()
        .stdout(format!("Success. Updated the following files:\nM {file}\n"));
--- a/codex-rs/apply-patch/tests/suite/scenarios.rs
+++ b/codex-rs/apply-patch/tests/suite/scenarios.rs
@@ -1,4 +1,3 @@
-use assert_cmd::prelude::*;
 use pretty_assertions::assert_eq;
 use std::collections::BTreeMap;
 use std::fs;
@@ -9,7 +8,8 @@ use tempfile::tempdir;

 #[test]
 fn test_apply_patch_scenarios() -> anyhow::Result<()> {
-    for scenario in fs::read_dir("tests/fixtures/scenarios")? {
+    let scenarios_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/scenarios");
+    for scenario in fs::read_dir(scenarios_dir)? {
        let scenario = scenario?;
        let path = scenario.path();
        if path.is_dir() {
@@ -36,7 +36,7 @@ fn run_apply_patch_scenario(dir: &Path) -> anyhow::Result<()> {
    // Run apply_patch in the temporary directory. We intentionally do not assert
    // on the exit status here; the scenarios are specified purely in terms of
    // final filesystem state, which we compare below.
-    Command::cargo_bin("apply_patch")?
+    Command::new(codex_utils_cargo_bin::cargo_bin("apply_patch")?)
        .arg(patch)
        .current_dir(tmp.path())
        .output()?;
@@ -82,11 +82,15 @@ fn snapshot_dir_recursive(
            continue;
        };
        let rel = stripped.to_path_buf();
-        let file_type = entry.file_type()?;
-        if file_type.is_dir() {
+
+        // Under Buck2, files in `__srcs` are often materialized as symlinks.
+        // Use `fs::metadata()` (follows symlinks, unlike `DirEntry::metadata()`)
+        // so our fixture snapshots work under both Cargo and Buck2.
+        let metadata = fs::metadata(&path)?;
+        if metadata.is_dir() {
            entries.insert(rel.clone(), Entry::Dir);
            snapshot_dir_recursive(base, &path, entries)?;
-        } else if file_type.is_file() {
+        } else if metadata.is_file() {
            let contents = fs::read(&path)?;
            entries.insert(rel, Entry::File(contents));
        }
@@ -98,12 +102,14 @@ fn copy_dir_recursive(src: &Path, dst: &Path) -> anyhow::Result<()> {
    for entry in fs::read_dir(src)? {
        let entry = entry?;
        let path = entry.path();
-        let file_type = entry.file_type()?;
        let dest_path = dst.join(entry.file_name());
-        if file_type.is_dir() {
+
+        // See note in `snapshot_dir_recursive` about Buck2 symlink trees.
+        let metadata = fs::metadata(&path)?;
+        if metadata.is_dir() {
            fs::create_dir_all(&dest_path)?;
            copy_dir_recursive(&path, &dest_path)?;
-        } else if file_type.is_file() {
+        } else if metadata.is_file() {
            if let Some(parent) = dest_path.parent() {
                fs::create_dir_all(parent)?;
            }
--- a/codex-rs/apply-patch/tests/suite/tool.rs
+++ b/codex-rs/apply-patch/tests/suite/tool.rs
@@ -5,13 +5,13 @@ use std::path::Path;
 use tempfile::tempdir;

 fn run_apply_patch_in_dir(dir: &Path, patch: &str) -> anyhow::Result<assert_cmd::assert::Assert> {
-    let mut cmd = Command::cargo_bin("apply_patch")?;
+    let mut cmd = Command::new(codex_utils_cargo_bin::cargo_bin("apply_patch")?);
    cmd.current_dir(dir);
    Ok(cmd.arg(patch).assert())
 }

 fn apply_patch_command(dir: &Path) -> anyhow::Result<Command> {
-    let mut cmd = Command::cargo_bin("apply_patch")?;
+    let mut cmd = Command::new(codex_utils_cargo_bin::cargo_bin("apply_patch")?);
    cmd.current_dir(dir);
    Ok(cmd)
 }
--- a/codex-rs/cli/Cargo.toml
+++ b/codex-rs/cli/Cargo.toml
@@ -60,6 +60,7 @@ codex_windows_sandbox = { package = "codex-windows-sandbox", path = "../windows-
 [dev-dependencies]
 assert_cmd = { workspace = true }
 assert_matches = { workspace = true }
+codex-utils-cargo-bin = { workspace = true }
 predicates = { workspace = true }
 pretty_assertions = { workspace = true }
 tempfile = { workspace = true }
--- a/codex-rs/cli/tests/execpolicy.rs
+++ b/codex-rs/cli/tests/execpolicy.rs
@@ -24,7 +24,7 @@ prefix_rule(
 "#,
    )?;

-    let output = Command::cargo_bin("codex")?
+    let output = Command::new(codex_utils_cargo_bin::cargo_bin("codex")?)
        .env("CODEX_HOME", codex_home.path())
        .args([
            "execpolicy",
--- a/codex-rs/cli/tests/mcp_add_remove.rs
+++ b/codex-rs/cli/tests/mcp_add_remove.rs
@@ -8,7 +8,7 @@ use pretty_assertions::assert_eq;
 use tempfile::TempDir;

 fn codex_command(codex_home: &Path) -> Result<assert_cmd::Command> {
-    let mut cmd = assert_cmd::Command::cargo_bin("codex")?;
+    let mut cmd = assert_cmd::Command::new(codex_utils_cargo_bin::cargo_bin("codex")?);
    cmd.env("CODEX_HOME", codex_home);
    Ok(cmd)
 }
--- a/codex-rs/cli/tests/mcp_list.rs
+++ b/codex-rs/cli/tests/mcp_list.rs
@@ -12,7 +12,7 @@ use serde_json::json;
 use tempfile::TempDir;

 fn codex_command(codex_home: &Path) -> Result<assert_cmd::Command> {
-    let mut cmd = assert_cmd::Command::cargo_bin("codex")?;
+    let mut cmd = assert_cmd::Command::new(codex_utils_cargo_bin::cargo_bin("codex")?);
    cmd.env("CODEX_HOME", codex_home);
    Ok(cmd)
 }
--- a/codex-rs/codex-api/src/common.rs
+++ b/codex-rs/codex-api/src/common.rs
@@ -59,6 +59,7 @@ pub enum ResponseEvent {
        summary_index: i64,
    },
    RateLimits(RateLimitSnapshot),
+    ModelsEtag(String),
 }

 #[derive(Debug, Serialize, Clone)]
--- a/codex-rs/codex-api/src/endpoint/chat.rs
+++ b/codex-rs/codex-api/src/endpoint/chat.rs
@@ -152,6 +152,9 @@ impl Stream for AggregatedStream {
                Poll::Ready(Some(Ok(ResponseEvent::RateLimits(snapshot)))) => {
                    return Poll::Ready(Some(Ok(ResponseEvent::RateLimits(snapshot))));
                }
+                Poll::Ready(Some(Ok(ResponseEvent::ModelsEtag(etag)))) => {
+                    return Poll::Ready(Some(Ok(ResponseEvent::ModelsEtag(etag))));
+                }
                Poll::Ready(Some(Ok(ResponseEvent::Completed {
                    response_id,
                    token_usage,
--- a/codex-rs/codex-api/src/endpoint/models.rs
+++ b/codex-rs/codex-api/src/endpoint/models.rs
@@ -5,6 +5,7 @@ use crate::provider::Provider;
 use crate::telemetry::run_with_request_telemetry;
 use codex_client::HttpTransport;
 use codex_client::RequestTelemetry;
+use codex_protocol::openai_models::ModelInfo;
 use codex_protocol::openai_models::ModelsResponse;
 use http::HeaderMap;
 use http::Method;
@@ -41,7 +42,7 @@ impl<T: HttpTransport, A: AuthProvider> ModelsClient<T, A> {
        &self,
        client_version: &str,
        extra_headers: HeaderMap,
-    ) -> Result<ModelsResponse, ApiError> {
+    ) -> Result<(Vec<ModelInfo>, Option<String>), ApiError> {
        let builder = || {
            let mut req = self.provider.build_request(Method::GET, self.path());
            req.headers.extend(extra_headers.clone());
@@ -66,7 +67,7 @@ impl<T: HttpTransport, A: AuthProvider> ModelsClient<T, A> {
            .and_then(|value| value.to_str().ok())
            .map(ToString::to_string);

-        let ModelsResponse { models, etag } = serde_json::from_slice::<ModelsResponse>(&resp.body)
+        let ModelsResponse { models } = serde_json::from_slice::<ModelsResponse>(&resp.body)
            .map_err(|e| {
                ApiError::Stream(format!(
                    "failed to decode models response: {e}; body: {}",
@@ -74,9 +75,7 @@ impl<T: HttpTransport, A: AuthProvider> ModelsClient<T, A> {
                ))
            })?;

-        let etag = header_etag.unwrap_or(etag);
-
-        Ok(ModelsResponse { models, etag })
+        Ok((models, header_etag))
    }
 }

@@ -102,16 +101,15 @@ mod tests {
    struct CapturingTransport {
        last_request: Arc<Mutex<Option<Request>>>,
        body: Arc<ModelsResponse>,
+        etag: Option<String>,
    }

    impl Default for CapturingTransport {
        fn default() -> Self {
            Self {
                last_request: Arc::new(Mutex::new(None)),
-                body: Arc::new(ModelsResponse {
-                    models: Vec::new(),
-                    etag: String::new(),
-                }),
+                body: Arc::new(ModelsResponse { models: Vec::new() }),
+                etag: None,
            }
        }
    }
@@ -122,8 +120,8 @@ mod tests {
            *self.last_request.lock().unwrap() = Some(req);
            let body = serde_json::to_vec(&*self.body).unwrap();
            let mut headers = HeaderMap::new();
-            if !self.body.etag.is_empty() {
-                headers.insert(ETAG, self.body.etag.parse().unwrap());
+            if let Some(etag) = &self.etag {
+                headers.insert(ETAG, etag.parse().unwrap());
            }
            Ok(Response {
                status: StatusCode::OK,
@@ -166,14 +164,12 @@ mod tests {

    #[tokio::test]
    async fn appends_client_version_query() {
-        let response = ModelsResponse {
-            models: Vec::new(),
-            etag: String::new(),
-        };
+        let response = ModelsResponse { models: Vec::new() };

        let transport = CapturingTransport {
            last_request: Arc::new(Mutex::new(None)),
            body: Arc::new(response),
+            etag: None,
        };

        let client = ModelsClient::new(
@@ -182,12 +178,12 @@ mod tests {
            DummyAuth,
        );

-        let result = client
+        let (models, _) = client
            .list_models("0.99.0", HeaderMap::new())
            .await
            .expect("request should succeed");

-        assert_eq!(result.models.len(), 0);
+        assert_eq!(models.len(), 0);

        let url = transport
            .last_request
@@ -227,17 +223,16 @@ mod tests {
                    "truncation_policy": {"mode": "bytes", "limit": 10_000},
                    "supports_parallel_tool_calls": false,
                    "context_window": null,
-                    "reasoning_summary_format": "none",
                    "experimental_supported_tools": [],
                }))
                .unwrap(),
            ],
-            etag: String::new(),
        };

        let transport = CapturingTransport {
            last_request: Arc::new(Mutex::new(None)),
            body: Arc::new(response),
+            etag: None,
        };

        let client = ModelsClient::new(
@@ -246,27 +241,25 @@ mod tests {
            DummyAuth,
        );

-        let result = client
+        let (models, _) = client
            .list_models("0.99.0", HeaderMap::new())
            .await
            .expect("request should succeed");

-        assert_eq!(result.models.len(), 1);
-        assert_eq!(result.models[0].slug, "gpt-test");
-        assert_eq!(result.models[0].supported_in_api, true);
-        assert_eq!(result.models[0].priority, 1);
+        assert_eq!(models.len(), 1);
+        assert_eq!(models[0].slug, "gpt-test");
+        assert_eq!(models[0].supported_in_api, true);
+        assert_eq!(models[0].priority, 1);
    }

    #[tokio::test]
    async fn list_models_includes_etag() {
-        let response = ModelsResponse {
-            models: Vec::new(),
-            etag: "\"abc\"".to_string(),
-        };
+        let response = ModelsResponse { models: Vec::new() };

        let transport = CapturingTransport {
            last_request: Arc::new(Mutex::new(None)),
            body: Arc::new(response),
+            etag: Some("\"abc\"".to_string()),
        };

        let client = ModelsClient::new(
@@ -275,12 +268,12 @@ mod tests {
            DummyAuth,
        );

-        let result = client
+        let (models, etag) = client
            .list_models("0.1.0", HeaderMap::new())
            .await
            .expect("request should succeed");

-        assert_eq!(result.models.len(), 0);
-        assert_eq!(result.etag, "\"abc\"");
+        assert_eq!(models.len(), 0);
+        assert_eq!(etag, Some("\"abc\"".to_string()));
    }
 }
--- a/codex-rs/codex-api/src/requests/chat.rs
+++ b/codex-rs/codex-api/src/requests/chat.rs
@@ -204,24 +204,16 @@ impl<'a> ChatRequestBuilder<'a> {
                    call_id,
                    ..
                } => {
-                    let mut msg = json!({
-                        "role": "assistant",
-                        "content": null,
-                        "tool_calls": [{
-                            "id": call_id,
-                            "type": "function",
-                            "function": {
-                                "name": name,
-                                "arguments": arguments,
-                            }
-                        }]
+                    let reasoning = reasoning_by_anchor_index.get(&idx).map(String::as_str);
+                    let tool_call = json!({
+                        "id": call_id,
+                        "type": "function",
+                        "function": {
+                            "name": name,
+                            "arguments": arguments,
+                        }
                    });
-                    if let Some(reasoning) = reasoning_by_anchor_index.get(&idx)
-                        && let Some(obj) = msg.as_object_mut()
-                    {
-                        obj.insert("reasoning".to_string(), json!(reasoning));
-                    }
-                    messages.push(msg);
+                    push_tool_call_message(&mut messages, tool_call, reasoning);
                }
                ResponseItem::LocalShellCall {
                    id,
@@ -229,22 +221,14 @@ impl<'a> ChatRequestBuilder<'a> {
                    status,
                    action,
                } => {
-                    let mut msg = json!({
-                        "role": "assistant",
-                        "content": null,
-                        "tool_calls": [{
-                            "id": id.clone().unwrap_or_default(),
-                            "type": "local_shell_call",
-                            "status": status,
-                            "action": action,
-                        }]
+                    let reasoning = reasoning_by_anchor_index.get(&idx).map(String::as_str);
+                    let tool_call = json!({
+                        "id": id.clone().unwrap_or_default(),
+                        "type": "local_shell_call",
+                        "status": status,
+                        "action": action,
                    });
-                    if let Some(reasoning) = reasoning_by_anchor_index.get(&idx)
-                        && let Some(obj) = msg.as_object_mut()
-                    {
-                        obj.insert("reasoning".to_string(), json!(reasoning));
-                    }
-                    messages.push(msg);
+                    push_tool_call_message(&mut messages, tool_call, reasoning);
                }
                ResponseItem::FunctionCallOutput { call_id, output } => {
                    let content_value = if let Some(items) = &output.content_items {
@@ -277,18 +261,16 @@ impl<'a> ChatRequestBuilder<'a> {
                    input,
                    status: _,
                } => {
-                    messages.push(json!({
-                        "role": "assistant",
-                        "content": null,
-                        "tool_calls": [{
-                            "id": id,
-                            "type": "custom",
-                            "custom": {
-                                "name": name,
-                                "input": input,
-                            }
-                        }]
-                    }));
+                    let tool_call = json!({
+                        "id": id,
+                        "type": "custom",
+                        "custom": {
+                            "name": name,
+                            "input": input,
+                        }
+                    });
+                    let reasoning = reasoning_by_anchor_index.get(&idx).map(String::as_str);
+                    push_tool_call_message(&mut messages, tool_call, reasoning);
                }
                ResponseItem::CustomToolCallOutput { call_id, output } => {
                    messages.push(json!({
@@ -328,11 +310,50 @@ impl<'a> ChatRequestBuilder<'a> {
    }
 }

+fn push_tool_call_message(messages: &mut Vec<Value>, tool_call: Value, reasoning: Option<&str>) {
+    // Chat Completions requires that tool calls are grouped into a single assistant message
+    // (with `tool_calls: [...]`) followed by tool role responses.
+    if let Some(Value::Object(obj)) = messages.last_mut()
+        && obj.get("role").and_then(Value::as_str) == Some("assistant")
+        && obj.get("content").is_some_and(Value::is_null)
+        && let Some(tool_calls) = obj.get_mut("tool_calls").and_then(Value::as_array_mut)
+    {
+        tool_calls.push(tool_call);
+        if let Some(reasoning) = reasoning {
+            if let Some(Value::String(existing)) = obj.get_mut("reasoning") {
+                if !existing.is_empty() {
+                    existing.push('\n');
+                }
+                existing.push_str(reasoning);
+            } else {
+                obj.insert(
+                    "reasoning".to_string(),
+                    Value::String(reasoning.to_string()),
+                );
+            }
+        }
+        return;
+    }
+
+    let mut msg = json!({
+        "role": "assistant",
+        "content": null,
+        "tool_calls": [tool_call],
+    });
+    if let Some(reasoning) = reasoning
+        && let Some(obj) = msg.as_object_mut()
+    {
+        obj.insert("reasoning".to_string(), json!(reasoning));
+    }
+    messages.push(msg);
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
    use crate::provider::RetryConfig;
    use crate::provider::WireApi;
+    use codex_protocol::models::FunctionCallOutputPayload;
    use codex_protocol::protocol::SessionSource;
    use codex_protocol::protocol::SubAgentSource;
    use http::HeaderValue;
@@ -385,4 +406,89 @@ mod tests {
            Some(&HeaderValue::from_static("review"))
        );
    }
+
+    #[test]
+    fn groups_consecutive_tool_calls_into_a_single_assistant_message() {
+        let prompt_input = vec![
+            ResponseItem::Message {
+                id: None,
+                role: "user".to_string(),
+                content: vec![ContentItem::InputText {
+                    text: "read these".to_string(),
+                }],
+            },
+            ResponseItem::FunctionCall {
+                id: None,
+                name: "read_file".to_string(),
+                arguments: r#"{"path":"a.txt"}"#.to_string(),
+                call_id: "call-a".to_string(),
+            },
+            ResponseItem::FunctionCall {
+                id: None,
+                name: "read_file".to_string(),
+                arguments: r#"{"path":"b.txt"}"#.to_string(),
+                call_id: "call-b".to_string(),
+            },
+            ResponseItem::FunctionCall {
+                id: None,
+                name: "read_file".to_string(),
+                arguments: r#"{"path":"c.txt"}"#.to_string(),
+                call_id: "call-c".to_string(),
+            },
+            ResponseItem::FunctionCallOutput {
+                call_id: "call-a".to_string(),
+                output: FunctionCallOutputPayload {
+                    content: "A".to_string(),
+                    ..Default::default()
+                },
+            },
+            ResponseItem::FunctionCallOutput {
+                call_id: "call-b".to_string(),
+                output: FunctionCallOutputPayload {
+                    content: "B".to_string(),
+                    ..Default::default()
+                },
+            },
+            ResponseItem::FunctionCallOutput {
+                call_id: "call-c".to_string(),
+                output: FunctionCallOutputPayload {
+                    content: "C".to_string(),
+                    ..Default::default()
+                },
+            },
+        ];
+
+        let req = ChatRequestBuilder::new("gpt-test", "inst", &prompt_input, &[])
+            .build(&provider())
+            .expect("request");
+
+        let messages = req
+            .body
+            .get("messages")
+            .and_then(|v| v.as_array())
+            .expect("messages array");
+        // system + user + assistant(tool_calls=[...]) + 3 tool outputs
+        assert_eq!(messages.len(), 6);
+
+        assert_eq!(messages[0]["role"], "system");
+        assert_eq!(messages[1]["role"], "user");
+
+        let tool_calls_msg = &messages[2];
+        assert_eq!(tool_calls_msg["role"], "assistant");
+        assert_eq!(tool_calls_msg["content"], serde_json::Value::Null);
+        let tool_calls = tool_calls_msg["tool_calls"]
+            .as_array()
+            .expect("tool_calls array");
+        assert_eq!(tool_calls.len(), 3);
+        assert_eq!(tool_calls[0]["id"], "call-a");
+        assert_eq!(tool_calls[1]["id"], "call-b");
+        assert_eq!(tool_calls[2]["id"], "call-c");
+
+        assert_eq!(messages[3]["role"], "tool");
+        assert_eq!(messages[3]["tool_call_id"], "call-a");
+        assert_eq!(messages[4]["role"], "tool");
+        assert_eq!(messages[4]["tool_call_id"], "call-b");
+        assert_eq!(messages[5]["role"], "tool");
+        assert_eq!(messages[5]["tool_call_id"], "call-c");
+    }
 }
--- a/codex-rs/codex-api/src/sse/chat.rs
+++ b/codex-rs/codex-api/src/sse/chat.rs
@@ -30,6 +30,21 @@ pub(crate) fn spawn_chat_stream(
    ResponseStream { rx_event }
 }

+/// Processes Server-Sent Events from the legacy Chat Completions streaming API.
+///
+/// The upstream protocol terminates a streaming response with a final sentinel event
+/// (`data: [DONE]`). Historically, some of our test stubs have emitted `data: DONE`
+/// (without brackets) instead. Both forms end processing here: unless completion was
+/// already sent, pending items are flushed and `ResponseEvent::Completed` is emitted.
+///
+/// `eventsource_stream` delivers these sentinels as regular events rather than signaling
+/// end-of-stream. Before this handling existed, they failed JSON parsing, were logged and
+/// skipped, and we kept polling for more events.
+///
+/// On servers that keep the HTTP connection open after emitting the sentinel (notably
+/// wiremock on Windows), that meant `ResponseEvent::Completed` was never emitted.
+/// Higher-level workflows/tests that wait for completion before issuing subsequent model
+/// calls then stalled: "expected N requests, got 1" verification failures in the mock server.
 pub async fn process_chat_sse<S>(
    stream: S,
    tx_event: mpsc::Sender<Result<ResponseEvent, ApiError>>,
@@ -57,6 +72,31 @@ pub async fn process_chat_sse<S>(
    let mut reasoning_item: Option<ResponseItem> = None;
    let mut completed_sent = false;

+    async fn flush_and_complete(
+        tx_event: &mpsc::Sender<Result<ResponseEvent, ApiError>>,
+        reasoning_item: &mut Option<ResponseItem>,
+        assistant_item: &mut Option<ResponseItem>,
+    ) {
+        if let Some(reasoning) = reasoning_item.take() {
+            let _ = tx_event
+                .send(Ok(ResponseEvent::OutputItemDone(reasoning)))
+                .await;
+        }
+
+        if let Some(assistant) = assistant_item.take() {
+            let _ = tx_event
+                .send(Ok(ResponseEvent::OutputItemDone(assistant)))
+                .await;
+        }
+
+        let _ = tx_event
+            .send(Ok(ResponseEvent::Completed {
+                response_id: String::new(),
+                token_usage: None,
+            }))
+            .await;
+    }
+
    loop {
        let start = Instant::now();
        let response = timeout(idle_timeout, stream.next()).await;
@@ -70,24 +110,8 @@ pub async fn process_chat_sse<S>(
                return;
            }
            Ok(None) => {
-                if let Some(reasoning) = reasoning_item {
-                    let _ = tx_event
-                        .send(Ok(ResponseEvent::OutputItemDone(reasoning)))
-                        .await;
-                }
-
-                if let Some(assistant) = assistant_item {
-                    let _ = tx_event
-                        .send(Ok(ResponseEvent::OutputItemDone(assistant)))
-                        .await;
-                }
                if !completed_sent {
-                    let _ = tx_event
-                        .send(Ok(ResponseEvent::Completed {
-                            response_id: String::new(),
-                            token_usage: None,
-                        }))
-                        .await;
+                    flush_and_complete(&tx_event, &mut reasoning_item, &mut assistant_item).await;
                }
                return;
            }
@@ -101,16 +125,25 @@ pub async fn process_chat_sse<S>(

        trace!("SSE event: {}", sse.data);

-        if sse.data.trim().is_empty() {
+        let data = sse.data.trim();
+
+        if data.is_empty() {
            continue;
        }

-        let value: serde_json::Value = match serde_json::from_str(&sse.data) {
+        if data == "[DONE]" || data == "DONE" {
+            if !completed_sent {
+                flush_and_complete(&tx_event, &mut reasoning_item, &mut assistant_item).await;
+            }
+            return;
+        }
+
+        let value: serde_json::Value = match serde_json::from_str(data) {
            Ok(val) => val,
            Err(err) => {
                debug!(
                    "Failed to parse ChatCompletions SSE event: {err}, data: {}",
-                    &sse.data
+                    data
                );
                continue;
            }
@@ -362,6 +395,16 @@ mod tests {
        body
    }

+    /// Regression test: the stream should complete when we see a `[DONE]` sentinel.
+    ///
+    /// This is important for tests/mocks that don't immediately close the underlying
+    /// connection after emitting the sentinel.
+    #[tokio::test]
+    async fn completes_on_done_sentinel_without_json() {
+        let events = collect_events("event: message\ndata: [DONE]\n\n").await;
+        assert_matches!(&events[..], [ResponseEvent::Completed { .. }]);
+    }
+
    async fn collect_events(body: &str) -> Vec<ResponseEvent> {
        let reader = ReaderStream::new(std::io::Cursor::new(body.to_string()))
            .map_err(|err| codex_client::TransportError::Network(err.to_string()));
--- a/codex-rs/codex-api/src/sse/responses.rs
+++ b/codex-rs/codex-api/src/sse/responses.rs
@@ -51,11 +51,19 @@ pub fn spawn_response_stream(
    telemetry: Option<Arc<dyn SseTelemetry>>,
 ) -> ResponseStream {
    let rate_limits = parse_rate_limit(&stream_response.headers);
+    let models_etag = stream_response
+        .headers
+        .get("X-Models-Etag")
+        .and_then(|v| v.to_str().ok())
+        .map(ToString::to_string);
    let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent, ApiError>>(1600);
    tokio::spawn(async move {
        if let Some(snapshot) = rate_limits {
            let _ = tx_event.send(Ok(ResponseEvent::RateLimits(snapshot))).await;
        }
+        if let Some(etag) = models_etag {
+            let _ = tx_event.send(Ok(ResponseEvent::ModelsEtag(etag))).await;
+        }
        process_sse(stream_response.bytes, tx_event, idle_timeout, telemetry).await;
    });

--- a/codex-rs/codex-api/tests/models_integration.rs
+++ b/codex-rs/codex-api/tests/models_integration.rs
@@ -10,7 +10,6 @@ use codex_protocol::openai_models::ModelVisibility;
 use codex_protocol::openai_models::ModelsResponse;
 use codex_protocol::openai_models::ReasoningEffort;
 use codex_protocol::openai_models::ReasoningEffortPreset;
-use codex_protocol::openai_models::ReasoningSummaryFormat;
 use codex_protocol::openai_models::TruncationPolicyConfig;
 use http::HeaderMap;
 use http::Method;
@@ -85,10 +84,8 @@ async fn models_client_hits_models_endpoint() {
            truncation_policy: TruncationPolicyConfig::bytes(10_000),
            supports_parallel_tool_calls: false,
            context_window: None,
-            reasoning_summary_format: ReasoningSummaryFormat::None,
            experimental_supported_tools: Vec::new(),
        }],
-        etag: String::new(),
    };

    Mock::given(method("GET"))
@@ -104,13 +101,13 @@ async fn models_client_hits_models_endpoint() {
    let transport = ReqwestTransport::new(reqwest::Client::new());
    let client = ModelsClient::new(transport, provider(&base_url), DummyAuth);

-    let result = client
+    let (models, _) = client
        .list_models("0.1.0", HeaderMap::new())
        .await
        .expect("models request should succeed");

-    assert_eq!(result.models.len(), 1);
-    assert_eq!(result.models[0].slug, "gpt-test");
+    assert_eq!(models.len(), 1);
+    assert_eq!(models[0].slug, "gpt-test");

    let received = server
        .received_requests()
--- a/codex-rs/codex-client/src/transport.rs
+++ b/codex-rs/codex-client/src/transport.rs
@@ -69,6 +69,15 @@ impl ReqwestTransport {
 #[async_trait]
 impl HttpTransport for ReqwestTransport {
    async fn execute(&self, req: Request) -> Result<Response, TransportError> {
+        if enabled!(Level::TRACE) {
+            trace!(
+                "{} to {}: {}",
+                req.method,
+                req.url,
+                req.body.as_ref().unwrap_or_default()
+            );
+        }
+
        let builder = self.build(req)?;
        let resp = builder.send().await.map_err(Self::map_error)?;
        let status = resp.status();
--- a/codex-rs/core/Cargo.toml
+++ b/codex-rs/core/Cargo.toml
@@ -123,6 +123,7 @@ assert_cmd = { workspace = true }
 assert_matches = { workspace = true }
 codex-arg0 = { workspace = true }
 codex-core = { path = ".", features = ["deterministic_process_ids"] }
+codex-utils-cargo-bin = { workspace = true }
 core_test_support = { workspace = true }
 ctor = { workspace = true }
 escargot = { workspace = true }
--- a/codex-rs/core/src/client.rs
+++ b/codex-rs/core/src/client.rs
@@ -49,9 +49,9 @@ use crate::features::FEATURES;
 use crate::flags::CODEX_RS_SSE_FIXTURE;
 use crate::model_provider_info::ModelProviderInfo;
 use crate::model_provider_info::WireApi;
+use crate::models_manager::model_family::ModelFamily;
 use crate::tools::spec::create_tools_json_for_chat_completions_api;
 use crate::tools::spec::create_tools_json_for_responses_api;
-use codex_protocol::openai_models::ModelFamily;

 #[derive(Debug, Clone)]
 pub struct ModelClient {
--- a/codex-rs/core/src/client_common.rs
+++ b/codex-rs/core/src/client_common.rs
@@ -1,9 +1,9 @@
 use crate::client_common::tools::ToolSpec;
 use crate::error::Result;
+use crate::models_manager::model_family::ModelFamily;
 pub use codex_api::common::ResponseEvent;
 use codex_apply_patch::APPLY_PATCH_TOOL_INSTRUCTIONS;
 use codex_protocol::models::ResponseItem;
-use codex_protocol::openai_models::ModelFamily;
 use futures::Stream;
 use serde::Deserialize;
 use serde_json::Value;
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -17,12 +17,14 @@ use crate::exec_policy::ExecPolicyManager;
 use crate::features::Feature;
 use crate::features::Features;
 use crate::models_manager::manager::ModelsManager;
+use crate::models_manager::model_family::ModelFamily;
 use crate::parse_command::parse_command;
 use crate::parse_turn_item;
 use crate::stream_events_utils::HandleOutputCtx;
 use crate::stream_events_utils::handle_non_tool_response_item;
 use crate::stream_events_utils::handle_output_item_done;
 use crate::terminal;
+use crate::truncate::TruncationPolicy;
 use crate::user_notification::UserNotifier;
 use crate::util::error_or_panic;
 use async_channel::Receiver;
@@ -30,8 +32,6 @@ use async_channel::Sender;
 use codex_protocol::ConversationId;
 use codex_protocol::approvals::ExecPolicyAmendment;
 use codex_protocol::items::TurnItem;
-use codex_protocol::openai_models::ModelFamily;
-use codex_protocol::openai_models::TruncationPolicy;
 use codex_protocol::protocol::FileChange;
 use codex_protocol::protocol::HasLegacyEvent;
 use codex_protocol::protocol::ItemCompletedEvent;
@@ -88,6 +88,7 @@ use crate::error::Result as CodexResult;
 #[cfg(test)]
 use crate::exec::StreamOutput;
 use crate::exec_policy::ExecPolicyUpdateError;
+use crate::feedback_tags;
 use crate::mcp::auth::compute_auth_statuses;
 use crate::mcp_connection_manager::McpConnectionManager;
 use crate::model_provider_info::CHAT_WIRE_API_DEPRECATION_SUMMARY;
@@ -246,7 +247,9 @@ impl Codex {

        let config = Arc::new(config);
        if config.features.enabled(Feature::RemoteModels)
-            && let Err(err) = models_manager.refresh_available_models(&config).await
+            && let Err(err) = models_manager
+                .refresh_available_models_with_cache(&config)
+                .await
        {
            error!("failed to refresh available models: {err:?}");
        }
@@ -527,7 +530,7 @@ impl Session {
            final_output_json_schema: None,
            codex_linux_sandbox_exe: per_turn_config.codex_linux_sandbox_exe.clone(),
            tool_call_gate: Arc::new(ReadinessFlag::new()),
-            truncation_policy: crate::truncate::new_truncation_policy(
+            truncation_policy: TruncationPolicy::new(
                per_turn_config.as_ref(),
                model_family.truncation_policy,
            ),
@@ -681,14 +684,11 @@ impl Session {
        // Dispatch the SessionConfiguredEvent first and then report any errors.
        // If resuming, include converted initial messages in the payload so UIs can render them immediately.
        let initial_messages = initial_history.get_event_msgs();
-        let session_model_family = models_manager
-            .construct_model_family(session_configuration.model.as_str(), config.as_ref())
-            .await;
        let events = std::iter::once(Event {
            id: INITIAL_SUBMIT_ID.to_owned(),
            msg: EventMsg::SessionConfigured(SessionConfiguredEvent {
                session_id: conversation_id,
-                model_family: session_model_family,
+                model: session_configuration.model.clone(),
                model_provider_id: config.model_provider_id.clone(),
                approval_policy: session_configuration.approval_policy.value(),
                sandbox_policy: session_configuration.sandbox_policy.get().clone(),
@@ -811,6 +811,13 @@ impl Session {
                        .await;
                }

+                // Seed usage info from the recorded rollout so UIs can show token counts
+                // immediately on resume/fork.
+                if let Some(info) = Self::last_token_info_from_rollout(&rollout_items) {
+                    let mut state = self.state.lock().await;
+                    state.set_token_info(Some(info));
+                }
+
                // If persisting, persist all rollout items as-is (recorder filters)
                if persist && !rollout_items.is_empty() {
                    self.persist_rollout_items(&rollout_items).await;
@@ -821,6 +828,13 @@ impl Session {
        }
    }

+    fn last_token_info_from_rollout(rollout_items: &[RolloutItem]) -> Option<TokenUsageInfo> {
+        rollout_items.iter().rev().find_map(|item| match item {
+            RolloutItem::EventMsg(EventMsg::TokenCount(ev)) => ev.info.clone(),
+            _ => None,
+        })
+    }
+
    pub(crate) async fn update_settings(
        &self,
        updates: SessionSettingsUpdate,
@@ -1425,12 +1439,14 @@ impl Session {
        message: impl Into<String>,
        codex_error: CodexErr,
    ) {
+        let additional_details = codex_error.to_string();
        let codex_error_info = CodexErrorInfo::ResponseStreamDisconnected {
            http_status_code: codex_error.http_status_code_value(),
        };
        let event = EventMsg::StreamError(StreamErrorEvent {
            message: message.into(),
            codex_error_info: Some(codex_error_info),
+            additional_details: Some(additional_details),
        });
        self.send_event(turn_context, event).await;
    }
@@ -1760,7 +1776,16 @@ mod handlers {
                    final_output_json_schema: Some(final_output_json_schema),
                },
            ),
-            Op::UserInput { items } => (items, SessionSettingsUpdate::default()),
+            Op::UserInput {
+                items,
+                final_output_json_schema,
+            } => (
+                items,
+                SessionSettingsUpdate {
+                    final_output_json_schema: Some(final_output_json_schema),
+                    ..Default::default()
+                },
+            ),
            _ => unreachable!(),
        };

@@ -2052,7 +2077,7 @@ mod handlers {
        review_request: ReviewRequest,
    ) {
        let turn_context = sess.new_default_turn_with_sub_id(sub_id.clone()).await;
-        match resolve_review_request(review_request, config.cwd.as_path()) {
+        match resolve_review_request(review_request, turn_context.cwd.as_path()) {
            Ok(resolved) => {
                spawn_review_thread(
                    Arc::clone(sess),
@@ -2147,10 +2172,7 @@ async fn spawn_review_thread(
        final_output_json_schema: None,
        codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(),
        tool_call_gate: Arc::new(ReadinessFlag::new()),
-        truncation_policy: crate::truncate::new_truncation_policy(
-            &per_turn_config,
-            model_family.truncation_policy,
-        ),
+        truncation_policy: TruncationPolicy::new(&per_turn_config, model_family.truncation_policy),
    };

    // Seed the child task with the review prompt as the initial user message.
@@ -2527,6 +2549,15 @@ async fn try_run_turn(
        truncation_policy: Some(turn_context.truncation_policy.into()),
    });

+    feedback_tags!(
+        model = turn_context.client.get_model(),
+        approval_policy = turn_context.approval_policy,
+        sandbox_policy = turn_context.sandbox_policy,
+        effort = turn_context.client.get_reasoning_effort(),
+        auth_mode = sess.services.auth_manager.get_auth_mode(),
+        features = sess.features.enabled_features(),
+    );
+
    sess.persist_rollout_items(&[rollout_item]).await;
    let mut stream = turn_context
        .client
@@ -2617,6 +2648,13 @@ async fn try_run_turn(
                // token usage is available to avoid duplicate TokenCount events.
                sess.update_rate_limits(&turn_context, snapshot).await;
            }
+            ResponseEvent::ModelsEtag(etag) => {
+                // Update internal state with latest models etag
+                sess.services
+                    .models_manager
+                    .refresh_if_new_etag(etag, sess.features.enabled(Feature::RemoteModels))
+                    .await;
+            }
            ResponseEvent::Completed {
                response_id: _,
                token_usage,
@@ -2757,6 +2795,9 @@ mod tests {
    use crate::protocol::RateLimitSnapshot;
    use crate::protocol::RateLimitWindow;
    use crate::protocol::ResumedHistory;
+    use crate::protocol::TokenCountEvent;
+    use crate::protocol::TokenUsage;
+    use crate::protocol::TokenUsageInfo;
    use crate::state::TaskKind;
    use crate::tasks::SessionTask;
    use crate::tasks::SessionTaskContext;
@@ -2811,6 +2852,83 @@ mod tests {
        assert_eq!(expected, actual);
    }

+    #[tokio::test]
+    async fn record_initial_history_seeds_token_info_from_rollout() {
+        let (session, turn_context) = make_session_and_context().await;
+        let (mut rollout_items, _expected) = sample_rollout(&session, &turn_context);
+
+        let info1 = TokenUsageInfo {
+            total_token_usage: TokenUsage {
+                input_tokens: 10,
+                cached_input_tokens: 0,
+                output_tokens: 20,
+                reasoning_output_tokens: 0,
+                total_tokens: 30,
+            },
+            last_token_usage: TokenUsage {
+                input_tokens: 3,
+                cached_input_tokens: 0,
+                output_tokens: 4,
+                reasoning_output_tokens: 0,
+                total_tokens: 7,
+            },
+            model_context_window: Some(1_000),
+        };
+        let info2 = TokenUsageInfo {
+            total_token_usage: TokenUsage {
+                input_tokens: 100,
+                cached_input_tokens: 50,
+                output_tokens: 200,
+                reasoning_output_tokens: 25,
+                total_tokens: 375,
+            },
+            last_token_usage: TokenUsage {
+                input_tokens: 10,
+                cached_input_tokens: 0,
+                output_tokens: 20,
+                reasoning_output_tokens: 5,
+                total_tokens: 35,
+            },
+            model_context_window: Some(2_000),
+        };
+
+        rollout_items.push(RolloutItem::EventMsg(EventMsg::TokenCount(
+            TokenCountEvent {
+                info: Some(info1),
+                rate_limits: None,
+            },
+        )));
+        rollout_items.push(RolloutItem::EventMsg(EventMsg::TokenCount(
+            TokenCountEvent {
+                info: None,
+                rate_limits: None,
+            },
+        )));
+        rollout_items.push(RolloutItem::EventMsg(EventMsg::TokenCount(
+            TokenCountEvent {
+                info: Some(info2.clone()),
+                rate_limits: None,
+            },
+        )));
+        rollout_items.push(RolloutItem::EventMsg(EventMsg::TokenCount(
+            TokenCountEvent {
+                info: None,
+                rate_limits: None,
+            },
+        )));
+
+        session
+            .record_initial_history(InitialHistory::Resumed(ResumedHistory {
+                conversation_id: ConversationId::default(),
+                history: rollout_items,
+                rollout_path: PathBuf::from("/tmp/resume.jsonl"),
+            }))
+            .await;
+
+        let actual = session.state.lock().await.token_info();
+        assert_eq!(actual, Some(info2));
+    }
+
    #[tokio::test]
    async fn record_initial_history_reconstructs_forked_transcript() {
        let (session, turn_context) = make_session_and_context().await;
@@ -3144,7 +3262,7 @@ mod tests {
            exec_policy,
            auth_manager: auth_manager.clone(),
            otel_manager: otel_manager.clone(),
-            models_manager,
+            models_manager: Arc::clone(&models_manager),
            tool_approvals: Mutex::new(ApprovalStore::default()),
            skills_manager,
        };
@@ -3231,7 +3349,7 @@ mod tests {
            exec_policy,
            auth_manager: Arc::clone(&auth_manager),
            otel_manager: otel_manager.clone(),
-            models_manager,
+            models_manager: Arc::clone(&models_manager),
            tool_approvals: Mutex::new(ApprovalStore::default()),
            skills_manager,
        };
--- a/codex-rs/core/src/codex_delegate.rs
+++ b/codex-rs/core/src/codex_delegate.rs
@@ -118,7 +118,11 @@ pub(crate) async fn run_codex_conversation_one_shot(
    .await?;

    // Send the initial input to kick off the one-shot turn.
-    io.submit(Op::UserInput { items: input }).await?;
+    io.submit(Op::UserInput {
+        items: input,
+        final_output_json_schema: None,
+    })
+    .await?;

    // Bridge events so we can observe completion and shut down automatically.
    let (tx_bridge, rx_bridge) = async_channel::bounded(SUBMISSION_CHANNEL_CAPACITY);
@@ -184,6 +188,10 @@ async fn forward_events(
                        id: _,
                        msg: EventMsg::AgentMessageDelta(_) | EventMsg::AgentReasoningDelta(_),
                    } => {}
+                    Event {
+                        id: _,
+                        msg: EventMsg::TokenCount(_),
+                    } => {}
                    Event {
                        id: _,
                        msg: EventMsg::SessionConfigured(_),
--- a/codex-rs/core/src/compact.rs
+++ b/codex-rs/core/src/compact.rs
@@ -15,6 +15,7 @@ use crate::protocol::EventMsg;
 use crate::protocol::TaskStartedEvent;
 use crate::protocol::TurnContextItem;
 use crate::protocol::WarningEvent;
+use crate::truncate::TruncationPolicy;
 use crate::truncate::approx_token_count;
 use crate::truncate::truncate_text;
 use crate::util::backoff;
@@ -22,7 +23,6 @@ use codex_protocol::items::TurnItem;
 use codex_protocol::models::ContentItem;
 use codex_protocol::models::ResponseInputItem;
 use codex_protocol::models::ResponseItem;
-use codex_protocol::openai_models::TruncationPolicy;
 use codex_protocol::protocol::RolloutItem;
 use codex_protocol::user_input::UserInput;
 use futures::prelude::*;
--- a/codex-rs/core/src/config/mod.rs
+++ b/codex-rs/core/src/config/mod.rs
@@ -38,7 +38,6 @@ use codex_protocol::config_types::SandboxMode;
 use codex_protocol::config_types::TrustLevel;
 use codex_protocol::config_types::Verbosity;
 use codex_protocol::openai_models::ReasoningEffort;
-use codex_protocol::openai_models::ReasoningSummaryFormat;
 use codex_rmcp_client::OAuthCredentialsStoreMode;
 use codex_utils_absolute_path::AbsolutePathBuf;
 use codex_utils_absolute_path::AbsolutePathBufGuard;
@@ -303,9 +302,6 @@ pub struct Config {
    /// Optional override to force-enable reasoning summaries for the configured model.
    pub model_supports_reasoning_summaries: Option<bool>,

-    /// Optional override to force reasoning summary format for the configured model.
-    pub model_reasoning_summary_format: Option<ReasoningSummaryFormat>,
-
    /// Optional verbosity control for GPT-5 models (Responses API `text.verbosity`).
    pub model_verbosity: Option<Verbosity>,

@@ -367,6 +363,7 @@ pub struct ConfigBuilder {
    cli_overrides: Option<Vec<(String, TomlValue)>>,
    harness_overrides: Option<ConfigOverrides>,
    loader_overrides: Option<LoaderOverrides>,
+    thread_agnostic: bool,
 }

 impl ConfigBuilder {
@@ -375,6 +372,13 @@ impl ConfigBuilder {
        self
    }

+    /// Load a "thread-agnostic" config stack, which intentionally ignores any
+    /// in-repo `.codex/` config layers (because there is no cwd/project context).
+    pub fn thread_agnostic(mut self) -> Self {
+        self.thread_agnostic = true;
+        self
+    }
+
    pub fn cli_overrides(mut self, cli_overrides: Vec<(String, TomlValue)>) -> Self {
        self.cli_overrides = Some(cli_overrides);
        self
@@ -396,18 +400,22 @@ impl ConfigBuilder {
            cli_overrides,
            harness_overrides,
            loader_overrides,
+            thread_agnostic,
        } = self;
        let codex_home = codex_home.map_or_else(find_codex_home, std::io::Result::Ok)?;
        let cli_overrides = cli_overrides.unwrap_or_default();
        let harness_overrides = harness_overrides.unwrap_or_default();
        let loader_overrides = loader_overrides.unwrap_or_default();
-        let cwd = match harness_overrides.cwd.as_deref() {
-            Some(path) => AbsolutePathBuf::try_from(path)?,
-            None => AbsolutePathBuf::current_dir()?,
+        let cwd = if thread_agnostic {
+            None
+        } else {
+            Some(match harness_overrides.cwd.as_deref() {
+                Some(path) => AbsolutePathBuf::try_from(path)?,
+                None => AbsolutePathBuf::current_dir()?,
+            })
        };
        let config_layer_stack =
-            load_config_layers_state(&codex_home, Some(cwd), &cli_overrides, loader_overrides)
-                .await?;
+            load_config_layers_state(&codex_home, cwd, &cli_overrides, loader_overrides).await?;
        let merged_toml = config_layer_stack.effective_config();

        // Note that each layer in ConfigLayerStack should have resolved
@@ -786,9 +794,6 @@ pub struct ConfigToml {
    /// Override to force-enable reasoning summaries for the configured model.
    pub model_supports_reasoning_summaries: Option<bool>,

-    /// Override to force reasoning summary format for the configured model.
-    pub model_reasoning_summary_format: Option<ReasoningSummaryFormat>,
-
    /// Base URL for requests to ChatGPT (as opposed to the OpenAI API).
    pub chatgpt_base_url: Option<String>,

@@ -1379,7 +1384,6 @@ impl Config {
                .or(cfg.model_reasoning_summary)
                .unwrap_or_default(),
            model_supports_reasoning_summaries: cfg.model_supports_reasoning_summaries,
-            model_reasoning_summary_format: cfg.model_reasoning_summary_format.clone(),
            model_verbosity: config_profile.model_verbosity.or(cfg.model_verbosity),
            chatgpt_base_url: config_profile
                .chatgpt_base_url
@@ -2058,6 +2062,7 @@ trust_level = "trusted"
            managed_config_path: Some(managed_path.clone()),
            #[cfg(target_os = "macos")]
            managed_preferences_base64: None,
+            macos_managed_config_requirements_base64: None,
        };

        let cwd = AbsolutePathBuf::try_from(codex_home.path())?;
@@ -2089,6 +2094,43 @@ trust_level = "trusted"
        Ok(())
    }

+    #[tokio::test]
+    async fn config_builder_thread_agnostic_ignores_project_layers() -> anyhow::Result<()> {
+        let tmp = TempDir::new()?;
+        let codex_home = tmp.path().join("codex_home");
+        std::fs::create_dir_all(&codex_home)?;
+        std::fs::write(codex_home.join(CONFIG_TOML_FILE), "model = \"from-user\"\n")?;
+
+        let project = tmp.path().join("project");
+        std::fs::create_dir_all(project.join(".codex"))?;
+        std::fs::write(
+            project.join(".codex").join(CONFIG_TOML_FILE),
+            "model = \"from-project\"\n",
+        )?;
+
+        let harness_overrides = ConfigOverrides {
+            cwd: Some(project),
+            ..Default::default()
+        };
+
+        let with_project_layers = ConfigBuilder::default()
+            .codex_home(codex_home.clone())
+            .harness_overrides(harness_overrides.clone())
+            .build()
+            .await?;
+        assert_eq!(with_project_layers.model.as_deref(), Some("from-project"));
+
+        let thread_agnostic = ConfigBuilder::default()
+            .codex_home(codex_home)
+            .harness_overrides(harness_overrides)
+            .thread_agnostic()
+            .build()
+            .await?;
+        assert_eq!(thread_agnostic.model.as_deref(), Some("from-user"));
+
+        Ok(())
+    }
+
    #[tokio::test]
    async fn load_global_mcp_servers_returns_empty_if_missing() -> anyhow::Result<()> {
        let codex_home = TempDir::new()?;
@@ -2178,6 +2220,7 @@ trust_level = "trusted"
            managed_config_path: Some(managed_path),
            #[cfg(target_os = "macos")]
            managed_preferences_base64: None,
+            macos_managed_config_requirements_base64: None,
        };

        let cwd = AbsolutePathBuf::try_from(codex_home.path())?;
@@ -3189,7 +3232,6 @@ model_verbosity = "high"
                model_reasoning_effort: Some(ReasoningEffort::High),
                model_reasoning_summary: ReasoningSummary::Detailed,
                model_supports_reasoning_summaries: None,
-                model_reasoning_summary_format: None,
                model_verbosity: None,
                chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
                base_instructions: None,
@@ -3273,7 +3315,6 @@ model_verbosity = "high"
            model_reasoning_effort: None,
            model_reasoning_summary: ReasoningSummary::default(),
            model_supports_reasoning_summaries: None,
-            model_reasoning_summary_format: None,
            model_verbosity: None,
            chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
            base_instructions: None,
@@ -3372,7 +3413,6 @@ model_verbosity = "high"
            model_reasoning_effort: None,
            model_reasoning_summary: ReasoningSummary::default(),
            model_supports_reasoning_summaries: None,
-            model_reasoning_summary_format: None,
            model_verbosity: None,
            chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
            base_instructions: None,
@@ -3457,7 +3497,6 @@ model_verbosity = "high"
            model_reasoning_effort: Some(ReasoningEffort::High),
            model_reasoning_summary: ReasoningSummary::Detailed,
            model_supports_reasoning_summaries: None,
-            model_reasoning_summary_format: None,
            model_verbosity: Some(Verbosity::High),
            chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
            base_instructions: None,
--- a/codex-rs/core/src/config/service.rs
+++ b/codex-rs/core/src/config/service.rs
@@ -755,6 +755,7 @@ remote_compaction = true
                managed_config_path: Some(managed_path.clone()),
                #[cfg(target_os = "macos")]
                managed_preferences_base64: None,
+                macos_managed_config_requirements_base64: None,
            },
        );

@@ -835,6 +836,7 @@ remote_compaction = true
                managed_config_path: Some(managed_path.clone()),
                #[cfg(target_os = "macos")]
                managed_preferences_base64: None,
+                macos_managed_config_requirements_base64: None,
            },
        );

@@ -937,6 +939,7 @@ remote_compaction = true
                managed_config_path: Some(managed_path.clone()),
                #[cfg(target_os = "macos")]
                managed_preferences_base64: None,
+                macos_managed_config_requirements_base64: None,
            },
        );

@@ -984,6 +987,7 @@ remote_compaction = true
                managed_config_path: Some(managed_path.clone()),
                #[cfg(target_os = "macos")]
                managed_preferences_base64: None,
+                macos_managed_config_requirements_base64: None,
            },
        );

@@ -1029,6 +1033,7 @@ remote_compaction = true
                managed_config_path: Some(managed_path.clone()),
                #[cfg(target_os = "macos")]
                managed_preferences_base64: None,
+                macos_managed_config_requirements_base64: None,
            },
        );

--- a/codex-rs/core/src/config_loader/layer_io.rs
+++ b/codex-rs/core/src/config_loader/layer_io.rs
@@ -33,11 +33,13 @@ pub(super) async fn load_config_layers_internal(
    let LoaderOverrides {
        managed_config_path,
        managed_preferences_base64,
+        ..
    } = overrides;

    #[cfg(not(target_os = "macos"))]
    let LoaderOverrides {
        managed_config_path,
+        ..
    } = overrides;

    let managed_config_path = AbsolutePathBuf::from_absolute_path(
--- a/codex-rs/core/src/config_loader/macos.rs
+++ b/codex-rs/core/src/config_loader/macos.rs
@@ -1,3 +1,4 @@
+use super::config_requirements::ConfigRequirementsToml;
 use base64::Engine;
 use base64::prelude::BASE64_STANDARD;
 use core_foundation::base::TCFType;
@@ -10,6 +11,7 @@ use toml::Value as TomlValue;

 const MANAGED_PREFERENCES_APPLICATION_ID: &str = "com.openai.codex";
 const MANAGED_PREFERENCES_CONFIG_KEY: &str = "config_toml_base64";
+const MANAGED_PREFERENCES_REQUIREMENTS_KEY: &str = "requirements_toml_base64";

 pub(crate) async fn load_managed_admin_config_layer(
    override_base64: Option<&str>,
@@ -19,82 +21,126 @@ pub(crate) async fn load_managed_admin_config_layer(
        return if trimmed.is_empty() {
            Ok(None)
        } else {
-            parse_managed_preferences_base64(trimmed).map(Some)
+            parse_managed_config_base64(trimmed).map(Some)
        };
    }

-    const LOAD_ERROR: &str = "Failed to load managed preferences configuration";
-
    match task::spawn_blocking(load_managed_admin_config).await {
        Ok(result) => result,
        Err(join_err) => {
            if join_err.is_cancelled() {
-                tracing::error!("Managed preferences load task was cancelled");
+                tracing::error!("Managed config load task was cancelled");
            } else {
-                tracing::error!("Managed preferences load task failed: {join_err}");
+                tracing::error!("Managed config load task failed: {join_err}");
            }
-            Err(io::Error::other(LOAD_ERROR))
+            Err(io::Error::other("Failed to load managed config"))
        }
    }
 }

 fn load_managed_admin_config() -> io::Result<Option<TomlValue>> {
+    load_managed_preference(MANAGED_PREFERENCES_CONFIG_KEY)?
+        .as_deref()
+        .map(str::trim)
+        .map(parse_managed_config_base64)
+        .transpose()
+}
+
+pub(crate) async fn load_managed_admin_requirements_toml(
+    target: &mut ConfigRequirementsToml,
+    override_base64: Option<&str>,
+) -> io::Result<()> {
+    if let Some(encoded) = override_base64 {
+        let trimmed = encoded.trim();
+        if !trimmed.is_empty() {
+            target.merge_unset_fields(parse_managed_requirements_base64(trimmed)?);
+        }
+        return Ok(());
+    }
+
+    match task::spawn_blocking(load_managed_admin_requirements).await {
+        Ok(result) => {
+            if let Some(requirements) = result? {
+                target.merge_unset_fields(requirements);
+            }
+            Ok(())
+        }
+        Err(join_err) => {
+            if join_err.is_cancelled() {
+                tracing::error!("Managed requirements load task was cancelled");
+            } else {
+                tracing::error!("Managed requirements load task failed: {join_err}");
+            }
+            Err(io::Error::other("Failed to load managed requirements"))
+        }
+    }
+}
+
+fn load_managed_admin_requirements() -> io::Result<Option<ConfigRequirementsToml>> {
+    load_managed_preference(MANAGED_PREFERENCES_REQUIREMENTS_KEY)?
+        .as_deref()
+        .map(str::trim)
+        .map(parse_managed_requirements_base64)
+        .transpose()
+}
+
+fn load_managed_preference(key_name: &str) -> io::Result<Option<String>> {
    #[link(name = "CoreFoundation", kind = "framework")]
    unsafe extern "C" {
        fn CFPreferencesCopyAppValue(key: CFStringRef, application_id: CFStringRef) -> *mut c_void;
    }

-    let application_id = CFString::new(MANAGED_PREFERENCES_APPLICATION_ID);
-    let key = CFString::new(MANAGED_PREFERENCES_CONFIG_KEY);
-
    let value_ref = unsafe {
        CFPreferencesCopyAppValue(
-            key.as_concrete_TypeRef(),
-            application_id.as_concrete_TypeRef(),
+            CFString::new(key_name).as_concrete_TypeRef(),
+            CFString::new(MANAGED_PREFERENCES_APPLICATION_ID).as_concrete_TypeRef(),
        )
    };

    if value_ref.is_null() {
        tracing::debug!(
-            "Managed preferences for {} key {} not found",
-            MANAGED_PREFERENCES_APPLICATION_ID,
-            MANAGED_PREFERENCES_CONFIG_KEY
+            "Managed preferences for {MANAGED_PREFERENCES_APPLICATION_ID} key {key_name} not found",
        );
        return Ok(None);
    }

-    let value = unsafe { CFString::wrap_under_create_rule(value_ref as _) };
-    let contents = value.to_string();
-    let trimmed = contents.trim();
-
-    parse_managed_preferences_base64(trimmed).map(Some)
+    let value = unsafe { CFString::wrap_under_create_rule(value_ref as _) }.to_string();
+    Ok(Some(value))
 }

-fn parse_managed_preferences_base64(encoded: &str) -> io::Result<TomlValue> {
-    let decoded = BASE64_STANDARD.decode(encoded.as_bytes()).map_err(|err| {
-        tracing::error!("Failed to decode managed preferences as base64: {err}");
-        io::Error::new(io::ErrorKind::InvalidData, err)
-    })?;
-
-    let decoded_str = String::from_utf8(decoded).map_err(|err| {
-        tracing::error!("Managed preferences base64 contents were not valid UTF-8: {err}");
-        io::Error::new(io::ErrorKind::InvalidData, err)
-    })?;
-
-    match toml::from_str::<TomlValue>(&decoded_str) {
+fn parse_managed_config_base64(encoded: &str) -> io::Result<TomlValue> {
+    match toml::from_str::<TomlValue>(&decode_managed_preferences_base64(encoded)?) {
        Ok(TomlValue::Table(parsed)) => Ok(TomlValue::Table(parsed)),
        Ok(other) => {
-            tracing::error!(
-                "Managed preferences TOML must have a table at the root, found {other:?}",
-            );
+            tracing::error!("Managed config TOML must have a table at the root, found {other:?}",);
            Err(io::Error::new(
                io::ErrorKind::InvalidData,
-                "managed preferences root must be a table",
+                "managed config root must be a table",
            ))
        }
        Err(err) => {
-            tracing::error!("Failed to parse managed preferences TOML: {err}");
+            tracing::error!("Failed to parse managed config TOML: {err}");
            Err(io::Error::new(io::ErrorKind::InvalidData, err))
        }
    }
 }
+
+fn parse_managed_requirements_base64(encoded: &str) -> io::Result<ConfigRequirementsToml> {
+    toml::from_str::<ConfigRequirementsToml>(&decode_managed_preferences_base64(encoded)?).map_err(
+        |err| {
+            tracing::error!("Failed to parse managed requirements TOML: {err}");
+            io::Error::new(io::ErrorKind::InvalidData, err)
+        },
+    )
+}
+
+fn decode_managed_preferences_base64(encoded: &str) -> io::Result<String> {
+    String::from_utf8(BASE64_STANDARD.decode(encoded.as_bytes()).map_err(|err| {
+        tracing::error!("Failed to decode managed value as base64: {err}",);
+        io::Error::new(io::ErrorKind::InvalidData, err)
+    })?)
+    .map_err(|err| {
+        tracing::error!("Managed value base64 contents were not valid UTF-8: {err}",);
+        io::Error::new(io::ErrorKind::InvalidData, err)
+    })
+}
--- a/codex-rs/core/src/config_loader/mod.rs
+++ b/codex-rs/core/src/config_loader/mod.rs
@@ -78,8 +78,14 @@ pub async fn load_config_layers_state(
 ) -> io::Result<ConfigLayerStack> {
    let mut config_requirements_toml = ConfigRequirementsToml::default();

-    // TODO(gt): Support an entry in MDM for config requirements and use it
-    // with `config_requirements_toml.merge_unset_fields(...)`, if present.
+    #[cfg(target_os = "macos")]
+    macos::load_managed_admin_requirements_toml(
+        &mut config_requirements_toml,
+        overrides
+            .macos_managed_config_requirements_base64
+            .as_deref(),
+    )
+    .await?;

    // Honor /etc/codex/requirements.toml.
    if cfg!(unix) {
@@ -101,8 +107,6 @@ pub async fn load_config_layers_state(

    let mut layers = Vec::<ConfigLayerEntry>::new();

-    // TODO(gt): Honor managed preferences (macOS only).
-
    // Include an entry for the "system" config folder, loading its config.toml,
    // if it exists.
    let system_config_toml_file = if cfg!(unix) {
--- a/codex-rs/core/src/config_loader/state.rs
+++ b/codex-rs/core/src/config_loader/state.rs
@@ -12,11 +12,14 @@ use std::collections::HashMap;
 use std::path::PathBuf;
 use toml::Value as TomlValue;

+/// LoaderOverrides overrides managed configuration inputs (primarily for tests).
 #[derive(Debug, Default, Clone)]
 pub struct LoaderOverrides {
    pub managed_config_path: Option<PathBuf>,
+    //TODO(gt): Add a macos_ prefix to this field and remove the target_os check.
    #[cfg(target_os = "macos")]
    pub managed_preferences_base64: Option<String>,
+    pub macos_managed_config_requirements_base64: Option<String>,
 }

 #[derive(Debug, Clone, PartialEq)]
--- a/codex-rs/core/src/config_loader/tests.rs
+++ b/codex-rs/core/src/config_loader/tests.rs
@@ -9,6 +9,8 @@ use crate::config_loader::config_requirements::ConfigRequirementsToml;
 use crate::config_loader::fingerprint::version_for_toml;
 use crate::config_loader::load_requirements_toml;
 use codex_protocol::protocol::AskForApproval;
+#[cfg(target_os = "macos")]
+use codex_protocol::protocol::SandboxPolicy;
 use codex_utils_absolute_path::AbsolutePathBuf;
 use pretty_assertions::assert_eq;
 use tempfile::tempdir;
@@ -43,6 +45,7 @@ extra = true
        managed_config_path: Some(managed_path),
        #[cfg(target_os = "macos")]
        managed_preferences_base64: None,
+        macos_managed_config_requirements_base64: None,
    };

    let cwd = AbsolutePathBuf::try_from(tmp.path()).expect("cwd");
@@ -73,10 +76,12 @@ extra = true
 async fn returns_empty_when_all_layers_missing() {
    let tmp = tempdir().expect("tempdir");
    let managed_path = tmp.path().join("managed_config.toml");
+
    let overrides = LoaderOverrides {
        managed_config_path: Some(managed_path),
        #[cfg(target_os = "macos")]
        managed_preferences_base64: None,
+        macos_managed_config_requirements_base64: None,
    };

    let cwd = AbsolutePathBuf::try_from(tmp.path()).expect("cwd");
@@ -141,12 +146,6 @@ async fn returns_empty_when_all_layers_missing() {
 async fn managed_preferences_take_highest_precedence() {
    use base64::Engine;

-    let managed_payload = r#"
-[nested]
-value = "managed"
-flag = false
-"#;
-    let encoded = base64::prelude::BASE64_STANDARD.encode(managed_payload.as_bytes());
    let tmp = tempdir().expect("tempdir");
    let managed_path = tmp.path().join("managed_config.toml");

@@ -168,7 +167,17 @@ flag = true

    let overrides = LoaderOverrides {
        managed_config_path: Some(managed_path),
-        managed_preferences_base64: Some(encoded),
+        managed_preferences_base64: Some(
+            base64::prelude::BASE64_STANDARD.encode(
+                r#"
+[nested]
+value = "managed"
+flag = false
+"#
+                .as_bytes(),
+            ),
+        ),
+        macos_managed_config_requirements_base64: None,
    };

    let cwd = AbsolutePathBuf::try_from(tmp.path()).expect("cwd");
@@ -192,6 +201,108 @@ flag = true
    assert_eq!(nested.get("flag"), Some(&TomlValue::Boolean(false)));
 }

+#[cfg(target_os = "macos")]
+#[tokio::test]
+async fn managed_preferences_requirements_are_applied() -> anyhow::Result<()> {
+    use base64::Engine;
+
+    let tmp = tempdir()?;
+
+    let state = load_config_layers_state(
+        tmp.path(),
+        Some(AbsolutePathBuf::try_from(tmp.path())?),
+        &[] as &[(String, TomlValue)],
+        LoaderOverrides {
+            managed_config_path: Some(tmp.path().join("managed_config.toml")),
+            managed_preferences_base64: Some(String::new()),
+            macos_managed_config_requirements_base64: Some(
+                base64::prelude::BASE64_STANDARD.encode(
+                    r#"
+allowed_approval_policies = ["never"]
+allowed_sandbox_modes = ["read-only"]
+"#
+                    .as_bytes(),
+                ),
+            ),
+        },
+    )
+    .await?;
+
+    assert_eq!(
+        state.requirements().approval_policy.value(),
+        AskForApproval::Never
+    );
+    assert_eq!(
+        *state.requirements().sandbox_policy.get(),
+        SandboxPolicy::ReadOnly
+    );
+    assert!(
+        state
+            .requirements()
+            .approval_policy
+            .can_set(&AskForApproval::OnRequest)
+            .is_err()
+    );
+    assert!(
+        state
+            .requirements()
+            .sandbox_policy
+            .can_set(&SandboxPolicy::WorkspaceWrite {
+                writable_roots: Vec::new(),
+                network_access: false,
+                exclude_tmpdir_env_var: false,
+                exclude_slash_tmp: false,
+            })
+            .is_err()
+    );
+
+    Ok(())
+}
+
+#[cfg(target_os = "macos")]
+#[tokio::test]
+async fn managed_preferences_requirements_take_precedence() -> anyhow::Result<()> {
+    use base64::Engine;
+
+    let tmp = tempdir()?;
+    let managed_path = tmp.path().join("managed_config.toml");
+
+    tokio::fs::write(&managed_path, "approval_policy = \"on-request\"\n").await?;
+
+    let state = load_config_layers_state(
+        tmp.path(),
+        Some(AbsolutePathBuf::try_from(tmp.path())?),
+        &[] as &[(String, TomlValue)],
+        LoaderOverrides {
+            managed_config_path: Some(managed_path),
+            managed_preferences_base64: Some(String::new()),
+            macos_managed_config_requirements_base64: Some(
+                base64::prelude::BASE64_STANDARD.encode(
+                    r#"
+allowed_approval_policies = ["never"]
+"#
+                    .as_bytes(),
+                ),
+            ),
+        },
+    )
+    .await?;
+
+    assert_eq!(
+        state.requirements().approval_policy.value(),
+        AskForApproval::Never
+    );
+    assert!(
+        state
+            .requirements()
+            .approval_policy
+            .can_set(&AskForApproval::OnRequest)
+            .is_err()
+    );
+
+    Ok(())
+}
+
 #[tokio::test(flavor = "current_thread")]
 async fn load_requirements_toml_produces_expected_constraints() -> anyhow::Result<()> {
    let tmp = tempdir()?;
--- a/codex-rs/core/src/context_manager/history.rs
+++ b/codex-rs/core/src/context_manager/history.rs
@@ -1,5 +1,6 @@
 use crate::codex::TurnContext;
 use crate::context_manager::normalize;
+use crate::truncate::TruncationPolicy;
 use crate::truncate::approx_token_count;
 use crate::truncate::approx_tokens_from_byte_count;
 use crate::truncate::truncate_function_output_items_with_policy;
@@ -8,7 +9,6 @@ use codex_protocol::models::ContentItem;
 use codex_protocol::models::FunctionCallOutputContentItem;
 use codex_protocol::models::FunctionCallOutputPayload;
 use codex_protocol::models::ResponseItem;
-use codex_protocol::openai_models::TruncationPolicy;
 use codex_protocol::protocol::TokenUsage;
 use codex_protocol::protocol::TokenUsageInfo;
 use std::ops::Deref;
@@ -225,7 +225,7 @@ impl ContextManager {
    }

    fn process_item(&self, item: &ResponseItem, policy: TruncationPolicy) -> ResponseItem {
-        let policy_with_serialization_budget = policy * 1.2;
+        let policy_with_serialization_budget = policy.mul(1.2);
        match item {
            ResponseItem::FunctionCallOutput { call_id, output } => {
                let truncated =
--- a/codex-rs/core/src/context_manager/history_tests.rs
+++ b/codex-rs/core/src/context_manager/history_tests.rs
@@ -1,5 +1,6 @@
 use super::*;
 use crate::truncate;
+use crate::truncate::TruncationPolicy;
 use codex_git::GhostCommit;
 use codex_protocol::models::ContentItem;
 use codex_protocol::models::FunctionCallOutputPayload;
@@ -8,7 +9,6 @@ use codex_protocol::models::LocalShellExecAction;
 use codex_protocol::models::LocalShellStatus;
 use codex_protocol::models::ReasoningItemContent;
 use codex_protocol::models::ReasoningItemReasoningSummary;
-use codex_protocol::openai_models::TruncationPolicy;
 use pretty_assertions::assert_eq;
 use regex_lite::Regex;

--- a/codex-rs/core/src/error.rs
+++ b/codex-rs/core/src/error.rs
@@ -1,6 +1,7 @@
 use crate::exec::ExecToolCallOutput;
 use crate::token_data::KnownPlan;
 use crate::token_data::PlanType;
+use crate::truncate::TruncationPolicy;
 use crate::truncate::truncate_text;
 use chrono::DateTime;
 use chrono::Datelike;
@@ -8,7 +9,6 @@ use chrono::Local;
 use chrono::Utc;
 use codex_async_utils::CancelErr;
 use codex_protocol::ConversationId;
-use codex_protocol::openai_models::TruncationPolicy;
 use codex_protocol::protocol::CodexErrorInfo;
 use codex_protocol::protocol::ErrorEvent;
 use codex_protocol::protocol::RateLimitSnapshot;
--- a/codex-rs/core/src/features.rs
+++ b/codex-rs/core/src/features.rs
@@ -255,6 +255,10 @@ impl Features {

        features
    }
+
+    pub fn enabled_features(&self) -> Vec<Feature> {
+        self.enabled.iter().copied().collect()
+    }
 }

 /// Keys accepted in `[features]` tables.
--- a/codex-rs/core/src/mcp_connection_manager.rs
+++ b/codex-rs/core/src/mcp_connection_manager.rs
@@ -79,26 +79,60 @@ pub const DEFAULT_STARTUP_TIMEOUT: Duration = Duration::from_secs(10);
 /// Default timeout for individual tool calls.
 const DEFAULT_TOOL_TIMEOUT: Duration = Duration::from_secs(60);

+/// The Responses API requires tool names to match `^[a-zA-Z0-9_-]+$`.
+/// MCP server/tool names are user-controlled, so sanitize the fully-qualified
+/// name we expose to the model by replacing any disallowed character with `_`.
+fn sanitize_responses_api_tool_name(name: &str) -> String {
+    let mut sanitized = String::with_capacity(name.len());
+    for c in name.chars() {
+        if c.is_ascii_alphanumeric() || c == '_' || c == '-' {
+            sanitized.push(c);
+        } else {
+            sanitized.push('_');
+        }
+    }
+
+    if sanitized.is_empty() {
+        "_".to_string()
+    } else {
+        sanitized
+    }
+}
+
+fn sha1_hex(s: &str) -> String {
+    let mut hasher = Sha1::new();
+    hasher.update(s.as_bytes());
+    let sha1 = hasher.finalize();
+    format!("{sha1:x}")
+}
+
 fn qualify_tools<I>(tools: I) -> HashMap<String, ToolInfo>
 where
    I: IntoIterator<Item = ToolInfo>,
 {
    let mut used_names = HashSet::new();
+    let mut seen_raw_names = HashSet::new();
    let mut qualified_tools = HashMap::new();
    for tool in tools {
-        let mut qualified_name = format!(
+        let qualified_name_raw = format!(
            "mcp{}{}{}{}",
            MCP_TOOL_NAME_DELIMITER, tool.server_name, MCP_TOOL_NAME_DELIMITER, tool.tool_name
        );
+        if !seen_raw_names.insert(qualified_name_raw.clone()) {
+            warn!("skipping duplicated tool {}", qualified_name_raw);
+            continue;
+        }
+
+        // Start from a "pretty" name (sanitized), then deterministically disambiguate on
+        // collisions by appending a hash of the *raw* (unsanitized) qualified name. This
+        // ensures tools like `foo.bar` and `foo_bar` don't collapse to the same key.
+        let mut qualified_name = sanitize_responses_api_tool_name(&qualified_name_raw);
+
+        // Enforce length constraints early; use the raw name for the hash input so the
+        // output remains stable even when sanitization changes.
        if qualified_name.len() > MAX_TOOL_NAME_LENGTH {
-            let mut hasher = Sha1::new();
-            hasher.update(qualified_name.as_bytes());
-            let sha1 = hasher.finalize();
-            let sha1_str = format!("{sha1:x}");
-
-            // Truncate to make room for the hash suffix
+            let sha1_str = sha1_hex(&qualified_name_raw);
            let prefix_len = MAX_TOOL_NAME_LENGTH - sha1_str.len();
-
            qualified_name = format!("{}{}", &qualified_name[..prefix_len], sha1_str);
        }

@@ -1035,6 +1069,28 @@ mod tests {
        );
    }

+    #[test]
+    fn test_qualify_tools_sanitizes_invalid_characters() {
+        let tools = vec![create_test_tool("server.one", "tool.two")];
+
+        let qualified_tools = qualify_tools(tools);
+
+        assert_eq!(qualified_tools.len(), 1);
+        let (qualified_name, tool) = qualified_tools.into_iter().next().expect("one tool");
+        assert_eq!(qualified_name, "mcp__server_one__tool_two");
+
+        // The key is sanitized for OpenAI, but we keep original parts for the actual MCP call.
+        assert_eq!(tool.server_name, "server.one");
+        assert_eq!(tool.tool_name, "tool.two");
+
+        assert!(
+            qualified_name
+                .chars()
+                .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-'),
+            "qualified name must be Responses API compatible: {qualified_name:?}"
+        );
+    }
+
    #[test]
    fn tool_filter_allows_by_default() {
        let filter = ToolFilter::default();
--- a/codex-rs/core/src/models_manager/manager.rs
+++ b/codex-rs/core/src/models_manager/manager.rs
@@ -10,7 +10,6 @@ use std::collections::HashSet;
 use std::path::PathBuf;
 use std::sync::Arc;
 use std::time::Duration;
-use tokio::sync::Mutex;
 use tokio::sync::RwLock;
 use tokio::sync::TryLockError;
 use tracing::error;
@@ -25,12 +24,11 @@ use crate::default_client::build_reqwest_client;
 use crate::error::Result as CoreResult;
 use crate::features::Feature;
 use crate::model_provider_info::ModelProviderInfo;
+use crate::models_manager::model_family::ModelFamily;
 use crate::models_manager::model_presets::builtin_model_presets;
-use codex_protocol::openai_models::ModelFamily;

 const MODEL_CACHE_FILE: &str = "models_cache.json";
 const DEFAULT_MODEL_CACHE_TTL: Duration = Duration::from_secs(300);
-const MODELS_REFRESH_TIMEOUT: Duration = Duration::from_secs(5);
 const OPENAI_DEFAULT_API_MODEL: &str = "gpt-5.1-codex-max";
 const OPENAI_DEFAULT_CHATGPT_MODEL: &str = "gpt-5.2-codex";
 const CODEX_AUTO_BALANCED_MODEL: &str = "codex-auto-balanced";
@@ -41,7 +39,6 @@ pub struct ModelsManager {
    // todo(aibrahim) merge available_models and model family creation into one struct
    local_models: Vec<ModelPreset>,
    remote_models: RwLock<Vec<ModelInfo>>,
-    refresh_lock: Mutex<()>,
    auth_manager: Arc<AuthManager>,
    etag: RwLock<Option<String>>,
    codex_home: PathBuf,
@@ -56,7 +53,6 @@ impl ModelsManager {
        Self {
            local_models: builtin_model_presets(auth_manager.get_auth_mode()),
            remote_models: RwLock::new(Self::load_remote_models_from_file().unwrap_or_default()),
-            refresh_lock: Mutex::new(()),
            auth_manager,
            etag: RwLock::new(None),
            codex_home,
@@ -72,7 +68,6 @@ impl ModelsManager {
        Self {
            local_models: builtin_model_presets(auth_manager.get_auth_mode()),
            remote_models: RwLock::new(Self::load_remote_models_from_file().unwrap_or_default()),
-            refresh_lock: Mutex::new(()),
            auth_manager,
            etag: RwLock::new(None),
            codex_home,
@@ -82,21 +77,26 @@ impl ModelsManager {
    }

    /// Fetch the latest remote models, using the on-disk cache when still fresh.
-    pub async fn refresh_available_models(&self, config: &Config) -> CoreResult<()> {
+    pub async fn refresh_available_models_with_cache(&self, config: &Config) -> CoreResult<()> {
        if !config.features.enabled(Feature::RemoteModels)
            || self.auth_manager.get_auth_mode() == Some(AuthMode::ApiKey)
        {
            return Ok(());
        }
-
-        // Prevent duplicate `/models` refreshes when multiple callers try to refresh
-        // concurrently during startup (or when multiple features request models).
-        let _guard = self.refresh_lock.lock().await;
-
        if self.try_load_cache().await {
            return Ok(());
        }
+        self.refresh_available_models_no_cache(config.features.enabled(Feature::RemoteModels))
+            .await
+    }

+    pub(crate) async fn refresh_available_models_no_cache(
+        &self,
+        remote_models_feature: bool,
+    ) -> CoreResult<()> {
+        if !remote_models_feature || self.auth_manager.get_auth_mode() == Some(AuthMode::ApiKey) {
+            return Ok(());
+        }
        let auth = self.auth_manager.auth();
        let api_provider = self.provider.to_api_provider(Some(AuthMode::ChatGPT))?;
        let api_auth = auth_provider_from_auth(auth.clone(), &self.provider).await?;
@@ -104,25 +104,10 @@ impl ModelsManager {
        let client = ModelsClient::new(transport, api_provider, api_auth);

        let client_version = format_client_version_to_whole();
-        let response = tokio::time::timeout(
-            MODELS_REFRESH_TIMEOUT,
-            client.list_models(&client_version, HeaderMap::new()),
-        )
-        .await;
-
-        let ModelsResponse { models, etag } = match response {
-            Ok(response) => response.map_err(map_api_error)?,
-            Err(_) => {
-                error!(
-                    "timed out refreshing /models after {}s",
-                    MODELS_REFRESH_TIMEOUT.as_secs()
-                );
-                // Leave `remote_models` unchanged so the preloaded fallback remains available.
-                return Ok(());
-            }
-        };
-
-        let etag = (!etag.is_empty()).then_some(etag);
+        let (models, etag) = client
+            .list_models(&client_version, HeaderMap::new())
+            .await
+            .map_err(map_api_error)?;

        self.apply_remote_models(models.clone()).await;
        *self.etag.write().await = etag.clone();
@@ -131,7 +116,7 @@ impl ModelsManager {
    }

    pub async fn list_models(&self, config: &Config) -> Vec<ModelPreset> {
-        if let Err(err) = self.refresh_available_models(config).await {
+        if let Err(err) = self.refresh_available_models_with_cache(config).await {
            error!("failed to refresh available models: {err}");
        }
        let remote_models = self.remote_models(config).await;
@@ -149,18 +134,16 @@ impl ModelsManager {

    /// Look up the requested model family while applying remote metadata overrides.
    pub async fn construct_model_family(&self, model: &str, config: &Config) -> ModelFamily {
-        crate::models_manager::model_family::with_config_overrides(
-            Self::find_family_for_model(model)
-                .with_remote_overrides(self.remote_models(config).await),
-            config,
-        )
+        Self::find_family_for_model(model)
+            .with_remote_overrides(self.remote_models(config).await)
+            .with_config_overrides(config)
    }

    pub async fn get_model(&self, model: &Option<String>, config: &Config) -> String {
        if let Some(model) = model.as_ref() {
            return model.to_string();
        }
-        if let Err(err) = self.refresh_available_models(config).await {
+        if let Err(err) = self.refresh_available_models_with_cache(config).await {
            error!("failed to refresh available models: {err}");
        }
        // if codex-auto-balanced exists & signed in with chatgpt mode, return it, otherwise return the default model
@@ -178,6 +161,18 @@ impl ModelsManager {
        }
        OPENAI_DEFAULT_API_MODEL.to_string()
    }
+    pub async fn refresh_if_new_etag(&self, etag: String, remote_models_feature: bool) {
+        let current_etag = self.get_etag().await;
+        if current_etag.clone().is_some() && current_etag.as_deref() == Some(etag.as_str()) {
+            return;
+        }
+        if let Err(err) = self
+            .refresh_available_models_no_cache(remote_models_feature)
+            .await
+        {
+            error!("failed to refresh available models: {err}");
+        }
+    }

    #[cfg(any(test, feature = "test-support"))]
    pub fn get_model_offline(model: Option<&str>) -> String {
@@ -187,10 +182,11 @@ impl ModelsManager {
    #[cfg(any(test, feature = "test-support"))]
    /// Offline helper that builds a `ModelFamily` without consulting remote state.
    pub fn construct_model_family_offline(model: &str, config: &Config) -> ModelFamily {
-        crate::models_manager::model_family::with_config_overrides(
-            Self::find_family_for_model(model),
-            config,
-        )
+        Self::find_family_for_model(model).with_config_overrides(config)
+    }
+
+    async fn get_etag(&self) -> Option<String> {
+        self.etag.read().await.clone()
    }

    /// Replace the cached remote models and rebuild the derived presets list.
@@ -316,26 +312,14 @@ impl ModelsManager {

 /// Convert a client version string to a whole version string (e.g. "1.2.3-alpha.4" -> "1.2.3")
 fn format_client_version_to_whole() -> String {
-    format_client_version_from_parts(
+    format!(
+        "{}.{}.{}",
        env!("CARGO_PKG_VERSION_MAJOR"),
        env!("CARGO_PKG_VERSION_MINOR"),
-        env!("CARGO_PKG_VERSION_PATCH"),
+        env!("CARGO_PKG_VERSION_PATCH")
    )
 }

-fn format_client_version_from_parts(major: &str, minor: &str, patch: &str) -> String {
-    const DEV_VERSION: &str = "0.0.0";
-    const FALLBACK_VERSION: &str = "99.99.99";
-
-    let normalized = format!("{major}.{minor}.{patch}");
-
-    if normalized == DEV_VERSION {
-        FALLBACK_VERSION.to_string()
-    } else {
-        normalized
-    }
-}
-
 #[cfg(test)]
 mod tests {
    use super::cache::ModelsCache;
@@ -382,7 +366,6 @@ mod tests {
            "truncation_policy": {"mode": "bytes", "limit": 10_000},
            "supports_parallel_tool_calls": false,
            "context_window": null,
-            "reasoning_summary_format": "none",
            "experimental_supported_tools": [],
        }))
        .expect("valid model")
@@ -417,7 +400,6 @@ mod tests {
            &server,
            ModelsResponse {
                models: remote_models.clone(),
-                etag: String::new(),
            },
        )
        .await;
@@ -435,7 +417,7 @@ mod tests {
        let manager = ModelsManager::with_provider(auth_manager, provider);

        manager
-            .refresh_available_models(&config)
+            .refresh_available_models_with_cache(&config)
            .await
            .expect("refresh succeeds");
        let cached_remote = manager.remote_models(&config).await;
@@ -474,7 +456,6 @@ mod tests {
            &server,
            ModelsResponse {
                models: remote_models.clone(),
-                etag: String::new(),
            },
        )
        .await;
@@ -495,7 +476,7 @@ mod tests {
        let manager = ModelsManager::with_provider(auth_manager, provider);

        manager
-            .refresh_available_models(&config)
+            .refresh_available_models_with_cache(&config)
            .await
            .expect("first refresh succeeds");
        assert_eq!(
@@ -506,7 +487,7 @@ mod tests {

        // Second call should read from cache and avoid the network.
        manager
-            .refresh_available_models(&config)
+            .refresh_available_models_with_cache(&config)
            .await
            .expect("cached refresh succeeds");
        assert_eq!(
@@ -529,7 +510,6 @@ mod tests {
            &server,
            ModelsResponse {
                models: initial_models.clone(),
-                etag: String::new(),
            },
        )
        .await;
@@ -550,7 +530,7 @@ mod tests {
        let manager = ModelsManager::with_provider(auth_manager, provider);

        manager
-            .refresh_available_models(&config)
+            .refresh_available_models_with_cache(&config)
            .await
            .expect("initial refresh succeeds");

@@ -570,13 +550,12 @@ mod tests {
            &server,
            ModelsResponse {
                models: updated_models.clone(),
-                etag: String::new(),
            },
        )
        .await;

        manager
-            .refresh_available_models(&config)
+            .refresh_available_models_with_cache(&config)
            .await
            .expect("second refresh succeeds");
        assert_eq!(
@@ -604,7 +583,6 @@ mod tests {
            &server,
            ModelsResponse {
                models: initial_models,
-                etag: String::new(),
            },
        )
        .await;
@@ -623,7 +601,7 @@ mod tests {
        manager.cache_ttl = Duration::ZERO;

        manager
-            .refresh_available_models(&config)
+            .refresh_available_models_with_cache(&config)
            .await
            .expect("initial refresh succeeds");

@@ -633,13 +611,12 @@ mod tests {
            &server,
            ModelsResponse {
                models: refreshed_models,
-                etag: String::new(),
            },
        )
        .await;

        manager
-            .refresh_available_models(&config)
+            .refresh_available_models_with_cache(&config)
            .await
            .expect("second refresh succeeds");

--- a/codex-rs/core/src/models_manager/model_family.rs
+++ b/codex-rs/core/src/models_manager/model_family.rs
@@ -1,12 +1,11 @@
 use codex_protocol::config_types::Verbosity;
 use codex_protocol::openai_models::ApplyPatchToolType;
 use codex_protocol::openai_models::ConfigShellToolType;
-use codex_protocol::openai_models::ModelFamily;
+use codex_protocol::openai_models::ModelInfo;
 use codex_protocol::openai_models::ReasoningEffort;
-use codex_protocol::openai_models::ReasoningSummaryFormat;
-use codex_protocol::openai_models::TruncationPolicy;

 use crate::config::Config;
+use crate::truncate::TruncationPolicy;

 /// The `instructions` field in the payload sent to a model should always start
 /// with this content.
@@ -19,20 +18,139 @@ const GPT_5_1_CODEX_MAX_INSTRUCTIONS: &str = include_str!("../../gpt-5.1-codex-m
 const GPT_5_2_CODEX_INSTRUCTIONS: &str = include_str!("../../gpt-5.2-codex_prompt.md");
 pub(crate) const CONTEXT_WINDOW_272K: i64 = 272_000;

-pub fn with_config_overrides(mut mf: ModelFamily, config: &Config) -> ModelFamily {
-    if let Some(supports_reasoning_summaries) = config.model_supports_reasoning_summaries {
-        mf.supports_reasoning_summaries = supports_reasoning_summaries;
+/// A model family is a group of models that share certain characteristics.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct ModelFamily {
+    /// The full model slug used to derive this model family, e.g.
+    /// "gpt-4.1-2025-04-14".
+    pub slug: String,
+
+    /// The model family name, e.g. "gpt-4.1". This string is used when deriving
+    /// default metadata for the family, such as context windows.
+    pub family: String,
+
+    /// True if the model needs additional instructions on how to use the
+    /// "virtual" `apply_patch` CLI.
+    pub needs_special_apply_patch_instructions: bool,
+
+    /// Maximum supported context window, if known.
+    pub context_window: Option<i64>,
+
+    /// Token threshold for automatic compaction if config does not override it.
+    auto_compact_token_limit: Option<i64>,
+
+    /// Whether the `reasoning` field can be set when making a request to this
+    /// model family. Note it has `effort` and `summary` subfields (though
+    /// `summary` is optional).
+    pub supports_reasoning_summaries: bool,
+
+    /// The reasoning effort to use for this model family when none is explicitly chosen.
+    pub default_reasoning_effort: Option<ReasoningEffort>,
+
+    /// Whether this model supports parallel tool calls when using the
+    /// Responses API.
+    pub supports_parallel_tool_calls: bool,
+
+    /// Present if the model performs better when `apply_patch` is provided as
+    /// a tool call instead of just a bash command
+    pub apply_patch_tool_type: Option<ApplyPatchToolType>,
+
+    /// Instructions to use for querying the model.
+    pub base_instructions: String,
+
+    /// Names of beta tools that should be exposed to this model family.
+    pub experimental_supported_tools: Vec<String>,
+
+    /// Percentage of the context window considered usable for inputs, after
+    /// reserving headroom for system prompts, tool overhead, and model output.
+    /// This is applied when computing the effective context window seen by
+    /// consumers.
+    pub effective_context_window_percent: i64,
+
+    /// Whether the model family supports setting the verbosity level when using the Responses API.
+    pub support_verbosity: bool,
+
+    /// The default verbosity level for this model family when using the Responses API.
+    pub default_verbosity: Option<Verbosity>,
+
+    /// Preferred shell tool type for this model family when features do not override it.
+    pub shell_type: ConfigShellToolType,
+
+    pub truncation_policy: TruncationPolicy,
+}
+
+impl ModelFamily {
+    pub(super) fn with_config_overrides(mut self, config: &Config) -> Self {
+        if let Some(supports_reasoning_summaries) = config.model_supports_reasoning_summaries {
+            self.supports_reasoning_summaries = supports_reasoning_summaries;
+        }
+        if let Some(context_window) = config.model_context_window {
+            self.context_window = Some(context_window);
+        }
+        if let Some(auto_compact_token_limit) = config.model_auto_compact_token_limit {
+            self.auto_compact_token_limit = Some(auto_compact_token_limit);
+        }
+        self
    }
-    if let Some(reasoning_summary_format) = config.model_reasoning_summary_format.as_ref() {
-        mf.reasoning_summary_format = reasoning_summary_format.clone();
+    pub(super) fn with_remote_overrides(mut self, remote_models: Vec<ModelInfo>) -> Self {
+        for model in remote_models {
+            if model.slug == self.slug {
+                self.apply_remote_overrides(model);
+            }
+        }
+        self
    }
-    if let Some(context_window) = config.model_context_window {
-        mf.context_window = Some(context_window);
+
+    fn apply_remote_overrides(&mut self, model: ModelInfo) {
+        let ModelInfo {
+            slug: _,
+            display_name: _,
+            description: _,
+            default_reasoning_level,
+            supported_reasoning_levels: _,
+            shell_type,
+            visibility: _,
+            supported_in_api: _,
+            priority: _,
+            upgrade: _,
+            base_instructions,
+            supports_reasoning_summaries,
+            support_verbosity,
+            default_verbosity,
+            apply_patch_tool_type,
+            truncation_policy,
+            supports_parallel_tool_calls,
+            context_window,
+            experimental_supported_tools,
+        } = model;
+
+        self.default_reasoning_effort = Some(default_reasoning_level);
+        self.shell_type = shell_type;
+        if let Some(base) = base_instructions {
+            self.base_instructions = base;
+        }
+        self.supports_reasoning_summaries = supports_reasoning_summaries;
+        self.support_verbosity = support_verbosity;
+        self.default_verbosity = default_verbosity;
+        self.apply_patch_tool_type = apply_patch_tool_type;
+        self.truncation_policy = truncation_policy.into();
+        self.supports_parallel_tool_calls = supports_parallel_tool_calls;
+        self.context_window = context_window;
+        self.experimental_supported_tools = experimental_supported_tools;
    }
-    if let Some(auto_compact_token_limit) = config.model_auto_compact_token_limit {
-        mf.auto_compact_token_limit = Some(auto_compact_token_limit);
+
+    pub fn auto_compact_token_limit(&self) -> Option<i64> {
+        self.auto_compact_token_limit
+            .or(self.context_window.map(Self::default_auto_compact_limit))
+    }
+
+    const fn default_auto_compact_limit(context_window: i64) -> i64 {
+        (context_window * 9) / 10
+    }
+
+    pub fn get_model_slug(&self) -> &str {
+        &self.slug
    }
-    mf
 }

 macro_rules! model_family {
@@ -48,7 +166,6 @@ macro_rules! model_family {
            context_window: Some(CONTEXT_WINDOW_272K),
            auto_compact_token_limit: None,
            supports_reasoning_summaries: false,
-            reasoning_summary_format: ReasoningSummaryFormat::None,
            supports_parallel_tool_calls: false,
            apply_patch_tool_type: None,
            base_instructions: BASE_INSTRUCTIONS.to_string(),
@@ -123,7 +240,6 @@ pub(super) fn find_family_for_model(slug: &str) -> ModelFamily {
        model_family!(
            slug, slug,
            supports_reasoning_summaries: true,
-            reasoning_summary_format: ReasoningSummaryFormat::Experimental,
            base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(),
            experimental_supported_tools: vec![
                "grep_files".to_string(),
@@ -143,7 +259,6 @@ pub(super) fn find_family_for_model(slug: &str) -> ModelFamily {
        model_family!(
            slug, slug,
            supports_reasoning_summaries: true,
-            reasoning_summary_format: ReasoningSummaryFormat::Experimental,
            base_instructions: GPT_5_2_CODEX_INSTRUCTIONS.to_string(),
            apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
            shell_type: ConfigShellToolType::ShellCommand,
@@ -172,7 +287,6 @@ pub(super) fn find_family_for_model(slug: &str) -> ModelFamily {
        model_family!(
            slug, slug,
            supports_reasoning_summaries: true,
-            reasoning_summary_format: ReasoningSummaryFormat::Experimental,
            base_instructions: GPT_5_2_CODEX_INSTRUCTIONS.to_string(),
            apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
            shell_type: ConfigShellToolType::ShellCommand,
@@ -185,7 +299,6 @@ pub(super) fn find_family_for_model(slug: &str) -> ModelFamily {
        model_family!(
            slug, slug,
            supports_reasoning_summaries: true,
-            reasoning_summary_format: ReasoningSummaryFormat::Experimental,
            base_instructions: GPT_5_2_CODEX_INSTRUCTIONS.to_string(),
            apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
            shell_type: ConfigShellToolType::ShellCommand,
@@ -198,7 +311,6 @@ pub(super) fn find_family_for_model(slug: &str) -> ModelFamily {
        model_family!(
            slug, slug,
            supports_reasoning_summaries: true,
-            reasoning_summary_format: ReasoningSummaryFormat::Experimental,
            base_instructions: GPT_5_1_CODEX_MAX_INSTRUCTIONS.to_string(),
            apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
            shell_type: ConfigShellToolType::ShellCommand,
@@ -214,7 +326,6 @@ pub(super) fn find_family_for_model(slug: &str) -> ModelFamily {
        model_family!(
            slug, slug,
            supports_reasoning_summaries: true,
-            reasoning_summary_format: ReasoningSummaryFormat::Experimental,
            base_instructions: GPT_5_CODEX_INSTRUCTIONS.to_string(),
            apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
            shell_type: ConfigShellToolType::ShellCommand,
@@ -289,7 +400,6 @@ fn derive_default_model_family(model: &str) -> ModelFamily {
        context_window: None,
        auto_compact_token_limit: None,
        supports_reasoning_summaries: false,
-        reasoning_summary_format: ReasoningSummaryFormat::None,
        supports_parallel_tool_calls: false,
        apply_patch_tool_type: None,
        base_instructions: BASE_INSTRUCTIONS.to_string(),
@@ -306,7 +416,6 @@ fn derive_default_model_family(model: &str) -> ModelFamily {
 #[cfg(test)]
 mod tests {
    use super::*;
-    use codex_protocol::openai_models::ModelInfo;
    use codex_protocol::openai_models::ModelVisibility;
    use codex_protocol::openai_models::ReasoningEffortPreset;
    use codex_protocol::openai_models::TruncationPolicyConfig;
@@ -334,7 +443,6 @@ mod tests {
            truncation_policy: TruncationPolicyConfig::bytes(10_000),
            supports_parallel_tool_calls: false,
            context_window: None,
-            reasoning_summary_format: ReasoningSummaryFormat::None,
            experimental_supported_tools: Vec::new(),
        }
    }
@@ -398,7 +506,6 @@ mod tests {
            experimental_supported_tools: vec!["local".to_string()],
            truncation_policy: TruncationPolicy::Bytes(10_000),
            context_window: Some(100),
-            reasoning_summary_format: ReasoningSummaryFormat::None,
        );

        let updated = family.with_remote_overrides(vec![ModelInfo {
@@ -423,7 +530,6 @@ mod tests {
            truncation_policy: TruncationPolicyConfig::tokens(2_000),
            supports_parallel_tool_calls: true,
            context_window: Some(400_000),
-            reasoning_summary_format: ReasoningSummaryFormat::Experimental,
            experimental_supported_tools: vec!["alpha".to_string(), "beta".to_string()],
        }]);

@@ -442,10 +548,6 @@ mod tests {
        assert_eq!(updated.truncation_policy, TruncationPolicy::Tokens(2_000));
        assert!(updated.supports_parallel_tool_calls);
        assert_eq!(updated.context_window, Some(400_000));
-        assert_eq!(
-            updated.reasoning_summary_format,
-            ReasoningSummaryFormat::Experimental
-        );
        assert_eq!(
            updated.experimental_supported_tools,
            vec!["alpha".to_string(), "beta".to_string()]
--- a/codex-rs/core/src/state/session.rs
+++ b/codex-rs/core/src/state/session.rs
@@ -7,7 +7,7 @@ use crate::context_manager::ContextManager;
 use crate::protocol::RateLimitSnapshot;
 use crate::protocol::TokenUsage;
 use crate::protocol::TokenUsageInfo;
-use codex_protocol::openai_models::TruncationPolicy;
+use crate::truncate::TruncationPolicy;

 /// Persistent, session-scoped state previously stored directly on `Session`.
 pub(crate) struct SessionState {
--- a/codex-rs/core/src/tools/mod.rs
+++ b/codex-rs/core/src/tools/mod.rs
@@ -10,9 +10,9 @@ pub mod sandboxing;
 pub mod spec;

 use crate::exec::ExecToolCallOutput;
+use crate::truncate::TruncationPolicy;
 use crate::truncate::formatted_truncate_text;
 use crate::truncate::truncate_text;
-use codex_protocol::openai_models::TruncationPolicy;
 pub use router::ToolRouter;
 use serde::Serialize;

--- a/codex-rs/core/src/tools/spec.rs
+++ b/codex-rs/core/src/tools/spec.rs
@@ -2,13 +2,13 @@ use crate::client_common::tools::ResponsesApiTool;
 use crate::client_common::tools::ToolSpec;
 use crate::features::Feature;
 use crate::features::Features;
+use crate::models_manager::model_family::ModelFamily;
 use crate::tools::handlers::PLAN_TOOL;
 use crate::tools::handlers::apply_patch::create_apply_patch_freeform_tool;
 use crate::tools::handlers::apply_patch::create_apply_patch_json_tool;
 use crate::tools::registry::ToolRegistryBuilder;
 use codex_protocol::openai_models::ApplyPatchToolType;
 use codex_protocol::openai_models::ConfigShellToolType;
-use codex_protocol::openai_models::ModelFamily;
 use serde::Deserialize;
 use serde::Serialize;
 use serde_json::Value as JsonValue;
--- a/codex-rs/core/src/truncate.rs
+++ b/codex-rs/core/src/truncate.rs
@@ -4,63 +4,99 @@

 use crate::config::Config;
 use codex_protocol::models::FunctionCallOutputContentItem;
-use codex_protocol::openai_models::TruncationPolicy;
+use codex_protocol::openai_models::TruncationMode;
+use codex_protocol::openai_models::TruncationPolicyConfig;
+use codex_protocol::protocol::TruncationPolicy as ProtocolTruncationPolicy;

 const APPROX_BYTES_PER_TOKEN: usize = 4;

-/// Create a new `TruncationPolicy` with config overrides applied.
-pub fn new_truncation_policy(
-    config: &Config,
-    truncation_policy: TruncationPolicy,
-) -> TruncationPolicy {
-    let config_token_limit = config.tool_output_token_limit;
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum TruncationPolicy {
+    Bytes(usize),
+    Tokens(usize),
+}

-    match truncation_policy {
-        TruncationPolicy::Bytes(family_bytes) => {
-            if let Some(token_limit) = config_token_limit {
-                TruncationPolicy::Bytes(approx_bytes_for_tokens(token_limit))
-            } else {
-                TruncationPolicy::Bytes(family_bytes)
-            }
-        }
-        TruncationPolicy::Tokens(family_tokens) => {
-            if let Some(token_limit) = config_token_limit {
-                TruncationPolicy::Tokens(token_limit)
-            } else {
-                TruncationPolicy::Tokens(family_tokens)
-            }
+impl From<TruncationPolicy> for ProtocolTruncationPolicy {
+    fn from(value: TruncationPolicy) -> Self {
+        match value {
+            TruncationPolicy::Bytes(bytes) => Self::Bytes(bytes),
+            TruncationPolicy::Tokens(tokens) => Self::Tokens(tokens),
        }
    }
 }

-/// Returns a token budget derived from this policy.
-///
-/// - For `Tokens`, this is the explicit token limit.
-/// - For `Bytes`, this is an approximate token budget using the global
-///   bytes-per-token heuristic.
-pub fn token_budget(policy: &TruncationPolicy) -> usize {
-    match policy {
-        TruncationPolicy::Bytes(bytes) => {
-            usize::try_from(approx_tokens_from_byte_count(*bytes)).unwrap_or(usize::MAX)
+impl From<TruncationPolicyConfig> for TruncationPolicy {
+    fn from(config: TruncationPolicyConfig) -> Self {
+        match config.mode {
+            TruncationMode::Bytes => Self::Bytes(config.limit as usize),
+            TruncationMode::Tokens => Self::Tokens(config.limit as usize),
        }
-        TruncationPolicy::Tokens(tokens) => *tokens,
    }
 }

-/// Returns a byte budget derived from this policy.
-///
-/// - For `Bytes`, this is the explicit byte limit.
-/// - For `Tokens`, this is an approximate byte budget using the global
-///   bytes-per-token heuristic.
-pub fn byte_budget(policy: &TruncationPolicy) -> usize {
-    match policy {
-        TruncationPolicy::Bytes(bytes) => *bytes,
-        TruncationPolicy::Tokens(tokens) => approx_bytes_for_tokens(*tokens),
+impl TruncationPolicy {
+    /// Scale the underlying budget by `multiplier`, rounding up to avoid under-budgeting.
+    pub fn mul(self, multiplier: f64) -> Self {
+        match self {
+            TruncationPolicy::Bytes(bytes) => {
+                TruncationPolicy::Bytes((bytes as f64 * multiplier).ceil() as usize)
+            }
+            TruncationPolicy::Tokens(tokens) => {
+                TruncationPolicy::Tokens((tokens as f64 * multiplier).ceil() as usize)
+            }
+        }
+    }
+
+    pub fn new(config: &Config, truncation_policy: TruncationPolicy) -> Self {
+        let config_token_limit = config.tool_output_token_limit;
+
+        match truncation_policy {
+            TruncationPolicy::Bytes(family_bytes) => {
+                if let Some(token_limit) = config_token_limit {
+                    Self::Bytes(approx_bytes_for_tokens(token_limit))
+                } else {
+                    Self::Bytes(family_bytes)
+                }
+            }
+            TruncationPolicy::Tokens(family_tokens) => {
+                if let Some(token_limit) = config_token_limit {
+                    Self::Tokens(token_limit)
+                } else {
+                    Self::Tokens(family_tokens)
+                }
+            }
+        }
+    }
+
+    /// Returns a token budget derived from this policy.
+    ///
+    /// - For `Tokens`, this is the explicit token limit.
+    /// - For `Bytes`, this is an approximate token budget using the global
+    ///   bytes-per-token heuristic.
+    pub fn token_budget(&self) -> usize {
+        match self {
+            TruncationPolicy::Bytes(bytes) => {
+                usize::try_from(approx_tokens_from_byte_count(*bytes)).unwrap_or(usize::MAX)
+            }
+            TruncationPolicy::Tokens(tokens) => *tokens,
+        }
+    }
+
+    /// Returns a byte budget derived from this policy.
+    ///
+    /// - For `Bytes`, this is the explicit byte limit.
+    /// - For `Tokens`, this is an approximate byte budget using the global
+    ///   bytes-per-token heuristic.
+    pub fn byte_budget(&self) -> usize {
+        match self {
+            TruncationPolicy::Bytes(bytes) => *bytes,
+            TruncationPolicy::Tokens(tokens) => approx_bytes_for_tokens(*tokens),
+        }
    }
 }

 pub(crate) fn formatted_truncate_text(content: &str, policy: TruncationPolicy) -> String {
-    if content.len() <= byte_budget(&policy) {
+    if content.len() <= policy.byte_budget() {
        return content.to_string();
    }
    let total_lines = content.lines().count();
@@ -86,8 +122,8 @@ pub(crate) fn truncate_function_output_items_with_policy(
 ) -> Vec<FunctionCallOutputContentItem> {
    let mut out: Vec<FunctionCallOutputContentItem> = Vec::with_capacity(items.len());
    let mut remaining_budget = match policy {
-        TruncationPolicy::Bytes(_) => byte_budget(&policy),
-        TruncationPolicy::Tokens(_) => token_budget(&policy),
+        TruncationPolicy::Bytes(_) => policy.byte_budget(),
+        TruncationPolicy::Tokens(_) => policy.token_budget(),
    };
    let mut omitted_text_items = 0usize;

@@ -146,7 +182,7 @@ fn truncate_with_token_budget(s: &str, policy: TruncationPolicy) -> (String, Opt
    if s.is_empty() {
        return (String::new(), None);
    }
-    let max_tokens = token_budget(&policy);
+    let max_tokens = policy.token_budget();

    let byte_len = s.len();
    if max_tokens > 0 && byte_len <= approx_bytes_for_tokens(max_tokens) {
@@ -172,7 +208,7 @@ fn truncate_with_byte_estimate(s: &str, policy: TruncationPolicy) -> String {
    }

    let total_chars = s.chars().count();
-    let max_bytes = byte_budget(&policy);
+    let max_bytes = policy.byte_budget();

    if max_bytes == 0 {
        // No budget to show content; just report that everything was truncated.
--- a/codex-rs/core/src/unified_exec/async_watcher.rs
+++ b/codex-rs/core/src/unified_exec/async_watcher.rs
@@ -10,6 +10,7 @@ use tokio::time::Sleep;
 use crate::codex::Session;
 use crate::codex::TurnContext;
 use crate::exec::ExecToolCallOutput;
+use crate::exec::MAX_EXEC_OUTPUT_DELTAS_PER_CALL;
 use crate::exec::StreamOutput;
 use crate::protocol::EventMsg;
 use crate::protocol::ExecCommandOutputDeltaEvent;
@@ -25,6 +26,14 @@ use super::session::UnifiedExecSession;

 pub(crate) const TRAILING_OUTPUT_GRACE: Duration = Duration::from_millis(100);

+/// Upper bound for a single ExecCommandOutputDelta chunk emitted by unified exec.
+///
+/// The unified exec output buffer already caps *retained* output (see
+/// `UNIFIED_EXEC_OUTPUT_MAX_BYTES`), but we also cap per-event payload size so
+/// downstream event consumers (especially app-server JSON-RPC) don't have to
+/// process arbitrarily large delta payloads.
+const UNIFIED_EXEC_OUTPUT_DELTA_MAX_BYTES: usize = 8192;
+
 /// Spawn a background task that continuously reads from the PTY, appends to the
 /// shared transcript, and emits ExecCommandOutputDelta events on UTF‑8
 /// boundaries.
@@ -45,6 +54,7 @@ pub(crate) fn start_streaming_output(
        use tokio::sync::broadcast::error::RecvError;

        let mut pending = Vec::<u8>::new();
+        let mut emitted_deltas: usize = 0;

        let mut grace_sleep: Option<Pin<Box<Sleep>>> = None;

@@ -82,6 +92,7 @@ pub(crate) fn start_streaming_output(
                        &call_id,
                        &session_ref,
                        &turn_ref,
+                        &mut emitted_deltas,
                        chunk,
                    ).await;
                }
@@ -135,6 +146,7 @@ async fn process_chunk(
    call_id: &str,
    session_ref: &Arc<Session>,
    turn_ref: &Arc<TurnContext>,
+    emitted_deltas: &mut usize,
    chunk: Vec<u8>,
 ) {
    pending.extend_from_slice(&chunk);
@@ -144,6 +156,10 @@ async fn process_chunk(
            guard.append(&prefix);
        }

+        if *emitted_deltas >= MAX_EXEC_OUTPUT_DELTAS_PER_CALL {
+            continue;
+        }
+
        let event = ExecCommandOutputDeltaEvent {
            call_id: call_id.to_string(),
            stream: ExecOutputStream::Stdout,
@@ -152,6 +168,7 @@ async fn process_chunk(
        session_ref
            .send_event(turn_ref.as_ref(), EventMsg::ExecCommandOutputDelta(event))
            .await;
+        *emitted_deltas += 1;
    }
 }

@@ -193,12 +210,16 @@ pub(crate) async fn emit_exec_end_for_unified_exec(
 }

 fn split_valid_utf8_prefix(buffer: &mut Vec<u8>) -> Option<Vec<u8>> {
+    split_valid_utf8_prefix_with_max(buffer, UNIFIED_EXEC_OUTPUT_DELTA_MAX_BYTES)
+}
+
+fn split_valid_utf8_prefix_with_max(buffer: &mut Vec<u8>, max_bytes: usize) -> Option<Vec<u8>> {
    if buffer.is_empty() {
        return None;
    }

-    let len = buffer.len();
-    let mut split = len;
+    let max_len = buffer.len().min(max_bytes);
+    let mut split = max_len;
    while split > 0 {
        if std::str::from_utf8(&buffer[..split]).is_ok() {
            let prefix = buffer[..split].to_vec();
@@ -206,7 +227,7 @@ fn split_valid_utf8_prefix(buffer: &mut Vec<u8>) -> Option<Vec<u8>> {
            return Some(prefix);
        }

-        if len - split > 4 {
+        if max_len - split > 4 {
            break;
        }
        split -= 1;
@@ -229,3 +250,42 @@ async fn resolve_aggregated_output(

    String::from_utf8_lossy(&guard.data).to_string()
 }
+
+#[cfg(test)]
+mod tests {
+    use super::split_valid_utf8_prefix_with_max;
+
+    use pretty_assertions::assert_eq;
+
+    #[test]
+    fn split_valid_utf8_prefix_respects_max_bytes_for_ascii() {
+        let mut buf = b"hello word!".to_vec();
+
+        let first = split_valid_utf8_prefix_with_max(&mut buf, 5).expect("expected prefix");
+        assert_eq!(first, b"hello".to_vec());
+        assert_eq!(buf, b" word!".to_vec());
+
+        let second = split_valid_utf8_prefix_with_max(&mut buf, 5).expect("expected prefix");
+        assert_eq!(second, b" word".to_vec());
+        assert_eq!(buf, b"!".to_vec());
+    }
+
+    #[test]
+    fn split_valid_utf8_prefix_avoids_splitting_utf8_codepoints() {
+        // "é" is 2 bytes in UTF-8. With a max of 3 bytes, we should only emit 1 char (2 bytes).
+        let mut buf = "ééé".as_bytes().to_vec();
+
+        let first = split_valid_utf8_prefix_with_max(&mut buf, 3).expect("expected prefix");
+        assert_eq!(std::str::from_utf8(&first).unwrap(), "é");
+        assert_eq!(buf, "éé".as_bytes().to_vec());
+    }
+
+    #[test]
+    fn split_valid_utf8_prefix_makes_progress_on_invalid_utf8() {
+        let mut buf = vec![0xff, b'a', b'b'];
+
+        let first = split_valid_utf8_prefix_with_max(&mut buf, 2).expect("expected prefix");
+        assert_eq!(first, vec![0xff]);
+        assert_eq!(buf, b"ab".to_vec());
+    }
+}
--- a/codex-rs/core/src/unified_exec/session.rs
+++ b/codex-rs/core/src/unified_exec/session.rs
@@ -14,8 +14,8 @@ use crate::exec::ExecToolCallOutput;
 use crate::exec::SandboxType;
 use crate::exec::StreamOutput;
 use crate::exec::is_likely_sandbox_denied;
+use crate::truncate::TruncationPolicy;
 use crate::truncate::formatted_truncate_text;
-use codex_protocol::openai_models::TruncationPolicy;
 use codex_utils_pty::ExecCommandSession;
 use codex_utils_pty::SpawnedPty;

--- a/codex-rs/core/src/unified_exec/session_manager.rs
+++ b/codex-rs/core/src/unified_exec/session_manager.rs
@@ -22,9 +22,9 @@ use crate::tools::orchestrator::ToolOrchestrator;
 use crate::tools::runtimes::unified_exec::UnifiedExecRequest as UnifiedExecToolRequest;
 use crate::tools::runtimes::unified_exec::UnifiedExecRuntime;
 use crate::tools::sandboxing::ToolCtx;
+use crate::truncate::TruncationPolicy;
 use crate::truncate::approx_token_count;
 use crate::truncate::formatted_truncate_text;
-use codex_protocol::openai_models::TruncationPolicy;

 use super::CommandTranscript;
 use super::ExecCommandRequest;
@@ -47,7 +47,7 @@ use super::session::OutputBuffer;
 use super::session::OutputHandles;
 use super::session::UnifiedExecSession;

-const UNIFIED_EXEC_ENV: [(&str, &str); 8] = [
+const UNIFIED_EXEC_ENV: [(&str, &str); 9] = [
    ("NO_COLOR", "1"),
    ("TERM", "dumb"),
    ("LANG", "C.UTF-8"),
@@ -56,6 +56,7 @@ const UNIFIED_EXEC_ENV: [(&str, &str); 8] = [
    ("COLORTERM", ""),
    ("PAGER", "cat"),
    ("GIT_PAGER", "cat"),
+    ("GH_PAGER", "cat"),
 ];

 fn apply_unified_exec_env(mut env: HashMap<String, String>) -> HashMap<String, String> {
@@ -679,6 +680,7 @@ mod tests {
            ("COLORTERM".to_string(), String::new()),
            ("PAGER".to_string(), "cat".to_string()),
            ("GIT_PAGER".to_string(), "cat".to_string()),
+            ("GH_PAGER".to_string(), "cat".to_string()),
        ]);

        assert_eq!(env, expected);
--- a/codex-rs/core/src/util.rs
+++ b/codex-rs/core/src/util.rs
@@ -9,6 +9,31 @@ use tracing::error;
 const INITIAL_DELAY_MS: u64 = 200;
 const BACKOFF_FACTOR: f64 = 2.0;

+/// Emit structured feedback metadata as key/value pairs.
+///
+/// This logs a tracing event with `target: "feedback_tags"`. If
+/// `codex_feedback::CodexFeedback::metadata_layer()` is installed, these fields are captured and
+/// later attached as tags when feedback is uploaded.
+///
+/// Values are wrapped with [`tracing::field::DebugValue`], so the expression only needs to
+/// implement [`std::fmt::Debug`].
+///
+/// Example:
+///
+/// ```rust
+/// codex_core::feedback_tags!(model = "gpt-5", cached = true);
+/// codex_core::feedback_tags!(provider = provider_id, request_id = request_id);
+/// ```
+#[macro_export]
+macro_rules! feedback_tags {
+    ($( $key:ident = $value:expr ),+ $(,)?) => {
+        ::tracing::info!(
+            target: "feedback_tags",
+            $( $key = ::tracing::field::debug(&$value) ),+
+        );
+    };
+}
+
 pub(crate) fn backoff(attempt: u64) -> Duration {
    let exp = BACKOFF_FACTOR.powi(attempt.saturating_sub(1) as i32);
    let base = (INITIAL_DELAY_MS as f64 * exp) as u64;
@@ -74,4 +99,12 @@ mod tests {
        let message = try_parse_error_message(text);
        assert_eq!(message, r#"{"message": "test"}"#);
    }
+
+    #[test]
+    fn feedback_tags_macro_compiles() {
+        #[derive(Debug)]
+        struct OnlyDebug;
+
+        feedback_tags!(model = "gpt-5", cached = true, debug_only = OnlyDebug);
+    }
 }
--- a/codex-rs/core/tests/common/Cargo.toml
+++ b/codex-rs/core/tests/common/Cargo.toml
@@ -14,6 +14,7 @@ base64 = { workspace = true }
 codex-core = { workspace = true, features = ["test-support"] }
 codex-protocol = { workspace = true }
 codex-utils-absolute-path = { workspace = true }
+codex-utils-cargo-bin = { workspace = true }
 notify = { workspace = true }
 regex-lite = { workspace = true }
 serde_json = { workspace = true }
--- a/codex-rs/core/tests/common/lib.rs
+++ b/codex-rs/core/tests/common/lib.rs
@@ -10,9 +10,6 @@ use codex_utils_absolute_path::AbsolutePathBuf;
 use regex_lite::Regex;
 use std::path::PathBuf;

-#[cfg(target_os = "linux")]
-use assert_cmd::cargo::cargo_bin;
-
 pub mod process;
 pub mod responses;
 pub mod streaming_sse;
@@ -87,7 +84,10 @@ pub async fn load_default_config_for_test(codex_home: &TempDir) -> Config {
 #[cfg(target_os = "linux")]
 fn default_test_overrides() -> ConfigOverrides {
    ConfigOverrides {
-        codex_linux_sandbox_exe: Some(cargo_bin("codex-linux-sandbox")),
+        codex_linux_sandbox_exe: Some(
+            codex_utils_cargo_bin::cargo_bin("codex-linux-sandbox")
+                .expect("should find binary for codex-linux-sandbox"),
+        ),
        ..ConfigOverrides::default()
    }
 }
--- a/codex-rs/core/tests/common/responses.rs
+++ b/codex-rs/core/tests/common/responses.rs
@@ -670,6 +670,25 @@ pub async fn mount_models_once(server: &MockServer, body: ModelsResponse) -> Mod
    models_mock
 }

+pub async fn mount_models_once_with_etag(
+    server: &MockServer,
+    body: ModelsResponse,
+    etag: &str,
+) -> ModelsMock {
+    let (mock, models_mock) = models_mock();
+    mock.respond_with(
+        ResponseTemplate::new(200)
+            .insert_header("content-type", "application/json")
+            // ModelsClient reads the ETag header, not a JSON field.
+            .insert_header("ETag", etag)
+            .set_body_json(body.clone()),
+    )
+    .up_to_n_times(1)
+    .mount(server)
+    .await;
+    models_mock
+}
+
 pub async fn start_mock_server() -> MockServer {
    let server = MockServer::builder()
        .body_print_limit(BodyPrintLimit::Limited(80_000))
@@ -677,14 +696,7 @@ pub async fn start_mock_server() -> MockServer {
        .await;

    // Provide a default `/models` response so tests remain hermetic when the client queries it.
-    let _ = mount_models_once(
-        &server,
-        ModelsResponse {
-            models: Vec::new(),
-            etag: String::new(),
-        },
-    )
-    .await;
+    let _ = mount_models_once(&server, ModelsResponse { models: Vec::new() }).await;

    server
 }
--- a/codex-rs/core/tests/common/test_codex.rs
+++ b/codex-rs/core/tests/common/test_codex.rs
@@ -184,8 +184,8 @@ impl TestCodexBuilder {
        for hook in self.pre_build_hooks.drain(..) {
            hook(home.path());
        }
-        if let Ok(cmd) = assert_cmd::Command::cargo_bin("codex") {
-            config.codex_linux_sandbox_exe = Some(PathBuf::from(cmd.get_program().to_os_string()));
+        if let Ok(path) = codex_utils_cargo_bin::cargo_bin("codex") {
+            config.codex_linux_sandbox_exe = Some(path);
        }

        let mut mutators = vec![];
@@ -250,7 +250,7 @@ impl TestCodex {
        approval_policy: AskForApproval,
        sandbox_policy: SandboxPolicy,
    ) -> Result<()> {
-        let session_model = self.session_configured.model_family.slug.clone();
+        let session_model = self.session_configured.model.clone();
        self.codex
            .submit(Op::UserTurn {
                items: vec![UserInput::Text {
--- a/codex-rs/core/tests/common/test_codex_exec.rs
+++ b/codex-rs/core/tests/common/test_codex_exec.rs
@@ -11,8 +11,10 @@ pub struct TestCodexExecBuilder {

 impl TestCodexExecBuilder {
    pub fn cmd(&self) -> assert_cmd::Command {
-        let mut cmd = assert_cmd::Command::cargo_bin("codex-exec")
-            .expect("should find binary for codex-exec");
+        let mut cmd = assert_cmd::Command::new(
+            codex_utils_cargo_bin::cargo_bin("codex-exec")
+                .expect("should find binary for codex-exec"),
+        );
        cmd.current_dir(self.cwd.path())
            .env("CODEX_HOME", self.home.path())
            .env(CODEX_API_KEY_ENV_VAR, "dummy");
--- a/codex-rs/core/tests/responses_headers.rs
+++ b/codex-rs/core/tests/responses_headers.rs
@@ -14,7 +14,6 @@ use codex_core::models_manager::manager::ModelsManager;
 use codex_otel::otel_manager::OtelManager;
 use codex_protocol::ConversationId;
 use codex_protocol::config_types::ReasoningSummary;
-use codex_protocol::openai_models::ReasoningSummaryFormat;
 use codex_protocol::protocol::SessionSource;
 use codex_protocol::protocol::SubAgentSource;
 use core_test_support::load_default_config_for_test;
@@ -246,7 +245,6 @@ async fn responses_respects_model_family_overrides_from_config() {
    config.model_provider_id = provider.name.clone();
    config.model_provider = provider.clone();
    config.model_supports_reasoning_summaries = Some(true);
-    config.model_reasoning_summary_format = Some(ReasoningSummaryFormat::Experimental);
    config.model_reasoning_summary = ReasoningSummary::Detailed;
    let effort = config.model_reasoning_effort;
    let summary = config.model_reasoning_summary;
--- a/codex-rs/core/tests/suite/abort_tasks.rs
+++ b/codex-rs/core/tests/suite/abort_tasks.rs
@@ -49,6 +49,7 @@ async fn interrupt_long_running_tool_emits_turn_aborted() {
            items: vec![UserInput::Text {
                text: "start sleep".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -101,6 +102,7 @@ async fn interrupt_tool_records_history_entries() {
            items: vec![UserInput::Text {
                text: "start history recording".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -117,6 +119,7 @@ async fn interrupt_tool_records_history_entries() {
            items: vec![UserInput::Text {
                text: "follow up".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
--- a/codex-rs/core/tests/suite/apply_patch_cli.rs
+++ b/codex-rs/core/tests/suite/apply_patch_cli.rs
@@ -297,7 +297,7 @@ async fn apply_patch_cli_move_without_content_change_has_no_turn_diff(
    let call_id = "apply-move-no-change";
    mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;

-    let model = test.session_configured.model_family.slug.clone();
+    let model = test.session_configured.model.clone();
    codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
@@ -883,7 +883,7 @@ async fn apply_patch_shell_command_heredoc_with_cd_emits_turn_diff() -> Result<(
    ];
    mount_sse_sequence(harness.server(), bodies).await;

-    let model = test.session_configured.model_family.slug.clone();
+    let model = test.session_configured.model.clone();
    codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
@@ -960,7 +960,7 @@ async fn apply_patch_shell_command_failure_propagates_error_and_skips_diff() ->
    ];
    mount_sse_sequence(harness.server(), bodies).await;

-    let model = test.session_configured.model_family.slug.clone();
+    let model = test.session_configured.model.clone();
    codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
@@ -1107,7 +1107,7 @@ async fn apply_patch_emits_turn_diff_event_with_unified_diff(
    let patch = format!("*** Begin Patch\n*** Add File: {file}\n+hello\n*** End Patch\n");
    mount_apply_patch(&harness, call_id, patch.as_str(), "ok", model_output).await;

-    let model = test.session_configured.model_family.slug.clone();
+    let model = test.session_configured.model.clone();
    codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
@@ -1167,7 +1167,7 @@ async fn apply_patch_turn_diff_for_rename_with_content_change(
    let patch = "*** Begin Patch\n*** Update File: old.txt\n*** Move to: new.txt\n@@\n-old\n+new\n*** End Patch";
    mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;

-    let model = test.session_configured.model_family.slug.clone();
+    let model = test.session_configured.model.clone();
    codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
@@ -1235,7 +1235,7 @@ async fn apply_patch_aggregates_diff_across_multiple_tool_calls() -> Result<()>
    ]);
    mount_sse_sequence(harness.server(), vec![s1, s2, s3]).await;

-    let model = test.session_configured.model_family.slug.clone();
+    let model = test.session_configured.model.clone();
    codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
@@ -1303,7 +1303,7 @@ async fn apply_patch_aggregates_diff_preserves_success_after_failure() -> Result
    ];
    mount_sse_sequence(harness.server(), responses).await;

-    let model = test.session_configured.model_family.slug.clone();
+    let model = test.session_configured.model.clone();
    codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
--- a/codex-rs/core/tests/suite/approvals.rs
+++ b/codex-rs/core/tests/suite/approvals.rs
@@ -486,7 +486,7 @@ async fn submit_turn(
    approval_policy: AskForApproval,
    sandbox_policy: SandboxPolicy,
 ) -> Result<()> {
-    let session_model = test.session_configured.model_family.slug.clone();
+    let session_model = test.session_configured.model.clone();

    test.codex
        .submit(Op::UserTurn {
--- a/codex-rs/core/tests/suite/cli_stream.rs
+++ b/codex-rs/core/tests/suite/cli_stream.rs
@@ -1,5 +1,4 @@
 use assert_cmd::Command as AssertCommand;
-use assert_cmd::cargo::cargo_bin;
 use codex_core::RolloutRecorder;
 use codex_core::protocol::GitInfo;
 use core_test_support::fs_wait;
@@ -45,7 +44,7 @@ async fn chat_mode_stream_cli() {
        "model_providers.mock={{ name = \"mock\", base_url = \"{}/v1\", env_key = \"PATH\", wire_api = \"chat\" }}",
        server.uri()
    );
-    let bin = cargo_bin("codex");
+    let bin = codex_utils_cargo_bin::cargo_bin("codex").unwrap();
    let mut cmd = AssertCommand::new(bin);
    cmd.arg("exec")
        .arg("--skip-git-repo-check")
@@ -128,7 +127,7 @@ async fn exec_cli_applies_experimental_instructions_file() {
    );

    let home = TempDir::new().unwrap();
-    let bin = cargo_bin("codex");
+    let bin = codex_utils_cargo_bin::cargo_bin("codex").unwrap();
    let mut cmd = AssertCommand::new(bin);
    cmd.arg("exec")
        .arg("--skip-git-repo-check")
@@ -182,7 +181,7 @@ async fn responses_api_stream_cli() {
        std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/cli_responses_fixture.sse");

    let home = TempDir::new().unwrap();
-    let bin = cargo_bin("codex");
+    let bin = codex_utils_cargo_bin::cargo_bin("codex").unwrap();
    let mut cmd = AssertCommand::new(bin);
    cmd.arg("exec")
        .arg("--skip-git-repo-check")
@@ -218,7 +217,7 @@ async fn integration_creates_and_checks_session_file() -> anyhow::Result<()> {
        std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/cli_responses_fixture.sse");

    // 4. Run the codex CLI and invoke `exec`, which is what records a session.
-    let bin = cargo_bin("codex");
+    let bin = codex_utils_cargo_bin::cargo_bin("codex").unwrap();
    let mut cmd = AssertCommand::new(bin);
    cmd.arg("exec")
        .arg("--skip-git-repo-check")
@@ -339,7 +338,7 @@ async fn integration_creates_and_checks_session_file() -> anyhow::Result<()> {
    // Second run: resume should update the existing file.
    let marker2 = format!("integration-resume-{}", Uuid::new_v4());
    let prompt2 = format!("echo {marker2}");
-    let bin2 = cargo_bin("codex");
+    let bin2 = codex_utils_cargo_bin::cargo_bin("codex").unwrap();
    let mut cmd2 = AssertCommand::new(bin2);
    cmd2.arg("exec")
        .arg("--skip-git-repo-check")
--- a/codex-rs/core/tests/suite/client.rs
+++ b/codex-rs/core/tests/suite/client.rs
@@ -290,6 +290,7 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -365,6 +366,7 @@ async fn includes_conversation_id_and_model_headers_in_request() {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -424,6 +426,7 @@ async fn includes_base_instructions_override_in_request() {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -488,6 +491,7 @@ async fn chatgpt_auth_sends_correct_request() {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -582,6 +586,7 @@ async fn prefers_apikey_when_config_prefers_apikey_even_with_chatgpt_tokens() {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -622,6 +627,7 @@ async fn includes_user_instructions_message_in_request() {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -692,6 +698,7 @@ async fn skills_append_to_instructions() {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -741,6 +748,7 @@ async fn includes_configured_effort_in_request() -> anyhow::Result<()> {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -777,6 +785,7 @@ async fn includes_no_effort_in_request() -> anyhow::Result<()> {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -811,6 +820,7 @@ async fn includes_default_reasoning_effort_in_request_when_defined_by_model_fami
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -849,6 +859,7 @@ async fn configured_reasoning_summary_is_sent() -> anyhow::Result<()> {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -887,6 +898,7 @@ async fn reasoning_summary_is_omitted_when_disabled() -> anyhow::Result<()> {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -919,6 +931,7 @@ async fn includes_default_verbosity_in_request() -> anyhow::Result<()> {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -958,6 +971,7 @@ async fn configured_verbosity_not_sent_for_models_without_support() -> anyhow::R
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -996,6 +1010,7 @@ async fn configured_verbosity_is_sent() -> anyhow::Result<()> {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -1050,6 +1065,7 @@ async fn includes_developer_instructions_message_in_request() {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -1280,6 +1296,7 @@ async fn token_count_includes_rate_limits_snapshot() {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -1437,6 +1454,7 @@ async fn usage_limit_error_emits_rate_limit_event() -> anyhow::Result<()> {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .expect("submission should succeed while emitting usage limit error events");
@@ -1506,6 +1524,7 @@ async fn context_window_error_sets_total_tokens_to_model_window() -> anyhow::Res
            items: vec![UserInput::Text {
                text: "seed turn".into(),
            }],
+            final_output_json_schema: None,
        })
        .await?;

@@ -1516,6 +1535,7 @@ async fn context_window_error_sets_total_tokens_to_model_window() -> anyhow::Res
            items: vec![UserInput::Text {
                text: "trigger context window".into(),
            }],
+            final_output_json_schema: None,
        })
        .await?;

@@ -1635,6 +1655,7 @@ async fn azure_overrides_assign_properties_used_for_responses_url() {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -1717,6 +1738,7 @@ async fn env_var_overrides_loaded_auth() {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -1800,6 +1822,7 @@ async fn history_dedupes_streamed_and_final_messages_across_turns() {
    codex
        .submit(Op::UserInput {
            items: vec![UserInput::Text { text: "U1".into() }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -1809,6 +1832,7 @@ async fn history_dedupes_streamed_and_final_messages_across_turns() {
    codex
        .submit(Op::UserInput {
            items: vec![UserInput::Text { text: "U2".into() }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -1818,6 +1842,7 @@ async fn history_dedupes_streamed_and_final_messages_across_turns() {
    codex
        .submit(Op::UserInput {
            items: vec![UserInput::Text { text: "U3".into() }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
--- a/codex-rs/core/tests/suite/compact.rs
+++ b/codex-rs/core/tests/suite/compact.rs
@@ -8,11 +8,14 @@ use codex_core::compact::SUMMARIZATION_PROMPT;
 use codex_core::compact::SUMMARY_PREFIX;
 use codex_core::config::Config;
 use codex_core::features::Feature;
+use codex_core::protocol::AskForApproval;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::Op;
 use codex_core::protocol::RolloutItem;
 use codex_core::protocol::RolloutLine;
+use codex_core::protocol::SandboxPolicy;
 use codex_core::protocol::WarningEvent;
+use codex_protocol::config_types::ReasoningSummary;
 use codex_protocol::user_input::UserInput;
 use core_test_support::load_default_config_for_test;
 use core_test_support::responses::ev_local_shell_call;
@@ -158,6 +161,7 @@ async fn summarize_context_three_requests_and_instructions() {
            items: vec![UserInput::Text {
                text: "hello world".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -178,6 +182,7 @@ async fn summarize_context_three_requests_and_instructions() {
            items: vec![UserInput::Text {
                text: THIRD_USER_MSG.into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -577,6 +582,7 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() {
            items: vec![UserInput::Text {
                text: user_message.into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .expect("submit user input");
@@ -1081,6 +1087,7 @@ async fn auto_compact_runs_after_token_limit_hit() {
            items: vec![UserInput::Text {
                text: FIRST_AUTO_MSG.into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -1092,6 +1099,7 @@ async fn auto_compact_runs_after_token_limit_hit() {
            items: vec![UserInput::Text {
                text: SECOND_AUTO_MSG.into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -1103,6 +1111,7 @@ async fn auto_compact_runs_after_token_limit_hit() {
            items: vec![UserInput::Text {
                text: POST_AUTO_USER_MSG.into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -1228,6 +1237,117 @@ async fn auto_compact_runs_after_token_limit_hit() {
    );
 }

+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn auto_compact_runs_after_resume_when_token_usage_is_over_limit() {
+    skip_if_no_network!();
+
+    let server = start_mock_server().await;
+
+    let limit = 200_000;
+    let over_limit_tokens = 250_000;
+    let remote_summary = "REMOTE_COMPACT_SUMMARY";
+
+    let compacted_history = vec![
+        codex_protocol::models::ResponseItem::Message {
+            id: None,
+            role: "assistant".to_string(),
+            content: vec![codex_protocol::models::ContentItem::OutputText {
+                text: remote_summary.to_string(),
+            }],
+        },
+        codex_protocol::models::ResponseItem::Compaction {
+            encrypted_content: "ENCRYPTED_COMPACTION_SUMMARY".to_string(),
+        },
+    ];
+    let compact_mock =
+        mount_compact_json_once(&server, serde_json::json!({ "output": compacted_history })).await;
+
+    let mut builder = test_codex().with_config(move |config| {
+        set_test_compact_prompt(config);
+        config.model_auto_compact_token_limit = Some(limit);
+        config.features.enable(Feature::RemoteCompaction);
+    });
+    let initial = builder.build(&server).await.unwrap();
+    let home = initial.home.clone();
+    let rollout_path = initial.session_configured.rollout_path.clone();
+
+    // A single over-limit completion should not auto-compact until the next user message.
+    mount_sse_once(
+        &server,
+        sse(vec![
+            ev_assistant_message("m1", FIRST_REPLY),
+            ev_completed_with_tokens("r1", over_limit_tokens),
+        ]),
+    )
+    .await;
+    initial.submit_turn("OVER_LIMIT_TURN").await.unwrap();
+
+    assert!(
+        compact_mock.requests().is_empty(),
+        "remote compaction should not run before the next user message"
+    );
+
+    let mut resume_builder = test_codex().with_config(move |config| {
+        set_test_compact_prompt(config);
+        config.model_auto_compact_token_limit = Some(limit);
+        config.features.enable(Feature::RemoteCompaction);
+    });
+    let resumed = resume_builder
+        .resume(&server, home, rollout_path)
+        .await
+        .unwrap();
+
+    let follow_up_user = "AFTER_RESUME_USER";
+    let sse_follow_up = sse(vec![
+        ev_assistant_message("m2", FINAL_REPLY),
+        ev_completed("r2"),
+    ]);
+
+    let follow_up_matcher = move |req: &wiremock::Request| {
+        let body = std::str::from_utf8(&req.body).unwrap_or("");
+        body.contains(follow_up_user) && body.contains(remote_summary)
+    };
+    mount_sse_once_match(&server, follow_up_matcher, sse_follow_up).await;
+
+    resumed
+        .codex
+        .submit(Op::UserTurn {
+            items: vec![UserInput::Text {
+                text: follow_up_user.into(),
+            }],
+            final_output_json_schema: None,
+            cwd: resumed.cwd.path().to_path_buf(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy: SandboxPolicy::DangerFullAccess,
+            model: resumed.session_configured.model.clone(),
+            effort: None,
+            summary: ReasoningSummary::Auto,
+        })
+        .await
+        .unwrap();
+
+    wait_for_event(&resumed.codex, |event| {
+        matches!(event, EventMsg::ContextCompacted(_))
+    })
+    .await;
+    wait_for_event(&resumed.codex, |event| {
+        matches!(event, EventMsg::TaskComplete(_))
+    })
+    .await;
+
+    let compact_requests = compact_mock.requests();
+    assert_eq!(
+        compact_requests.len(),
+        1,
+        "remote compaction should run once after resume"
+    );
+    assert_eq!(
+        compact_requests[0].path(),
+        "/v1/responses/compact",
+        "remote compaction should hit the compact endpoint"
+    );
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn auto_compact_persists_rollout_entries() {
    skip_if_no_network!();
@@ -1304,6 +1424,7 @@ async fn auto_compact_persists_rollout_entries() {
            items: vec![UserInput::Text {
                text: FIRST_AUTO_MSG.into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -1314,6 +1435,7 @@ async fn auto_compact_persists_rollout_entries() {
            items: vec![UserInput::Text {
                text: SECOND_AUTO_MSG.into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -1324,6 +1446,7 @@ async fn auto_compact_persists_rollout_entries() {
            items: vec![UserInput::Text {
                text: POST_AUTO_USER_MSG.into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -1415,6 +1538,7 @@ async fn manual_compact_retries_after_context_window_error() {
            items: vec![UserInput::Text {
                text: "first turn".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -1547,6 +1671,7 @@ async fn manual_compact_twice_preserves_latest_user_messages() {
            items: vec![UserInput::Text {
                text: first_user_message.into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -1560,6 +1685,7 @@ async fn manual_compact_twice_preserves_latest_user_messages() {
            items: vec![UserInput::Text {
                text: second_user_message.into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -1573,6 +1699,7 @@ async fn manual_compact_twice_preserves_latest_user_messages() {
            items: vec![UserInput::Text {
                text: final_user_message.into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -1752,6 +1879,7 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
        codex
            .submit(Op::UserInput {
                items: vec![UserInput::Text { text: user.into() }],
+                final_output_json_schema: None,
            })
            .await
            .unwrap();
@@ -1864,6 +1992,7 @@ async fn auto_compact_triggers_after_function_call_over_95_percent_usage() {
            items: vec![UserInput::Text {
                text: FUNCTION_CALL_LIMIT_MSG.into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -1875,6 +2004,7 @@ async fn auto_compact_triggers_after_function_call_over_95_percent_usage() {
            items: vec![UserInput::Text {
                text: follow_up_user.into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -1989,6 +2119,7 @@ async fn auto_compact_counts_encrypted_reasoning_before_last_user() {
        codex
            .submit(Op::UserInput {
                items: vec![UserInput::Text { text: user.into() }],
+                final_output_json_schema: None,
            })
            .await
            .unwrap();
--- a/codex-rs/core/tests/suite/compact_remote.rs
+++ b/codex-rs/core/tests/suite/compact_remote.rs
@@ -74,6 +74,7 @@ async fn remote_compact_replaces_history_for_followups() -> Result<()> {
            items: vec![UserInput::Text {
                text: "hello remote compact".into(),
            }],
+            final_output_json_schema: None,
        })
        .await?;
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
@@ -86,6 +87,7 @@ async fn remote_compact_replaces_history_for_followups() -> Result<()> {
            items: vec![UserInput::Text {
                text: "after compact".into(),
            }],
+            final_output_json_schema: None,
        })
        .await?;
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
@@ -103,7 +105,7 @@ async fn remote_compact_replaces_history_for_followups() -> Result<()> {
    let compact_body = compact_request.body_json();
    assert_eq!(
        compact_body.get("model").and_then(|v| v.as_str()),
-        Some(harness.test().session_configured.model_family.slug.as_str())
+        Some(harness.test().session_configured.model.as_str())
    );
    let compact_body_text = compact_body.to_string();
    assert!(
@@ -191,6 +193,7 @@ async fn remote_compact_runs_automatically() -> Result<()> {
            items: vec![UserInput::Text {
                text: "hello remote compact".into(),
            }],
+            final_output_json_schema: None,
        })
        .await?;
    let message = wait_for_event_match(&codex, |ev| match ev {
@@ -263,6 +266,7 @@ async fn remote_compact_persists_replacement_history_in_rollout() -> Result<()>
            items: vec![UserInput::Text {
                text: "needs compaction".into(),
            }],
+            final_output_json_schema: None,
        })
        .await?;
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
--- a/codex-rs/core/tests/suite/compact_resume_fork.rs
+++ b/codex-rs/core/tests/suite/compact_resume_fork.rs
@@ -884,6 +884,7 @@ async fn user_turn(conversation: &Arc<CodexConversation>, text: &str) {
    conversation
        .submit(Op::UserInput {
            items: vec![UserInput::Text { text: text.into() }],
+            final_output_json_schema: None,
        })
        .await
        .expect("submit user turn");
--- a/codex-rs/core/tests/suite/exec_policy.rs
+++ b/codex-rs/core/tests/suite/exec_policy.rs
@@ -67,7 +67,7 @@ async fn execpolicy_blocks_shell_invocation() -> Result<()> {
    )
    .await;

-    let session_model = test.session_configured.model_family.slug.clone();
+    let session_model = test.session_configured.model.clone();
    test.codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
--- a/codex-rs/core/tests/suite/fork_conversation.rs
+++ b/codex-rs/core/tests/suite/fork_conversation.rs
@@ -74,6 +74,7 @@ async fn fork_conversation_twice_drops_to_first_message() {
                items: vec![UserInput::Text {
                    text: text.to_string(),
                }],
+                final_output_json_schema: None,
            })
            .await
            .unwrap();
--- a/codex-rs/core/tests/suite/items.rs
+++ b/codex-rs/core/tests/suite/items.rs
@@ -43,6 +43,7 @@ async fn user_message_item_is_emitted() -> anyhow::Result<()> {
            items: (vec![UserInput::Text {
                text: "please inspect sample.txt".into(),
            }]),
+            final_output_json_schema: None,
        })
        .await?;

@@ -99,6 +100,7 @@ async fn assistant_message_item_is_emitted() -> anyhow::Result<()> {
            items: vec![UserInput::Text {
                text: "please summarize results".into(),
            }],
+            final_output_json_schema: None,
        })
        .await?;

@@ -155,6 +157,7 @@ async fn reasoning_item_is_emitted() -> anyhow::Result<()> {
            items: vec![UserInput::Text {
                text: "explain your reasoning".into(),
            }],
+            final_output_json_schema: None,
        })
        .await?;

@@ -213,6 +216,7 @@ async fn web_search_item_is_emitted() -> anyhow::Result<()> {
            items: vec![UserInput::Text {
                text: "find the weather".into(),
            }],
+            final_output_json_schema: None,
        })
        .await?;

@@ -265,6 +269,7 @@ async fn agent_message_content_delta_has_item_metadata() -> anyhow::Result<()> {
            items: vec![UserInput::Text {
                text: "please stream text".into(),
            }],
+            final_output_json_schema: None,
        })
        .await?;

@@ -330,6 +335,7 @@ async fn reasoning_content_delta_has_item_metadata() -> anyhow::Result<()> {
            items: vec![UserInput::Text {
                text: "reason through it".into(),
            }],
+            final_output_json_schema: None,
        })
        .await?;

@@ -387,6 +393,7 @@ async fn reasoning_raw_content_delta_respects_flag() -> anyhow::Result<()> {
            items: vec![UserInput::Text {
                text: "show raw reasoning".into(),
            }],
+            final_output_json_schema: None,
        })
        .await?;

--- a/codex-rs/core/tests/suite/live_cli.rs
+++ b/codex-rs/core/tests/suite/live_cli.rs
@@ -30,7 +30,7 @@ fn run_live(prompt: &str) -> (assert_cmd::assert::Assert, TempDir) {
    // implementation). Instead we configure the std `Command` ourselves, then later hand the
    // resulting `Output` to `assert_cmd` for the familiar assertions.

-    let mut cmd = Command::cargo_bin("codex-rs").unwrap();
+    let mut cmd = Command::new(codex_utils_cargo_bin::cargo_bin("codex-rs").unwrap());
    cmd.current_dir(dir.path());
    cmd.env("OPENAI_API_KEY", require_api_key());

--- a/codex-rs/core/tests/suite/mod.rs
+++ b/codex-rs/core/tests/suite/mod.rs
@@ -37,6 +37,7 @@ mod list_models;
 mod live_cli;
 mod model_overrides;
 mod model_tools;
+mod models_etag_responses;
 mod otel;
 mod prompt_caching;
 mod quota_exceeded;
--- a/codex-rs/core/tests/suite/models_etag_responses.rs
+++ b/codex-rs/core/tests/suite/models_etag_responses.rs
@@ -0,0 +1,139 @@
+#![cfg(not(target_os = "windows"))]
+
+use std::sync::Arc;
+
+use anyhow::Result;
+use codex_core::CodexAuth;
+use codex_core::features::Feature;
+use codex_core::protocol::AskForApproval;
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::Op;
+use codex_core::protocol::SandboxPolicy;
+use codex_protocol::config_types::ReasoningSummary;
+use codex_protocol::openai_models::ModelsResponse;
+use codex_protocol::user_input::UserInput;
+use core_test_support::responses;
+use core_test_support::responses::ev_assistant_message;
+use core_test_support::responses::ev_completed;
+use core_test_support::responses::ev_local_shell_call;
+use core_test_support::responses::ev_response_created;
+use core_test_support::responses::sse;
+use core_test_support::responses::sse_response;
+use core_test_support::skip_if_no_network;
+use core_test_support::test_codex::test_codex;
+use core_test_support::wait_for_event;
+use pretty_assertions::assert_eq;
+use wiremock::MockServer;
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn refresh_models_on_models_etag_mismatch_and_avoid_duplicate_models_fetch() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    const ETAG_1: &str = "\"models-etag-1\"";
+    const ETAG_2: &str = "\"models-etag-2\"";
+    const CALL_ID: &str = "local-shell-call-1";
+
+    let server = MockServer::start().await;
+
+    // 1) On spawn, Codex fetches /models and stores the ETag.
+    let spawn_models_mock = responses::mount_models_once_with_etag(
+        &server,
+        ModelsResponse { models: Vec::new() },
+        ETAG_1,
+    )
+    .await;
+
+    let auth = CodexAuth::create_dummy_chatgpt_auth_for_testing();
+    let mut builder = test_codex()
+        .with_auth(auth)
+        .with_model("gpt-5")
+        .with_config(|config| {
+            config.features.enable(Feature::RemoteModels);
+            // Keep this test deterministic: no request retries, and a small stream retry budget.
+            config.model_provider.request_max_retries = Some(0);
+            config.model_provider.stream_max_retries = Some(1);
+        });
+
+    let test = builder.build(&server).await?;
+    let codex = Arc::clone(&test.codex);
+    let cwd = Arc::clone(&test.cwd);
+    let session_model = test.session_configured.model.clone();
+
+    assert_eq!(spawn_models_mock.requests().len(), 1);
+    assert_eq!(spawn_models_mock.single_request_path(), "/v1/models");
+
+    // 2) If the server sends a different X-Models-Etag on /responses, Codex refreshes /models.
+    let refresh_models_mock = responses::mount_models_once_with_etag(
+        &server,
+        ModelsResponse { models: Vec::new() },
+        ETAG_2,
+    )
+    .await;
+
+    // First /responses request (user message) succeeds and returns a tool call.
+    // It also includes a mismatched X-Models-Etag, which should trigger a /models refresh.
+    let first_response_body = sse(vec![
+        ev_response_created("resp-1"),
+        ev_local_shell_call(CALL_ID, "completed", vec!["/bin/echo", "etag ok"]),
+        ev_completed("resp-1"),
+    ]);
+    responses::mount_response_once(
+        &server,
+        sse_response(first_response_body).insert_header("X-Models-Etag", ETAG_2),
+    )
+    .await;
+
+    // Second /responses request (tool output) includes the same X-Models-Etag; Codex should not
+    // refetch /models again after it has already refreshed the catalog.
+    let completion_response_body = sse(vec![
+        ev_response_created("resp-2"),
+        ev_assistant_message("msg-1", "done"),
+        ev_completed("resp-2"),
+    ]);
+    let tool_output_mock = responses::mount_response_once(
+        &server,
+        sse_response(completion_response_body).insert_header("X-Models-Etag", ETAG_2),
+    )
+    .await;
+
+    codex
+        .submit(Op::UserTurn {
+            items: vec![UserInput::Text {
+                text: "please run a tool".into(),
+            }],
+            final_output_json_schema: None,
+            cwd: cwd.path().to_path_buf(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy: SandboxPolicy::DangerFullAccess,
+            model: session_model,
+            effort: None,
+            summary: ReasoningSummary::Auto,
+        })
+        .await?;
+
+    let _ = wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    // Assert /models was refreshed exactly once after the X-Models-Etag mismatch.
+    assert_eq!(refresh_models_mock.requests().len(), 1);
+    assert_eq!(refresh_models_mock.single_request_path(), "/v1/models");
+    let refresh_req = refresh_models_mock
+        .requests()
+        .into_iter()
+        .next()
+        .expect("one request");
+    // Ensure Codex includes client_version on refresh. (This is a stable signal that we're using the /models client.)
+    assert!(
+        refresh_req
+            .url
+            .query_pairs()
+            .any(|(k, _)| k == "client_version"),
+        "expected /models refresh to include client_version query param"
+    );
+
+    // Assert the tool output /responses request succeeded and did not trigger another /models fetch.
+    let tool_req = tool_output_mock.single_request();
+    let _ = tool_req.function_call_output(CALL_ID);
+    assert_eq!(refresh_models_mock.requests().len(), 1);
+
+    Ok(())
+}
--- a/codex-rs/core/tests/suite/otel.rs
+++ b/codex-rs/core/tests/suite/otel.rs
@@ -46,6 +46,7 @@ async fn responses_api_emits_api_request_event() {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -87,6 +88,7 @@ async fn process_sse_emits_tracing_for_output_item() {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -125,6 +127,7 @@ async fn process_sse_emits_failed_event_on_parse_error() {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -164,6 +167,7 @@ async fn process_sse_records_failed_event_when_stream_closes_without_completed()
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -223,6 +227,7 @@ async fn process_sse_failed_event_records_response_error_message() {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -280,6 +285,7 @@ async fn process_sse_failed_event_logs_parse_error() {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -324,6 +330,7 @@ async fn process_sse_failed_event_logs_missing_error() {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -377,6 +384,7 @@ async fn process_sse_failed_event_logs_response_completed_parse_error() {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -427,6 +435,7 @@ async fn process_sse_emits_completed_telemetry() {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -494,6 +503,7 @@ async fn handle_responses_span_records_response_kind_and_tool_name() {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -558,6 +568,7 @@ async fn record_responses_sets_span_fields_for_response_events() {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -637,6 +648,7 @@ async fn handle_response_item_records_tool_result_for_custom_tool_call() {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -704,6 +716,7 @@ async fn handle_response_item_records_tool_result_for_function_call() {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -781,6 +794,7 @@ async fn handle_response_item_records_tool_result_for_local_shell_missing_ids()
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -842,6 +856,7 @@ async fn handle_response_item_records_tool_result_for_local_shell_call() {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -946,6 +961,7 @@ async fn handle_container_exec_autoapprove_from_config_records_tool_decision() {
            items: vec![UserInput::Text {
                text: "hello".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -994,6 +1010,7 @@ async fn handle_container_exec_user_approved_records_tool_decision() {
            items: vec![UserInput::Text {
                text: "approved".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -1052,6 +1069,7 @@ async fn handle_container_exec_user_approved_for_session_records_tool_decision()
            items: vec![UserInput::Text {
                text: "persist".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -1110,6 +1128,7 @@ async fn handle_sandbox_error_user_approves_retry_records_tool_decision() {
            items: vec![UserInput::Text {
                text: "retry".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -1168,6 +1187,7 @@ async fn handle_container_exec_user_denies_records_tool_decision() {
            items: vec![UserInput::Text {
                text: "deny".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -1226,6 +1246,7 @@ async fn handle_sandbox_error_user_approves_for_session_records_tool_decision()
            items: vec![UserInput::Text {
                text: "persist".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
@@ -1285,6 +1306,7 @@ async fn handle_sandbox_error_user_denies_records_tool_decision() {
            items: vec![UserInput::Text {
                text: "deny".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
--- a/codex-rs/core/tests/suite/prompt_caching.rs
+++ b/codex-rs/core/tests/suite/prompt_caching.rs
@@ -102,6 +102,7 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> {
            items: vec![UserInput::Text {
                text: "hello 1".into(),
            }],
+            final_output_json_schema: None,
        })
        .await?;
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
@@ -111,6 +112,7 @@ async fn prompt_tools_are_consistent_across_requests() -> anyhow::Result<()> {
            items: vec![UserInput::Text {
                text: "hello 2".into(),
            }],
+            final_output_json_schema: None,
        })
        .await?;
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
@@ -175,6 +177,7 @@ async fn codex_mini_latest_tools() -> anyhow::Result<()> {
            items: vec![UserInput::Text {
                text: "hello 1".into(),
            }],
+            final_output_json_schema: None,
        })
        .await?;

@@ -184,6 +187,7 @@ async fn codex_mini_latest_tools() -> anyhow::Result<()> {
            items: vec![UserInput::Text {
                text: "hello 2".into(),
            }],
+            final_output_json_schema: None,
        })
        .await?;

@@ -238,6 +242,7 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests
            items: vec![UserInput::Text {
                text: "hello 1".into(),
            }],
+            final_output_json_schema: None,
        })
        .await?;
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
@@ -247,6 +252,7 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests
            items: vec![UserInput::Text {
                text: "hello 2".into(),
            }],
+            final_output_json_schema: None,
        })
        .await?;
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
@@ -307,6 +313,7 @@ async fn overrides_turn_context_but_keeps_cached_prefix_and_key_constant() -> an
            items: vec![UserInput::Text {
                text: "hello 1".into(),
            }],
+            final_output_json_schema: None,
        })
        .await?;
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
@@ -334,6 +341,7 @@ async fn overrides_turn_context_but_keeps_cached_prefix_and_key_constant() -> an
            items: vec![UserInput::Text {
                text: "hello 2".into(),
            }],
+            final_output_json_schema: None,
        })
        .await?;
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
@@ -412,6 +420,7 @@ async fn override_before_first_turn_emits_environment_context() -> anyhow::Resul
            items: vec![UserInput::Text {
                text: "first message".into(),
            }],
+            final_output_json_schema: None,
        })
        .await?;

@@ -504,6 +513,7 @@ async fn per_turn_overrides_keep_cached_prefix_and_key_constant() -> anyhow::Res
            items: vec![UserInput::Text {
                text: "hello 1".into(),
            }],
+            final_output_json_schema: None,
        })
        .await?;
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
@@ -606,7 +616,7 @@ async fn send_user_turn_with_no_changes_does_not_send_environment_context() -> a
    let default_cwd = config.cwd.clone();
    let default_approval_policy = config.approval_policy.value();
    let default_sandbox_policy = config.sandbox_policy.get();
-    let default_model = session_configured.model_family.slug;
+    let default_model = session_configured.model;
    let default_effort = config.model_reasoning_effort;
    let default_summary = config.model_reasoning_summary;

@@ -696,7 +706,7 @@ async fn send_user_turn_with_changes_sends_environment_context() -> anyhow::Resu
    let default_cwd = config.cwd.clone();
    let default_approval_policy = config.approval_policy.value();
    let default_sandbox_policy = config.sandbox_policy.get();
-    let default_model = session_configured.model_family.slug;
+    let default_model = session_configured.model;
    let default_effort = config.model_reasoning_effort;
    let default_summary = config.model_reasoning_summary;

--- a/codex-rs/core/tests/suite/quota_exceeded.rs
+++ b/codex-rs/core/tests/suite/quota_exceeded.rs
@@ -44,6 +44,7 @@ async fn quota_exceeded_emits_single_error_event() -> Result<()> {
            items: vec![UserInput::Text {
                text: "quota?".into(),
            }],
+            final_output_json_schema: None,
        })
        .await
        .unwrap();
--- a/codex-rs/core/tests/suite/remote_models.rs
+++ b/codex-rs/core/tests/suite/remote_models.rs
@@ -24,7 +24,6 @@ use codex_protocol::openai_models::ModelVisibility;
 use codex_protocol::openai_models::ModelsResponse;
 use codex_protocol::openai_models::ReasoningEffort;
 use codex_protocol::openai_models::ReasoningEffortPreset;
-use codex_protocol::openai_models::ReasoningSummaryFormat;
 use codex_protocol::openai_models::TruncationPolicyConfig;
 use codex_protocol::user_input::UserInput;
 use core_test_support::load_default_config_for_test;
@@ -83,7 +82,6 @@ async fn remote_models_remote_model_uses_unified_exec() -> Result<()> {
        truncation_policy: TruncationPolicyConfig::bytes(10_000),
        supports_parallel_tool_calls: false,
        context_window: None,
-        reasoning_summary_format: ReasoningSummaryFormat::None,
        experimental_supported_tools: Vec::new(),
    };

@@ -91,7 +89,6 @@ async fn remote_models_remote_model_uses_unified_exec() -> Result<()> {
        &server,
        ModelsResponse {
            models: vec![remote_model],
-            etag: String::new(),
        },
    )
    .await;
@@ -222,14 +219,12 @@ async fn remote_models_apply_remote_base_instructions() -> Result<()> {
        truncation_policy: TruncationPolicyConfig::bytes(10_000),
        supports_parallel_tool_calls: false,
        context_window: None,
-        reasoning_summary_format: ReasoningSummaryFormat::None,
        experimental_supported_tools: Vec::new(),
    };
    mount_models_once(
        &server,
        ModelsResponse {
            models: vec![remote_model],
-            etag: String::new(),
        },
    )
    .await;
@@ -307,7 +302,6 @@ async fn remote_models_preserve_builtin_presets() -> Result<()> {
        &server,
        ModelsResponse {
            models: vec![remote_model.clone()],
-            etag: String::new(),
        },
    )
    .await;
@@ -327,7 +321,7 @@ async fn remote_models_preserve_builtin_presets() -> Result<()> {
    );

    manager
-        .refresh_available_models(&config)
+        .refresh_available_models_with_cache(&config)
        .await
        .expect("refresh succeeds");

@@ -365,7 +359,6 @@ async fn remote_models_hide_picker_only_models() -> Result<()> {
        &server,
        ModelsResponse {
            models: vec![remote_model],
-            etag: String::new(),
        },
    )
    .await;
@@ -486,7 +479,6 @@ fn test_remote_model(slug: &str, visibility: ModelVisibility, priority: i32) ->
        truncation_policy: TruncationPolicyConfig::bytes(10_000),
        supports_parallel_tool_calls: false,
        context_window: None,
-        reasoning_summary_format: ReasoningSummaryFormat::None,
        experimental_supported_tools: Vec::new(),
    }
 }
--- a/Show More
+++ b/Show More