wip

2026-04-22 21:54:49 +00:00 · 2025-08-04 15:53:28 -07:00 · 2025-08-04 15:53:28 -07:00 · 2025-08-04 15:53:27 -07:00 · 2025-08-04 15:44:23 -07:00 · 2025-08-04 11:26:51 -07:00
176 changed files with 18426 additions and 5690 deletions
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -21,7 +21,7 @@
      "settings": {
        "terminal.integrated.defaultProfile.linux": "bash"
      },
-      "extensions": ["rust-lang.rust-analyzer"]
+      "extensions": ["rust-lang.rust-analyzer", "tamasfe.even-better-toml"]
    }
  }
 }
--- a/.github/actions/codex/bun.lock
+++ b/.github/actions/codex/bun.lock
@@ -8,8 +8,8 @@
        "@actions/github": "^6.0.1",
      },
      "devDependencies": {
-        "@types/bun": "^1.2.18",
-        "@types/node": "^24.0.13",
+        "@types/bun": "^1.2.19",
+        "@types/node": "^24.1.0",
        "prettier": "^3.6.2",
        "typescript": "^5.8.3",
      },
@@ -48,15 +48,15 @@

    "@octokit/types": ["@octokit/types@13.10.0", "", { "dependencies": { "@octokit/openapi-types": "^24.2.0" } }, "sha512-ifLaO34EbbPj0Xgro4G5lP5asESjwHracYJvVaPIyXMuiuXLlhic3S47cBdTb+jfODkTE5YtGCLt3Ay3+J97sA=="],

-    "@types/bun": ["@types/bun@1.2.18", "", { "dependencies": { "bun-types": "1.2.18" } }, "sha512-Xf6RaWVheyemaThV0kUfaAUvCNokFr+bH8Jxp+tTZfx7dAPA8z9ePnP9S9+Vspzuxxx9JRAXhnyccRj3GyCMdQ=="],
+    "@types/bun": ["@types/bun@1.2.19", "", { "dependencies": { "bun-types": "1.2.19" } }, "sha512-d9ZCmrH3CJ2uYKXQIUuZ/pUnTqIvLDS0SK7pFmbx8ma+ziH/FRMoAq5bYpRG7y+w1gl+HgyNZbtqgMq4W4e2Lg=="],

-    "@types/node": ["@types/node@24.0.13", "", { "dependencies": { "undici-types": "~7.8.0" } }, "sha512-Qm9OYVOFHFYg3wJoTSrz80hoec5Lia/dPp84do3X7dZvLikQvM1YpmvTBEdIr/e+U8HTkFjLHLnl78K/qjf+jQ=="],
+    "@types/node": ["@types/node@24.1.0", "", { "dependencies": { "undici-types": "~7.8.0" } }, "sha512-ut5FthK5moxFKH2T1CUOC6ctR67rQRvvHdFLCD2Ql6KXmMuCrjsSsRI9UsLCm9M18BMwClv4pn327UvB7eeO1w=="],

    "@types/react": ["@types/react@19.1.8", "", { "dependencies": { "csstype": "^3.0.2" } }, "sha512-AwAfQ2Wa5bCx9WP8nZL2uMZWod7J7/JSplxbTmBQ5ms6QpqNYm672H0Vu9ZVKVngQ+ii4R/byguVEUZQyeg44g=="],

    "before-after-hook": ["before-after-hook@2.2.3", "", {}, "sha512-NzUnlZexiaH/46WDhANlyR2bXRopNg4F/zuSA3OpZnllCUgRaOF2znDioDWrmbNVsuZk6l9pMquQB38cfBZwkQ=="],

-    "bun-types": ["bun-types@1.2.18", "", { "dependencies": { "@types/node": "*" }, "peerDependencies": { "@types/react": "^19" } }, "sha512-04+Eha5NP7Z0A9YgDAzMk5PHR16ZuLVa83b26kH5+cp1qZW4F6FmAURngE7INf4tKOvCE69vYvDEwoNl1tGiWw=="],
+    "bun-types": ["bun-types@1.2.19", "", { "dependencies": { "@types/node": "*" }, "peerDependencies": { "@types/react": "^19" } }, "sha512-uAOTaZSPuYsWIXRpj7o56Let0g/wjihKCkeRqUBhlLVM/Bt+Fj9xTo+LhC1OV1XDaGkz4hNC80et5xgy+9KTHQ=="],

    "csstype": ["csstype@3.1.3", "", {}, "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw=="],

@@ -82,6 +82,8 @@

    "@octokit/plugin-rest-endpoint-methods/@octokit/types": ["@octokit/types@12.6.0", "", { "dependencies": { "@octokit/openapi-types": "^20.0.0" } }, "sha512-1rhSOfRa6H9w4YwK0yrf5faDaDTb+yLyBUKOCV4xtCDB5VmIPqd/v9yr9o6SAzOAlRxMiRiCic6JVM1/kunVkw=="],

+    "bun-types/@types/node": ["@types/node@24.0.13", "", { "dependencies": { "undici-types": "~7.8.0" } }, "sha512-Qm9OYVOFHFYg3wJoTSrz80hoec5Lia/dPp84do3X7dZvLikQvM1YpmvTBEdIr/e+U8HTkFjLHLnl78K/qjf+jQ=="],
+
    "@octokit/plugin-paginate-rest/@octokit/types/@octokit/openapi-types": ["@octokit/openapi-types@20.0.0", "", {}, "sha512-EtqRBEjp1dL/15V7WiX5LJMIxxkdiGJnabzYx5Apx4FkQIFgAfKumXeYAqqJCj1s+BMX4cPFIFC4OLCR6stlnA=="],

    "@octokit/plugin-rest-endpoint-methods/@octokit/types/@octokit/openapi-types": ["@octokit/openapi-types@20.0.0", "", {}, "sha512-EtqRBEjp1dL/15V7WiX5LJMIxxkdiGJnabzYx5Apx4FkQIFgAfKumXeYAqqJCj1s+BMX4cPFIFC4OLCR6stlnA=="],
--- a/.github/actions/codex/package.json
+++ b/.github/actions/codex/package.json
@@ -13,8 +13,8 @@
        "@actions/github": "^6.0.1"
    },
    "devDependencies": {
-        "@types/bun": "^1.2.18",
-        "@types/node": "^24.0.13",
+        "@types/bun": "^1.2.19",
+        "@types/node": "^24.1.0",
        "prettier": "^3.6.2",
        "typescript": "^5.8.3"
    }
--- a/.github/codex/labels/codex-rust-review.md
+++ b/.github/codex/labels/codex-rust-review.md
@@ -0,0 +1,23 @@
+Review this PR and respond with a very concise final message, formatted in Markdown.
+
+There should be a summary of the changes (1-2 sentences) and a few bullet points if necessary.
+
+Then provide the **review** (1-2 sentences plus bullet points, friendly tone).
+
+Things to look out for when doing the review:
+
+- **Make sure the pull request body explains the motivation behind the change.** If the author has failed to do this, call it out, and if you think you can deduce the motivation behind the change, propose copy.
+- Ideally, the PR body also contains a small summary of the change. For small changes, the PR title may be sufficient.
+- Each PR should ideally do one conceptual thing. For example, if a PR does a refactoring as well as introducing a new feature, push back and suggest the refactoring be done in a separate PR. This makes things easier for the reviewer, as refactoring changes can often be far-reaching, yet quick to review.
+- If the nature of the change seems to have a visual component (which is often the case for changes to `codex-rs/tui`), recommend including a screenshot or video to demonstrate the change, if appropriate.
+- Rust files should generally be organized such that the public parts of the API appear near the top of the file and helper functions go below. This is analogous to the "inverted pyramid" structure that is favored in journalism.
+- Encourage the use of small enums or the newtype pattern in Rust if it helps readability without adding significant cognitive load or lines of code.
+- Be wary of large files and offer suggestions for how to break things into more reasonably-sized files.
+- When modifying a `Cargo.toml` file, make sure that dependency lists stay alphabetically sorted. Also consider whether a new dependency is added to the appropriate place (e.g., `[dependencies]` versus `[dev-dependencies]`)
+- If you see opportunities for the changes in a diff to use more idiomatic Rust, please make specific recommendations. For example, favor the use of expressions over `return`.
+- When introducing new code, be on the lookout for code that duplicates existing code. When found, propose a way to refactor the existing code so that it can be reused.
+- Each crate in the Cargo workspace in `codex-rs` has a specific purpose: make a note if you believe new code is not introduced in the correct crate.
+- When possible, try to keep the `core` crate as small as possible. Non-core but shared logic is often a good candidate for `codex-rs/common`.
+- References to existing GitHub issues and PRs are encouraged, where appropriate, though you likely do not have network access, so may not be able to help here.
+
+{CODEX_ACTION_GITHUB_EVENT_PATH} contains the JSON that triggered this GitHub workflow. It contains the `base` and `head` refs that define this PR. Both refs are available locally.
--- a/.github/workflows/codex.yml
+++ b/.github/workflows/codex.yml
@@ -20,7 +20,7 @@ jobs:
      (github.event_name == 'issues' && (
        (github.event.action == 'labeled' && (github.event.label.name == 'codex-attempt' || github.event.label.name == 'codex-triage'))
      )) ||
-      (github.event_name == 'pull_request' && github.event.action == 'labeled' && github.event.label.name == 'codex-review')
+      (github.event_name == 'pull_request' && github.event.action == 'labeled' && (github.event.label.name == 'codex-review' || github.event.label.name == 'codex-rust-review'))
    runs-on: ubuntu-latest
    permissions:
      contents: write # can push or create branches
--- a/.github/workflows/rust-release.yml
+++ b/.github/workflows/rust-release.yml
@@ -93,7 +93,7 @@ jobs:
          sudo apt install -y musl-tools pkg-config

      - name: Cargo build
-        run: cargo build --target ${{ matrix.target }} --release --all-targets --all-features
+        run: cargo build --target ${{ matrix.target }} --release --bin codex --bin codex-exec --bin codex-linux-sandbox

      - name: Stage artifacts
        shell: bash
--- a/.vscode/extensions.json
+++ b/.vscode/extensions.json
@@ -0,0 +1,5 @@
+{
+    "recommendations": [
+        "tamasfe.even-better-toml",
+    ]
+}
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -0,0 +1,18 @@
+{
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "type": "lldb",
+            "request": "launch",
+            "name": "Cargo launch",
+            "cargo": {
+                "cwd": "${workspaceFolder}/codex-rs",
+                "args": [
+                    "build",
+                    "--bin=codex-tui"
+                ]
+            },
+            "args": []
+        }
+    ]
+}
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -0,0 +1,16 @@
+{
+    "rust-analyzer.checkOnSave": true,
+    "rust-analyzer.check.command": "clippy",
+    "rust-analyzer.check.extraArgs": ["--all-features", "--tests"],
+    "rust-analyzer.rustfmt.extraArgs": ["--config", "imports_granularity=Item"],
+    "[rust]": {
+        "editor.defaultFormatter": "rust-lang.rust-analyzer",
+        "editor.formatOnSave": true,
+    },
+    "[toml]": {
+        "editor.defaultFormatter": "tamasfe.even-better-toml",
+        "editor.formatOnSave": true,
+    },
+    "evenBetterToml.formatter.reorderArrays": true,
+    "evenBetterToml.formatter.reorderKeys": true,
+}
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -2,4 +2,10 @@

 In the codex-rs folder where the rust code lives:

- Never add or modify any code related to `CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR`. You operate in a sandbox where `CODEX_SANDBOX_NETWORK_DISABLED=1` will be set whenever you use the `shell` tool. Any existing code that uses `CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR` was authored with this fact in mind. It is often used to early exit out of tests that the author knew you would not be able to run given your sandbox limitations.
+- Never add or modify any code related to `CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR` or `CODEX_SANDBOX_ENV_VAR`.
+  - You operate in a sandbox where `CODEX_SANDBOX_NETWORK_DISABLED=1` will be set whenever you use the `shell` tool. Any existing code that uses `CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR` was authored with this fact in mind. It is often used to early exit out of tests that the author knew you would not be able to run given your sandbox limitations.
+  - Similarly, when you spawn a process using Seatbelt (`/usr/bin/sandbox-exec`), `CODEX_SANDBOX=seatbelt` will be set on the child process. Integration tests that want to run Seatbelt themselves cannot be run under Seatbelt, so checks for `CODEX_SANDBOX=seatbelt` are also often used to early exit out of tests, as appropriate.
+
+Before creating a pull request with changes to `codex-rs`, run `just fmt` (in the `codex-rs` directory) to format the code and `just fix` (in the `codex-rs` directory) to fix any linter issues in the code. Then ensure the test suite passes by running `cargo test --all-features` in the `codex-rs` directory.
+
+When making individual changes prefer running tests on individual files or projects first.
--- a/4
+++ b/4
@@ -1,2 +1,6 @@
 OpenAI Codex
 Copyright 2025 OpenAI
+
+This project includes code derived from [Ratatui](https://github.com/ratatui/ratatui), licensed under the MIT license.
+Copyright (c) 2016-2022 Florian Dehau
+Copyright (c) 2023-2025 The Ratatui Developers
--- a/README.md
+++ b/README.md
@@ -95,6 +95,12 @@ codex login

 If you complete the process successfully, you should have a `~/.codex/auth.json` file that contains the credentials that Codex will use.

+To verify whether you are currently logged in, run:
+
+```
+codex login status
+```
+
 If you encounter problems with the login flow, please comment on <https://github.com/openai/codex/issues/1243>.

 <details>
--- a/SUMMARY.md
+++ b/SUMMARY.md
@@ -0,0 +1,21 @@
+You are a summarization assistant. A conversation follows between a user and a coding-focused AI (Codex). Your task is to generate a clear summary capturing:
+
+• High-level objective or problem being solved  
+• Key instructions or design decisions given by the user  
+• Main code actions or behaviors from the AI  
+• Important variables, functions, modules, or outputs discussed  
+• Any unresolved questions or next steps
+
+Produce the summary in a structured format like:
+
+**Objective:** …
+
+**User instructions:** … (bulleted)
+
+**AI actions / code behavior:** … (bulleted)
+
+**Important entities:** … (e.g. function names, variables, files)
+
+**Open issues / next steps:** … (if any)
+
+**Summary (concise):** (one or two sentences)
--- a/codex-cli/bin/codex.js
+++ b/codex-cli/bin/codex.js
@@ -15,7 +15,6 @@
 *      current platform / architecture, an error is thrown.
 */

-import { spawnSync } from "child_process";
 import fs from "fs";
 import path from "path";
 import { fileURLToPath, pathToFileURL } from "url";
@@ -35,7 +34,7 @@ const wantsNative = fs.existsSync(path.join(__dirname, "use-native")) ||
    : false);

 // Try native binary if requested.
-if (wantsNative) {
+if (wantsNative && process.platform !== 'win32') {
  const { platform, arch } = process;

  let targetTriple = null;
@@ -74,22 +73,77 @@ if (wantsNative) {
  }

  const binaryPath = path.join(__dirname, "..", "bin", `codex-${targetTriple}`);
-  const result = spawnSync(binaryPath, process.argv.slice(2), {
+
+  // Use an asynchronous spawn instead of spawnSync so that Node is able to
+  // respond to signals (e.g. Ctrl-C / SIGINT) while the native binary is
+  // executing. This allows us to forward those signals to the child process
+  // and guarantees that when either the child terminates or the parent
+  // receives a fatal signal, both processes exit in a predictable manner.
+  const { spawn } = await import("child_process");
+
+  const child = spawn(binaryPath, process.argv.slice(2), {
    stdio: "inherit",
+    env: { ...process.env, CODEX_MANAGED_BY_NPM: "1" },
  });

-  const exitCode = typeof result.status === "number" ? result.status : 1;
-  process.exit(exitCode);
-}
+  child.on("error", (err) => {
+    // Typically triggered when the binary is missing or not executable.
+    // Log the error (which prints a helpful stack trace) and terminate the
+    // parent with a non-zero exit code.
+    // eslint-disable-next-line no-console
+    console.error(err);
+    process.exit(1);
+  });

-// Fallback: execute the original JavaScript CLI.
+  // Forward common termination signals to the child so that it shuts down
+  // gracefully. In the handler we temporarily disable the default behavior of
+  // exiting immediately; once the child has been signaled we simply wait for
+  // its exit event which will in turn terminate the parent (see below).
+  const forwardSignal = (signal) => {
+    if (child.killed) {
+      return;
+    }
+    try {
+      child.kill(signal);
+    } catch {
+      /* ignore */
+    }
+  };

-// Resolve the path to the compiled CLI bundle
-const cliPath = path.resolve(__dirname, "../dist/cli.js");
-const cliUrl = pathToFileURL(cliPath).href;
+  ["SIGINT", "SIGTERM", "SIGHUP"].forEach((sig) => {
+    process.on(sig, () => forwardSignal(sig));
+  });

-// Load and execute the CLI
-(async () => {
+  // When the child exits, mirror its termination reason in the parent so that
+  // shell scripts and other tooling observe the correct exit status.
+  // Wrap the lifetime of the child process in a Promise so that we can await
+  // its termination in a structured way. The Promise resolves with an object
+  // describing how the child exited: either via exit code or due to a signal.
+  const childResult = await new Promise((resolve) => {
+    child.on("exit", (code, signal) => {
+      if (signal) {
+        resolve({ type: "signal", signal });
+      } else {
+        resolve({ type: "code", exitCode: code ?? 1 });
+      }
+    });
+  });
+
+  if (childResult.type === "signal") {
+    // Re-emit the same signal so that the parent terminates with the expected
+    // semantics (this also sets the correct exit code of 128 + n).
+    process.kill(process.pid, childResult.signal);
+  } else {
+    process.exit(childResult.exitCode);
+  }
+} else {
+  // Fallback: execute the original JavaScript CLI.
+
+  // Resolve the path to the compiled CLI bundle
+  const cliPath = path.resolve(__dirname, "../dist/cli.js");
+  const cliUrl = pathToFileURL(cliPath).href;
+
+  // Load and execute the CLI
  try {
    await import(cliUrl);
  } catch (err) {
@@ -97,4 +151,4 @@ const cliUrl = pathToFileURL(cliPath).href;
    console.error(err);
    process.exit(1);
  }
-})();
+}
--- a/codex-cli/src/approvals.ts
+++ b/codex-cli/src/approvals.ts
@@ -370,11 +370,26 @@ export function isSafeCommand(
        reason: "View file with line numbers",
        group: "Reading files",
      };
-    case "rg":
+    case "rg": {
+      // Certain ripgrep options execute external commands or invoke other
+      // processes, so we must reject them.
+      const isUnsafe = command.some(
+        (arg: string) =>
+          UNSAFE_OPTIONS_FOR_RIPGREP_WITHOUT_ARGS.has(arg) ||
+          [...UNSAFE_OPTIONS_FOR_RIPGREP_WITH_ARGS].some(
+            (opt) => arg === opt || arg.startsWith(`${opt}=`),
+          ),
+      );
+
+      if (isUnsafe) {
+        break;
+      }
+
      return {
        reason: "Ripgrep search",
        group: "Searching",
      };
+    }
    case "find": {
      // Certain options to `find` allow executing arbitrary processes, so we
      // cannot auto-approve them.
@@ -495,6 +510,22 @@ const UNSAFE_OPTIONS_FOR_FIND_COMMAND: ReadonlySet<string> = new Set([
  "-fprintf",
 ]);

+// Ripgrep options that are considered unsafe because they may execute
+// arbitrary commands or spawn auxiliary processes.
+const UNSAFE_OPTIONS_FOR_RIPGREP_WITH_ARGS: ReadonlySet<string> = new Set([
+  // Executes an arbitrary command for each matching file.
+  "--pre",
+  // Allows custom hostname command which could leak environment details.
+  "--hostname-bin",
+]);
+
+const UNSAFE_OPTIONS_FOR_RIPGREP_WITHOUT_ARGS: ReadonlySet<string> = new Set([
+  // Enables searching inside archives which triggers external decompression
+  // utilities – reject out of an abundance of caution.
+  "--search-zip",
+  "-z",
+]);
+
 // ---------------- Helper utilities for complex shell expressions -----------------

 // A conservative allow-list of bash operators that do not, on their own, cause
--- a/codex-cli/src/components/chat/terminal-chat-input.tsx
+++ b/codex-cli/src/components/chat/terminal-chat-input.tsx
@@ -10,25 +10,19 @@ import type {
 import MultilineTextEditor from "./multiline-editor";
 import { TerminalChatCommandReview } from "./terminal-chat-command-review.js";
 import TextCompletions from "./terminal-chat-completions.js";
-import { loadConfig, type AppConfig } from "../../utils/config.js";
+import { loadConfig } from "../../utils/config.js";
 import { getFileSystemSuggestions } from "../../utils/file-system-suggestions.js";
 import { expandFileTags } from "../../utils/file-tag-utils";
 import { createInputItem } from "../../utils/input-utils.js";
 import { log } from "../../utils/logger/log.js";
 import { setSessionId } from "../../utils/session.js";
 import { SLASH_COMMANDS, type SlashCommand } from "../../utils/slash-commands";
-import {
-  runSecurityReview,
-  SecurityReviewError,
-} from "../../utils/security-review.js";
-import type { SecurityReviewMode } from "../../utils/security-review.js";
 import {
  loadCommandHistory,
  addToHistory,
 } from "../../utils/storage/command-history.js";
 import { clearTerminal, onExit } from "../../utils/terminal.js";
 import { Box, Text, useApp, useInput, useStdin } from "ink";
-import path from "node:path";
 import { fileURLToPath } from "node:url";
 import React, {
  useCallback,
@@ -45,130 +39,6 @@ const suggestions = [
  "are there any bugs in my code?",
 ];

-const SEC_REVIEW_COMMAND = "/secreview";
-
-type SecReviewCommandOptions = {
-  mode: SecurityReviewMode;
-  includePaths: Array<string>;
-  outputPath?: string;
-  repoPath?: string;
-  modelName?: string;
-};
-
-function tokenizeCommand(input: string): Array<string> {
-  const tokens: Array<string> = [];
-  const regex = /"([^"]*)"|'([^']*)'|(\S+)/g;
-  let match: RegExpExecArray | null;
-  while ((match = regex.exec(input)) !== null) {
-    if (match[1] != null) {
-      tokens.push(match[1]);
-    } else if (match[2] != null) {
-      tokens.push(match[2]);
-    } else if (match[3] != null) {
-      tokens.push(match[3]);
-    }
-  }
-  return tokens;
-}
-
-function parseSecReviewCommand(input: string): SecReviewCommandOptions {
-  const tokens = tokenizeCommand(input).slice(1); // drop the command itself
-  let mode: SecurityReviewMode = "full";
-  const includePaths: Array<string> = [];
-  let outputPath: string | undefined;
-  let repoPath: string | undefined;
-  let modelName: string | undefined;
-
-  const parseMode = (value: string, option: string) => {
-    if (value === "bugs") {
-      mode = "bugs";
-    } else if (value === "full") {
-      mode = "full";
-    } else {
-      throw new Error(`Unknown ${option} value "${value}". Use "full" or "bugs".`);
-    }
-  };
-
-  for (let i = 0; i < tokens.length; i += 1) {
-    const token = tokens[i];
-
-    const expectValue = (label: string): string => {
-      if (i + 1 >= tokens.length) {
-        throw new Error(`Expected value after ${label}`);
-      }
-      i += 1;
-      return tokens[i];
-    };
-
-    if (token === "--") {
-      break;
-    } else if (token === "bugs" || token === "--bugs" || token === "--mode=bugs") {
-      mode = "bugs";
-    } else if (token === "full" || token === "--full" || token === "--mode=full") {
-      mode = "full";
-    } else if (token === "--mode") {
-      parseMode(expectValue("--mode"), "--mode");
-    } else if (token.startsWith("--mode=")) {
-      parseMode(token.slice("--mode=".length), "--mode");
-    } else if (token === "--path" || token === "-p") {
-      includePaths.push(expectValue(token));
-    } else if (token.startsWith("--path=")) {
-      includePaths.push(token.slice("--path=".length));
-    } else if (token.startsWith("-p=")) {
-      includePaths.push(token.slice("-p=".length));
-    } else if (
-      token === "--output" ||
-      token === "-o" ||
-      token === "--output-location"
-    ) {
-      outputPath = expectValue(token);
-    } else if (token.startsWith("--output=")) {
-      outputPath = token.slice("--output=".length);
-    } else if (token.startsWith("-o=")) {
-      outputPath = token.slice("-o=".length);
-    } else if (
-      token === "--repo" ||
-      token === "--repo-location" ||
-      token === "--repository"
-    ) {
-      repoPath = expectValue(token);
-    } else if (token.startsWith("--repo=")) {
-      repoPath = token.slice("--repo=".length);
-    } else if (token.startsWith("--repo-location=")) {
-      repoPath = token.slice("--repo-location=".length);
-    } else if (token === "--model" || token === "--model-name") {
-      modelName = expectValue(token);
-    } else if (token.startsWith("--model=")) {
-      modelName = token.slice("--model=".length);
-    } else if (token.startsWith("--model-name=")) {
-      modelName = token.slice("--model-name=".length);
-    } else if (token.length > 0) {
-      includePaths.push(token);
-    }
-  }
-
-  return {
-    mode,
-    includePaths: includePaths.filter((p) => p.length > 0),
-    outputPath,
-    repoPath,
-    modelName,
-  };
-}
-
-function trimLogOutput(logText: string, maxLines: number = 40): string {
-  const normalised = logText.replace(/\r\n/g, "\n").trimEnd();
-  if (normalised === "") {
-    return "(empty)";
-  }
-  const lines = normalised.split("\n");
-  if (lines.length <= maxLines) {
-    return normalised;
-  }
-  const tail = lines.slice(-maxLines);
-  return ["… (showing last " + maxLines + " lines)", ...tail].join("\n");
-}
-
 export default function TerminalChatInput({
  isNew,
  loading,
@@ -190,7 +60,6 @@ export default function TerminalChatInput({
  active,
  thinkingSeconds,
  items = [],
-  config,
 }: {
  isNew: boolean;
  loading: boolean;
@@ -216,7 +85,6 @@ export default function TerminalChatInput({
  thinkingSeconds: number;
  // New: current conversation items so we can include them in bug reports
  items?: Array<ResponseItem>;
-  config: AppConfig;
 }): React.ReactElement {
  // Slash command suggestion index
  const [selectedSlashSuggestion, setSelectedSlashSuggestion] =
@@ -644,230 +512,6 @@ export default function TerminalChatInput({
      } else if (inputValue.startsWith("/approval")) {
        setInput("");
        openApprovalOverlay();
-        return;
-      } else if (inputValue.startsWith(SEC_REVIEW_COMMAND)) {
-        setInput("");
-        const commandId = `secreview-${Date.now()}`;
-
-        let parsed: SecReviewCommandOptions;
-        try {
-          parsed = parseSecReviewCommand(inputValue);
-        } catch (error) {
-          const message =
-            error instanceof Error ? error.message : String(error);
-          setItems((prev) => [
-            ...prev,
-            {
-              id: `${commandId}-parse-error`,
-              type: "message",
-              role: "system",
-              content: [
-                {
-                  type: "input_text",
-                  text: `⚠️ Unable to parse ${SEC_REVIEW_COMMAND} arguments: ${message}`,
-                },
-              ],
-            },
-          ]);
-          return;
-        }
-
-        const repoPath = parsed.repoPath
-          ? path.isAbsolute(parsed.repoPath)
-            ? parsed.repoPath
-            : path.resolve(process.cwd(), parsed.repoPath)
-          : process.cwd();
-
-        const resolvedOutputPath =
-          parsed.outputPath != null
-            ? path.isAbsolute(parsed.outputPath)
-              ? parsed.outputPath
-              : path.resolve(repoPath, parsed.outputPath)
-            : undefined;
-
-        const scopeDescription =
-          parsed.includePaths.length > 0
-            ? parsed.includePaths.join(", ")
-            : "entire repository";
-
-        const introLines = [
-          `🔐 Running AppSec security review (mode: ${parsed.mode}).`,
-          `Repository: ${repoPath}`,
-          `Scope: ${scopeDescription}`,
-        ];
-
-        if (resolvedOutputPath) {
-          introLines.push(`Output: ${resolvedOutputPath}`);
-        }
-
-        if (parsed.modelName) {
-          introLines.push(`Model override: ${parsed.modelName}`);
-        }
-
-        setItems((prev) => [
-          ...prev,
-          {
-            id: `${commandId}-start`,
-            type: "message",
-            role: "system",
-            content: [
-              {
-                type: "input_text",
-                text: introLines.join("\n"),
-              },
-            ],
-          },
-        ]);
-
-        try {
-          const result = await runSecurityReview({
-            repoPath,
-            includePaths: parsed.includePaths,
-            outputPath: resolvedOutputPath,
-            modelName: parsed.modelName,
-            mode: parsed.mode,
-            config,
-          });
-
-          const summaryLines = [
-            "✅ AppSec review complete.",
-            `Artifacts: ${result.outputRoot}`,
-          ];
-          if (!result.reportContent) {
-            summaryLines.push("ℹ️ report.md not found in output.");
-          }
-          if (!result.bugsContent) {
-            summaryLines.push("ℹ️ context/bugs.md not found in output.");
-          }
-
-          setItems((prev) => [
-            ...prev,
-            {
-              id: `${commandId}-complete`,
-              type: "message",
-              role: "system",
-              content: [
-                {
-                  type: "input_text",
-                  text: summaryLines.join("\n"),
-                },
-              ],
-            },
-          ]);
-
-          if (parsed.mode === "full" && result.reportContent) {
-            setItems((prev) => [
-              ...prev,
-              {
-                id: `${commandId}-report`,
-                type: "message",
-                role: "assistant",
-                content: [
-                  {
-                    type: "output_text",
-                    text: `# AppSec Security Review Report\n\n${result.reportContent}`,
-                  },
-                ],
-              },
-            ]);
-          }
-
-          if (result.bugsContent) {
-            const heading =
-              parsed.mode === "full"
-                ? "## Bugs Summary"
-                : "# AppSec Bugs Summary";
-            setItems((prev) => [
-              ...prev,
-              {
-                id: `${commandId}-bugs`,
-                type: "message",
-                role: "assistant",
-                content: [
-                  {
-                    type: "output_text",
-                    text: `${heading}\n\n${result.bugsContent}`,
-                  },
-                ],
-              },
-            ]);
-          } else {
-            setItems((prev) => [
-              ...prev,
-              {
-                id: `${commandId}-no-bugs`,
-                type: "message",
-                role: "system",
-                content: [
-                  {
-                    type: "input_text",
-                    text:
-                      "No bug summary produced. Check the output directory for details.",
-                  },
-                ],
-              },
-            ]);
-          }
-
-          if (parsed.mode === "bugs" && result.reportContent) {
-            setItems((prev) => [
-              ...prev,
-              {
-                id: `${commandId}-report-location`,
-                type: "message",
-                role: "system",
-                content: [
-                  {
-                    type: "input_text",
-                    text: `Full report available at ${result.reportPath}`,
-                  },
-                ],
-              },
-            ]);
-          }
-          if (result.stdout.trim()) {
-            setItems((prev) => [
-              ...prev,
-              {
-                id: `${commandId}-logs`,
-                type: "message",
-                role: "system",
-                content: [
-                  {
-                    type: "input_text",
-                    text: `Logs:\n${trimLogOutput(result.stdout)}`,
-                  },
-                ],
-              },
-            ]);
-          }
-        } catch (error) {
-          const message =
-            error instanceof Error ? error.message : String(error);
-          const stderr =
-            error instanceof SecurityReviewError && error.stderr
-              ? `\n\nstderr last lines:\n${trimLogOutput(error.stderr)}`
-              : "";
-          const stdout =
-            error instanceof SecurityReviewError && error.stdout
-              ? `\n\nstdout last lines:\n${trimLogOutput(error.stdout)}`
-              : "";
-          setItems((prev) => [
-            ...prev,
-            {
-              id: `${commandId}-error`,
-              type: "message",
-              role: "system",
-              content: [
-                {
-                  type: "input_text",
-                  text: `❌ AppSec review failed: ${message}${stderr}${stdout}`,
-                },
-              ],
-            },
-          ]);
-        }
-
        return;
      } else if (["exit", "q", ":q"].includes(inputValue)) {
        setInput("");
@@ -1063,13 +707,13 @@ export default function TerminalChatInput({
      submitInput([inputItem]);

      // Get config for history persistence.
-      const historyConfig = loadConfig();
+      const config = loadConfig();

      // Add to history and update state.
      const updatedHistory = await addToHistory(value, history, {
-        maxSize: historyConfig.history?.maxSize ?? 1000,
-        saveHistory: historyConfig.history?.saveHistory ?? true,
-        sensitivePatterns: historyConfig.history?.sensitivePatterns ?? [],
+        maxSize: config.history?.maxSize ?? 1000,
+        saveHistory: config.history?.saveHistory ?? true,
+        sensitivePatterns: config.history?.sensitivePatterns ?? [],
      });

      setHistory(updatedHistory);
@@ -1098,7 +742,6 @@ export default function TerminalChatInput({
      onCompact,
      skipNextSubmit,
      items,
-      config,
    ],
  );

--- a/codex-cli/src/components/chat/terminal-chat.tsx
+++ b/codex-cli/src/components/chat/terminal-chat.tsx
@@ -580,7 +580,6 @@ export default function TerminalChat({
            }}
            items={items}
            thinkingSeconds={thinkingSeconds}
-            config={config}
          />
        )}
        {overlayMode === "history" && (
--- a/codex-cli/src/components/help-overlay.tsx
+++ b/codex-cli/src/components/help-overlay.tsx
@@ -56,10 +56,6 @@ export default function HelpOverlay({
          <Text color="cyan">/bug</Text> – generate a prefilled GitHub issue URL
          with session log
        </Text>
-        <Text>
-          <Text color="cyan">/secreview</Text> – run AppSec security review and
-          show the results
-        </Text>
        <Text>
          <Text color="cyan">/diff</Text> – view working tree git diff
        </Text>
--- a/codex-cli/src/utils/agent/sandbox/macos-seatbelt.ts
+++ b/codex-cli/src/utils/agent/sandbox/macos-seatbelt.ts
@@ -147,4 +147,8 @@ const READ_ONLY_SEATBELT_POLICY = `
  (sysctl-name "kern.version")
  (sysctl-name "sysctl.proc_cputype")
  (sysctl-name-prefix "hw.perflevel")
-)`.trim();
+)
+
+; Added on top of Chrome profile
+; Needed for python multiprocessing on MacOS for the SemLock
+(allow ipc-posix-sem)`.trim();
--- a/codex-cli/src/utils/slash-commands.ts
+++ b/codex-cli/src/utils/slash-commands.ts
@@ -24,10 +24,6 @@ export const SLASH_COMMANDS: Array<SlashCommand> = [
  { command: "/help", description: "Show list of commands" },
  { command: "/model", description: "Open model selection panel" },
  { command: "/approval", description: "Open approval mode selection panel" },
-  {
-    command: "/secreview",
-    description: "Run AppSec security review and display the generated reports",
-  },
  {
    command: "/bug",
    description: "Generate a prefilled GitHub issue URL with session log",
--- a/codex-cli/tests/approvals.test.ts
+++ b/codex-cli/tests/approvals.test.ts
@@ -44,6 +44,14 @@ describe("canAutoApprove()", () => {
      group: "Navigating",
      runInSandbox: false,
    });
+
+    // Ripgrep safe invocation.
+    expect(check(["rg", "TODO"])).toEqual({
+      type: "auto-approve",
+      reason: "Ripgrep search",
+      group: "Searching",
+      runInSandbox: false,
+    });
  });

  test("simple safe commands within a `bash -lc` call", () => {
@@ -67,6 +75,24 @@ describe("canAutoApprove()", () => {
    });
  });

+  test("ripgrep unsafe flags", () => {
+    // Flags that do not take arguments
+    expect(check(["rg", "--search-zip", "TODO"])).toEqual({ type: "ask-user" });
+    expect(check(["rg", "-z", "TODO"])).toEqual({ type: "ask-user" });
+
+    // Flags that take arguments (provided separately)
+    expect(check(["rg", "--pre", "cat", "TODO"])).toEqual({ type: "ask-user" });
+    expect(check(["rg", "--hostname-bin", "hostname", "TODO"])).toEqual({
+      type: "ask-user",
+    });
+
+    // Flags that take arguments in = form
+    expect(check(["rg", "--pre=cat", "TODO"])).toEqual({ type: "ask-user" });
+    expect(check(["rg", "--hostname-bin=hostname", "TODO"])).toEqual({
+      type: "ask-user",
+    });
+  });
+
  test("bash -lc commands with unsafe redirects", () => {
    expect(check(["bash", "-lc", "echo hello > file.txt"])).toEqual({
      type: "ask-user",
--- a/codex-cli/tests/clear-command.test.tsx
+++ b/codex-cli/tests/clear-command.test.tsx
@@ -61,11 +61,6 @@ describe("/clear command", () => {
      active: true,
      thinkingSeconds: 0,
      items: existingItems,
-      config: {
-        model: "codex-mini-latest",
-        instructions: "",
-        provider: "openai",
-      },
    };

    const { stdin, flush, cleanup } = renderTui(
--- a/codex-cli/tests/multiline-history-behavior.test.tsx
+++ b/codex-cli/tests/multiline-history-behavior.test.tsx
@@ -66,27 +66,17 @@ function stubProps(): any {
    loading: false,
    submitInput: vi.fn(),
    confirmationPrompt: null,
-    explanation: undefined,
    submitConfirmation: vi.fn(),
    setLastResponseId: vi.fn(),
-    setItems: vi.fn(),
+    // Cast to any to satisfy the generic React.Dispatch signature without
+    // pulling the ResponseItem type into the test bundle.
+    setItems: (() => {}) as any,
    contextLeftPercent: 100,
    openOverlay: vi.fn(),
    openModelOverlay: vi.fn(),
    openHelpOverlay: vi.fn(),
-    openApprovalOverlay: vi.fn(),
-    openSessionsOverlay: vi.fn(),
-    openDiffOverlay: vi.fn(),
-    onCompact: vi.fn(),
    interruptAgent: vi.fn(),
    active: true,
-    thinkingSeconds: 0,
-    items: [],
-    config: {
-      model: "codex-mini-latest",
-      instructions: "",
-      provider: "openai",
-    },
  };
 }

--- a/codex-cli/tests/slash-commands.test.ts
+++ b/codex-cli/tests/slash-commands.test.ts
@@ -10,7 +10,6 @@ test("SLASH_COMMANDS includes expected commands", () => {
  expect(commands).toContain("/help");
  expect(commands).toContain("/model");
  expect(commands).toContain("/approval");
-  expect(commands).toContain("/secreview");
  expect(commands).toContain("/clearhistory");
  expect(commands).toContain("/diff");
 });
--- a/codex-cli/tests/terminal-chat-input-compact.test.tsx
+++ b/codex-cli/tests/terminal-chat-input-compact.test.tsx
@@ -26,11 +26,6 @@ describe("TerminalChatInput compact command", () => {
      interruptAgent: () => {},
      active: true,
      thinkingSeconds: 0,
-      config: {
-        model: "codex-mini-latest",
-        instructions: "",
-        provider: "openai",
-      },
    };
    const { lastFrameStripped } = renderTui(<TerminalChatInput {...props} />);
    const frame = lastFrameStripped();
--- a/codex-cli/tests/terminal-chat-input-file-tag-suggestions.test.tsx
+++ b/codex-cli/tests/terminal-chat-input-file-tag-suggestions.test.tsx
@@ -81,11 +81,6 @@ describe("TerminalChatInput file tag suggestions", () => {
    interruptAgent: vi.fn(),
    active: true,
    thinkingSeconds: 0,
-    config: {
-      model: "codex-mini-latest",
-      instructions: "",
-      provider: "openai",
-    },
  };

  beforeEach(() => {
--- a/codex-cli/tests/terminal-chat-input-multiline.test.tsx
+++ b/codex-cli/tests/terminal-chat-input-multiline.test.tsx
@@ -47,11 +47,6 @@ describe("TerminalChatInput multiline functionality", () => {
      interruptAgent: () => {},
      active: true,
      thinkingSeconds: 0,
-      config: {
-        model: "codex-mini-latest",
-        instructions: "",
-        provider: "openai",
-      },
    };

    const { stdin, lastFrameStripped, flush, cleanup } = renderTui(
@@ -104,11 +99,6 @@ describe("TerminalChatInput multiline functionality", () => {
      interruptAgent: () => {},
      active: true,
      thinkingSeconds: 0,
-      config: {
-        model: "codex-mini-latest",
-        instructions: "",
-        provider: "openai",
-      },
    };

    const { stdin, lastFrameStripped, flush, cleanup } = renderTui(
--- a/codex-rs/Cargo.lock
+++ b/codex-rs/Cargo.lock
@@ -399,6 +399,15 @@ version = "2.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6099cdc01846bc367c4e7dd630dc5966dccf36b652fae7a74e17b640411a91b2"

+[[package]]
+name = "block-buffer"
+version = "0.10.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
+dependencies = [
+ "generic-array",
+]
+
 [[package]]
 name = "bstr"
 version = "1.12.0"
@@ -454,18 +463,18 @@ checksum = "df8670b8c7b9dae1793364eafadf7239c40d669904660c5960d74cfd80b46a53"

 [[package]]
 name = "castaway"
-version = "0.2.3"
+version = "0.2.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0abae9be0aaf9ea96a3b1b8b1b55c602ca751eba1b1500220cea4ecbafe7c0d5"
+checksum = "dec551ab6e7578819132c713a93c022a05d60159dc86e7a7050223577484c55a"
 dependencies = [
 "rustversion",
 ]

 [[package]]
 name = "cc"
-version = "1.2.29"
+version = "1.2.30"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5c1599538de2394445747c8cf7935946e3cc27e9625f889d979bfb2aaf569362"
+checksum = "deec109607ca693028562ed836a5f1c4b8bd77755c4e132fc5ce11b0b6211ae7"
 dependencies = [
 "jobserver",
 "libc",
@@ -561,9 +570,9 @@ checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675"

 [[package]]
 name = "clipboard-win"
-version = "5.4.0"
+version = "5.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "15efe7a882b08f34e38556b14f2fb3daa98769d06c7f0c1b076dfd0d983bc892"
+checksum = "bde03770d3df201d4fb868f2c9c59e66a3e4e2bd06692a0fe701e7103c7e84d4"
 dependencies = [
 "error-code",
 ]
@@ -596,6 +605,18 @@ dependencies = [
 "tree-sitter-bash",
 ]

+[[package]]
+name = "codex-arg0"
+version = "0.0.0"
+dependencies = [
+ "anyhow",
+ "codex-apply-patch",
+ "codex-core",
+ "codex-linux-sandbox",
+ "dotenvy",
+ "tokio",
+]
+
 [[package]]
 name = "codex-chatgpt"
 version = "0.0.0"
@@ -619,11 +640,11 @@ dependencies = [
 "anyhow",
 "clap",
 "clap_complete",
+ "codex-arg0",
 "codex-chatgpt",
 "codex-common",
 "codex-core",
 "codex-exec",
- "codex-linux-sandbox",
 "codex-login",
 "codex-mcp-server",
 "codex-tui",
@@ -640,7 +661,7 @@ dependencies = [
 "clap",
 "codex-core",
 "serde",
- "toml 0.9.1",
+ "toml 0.9.2",
 ]

 [[package]]
@@ -652,37 +673,46 @@ dependencies = [
 "async-channel",
 "base64 0.22.1",
 "bytes",
+ "chrono",
 "codex-apply-patch",
+ "codex-login",
 "codex-mcp-client",
+ "core_test_support",
 "dirs",
 "env-flags",
 "eventsource-stream",
 "fs2",
 "futures",
 "landlock",
+ "libc",
 "maplit",
 "mcp-types",
 "mime_guess",
 "openssl-sys",
 "predicates",
 "pretty_assertions",
- "rand 0.9.1",
+ "rand 0.9.2",
 "reqwest",
 "seccompiler",
 "serde",
+ "serde_bytes",
 "serde_json",
- "strum_macros 0.27.1",
+ "sha1",
+ "shlex",
+ "strum_macros 0.27.2",
 "tempfile",
 "thiserror 2.0.12",
 "time",
 "tokio",
 "tokio-test",
 "tokio-util",
- "toml 0.9.1",
+ "toml 0.9.2",
 "tracing",
 "tree-sitter",
 "tree-sitter-bash",
 "uuid",
+ "walkdir",
+ "whoami",
 "wildmatch",
 "wiremock",
 ]
@@ -692,14 +722,17 @@ name = "codex-exec"
 version = "0.0.0"
 dependencies = [
 "anyhow",
+ "assert_cmd",
 "chrono",
 "clap",
+ "codex-arg0",
 "codex-common",
 "codex-core",
- "codex-linux-sandbox",
 "owo-colors",
+ "predicates",
 "serde_json",
 "shlex",
+ "tempfile",
 "tokio",
 "tracing",
 "tracing-subscriber",
@@ -744,6 +777,7 @@ version = "0.0.0"
 dependencies = [
 "anyhow",
 "clap",
+ "codex-common",
 "codex-core",
 "landlock",
 "libc",
@@ -760,6 +794,7 @@ dependencies = [
 "reqwest",
 "serde",
 "serde_json",
+ "tempfile",
 "tokio",
 ]

@@ -781,17 +816,25 @@ name = "codex-mcp-server"
 version = "0.0.0"
 dependencies = [
 "anyhow",
+ "assert_cmd",
+ "codex-arg0",
 "codex-core",
- "codex-linux-sandbox",
 "mcp-types",
+ "mcp_test_support",
 "pretty_assertions",
 "schemars 0.8.22",
 "serde",
 "serde_json",
+ "shlex",
+ "strum_macros 0.27.2",
+ "tempfile",
 "tokio",
- "toml 0.9.1",
+ "tokio-test",
+ "toml 0.9.2",
 "tracing",
 "tracing-subscriber",
+ "uuid",
+ "wiremock",
 ]

 [[package]]
@@ -800,12 +843,13 @@ version = "0.0.0"
 dependencies = [
 "anyhow",
 "base64 0.22.1",
+ "chrono",
 "clap",
 "codex-ansi-escape",
+ "codex-arg0",
 "codex-common",
 "codex-core",
 "codex-file-search",
- "codex-linux-sandbox",
 "codex-login",
 "color-eyre",
 "crossterm",
@@ -815,23 +859,26 @@ dependencies = [
 "mcp-types",
 "path-clean",
 "pretty_assertions",
+ "rand 0.8.5",
 "ratatui",
 "ratatui-image",
 "regex-lite",
 "reqwest",
+ "serde",
 "serde_json",
 "shlex",
- "strum 0.27.1",
- "strum_macros 0.27.1",
- "time",
+ "strum 0.27.2",
+ "strum_macros 0.27.2",
+ "supports-color",
+ "textwrap 0.16.2",
 "tokio",
 "tracing",
 "tracing-appender",
 "tracing-subscriber",
 "tui-input",
 "tui-markdown",
- "tui-textarea",
 "unicode-segmentation",
+ "unicode-width 0.1.14",
 "uuid",
 ]

@@ -935,10 +982,29 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"

 [[package]]
-name = "crc32fast"
-version = "1.4.2"
+name = "core_test_support"
+version = "0.0.0"
+dependencies = [
+ "codex-core",
+ "serde_json",
+ "tempfile",
+ "tokio",
+]
+
+[[package]]
+name = "cpufeatures"
+version = "0.2.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3"
+checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "crc32fast"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
 dependencies = [
 "cfg-if",
 ]
@@ -1008,6 +1074,16 @@ version = "0.2.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"

+[[package]]
+name = "crypto-common"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
+dependencies = [
+ "generic-array",
+ "typenum",
+]
+
 [[package]]
 name = "ctor"
 version = "0.1.26"
@@ -1158,6 +1234,16 @@ version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8"

+[[package]]
+name = "digest"
+version = "0.10.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
+dependencies = [
+ "block-buffer",
+ "crypto-common",
+]
+
 [[package]]
 name = "dirs"
 version = "6.0.0"
@@ -1227,6 +1313,12 @@ version = "0.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"

+[[package]]
+name = "dotenvy"
+version = "0.15.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b"
+
 [[package]]
 name = "dupe"
 version = "0.9.1"
@@ -1459,7 +1551,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78"
 dependencies = [
 "cfg-if",
- "rustix 1.0.7",
+ "rustix 1.0.8",
 "windows-sys 0.59.0",
 ]

@@ -1647,6 +1739,16 @@ dependencies = [
 "byteorder",
 ]

+[[package]]
+name = "generic-array"
+version = "0.14.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
+dependencies = [
+ "typenum",
+ "version_check",
+]
+
 [[package]]
 name = "getopts"
 version = "0.2.23"
@@ -1898,9 +2000,9 @@ dependencies = [

 [[package]]
 name = "hyper-util"
-version = "0.1.15"
+version = "0.1.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7f66d5bd4c6f02bf0542fad85d626775bab9258cf795a4256dcaf3161114d1df"
+checksum = "8d9b05277c7e8da2c93a568989bb6207bef0112e8d17df7a6eda4a3cf143bc5e"
 dependencies = [
 "base64 0.22.1",
 "bytes",
@@ -1914,7 +2016,7 @@ dependencies = [
 "libc",
 "percent-encoding",
 "pin-project-lite",
- "socket2",
+ "socket2 0.6.0",
 "system-configuration",
 "tokio",
 "tower-service",
@@ -2167,9 +2269,9 @@ dependencies = [

 [[package]]
 name = "instability"
-version = "0.3.7"
+version = "0.3.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0bf9fed6d91cfb734e7476a06bde8300a1b94e217e1b523b6f0cd1a01998c71d"
+checksum = "435d80800b936787d62688c927b6490e887c7ef5ff9ce922c6c6050fca75eb9a"
 dependencies = [
 "darling",
 "indoc",
@@ -2200,9 +2302,9 @@ dependencies = [

 [[package]]
 name = "io-uring"
-version = "0.7.8"
+version = "0.7.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b86e202f00093dcba4275d4636b93ef9dd75d025ae560d2521b45ea28ab49013"
+checksum = "d93587f37623a1a17d94ef2bc9ada592f5465fe7732084ab7beefabe5c77c0c4"
 dependencies = [
 "bitflags 2.9.1",
 "cfg-if",
@@ -2236,6 +2338,12 @@ dependencies = [
 "windows-sys 0.59.0",
 ]

+[[package]]
+name = "is_ci"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7655c9839580ee829dfacba1d1278c2b7883e50a277ff7541299489d6bdfdc45"
+
 [[package]]
 name = "is_terminal_polyfill"
 version = "1.70.1"
@@ -2406,9 +2514,9 @@ dependencies = [

 [[package]]
 name = "libredox"
-version = "0.1.4"
+version = "0.1.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1580801010e535496706ba011c15f8532df6b42297d2e471fec38ceadd8c0638"
+checksum = "4488594b9328dee448adb906d8b126d9b7deb7cf5c22161ee591610bb1be83c0"
 dependencies = [
 "bitflags 2.9.1",
 "libc",
@@ -2541,6 +2649,24 @@ dependencies = [
 "serde_json",
 ]

+[[package]]
+name = "mcp_test_support"
+version = "0.0.0"
+dependencies = [
+ "anyhow",
+ "assert_cmd",
+ "codex-core",
+ "codex-mcp-server",
+ "mcp-types",
+ "pretty_assertions",
+ "serde_json",
+ "shlex",
+ "tempfile",
+ "tokio",
+ "uuid",
+ "wiremock",
+]
+
 [[package]]
 name = "memchr"
 version = "2.7.5"
@@ -3216,9 +3342,9 @@ dependencies = [

 [[package]]
 name = "rand"
-version = "0.9.1"
+version = "0.9.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97"
+checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1"
 dependencies = [
 "rand_chacha 0.9.0",
 "rand_core 0.9.3",
@@ -3265,8 +3391,7 @@ dependencies = [
 [[package]]
 name = "ratatui"
 version = "0.29.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eabd94c2f37801c20583fc49dd5cd6b0ba68c716787c2dd6ed18571e1e63117b"
+source = "git+https://github.com/nornagon/ratatui?branch=nornagon-v0.29.0-patch#bca287ddc5d38fe088c79e2eda22422b96226f2e"
 dependencies = [
 "bitflags 2.9.1",
 "cassowary",
@@ -3371,9 +3496,9 @@ dependencies = [

 [[package]]
 name = "redox_syscall"
-version = "0.5.13"
+version = "0.5.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0d04b7d0ee6b4a0207a0a7adb104d23ecb0b47d6beae7152d0fa34b692b29fd6"
+checksum = "7e8af0dde094006011e6a740d4879319439489813bd0bcdc7d821beaeeff48ec"
 dependencies = [
 "bitflags 2.9.1",
 ]
@@ -3521,9 +3646,9 @@ dependencies = [

 [[package]]
 name = "rgb"
-version = "0.8.51"
+version = "0.8.52"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a457e416a0f90d246a4c3288bd7a25b2304ca727f253f95be383dd17af56be8f"
+checksum = "0c6a884d2998352bb4daf0183589aec883f16a6da1f4dde84d8e2e9a5409a1ce"

 [[package]]
 name = "ring"
@@ -3599,22 +3724,22 @@ dependencies = [

 [[package]]
 name = "rustix"
-version = "1.0.7"
+version = "1.0.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266"
+checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8"
 dependencies = [
 "bitflags 2.9.1",
 "errno",
 "libc",
 "linux-raw-sys 0.9.4",
- "windows-sys 0.59.0",
+ "windows-sys 0.60.2",
 ]

 [[package]]
 name = "rustls"
-version = "0.23.28"
+version = "0.23.29"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7160e3e10bf4535308537f3c4e1641468cd0e485175d6163087c0393c7d46643"
+checksum = "2491382039b29b9b11ff08b76ff6c97cf287671dbb74f0be44bda389fffe9bd1"
 dependencies = [
 "once_cell",
 "rustls-pki-types",
@@ -3634,9 +3759,9 @@ dependencies = [

 [[package]]
 name = "rustls-webpki"
-version = "0.103.3"
+version = "0.103.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e4a72fe2bcf7a6ac6fd7d0b9e5cb68aeb7d4c0a0271730218b3e92d43b4eb435"
+checksum = "0a17884ae0c1b773f1ccd2bd4a8c72f16da897310a98b0e84bf349ad5ead92fc"
 dependencies = [
 "ring",
 "rustls-pki-types",
@@ -3838,6 +3963,15 @@ dependencies = [
 "serde_derive",
 ]

+[[package]]
+name = "serde_bytes"
+version = "0.11.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8437fd221bde2d4ca316d61b90e337e9e702b3820b87d63caa9ba6c02bd06d96"
+dependencies = [
+ "serde",
+]
+
 [[package]]
 name = "serde_derive"
 version = "1.0.219"
@@ -3862,9 +3996,9 @@ dependencies = [

 [[package]]
 name = "serde_json"
-version = "1.0.140"
+version = "1.0.141"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373"
+checksum = "30b9eff21ebe718216c6ec64e1d9ac57087aad11efc64e32002bce4a0d4c03d3"
 dependencies = [
 "indexmap 2.10.0",
 "itoa",
@@ -3946,6 +4080,17 @@ dependencies = [
 "syn 2.0.104",
 ]

+[[package]]
+name = "sha1"
+version = "0.10.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
+dependencies = [
+ "cfg-if",
+ "cpufeatures",
+ "digest",
+]
+
 [[package]]
 name = "sharded-slab"
 version = "0.1.7"
@@ -4036,6 +4181,12 @@ version = "1.15.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"

+[[package]]
+name = "smawk"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c"
+
 [[package]]
 name = "socket2"
 version = "0.5.10"
@@ -4046,6 +4197,16 @@ dependencies = [
 "windows-sys 0.52.0",
 ]

+[[package]]
+name = "socket2"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807"
+dependencies = [
+ "libc",
+ "windows-sys 0.59.0",
+]
+
 [[package]]
 name = "stable_deref_trait"
 version = "1.2.0"
@@ -4088,7 +4249,7 @@ dependencies = [
 "starlark_syntax",
 "static_assertions",
 "strsim 0.10.0",
- "textwrap",
+ "textwrap 0.11.0",
 "thiserror 1.0.69",
 ]

@@ -4189,9 +4350,9 @@ dependencies = [

 [[package]]
 name = "strum"
-version = "0.27.1"
+version = "0.27.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f64def088c51c9510a8579e3c5d67c65349dcf755e5479ad3d010aa6454e2c32"
+checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf"

 [[package]]
 name = "strum_macros"
@@ -4208,14 +4369,13 @@ dependencies = [

 [[package]]
 name = "strum_macros"
-version = "0.27.1"
+version = "0.27.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c77a8c5abcaf0f9ce05d62342b7d298c346515365c36b673df4ebe3ced01fde8"
+checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7"
 dependencies = [
 "heck",
 "proc-macro2",
 "quote",
- "rustversion",
 "syn 2.0.104",
 ]

@@ -4225,6 +4385,15 @@ version = "2.6.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"

+[[package]]
+name = "supports-color"
+version = "3.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c64fc7232dd8d2e4ac5ce4ef302b1d81e0b80d055b9d77c7c4f51f6aa4c867d6"
+dependencies = [
+ "is_ci",
+]
+
 [[package]]
 name = "syn"
 version = "1.0.109"
@@ -4338,7 +4507,7 @@ dependencies = [
 "fastrand",
 "getrandom 0.3.3",
 "once_cell",
- "rustix 1.0.7",
+ "rustix 1.0.8",
 "windows-sys 0.59.0",
 ]

@@ -4359,7 +4528,7 @@ version = "0.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "45c6481c4829e4cc63825e62c49186a34538b7b2750b73b266581ffb612fb5ed"
 dependencies = [
- "rustix 1.0.7",
+ "rustix 1.0.8",
 "windows-sys 0.59.0",
 ]

@@ -4378,6 +4547,17 @@ dependencies = [
 "unicode-width 0.1.14",
 ]

+[[package]]
+name = "textwrap"
+version = "0.16.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c13547615a44dc9c452a8a534638acdf07120d4b6847c8178705da06306a3057"
+dependencies = [
+ "smawk",
+ "unicode-linebreak",
+ "unicode-width 0.2.0",
+]
+
 [[package]]
 name = "thiserror"
 version = "1.0.69"
@@ -4505,7 +4685,7 @@ dependencies = [
 "pin-project-lite",
 "signal-hook-registry",
 "slab",
- "socket2",
+ "socket2 0.5.10",
 "tokio-macros",
 "windows-sys 0.52.0",
 ]
@@ -4592,9 +4772,9 @@ dependencies = [

 [[package]]
 name = "toml"
-version = "0.9.1"
+version = "0.9.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0207d6ed1852c2a124c1fbec61621acb8330d2bf969a5d0643131e9affd985a5"
+checksum = "ed0aee96c12fa71097902e0bb061a5e1ebd766a6636bb605ba401c45c1650eac"
 dependencies = [
 "indexmap 2.10.0",
 "serde",
@@ -4638,18 +4818,18 @@ dependencies = [

 [[package]]
 name = "toml_parser"
-version = "1.0.0"
+version = "1.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b5c1c469eda89749d2230d8156a5969a69ffe0d6d01200581cdc6110674d293e"
+checksum = "97200572db069e74c512a14117b296ba0a80a30123fbbb5aa1f4a348f639ca30"
 dependencies = [
 "winnow",
 ]

 [[package]]
 name = "toml_writer"
-version = "1.0.0"
+version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b679217f2848de74cabd3e8fc5e6d66f40b7da40f8e1954d92054d9010690fd5"
+checksum = "fcc842091f2def52017664b53082ecbbeb5c7731092bad69d2c63050401dfd64"

 [[package]]
 name = "tower"
@@ -4782,9 +4962,9 @@ dependencies = [

 [[package]]
 name = "tree-sitter"
-version = "0.25.6"
+version = "0.25.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a7cf18d43cbf0bfca51f657132cc616a5097edc4424d538bae6fa60142eaf9f0"
+checksum = "6d7b8994f367f16e6fa14b5aebbcb350de5d7cbea82dc5b00ae997dd71680dd2"
 dependencies = [
 "cc",
 "regex",
@@ -4843,15 +5023,10 @@ dependencies = [
 ]

 [[package]]
-name = "tui-textarea"
-version = "0.7.0"
+name = "typenum"
+version = "1.18.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0a5318dd619ed73c52a9417ad19046724effc1287fb75cdcc4eca1d6ac1acbae"
-dependencies = [
- "crossterm",
- "ratatui",
- "unicode-width 0.2.0",
-]
+checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f"

 [[package]]
 name = "unicase"
@@ -4865,6 +5040,12 @@ version = "1.0.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"

+[[package]]
+name = "unicode-linebreak"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3b09c83c3c29d37506a3e260c08c03743a6bb66a9cd432c6934ab501a190571f"
+
 [[package]]
 name = "unicode-segmentation"
 version = "1.12.0"
@@ -5020,6 +5201,12 @@ dependencies = [
 "wit-bindgen-rt",
 ]

+[[package]]
+name = "wasite"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b"
+
 [[package]]
 name = "wasm-bindgen"
 version = "0.2.100"
@@ -5120,6 +5307,17 @@ version = "0.1.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a751b3277700db47d3e574514de2eced5e54dc8a5436a3bf7a0b248b2cee16f3"

+[[package]]
+name = "whoami"
+version = "1.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6994d13118ab492c3c80c1f81928718159254c53c472bf9ce36f8dae4add02a7"
+dependencies = [
+ "redox_syscall",
+ "wasite",
+ "web-sys",
+]
+
 [[package]]
 name = "wildmatch"
 version = "2.4.0"
@@ -5448,9 +5646,9 @@ checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486"

 [[package]]
 name = "winnow"
-version = "0.7.11"
+version = "0.7.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "74c7b26e3480b707944fc872477815d29a8e429d2f93a1ce000f5fa84a15cbcd"
+checksum = "f3edebf492c8125044983378ecb5766203ad3b4c2f7a922bd7dd207f6d443e95"
 dependencies = [
 "memchr",
 ]
--- a/codex-rs/Cargo.toml
+++ b/codex-rs/Cargo.toml
@@ -1,8 +1,8 @@
 [workspace]
-resolver = "2"
 members = [
    "ansi-escape",
    "apply-patch",
+    "arg0",
    "cli",
    "common",
    "core",
@@ -16,6 +16,7 @@ members = [
    "mcp-types",
    "tui",
 ]
+resolver = "2"

 [workspace.package]
 version = "0.0.0"
@@ -40,3 +41,7 @@ strip = "symbols"

 # See https://github.com/openai/codex/issues/1411 for details.
 codegen-units = 1
+
+[patch.crates-io]
+# ratatui = { path = "../../ratatui" }
+ratatui = { git = "https://github.com/nornagon/ratatui", branch = "nornagon-v0.29.0-patch" }
--- a/codex-rs/ansi-escape/Cargo.toml
+++ b/codex-rs/ansi-escape/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
+edition = "2024"
 name = "codex-ansi-escape"
 version = { workspace = true }
-edition = "2024"

 [lib]
 name = "codex_ansi_escape"
@@ -10,7 +10,7 @@ path = "src/lib.rs"
 [dependencies]
 ansi-to-tui = "7.0.0"
 ratatui = { version = "0.29.0", features = [
-    "unstable-widget-ref",
    "unstable-rendered-line-info",
+    "unstable-widget-ref",
 ] }
 tracing = { version = "0.1.41", features = ["log"] }
--- a/codex-rs/apply-patch/Cargo.toml
+++ b/codex-rs/apply-patch/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
+edition = "2024"
 name = "codex-apply-patch"
 version = { workspace = true }
-edition = "2024"

 [lib]
 name = "codex_apply_patch"
@@ -14,7 +14,7 @@ workspace = true
 anyhow = "1"
 similar = "2.7.0"
 thiserror = "2.0.12"
-tree-sitter = "0.25.3"
+tree-sitter = "0.25.8"
 tree-sitter-bash = "0.25.0"

 [dev-dependencies]
--- a/codex-rs/apply-patch/src/lib.rs
+++ b/codex-rs/apply-patch/src/lib.rs
@@ -58,16 +58,24 @@ impl PartialEq for IoError {

 #[derive(Debug, PartialEq)]
 pub enum MaybeApplyPatch {
-    Body(Vec<Hunk>),
+    Body(ApplyPatchArgs),
    ShellParseError(ExtractHeredocError),
    PatchParseError(ParseError),
    NotApplyPatch,
 }

+/// Both the raw PATCH argument to `apply_patch` as well as the PATCH argument
+/// parsed into hunks.
+#[derive(Debug, PartialEq)]
+pub struct ApplyPatchArgs {
+    pub patch: String,
+    pub hunks: Vec<Hunk>,
+}
+
 pub fn maybe_parse_apply_patch(argv: &[String]) -> MaybeApplyPatch {
    match argv {
        [cmd, body] if cmd == "apply_patch" => match parse_patch(body) {
-            Ok(hunks) => MaybeApplyPatch::Body(hunks),
+            Ok(source) => MaybeApplyPatch::Body(source),
            Err(e) => MaybeApplyPatch::PatchParseError(e),
        },
        [bash, flag, script]
@@ -77,7 +85,7 @@ pub fn maybe_parse_apply_patch(argv: &[String]) -> MaybeApplyPatch {
        {
            match extract_heredoc_body_from_apply_patch_command(script) {
                Ok(body) => match parse_patch(&body) {
-                    Ok(hunks) => MaybeApplyPatch::Body(hunks),
+                    Ok(source) => MaybeApplyPatch::Body(source),
                    Err(e) => MaybeApplyPatch::PatchParseError(e),
                },
                Err(e) => MaybeApplyPatch::ShellParseError(e),
@@ -116,11 +124,19 @@ pub enum MaybeApplyPatchVerified {
    NotApplyPatch,
 }

-#[derive(Debug, PartialEq)]
 /// ApplyPatchAction is the result of parsing an `apply_patch` command. By
 /// construction, all paths should be absolute paths.
+#[derive(Debug, PartialEq)]
 pub struct ApplyPatchAction {
    changes: HashMap<PathBuf, ApplyPatchFileChange>,
+
+    /// The raw patch argument that can be used with `apply_patch` as an exec
+    /// call. i.e., if the original arg was parsed in "lenient" mode with a
+    /// heredoc, this should be the value without the heredoc wrapper.
+    pub patch: String,
+
+    /// The working directory that was used to resolve relative paths in the patch.
+    pub cwd: PathBuf,
 }

 impl ApplyPatchAction {
@@ -140,8 +156,28 @@ impl ApplyPatchAction {
            panic!("path must be absolute");
        }

+        #[allow(clippy::expect_used)]
+        let filename = path
+            .file_name()
+            .expect("path should not be empty")
+            .to_string_lossy();
+        let patch = format!(
+            r#"*** Begin Patch
+*** Update File: {filename}
+@@
+ {content}
+*** End Patch"#,
+        );
        let changes = HashMap::from([(path.to_path_buf(), ApplyPatchFileChange::Add { content })]);
-        Self { changes }
+        #[allow(clippy::expect_used)]
+        Self {
+            changes,
+            cwd: path
+                .parent()
+                .expect("path should have parent")
+                .to_path_buf(),
+            patch,
+        }
    }
 }

@@ -149,7 +185,7 @@ impl ApplyPatchAction {
 /// patch.
 pub fn maybe_parse_apply_patch_verified(argv: &[String], cwd: &Path) -> MaybeApplyPatchVerified {
    match maybe_parse_apply_patch(argv) {
-        MaybeApplyPatch::Body(hunks) => {
+        MaybeApplyPatch::Body(ApplyPatchArgs { patch, hunks }) => {
            let mut changes = HashMap::new();
            for hunk in hunks {
                let path = hunk.resolve_path(cwd);
@@ -183,7 +219,11 @@ pub fn maybe_parse_apply_patch_verified(argv: &[String], cwd: &Path) -> MaybeApp
                    }
                }
            }
-            MaybeApplyPatchVerified::Body(ApplyPatchAction { changes })
+            MaybeApplyPatchVerified::Body(ApplyPatchAction {
+                changes,
+                patch,
+                cwd: cwd.to_path_buf(),
+            })
        }
        MaybeApplyPatch::ShellParseError(e) => MaybeApplyPatchVerified::ShellParseError(e),
        MaybeApplyPatch::PatchParseError(e) => MaybeApplyPatchVerified::CorrectnessError(e.into()),
@@ -264,7 +304,7 @@ pub fn apply_patch(
    stderr: &mut impl std::io::Write,
 ) -> Result<(), ApplyPatchError> {
    let hunks = match parse_patch(patch) {
-        Ok(hunks) => hunks,
+        Ok(source) => source.hunks,
        Err(e) => {
            match &e {
                InvalidPatchError(message) => {
@@ -652,7 +692,7 @@ mod tests {
        ]);

        match maybe_parse_apply_patch(&args) {
-            MaybeApplyPatch::Body(hunks) => {
+            MaybeApplyPatch::Body(ApplyPatchArgs { hunks, patch: _ }) => {
                assert_eq!(
                    hunks,
                    vec![Hunk::AddFile {
@@ -679,7 +719,7 @@ PATCH"#,
        ]);

        match maybe_parse_apply_patch(&args) {
-            MaybeApplyPatch::Body(hunks) => {
+            MaybeApplyPatch::Body(ApplyPatchArgs { hunks, patch: _ }) => {
                assert_eq!(
                    hunks,
                    vec![Hunk::AddFile {
@@ -954,7 +994,7 @@ PATCH"#,
        ));
        let patch = parse_patch(&patch).unwrap();

-        let update_file_chunks = match patch.as_slice() {
+        let update_file_chunks = match patch.hunks.as_slice() {
            [Hunk::UpdateFile { chunks, .. }] => chunks,
            _ => panic!("Expected a single UpdateFile hunk"),
        };
@@ -992,7 +1032,7 @@ PATCH"#,
        ));

        let patch = parse_patch(&patch).unwrap();
-        let chunks = match patch.as_slice() {
+        let chunks = match patch.hunks.as_slice() {
            [Hunk::UpdateFile { chunks, .. }] => chunks,
            _ => panic!("Expected a single UpdateFile hunk"),
        };
@@ -1029,7 +1069,7 @@ PATCH"#,
        ));

        let patch = parse_patch(&patch).unwrap();
-        let chunks = match patch.as_slice() {
+        let chunks = match patch.hunks.as_slice() {
            [Hunk::UpdateFile { chunks, .. }] => chunks,
            _ => panic!("Expected a single UpdateFile hunk"),
        };
@@ -1064,7 +1104,7 @@ PATCH"#,
        ));

        let patch = parse_patch(&patch).unwrap();
-        let chunks = match patch.as_slice() {
+        let chunks = match patch.hunks.as_slice() {
            [Hunk::UpdateFile { chunks, .. }] => chunks,
            _ => panic!("Expected a single UpdateFile hunk"),
        };
@@ -1110,7 +1150,7 @@ PATCH"#,

        // Extract chunks then build the unified diff.
        let parsed = parse_patch(&patch).unwrap();
-        let chunks = match parsed.as_slice() {
+        let chunks = match parsed.hunks.as_slice() {
            [Hunk::UpdateFile { chunks, .. }] => chunks,
            _ => panic!("Expected a single UpdateFile hunk"),
        };
@@ -1193,6 +1233,8 @@ g
                        new_content: "updated session directory content\n".to_string(),
                    },
                )]),
+                patch: argv[1].clone(),
+                cwd: session_dir.path().to_path_buf(),
            })
        );
    }
--- a/codex-rs/apply-patch/src/parser.rs
+++ b/codex-rs/apply-patch/src/parser.rs
@@ -22,6 +22,7 @@
 //!
 //! The parser below is a little more lenient than the explicit spec and allows for
 //! leading/trailing whitespace around patch markers.
+use crate::ApplyPatchArgs;
 use std::path::Path;
 use std::path::PathBuf;

@@ -102,7 +103,7 @@ pub struct UpdateFileChunk {
    pub is_end_of_file: bool,
 }

-pub fn parse_patch(patch: &str) -> Result<Vec<Hunk>, ParseError> {
+pub fn parse_patch(patch: &str) -> Result<ApplyPatchArgs, ParseError> {
    let mode = if PARSE_IN_STRICT_MODE {
        ParseMode::Strict
    } else {
@@ -150,7 +151,7 @@ enum ParseMode {
    Lenient,
 }

-fn parse_patch_text(patch: &str, mode: ParseMode) -> Result<Vec<Hunk>, ParseError> {
+fn parse_patch_text(patch: &str, mode: ParseMode) -> Result<ApplyPatchArgs, ParseError> {
    let lines: Vec<&str> = patch.trim().lines().collect();
    let lines: &[&str] = match check_patch_boundaries_strict(&lines) {
        Ok(()) => &lines,
@@ -173,7 +174,8 @@ fn parse_patch_text(patch: &str, mode: ParseMode) -> Result<Vec<Hunk>, ParseErro
        line_number += hunk_lines;
        remaining_lines = &remaining_lines[hunk_lines..]
    }
-    Ok(hunks)
+    let patch = lines.join("\n");
+    Ok(ApplyPatchArgs { hunks, patch })
 }

 /// Checks the start and end lines of the patch text for `apply_patch`,
@@ -425,6 +427,7 @@ fn parse_update_file_chunk(
 }

 #[test]
+#[allow(clippy::unwrap_used)]
 fn test_parse_patch() {
    assert_eq!(
        parse_patch_text("bad", ParseMode::Strict),
@@ -455,8 +458,10 @@ fn test_parse_patch() {
            "*** Begin Patch\n\
             *** End Patch",
            ParseMode::Strict
-        ),
-        Ok(Vec::new())
+        )
+        .unwrap()
+        .hunks,
+        Vec::new()
    );
    assert_eq!(
        parse_patch_text(
@@ -472,8 +477,10 @@ fn test_parse_patch() {
             +    return 123\n\
             *** End Patch",
            ParseMode::Strict
-        ),
-        Ok(vec![
+        )
+        .unwrap()
+        .hunks,
+        vec![
            AddFile {
                path: PathBuf::from("path/add.py"),
                contents: "abc\ndef\n".to_string()
@@ -491,7 +498,7 @@ fn test_parse_patch() {
                    is_end_of_file: false
                }]
            }
-        ])
+        ]
    );
    // Update hunk followed by another hunk (Add File).
    assert_eq!(
@@ -504,8 +511,10 @@ fn test_parse_patch() {
             +content\n\
             *** End Patch",
            ParseMode::Strict
-        ),
-        Ok(vec![
+        )
+        .unwrap()
+        .hunks,
+        vec![
            UpdateFile {
                path: PathBuf::from("file.py"),
                move_path: None,
@@ -520,7 +529,7 @@ fn test_parse_patch() {
                path: PathBuf::from("other.py"),
                contents: "content\n".to_string()
            }
-        ])
+        ]
    );

    // Update hunk without an explicit @@ header for the first chunk should parse.
@@ -533,8 +542,10 @@ fn test_parse_patch() {
 +bar
 *** End Patch"#,
            ParseMode::Strict
-        ),
-        Ok(vec![UpdateFile {
+        )
+        .unwrap()
+        .hunks,
+        vec![UpdateFile {
            path: PathBuf::from("file2.py"),
            move_path: None,
            chunks: vec![UpdateFileChunk {
@@ -543,7 +554,7 @@ fn test_parse_patch() {
                new_lines: vec!["import foo".to_string(), "bar".to_string()],
                is_end_of_file: false,
            }],
-        }])
+        }]
    );
 }

@@ -574,7 +585,10 @@ fn test_parse_patch_lenient() {
    );
    assert_eq!(
        parse_patch_text(&patch_text_in_heredoc, ParseMode::Lenient),
-        Ok(expected_patch.clone())
+        Ok(ApplyPatchArgs {
+            hunks: expected_patch.clone(),
+            patch: patch_text.to_string()
+        })
    );

    let patch_text_in_single_quoted_heredoc = format!("<<'EOF'\n{patch_text}\nEOF\n");
@@ -584,7 +598,10 @@ fn test_parse_patch_lenient() {
    );
    assert_eq!(
        parse_patch_text(&patch_text_in_single_quoted_heredoc, ParseMode::Lenient),
-        Ok(expected_patch.clone())
+        Ok(ApplyPatchArgs {
+            hunks: expected_patch.clone(),
+            patch: patch_text.to_string()
+        })
    );

    let patch_text_in_double_quoted_heredoc = format!("<<\"EOF\"\n{patch_text}\nEOF\n");
@@ -594,7 +611,10 @@ fn test_parse_patch_lenient() {
    );
    assert_eq!(
        parse_patch_text(&patch_text_in_double_quoted_heredoc, ParseMode::Lenient),
-        Ok(expected_patch.clone())
+        Ok(ApplyPatchArgs {
+            hunks: expected_patch.clone(),
+            patch: patch_text.to_string()
+        })
    );

    let patch_text_in_mismatched_quotes_heredoc = format!("<<\"EOF'\n{patch_text}\nEOF\n");
--- a/codex-rs/arg0/Cargo.toml
+++ b/codex-rs/arg0/Cargo.toml
@@ -0,0 +1,19 @@
+[package]
+edition = "2024"
+name = "codex-arg0"
+version = { workspace = true }
+
+[lib]
+name = "codex_arg0"
+path = "src/lib.rs"
+
+[lints]
+workspace = true
+
+[dependencies]
+anyhow = "1"
+codex-apply-patch = { path = "../apply-patch" }
+codex-core = { path = "../core" }
+codex-linux-sandbox = { path = "../linux-sandbox" }
+dotenvy = "0.15.7"
+tokio = { version = "1", features = ["rt-multi-thread"] }
--- a/codex-rs/arg0/src/lib.rs
+++ b/codex-rs/arg0/src/lib.rs
@@ -0,0 +1,91 @@
+use std::future::Future;
+use std::path::Path;
+use std::path::PathBuf;
+
+use codex_core::CODEX_APPLY_PATCH_ARG1;
+
+/// While we want to deploy the Codex CLI as a single executable for simplicity,
+/// we also want to expose some of its functionality as distinct CLIs, so we use
+/// the "arg0 trick" to determine which CLI to dispatch. This effectively allows
+/// us to simulate deploying multiple executables as a single binary on Mac and
+/// Linux (but not Windows).
+///
+/// When the current executable is invoked through the hard-link or alias named
+/// `codex-linux-sandbox` we *directly* execute
+/// [`codex_linux_sandbox::run_main`] (which never returns).
+///
+/// When the first argument equals [`CODEX_APPLY_PATCH_ARG1`], the argument
+/// after it is handed to [`codex_apply_patch::apply_patch`] as the patch and
+/// the process exits with status 0 on success or 1 on failure (a missing or
+/// non-UTF-8 patch argument also exits with 1). Otherwise we:
+///
+/// 1.  Use [`dotenvy::from_path`] and [`dotenvy::dotenv`] to modify the
+///     environment before creating any threads.
+/// 2.  Construct a Tokio multi-thread runtime.
+/// 3.  Derive the path to the current executable (so children can re-invoke the
+///     sandbox) when running on Linux.
+/// 4.  Execute the provided async `main_fn` inside that runtime, forwarding any
+///     error. Note that `main_fn` receives `codex_linux_sandbox_exe:
+///     Option<PathBuf>` as an argument, which is generally needed as part of
+///     constructing [`codex_core::config::Config`].
+///
+/// This function should be used to wrap any `main()` function in binary crates
+/// in this workspace that depends on these helper CLIs.
+pub fn arg0_dispatch_or_else<F, Fut>(main_fn: F) -> anyhow::Result<()>
+where
+    F: FnOnce(Option<PathBuf>) -> Fut,
+    Fut: Future<Output = anyhow::Result<()>>,
+{
+    // Determine if we were invoked via the special alias. Only the final path
+    // component of argv[0] is compared; a missing or non-UTF-8 name becomes "".
+    let mut args = std::env::args_os();
+    let argv0 = args.next().unwrap_or_default();
+    let exe_name = Path::new(&argv0)
+        .file_name()
+        .and_then(|s| s.to_str())
+        .unwrap_or("");
+
+    if exe_name == "codex-linux-sandbox" {
+        // [`run_main`] never returns, so nothing below runs for the alias.
+        codex_linux_sandbox::run_main();
+    }
+
+    // A missing argv[1] becomes an empty `OsString`.
+    let argv1 = args.next().unwrap_or_default();
+    if argv1 == CODEX_APPLY_PATCH_ARG1 {
+        // `None` here means the patch argument was absent or not valid UTF-8.
+        let patch_arg = args.next().and_then(|s| s.to_str().map(|s| s.to_owned()));
+        let exit_code = match patch_arg {
+            Some(patch_arg) => {
+                let mut stdout = std::io::stdout();
+                let mut stderr = std::io::stderr();
+                match codex_apply_patch::apply_patch(&patch_arg, &mut stdout, &mut stderr) {
+                    Ok(()) => 0,
+                    // NOTE(review): the error value is dropped here; presumably
+                    // `apply_patch` has already reported it on `stderr` - confirm.
+                    Err(_) => 1,
+                }
+            }
+            None => {
+                eprintln!("Error: {CODEX_APPLY_PATCH_ARG1} requires a UTF-8 PATCH argument.");
+                1
+            }
+        };
+        std::process::exit(exit_code);
+    }
+
+    // This modifies the environment, which is not thread-safe, so do this
+    // before creating any threads/the Tokio runtime.
+    load_dotenv();
+
+    // Regular invocation – create a Tokio runtime and execute the provided
+    // async entry-point.
+    let runtime = tokio::runtime::Runtime::new()?;
+    runtime.block_on(async move {
+        // Only Linux builds pass a path; a `current_exe` failure or any other
+        // target yields `None`.
+        let codex_linux_sandbox_exe: Option<PathBuf> = if cfg!(target_os = "linux") {
+            std::env::current_exe().ok()
+        } else {
+            None
+        };
+
+        main_fn(codex_linux_sandbox_exe).await
+    })
+}
+
+/// Load env vars from `.env` inside the Codex home directory (when
+/// `find_codex_home` succeeds; presumably `~/.codex/.env` by default) and then
+/// via [`dotenvy::dotenv`].
+///
+/// Both loads are best-effort: every error, including a missing file, is
+/// discarded with `.ok()`.
+///
+/// NOTE(review): per the dotenvy docs, `dotenv()` looks for `.env` in the
+/// current directory or its parents, and neither call overrides variables that
+/// are already set - confirm this precedence is the intended one.
+fn load_dotenv() {
+    if let Ok(codex_home) = codex_core::config::find_codex_home() {
+        dotenvy::from_path(codex_home.join(".env")).ok();
+    }
+    dotenvy::dotenv().ok();
+}
--- a/codex-rs/chatgpt/Cargo.toml
+++ b/codex-rs/chatgpt/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
+edition = "2024"
 name = "codex-chatgpt"
 version = { workspace = true }
-edition = "2024"

 [lints]
 workspace = true
@@ -9,12 +9,12 @@ workspace = true
 [dependencies]
 anyhow = "1"
 clap = { version = "4", features = ["derive"] }
-serde = { version = "1", features = ["derive"] }
-serde_json = "1"
 codex-common = { path = "../common", features = ["cli"] }
 codex-core = { path = "../core" }
 codex-login = { path = "../login" }
 reqwest = { version = "0.12", features = ["json", "stream"] }
+serde = { version = "1", features = ["derive"] }
+serde_json = "1"
 tokio = { version = "1", features = ["full"] }

 [dev-dependencies]
--- a/codex-rs/chatgpt/src/apply_command.rs
+++ b/codex-rs/chatgpt/src/apply_command.rs
@@ -1,3 +1,5 @@
+use std::path::PathBuf;
+
 use clap::Parser;
 use codex_common::CliConfigOverrides;
 use codex_core::config::Config;
@@ -17,7 +19,10 @@ pub struct ApplyCommand {
    #[clap(flatten)]
    pub config_overrides: CliConfigOverrides,
 }
-pub async fn run_apply_command(apply_cli: ApplyCommand) -> anyhow::Result<()> {
+pub async fn run_apply_command(
+    apply_cli: ApplyCommand,
+    cwd: Option<PathBuf>,
+) -> anyhow::Result<()> {
    let config = Config::load_with_cli_overrides(
        apply_cli
            .config_overrides
@@ -29,10 +34,13 @@ pub async fn run_apply_command(apply_cli: ApplyCommand) -> anyhow::Result<()> {
    init_chatgpt_token_from_auth(&config.codex_home).await?;

    let task_response = get_task(&config, apply_cli.task_id).await?;
-    apply_diff_from_task(task_response).await
+    apply_diff_from_task(task_response, cwd).await
 }

-pub async fn apply_diff_from_task(task_response: GetTaskResponse) -> anyhow::Result<()> {
+pub async fn apply_diff_from_task(
+    task_response: GetTaskResponse,
+    cwd: Option<PathBuf>,
+) -> anyhow::Result<()> {
    let diff_turn = match task_response.current_diff_task_turn {
        Some(turn) => turn,
        None => anyhow::bail!("No diff turn found"),
@@ -42,13 +50,17 @@ pub async fn apply_diff_from_task(task_response: GetTaskResponse) -> anyhow::Res
        _ => None,
    });
    match output_diff {
-        Some(output_diff) => apply_diff(&output_diff.diff).await,
+        Some(output_diff) => apply_diff(&output_diff.diff, cwd).await,
        None => anyhow::bail!("No PR output item found"),
    }
 }

-async fn apply_diff(diff: &str) -> anyhow::Result<()> {
-    let toplevel_output = tokio::process::Command::new("git")
+async fn apply_diff(diff: &str, cwd: Option<PathBuf>) -> anyhow::Result<()> {
+    let mut cmd = tokio::process::Command::new("git");
+    if let Some(cwd) = cwd {
+        cmd.current_dir(cwd);
+    }
+    let toplevel_output = cmd
        .args(vec!["rev-parse", "--show-toplevel"])
        .output()
        .await?;
--- a/codex-rs/chatgpt/src/chatgpt_client.rs
+++ b/codex-rs/chatgpt/src/chatgpt_client.rs
@@ -21,10 +21,14 @@ pub(crate) async fn chatgpt_get_request<T: DeserializeOwned>(
    let token =
        get_chatgpt_token_data().ok_or_else(|| anyhow::anyhow!("ChatGPT token not available"))?;

+    let account_id = token.account_id.ok_or_else(|| {
+        anyhow::anyhow!("ChatGPT account ID not available, please re-run `codex login`")
+    });
+
    let response = client
        .get(&url)
        .bearer_auth(&token.access_token)
-        .header("chatgpt-account-id", &token.account_id)
+        .header("chatgpt-account-id", account_id?)
        .header("Content-Type", "application/json")
        .header("User-Agent", "codex-cli")
        .send()
--- a/codex-rs/chatgpt/src/chatgpt_token.rs
+++ b/codex-rs/chatgpt/src/chatgpt_token.rs
@@ -18,7 +18,10 @@ pub fn set_chatgpt_token_data(value: TokenData) {

 /// Initialize the ChatGPT token from auth.json file
 pub async fn init_chatgpt_token_from_auth(codex_home: &Path) -> std::io::Result<()> {
-    let auth_json = codex_login::try_read_auth_json(codex_home).await?;
-    set_chatgpt_token_data(auth_json.tokens.clone());
+    let auth = codex_login::load_auth(codex_home, true)?;
+    if let Some(auth) = auth {
+        let token_data = auth.get_token_data().await?;
+        set_chatgpt_token_data(token_data);
+    }
    Ok(())
 }
--- a/codex-rs/chatgpt/tests/apply_command_e2e.rs
+++ b/codex-rs/chatgpt/tests/apply_command_e2e.rs
@@ -10,8 +10,13 @@ use tokio::process::Command;
 async fn create_temp_git_repo() -> anyhow::Result<TempDir> {
    let temp_dir = TempDir::new()?;
    let repo_path = temp_dir.path();
+    let envs = vec![
+        ("GIT_CONFIG_GLOBAL", "/dev/null"),
+        ("GIT_CONFIG_NOSYSTEM", "1"),
+    ];

    let output = Command::new("git")
+        .envs(envs.clone())
        .args(["init"])
        .current_dir(repo_path)
        .output()
@@ -25,12 +30,14 @@ async fn create_temp_git_repo() -> anyhow::Result<TempDir> {
    }

    Command::new("git")
+        .envs(envs.clone())
        .args(["config", "user.email", "test@example.com"])
        .current_dir(repo_path)
        .output()
        .await?;

    Command::new("git")
+        .envs(envs.clone())
        .args(["config", "user.name", "Test User"])
        .current_dir(repo_path)
        .output()
@@ -39,12 +46,14 @@ async fn create_temp_git_repo() -> anyhow::Result<TempDir> {
    std::fs::write(repo_path.join("README.md"), "# Test Repo\n")?;

    Command::new("git")
+        .envs(envs.clone())
        .args(["add", "README.md"])
        .current_dir(repo_path)
        .output()
        .await?;

    let output = Command::new("git")
+        .envs(envs.clone())
        .args(["commit", "-m", "Initial commit"])
        .current_dir(repo_path)
        .output()
@@ -78,17 +87,7 @@ async fn test_apply_command_creates_fibonacci_file() {
        .await
        .expect("Failed to load fixture");

-    let original_dir = std::env::current_dir().expect("Failed to get current dir");
-    std::env::set_current_dir(repo_path).expect("Failed to change directory");
-    struct DirGuard(std::path::PathBuf);
-    impl Drop for DirGuard {
-        fn drop(&mut self) {
-            let _ = std::env::set_current_dir(&self.0);
-        }
-    }
-    let _guard = DirGuard(original_dir);
-
-    apply_diff_from_task(task_response)
+    apply_diff_from_task(task_response, Some(repo_path.to_path_buf()))
        .await
        .expect("Failed to apply diff from task");

@@ -173,7 +172,7 @@ console.log(fib(10));
        .await
        .expect("Failed to load fixture");

-    let apply_result = apply_diff_from_task(task_response).await;
+    let apply_result = apply_diff_from_task(task_response, Some(repo_path.to_path_buf())).await;

    assert!(
        apply_result.is_err(),
--- a/codex-rs/cli/Cargo.toml
+++ b/codex-rs/cli/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
+edition = "2024"
 name = "codex-cli"
 version = { workspace = true }
-edition = "2024"

 [[bin]]
 name = "codex"
@@ -18,12 +18,12 @@ workspace = true
 anyhow = "1"
 clap = { version = "4", features = ["derive"] }
 clap_complete = "4"
+codex-arg0 = { path = "../arg0" }
 codex-chatgpt = { path = "../chatgpt" }
-codex-core = { path = "../core" }
 codex-common = { path = "../common", features = ["cli"] }
+codex-core = { path = "../core" }
 codex-exec = { path = "../exec" }
 codex-login = { path = "../login" }
-codex-linux-sandbox = { path = "../linux-sandbox" }
 codex-mcp-server = { path = "../mcp-server" }
 codex-tui = { path = "../tui" }
 serde_json = "1"
--- a/codex-rs/cli/src/debug_sandbox.rs
+++ b/codex-rs/cli/src/debug_sandbox.rs
@@ -4,10 +4,10 @@ use codex_common::CliConfigOverrides;
 use codex_core::config::Config;
 use codex_core::config::ConfigOverrides;
 use codex_core::config_types::SandboxMode;
-use codex_core::exec::StdioPolicy;
 use codex_core::exec::spawn_command_under_linux_sandbox;
-use codex_core::exec::spawn_command_under_seatbelt;
 use codex_core::exec_env::create_env;
+use codex_core::seatbelt::spawn_command_under_seatbelt;
+use codex_core::spawn::StdioPolicy;

 use crate::LandlockCommand;
 use crate::SeatbeltCommand;
--- a/codex-rs/cli/src/login.rs
+++ b/codex-rs/cli/src/login.rs
@@ -1,25 +1,16 @@
+use std::env;
+
 use codex_common::CliConfigOverrides;
 use codex_core::config::Config;
 use codex_core::config::ConfigOverrides;
+use codex_login::AuthMode;
+use codex_login::OPENAI_API_KEY_ENV_VAR;
+use codex_login::load_auth;
+use codex_login::login_with_api_key;
 use codex_login::login_with_chatgpt;

 pub async fn run_login_with_chatgpt(cli_config_overrides: CliConfigOverrides) -> ! {
-    let cli_overrides = match cli_config_overrides.parse_overrides() {
-        Ok(v) => v,
-        Err(e) => {
-            eprintln!("Error parsing -c overrides: {e}");
-            std::process::exit(1);
-        }
-    };
-
-    let config_overrides = ConfigOverrides::default();
-    let config = match Config::load_with_cli_overrides(cli_overrides, config_overrides) {
-        Ok(config) => config,
-        Err(e) => {
-            eprintln!("Error loading configuration: {e}");
-            std::process::exit(1);
-        }
-    };
+    let config = load_config_or_exit(cli_config_overrides);

    let capture_output = false;
    match login_with_chatgpt(&config.codex_home, capture_output).await {
@@ -33,3 +24,103 @@ pub async fn run_login_with_chatgpt(cli_config_overrides: CliConfigOverrides) ->
        }
    }
 }
+
+/// Calls `login_with_api_key` with the configured Codex home and `api_key`,
+/// reports the outcome on stderr, and terminates the process.
+///
+/// Exits with status 0 on success and 1 on failure; never returns. Invalid
+/// overrides or configuration terminate the process earlier, inside
+/// `load_config_or_exit`.
+pub async fn run_login_with_api_key(
+    cli_config_overrides: CliConfigOverrides,
+    api_key: String,
+) -> ! {
+    let config = load_config_or_exit(cli_config_overrides);
+
+    let exit_code = if let Err(err) = login_with_api_key(&config.codex_home, &api_key) {
+        eprintln!("Error logging in: {err}");
+        1
+    } else {
+        eprintln!("Successfully logged in");
+        0
+    };
+    std::process::exit(exit_code)
+}
+
+/// Prints the current login state to stderr and terminates the process.
+///
+/// Exits with status 0 when `load_auth` yields credentials (API key or
+/// ChatGPT) and with status 1 when it yields none or fails; never returns.
+pub async fn run_login_status(cli_config_overrides: CliConfigOverrides) -> ! {
+    let config = load_config_or_exit(cli_config_overrides);
+
+    // Handle the "no usable credentials" outcomes first so the rest of the
+    // function only deals with the logged-in case.
+    let auth = match load_auth(&config.codex_home, true) {
+        Ok(Some(auth)) => auth,
+        Ok(None) => {
+            eprintln!("Not logged in");
+            std::process::exit(1);
+        }
+        Err(e) => {
+            eprintln!("Error checking login status: {e}");
+            std::process::exit(1);
+        }
+    };
+
+    match auth.mode {
+        AuthMode::ApiKey => match auth.api_key.as_deref() {
+            Some(api_key) => {
+                // Only a redacted form of the key is ever printed.
+                eprintln!("Logged in using an API key - {}", safe_format_key(api_key));
+
+                // Add the hint only when the environment holds this same key.
+                let matches_env = env::var(OPENAI_API_KEY_ENV_VAR)
+                    .is_ok_and(|env_api_key| env_api_key == api_key);
+                if matches_env {
+                    eprintln!(
+                        "   API loaded from OPENAI_API_KEY environment variable or .env file"
+                    );
+                }
+            }
+            None => eprintln!("Logged in using an API key"),
+        },
+        AuthMode::ChatGPT => eprintln!("Logged in using ChatGPT"),
+    }
+    std::process::exit(0)
+}
+
+/// Parses the `-c` overrides and loads the [`Config`] with default
+/// `ConfigOverrides`.
+///
+/// If either step fails, the error is printed to stderr and the process exits
+/// with status 1, so callers always receive a usable `Config`.
+fn load_config_or_exit(cli_config_overrides: CliConfigOverrides) -> Config {
+    let cli_overrides = cli_config_overrides.parse_overrides().unwrap_or_else(|e| {
+        eprintln!("Error parsing -c overrides: {e}");
+        std::process::exit(1)
+    });
+
+    let config_overrides = ConfigOverrides::default();
+    Config::load_with_cli_overrides(cli_overrides, config_overrides).unwrap_or_else(|e| {
+        eprintln!("Error loading configuration: {e}");
+        std::process::exit(1)
+    })
+}
+
+/// Returns a redacted form of `key` that is safe to print: its first 8 and
+/// last 5 bytes joined by `***`.
+///
+/// Keys of 13 bytes or fewer are fully masked as `***`, so the prefix and
+/// suffix can never overlap or reveal the whole key. A key whose prefix or
+/// suffix cut would fall inside a multi-byte UTF-8 character is also fully
+/// masked rather than panicking on the slice.
+fn safe_format_key(key: &str) -> String {
+    const PREFIX_LEN: usize = 8;
+    const SUFFIX_LEN: usize = 5;
+    const MASK: &str = "***";
+
+    if key.len() <= PREFIX_LEN + SUFFIX_LEN {
+        return MASK.to_string();
+    }
+
+    // `str::get` yields `None` instead of panicking when a range bound is not
+    // on a char boundary, which plain `&key[..]` indexing would do.
+    match (key.get(..PREFIX_LEN), key.get(key.len() - SUFFIX_LEN..)) {
+        (Some(prefix), Some(suffix)) => format!("{prefix}{MASK}{suffix}"),
+        _ => MASK.to_string(),
+    }
+}
+
+// Unit tests for `safe_format_key`, covering both sides of the 13-byte
+// length threshold.
+#[cfg(test)]
+mod tests {
+    use super::safe_format_key;
+
+    // A key longer than 13 bytes keeps its first 8 and last 5 characters.
+    #[test]
+    fn formats_long_key() {
+        let key = "sk-proj-1234567890ABCDE";
+        assert_eq!(safe_format_key(key), "sk-proj-***ABCDE");
+    }
+
+    // A key of exactly 13 bytes is masked completely.
+    #[test]
+    fn short_key_returns_stars() {
+        let key = "sk-proj-12345";
+        assert_eq!(safe_format_key(key), "***");
+    }
+}
--- a/codex-rs/cli/src/main.rs
+++ b/codex-rs/cli/src/main.rs
@@ -2,10 +2,13 @@ use clap::CommandFactory;
 use clap::Parser;
 use clap_complete::Shell;
 use clap_complete::generate;
+use codex_arg0::arg0_dispatch_or_else;
 use codex_chatgpt::apply_command::ApplyCommand;
 use codex_chatgpt::apply_command::run_apply_command;
 use codex_cli::LandlockCommand;
 use codex_cli::SeatbeltCommand;
+use codex_cli::login::run_login_status;
+use codex_cli::login::run_login_with_api_key;
 use codex_cli::login::run_login_with_chatgpt;
 use codex_cli::proto;
 use codex_common::CliConfigOverrides;
@@ -42,7 +45,7 @@ enum Subcommand {
    #[clap(visible_alias = "e")]
    Exec(ExecCli),

-    /// Login with ChatGPT.
+    /// Manage login.
    Login(LoginCommand),

    /// Experimental: run Codex as an MCP server.
@@ -89,10 +92,22 @@ enum DebugCommand {
 struct LoginCommand {
    #[clap(skip)]
    config_overrides: CliConfigOverrides,
+
+    #[arg(long = "api-key", value_name = "API_KEY")]
+    api_key: Option<String>,
+
+    #[command(subcommand)]
+    action: Option<LoginSubcommand>,
+}
+
+#[derive(Debug, clap::Subcommand)]
+enum LoginSubcommand {
+    /// Show login status.
+    Status,
 }

 fn main() -> anyhow::Result<()> {
-    codex_linux_sandbox::run_with_sandbox(|codex_linux_sandbox_exe| async move {
+    arg0_dispatch_or_else(|codex_linux_sandbox_exe| async move {
        cli_main(codex_linux_sandbox_exe).await?;
        Ok(())
    })
@@ -105,7 +120,8 @@ async fn cli_main(codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()
        None => {
            let mut tui_cli = cli.interactive;
            prepend_config_flags(&mut tui_cli.config_overrides, cli.config_overrides);
-            codex_tui::run_main(tui_cli, codex_linux_sandbox_exe)?;
+            let usage = codex_tui::run_main(tui_cli, codex_linux_sandbox_exe).await?;
+            println!("{}", codex_core::protocol::FinalOutput::from(usage));
        }
        Some(Subcommand::Exec(mut exec_cli)) => {
            prepend_config_flags(&mut exec_cli.config_overrides, cli.config_overrides);
@@ -116,7 +132,18 @@ async fn cli_main(codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()
        }
        Some(Subcommand::Login(mut login_cli)) => {
            prepend_config_flags(&mut login_cli.config_overrides, cli.config_overrides);
-            run_login_with_chatgpt(login_cli.config_overrides).await;
+            match login_cli.action {
+                Some(LoginSubcommand::Status) => {
+                    run_login_status(login_cli.config_overrides).await;
+                }
+                None => {
+                    if let Some(api_key) = login_cli.api_key {
+                        run_login_with_api_key(login_cli.config_overrides, api_key).await;
+                    } else {
+                        run_login_with_chatgpt(login_cli.config_overrides).await;
+                    }
+                }
+            }
        }
        Some(Subcommand::Proto(mut proto_cli)) => {
            prepend_config_flags(&mut proto_cli.config_overrides, cli.config_overrides);
@@ -145,7 +172,7 @@ async fn cli_main(codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()
        },
        Some(Subcommand::Apply(mut apply_cli)) => {
            prepend_config_flags(&mut apply_cli.config_overrides, cli.config_overrides);
-            run_apply_command(apply_cli).await?;
+            run_apply_command(apply_cli, None).await?;
        }
    }

--- a/codex-rs/cli/src/proto.rs
+++ b/codex-rs/cli/src/proto.rs
@@ -4,10 +4,12 @@ use std::sync::Arc;
 use clap::Parser;
 use codex_common::CliConfigOverrides;
 use codex_core::Codex;
+use codex_core::CodexSpawnOk;
 use codex_core::config::Config;
 use codex_core::config::ConfigOverrides;
 use codex_core::protocol::Submission;
 use codex_core::util::notify_on_sigint;
+use codex_login::load_auth;
 use tokio::io::AsyncBufReadExt;
 use tokio::io::BufReader;
 use tracing::error;
@@ -34,8 +36,9 @@ pub async fn run_main(opts: ProtoCli) -> anyhow::Result<()> {
        .map_err(anyhow::Error::msg)?;

    let config = Config::load_with_cli_overrides(overrides_vec, ConfigOverrides::default())?;
+    let auth = load_auth(&config.codex_home, true)?;
    let ctrl_c = notify_on_sigint();
-    let (codex, _init_id) = Codex::spawn(config, ctrl_c.clone()).await?;
+    let CodexSpawnOk { codex, .. } = Codex::spawn(config, auth, ctrl_c.clone()).await?;
    let codex = Arc::new(codex);

    // Task that reads JSON lines from stdin and forwards to Submission Queue
--- a/codex-rs/common/Cargo.toml
+++ b/codex-rs/common/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
+edition = "2024"
 name = "codex-common"
 version = { workspace = true }
-edition = "2024"

 [lints]
 workspace = true
@@ -9,11 +9,11 @@ workspace = true
 [dependencies]
 clap = { version = "4", features = ["derive", "wrap_help"], optional = true }
 codex-core = { path = "../core" }
-toml = { version = "0.9", optional = true }
 serde = { version = "1", optional = true }
+toml = { version = "0.9", optional = true }

 [features]
 # Separate feature so that `clap` is not a mandatory dependency.
-cli = ["clap", "toml", "serde"]
+cli = ["clap", "serde", "toml"]
 elapsed = []
 sandbox_summary = []
--- a/codex-rs/common/src/config_override.rs
+++ b/codex-rs/common/src/config_override.rs
@@ -64,7 +64,11 @@ impl CliConfigOverrides {
                // `-c model=o3` without the quotes.
                let value: Value = match parse_toml_value(value_str) {
                    Ok(v) => v,
-                    Err(_) => Value::String(value_str.to_string()),
+                    Err(_) => {
+                        // Strip leading/trailing quotes if present
+                        let trimmed = value_str.trim().trim_matches(|c| c == '"' || c == '\'');
+                        Value::String(trimmed.to_string())
+                    }
                };

                Ok((key.to_string(), value))
--- a/codex-rs/common/src/sandbox_summary.rs
+++ b/codex-rs/common/src/sandbox_summary.rs
@@ -7,6 +7,7 @@ pub fn summarize_sandbox_policy(sandbox_policy: &SandboxPolicy) -> String {
        SandboxPolicy::WorkspaceWrite {
            writable_roots,
            network_access,
+            include_default_writable_roots,
        } => {
            let mut summary = "workspace-write".to_string();
            if !writable_roots.is_empty() {
@@ -19,6 +20,9 @@ pub fn summarize_sandbox_policy(sandbox_policy: &SandboxPolicy) -> String {
                        .join(", ")
                ));
            }
+            if !*include_default_writable_roots {
+                summary.push_str(" (exact writable roots)");
+            }
            if *network_access {
                summary.push_str(" (network access enabled)");
            }
--- a/codex-rs/config.md
+++ b/codex-rs/config.md
@@ -92,6 +92,35 @@ http_headers = { "X-Example-Header" = "example-value" }
 env_http_headers = { "X-Example-Features": "EXAMPLE_FEATURES" }
 ```

+### Per-provider network tuning
+
+The following optional settings control retry behaviour and streaming idle timeouts **per model provider**. They must be specified inside the corresponding `[model_providers.<id>]` block in `config.toml`. (Older releases accepted top‑level keys; those are now ignored.)
+
+Example:
+
+```toml
+[model_providers.openai]
+name = "OpenAI"
+base_url = "https://api.openai.com/v1"
+env_key = "OPENAI_API_KEY"
+# network tuning overrides (all optional; falls back to built‑in defaults)
+request_max_retries = 4            # retry failed HTTP requests
+stream_max_retries = 10            # retry dropped SSE streams
+stream_idle_timeout_ms = 300000    # 5m idle timeout
+```
+
+#### request_max_retries
+
+How many times Codex will retry a failed HTTP request to the model provider. Defaults to `4`.
+
+#### stream_max_retries
+
+Number of times Codex will attempt to reconnect when a streaming response is interrupted. Defaults to `10`.
+
+#### stream_idle_timeout_ms
+
+How long Codex will wait for activity on a streaming response before treating the connection as lost. Defaults to `300_000` (5 minutes).
+
 ## model_provider

 Identifies which provider to use from the `model_providers` map. Defaults to `"openai"`. You can override the `base_url` for the built-in `openai` provider via the `OPENAI_BASE_URL` environment variable.
@@ -230,6 +259,8 @@ disk, but attempts to write a file or access the network will be blocked.

 A more relaxed policy is `workspace-write`. When specified, the current working directory for the Codex task will be writable (as well as `$TMPDIR` on macOS). Note that the CLI defaults to using the directory where it was spawned as `cwd`, though this can be overridden using `--cwd/-C`.

+On macOS (and soon Linux), for every writable root (including `cwd`) that contains a `.git/` folder _as an immediate child_, the `.git/` folder is made read-only while the rest of the Git repository remains writable. This means that commands like `git commit` will fail by default (as they entail writing to `.git/`) and will require Codex to ask for permission.
+
 ```toml
 # same as `--sandbox workspace-write`
 sandbox_mode = "workspace-write"
@@ -444,7 +475,7 @@ Currently, `"vscode"` is the default, though Codex does not verify VS Code is in

 ## hide_agent_reasoning

-Codex intermittently emits "reasoning" events that show the model’s internal "thinking" before it produces a final answer. Some users may find these events distracting, especially in CI logs or minimal terminal output.
+Codex intermittently emits "reasoning" events that show the model's internal "thinking" before it produces a final answer. Some users may find these events distracting, especially in CI logs or minimal terminal output.

 Setting `hide_agent_reasoning` to `true` suppresses these events in **both** the TUI as well as the headless `exec` sub-command:

@@ -472,14 +503,5 @@ Options that are specific to the TUI.

 ```toml
 [tui]
-# This will make it so that Codex does not try to process mouse events, which
-# means your Terminal's native drag-to-text to text selection and copy/paste
-# should work. The tradeoff is that Codex will not receive any mouse events, so
-# it will not be possible to use the mouse to scroll conversation history.
-#
-# Note that most terminals support holding down a modifier key when using the
-# mouse to support text selection. For example, even if Codex mouse capture is
-# enabled (i.e., this is set to `false`), you can still hold down alt while
-# dragging the mouse to select text.
-disable_mouse_capture = true  # defaults to `false`
+# More to come here
 ```
--- a/codex-rs/core/Cargo.toml
+++ b/codex-rs/core/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
+edition = "2024"
 name = "codex-core"
 version = { workspace = true }
-edition = "2024"

 [lib]
 name = "codex_core"
@@ -15,20 +15,26 @@ anyhow = "1"
 async-channel = "2.3.1"
 base64 = "0.22"
 bytes = "1.10.1"
+chrono = { version = "0.4", features = ["serde"] }
 codex-apply-patch = { path = "../apply-patch" }
+codex-login = { path = "../login" }
 codex-mcp-client = { path = "../mcp-client" }
 dirs = "6"
 env-flags = "0.1.1"
 eventsource-stream = "0.2.3"
 fs2 = "0.4.3"
 futures = "0.3"
+libc = "0.2.174"
 mcp-types = { path = "../mcp-types" }
 mime_guess = "2.0"
 rand = "0.9"
 reqwest = { version = "0.12", features = ["json", "stream"] }
 serde = { version = "1", features = ["derive"] }
 serde_json = "1"
-strum_macros = "0.27.1"
+serde_bytes = "0.11"
+sha1 = "0.10.6"
+shlex = "1.3.0"
+strum_macros = "0.27.2"
 thiserror = "2.0.12"
 time = { version = "0.3", features = ["formatting", "local-offset", "macros"] }
 tokio = { version = "1", features = [
@@ -39,13 +45,15 @@ tokio = { version = "1", features = [
    "signal",
 ] }
 tokio-util = "0.7.14"
-toml = "0.9.1"
+toml = "0.9.2"
 tracing = { version = "0.1.41", features = ["log"] }
-tree-sitter = "0.25.3"
+tree-sitter = "0.25.8"
 tree-sitter-bash = "0.25.0"
 uuid = { version = "1", features = ["serde", "v4"] }
+whoami = "1.6.0"
 wildmatch = "2.4.0"

+
 [target.'cfg(target_os = "linux")'.dependencies]
 landlock = "0.4.1"
 seccompiler = "0.5.0"
@@ -60,9 +68,11 @@ openssl-sys = { version = "*", features = ["vendored"] }

 [dev-dependencies]
 assert_cmd = "2"
+core_test_support = { path = "tests/common" }
 maplit = "1.0.2"
 predicates = "3"
 pretty_assertions = "1.4.1"
 tempfile = "3"
 tokio-test = "0.4"
+walkdir = "2.5.0"
 wiremock = "0.6"
--- a/codex-rs/core/README.md
+++ b/codex-rs/core/README.md
@@ -2,9 +2,18 @@

 This crate implements the business logic for Codex. It is designed to be used by the various Codex UIs written in Rust.

-Though for non-Rust UIs, we are also working to define a _protocol_ for talking to Codex. See:
+## Dependencies

- [Specification](../docs/protocol_v1.md)
- [Rust types](./src/protocol.rs)
+Note that `codex-core` makes some assumptions about certain helper utilities being available in the environment. Currently, these expectations are:

-You can use the `proto` subcommand using the executable in the [`cli` crate](../cli) to speak the protocol using newline-delimited-JSON over stdin/stdout.
+### macOS
+
+Expects `/usr/bin/sandbox-exec` to be present.
+
+### Linux
+
+Expects the binary containing `codex-core` to run the equivalent of `codex debug landlock` when `arg0` is `codex-linux-sandbox`. See the `codex-arg0` crate for details.
+
+### All Platforms
+
+Expects the binary containing `codex-core` to simulate the virtual `apply_patch` CLI when `arg1` is `--codex-run-as-apply-patch`. See the `codex-arg0` crate for details.
--- a/codex-rs/core/prompt.md
+++ b/codex-rs/core/prompt.md
@@ -96,3 +96,12 @@ You can invoke apply_patch like:
 ```
 shell {"command":["apply_patch","*** Begin Patch\n*** Add File: hello.txt\n+Hello, world!\n*** End Patch\n"]}
 ```
+
+Plan updates
+
+A tool named `update_plan` is available. Use it to keep an up‑to‑date, step‑by‑step plan for the task so you can follow your progress. When making your plans, keep in mind that you are a deployed coding agent - `update_plan` calls should not involve doing anything that you aren't capable of doing. For example, `update_plan` calls should NEVER contain tasks to merge your own pull requests. Only stop to ask the user if you genuinely need their feedback on a change.
+
+- At the start of the task, call `update_plan` with an initial plan: a short list of 1‑sentence steps with a `status` for each step (`pending`, `in_progress`, or `completed`). There should always be exactly one `in_progress` step until everything is done.
+- Whenever you finish a step, call `update_plan` again, marking the finished step as `completed` and the next step as `in_progress`.
+- If your plan needs to change, call `update_plan` with the revised steps and include an `explanation` describing the change.
+- When all steps are complete, make a final `update_plan` call with all steps marked `completed`.
--- a/codex-rs/core/src/apply_patch.rs
+++ b/codex-rs/core/src/apply_patch.rs
@@ -0,0 +1,157 @@
+use crate::codex::Session;
+use crate::models::FunctionCallOutputPayload;
+use crate::models::ResponseInputItem;
+use crate::protocol::FileChange;
+use crate::protocol::ReviewDecision;
+use crate::safety::SafetyCheck;
+use crate::safety::assess_patch_safety;
+use codex_apply_patch::ApplyPatchAction;
+use codex_apply_patch::ApplyPatchFileChange;
+use std::collections::HashMap;
+use std::path::Path;
+use std::path::PathBuf;
+
+pub const CODEX_APPLY_PATCH_ARG1: &str = "--codex-run-as-apply-patch";
+
+pub(crate) enum InternalApplyPatchInvocation {
+    /// The `apply_patch` call was resolved without delegating to exec; in this
+    /// module that happens when the patch is rejected by the user or by the
+    /// safety check. This is the output for the originating `shell` function call.
+    Output(ResponseInputItem),
+
+    /// The `apply_patch` call was approved, either automatically because it
+    /// appears that it should be allowed based on the user's sandbox policy
+    /// *or* because the user explicitly approved it. In either case, we use
+    /// exec with [`CODEX_APPLY_PATCH_ARG1`] to realize the `apply_patch` call,
+    /// but [`ApplyPatchExec::user_explicitly_approved_this_action`] is used to
+    /// determine the sandbox used with the `exec()`.
+    DelegateToExec(ApplyPatchExec),
+}
+
+pub(crate) struct ApplyPatchExec {
+    pub(crate) action: ApplyPatchAction,
+    pub(crate) user_explicitly_approved_this_action: bool,
+}
+
+impl From<ResponseInputItem> for InternalApplyPatchInvocation {
+    fn from(item: ResponseInputItem) -> Self {
+        InternalApplyPatchInvocation::Output(item)
+    }
+}
+
+pub(crate) async fn apply_patch(
+    sess: &Session,
+    sub_id: &str,
+    call_id: &str,
+    action: ApplyPatchAction,
+) -> InternalApplyPatchInvocation {
+    let writable_roots_snapshot = {
+        #[allow(clippy::unwrap_used)]
+        let guard = sess.writable_roots.lock().unwrap();
+        guard.clone()
+    };
+
+    match assess_patch_safety(
+        &action,
+        sess.approval_policy,
+        &writable_roots_snapshot,
+        &sess.cwd,
+    ) {
+        SafetyCheck::AutoApprove { .. } => {
+            InternalApplyPatchInvocation::DelegateToExec(ApplyPatchExec {
+                action,
+                user_explicitly_approved_this_action: false,
+            })
+        }
+        SafetyCheck::AskUser => {
+            // Compute a readable summary of path changes to include in the
+            // approval request so the user can make an informed decision.
+            //
+            // Note that it might be worth expanding this approval request to
+            // give the user the option to expand the set of writable roots so
+            // that similar patches can be auto-approved in the future during
+            // this session.
+            let rx_approve = sess
+                .request_patch_approval(sub_id.to_owned(), call_id.to_owned(), &action, None, None)
+                .await;
+            match rx_approve.await.unwrap_or_default() {
+                ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {
+                    InternalApplyPatchInvocation::DelegateToExec(ApplyPatchExec {
+                        action,
+                        user_explicitly_approved_this_action: true,
+                    })
+                }
+                ReviewDecision::Denied | ReviewDecision::Abort => {
+                    ResponseInputItem::FunctionCallOutput {
+                        call_id: call_id.to_owned(),
+                        output: FunctionCallOutputPayload {
+                            content: "patch rejected by user".to_string(),
+                            success: Some(false),
+                        },
+                    }
+                    .into()
+                }
+            }
+        }
+        SafetyCheck::Reject { reason } => ResponseInputItem::FunctionCallOutput {
+            call_id: call_id.to_owned(),
+            output: FunctionCallOutputPayload {
+                content: format!("patch rejected: {reason}"),
+                success: Some(false),
+            },
+        }
+        .into(),
+    }
+}
+
+pub(crate) fn convert_apply_patch_to_protocol(
+    action: &ApplyPatchAction,
+) -> HashMap<PathBuf, FileChange> {
+    let changes = action.changes();
+    let mut result = HashMap::with_capacity(changes.len());
+    for (path, change) in changes {
+        let protocol_change = match change {
+            ApplyPatchFileChange::Add { content } => FileChange::Add {
+                content: content.clone(),
+            },
+            ApplyPatchFileChange::Delete => FileChange::Delete,
+            ApplyPatchFileChange::Update {
+                unified_diff,
+                move_path,
+                new_content: _new_content,
+            } => FileChange::Update {
+                unified_diff: unified_diff.clone(),
+                move_path: move_path.clone(),
+            },
+        };
+        result.insert(path.clone(), protocol_change);
+    }
+    result
+}
+
+pub(crate) fn get_writable_roots(cwd: &Path) -> Vec<PathBuf> {
+    let mut writable_roots = Vec::new();
+    if cfg!(target_os = "macos") {
+        // On macOS, $TMPDIR is private to the user.
+        writable_roots.push(std::env::temp_dir());
+
+        // Allow pyenv to update its shims directory. Without this, any tool
+        // that happens to be managed by `pyenv` will fail with an error like:
+        //
+        //   pyenv: cannot rehash: $HOME/.pyenv/shims isn't writable
+        //
+        // which is emitted every time `pyenv` tries to run `rehash` (for
+        // example, after installing a new Python package that drops an entry
+        // point). Although the sandbox is intentionally read‑only by default,
+        // writing to the user's local `pyenv` directory is safe because it
+        // is already user‑writable and scoped to the current user account.
+        if let Ok(home_dir) = std::env::var("HOME") {
+            let pyenv_dir = PathBuf::from(home_dir).join(".pyenv");
+            writable_roots.push(pyenv_dir);
+        }
+    }
+
+    writable_roots.push(cwd.to_path_buf());
+
+    writable_roots
+}
--- a/codex-rs/core/src/bash.rs
+++ b/codex-rs/core/src/bash.rs
@@ -0,0 +1,219 @@
+use tree_sitter::Parser;
+use tree_sitter::Tree;
+use tree_sitter_bash::LANGUAGE as BASH;
+
+/// Parse the provided bash source using tree-sitter-bash, returning a Tree on
+/// success or None if parsing failed.
+pub fn try_parse_bash(bash_lc_arg: &str) -> Option<Tree> {
+    let lang = BASH.into();
+    let mut parser = Parser::new();
+    #[expect(clippy::expect_used)]
+    parser.set_language(&lang).expect("load bash grammar");
+    let old_tree: Option<&Tree> = None;
+    parser.parse(bash_lc_arg, old_tree)
+}
+
+/// Parse a script which may contain multiple simple commands joined only by
+/// the safe logical/pipe/sequencing operators: `&&`, `||`, `;`, `|`.
+///
+/// Returns `Some(Vec<command_words>)` if every command is a plain word‑only
+/// command and the parse tree has no disallowed constructs (parentheses,
+/// redirections, substitutions, control flow, etc.); otherwise `None`. Note:
+/// commands are returned in reverse source order (last command first).
+pub fn try_parse_word_only_commands_sequence(tree: &Tree, src: &str) -> Option<Vec<Vec<String>>> {
+    if tree.root_node().has_error() {
+        return None;
+    }
+
+    // List of allowed (named) node kinds for a "word only commands sequence".
+    // If we encounter a named node that is not in this list we reject.
+    const ALLOWED_KINDS: &[&str] = &[
+        // top level containers
+        "program",
+        "list",
+        "pipeline",
+        // commands & words
+        "command",
+        "command_name",
+        "word",
+        "string",
+        "string_content",
+        "raw_string",
+        "number",
+    ];
+    // Allow only safe punctuation / operator tokens; anything else causes reject.
+    const ALLOWED_PUNCT_TOKENS: &[&str] = &["&&", "||", ";", "|", "\"", "'"];
+
+    let root = tree.root_node();
+    let mut cursor = root.walk();
+    let mut stack = vec![root];
+    let mut command_nodes = Vec::new();
+    while let Some(node) = stack.pop() {
+        let kind = node.kind();
+        if node.is_named() {
+            if !ALLOWED_KINDS.contains(&kind) {
+                return None;
+            }
+            if kind == "command" {
+                command_nodes.push(node);
+            }
+        } else {
+            // Reject any punctuation / operator tokens that are not explicitly allowed.
+            if kind.chars().any(|c| "&;|".contains(c)) && !ALLOWED_PUNCT_TOKENS.contains(&kind) {
+                return None;
+            }
+            if !(ALLOWED_PUNCT_TOKENS.contains(&kind) || kind.trim().is_empty()) {
+                // If it's a quote token or operator it's allowed above; we also allow whitespace tokens.
+                // Any other punctuation like parentheses, braces, redirects, backticks, etc are rejected.
+                return None;
+            }
+        }
+        for child in node.children(&mut cursor) {
+            stack.push(child);
+        }
+    }
+
+    let mut commands = Vec::new();
+    for node in command_nodes {
+        if let Some(words) = parse_plain_command_from_node(node, src) {
+            commands.push(words);
+        } else {
+            return None;
+        }
+    }
+    Some(commands)
+}
+
+fn parse_plain_command_from_node(cmd: tree_sitter::Node, src: &str) -> Option<Vec<String>> {
+    if cmd.kind() != "command" {
+        return None;
+    }
+    let mut words = Vec::new();
+    let mut cursor = cmd.walk();
+    for child in cmd.named_children(&mut cursor) {
+        match child.kind() {
+            "command_name" => {
+                let word_node = child.named_child(0)?;
+                if word_node.kind() != "word" {
+                    return None;
+                }
+                words.push(word_node.utf8_text(src.as_bytes()).ok()?.to_owned());
+            }
+            "word" | "number" => {
+                words.push(child.utf8_text(src.as_bytes()).ok()?.to_owned());
+            }
+            "string" => {
+                if child.child_count() == 3
+                    && child.child(0)?.kind() == "\""
+                    && child.child(1)?.kind() == "string_content"
+                    && child.child(2)?.kind() == "\""
+                {
+                    words.push(child.child(1)?.utf8_text(src.as_bytes()).ok()?.to_owned());
+                } else {
+                    return None;
+                }
+            }
+            "raw_string" => {
+                let raw_string = child.utf8_text(src.as_bytes()).ok()?;
+                let stripped = raw_string
+                    .strip_prefix('\'')
+                    .and_then(|s| s.strip_suffix('\''));
+                if let Some(s) = stripped {
+                    words.push(s.to_owned());
+                } else {
+                    return None;
+                }
+            }
+            _ => return None,
+        }
+    }
+    Some(words)
+}
+
+#[cfg(test)]
+mod tests {
+    #![allow(clippy::unwrap_used)]
+    use super::*;
+
+    fn parse_seq(src: &str) -> Option<Vec<Vec<String>>> {
+        let tree = try_parse_bash(src)?;
+        try_parse_word_only_commands_sequence(&tree, src)
+    }
+
+    #[test]
+    fn accepts_single_simple_command() {
+        let cmds = parse_seq("ls -1").unwrap();
+        assert_eq!(cmds, vec![vec!["ls".to_string(), "-1".to_string()]]);
+    }
+
+    #[test]
+    fn accepts_multiple_commands_with_allowed_operators() {
+        let src = "ls && pwd; echo 'hi there' | wc -l";
+        let cmds = parse_seq(src).unwrap();
+        let expected: Vec<Vec<String>> = vec![
+            vec!["wc".to_string(), "-l".to_string()],
+            vec!["echo".to_string(), "hi there".to_string()],
+            vec!["pwd".to_string()],
+            vec!["ls".to_string()],
+        ];
+        assert_eq!(cmds, expected);
+    }
+
+    #[test]
+    fn extracts_double_and_single_quoted_strings() {
+        let cmds = parse_seq("echo \"hello world\"").unwrap();
+        assert_eq!(
+            cmds,
+            vec![vec!["echo".to_string(), "hello world".to_string()]]
+        );
+
+        let cmds2 = parse_seq("echo 'hi there'").unwrap();
+        assert_eq!(
+            cmds2,
+            vec![vec!["echo".to_string(), "hi there".to_string()]]
+        );
+    }
+
+    #[test]
+    fn accepts_numbers_as_words() {
+        let cmds = parse_seq("echo 123 456").unwrap();
+        assert_eq!(
+            cmds,
+            vec![vec![
+                "echo".to_string(),
+                "123".to_string(),
+                "456".to_string()
+            ]]
+        );
+    }
+
+    #[test]
+    fn rejects_parentheses_and_subshells() {
+        assert!(parse_seq("(ls)").is_none());
+        assert!(parse_seq("ls || (pwd && echo hi)").is_none());
+    }
+
+    #[test]
+    fn rejects_redirections_and_unsupported_operators() {
+        assert!(parse_seq("ls > out.txt").is_none());
+        assert!(parse_seq("echo hi & echo bye").is_none());
+    }
+
+    #[test]
+    fn rejects_command_and_process_substitutions_and_expansions() {
+        assert!(parse_seq("echo $(pwd)").is_none());
+        assert!(parse_seq("echo `pwd`").is_none());
+        assert!(parse_seq("echo $HOME").is_none());
+        assert!(parse_seq("echo \"hi $USER\"").is_none());
+    }
+
+    #[test]
+    fn rejects_variable_assignment_prefix() {
+        assert!(parse_seq("FOO=bar ls").is_none());
+    }
+
+    #[test]
+    fn rejects_trailing_operator_parse_error() {
+        assert!(parse_seq("ls &&").is_none());
+    }
+}
--- a/codex-rs/core/src/chat_completions.rs
+++ b/codex-rs/core/src/chat_completions.rs
@@ -21,8 +21,6 @@ use crate::client_common::ResponseEvent;
 use crate::client_common::ResponseStream;
 use crate::error::CodexErr;
 use crate::error::Result;
-use crate::flags::OPENAI_REQUEST_MAX_RETRIES;
-use crate::flags::OPENAI_STREAM_IDLE_TIMEOUT_MS;
 use crate::models::ContentItem;
 use crate::models::ResponseItem;
 use crate::openai_tools::create_tools_json_for_chat_completions_api;
@@ -32,6 +30,7 @@ use crate::util::backoff;
 pub(crate) async fn stream_chat_completions(
    prompt: &Prompt,
    model: &str,
+    include_plan_tool: bool,
    client: &reqwest::Client,
    provider: &ModelProviderInfo,
 ) -> Result<ResponseStream> {
@@ -41,9 +40,13 @@ pub(crate) async fn stream_chat_completions(
    let full_instructions = prompt.get_full_instructions(model);
    messages.push(json!({"role": "system", "content": full_instructions}));

+    if let Some(instr) = &prompt.user_instructions {
+        messages.push(json!({"role": "user", "content": instr}));
+    }
+
    for item in &prompt.input {
        match item {
-            ResponseItem::Message { role, content } => {
+            ResponseItem::Message { role, content, .. } => {
                let mut text = String::new();
                for c in content {
                    match c {
@@ -60,6 +63,7 @@ pub(crate) async fn stream_chat_completions(
                name,
                arguments,
                call_id,
+                ..
            } => {
                messages.push(json!({
                    "role": "assistant",
@@ -106,7 +110,7 @@ pub(crate) async fn stream_chat_completions(
        }
    }

-    let tools_json = create_tools_json_for_chat_completions_api(prompt, model)?;
+    let tools_json = create_tools_json_for_chat_completions_api(prompt, model, include_plan_tool)?;
    let payload = json!({
        "model": model,
        "messages": messages,
@@ -121,6 +125,7 @@ pub(crate) async fn stream_chat_completions(
    );

    let mut attempt = 0;
+    let max_retries = provider.request_max_retries();
    loop {
        attempt += 1;

@@ -134,9 +139,13 @@ pub(crate) async fn stream_chat_completions(

        match res {
            Ok(resp) if resp.status().is_success() => {
-                let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(16);
+                let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(1600);
                let stream = resp.bytes_stream().map_err(CodexErr::Reqwest);
-                tokio::spawn(process_chat_sse(stream, tx_event));
+                tokio::spawn(process_chat_sse(
+                    stream,
+                    tx_event,
+                    provider.stream_idle_timeout(),
+                ));
                return Ok(ResponseStream { rx_event });
            }
            Ok(res) => {
@@ -146,7 +155,7 @@ pub(crate) async fn stream_chat_completions(
                    return Err(CodexErr::UnexpectedStatus(status, body));
                }

-                if attempt > *OPENAI_REQUEST_MAX_RETRIES {
+                if attempt > max_retries {
                    return Err(CodexErr::RetryLimit(status));
                }

@@ -162,7 +171,7 @@ pub(crate) async fn stream_chat_completions(
                tokio::time::sleep(delay).await;
            }
            Err(e) => {
-                if attempt > *OPENAI_REQUEST_MAX_RETRIES {
+                if attempt > max_retries {
                    return Err(e.into());
                }
                let delay = backoff(attempt);
@@ -175,14 +184,15 @@ pub(crate) async fn stream_chat_completions(
 /// Lightweight SSE processor for the Chat Completions streaming format. The
 /// output is mapped onto Codex's internal [`ResponseEvent`] so that the rest
 /// of the pipeline can stay agnostic of the underlying wire format.
-async fn process_chat_sse<S>(stream: S, tx_event: mpsc::Sender<Result<ResponseEvent>>)
-where
+async fn process_chat_sse<S>(
+    stream: S,
+    tx_event: mpsc::Sender<Result<ResponseEvent>>,
+    idle_timeout: Duration,
+) where
    S: Stream<Item = Result<Bytes>> + Unpin,
 {
    let mut stream = stream.eventsource();

-    let idle_timeout = *OPENAI_STREAM_IDLE_TIMEOUT_MS;
-
    // State to accumulate a function call across streaming chunks.
    // OpenAI may split the `arguments` string over multiple `delta` events
    // until the chunk whose `finish_reason` is `tool_calls` is emitted. We
@@ -255,6 +265,7 @@ where
                    content: vec![ContentItem::OutputText {
                        text: content.to_string(),
                    }],
+                    id: None,
                };

                let _ = tx_event.send(Ok(ResponseEvent::OutputItemDone(item))).await;
@@ -296,6 +307,7 @@ where
                    "tool_calls" if fn_call_state.active => {
                        // Build the FunctionCall response item.
                        let item = ResponseItem::FunctionCall {
+                            id: None,
                            name: fn_call_state.name.clone().unwrap_or_else(|| "".to_string()),
                            arguments: fn_call_state.arguments.clone(),
                            call_id: fn_call_state.call_id.clone().unwrap_or_else(String::new),
@@ -398,6 +410,7 @@ where
                }))) => {
                    if !this.cumulative.is_empty() {
                        let aggregated_item = crate::models::ResponseItem::Message {
+                            id: None,
                            role: "assistant".to_string(),
                            content: vec![crate::models::ContentItem::OutputText {
                                text: std::mem::take(&mut this.cumulative),
@@ -426,6 +439,12 @@ where
                    // will never appear in a Chat Completions stream.
                    continue;
                }
+                Poll::Ready(Some(Ok(ResponseEvent::OutputTextDelta(_))))
+                | Poll::Ready(Some(Ok(ResponseEvent::ReasoningSummaryDelta(_)))) => {
+                    // Deltas are ignored here since aggregation waits for the
+                    // final OutputItemDone.
+                    continue;
+                }
            }
        }
    }
--- a/codex-rs/core/src/client.rs
+++ b/codex-rs/core/src/client.rs
@@ -3,6 +3,8 @@ use std::path::Path;
 use std::time::Duration;

 use bytes::Bytes;
+use codex_login::AuthMode;
+use codex_login::CodexAuth;
 use eventsource_stream::Eventsource;
 use futures::prelude::*;
 use reqwest::StatusCode;
@@ -15,6 +17,7 @@ use tokio_util::io::ReaderStream;
 use tracing::debug;
 use tracing::trace;
 use tracing::warn;
+use uuid::Uuid;

 use crate::chat_completions::AggregateStreamExt;
 use crate::chat_completions::stream_chat_completions;
@@ -27,12 +30,12 @@ use crate::config::Config;
 use crate::config_types::ReasoningEffort as ReasoningEffortConfig;
 use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
 use crate::error::CodexErr;
+use crate::error::EnvVarError;
 use crate::error::Result;
 use crate::flags::CODEX_RS_SSE_FIXTURE;
-use crate::flags::OPENAI_REQUEST_MAX_RETRIES;
-use crate::flags::OPENAI_STREAM_IDLE_TIMEOUT_MS;
 use crate::model_provider_info::ModelProviderInfo;
 use crate::model_provider_info::WireApi;
+use crate::models::ContentItem;
 use crate::models::ResponseItem;
 use crate::openai_tools::create_tools_json_for_responses_api;
 use crate::protocol::TokenUsage;
@@ -42,8 +45,10 @@ use std::sync::Arc;
 #[derive(Clone)]
 pub struct ModelClient {
    config: Arc<Config>,
+    auth: Option<CodexAuth>,
    client: reqwest::Client,
    provider: ModelProviderInfo,
+    session_id: Uuid,
    effort: ReasoningEffortConfig,
    summary: ReasoningSummaryConfig,
 }
@@ -51,14 +56,18 @@ pub struct ModelClient {
 impl ModelClient {
    pub fn new(
        config: Arc<Config>,
+        auth: Option<CodexAuth>,
        provider: ModelProviderInfo,
        effort: ReasoningEffortConfig,
        summary: ReasoningSummaryConfig,
+        session_id: Uuid,
    ) -> Self {
        Self {
            config,
+            auth,
            client: reqwest::Client::new(),
            provider,
+            session_id,
            effort,
            summary,
        }
@@ -75,6 +84,7 @@ impl ModelClient {
                let response_stream = stream_chat_completions(
                    prompt,
                    &self.config.model,
+                    self.config.include_plan_tool,
                    &self.client,
                    &self.provider,
                )
@@ -109,23 +119,65 @@ impl ModelClient {
        if let Some(path) = &*CODEX_RS_SSE_FIXTURE {
            // short circuit for tests
            warn!(path, "Streaming from fixture");
-            return stream_from_fixture(path).await;
+            return stream_from_fixture(path, self.provider.clone()).await;
        }

+        let auth = self.auth.as_ref().ok_or_else(|| {
+            CodexErr::EnvVar(EnvVarError {
+                var: "OPENAI_API_KEY".to_string(),
+                instructions: Some("Create an API key (https://platform.openai.com) and export it as an environment variable.".to_string()),
+            })
+        })?;
+
+        let store = prompt.store && auth.mode != AuthMode::ChatGPT;
+
+        let base_url = match self.provider.base_url.clone() {
+            Some(url) => url,
+            None => match auth.mode {
+                AuthMode::ChatGPT => "https://chatgpt.com/backend-api/codex".to_string(),
+                AuthMode::ApiKey => "https://api.openai.com/v1".to_string(),
+            },
+        };
+
+        let token = auth.get_token().await?;
+
        let full_instructions = prompt.get_full_instructions(&self.config.model);
-        let tools_json = create_tools_json_for_responses_api(prompt, &self.config.model)?;
+        let tools_json = create_tools_json_for_responses_api(
+            prompt,
+            &self.config.model,
+            self.config.include_plan_tool,
+        )?;
        let reasoning = create_reasoning_param_for_request(&self.config, self.effort, self.summary);
+
+        // Request encrypted chain-of-thought (CoT) content when responses are not
+        // stored; when they are stored, reasoning items are referenced by ID instead.
+        let include: Vec<String> = if !store && reasoning.is_some() {
+            vec!["reasoning.encrypted_content".to_string()]
+        } else {
+            vec![]
+        };
+
+        let mut input_with_instructions = Vec::with_capacity(prompt.input.len() + 1);
+        if let Some(ui) = &prompt.user_instructions {
+            input_with_instructions.push(ResponseItem::Message {
+                id: None,
+                role: "user".to_string(),
+                content: vec![ContentItem::InputText { text: ui.clone() }],
+            });
+        }
+        input_with_instructions.extend(prompt.input.clone());
+
        let payload = ResponsesApiRequest {
            model: &self.config.model,
            instructions: &full_instructions,
-            input: &prompt.input,
+            input: &input_with_instructions,
            tools: &tools_json,
            tool_choice: "auto",
            parallel_tool_calls: false,
            reasoning,
-            previous_response_id: prompt.prev_id.clone(),
-            store: prompt.store,
+            store,
            stream: true,
+            include,
        };

        trace!(
@@ -135,24 +187,58 @@ impl ModelClient {
        );

        let mut attempt = 0;
+        let max_retries = self.provider.request_max_retries();
+
        loop {
            attempt += 1;

-            let req_builder = self
-                .provider
-                .create_request_builder(&self.client)?
+            let mut req_builder = self
+                .client
+                .post(format!("{base_url}/responses"))
                .header("OpenAI-Beta", "responses=experimental")
+                .header("session_id", self.session_id.to_string())
+                .bearer_auth(&token)
                .header(reqwest::header::ACCEPT, "text/event-stream")
                .json(&payload);

+            if auth.mode == AuthMode::ChatGPT {
+                if let Some(account_id) = auth.get_account_id().await {
+                    req_builder = req_builder.header("chatgpt-account-id", account_id);
+                }
+            }
+
+            req_builder = self.provider.apply_http_headers(req_builder);
+
+            let originator = self
+                .config
+                .internal_originator
+                .as_deref()
+                .unwrap_or("codex_cli_rs");
+            req_builder = req_builder.header("originator", originator);
+
            let res = req_builder.send().await;
+            if let Ok(resp) = &res {
+                trace!(
+                    "Response status: {}, request-id: {}",
+                    resp.status(),
+                    resp.headers()
+                        .get("x-request-id")
+                        .map(|v| v.to_str().unwrap_or_default())
+                        .unwrap_or_default()
+                );
+            }
+
            match res {
                Ok(resp) if resp.status().is_success() => {
-                    let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(16);
+                    let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(1600);

                    // spawn task to process SSE
                    let stream = resp.bytes_stream().map_err(CodexErr::Reqwest);
-                    tokio::spawn(process_sse(stream, tx_event));
+                    tokio::spawn(process_sse(
+                        stream,
+                        tx_event,
+                        self.provider.stream_idle_timeout(),
+                    ));

                    return Ok(ResponseStream { rx_event });
                }
@@ -171,7 +257,7 @@ impl ModelClient {
                        return Err(CodexErr::UnexpectedStatus(status, body));
                    }

-                    if attempt > *OPENAI_REQUEST_MAX_RETRIES {
+                    if attempt > max_retries {
                        return Err(CodexErr::RetryLimit(status));
                    }

@@ -188,7 +274,7 @@ impl ModelClient {
                    tokio::time::sleep(delay).await;
                }
                Err(e) => {
-                    if attempt > *OPENAI_REQUEST_MAX_RETRIES {
+                    if attempt > max_retries {
                        return Err(e.into());
                    }
                    let delay = backoff(attempt);
@@ -197,6 +283,10 @@ impl ModelClient {
            }
        }
    }
+
+    pub fn get_provider(&self) -> ModelProviderInfo {
+        self.provider.clone()
+    }
 }

 #[derive(Debug, Deserialize, Serialize)]
@@ -205,6 +295,7 @@ struct SseEvent {
    kind: String,
    response: Option<Value>,
    item: Option<Value>,
+    delta: Option<String>,
 }

 #[derive(Debug, Deserialize)]
@@ -247,14 +338,16 @@ struct ResponseCompletedOutputTokensDetails {
    reasoning_tokens: u64,
 }

-async fn process_sse<S>(stream: S, tx_event: mpsc::Sender<Result<ResponseEvent>>)
-where
+async fn process_sse<S>(
+    stream: S,
+    tx_event: mpsc::Sender<Result<ResponseEvent>>,
+    idle_timeout: Duration,
+) where
    S: Stream<Item = Result<Bytes>> + Unpin,
 {
    let mut stream = stream.eventsource();

    // If the stream stays completely silent for an extended period treat it as disconnected.
-    let idle_timeout = *OPENAI_STREAM_IDLE_TIMEOUT_MS;
    // The response id returned from the "complete" message.
    let mut response_completed: Option<ResponseCompleted> = None;

@@ -315,7 +408,7 @@ where
            // duplicated `output` array embedded in the `response.completed`
            // payload.  That produced two concrete issues:
            //   1. No real‑time streaming – the user only saw output after the
-            //      entire turn had finished, which broke the “typing” UX and
+            //      entire turn had finished, which broke the "typing" UX and
            //      made long‑running turns look stalled.
            //   2. Duplicate `function_call_output` items – both the
            //      individual *and* the completed array were forwarded, which
@@ -337,11 +430,40 @@ where
                    return;
                }
            }
+            "response.output_text.delta" => {
+                if let Some(delta) = event.delta {
+                    let event = ResponseEvent::OutputTextDelta(delta);
+                    if tx_event.send(Ok(event)).await.is_err() {
+                        return;
+                    }
+                }
+            }
+            "response.reasoning_summary_text.delta" => {
+                if let Some(delta) = event.delta {
+                    let event = ResponseEvent::ReasoningSummaryDelta(delta);
+                    if tx_event.send(Ok(event)).await.is_err() {
+                        return;
+                    }
+                }
+            }
            "response.created" => {
                if event.response.is_some() {
                    let _ = tx_event.send(Ok(ResponseEvent::Created {})).await;
                }
            }
+            "response.failed" => {
+                if let Some(resp_val) = event.response {
+                    let error = resp_val
+                        .get("error")
+                        .and_then(|v| v.get("message"))
+                        .and_then(|v| v.as_str())
+                        .unwrap_or("response.failed event received");
+
+                    let _ = tx_event
+                        .send(Err(CodexErr::Stream(error.to_string())))
+                        .await;
+                }
+            }
            // Final response completed – includes array of output items & id
            "response.completed" => {
                if let Some(resp_val) = event.response {
@@ -360,10 +482,8 @@ where
            | "response.function_call_arguments.delta"
            | "response.in_progress"
            | "response.output_item.added"
-            | "response.output_text.delta"
            | "response.output_text.done"
            | "response.reasoning_summary_part.added"
-            | "response.reasoning_summary_text.delta"
            | "response.reasoning_summary_text.done" => {
                // Currently, we ignore these events, but we handle them
                // separately to skip the logging message in the `other` case.
@@ -374,8 +494,11 @@ where
 }

 /// used in tests to stream from a text SSE file
-async fn stream_from_fixture(path: impl AsRef<Path>) -> Result<ResponseStream> {
-    let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(16);
+async fn stream_from_fixture(
+    path: impl AsRef<Path>,
+    provider: ModelProviderInfo,
+) -> Result<ResponseStream> {
+    let (tx_event, rx_event) = mpsc::channel::<Result<ResponseEvent>>(1600);
    let f = std::fs::File::open(path.as_ref())?;
    let lines = std::io::BufReader::new(f).lines();

@@ -388,7 +511,11 @@ async fn stream_from_fixture(path: impl AsRef<Path>) -> Result<ResponseStream> {

    let rdr = std::io::Cursor::new(content);
    let stream = ReaderStream::new(rdr).map_err(CodexErr::Io);
-    tokio::spawn(process_sse(stream, tx_event));
+    tokio::spawn(process_sse(
+        stream,
+        tx_event,
+        provider.stream_idle_timeout(),
+    ));
    Ok(ResponseStream { rx_event })
 }

@@ -408,7 +535,10 @@ mod tests {

    /// Runs the SSE parser on pre-chunked byte slices and returns every event
    /// (including any final `Err` from a stream-closure check).
-    async fn collect_events(chunks: &[&[u8]]) -> Vec<Result<ResponseEvent>> {
+    async fn collect_events(
+        chunks: &[&[u8]],
+        provider: ModelProviderInfo,
+    ) -> Vec<Result<ResponseEvent>> {
        let mut builder = IoBuilder::new();
        for chunk in chunks {
            builder.read(chunk);
@@ -417,7 +547,7 @@ mod tests {
        let reader = builder.build();
        let stream = ReaderStream::new(reader).map_err(CodexErr::Io);
        let (tx, mut rx) = mpsc::channel::<Result<ResponseEvent>>(16);
-        tokio::spawn(process_sse(stream, tx));
+        tokio::spawn(process_sse(stream, tx, provider.stream_idle_timeout()));

        let mut events = Vec::new();
        while let Some(ev) = rx.recv().await {
@@ -428,7 +558,10 @@ mod tests {

    /// Builds an in-memory SSE stream from JSON fixtures and returns only the
    /// successfully parsed events (panics on internal channel errors).
-    async fn run_sse(events: Vec<serde_json::Value>) -> Vec<ResponseEvent> {
+    async fn run_sse(
+        events: Vec<serde_json::Value>,
+        provider: ModelProviderInfo,
+    ) -> Vec<ResponseEvent> {
        let mut body = String::new();
        for e in events {
            let kind = e
@@ -444,7 +577,7 @@ mod tests {

        let (tx, mut rx) = mpsc::channel::<Result<ResponseEvent>>(8);
        let stream = ReaderStream::new(std::io::Cursor::new(body)).map_err(CodexErr::Io);
-        tokio::spawn(process_sse(stream, tx));
+        tokio::spawn(process_sse(stream, tx, provider.stream_idle_timeout()));

        let mut out = Vec::new();
        while let Some(ev) = rx.recv().await {
@@ -489,7 +622,26 @@ mod tests {
        let sse2 = format!("event: response.output_item.done\ndata: {item2}\n\n");
        let sse3 = format!("event: response.completed\ndata: {completed}\n\n");

-        let events = collect_events(&[sse1.as_bytes(), sse2.as_bytes(), sse3.as_bytes()]).await;
+        let provider = ModelProviderInfo {
+            name: "test".to_string(),
+            base_url: Some("https://test.com".to_string()),
+            env_key: Some("TEST_API_KEY".to_string()),
+            env_key_instructions: None,
+            wire_api: WireApi::Responses,
+            query_params: None,
+            http_headers: None,
+            env_http_headers: None,
+            request_max_retries: Some(0),
+            stream_max_retries: Some(0),
+            stream_idle_timeout_ms: Some(1000),
+            requires_auth: false,
+        };
+
+        let events = collect_events(
+            &[sse1.as_bytes(), sse2.as_bytes(), sse3.as_bytes()],
+            provider,
+        )
+        .await;

        assert_eq!(events.len(), 3);

@@ -530,8 +682,22 @@ mod tests {
        .to_string();

        let sse1 = format!("event: response.output_item.done\ndata: {item1}\n\n");
+        let provider = ModelProviderInfo {
+            name: "test".to_string(),
+            base_url: Some("https://test.com".to_string()),
+            env_key: Some("TEST_API_KEY".to_string()),
+            env_key_instructions: None,
+            wire_api: WireApi::Responses,
+            query_params: None,
+            http_headers: None,
+            env_http_headers: None,
+            request_max_retries: Some(0),
+            stream_max_retries: Some(0),
+            stream_idle_timeout_ms: Some(1000),
+            requires_auth: false,
+        };

-        let events = collect_events(&[sse1.as_bytes()]).await;
+        let events = collect_events(&[sse1.as_bytes()], provider).await;

        assert_eq!(events.len(), 2);

@@ -619,7 +785,22 @@ mod tests {
            let mut evs = vec![case.event];
            evs.push(completed.clone());

-            let out = run_sse(evs).await;
+            let provider = ModelProviderInfo {
+                name: "test".to_string(),
+                base_url: Some("https://test.com".to_string()),
+                env_key: Some("TEST_API_KEY".to_string()),
+                env_key_instructions: None,
+                wire_api: WireApi::Responses,
+                query_params: None,
+                http_headers: None,
+                env_http_headers: None,
+                request_max_retries: Some(0),
+                stream_max_retries: Some(0),
+                stream_idle_timeout_ms: Some(1000),
+                requires_auth: false,
+            };
+
+            let out = run_sse(evs, provider).await;
            assert_eq!(out.len(), case.expected_len, "case {}", case.name);
            assert!(
                (case.expect_first)(&out[0]),
--- a/codex-rs/core/src/client_common.rs
+++ b/codex-rs/core/src/client_common.rs
@@ -22,8 +22,6 @@ const BASE_INSTRUCTIONS: &str = include_str!("../prompt.md");
 pub struct Prompt {
    /// Conversation context input items.
    pub input: Vec<ResponseItem>,
-    /// Optional previous response ID (when storage is enabled).
-    pub prev_id: Option<String>,
    /// Optional instructions from the user to amend to the built-in agent
    /// instructions.
    pub user_instructions: Option<String>,
@@ -34,14 +32,18 @@ pub struct Prompt {
    /// the "fully qualified" tool name (i.e., prefixed with the server name),
    /// which should be reported to the model in place of Tool::name.
    pub extra_tools: HashMap<String, mcp_types::Tool>,
+
+    /// Optional override for the built-in BASE_INSTRUCTIONS.
+    pub base_instructions_override: Option<String>,
 }

 impl Prompt {
    pub(crate) fn get_full_instructions(&self, model: &str) -> Cow<'_, str> {
-        let mut sections: Vec<&str> = vec![BASE_INSTRUCTIONS];
-        if let Some(ref user) = self.user_instructions {
-            sections.push(user);
-        }
+        let base = self
+            .base_instructions_override
+            .as_deref()
+            .unwrap_or(BASE_INSTRUCTIONS);
+        let mut sections: Vec<&str> = vec![base];
        if model.starts_with("gpt-4.1") {
            sections.push(APPLY_PATCH_TOOL_INSTRUCTIONS);
        }
@@ -57,6 +59,8 @@ pub enum ResponseEvent {
        response_id: String,
        token_usage: Option<TokenUsage>,
    },
+    OutputTextDelta(String),
+    ReasoningSummaryDelta(String),
 }

 #[derive(Debug, Serialize)]
@@ -124,11 +128,10 @@ pub(crate) struct ResponsesApiRequest<'a> {
    pub(crate) tool_choice: &'static str,
    pub(crate) parallel_tool_calls: bool,
    pub(crate) reasoning: Option<Reasoning>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub(crate) previous_response_id: Option<String>,
    /// true when using the Responses API.
    pub(crate) store: bool,
    pub(crate) stream: bool,
+    pub(crate) include: Vec<String>,
 }

 use crate::config::Config;
@@ -182,3 +185,19 @@ impl Stream for ResponseStream {
        self.rx_event.poll_recv(cx)
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn get_full_instructions_no_user_content() {
+        let prompt = Prompt {
+            user_instructions: Some("custom instruction".to_string()),
+            ..Default::default()
+        };
+        let expected = format!("{BASE_INSTRUCTIONS}\n{APPLY_PATCH_TOOL_INSTRUCTIONS}");
+        let full = prompt.get_full_instructions("gpt-4.1");
+        assert_eq!(full, expected);
+    }
+}
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
--- a/codex-rs/core/src/codex_wrapper.rs
+++ b/codex-rs/core/src/codex_wrapper.rs
@@ -1,20 +1,37 @@
 use std::sync::Arc;

 use crate::Codex;
+use crate::CodexSpawnOk;
 use crate::config::Config;
 use crate::protocol::Event;
 use crate::protocol::EventMsg;
 use crate::util::notify_on_sigint;
+use codex_login::load_auth;
 use tokio::sync::Notify;
+use uuid::Uuid;
+
+/// Represents an active Codex conversation, including the first event
+/// (which is [`EventMsg::SessionConfigured`]).
+pub struct CodexConversation {
+    pub codex: Codex,
+    pub session_id: Uuid,
+    pub session_configured: Event,
+    pub ctrl_c: Arc<Notify>,
+}

 /// Spawn a new [`Codex`] and initialize the session.
 ///
 /// Returns the wrapped [`Codex`] **and** the `SessionInitialized` event that
 /// is received as a response to the initial `ConfigureSession` submission so
 /// that callers can surface the information to the UI.
-pub async fn init_codex(config: Config) -> anyhow::Result<(Codex, Event, Arc<Notify>)> {
+pub async fn init_codex(config: Config) -> anyhow::Result<CodexConversation> {
    let ctrl_c = notify_on_sigint();
-    let (codex, init_id) = Codex::spawn(config, ctrl_c.clone()).await?;
+    let auth = load_auth(&config.codex_home, true)?;
+    let CodexSpawnOk {
+        codex,
+        init_id,
+        session_id,
+    } = Codex::spawn(config, auth, ctrl_c.clone()).await?;

    // The first event must be `SessionInitialized`. Validate and forward it to
    // the caller so that they can display it in the conversation history.
@@ -33,5 +50,10 @@ pub async fn init_codex(config: Config) -> anyhow::Result<(Codex, Event, Arc<Not
        ));
    }

-    Ok((codex, event, ctrl_c))
+    Ok(CodexConversation {
+        codex,
+        session_id,
+        session_configured: event,
+        ctrl_c,
+    })
 }
--- a/codex-rs/core/src/config.rs
+++ b/codex-rs/core/src/config.rs
@@ -63,7 +63,10 @@ pub struct Config {
    pub disable_response_storage: bool,

    /// User-provided instructions from instructions.md.
-    pub instructions: Option<String>,
+    pub user_instructions: Option<String>,
+
+    /// Optional override for the built-in base instructions, set via `ConfigOverrides` or `experimental_instructions_file`.
+    pub base_instructions: Option<String>,

    /// Optional external notifier command. When set, Codex will spawn this
    /// program after each completed *turn* (i.e. when the agent finishes
@@ -137,6 +140,15 @@ pub struct Config {

    /// Base URL for requests to ChatGPT (as opposed to the OpenAI API).
    pub chatgpt_base_url: String,
+
+    /// Experimental rollout resume path (absolute path to .jsonl; undocumented).
+    pub experimental_resume: Option<PathBuf>,
+
+    /// Include an experimental plan tool that the model can use to update its current plan and status of each step.
+    pub include_plan_tool: bool,
+
+    /// The value for the `originator` header included with Responses API requests.
+    pub internal_originator: Option<String>,
 }

 impl Config {
@@ -321,6 +333,15 @@ pub struct ConfigToml {

    /// Base URL for requests to ChatGPT (as opposed to the OpenAI API).
    pub chatgpt_base_url: Option<String>,
+
+    /// Experimental rollout resume path (absolute path to .jsonl; undocumented).
+    pub experimental_resume: Option<PathBuf>,
+
+    /// Experimental path to a file whose contents replace the built-in BASE_INSTRUCTIONS.
+    pub experimental_instructions_file: Option<PathBuf>,
+
+    /// The value for the `originator` header included with Responses API requests.
+    pub internal_originator: Option<String>,
 }

 impl ConfigToml {
@@ -335,6 +356,7 @@ impl ConfigToml {
                Some(s) => SandboxPolicy::WorkspaceWrite {
                    writable_roots: s.writable_roots.clone(),
                    network_access: s.network_access,
+                    include_default_writable_roots: true,
                },
                None => SandboxPolicy::new_workspace_write_policy(),
            },
@@ -353,6 +375,8 @@ pub struct ConfigOverrides {
    pub model_provider: Option<String>,
    pub config_profile: Option<String>,
    pub codex_linux_sandbox_exe: Option<PathBuf>,
+    pub base_instructions: Option<String>,
+    pub include_plan_tool: Option<bool>,
 }

 impl Config {
@@ -363,7 +387,7 @@ impl Config {
        overrides: ConfigOverrides,
        codex_home: PathBuf,
    ) -> std::io::Result<Self> {
-        let instructions = Self::load_instructions(Some(&codex_home));
+        let user_instructions = Self::load_instructions(Some(&codex_home));

        // Destructure ConfigOverrides fully to ensure all overrides are applied.
        let ConfigOverrides {
@@ -374,6 +398,8 @@ impl Config {
            model_provider,
            config_profile: config_profile_key,
            codex_linux_sandbox_exe,
+            base_instructions,
+            include_plan_tool,
        } = overrides;

        let config_profile = match config_profile_key.as_ref().or(cfg.profile.as_ref()) {
@@ -448,6 +474,18 @@ impl Config {
                .as_ref()
                .map(|info| info.max_output_tokens)
        });
+
+        let experimental_resume = cfg.experimental_resume;
+
+        // Load base instructions override from a file if specified. If the
+        // path is relative, resolve it against the effective cwd so the
+        // behaviour matches other path-like config values.
+        let file_base_instructions = Self::get_base_instructions(
+            cfg.experimental_instructions_file.as_ref(),
+            &resolved_cwd,
+        )?;
+        let base_instructions = base_instructions.or(file_base_instructions);
+
        let config = Self {
            model,
            model_context_window,
@@ -466,7 +504,8 @@ impl Config {
                .or(cfg.disable_response_storage)
                .unwrap_or(false),
            notify: cfg.notify,
-            instructions,
+            user_instructions,
+            base_instructions,
            mcp_servers: cfg.mcp_servers,
            model_providers,
            project_doc_max_bytes: cfg.project_doc_max_bytes.unwrap_or(PROJECT_DOC_MAX_BYTES),
@@ -494,6 +533,10 @@ impl Config {
                .chatgpt_base_url
                .or(cfg.chatgpt_base_url)
                .unwrap_or("https://chatgpt.com/backend-api/".to_string()),
+
+            experimental_resume,
+            include_plan_tool: include_plan_tool.unwrap_or(false),
+            internal_originator: cfg.internal_originator,
        };
        Ok(config)
    }
@@ -514,6 +557,48 @@ impl Config {
            }
        })
    }
+
+    fn get_base_instructions(
+        path: Option<&PathBuf>,
+        cwd: &Path,
+    ) -> std::io::Result<Option<String>> {
+        let p = match path.as_ref() {
+            None => return Ok(None),
+            Some(p) => p,
+        };
+
+        // Resolve relative paths against the provided cwd to make CLI
+        // overrides consistent regardless of where the process was launched
+        // from.
+        let full_path = if p.is_relative() {
+            cwd.join(p)
+        } else {
+            p.to_path_buf()
+        };
+
+        let contents = std::fs::read_to_string(&full_path).map_err(|e| {
+            std::io::Error::new(
+                e.kind(),
+                format!(
+                    "failed to read experimental instructions file {}: {e}",
+                    full_path.display()
+                ),
+            )
+        })?;
+
+        let s = contents.trim().to_string();
+        if s.is_empty() {
+            Err(std::io::Error::new(
+                std::io::ErrorKind::InvalidData,
+                format!(
+                    "experimental instructions file is empty: {}",
+                    full_path.display()
+                ),
+            ))
+        } else {
+            Ok(Some(s))
+        }
+    }
 }

 fn default_model() -> String {
@@ -528,7 +613,7 @@ fn default_model() -> String {
 ///   function will Err if the path does not exist.
 /// - If `CODEX_HOME` is not set, this function does not verify that the
 ///   directory exists.
-fn find_codex_home() -> std::io::Result<PathBuf> {
+pub fn find_codex_home() -> std::io::Result<PathBuf> {
    // Honor the `CODEX_HOME` environment variable when it is set to allow users
    // (and tests) to override the default location.
    if let Ok(val) = std::env::var("CODEX_HOME") {
@@ -643,6 +728,7 @@ writable_roots = [
            SandboxPolicy::WorkspaceWrite {
                writable_roots: vec![PathBuf::from("/tmp")],
                network_access: false,
+                include_default_writable_roots: true,
            },
            sandbox_workspace_write_cfg.derive_sandbox_policy(sandbox_mode_override)
        );
@@ -682,6 +768,9 @@ name = "OpenAI using Chat Completions"
 base_url = "https://api.openai.com/v1"
 env_key = "OPENAI_API_KEY"
 wire_api = "chat"
+request_max_retries = 4            # retry failed HTTP requests
+stream_max_retries = 10            # retry dropped SSE streams
+stream_idle_timeout_ms = 300000    # 5m idle timeout

 [profiles.o3]
 model = "o3"
@@ -715,14 +804,17 @@ disable_response_storage = true

        let openai_chat_completions_provider = ModelProviderInfo {
            name: "OpenAI using Chat Completions".to_string(),
-            base_url: "https://api.openai.com/v1".to_string(),
+            base_url: Some("https://api.openai.com/v1".to_string()),
            env_key: Some("OPENAI_API_KEY".to_string()),
            wire_api: crate::WireApi::Chat,
            env_key_instructions: None,
            query_params: None,
            http_headers: None,
            env_http_headers: None,
-            supports_temperature: true,
+            request_max_retries: Some(4),
+            stream_max_retries: Some(10),
+            stream_idle_timeout_ms: Some(300_000),
+            requires_auth: false,
        };
        let model_provider_map = {
            let mut model_provider_map = built_in_model_providers();
@@ -753,7 +845,7 @@ disable_response_storage = true
    ///
    /// 1. custom command-line argument, e.g. `--model o3`
    /// 2. as part of a profile, where the `--profile` is specified via a CLI
-    ///    (or in the config file itelf)
+    ///    (or in the config file itself)
    /// 3. as an entry in `config.toml`, e.g. `model = "o3"`
    /// 4. the default value for a required field defined in code, e.g.,
    ///    `crate::flags::OPENAI_DEFAULT_MODEL`
@@ -785,7 +877,7 @@ disable_response_storage = true
                sandbox_policy: SandboxPolicy::new_read_only_policy(),
                shell_environment_policy: ShellEnvironmentPolicy::default(),
                disable_response_storage: false,
-                instructions: None,
+                user_instructions: None,
                notify: None,
                cwd: fixture.cwd(),
                mcp_servers: HashMap::new(),
@@ -801,6 +893,10 @@ disable_response_storage = true
                model_reasoning_summary: ReasoningSummary::Detailed,
                model_supports_reasoning_summaries: false,
                chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
+                experimental_resume: None,
+                base_instructions: None,
+                include_plan_tool: false,
+                internal_originator: None,
            },
            o3_profile_config
        );
@@ -831,7 +927,7 @@ disable_response_storage = true
            sandbox_policy: SandboxPolicy::new_read_only_policy(),
            shell_environment_policy: ShellEnvironmentPolicy::default(),
            disable_response_storage: false,
-            instructions: None,
+            user_instructions: None,
            notify: None,
            cwd: fixture.cwd(),
            mcp_servers: HashMap::new(),
@@ -847,6 +943,10 @@ disable_response_storage = true
            model_reasoning_summary: ReasoningSummary::default(),
            model_supports_reasoning_summaries: false,
            chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
+            experimental_resume: None,
+            base_instructions: None,
+            include_plan_tool: false,
+            internal_originator: None,
        };

        assert_eq!(expected_gpt3_profile_config, gpt3_profile_config);
@@ -892,7 +992,7 @@ disable_response_storage = true
            sandbox_policy: SandboxPolicy::new_read_only_policy(),
            shell_environment_policy: ShellEnvironmentPolicy::default(),
            disable_response_storage: true,
-            instructions: None,
+            user_instructions: None,
            notify: None,
            cwd: fixture.cwd(),
            mcp_servers: HashMap::new(),
@@ -908,6 +1008,10 @@ disable_response_storage = true
            model_reasoning_summary: ReasoningSummary::default(),
            model_supports_reasoning_summaries: false,
            chatgpt_base_url: "https://chatgpt.com/backend-api/".to_string(),
+            experimental_resume: None,
+            base_instructions: None,
+            include_plan_tool: false,
+            internal_originator: None,
        };

        assert_eq!(expected_zdr_profile_config, zdr_profile_config);
--- a/codex-rs/core/src/config_types.rs
+++ b/codex-rs/core/src/config_types.rs
@@ -76,22 +76,9 @@ pub enum HistoryPersistence {

 /// Collection of settings that are specific to the TUI.
 #[derive(Deserialize, Debug, Clone, PartialEq, Default)]
-pub struct Tui {
-    /// By default, mouse capture is enabled in the TUI so that it is possible
-    /// to scroll the conversation history with a mouse. This comes at the cost
-    /// of not being able to use the mouse to select text in the TUI.
-    /// (Most terminals support a modifier key to allow this. For example,
-    /// text selection works in iTerm if you hold down the `Option` key while
-    /// clicking and dragging.)
-    ///
-    /// Setting this option to `true` disables mouse capture, so scrolling with
-    /// the mouse is not possible, though the keyboard shortcuts e.g. `b` and
-    /// `space` still work. This allows the user to select text in the TUI
-    /// using the mouse without needing to hold down a modifier key.
-    pub disable_mouse_capture: bool,
-}
+pub struct Tui {}

-#[derive(Deserialize, Debug, Clone, Copy, PartialEq, Default)]
+#[derive(Deserialize, Debug, Clone, Copy, PartialEq, Default, Serialize)]
 #[serde(rename_all = "kebab-case")]
 pub enum SandboxMode {
    #[serde(rename = "read-only")]
@@ -143,6 +130,8 @@ pub struct ShellEnvironmentPolicyToml {

    /// List of regular expressions.
    pub include_only: Option<Vec<String>>,
+
+    pub experimental_use_profile: Option<bool>,
 }

 pub type EnvironmentVariablePattern = WildMatchPattern<'*', '?'>;
@@ -171,6 +160,9 @@ pub struct ShellEnvironmentPolicy {

    /// Environment variable names to retain in the environment.
    pub include_only: Vec<EnvironmentVariablePattern>,
+
+    /// If true, the shell profile will be used to run the command.
+    pub use_profile: bool,
 }

 impl From<ShellEnvironmentPolicyToml> for ShellEnvironmentPolicy {
@@ -190,6 +182,7 @@ impl From<ShellEnvironmentPolicyToml> for ShellEnvironmentPolicy {
            .into_iter()
            .map(|s| EnvironmentVariablePattern::new_case_insensitive(&s))
            .collect();
+        let use_profile = toml.experimental_use_profile.unwrap_or(false);

        Self {
            inherit,
@@ -197,6 +190,7 @@ impl From<ShellEnvironmentPolicyToml> for ShellEnvironmentPolicy {
            exclude,
            r#set,
            include_only,
+            use_profile,
        }
    }
 }
--- a/codex-rs/core/src/conversation_history.rs
+++ b/codex-rs/core/src/conversation_history.rs
@@ -1,12 +1,7 @@
 use crate::models::ResponseItem;

-/// Transcript of conversation history that is needed:
-/// - for ZDR clients for which previous_response_id is not available, so we
-///   must include the transcript with every API call. This must include each
-///   `function_call` and its corresponding `function_call_output`.
-/// - for clients using the "chat completions" API as opposed to the
-///   "responses" API.
-#[derive(Debug, Clone)]
+/// Transcript of conversation history
+#[derive(Debug, Clone, Default)]
 pub(crate) struct ConversationHistory {
    /// The oldest items are at the beginning of the vector.
    items: Vec<ResponseItem>,
@@ -35,6 +30,34 @@ impl ConversationHistory {
            }
        }
    }
+
+    /// Keep only the `n` most recent `Message` items, in chronological order and
+    /// with their ids cleared; every other kind of item is dropped.
+    /// Passing `n == 0` simply empties the history.
+    pub(crate) fn keep_last_messages(&mut self, n: usize) {
+        // Walk newest-to-oldest so `take(n)` picks the latest messages. `collect`
+        // sizes the buffer itself, so a huge `n` cannot cause a capacity overflow.
+        let mut kept: Vec<ResponseItem> = self
+            .items
+            .iter()
+            .rev()
+            .filter_map(|item| match item {
+                // The id must be dropped, otherwise the model complains that
+                // messages are sent without their accompanying reasoning items.
+                ResponseItem::Message { role, content, .. } => Some(ResponseItem::Message {
+                    id: None,
+                    role: role.clone(),
+                    content: content.clone(),
+                }),
+                _ => None,
+            })
+            .take(n)
+            .collect();
+
+        // Restore chronological order (oldest to newest).
+        kept.reverse();
+        self.items = kept;
+    }
 }

 /// Anything that is not a system message or "reasoning" message is considered
@@ -44,7 +67,8 @@ fn is_api_message(message: &ResponseItem) -> bool {
        ResponseItem::Message { role, .. } => role.as_str() != "system",
        ResponseItem::FunctionCallOutput { .. }
        | ResponseItem::FunctionCall { .. }
-        | ResponseItem::LocalShellCall { .. } => true,
-        ResponseItem::Reasoning { .. } | ResponseItem::Other => false,
+        | ResponseItem::LocalShellCall { .. }
+        | ResponseItem::Reasoning { .. } => true,
+        ResponseItem::Other => false,
    }
 }
--- a/codex-rs/core/src/exec.rs
+++ b/codex-rs/core/src/exec.rs
@@ -6,22 +6,29 @@ use std::io;
 use std::path::Path;
 use std::path::PathBuf;
 use std::process::ExitStatus;
-use std::process::Stdio;
 use std::sync::Arc;
 use std::time::Duration;
 use std::time::Instant;

+use async_channel::Sender;
 use tokio::io::AsyncRead;
 use tokio::io::AsyncReadExt;
 use tokio::io::BufReader;
 use tokio::process::Child;
-use tokio::process::Command;
 use tokio::sync::Notify;

 use crate::error::CodexErr;
 use crate::error::Result;
 use crate::error::SandboxErr;
+use crate::protocol::Event;
+use crate::protocol::EventMsg;
+use crate::protocol::ExecCommandOutputDeltaEvent;
+use crate::protocol::ExecOutputStream;
 use crate::protocol::SandboxPolicy;
+use crate::seatbelt::spawn_command_under_seatbelt;
+use crate::spawn::StdioPolicy;
+use crate::spawn::spawn_child_async;
+use serde_bytes::ByteBuf;

 // Maximum we send for each stream, which is either:
 // - 10KiB OR
@@ -36,24 +43,6 @@ const DEFAULT_TIMEOUT_MS: u64 = 10_000;
 const SIGKILL_CODE: i32 = 9;
 const TIMEOUT_CODE: i32 = 64;

-const MACOS_SEATBELT_BASE_POLICY: &str = include_str!("seatbelt_base_policy.sbpl");
-
-/// When working with `sandbox-exec`, only consider `sandbox-exec` in `/usr/bin`
-/// to defend against an attacker trying to inject a malicious version on the
-/// PATH. If /usr/bin/sandbox-exec has been tampered with, then the attacker
-/// already has root access.
-const MACOS_PATH_TO_SEATBELT_EXECUTABLE: &str = "/usr/bin/sandbox-exec";
-
-/// Experimental environment variable that will be set to some non-empty value
-/// if both of the following are true:
-///
-/// 1. The process was spawned by Codex as part of a shell tool call.
-/// 2. SandboxPolicy.has_full_network_access() was false for the tool call.
-///
-/// We may try to have just one environment variable for all sandboxing
-/// attributes, so this may change in the future.
-pub const CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR: &str = "CODEX_SANDBOX_NETWORK_DISABLED";
-
 #[derive(Debug, Clone)]
 pub struct ExecParams {
    pub command: Vec<String>,
@@ -73,17 +62,26 @@ pub enum SandboxType {
    LinuxSeccomp,
 }

+#[derive(Clone)]
+pub struct StdoutStream {
+    pub sub_id: String, // copied into `Event::id` of every output-delta event
+    pub call_id: String, // copied into `ExecCommandOutputDeltaEvent::call_id`
+    pub tx_event: Sender<Event>, // channel the stdout *and* stderr deltas are sent on
+}
+
 pub async fn process_exec_tool_call(
    params: ExecParams,
    sandbox_type: SandboxType,
    ctrl_c: Arc<Notify>,
    sandbox_policy: &SandboxPolicy,
    codex_linux_sandbox_exe: &Option<PathBuf>,
+    stdout_stream: Option<StdoutStream>,
 ) -> Result<ExecToolCallOutput> {
    let start = Instant::now();

-    let raw_output_result = match sandbox_type {
-        SandboxType::None => exec(params, sandbox_policy, ctrl_c).await,
+    let raw_output_result: std::result::Result<RawExecToolCallOutput, CodexErr> = match sandbox_type
+    {
+        SandboxType::None => exec(params, sandbox_policy, ctrl_c, stdout_stream.clone()).await,
        SandboxType::MacosSeatbelt => {
            let ExecParams {
                command,
@@ -99,7 +97,7 @@ pub async fn process_exec_tool_call(
                env,
            )
            .await?;
-            consume_truncated_output(child, ctrl_c, timeout_ms).await
+            consume_truncated_output(child, ctrl_c, timeout_ms, stdout_stream.clone()).await
        }
        SandboxType::LinuxSeccomp => {
            let ExecParams {
@@ -122,7 +120,7 @@ pub async fn process_exec_tool_call(
            )
            .await?;

-            consume_truncated_output(child, ctrl_c, timeout_ms).await
+            consume_truncated_output(child, ctrl_c, timeout_ms, stdout_stream).await
        }
    };
    let duration = start.elapsed();
@@ -166,27 +164,6 @@ pub async fn process_exec_tool_call(
    }
 }

-pub async fn spawn_command_under_seatbelt(
-    command: Vec<String>,
-    sandbox_policy: &SandboxPolicy,
-    cwd: PathBuf,
-    stdio_policy: StdioPolicy,
-    env: HashMap<String, String>,
-) -> std::io::Result<Child> {
-    let args = create_seatbelt_command_args(command, sandbox_policy, &cwd);
-    let arg0 = None;
-    spawn_child_async(
-        PathBuf::from(MACOS_PATH_TO_SEATBELT_EXECUTABLE),
-        args,
-        arg0,
-        cwd,
-        sandbox_policy,
-        stdio_policy,
-        env,
-    )
-    .await
-}
-
 /// Spawn a shell tool command under the Linux Landlock+seccomp sandbox helper
 /// (codex-linux-sandbox).
 ///
@@ -246,65 +223,6 @@ fn create_linux_sandbox_command_args(
    linux_cmd
 }

-fn create_seatbelt_command_args(
-    command: Vec<String>,
-    sandbox_policy: &SandboxPolicy,
-    cwd: &Path,
-) -> Vec<String> {
-    let (file_write_policy, extra_cli_args) = {
-        if sandbox_policy.has_full_disk_write_access() {
-            // Allegedly, this is more permissive than `(allow file-write*)`.
-            (
-                r#"(allow file-write* (regex #"^/"))"#.to_string(),
-                Vec::<String>::new(),
-            )
-        } else {
-            let writable_roots = sandbox_policy.get_writable_roots_with_cwd(cwd);
-            let (writable_folder_policies, cli_args): (Vec<String>, Vec<String>) = writable_roots
-                .iter()
-                .enumerate()
-                .map(|(index, root)| {
-                    let param_name = format!("WRITABLE_ROOT_{index}");
-                    let policy: String = format!("(subpath (param \"{param_name}\"))");
-                    let cli_arg = format!("-D{param_name}={}", root.to_string_lossy());
-                    (policy, cli_arg)
-                })
-                .unzip();
-            if writable_folder_policies.is_empty() {
-                ("".to_string(), Vec::<String>::new())
-            } else {
-                let file_write_policy = format!(
-                    "(allow file-write*\n{}\n)",
-                    writable_folder_policies.join(" ")
-                );
-                (file_write_policy, cli_args)
-            }
-        }
-    };
-
-    let file_read_policy = if sandbox_policy.has_full_disk_read_access() {
-        "; allow read-only file operations\n(allow file-read*)"
-    } else {
-        ""
-    };
-
-    // TODO(mbolin): apply_patch calls must also honor the SandboxPolicy.
-    let network_policy = if sandbox_policy.has_full_network_access() {
-        "(allow network-outbound)\n(allow network-inbound)\n(allow system-socket)"
-    } else {
-        ""
-    };
-
-    let full_policy = format!(
-        "{MACOS_SEATBELT_BASE_POLICY}\n{file_read_policy}\n{file_write_policy}\n{network_policy}"
-    );
-    let mut seatbelt_args: Vec<String> = vec!["-p".to_string(), full_policy];
-    seatbelt_args.extend(extra_cli_args);
-    seatbelt_args.push("--".to_string());
-    seatbelt_args.extend(command);
-    seatbelt_args
-}
-
 #[derive(Debug)]
 pub struct RawExecToolCallOutput {
    pub exit_status: ExitStatus,
@@ -329,6 +247,7 @@ async fn exec(
    }: ExecParams,
    sandbox_policy: &SandboxPolicy,
    ctrl_c: Arc<Notify>,
+    stdout_stream: Option<StdoutStream>,
 ) -> Result<RawExecToolCallOutput> {
    let (program, args) = command.split_first().ok_or_else(|| {
        CodexErr::Io(io::Error::new(
@@ -347,62 +266,7 @@ async fn exec(
        env,
    )
    .await?;
-    consume_truncated_output(child, ctrl_c, timeout_ms).await
-}
-
-#[derive(Debug, Clone, Copy)]
-pub enum StdioPolicy {
-    RedirectForShellTool,
-    Inherit,
-}
-
-/// Spawns the appropriate child process for the ExecParams and SandboxPolicy,
-/// ensuring the args and environment variables used to create the `Command`
-/// (and `Child`) honor the configuration.
-///
-/// For now, we take `SandboxPolicy` as a parameter to spawn_child() because
-/// we need to determine whether to set the
-/// `CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR` environment variable.
-async fn spawn_child_async(
-    program: PathBuf,
-    args: Vec<String>,
-    #[cfg_attr(not(unix), allow(unused_variables))] arg0: Option<&str>,
-    cwd: PathBuf,
-    sandbox_policy: &SandboxPolicy,
-    stdio_policy: StdioPolicy,
-    env: HashMap<String, String>,
-) -> std::io::Result<Child> {
-    let mut cmd = Command::new(&program);
-    #[cfg(unix)]
-    cmd.arg0(arg0.map_or_else(|| program.to_string_lossy().to_string(), String::from));
-    cmd.args(args);
-    cmd.current_dir(cwd);
-    cmd.env_clear();
-    cmd.envs(env);
-
-    if !sandbox_policy.has_full_network_access() {
-        cmd.env(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR, "1");
-    }
-
-    match stdio_policy {
-        StdioPolicy::RedirectForShellTool => {
-            // Do not create a file descriptor for stdin because otherwise some
-            // commands may hang forever waiting for input. For example, ripgrep has
-            // a heuristic where it may try to read from stdin as explained here:
-            // https://github.com/BurntSushi/ripgrep/blob/e2362d4d5185d02fa857bf381e7bd52e66fafc73/crates/core/flags/hiargs.rs#L1101-L1103
-            cmd.stdin(Stdio::null());
-
-            cmd.stdout(Stdio::piped()).stderr(Stdio::piped());
-        }
-        StdioPolicy::Inherit => {
-            // Inherit stdin, stdout, and stderr from the parent process.
-            cmd.stdin(Stdio::inherit())
-                .stdout(Stdio::inherit())
-                .stderr(Stdio::inherit());
-        }
-    }
-
-    cmd.kill_on_drop(true).spawn()
+    consume_truncated_output(child, ctrl_c, timeout_ms, stdout_stream).await
 }

 /// Consumes the output of a child process, truncating it so it is suitable for
@@ -411,6 +275,7 @@ pub(crate) async fn consume_truncated_output(
    mut child: Child,
    ctrl_c: Arc<Notify>,
    timeout_ms: Option<u64>,
+    stdout_stream: Option<StdoutStream>,
 ) -> Result<RawExecToolCallOutput> {
    // Both stdout and stderr were configured with `Stdio::piped()`
    // above, therefore `take()` should normally return `Some`.  If it doesn't
@@ -431,11 +296,15 @@ pub(crate) async fn consume_truncated_output(
        BufReader::new(stdout_reader),
        MAX_STREAM_OUTPUT,
        MAX_STREAM_OUTPUT_LINES,
+        stdout_stream.clone(),
+        false,
    ));
    let stderr_handle = tokio::spawn(read_capped(
        BufReader::new(stderr_reader),
        MAX_STREAM_OUTPUT,
        MAX_STREAM_OUTPUT_LINES,
+        stdout_stream.clone(),
+        true,
    ));

    let interrupted = ctrl_c.notified();
@@ -469,10 +338,12 @@ pub(crate) async fn consume_truncated_output(
    })
 }

-async fn read_capped<R: AsyncRead + Unpin>(
+async fn read_capped<R: AsyncRead + Unpin + Send + 'static>(
    mut reader: R,
    max_output: usize,
    max_lines: usize,
+    stream: Option<StdoutStream>,
+    is_stderr: bool,
 ) -> io::Result<Vec<u8>> {
    let mut buf = Vec::with_capacity(max_output.min(8 * 1024));
    let mut tmp = [0u8; 8192];
@@ -486,6 +357,25 @@ async fn read_capped<R: AsyncRead + Unpin>(
            break;
        }

+        if let Some(stream) = &stream {
+            let chunk = tmp[..n].to_vec();
+            let msg = EventMsg::ExecCommandOutputDelta(ExecCommandOutputDeltaEvent {
+                call_id: stream.call_id.clone(),
+                stream: if is_stderr {
+                    ExecOutputStream::Stderr
+                } else {
+                    ExecOutputStream::Stdout
+                },
+                chunk: ByteBuf::from(chunk),
+            });
+            let event = Event {
+                id: stream.sub_id.clone(),
+                msg,
+            };
+            #[allow(clippy::let_unit_value)]
+            let _ = stream.tx_event.send(event).await;
+        }
+
        // Copy into the buffer only while we still have byte and line budget.
        if remaining_bytes > 0 && remaining_lines > 0 {
            let mut copy_len = 0;
--- a/codex-rs/core/src/flags.rs
+++ b/codex-rs/core/src/flags.rs
@@ -11,14 +11,6 @@ env_flags! {
    pub OPENAI_TIMEOUT_MS: Duration = Duration::from_millis(300_000), |value| {
        value.parse().map(Duration::from_millis)
    };
-    pub OPENAI_REQUEST_MAX_RETRIES: u64 = 4;
-    pub OPENAI_STREAM_MAX_RETRIES: u64 = 10;
-
-    // We generally don't want to disconnect; this updates the timeout to be five minutes
-    // which matches the upstream typescript codex impl.
-    pub OPENAI_STREAM_IDLE_TIMEOUT_MS: Duration = Duration::from_millis(300_000), |value| {
-        value.parse().map(Duration::from_millis)
-    };

    /// Fixture path for offline tests (see client.rs).
    pub CODEX_RS_SSE_FIXTURE: Option<&str> = None;
--- a/codex-rs/core/src/git_info.rs
+++ b/codex-rs/core/src/git_info.rs
@@ -0,0 +1,316 @@
+use std::path::Path;
+
+use serde::Deserialize;
+use serde::Serialize;
+use tokio::process::Command;
+use tokio::time::Duration as TokioDuration;
+use tokio::time::timeout;
+
+/// Timeout for git commands to prevent freezing on large repositories
+const GIT_COMMAND_TIMEOUT: TokioDuration = TokioDuration::from_secs(5);
+
+#[derive(Serialize, Deserialize, Clone, Debug)]
+pub struct GitInfo {
+    /// Current commit hash (SHA)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub commit_hash: Option<String>,
+    /// Current branch name
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub branch: Option<String>,
+    /// Repository URL (if available from remote)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub repository_url: Option<String>,
+}
+
+/// Collect git repository information for `cwd` by shelling out to `git`.
+///
+/// Returns `None` when `cwd` is not inside a git repository, or when the initial
+/// `git rev-parse --git-dir` probe cannot be run or times out.
+///
+/// Once that probe succeeds a `GitInfo` is always returned. Each field is filled
+/// in independently: a field whose git command fails, times out, or prints
+/// non-UTF-8 output is left as `None` without affecting the others. For
+/// example, a repository without an `origin` remote still reports its commit
+/// hash and branch.
+///
+/// Every command is bounded by `GIT_COMMAND_TIMEOUT`, and the three lookups that
+/// follow the probe run concurrently.
+pub async fn collect_git_info(cwd: &Path) -> Option<GitInfo> {
+    // Check that we are inside a git repository first; `?` bails out when git
+    // could not be run at all or the probe timed out.
+    let probe = run_git_command_with_timeout(&["rev-parse", "--git-dir"], cwd).await?;
+    if !probe.status.success() {
+        return None;
+    }
+
+    // Run all git info collection commands in parallel.
+    let (commit_result, branch_result, url_result) = tokio::join!(
+        run_git_command_with_timeout(&["rev-parse", "HEAD"], cwd),
+        run_git_command_with_timeout(&["rev-parse", "--abbrev-ref", "HEAD"], cwd),
+        run_git_command_with_timeout(&["remote", "get-url", "origin"], cwd)
+    );
+
+    // Current commit hash (SHA).
+    let commit_hash = trimmed_stdout(commit_result);
+
+    // Current branch name. In detached-HEAD state `--abbrev-ref HEAD` prints the
+    // literal "HEAD", which is not a branch, so report no branch in that case.
+    let branch = trimmed_stdout(branch_result).filter(|name| name.as_str() != "HEAD");
+
+    // URL of the `origin` remote, if one is configured.
+    let repository_url = trimmed_stdout(url_result);
+
+    Some(GitInfo {
+        commit_hash,
+        branch,
+        repository_url,
+    })
+}
+
+/// Extract the whitespace-trimmed stdout of a finished git invocation.
+///
+/// Returns `None` when the command did not run (`output` is `None`), exited
+/// with a failure status, or wrote bytes that are not valid UTF-8.
+fn trimmed_stdout(output: Option<std::process::Output>) -> Option<String> {
+    let output = output?;
+    if !output.status.success() {
+        return None;
+    }
+
+    // Strict decoding: invalid UTF-8 yields `None` rather than a mangled value.
+    String::from_utf8(output.stdout)
+        .ok()
+        .map(|text| text.trim().to_string())
+}
+
+/// Run `git <args>` in `cwd`, giving up after `GIT_COMMAND_TIMEOUT`.
+///
+/// Returns `None` when git cannot be run or the timeout elapses. `kill_on_drop`
+/// ensures a timed-out child is killed rather than left running in the background.
+async fn run_git_command_with_timeout(args: &[&str], cwd: &Path) -> Option<std::process::Output> {
+    let mut command = Command::new("git");
+    command.args(args).current_dir(cwd).kill_on_drop(true);
+    // Outer `Err` means the timeout elapsed; inner `Err` is an I/O failure.
+    match timeout(GIT_COMMAND_TIMEOUT, command.output()).await {
+        Ok(Ok(output)) => Some(output),
+        _ => None,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    #![allow(clippy::expect_used)]
+    #![allow(clippy::unwrap_used)]
+
+    use super::*;
+
+    use std::fs;
+    use std::path::PathBuf;
+    use tempfile::TempDir;
+
+    // Helper function to create a test git repository
+    async fn create_test_git_repo(temp_dir: &TempDir) -> PathBuf {
+        let repo_path = temp_dir.path().to_path_buf();
+        let envs = vec![
+            ("GIT_CONFIG_GLOBAL", "/dev/null"),
+            ("GIT_CONFIG_NOSYSTEM", "1"),
+        ];
+
+        // Initialize git repo
+        Command::new("git")
+            .envs(envs.clone())
+            .args(["init"])
+            .current_dir(&repo_path)
+            .output()
+            .await
+            .expect("Failed to init git repo");
+
+        // Configure git user (required for commits)
+        Command::new("git")
+            .envs(envs.clone())
+            .args(["config", "user.name", "Test User"])
+            .current_dir(&repo_path)
+            .output()
+            .await
+            .expect("Failed to set git user name");
+
+        Command::new("git")
+            .envs(envs.clone())
+            .args(["config", "user.email", "test@example.com"])
+            .current_dir(&repo_path)
+            .output()
+            .await
+            .expect("Failed to set git user email");
+
+        // Create a test file and commit it
+        let test_file = repo_path.join("test.txt");
+        fs::write(&test_file, "test content").expect("Failed to write test file");
+
+        Command::new("git")
+            .envs(envs.clone())
+            .args(["add", "."])
+            .current_dir(&repo_path)
+            .output()
+            .await
+            .expect("Failed to add files");
+
+        Command::new("git")
+            .envs(envs.clone())
+            .args(["commit", "-m", "Initial commit"])
+            .current_dir(&repo_path)
+            .output()
+            .await
+            .expect("Failed to commit");
+
+        repo_path
+    }
+
+    #[tokio::test]
+    async fn test_collect_git_info_non_git_directory() {
+        let temp_dir = TempDir::new().expect("Failed to create temp dir");
+        let result = collect_git_info(temp_dir.path()).await;
+        assert!(result.is_none());
+    }
+
+    #[tokio::test]
+    async fn test_collect_git_info_git_repository() {
+        let temp_dir = TempDir::new().expect("Failed to create temp dir");
+        let repo_path = create_test_git_repo(&temp_dir).await;
+
+        let git_info = collect_git_info(&repo_path)
+            .await
+            .expect("Should collect git info from repo");
+
+        // Should have commit hash
+        assert!(git_info.commit_hash.is_some());
+        let commit_hash = git_info.commit_hash.unwrap();
+        assert_eq!(commit_hash.len(), 40); // SHA-1 hash should be 40 characters
+        assert!(commit_hash.chars().all(|c| c.is_ascii_hexdigit()));
+
+        // Should have branch (likely "main" or "master")
+        assert!(git_info.branch.is_some());
+        let branch = git_info.branch.unwrap();
+        assert!(branch == "main" || branch == "master");
+
+        // Repository URL might be None for local repos without remote
+        // This is acceptable behavior
+    }
+
+    #[tokio::test]
+    async fn test_collect_git_info_with_remote() {
+        let temp_dir = TempDir::new().expect("Failed to create temp dir");
+        let repo_path = create_test_git_repo(&temp_dir).await;
+
+        // Add a remote origin
+        Command::new("git")
+            .args([
+                "remote",
+                "add",
+                "origin",
+                "https://github.com/example/repo.git",
+            ])
+            .current_dir(&repo_path)
+            .output()
+            .await
+            .expect("Failed to add remote");
+
+        let git_info = collect_git_info(&repo_path)
+            .await
+            .expect("Should collect git info from repo");
+
+        // Should have repository URL
+        assert_eq!(
+            git_info.repository_url,
+            Some("https://github.com/example/repo.git".to_string())
+        );
+    }
+
+    #[tokio::test]
+    async fn test_collect_git_info_detached_head() {
+        let temp_dir = TempDir::new().expect("Failed to create temp dir");
+        let repo_path = create_test_git_repo(&temp_dir).await;
+
+        // Get the current commit hash
+        let output = Command::new("git")
+            .args(["rev-parse", "HEAD"])
+            .current_dir(&repo_path)
+            .output()
+            .await
+            .expect("Failed to get HEAD");
+        let commit_hash = String::from_utf8(output.stdout).unwrap().trim().to_string();
+
+        // Checkout the commit directly (detached HEAD)
+        Command::new("git")
+            .args(["checkout", &commit_hash])
+            .current_dir(&repo_path)
+            .output()
+            .await
+            .expect("Failed to checkout commit");
+
+        let git_info = collect_git_info(&repo_path)
+            .await
+            .expect("Should collect git info from repo");
+
+        // Should have commit hash
+        assert!(git_info.commit_hash.is_some());
+        // Branch should be None for detached HEAD (since rev-parse --abbrev-ref HEAD returns "HEAD")
+        assert!(git_info.branch.is_none());
+    }
+
+    #[tokio::test]
+    async fn test_collect_git_info_with_branch() {
+        let temp_dir = TempDir::new().expect("Failed to create temp dir");
+        let repo_path = create_test_git_repo(&temp_dir).await;
+
+        // Create and checkout a new branch
+        Command::new("git")
+            .args(["checkout", "-b", "feature-branch"])
+            .current_dir(&repo_path)
+            .output()
+            .await
+            .expect("Failed to create branch");
+
+        let git_info = collect_git_info(&repo_path)
+            .await
+            .expect("Should collect git info from repo");
+
+        // Should have the new branch name
+        assert_eq!(git_info.branch, Some("feature-branch".to_string()));
+    }
+
+    #[test]
+    fn test_git_info_serialization() {
+        let git_info = GitInfo {
+            commit_hash: Some("abc123def456".to_string()),
+            branch: Some("main".to_string()),
+            repository_url: Some("https://github.com/example/repo.git".to_string()),
+        };
+
+        let json = serde_json::to_string(&git_info).expect("Should serialize GitInfo");
+        let parsed: serde_json::Value = serde_json::from_str(&json).expect("Should parse JSON");
+
+        assert_eq!(parsed["commit_hash"], "abc123def456");
+        assert_eq!(parsed["branch"], "main");
+        assert_eq!(
+            parsed["repository_url"],
+            "https://github.com/example/repo.git"
+        );
+    }
+
+    #[test]
+    fn test_git_info_serialization_with_nones() {
+        let git_info = GitInfo {
+            commit_hash: None,
+            branch: None,
+            repository_url: None,
+        };
+
+        let json = serde_json::to_string(&git_info).expect("Should serialize GitInfo");
+        let parsed: serde_json::Value = serde_json::from_str(&json).expect("Should parse JSON");
+
+        // Fields with None values should be omitted due to skip_serializing_if
+        assert!(!parsed.as_object().unwrap().contains_key("commit_hash"));
+        assert!(!parsed.as_object().unwrap().contains_key("branch"));
+        assert!(!parsed.as_object().unwrap().contains_key("repository_url"));
+    }
+}
--- a/codex-rs/core/src/is_safe_command.rs
+++ b/codex-rs/core/src/is_safe_command.rs
@@ -1,31 +1,57 @@
-use tree_sitter::Parser;
-use tree_sitter::Tree;
-use tree_sitter_bash::LANGUAGE as BASH;
+use crate::bash::try_parse_bash;
+use crate::bash::try_parse_word_only_commands_sequence;

 pub fn is_known_safe_command(command: &[String]) -> bool {
    if is_safe_to_call_with_exec(command) {
        return true;
    }

-    // TODO(mbolin): Also support safe commands that are piped together such
-    // as `cat foo | wc -l`.
-    matches!(
-        command,
-        [bash, flag, script]
-            if bash == "bash"
-            && flag == "-lc"
-            && try_parse_bash(script).and_then(|tree|
-                try_parse_single_word_only_command(&tree, script)).is_some_and(|parsed_bash_command| is_safe_to_call_with_exec(&parsed_bash_command))
-    )
+    // Support `bash -lc "..."` where the script consists solely of one or
+    // more "plain" commands (only bare words / quoted strings) combined with
+    // a conservative allow-list of shell operators that themselves do not
+    // introduce side effects ("&&", "||", ";", and "|"). If every
+    // individual command in the script is itself a known-safe command, then
+    // the composite expression is considered safe.
+    if let [bash, flag, script] = command {
+        if bash == "bash" && flag == "-lc" {
+            if let Some(tree) = try_parse_bash(script) {
+                if let Some(all_commands) = try_parse_word_only_commands_sequence(&tree, script) {
+                    if !all_commands.is_empty()
+                        && all_commands
+                            .iter()
+                            .all(|cmd| is_safe_to_call_with_exec(cmd))
+                    {
+                        return true;
+                    }
+                }
+            }
+        }
+    }
+
+    false
 }

 fn is_safe_to_call_with_exec(command: &[String]) -> bool {
    let cmd0 = command.first().map(String::as_str);

    match cmd0 {
+        #[rustfmt::skip]
        Some(
-            "cat" | "cd" | "echo" | "grep" | "head" | "ls" | "pwd" | "rg" | "tail" | "wc" | "which",
-        ) => true,
+            "cat" |
+            "cd" |
+            "echo" |
+            "false" |
+            "grep" |
+            "head" |
+            "ls" |
+            "nl" |
+            "pwd" |
+            "tail" |
+            "true" |
+            "wc" |
+            "which") => {
+            true
+        },

        Some("find") => {
            // Certain options to `find` can delete files, write to files, or
@@ -46,6 +72,29 @@ fn is_safe_to_call_with_exec(command: &[String]) -> bool {
                .any(|arg| UNSAFE_FIND_OPTIONS.contains(&arg.as_str()))
        }

+        // Ripgrep
+        Some("rg") => {
+            const UNSAFE_RIPGREP_OPTIONS_WITH_ARGS: &[&str] = &[
+                // Takes an arbitrary command that is executed for each match.
+                "--pre",
+                // Takes a command that can be used to obtain the local hostname.
+                "--hostname-bin",
+            ];
+            const UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS: &[&str] = &[
+                // Calls out to other decompression tools, so do not auto-approve
+                // out of an abundance of caution.
+                "--search-zip",
+                "-z",
+            ];
+
+            !command.iter().any(|arg| {
+                UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS.contains(&arg.as_str())
+                    || UNSAFE_RIPGREP_OPTIONS_WITH_ARGS
+                        .iter()
+                        .any(|&opt| arg == opt || arg.starts_with(&format!("{opt}=")))
+            })
+        }
+
        // Git
        Some("git") => matches!(
            command.get(1).map(String::as_str),
@@ -72,90 +121,7 @@ fn is_safe_to_call_with_exec(command: &[String]) -> bool {
    }
 }

-fn try_parse_bash(bash_lc_arg: &str) -> Option<Tree> {
-    let lang = BASH.into();
-    let mut parser = Parser::new();
-    #[expect(clippy::expect_used)]
-    parser.set_language(&lang).expect("load bash grammar");
-
-    let old_tree: Option<&Tree> = None;
-    parser.parse(bash_lc_arg, old_tree)
-}
-
-/// If `tree` represents a single Bash command whose name and every argument is
-/// an ordinary `word`, return those words in order; otherwise, return `None`.
-///
-/// `src` must be the exact source string that was parsed into `tree`, so we can
-/// extract the text for every node.
-pub fn try_parse_single_word_only_command(tree: &Tree, src: &str) -> Option<Vec<String>> {
-    // Any parse error is an immediate rejection.
-    if tree.root_node().has_error() {
-        return None;
-    }
-
-    // (program …) with exactly one statement
-    let root = tree.root_node();
-    if root.kind() != "program" || root.named_child_count() != 1 {
-        return None;
-    }
-
-    let cmd = root.named_child(0)?; // (command …)
-    if cmd.kind() != "command" {
-        return None;
-    }
-
-    let mut words = Vec::new();
-    let mut cursor = cmd.walk();
-
-    for child in cmd.named_children(&mut cursor) {
-        match child.kind() {
-            // The command name node wraps one `word` child.
-            "command_name" => {
-                let word_node = child.named_child(0)?; // make sure it's only a word
-                if word_node.kind() != "word" {
-                    return None;
-                }
-                words.push(word_node.utf8_text(src.as_bytes()).ok()?.to_owned());
-            }
-            // Positional‑argument word (allowed).
-            "word" | "number" => {
-                words.push(child.utf8_text(src.as_bytes()).ok()?.to_owned());
-            }
-            "string" => {
-                if child.child_count() == 3
-                    && child.child(0)?.kind() == "\""
-                    && child.child(1)?.kind() == "string_content"
-                    && child.child(2)?.kind() == "\""
-                {
-                    words.push(child.child(1)?.utf8_text(src.as_bytes()).ok()?.to_owned());
-                } else {
-                    // Anything else means the command is *not* plain words.
-                    return None;
-                }
-            }
-            "concatenation" => {
-                // TODO: Consider things like `'ab\'a'`.
-                return None;
-            }
-            "raw_string" => {
-                // Raw string is a single word, but we need to strip the quotes.
-                let raw_string = child.utf8_text(src.as_bytes()).ok()?;
-                let stripped = raw_string
-                    .strip_prefix('\'')
-                    .and_then(|s| s.strip_suffix('\''));
-                if let Some(stripped) = stripped {
-                    words.push(stripped.to_owned());
-                } else {
-                    return None;
-                }
-            }
-            // Anything else means the command is *not* plain words.
-            _ => return None,
-        }
-    }
-
-    Some(words)
-}
+// (bash parsing helpers implemented in crate::bash)

 /* ----------------------------------------------------------
 Example
@@ -193,6 +159,7 @@ fn is_valid_sed_n_arg(arg: Option<&str>) -> bool {
        _ => false,
    }
 }
+
 #[cfg(test)]
 mod tests {
    #![allow(clippy::unwrap_used)]
@@ -209,6 +176,11 @@ mod tests {
        assert!(is_safe_to_call_with_exec(&vec_str(&[
            "sed", "-n", "1,5p", "file.txt"
        ])));
+        assert!(is_safe_to_call_with_exec(&vec_str(&[
+            "nl",
+            "-nrz",
+            "Cargo.toml"
+        ])));

        // Safe `find` command (no unsafe options).
        assert!(is_safe_to_call_with_exec(&vec_str(&[
@@ -245,6 +217,40 @@ mod tests {
        }
    }

+    #[test]
+    fn ripgrep_rules() {
+        // A plain search with none of the unsafe flags is accepted.
+        let safe = vec_str(&["rg", "Cargo.toml", "-n"]);
+        assert!(is_safe_to_call_with_exec(&safe));
+
+        // Unsafe flags that take no value and appear verbatim.
+        let zip_search = [
+            vec_str(&["rg", "--search-zip", "files"]),
+            vec_str(&["rg", "-z", "files"]),
+        ];
+        // Unsafe flags that take a value, in both `--flag value` and `--flag=value` forms.
+        let external_command = [
+            vec_str(&["rg", "--pre", "pwned", "files"]),
+            vec_str(&["rg", "--pre=pwned", "files"]),
+            vec_str(&["rg", "--hostname-bin", "pwned", "files"]),
+            vec_str(&["rg", "--hostname-bin=pwned", "files"]),
+        ];
+
+        zip_search.iter().for_each(|args| {
+            assert!(
+                !is_safe_to_call_with_exec(args),
+                "expected {args:?} to be considered unsafe due to zip-search flag",
+            );
+        });
+
+        external_command.iter().for_each(|args| {
+            assert!(
+                !is_safe_to_call_with_exec(args),
+                "expected {args:?} to be considered unsafe due to external-command flag",
+            );
+        });
+    }
+
    #[test]
    fn bash_lc_safe_examples() {
        assert!(is_known_safe_command(&vec_str(&["bash", "-lc", "ls"])));
@@ -277,6 +283,30 @@ mod tests {
        ])));
    }

+    #[test]
+    fn bash_lc_safe_examples_with_operators() {
+        // Each script joins commands with one shell operator; the test expects
+        // every resulting `bash -lc` invocation to be reported as known-safe.
+        let scripts = [
+            // `||` fallback
+            "grep -R \"Cargo.toml\" -n || true",
+            // `&&` sequence
+            "ls && pwd",
+            // `;` separator
+            "echo 'hi' ; ls",
+            // `|` pipeline
+            "ls | wc -l",
+        ];
+
+        // Wrap each script as `bash -lc <script>` before checking it.
+        for script in scripts {
+            let command = vec_str(&["bash", "-lc", script]);
+            let approved = is_known_safe_command(&command);
+
+            assert!(approved);
+        }
+    }
+
    #[test]
    fn bash_lc_unsafe_examples() {
        assert!(
@@ -290,44 +320,29 @@ mod tests {

        assert!(
            !is_known_safe_command(&vec_str(&["bash", "-lc", "find . -name file.txt -delete"])),
-            "Unsafe find option should not be auto‑approved."
-        );
-    }
-
-    #[test]
-    fn test_try_parse_single_word_only_command() {
-        let script_with_single_quoted_string = "sed -n '1,5p' file.txt";
-        let parsed_words = try_parse_bash(script_with_single_quoted_string)
-            .and_then(|tree| {
-                try_parse_single_word_only_command(&tree, script_with_single_quoted_string)
-            })
-            .unwrap();
-        assert_eq!(
-            vec![
-                "sed".to_string(),
-                "-n".to_string(),
-                // Ensure the single quotes are properly removed.
-                "1,5p".to_string(),
-                "file.txt".to_string()
-            ],
-            parsed_words,
+            "Unsafe find option should not be auto-approved."
        );

-        let script_with_number_arg = "ls -1";
-        let parsed_words = try_parse_bash(script_with_number_arg)
-            .and_then(|tree| try_parse_single_word_only_command(&tree, script_with_number_arg))
-            .unwrap();
-        assert_eq!(vec!["ls", "-1"], parsed_words,);
+        // Disallowed because of unsafe command in sequence.
+        assert!(
+            !is_known_safe_command(&vec_str(&["bash", "-lc", "ls && rm -rf /"])),
+            "Sequence containing unsafe command must be rejected"
+        );

-        let script_with_double_quoted_string_with_no_funny_stuff_arg = "grep -R \"Cargo.toml\" -n";
-        let parsed_words = try_parse_bash(script_with_double_quoted_string_with_no_funny_stuff_arg)
-            .and_then(|tree| {
-                try_parse_single_word_only_command(
-                    &tree,
-                    script_with_double_quoted_string_with_no_funny_stuff_arg,
-                )
-            })
-            .unwrap();
-        assert_eq!(vec!["grep", "-R", "Cargo.toml", "-n"], parsed_words);
+        // Disallowed because of parentheses / subshell.
+        assert!(
+            !is_known_safe_command(&vec_str(&["bash", "-lc", "(ls)"])),
+            "Parentheses (subshell) are not provably safe with the current parser"
+        );
+        assert!(
+            !is_known_safe_command(&vec_str(&["bash", "-lc", "ls || (pwd && echo hi)"])),
+            "Nested parentheses are not provably safe with the current parser"
+        );
+
+        // Disallowed redirection.
+        assert!(
+            !is_known_safe_command(&vec_str(&["bash", "-lc", "ls > out.txt"])),
+            "> redirection should be rejected"
+        );
    }
 }
--- a/codex-rs/core/src/lib.rs
+++ b/codex-rs/core/src/lib.rs
@@ -5,11 +5,14 @@
 // the TUI or the tracing stack).
 #![deny(clippy::print_stdout, clippy::print_stderr)]

+mod apply_patch;
+mod bash;
 mod chat_completions;
 mod client;
 mod client_common;
 pub mod codex;
 pub use codex::Codex;
+pub use codex::CodexSpawnOk;
 pub mod codex_wrapper;
 pub mod config;
 pub mod config_profile;
@@ -19,6 +22,7 @@ pub mod error;
 pub mod exec;
 pub mod exec_env;
 mod flags;
+pub mod git_info;
 mod is_safe_command;
 mod mcp_connection_manager;
 mod mcp_tool_call;
@@ -26,15 +30,20 @@ mod message_history;
 mod model_provider_info;
 pub use model_provider_info::ModelProviderInfo;
 pub use model_provider_info::WireApi;
+pub use model_provider_info::built_in_model_providers;
 mod models;
-pub mod openai_api_key;
 mod openai_model_info;
 mod openai_tools;
+pub mod plan_tool;
 mod project_doc;
 pub mod protocol;
 mod rollout;
 mod safety;
+pub mod seatbelt;
+pub mod shell;
+pub mod spawn;
 mod user_notification;
 pub mod util;

+pub use apply_patch::CODEX_APPLY_PATCH_ARG1;
 pub use client_common::model_supports_reasoning_summaries;
--- a/codex-rs/core/src/mcp_connection_manager.rs
+++ b/codex-rs/core/src/mcp_connection_manager.rs
@@ -7,6 +7,8 @@
 //! `"<server><MCP_TOOL_NAME_DELIMITER><tool>"` as the key.

 use std::collections::HashMap;
+use std::collections::HashSet;
+use std::ffi::OsString;
 use std::time::Duration;

 use anyhow::Context;
@@ -16,8 +18,13 @@ use codex_mcp_client::McpClient;
 use mcp_types::ClientCapabilities;
 use mcp_types::Implementation;
 use mcp_types::Tool;
+
+use serde_json::json;
+use sha1::Digest;
+use sha1::Sha1;
 use tokio::task::JoinSet;
 use tracing::info;
+use tracing::warn;

 use crate::config_types::McpServerConfig;

@@ -26,7 +33,8 @@ use crate::config_types::McpServerConfig;
 ///
 /// OpenAI requires tool names to conform to `^[a-zA-Z0-9_-]+$`, so we must
 /// choose a delimiter from this character set.
-const MCP_TOOL_NAME_DELIMITER: &str = "__OAI_CODEX_MCP__";
+const MCP_TOOL_NAME_DELIMITER: &str = "__";
+const MAX_TOOL_NAME_LENGTH: usize = 64;

 /// Timeout for the `tools/list` request.
 const LIST_TOOLS_TIMEOUT: Duration = Duration::from_secs(10);
@@ -35,16 +43,42 @@ const LIST_TOOLS_TIMEOUT: Duration = Duration::from_secs(10);
 /// spawned successfully.
 pub type ClientStartErrors = HashMap<String, anyhow::Error>;

-fn fully_qualified_tool_name(server: &str, tool: &str) -> String {
-    format!("{server}{MCP_TOOL_NAME_DELIMITER}{tool}")
+/// Keys `tools` by `<server>__<tool>`, shortening over-long names with a hash.
+///
+/// A name longer than `MAX_TOOL_NAME_LENGTH` bytes keeps a prefix and gets the
+/// hex SHA-1 of the full name appended. A tool whose key is already taken is
+/// skipped with a warning.
+fn qualify_tools(tools: Vec<ToolInfo>) -> HashMap<String, ToolInfo> {
+    let mut used_names = HashSet::new();
+    let mut qualified_tools = HashMap::new();
+    for tool in tools {
+        let mut qualified_name = format!(
+            "{}{}{}",
+            tool.server_name, MCP_TOOL_NAME_DELIMITER, tool.tool_name
+        );
+        if qualified_name.len() > MAX_TOOL_NAME_LENGTH {
+            let sha1_str = format!("{:x}", Sha1::digest(qualified_name.as_bytes()));
+            // Only server names are validated; a tool name may be non-ASCII, so
+            // back up to a char boundary to keep the byte slice from panicking.
+            let mut prefix_len = MAX_TOOL_NAME_LENGTH - sha1_str.len();
+            while !qualified_name.is_char_boundary(prefix_len) {
+                prefix_len -= 1;
+            }
+            qualified_name = format!("{}{}", &qualified_name[..prefix_len], sha1_str);
+        }
+        if !used_names.insert(qualified_name.clone()) {
+            warn!("skipping duplicated tool {}", qualified_name);
+            continue;
+        }
+        qualified_tools.insert(qualified_name, tool);
+    }
+    qualified_tools
 }

-pub(crate) fn try_parse_fully_qualified_tool_name(fq_name: &str) -> Option<(String, String)> {
-    let (server, tool) = fq_name.split_once(MCP_TOOL_NAME_DELIMITER)?;
-    if server.is_empty() || tool.is_empty() {
-        return None;
-    }
-    Some((server.to_string(), tool.to_string()))
+struct ToolInfo { // a listed tool together with the names needed to route a call to it
+    server_name: String, // name of the server whose `tools/list` response contained the tool
+    tool_name: String, // the tool's own name as reported by that server (copy of `tool.name`)
+    tool: Tool, // full tool definition as returned by the server
 }

 /// A thin wrapper around a set of running [`McpClient`] instances.
@@ -57,7 +91,7 @@ pub(crate) struct McpConnectionManager {
    clients: HashMap<String, std::sync::Arc<McpClient>>,

    /// Fully qualified tool name -> tool instance.
-    tools: HashMap<String, Tool>,
+    tools: HashMap<String, ToolInfo>,
 }

 impl McpConnectionManager {
@@ -79,12 +113,27 @@ impl McpConnectionManager {

        // Launch all configured servers concurrently.
        let mut join_set = JoinSet::new();
+        let mut errors = ClientStartErrors::new();

        for (server_name, cfg) in mcp_servers {
-            // TODO: Verify server name: require `^[a-zA-Z0-9_-]+$`?
+            // Validate server name before spawning
+            if !is_valid_mcp_server_name(&server_name) {
+                let error = anyhow::anyhow!(
+                    "invalid server name '{}': must match pattern ^[a-zA-Z0-9_-]+$",
+                    server_name
+                );
+                errors.insert(server_name, error);
+                continue;
+            }
+
            join_set.spawn(async move {
                let McpServerConfig { command, args, env } = cfg;
-                let client_res = McpClient::new_stdio_client(command, args, env).await;
+                let client_res = McpClient::new_stdio_client(
+                    command.into(),
+                    args.into_iter().map(OsString::from).collect(),
+                    env,
+                )
+                .await;
                match client_res {
                    Ok(client) => {
                        // Initialize the client.
@@ -93,10 +142,14 @@ impl McpConnectionManager {
                                experimental: None,
                                roots: None,
                                sampling: None,
+                                // https://modelcontextprotocol.io/specification/2025-06-18/client/elicitation#capabilities
+                                // indicates this should be an empty object.
+                                elicitation: Some(json!({})),
                            },
                            client_info: Implementation {
                                name: "codex-mcp-client".to_owned(),
                                version: env!("CARGO_PKG_VERSION").to_owned(),
+                                title: Some("Codex".into()),
                            },
                            protocol_version: mcp_types::MCP_SCHEMA_VERSION.to_owned(),
                        };
@@ -117,7 +170,6 @@ impl McpConnectionManager {

        let mut clients: HashMap<String, std::sync::Arc<McpClient>> =
            HashMap::with_capacity(join_set.len());
-        let mut errors = ClientStartErrors::new();

        while let Some(res) = join_set.join_next().await {
            let (server_name, client_res) = res?; // JoinError propagation
@@ -132,7 +184,9 @@ impl McpConnectionManager {
            }
        }

-        let tools = list_all_tools(&clients).await?;
+        let all_tools = list_all_tools(&clients).await?;
+
+        let tools = qualify_tools(all_tools);

        Ok((Self { clients, tools }, errors))
    }
@@ -140,7 +194,10 @@ impl McpConnectionManager {
    /// Returns a single map that contains **all** tools. Each key is the
    /// fully-qualified name for the tool.
    pub fn list_all_tools(&self) -> HashMap<String, Tool> {
-        self.tools.clone()
+        // `self.tools` also stores routing names; hand out only name -> `Tool`.
+        let mut tools = HashMap::with_capacity(self.tools.len());
+        tools.extend(self.tools.iter().map(|(k, info)| (k.clone(), info.tool.clone())));
+        tools
    }

    /// Invoke the tool indicated by the (server, tool) pair.
@@ -162,13 +219,19 @@ impl McpConnectionManager {
            .await
            .with_context(|| format!("tool call failed for `{server}/{tool}`"))
    }
+
+    pub fn parse_tool_name(&self, tool_name: &str) -> Option<(String, String)> {
+        // Look the qualified name up; unknown names yield `None`.
+        let info = self.tools.get(tool_name)?;
+        Some((info.server_name.clone(), info.tool_name.clone()))
+    }
 }

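-/// Query every server for its available tools and return a single map that
-/// contains **all** tools. Each key is the fully-qualified name for the tool.
+/// Query every server for its available tools and return them as one flat
+/// list of `ToolInfo` entries, each tagged with the server it came from.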
-pub async fn list_all_tools(
+async fn list_all_tools(
    clients: &HashMap<String, std::sync::Arc<McpClient>>,
-) -> Result<HashMap<String, Tool>> {
+) -> Result<Vec<ToolInfo>> {
    let mut join_set = JoinSet::new();

    // Spawn one task per server so we can query them concurrently. This
@@ -185,18 +248,19 @@ pub async fn list_all_tools(
        });
    }

-    let mut aggregated: HashMap<String, Tool> = HashMap::with_capacity(join_set.len());
+    let mut aggregated: Vec<ToolInfo> = Vec::with_capacity(join_set.len());

    while let Some(join_res) = join_set.join_next().await {
        let (server_name, list_result) = join_res?;
        let list_result = list_result?;

        for tool in list_result.tools {
-            // TODO(mbolin): escape tool names that contain invalid characters.
-            let fq_name = fully_qualified_tool_name(&server_name, &tool.name);
-            if aggregated.insert(fq_name.clone(), tool).is_some() {
-                panic!("tool name collision for '{fq_name}': suspicious");
-            }
+            let tool_info = ToolInfo {
+                server_name: server_name.clone(),
+                tool_name: tool.name.clone(),
+                tool,
+            };
+            aggregated.push(tool_info);
        }
    }

@@ -208,3 +272,99 @@ pub async fn list_all_tools(

    Ok(aggregated)
 }
+
+/// Returns `true` when `server_name` is non-empty and matches `^[a-zA-Z0-9_-]+$`.
+fn is_valid_mcp_server_name(server_name: &str) -> bool {
+    let allowed = |b: u8| b.is_ascii_alphanumeric() || matches!(b, b'_' | b'-');
+    // Every byte of a non-ASCII char is >= 0x80, which `allowed` rejects.
+    !server_name.is_empty() && server_name.bytes().all(allowed)
+}
+
+#[cfg(test)]
+#[allow(clippy::unwrap_used)]
+mod tests {
+    //! Unit tests for `qualify_tools`.
+    use super::*;
+    use mcp_types::ToolInputSchema;
+
+    /// Builds a minimal `ToolInfo` named `tool_name` that belongs to
+    /// `server_name`; every optional `Tool` field except the description is unset.
+    fn create_test_tool(server_name: &str, tool_name: &str) -> ToolInfo {
+        let input_schema = ToolInputSchema {
+            properties: None,
+            required: None,
+            r#type: "object".to_string(),
+        };
+        let tool = Tool {
+            annotations: None,
+            description: Some(format!("Test tool: {tool_name}")),
+            input_schema,
+            name: tool_name.to_string(),
+            output_schema: None,
+            title: None,
+        };
+        ToolInfo {
+            server_name: server_name.to_string(),
+            tool_name: tool_name.to_string(),
+            tool,
+        }
+    }
+
+    /// Runs `qualify_tools` and returns the resulting keys in sorted order so
+    /// assertions do not depend on `HashMap` iteration order.
+    fn qualified_keys(tools: Vec<ToolInfo>) -> Vec<String> {
+        let mut keys: Vec<String> = qualify_tools(tools).into_keys().collect();
+        keys.sort();
+        keys
+    }
+
+    #[test]
+    fn test_qualify_tools_short_non_duplicated_names() {
+        let keys = qualified_keys(vec![
+            create_test_tool("server1", "tool1"),
+            create_test_tool("server1", "tool2"),
+        ]);
+
+        // Short, unique names are joined verbatim with the "__" delimiter.
+        assert_eq!(keys, ["server1__tool1", "server1__tool2"]);
+    }
+
+    #[test]
+    fn test_qualify_tools_duplicated_names_skipped() {
+        let keys = qualified_keys(vec![
+            create_test_tool("server1", "duplicate_tool"),
+            create_test_tool("server1", "duplicate_tool"),
+        ]);
+
+        // Only the first tool should remain, the second is skipped.
+        assert_eq!(keys, ["server1__duplicate_tool"]);
+    }
+
+    #[test]
+    fn test_qualify_tools_long_names_same_server() {
+        let server_name = "my_server";
+
+        let keys = qualified_keys(vec![
+            create_test_tool(
+                server_name,
+                "extremely_lengthy_function_name_that_absolutely_surpasses_all_reasonable_limits",
+            ),
+            create_test_tool(
+                server_name,
+                "yet_another_extremely_lengthy_function_name_that_absolutely_surpasses_all_reasonable_limits",
+            ),
+        ]);
+
+        // Both names exceed the 64-byte limit, so each key is a 24-byte
+        // prefix followed by the 40-character SHA-1 hex digest.
+        let expected = [
+            "my_server__extremely_lena02e507efc5a9de88637e436690364fd4219e4ef",
+            "my_server__yet_another_e1c3987bd9c50b826cbe1687966f79f0c602d19ca",
+        ];
+        assert_eq!(keys, expected);
+
+        for key in &keys {
+            assert_eq!(key.len(), 64);
+        }
+    }
+}
--- a/codex-rs/core/src/mcp_tool_call.rs
+++ b/codex-rs/core/src/mcp_tool_call.rs
@@ -1,4 +1,5 @@
 use std::time::Duration;
+use std::time::Instant;

 use tracing::error;

@@ -7,6 +8,7 @@ use crate::models::FunctionCallOutputPayload;
 use crate::models::ResponseInputItem;
 use crate::protocol::Event;
 use crate::protocol::EventMsg;
+use crate::protocol::McpInvocation;
 use crate::protocol::McpToolCallBeginEvent;
 use crate::protocol::McpToolCallEndEvent;

@@ -41,21 +43,28 @@ pub(crate) async fn handle_mcp_tool_call(
        }
    };

-    let tool_call_begin_event = EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
-        call_id: call_id.clone(),
+    let invocation = McpInvocation {
        server: server.clone(),
        tool: tool_name.clone(),
        arguments: arguments_value.clone(),
+    };
+
+    let tool_call_begin_event = EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
+        call_id: call_id.clone(),
+        invocation: invocation.clone(),
    });
    notify_mcp_tool_call_event(sess, sub_id, tool_call_begin_event).await;

+    let start = Instant::now();
    // Perform the tool call.
    let result = sess
-        .call_tool(&server, &tool_name, arguments_value, timeout)
+        .call_tool(&server, &tool_name, arguments_value.clone(), timeout)
        .await
        .map_err(|e| format!("tool call error: {e}"));
    let tool_call_end_event = EventMsg::McpToolCallEnd(McpToolCallEndEvent {
        call_id: call_id.clone(),
+        invocation,
+        duration: start.elapsed(),
        result: result.clone(),
    });

--- a/codex-rs/core/src/model_provider_info.rs
+++ b/codex-rs/core/src/model_provider_info.rs
@@ -9,13 +9,12 @@ use serde::Deserialize;
 use serde::Serialize;
 use std::collections::HashMap;
 use std::env::VarError;
+use std::time::Duration;

 use crate::error::EnvVarError;
-use crate::openai_api_key::get_openai_api_key;
-
-/// Value for the `OpenAI-Originator` header that is sent with requests to
-/// OpenAI.
-const OPENAI_ORIGINATOR_HEADER: &str = "codex_cli_rs";
+const DEFAULT_STREAM_IDLE_TIMEOUT_MS: u64 = 300_000;
+const DEFAULT_STREAM_MAX_RETRIES: u64 = 10;
+const DEFAULT_REQUEST_MAX_RETRIES: u64 = 4;

 /// Wire protocol that the provider speaks. Most third-party services only
 /// implement the classic OpenAI Chat Completions JSON schema, whereas OpenAI
@@ -26,7 +25,7 @@ const OPENAI_ORIGINATOR_HEADER: &str = "codex_cli_rs";
 #[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
 #[serde(rename_all = "lowercase")]
 pub enum WireApi {
-    /// The experimental “Responses” API exposed by OpenAI at `/v1/responses`.
+    /// The Responses API exposed by OpenAI at `/v1/responses`.
    Responses,

    /// Regular Chat Completions compatible with `/v1/chat/completions`.
@@ -40,7 +39,7 @@ pub struct ModelProviderInfo {
    /// Friendly display name.
    pub name: String,
    /// Base URL for the provider's OpenAI-compatible API.
-    pub base_url: String,
+    pub base_url: Option<String>,
    /// Environment variable that stores the user's API key for this provider.
    pub env_key: Option<String>,

@@ -65,13 +64,19 @@ pub struct ModelProviderInfo {
    /// value is empty, the header will not be included in the request.
    pub env_http_headers: Option<HashMap<String, String>>,

-    /// Whether the provider accepts an explicit `temperature` parameter.
-    #[serde(default = "default_supports_temperature")]
-    pub supports_temperature: bool,
-}
+    /// Maximum number of times to retry a failed HTTP request to this provider.
+    pub request_max_retries: Option<u64>,

-const fn default_supports_temperature() -> bool {
-    true
+    /// Number of times to retry reconnecting a dropped streaming response before failing.
+    pub stream_max_retries: Option<u64>,
+
+    /// Idle timeout (in milliseconds) to wait for activity on a streaming response before treating
+    /// the connection as lost.
+    pub stream_idle_timeout_ms: Option<u64>,
+
+    /// Whether this provider requires some form of standard authentication (API key, ChatGPT token).
+    #[serde(default)]
+    pub requires_auth: bool,
 }

 impl ModelProviderInfo {
@@ -87,11 +92,11 @@ impl ModelProviderInfo {
        &'a self,
        client: &'a reqwest::Client,
    ) -> crate::error::Result<reqwest::RequestBuilder> {
-        let api_key = self.api_key()?;
-
        let url = self.get_full_url();

        let mut builder = client.post(url);
+
+        let api_key = self.api_key()?;
        if let Some(key) = api_key {
            builder = builder.bearer_auth(key);
        }
@@ -111,9 +116,15 @@ impl ModelProviderInfo {
                    .join("&");
                format!("?{full_params}")
            });
-        let base_url = &self.base_url;
+        let base_url = self
+            .base_url
+            .clone()
+            .unwrap_or("https://api.openai.com/v1".to_string());
+
        match self.wire_api {
-            WireApi::Responses => format!("{base_url}/responses{query_string}"),
+            WireApi::Responses => {
+                format!("{base_url}/responses{query_string}")
+            }
            WireApi::Chat => format!("{base_url}/chat/completions{query_string}"),
        }
    }
@@ -121,7 +132,10 @@ impl ModelProviderInfo {
    /// Apply provider-specific HTTP headers (both static and environment-based)
    /// onto an existing `reqwest::RequestBuilder` and return the updated
    /// builder.
-    fn apply_http_headers(&self, mut builder: reqwest::RequestBuilder) -> reqwest::RequestBuilder {
+    pub fn apply_http_headers(
+        &self,
+        mut builder: reqwest::RequestBuilder,
+    ) -> reqwest::RequestBuilder {
        if let Some(extra) = &self.http_headers {
            for (k, v) in extra {
                builder = builder.header(k, v);
@@ -146,11 +160,7 @@ impl ModelProviderInfo {
    fn api_key(&self) -> crate::error::Result<Option<String>> {
        match &self.env_key {
            Some(env_key) => {
-                let env_value = if env_key == crate::openai_api_key::OPENAI_API_KEY_ENV_VAR {
-                    get_openai_api_key().map_or_else(|| Err(VarError::NotPresent), Ok)
-                } else {
-                    std::env::var(env_key)
-                };
+                let env_value = std::env::var(env_key);
                env_value
                    .and_then(|v| {
                        if v.trim().is_empty() {
@@ -169,6 +179,25 @@ impl ModelProviderInfo {
            None => Ok(None),
        }
    }
+
+    /// Request retry limit: the configured value, or the built-in default when unset.
+    pub fn request_max_retries(&self) -> u64 {
+        let configured = self.request_max_retries;
+        configured.unwrap_or(DEFAULT_REQUEST_MAX_RETRIES)
+    }
+
+    /// Stream reconnection limit: the configured value, or the built-in default when unset.
+    pub fn stream_max_retries(&self) -> u64 {
+        let configured = self.stream_max_retries;
+        configured.unwrap_or(DEFAULT_STREAM_MAX_RETRIES)
+    }
+
+    /// Idle timeout for streaming responses, using the built-in default when unset.
+    pub fn stream_idle_timeout(&self) -> Duration {
+        // The setting is stored in milliseconds; pick the value, then convert once.
+        let millis = self.stream_idle_timeout_ms.unwrap_or(DEFAULT_STREAM_IDLE_TIMEOUT_MS);
+        Duration::from_millis(millis)
+    }
 }

 /// Built-in default provider list.
@@ -179,44 +208,45 @@ pub fn built_in_model_providers() -> HashMap<String, ModelProviderInfo> {
    // providers are bundled with Codex CLI, so we only include the OpenAI
    // provider by default. Users are encouraged to add to `model_providers`
    // in config.toml to add their own providers.
-    [
-        (
-            "openai",
-            P {
-                name: "OpenAI".into(),
-                // Allow users to override the default OpenAI endpoint by
-                // exporting `OPENAI_BASE_URL`. This is useful when pointing
-                // Codex at a proxy, mock server, or Azure-style deployment
-                // without requiring a full TOML override for the built-in
-                // OpenAI provider.
-                base_url: std::env::var("OPENAI_BASE_URL")
-                    .ok()
-                    .filter(|v| !v.trim().is_empty())
-                    .unwrap_or_else(|| "https://api.openai.com/v1".to_string()),
-                env_key: Some("OPENAI_API_KEY".into()),
-                env_key_instructions: Some("Create an API key (https://platform.openai.com) and export it as an environment variable.".into()),
-                wire_api: WireApi::Responses,
-                query_params: None,
-                http_headers: Some(
-                    [
-                        ("originator".to_string(), OPENAI_ORIGINATOR_HEADER.to_string()),
-                        ("version".to_string(), env!("CARGO_PKG_VERSION").to_string()),
-                    ]
-                        .into_iter()
-                        .collect(),
-                ),
-                env_http_headers: Some(
-                    [
-                        ("OpenAI-Organization".to_string(), "OPENAI_ORGANIZATION".to_string()),
-                        ("OpenAI-Project".to_string(), "OPENAI_PROJECT".to_string()),
-                    ]
-                        .into_iter()
-                        .collect(),
-                ),
-                supports_temperature: false,
-            },
-        ),
-    ]
+    [(
+        "openai",
+        P {
+            name: "OpenAI".into(),
+            // Allow users to override the default OpenAI endpoint by
+            // exporting `OPENAI_BASE_URL`. This is useful when pointing
+            // Codex at a proxy, mock server, or Azure-style deployment
+            // without requiring a full TOML override for the built-in
+            // OpenAI provider.
+            base_url: std::env::var("OPENAI_BASE_URL")
+                .ok()
+                .filter(|v| !v.trim().is_empty()),
+            env_key: None,
+            env_key_instructions: None,
+            wire_api: WireApi::Responses,
+            query_params: None,
+            http_headers: Some(
+                [("version".to_string(), env!("CARGO_PKG_VERSION").to_string())]
+                    .into_iter()
+                    .collect(),
+            ),
+            env_http_headers: Some(
+                [
+                    (
+                        "OpenAI-Organization".to_string(),
+                        "OPENAI_ORGANIZATION".to_string(),
+                    ),
+                    ("OpenAI-Project".to_string(), "OPENAI_PROJECT".to_string()),
+                ]
+                .into_iter()
+                .collect(),
+            ),
+            // Use global defaults for retry/timeout unless overridden in config.toml.
+            request_max_retries: None,
+            stream_max_retries: None,
+            stream_idle_timeout_ms: None,
+            requires_auth: true,
+        },
+    )]
    .into_iter()
    .map(|(k, v)| (k.to_string(), v))
    .collect()
@@ -236,14 +266,17 @@ base_url = "http://localhost:11434/v1"
        "#;
        let expected_provider = ModelProviderInfo {
            name: "Ollama".into(),
-            base_url: "http://localhost:11434/v1".into(),
+            base_url: Some("http://localhost:11434/v1".into()),
            env_key: None,
            env_key_instructions: None,
            wire_api: WireApi::Chat,
            query_params: None,
            http_headers: None,
            env_http_headers: None,
-            supports_temperature: true,
+            request_max_retries: None,
+            stream_max_retries: None,
+            stream_idle_timeout_ms: None,
+            requires_auth: false,
        };

        let provider: ModelProviderInfo = toml::from_str(azure_provider_toml).unwrap();
@@ -260,7 +293,7 @@ query_params = { api-version = "2025-04-01-preview" }
        "#;
        let expected_provider = ModelProviderInfo {
            name: "Azure".into(),
-            base_url: "https://xxxxx.openai.azure.com/openai".into(),
+            base_url: Some("https://xxxxx.openai.azure.com/openai".into()),
            env_key: Some("AZURE_OPENAI_API_KEY".into()),
            env_key_instructions: None,
            wire_api: WireApi::Chat,
@@ -269,7 +302,10 @@ query_params = { api-version = "2025-04-01-preview" }
            }),
            http_headers: None,
            env_http_headers: None,
-            supports_temperature: true,
+            request_max_retries: None,
+            stream_max_retries: None,
+            stream_idle_timeout_ms: None,
+            requires_auth: false,
        };

        let provider: ModelProviderInfo = toml::from_str(azure_provider_toml).unwrap();
@@ -287,7 +323,7 @@ env_http_headers = { "X-Example-Env-Header" = "EXAMPLE_ENV_VAR" }
        "#;
        let expected_provider = ModelProviderInfo {
            name: "Example".into(),
-            base_url: "https://example.com".into(),
+            base_url: Some("https://example.com".into()),
            env_key: Some("API_KEY".into()),
            env_key_instructions: None,
            wire_api: WireApi::Chat,
@@ -298,7 +334,10 @@ env_http_headers = { "X-Example-Env-Header" = "EXAMPLE_ENV_VAR" }
            env_http_headers: Some(maplit::hashmap! {
                "X-Example-Env-Header".to_string() => "EXAMPLE_ENV_VAR".to_string(),
            }),
-            supports_temperature: true,
+            request_max_retries: None,
+            stream_max_retries: None,
+            stream_idle_timeout_ms: None,
+            requires_auth: false,
        };

        let provider: ModelProviderInfo = toml::from_str(azure_provider_toml).unwrap();
--- a/codex-rs/core/src/models.rs
+++ b/codex-rs/core/src/models.rs
@@ -3,6 +3,7 @@ use std::collections::HashMap;
 use base64::Engine;
 use mcp_types::CallToolResult;
 use serde::Deserialize;
+use serde::Deserializer;
 use serde::Serialize;
 use serde::ser::Serializer;

@@ -37,12 +38,14 @@ pub enum ContentItem {
 #[serde(tag = "type", rename_all = "snake_case")]
 pub enum ResponseItem {
    Message {
+        id: Option<String>,
        role: String,
        content: Vec<ContentItem>,
    },
    Reasoning {
        id: String,
        summary: Vec<ReasoningItemReasoningSummary>,
+        encrypted_content: Option<String>,
    },
    LocalShellCall {
        /// Set when using the chat completions API.
@@ -53,6 +56,7 @@ pub enum ResponseItem {
        action: LocalShellAction,
    },
    FunctionCall {
+        id: Option<String>,
        name: String,
        // The Responses API returns the function call arguments as a *string* that contains
        // JSON, not as an already‑parsed object. We keep it as a raw string here and let
@@ -78,7 +82,11 @@ pub enum ResponseItem {
 impl From<ResponseInputItem> for ResponseItem {
    fn from(item: ResponseInputItem) -> Self {
        match item {
-            ResponseInputItem::Message { role, content } => Self::Message { role, content },
+            ResponseInputItem::Message { role, content } => Self::Message {
+                role,
+                content,
+                id: None,
+            },
            ResponseInputItem::FunctionCallOutput { call_id, output } => {
                Self::FunctionCallOutput { call_id, output }
            }
@@ -177,7 +185,7 @@ pub struct ShellToolCallParams {
    pub timeout_ms: Option<u64>,
 }

-#[derive(Deserialize, Debug, Clone)]
+#[derive(Debug, Clone)]
 pub struct FunctionCallOutputPayload {
    pub content: String,
    #[expect(dead_code)]
@@ -205,6 +213,19 @@ impl Serialize for FunctionCallOutputPayload {
    }
 }

+impl<'de> Deserialize<'de> for FunctionCallOutputPayload {
+    /// Reads the payload from a plain string; `success` is always left as `None`.
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        String::deserialize(deserializer).map(|content| Self {
+            content,
+            success: None,
+        })
+    }
+}
+
 // Implement Display so callers can treat the payload like a plain string when logging or doing
 // trivial substring checks in tests (existing tests call `.contains()` on the output). Display
 // returns the raw `content` field.
--- a/codex-rs/core/src/openai_api_key.rs
+++ b/codex-rs/core/src/openai_api_key.rs
@@ -1,24 +0,0 @@
-use std::env;
-use std::sync::LazyLock;
-use std::sync::RwLock;
-
-pub const OPENAI_API_KEY_ENV_VAR: &str = "OPENAI_API_KEY";
-
-static OPENAI_API_KEY: LazyLock<RwLock<Option<String>>> = LazyLock::new(|| {
-    let val = env::var(OPENAI_API_KEY_ENV_VAR)
-        .ok()
-        .and_then(|s| if s.is_empty() { None } else { Some(s) });
-    RwLock::new(val)
-});
-
-pub fn get_openai_api_key() -> Option<String> {
-    #![allow(clippy::unwrap_used)]
-    OPENAI_API_KEY.read().unwrap().clone()
-}
-
-pub fn set_openai_api_key(value: String) {
-    #![allow(clippy::unwrap_used)]
-    if !value.is_empty() {
-        *OPENAI_API_KEY.write().unwrap() = Some(value);
-    }
-}
--- a/codex-rs/core/src/openai_tools.rs
+++ b/codex-rs/core/src/openai_tools.rs
@@ -4,13 +4,14 @@ use std::collections::BTreeMap;
 use std::sync::LazyLock;

 use crate::client_common::Prompt;
+use crate::plan_tool::PLAN_TOOL;

 #[derive(Debug, Clone, Serialize)]
 pub(crate) struct ResponsesApiTool {
-    name: &'static str,
-    description: &'static str,
-    strict: bool,
-    parameters: JsonSchema,
+    pub(crate) name: &'static str,
+    pub(crate) description: &'static str,
+    pub(crate) strict: bool,
+    pub(crate) parameters: JsonSchema,
 }

 /// When serialized as JSON, this produces a valid "Tool" in the OpenAI
@@ -74,6 +75,7 @@ static DEFAULT_CODEX_MODEL_TOOLS: LazyLock<Vec<OpenAiTool>> =
 pub(crate) fn create_tools_json_for_responses_api(
    prompt: &Prompt,
    model: &str,
+    include_plan_tool: bool,
 ) -> crate::error::Result<Vec<serde_json::Value>> {
    // Assemble tool list: built-in tools + any extra tools from the prompt.
    let default_tools = if model.starts_with("codex") {
@@ -93,6 +95,10 @@ pub(crate) fn create_tools_json_for_responses_api(
            .map(|(name, tool)| mcp_tool_to_openai_tool(name, tool)),
    );

+    if include_plan_tool {
+        tools_json.push(serde_json::to_value(PLAN_TOOL.clone())?);
+    }
+
    Ok(tools_json)
 }

@@ -102,10 +108,12 @@ pub(crate) fn create_tools_json_for_responses_api(
 pub(crate) fn create_tools_json_for_chat_completions_api(
    prompt: &Prompt,
    model: &str,
+    include_plan_tool: bool,
 ) -> crate::error::Result<Vec<serde_json::Value>> {
    // We start with the JSON for the Responses API and than rewrite it to match
    // the chat completions tool call format.
-    let responses_api_tools_json = create_tools_json_for_responses_api(prompt, model)?;
+    let responses_api_tools_json =
+        create_tools_json_for_responses_api(prompt, model, include_plan_tool)?;
    let tools_json = responses_api_tools_json
        .into_iter()
        .filter_map(|mut tool| {
--- a/codex-rs/core/src/plan_tool.rs
+++ b/codex-rs/core/src/plan_tool.rs
@@ -0,0 +1,126 @@
+use std::collections::BTreeMap;
+use std::sync::LazyLock;
+
+use serde::Deserialize;
+use serde::Serialize;
+
+use crate::codex::Session;
+use crate::models::FunctionCallOutputPayload;
+use crate::models::ResponseInputItem;
+use crate::openai_tools::JsonSchema;
+use crate::openai_tools::OpenAiTool;
+use crate::openai_tools::ResponsesApiTool;
+use crate::protocol::Event;
+use crate::protocol::EventMsg;
+
+// Types for the TODO tool arguments matching codex-vscode/todo-mcp/src/main.rs
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum StepStatus {
+    Pending,
+    InProgress,
+    Completed,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct PlanItemArg {
+    pub step: String,
+    pub status: StepStatus,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct UpdatePlanArgs {
+    #[serde(default)]
+    pub explanation: Option<String>,
+    pub plan: Vec<PlanItemArg>,
+}
+
+/// Definition of the `update_plan` function tool; its only required argument is `plan`.
+pub(crate) static PLAN_TOOL: LazyLock<OpenAiTool> = LazyLock::new(|| {
+    let plan_entry = JsonSchema::Object {
+        properties: BTreeMap::from([
+            ("step".to_string(), JsonSchema::String),
+            ("status".to_string(), JsonSchema::String),
+        ]),
+        required: &["step", "status"],
+        additional_properties: false,
+    };
+    let plan_list = JsonSchema::Array {
+        items: Box::new(plan_entry),
+    };
+    let properties = BTreeMap::from([
+        ("explanation".to_string(), JsonSchema::String),
+        ("plan".to_string(), plan_list),
+    ]);
+    OpenAiTool::Function(ResponsesApiTool {
+        name: "update_plan",
+        description: r#"Use the update_plan tool to keep the user updated on the current plan for the task.
+After understanding the user's task, call the update_plan tool with an initial plan. An example of a plan:
+1. Explore the codebase to find relevant files (status: in_progress)
+2. Implement the feature in the XYZ component (status: pending)
+3. Commit changes and make a pull request (status: pending)
+Each step should be a short, 1-sentence description.
+Until all the steps are finished, there should always be exactly one in_progress step in the plan.
+Call the update_plan tool whenever you finish a step, marking the completed step as `completed` and marking the next step as `in_progress`.
+Before running a command, consider whether or not you have completed the previous step, and make sure to mark it as completed before moving on to the next step.
+Sometimes, you may need to change plans in the middle of a task: call `update_plan` with the updated plan and make sure to provide an `explanation` of the rationale when doing so.
+When all steps are completed, call update_plan one last time with all steps marked as `completed`."#,
+        strict: false,
+        parameters: JsonSchema::Object {
+            properties,
+            required: &["plan"],
+            additional_properties: false,
+        },
+    })
+});
+
+/// Handles an `update_plan` call. The call does no real work: its value is that the parsed plan
+/// is sent to clients as an `EventMsg::PlanUpdate` they can read and render. The output returned
+/// to the model only acknowledges the call (TBD how making it document a plan affects performance).
+pub(crate) async fn handle_update_plan(
+    session: &Session,
+    arguments: String,
+    sub_id: String,
+    call_id: String,
+) -> ResponseInputItem {
+    match parse_update_plan_arguments(arguments, &call_id) {
+        Ok(args) => {
+            // `sub_id` is owned and not needed afterwards, so it is moved rather than cloned.
+            session
+                .send_event(Event {
+                    id: sub_id,
+                    msg: EventMsg::PlanUpdate(args),
+                })
+                .await;
+            ResponseInputItem::FunctionCallOutput {
+                call_id,
+                output: FunctionCallOutputPayload {
+                    content: "Plan updated".to_string(),
+                    success: Some(true),
+                },
+            }
+        }
+        Err(output) => *output,
+    }
+}
+
+/// Deserializes the JSON `arguments` of an `update_plan` call into [`UpdatePlanArgs`].
+///
+/// If deserialization fails, the error is returned as a boxed `FunctionCallOutput` for
+/// `call_id` whose content describes the failure.
+fn parse_update_plan_arguments(
+    arguments: String,
+    call_id: &str,
+) -> Result<UpdatePlanArgs, Box<ResponseInputItem>> {
+    serde_json::from_str::<UpdatePlanArgs>(&arguments).map_err(|e| {
+        Box::new(ResponseInputItem::FunctionCallOutput {
+            call_id: call_id.to_string(),
+            output: FunctionCallOutputPayload {
+                content: format!("failed to parse function arguments: {e}"),
+                success: None,
+            },
+        })
+    })
+}
--- a/codex-rs/core/src/project_doc.rs
+++ b/codex-rs/core/src/project_doc.rs
@@ -27,16 +27,16 @@ const PROJECT_DOC_SEPARATOR: &str = "\n\n--- project-doc ---\n\n";
 /// string of instructions.
 pub(crate) async fn get_user_instructions(config: &Config) -> Option<String> {
    match find_project_doc(config).await {
-        Ok(Some(project_doc)) => match &config.instructions {
+        Ok(Some(project_doc)) => match &config.user_instructions {
            Some(original_instructions) => Some(format!(
                "{original_instructions}{PROJECT_DOC_SEPARATOR}{project_doc}"
            )),
            None => Some(project_doc),
        },
-        Ok(None) => config.instructions.clone(),
+        Ok(None) => config.user_instructions.clone(),
        Err(e) => {
            error!("error trying to find project doc: {e:#}");
-            config.instructions.clone()
+            config.user_instructions.clone()
        }
    }
 }
@@ -159,7 +159,7 @@ mod tests {
        config.cwd = root.path().to_path_buf();
        config.project_doc_max_bytes = limit;

-        config.instructions = instructions.map(ToOwned::to_owned);
+        config.user_instructions = instructions.map(ToOwned::to_owned);
        config
    }

--- a/codex-rs/core/src/protocol.rs
+++ b/codex-rs/core/src/protocol.rs
@@ -4,19 +4,24 @@
 //! between user and agent.

 use std::collections::HashMap;
+use std::fmt;
 use std::path::Path;
 use std::path::PathBuf;
 use std::str::FromStr;
+use std::time::Duration;

 use mcp_types::CallToolResult;
 use serde::Deserialize;
 use serde::Serialize;
+use serde_bytes::ByteBuf;
+use strum_macros::Display;
 use uuid::Uuid;

 use crate::config_types::ReasoningEffort as ReasoningEffortConfig;
 use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
 use crate::message_history::HistoryEntry;
 use crate::model_provider_info::ModelProviderInfo;
+use crate::plan_tool::UpdatePlanArgs;

 /// Submission Queue Entry - requests from user
 #[derive(Debug, Clone, Deserialize, Serialize)]
@@ -44,8 +49,12 @@ pub enum Op {
        model_reasoning_effort: ReasoningEffortConfig,
        model_reasoning_summary: ReasoningSummaryConfig,

-        /// Model instructions
-        instructions: Option<String>,
+        /// Model instructions that are appended to the base instructions.
+        user_instructions: Option<String>,
+
+        /// Base instructions override.
+        base_instructions: Option<String>,
+
        /// When to escalate for approval for execution
        approval_policy: AskForApproval,
        /// How to sandbox commands executed in the system
@@ -69,6 +78,10 @@ pub enum Op {
        /// `ConfigureSession` operation so that the business-logic layer can
        /// operate deterministically.
        cwd: std::path::PathBuf,
+
+        /// Path to a rollout file to resume from.
+        #[serde(skip_serializing_if = "Option::is_none")]
+        resume_path: Option<std::path::PathBuf>,
    },

    /// Abort current task.
@@ -108,18 +121,27 @@ pub enum Op {

    /// Request a single history entry identified by `log_id` + `offset`.
    GetHistoryEntryRequest { offset: usize, log_id: u64 },
+
+    /// Request the agent to summarize the current conversation context.
+    /// The agent will use its existing context (either conversation history or previous response id)
+    /// to generate a summary which will be returned as an AgentMessage event.
+    Compact,
+    /// Request to shut down codex instance.
+    Shutdown,
 }

 /// Determines the conditions under which the user is consulted to approve
 /// running the command proposed by Codex.
-#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
+#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash, Serialize, Deserialize, Display)]
 #[serde(rename_all = "kebab-case")]
+#[strum(serialize_all = "kebab-case")]
 pub enum AskForApproval {
    /// Under this policy, only "known safe" commands—as determined by
    /// `is_safe_command()`—that **only read files** are auto‑approved.
    /// Everything else will ask the user to approve.
    #[default]
    #[serde(rename = "untrusted")]
+    #[strum(serialize = "untrusted")]
    UnlessTrusted,

    /// *All* commands are auto‑approved, but they are expected to run inside a
@@ -158,9 +180,29 @@ pub enum SandboxPolicy {
        /// default.
        #[serde(default)]
        network_access: bool,
+
+        /// When set to `true`, will include defaults like the current working
+        /// directory and TMPDIR (on macOS). When `false`, only `writable_roots`
+        /// are used. (Mainly used for testing.)
+        #[serde(default = "default_true")]
+        include_default_writable_roots: bool,
    },
 }

+/// A writable root path accompanied by a list of subpaths that should remain
+/// read‑only even when the root is writable. This is primarily used to ensure
+/// top‑level VCS metadata directories (e.g. `.git`) under a writable root are
+/// not modified by the agent.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct WritableRoot {
+    pub root: PathBuf,
+    pub read_only_subpaths: Vec<PathBuf>,
+}
+
+fn default_true() -> bool {
+    true
+}
+
 impl FromStr for SandboxPolicy {
    type Err = serde_json::Error;

@@ -182,6 +224,7 @@ impl SandboxPolicy {
        SandboxPolicy::WorkspaceWrite {
            writable_roots: vec![],
            network_access: false,
+            include_default_writable_roots: true,
        }
    }

@@ -207,27 +250,51 @@ impl SandboxPolicy {
        }
    }

-    /// Returns the list of writable roots that should be passed down to the
-    /// Landlock rules installer, tailored to the current working directory.
-    pub fn get_writable_roots_with_cwd(&self, cwd: &Path) -> Vec<PathBuf> {
+    /// Returns the list of writable roots (tailored to the current working
+    /// directory) together with subpaths that should remain read‑only under
+    /// each writable root.
+    pub fn get_writable_roots_with_cwd(&self, cwd: &Path) -> Vec<WritableRoot> {
        match self {
            SandboxPolicy::DangerFullAccess => Vec::new(),
            SandboxPolicy::ReadOnly => Vec::new(),
-            SandboxPolicy::WorkspaceWrite { writable_roots, .. } => {
-                let mut roots = writable_roots.clone();
-                roots.push(cwd.to_path_buf());
+            SandboxPolicy::WorkspaceWrite {
+                writable_roots,
+                include_default_writable_roots,
+                ..
+            } => {
+                // Start from explicitly configured writable roots.
+                let mut roots: Vec<PathBuf> = writable_roots.clone();

-                // Also include the per-user tmp dir on macOS.
-                // Note this is added dynamically rather than storing it in
-                // writable_roots because writable_roots contains only static
-                // values deserialized from the config file.
-                if cfg!(target_os = "macos") {
-                    if let Some(tmpdir) = std::env::var_os("TMPDIR") {
-                        roots.push(PathBuf::from(tmpdir));
+                // Optionally include defaults (cwd and TMPDIR on macOS).
+                if *include_default_writable_roots {
+                    roots.push(cwd.to_path_buf());
+
+                    // Also include the per-user tmp dir on macOS.
+                    // Note this is added dynamically rather than storing it in
+                    // `writable_roots` because `writable_roots` contains only static
+                    // values deserialized from the config file.
+                    if cfg!(target_os = "macos") {
+                        if let Some(tmpdir) = std::env::var_os("TMPDIR") {
+                            roots.push(PathBuf::from(tmpdir));
+                        }
                    }
                }

+                // For each root, compute subpaths that should remain read-only.
                roots
+                    .into_iter()
+                    .map(|writable_root| {
+                        let mut subpaths = Vec::new();
+                        let top_level_git = writable_root.join(".git");
+                        if top_level_git.is_dir() {
+                            subpaths.push(top_level_git);
+                        }
+                        WritableRoot {
+                            root: writable_root,
+                            read_only_subpaths: subpaths,
+                        }
+                    })
+                    .collect()
            }
        }
    }
@@ -263,8 +330,9 @@ pub struct Event {
 }

 /// Response event from the agent
-#[derive(Debug, Clone, Deserialize, Serialize)]
+#[derive(Debug, Clone, Deserialize, Serialize, Display)]
 #[serde(tag = "type", rename_all = "snake_case")]
+#[strum(serialize_all = "snake_case")]
 pub enum EventMsg {
    /// Error while executing a submission
    Error(ErrorEvent),
@@ -282,9 +350,15 @@ pub enum EventMsg {
    /// Agent text output message
    AgentMessage(AgentMessageEvent),

+    /// Agent text output delta message
+    AgentMessageDelta(AgentMessageDeltaEvent),
+
    /// Reasoning event from agent.
    AgentReasoning(AgentReasoningEvent),

+    /// Agent reasoning delta event from agent.
+    AgentReasoningDelta(AgentReasoningDeltaEvent),
+
    /// Ack the client's configure message.
    SessionConfigured(SessionConfiguredEvent),

@@ -295,6 +369,9 @@ pub enum EventMsg {
    /// Notification that the server is about to execute a command.
    ExecCommandBegin(ExecCommandBeginEvent),

+    /// Incremental chunk of output from a running command.
+    ExecCommandOutputDelta(ExecCommandOutputDeltaEvent),
+
    ExecCommandEnd(ExecCommandEndEvent),

    ExecApprovalRequest(ExecApprovalRequestEvent),
@@ -312,6 +389,11 @@ pub enum EventMsg {

    /// Response to GetHistoryEntryRequest.
    GetHistoryEntryResponse(GetHistoryEntryResponseEvent),
+
+    PlanUpdate(UpdatePlanArgs),
+
+    /// Notification that the agent is shutting down.
+    ShutdownComplete,
 }

 // Individual event payload types matching each `EventMsg` variant.
@@ -335,20 +417,58 @@ pub struct TokenUsage {
    pub total_tokens: u64,
 }

+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct FinalOutput {
+    pub token_usage: TokenUsage,
+}
+
+impl From<TokenUsage> for FinalOutput {
+    fn from(token_usage: TokenUsage) -> Self {
+        Self { token_usage }
+    }
+}
+
+impl fmt::Display for FinalOutput {
+    /// Writes a one-line token usage summary.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let usage = &self.token_usage;
+        // Optional counters render as a parenthesised suffix, or as nothing when absent.
+        let cached = usage.cached_input_tokens.map(|c| format!(" (cached {c})"));
+        let reasoning = usage.reasoning_output_tokens.map(|r| format!(" (reasoning {r})"));
+        write!(
+            f,
+            "Token usage: total={} input={}{} output={}{}",
+            usage.total_tokens,
+            usage.input_tokens,
+            cached.unwrap_or_default(),
+            usage.output_tokens,
+            reasoning.unwrap_or_default()
+        )
+    }
+}
+
 #[derive(Debug, Clone, Deserialize, Serialize)]
 pub struct AgentMessageEvent {
    pub message: String,
 }

+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct AgentMessageDeltaEvent {
+    pub delta: String,
+}
+
 #[derive(Debug, Clone, Deserialize, Serialize)]
 pub struct AgentReasoningEvent {
    pub text: String,
 }

 #[derive(Debug, Clone, Deserialize, Serialize)]
-pub struct McpToolCallBeginEvent {
-    /// Identifier so this can be paired with the McpToolCallEnd event.
-    pub call_id: String,
+pub struct AgentReasoningDeltaEvent {
+    pub delta: String,
+}
+
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct McpInvocation {
    /// Name of the MCP server as defined in the config.
    pub server: String,
    /// Name of the tool as given by the MCP server.
@@ -357,10 +477,19 @@ pub struct McpToolCallBeginEvent {
    pub arguments: Option<serde_json::Value>,
 }

+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct McpToolCallBeginEvent {
+    /// Identifier so this can be paired with the McpToolCallEnd event.
+    pub call_id: String,
+    pub invocation: McpInvocation,
+}
+
 #[derive(Debug, Clone, Deserialize, Serialize)]
 pub struct McpToolCallEndEvent {
    /// Identifier for the corresponding McpToolCallBegin that finished.
    pub call_id: String,
+    pub invocation: McpInvocation,
+    pub duration: Duration,
    /// Result of the tool call. Note this could be an error.
    pub result: Result<CallToolResult, String>,
 }
@@ -394,10 +523,32 @@ pub struct ExecCommandEndEvent {
    pub stderr: String,
    /// The command's exit code.
    pub exit_code: i32,
+    /// The duration of the command execution.
+    pub duration: Duration,
+}
+
+#[derive(Debug, Clone, Deserialize, Serialize)]
+#[serde(rename_all = "snake_case")]
+pub enum ExecOutputStream {
+    Stdout,
+    Stderr,
+}
+
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct ExecCommandOutputDeltaEvent {
+    /// Identifier for the ExecCommandBegin that produced this chunk.
+    pub call_id: String,
+    /// Which stream produced this chunk.
+    pub stream: ExecOutputStream,
+    /// Raw bytes from the stream (may not be valid UTF-8).
+    #[serde(with = "serde_bytes")]
+    pub chunk: ByteBuf,
 }

 #[derive(Debug, Clone, Deserialize, Serialize)]
 pub struct ExecApprovalRequestEvent {
+    /// Identifier for the associated exec call, if available.
+    pub call_id: String,
    /// The command to be executed.
    pub command: Vec<String>,
    /// The command's working directory.
@@ -409,6 +560,8 @@ pub struct ExecApprovalRequestEvent {

 #[derive(Debug, Clone, Deserialize, Serialize)]
 pub struct ApplyPatchApprovalRequestEvent {
+    /// Responses API call id for the associated patch apply call, if available.
+    pub call_id: String,
    pub changes: HashMap<PathBuf, FileChange>,
    /// Optional explanatory reason (e.g. request for extra write access).
    #[serde(skip_serializing_if = "Option::is_none")]
--- a/codex-rs/core/src/rollout.rs
+++ b/codex-rs/core/src/rollout.rs
@@ -1,33 +1,57 @@
-//! Functionality to persist a Codex conversation *rollout* – a linear list of
-//! [`ResponseItem`] objects exchanged during a session – to disk so that
-//! sessions can be replayed or inspected later (mirrors the behaviour of the
-//! upstream TypeScript implementation).
+//! Persist Codex session rollouts (.jsonl) so sessions can be replayed or inspected later.

 use std::fs::File;
 use std::fs::{self};
 use std::io::Error as IoError;
+use std::path::Path;

+use serde::Deserialize;
 use serde::Serialize;
+use serde_json::Value;
 use time::OffsetDateTime;
 use time::format_description::FormatItem;
 use time::macros::format_description;
 use tokio::io::AsyncWriteExt;
 use tokio::sync::mpsc::Sender;
 use tokio::sync::mpsc::{self};
+use tokio::sync::oneshot;
+use tracing::info;
+use tracing::warn;
 use uuid::Uuid;

 use crate::config::Config;
+use crate::git_info::GitInfo;
+use crate::git_info::collect_git_info;
 use crate::models::ResponseItem;

-/// Folder inside `~/.codex` that holds saved rollouts.
 const SESSIONS_SUBDIR: &str = "sessions";

+#[derive(Serialize, Deserialize, Clone, Default)]
+pub struct SessionMeta {
+    pub id: Uuid,
+    pub timestamp: String,
+    pub instructions: Option<String>,
+}
+
 #[derive(Serialize)]
-struct SessionMeta {
-    id: String,
-    timestamp: String,
+struct SessionMetaWithGit {
+    #[serde(flatten)]
+    meta: SessionMeta,
    #[serde(skip_serializing_if = "Option::is_none")]
-    instructions: Option<String>,
+    git: Option<GitInfo>,
+}
+
+#[derive(Serialize, Deserialize, Default, Clone)]
+pub struct SessionStateSnapshot {}
+
+#[derive(Serialize, Deserialize, Default, Clone)]
+pub struct SavedSession {
+    pub session: SessionMeta,
+    #[serde(default)]
+    pub items: Vec<ResponseItem>,
+    #[serde(default)]
+    pub state: SessionStateSnapshot,
+    pub session_id: Uuid,
 }

 /// Records all [`ResponseItem`]s for a session and flushes them to disk after
@@ -41,7 +65,13 @@ struct SessionMeta {
 /// ```
 #[derive(Clone)]
 pub(crate) struct RolloutRecorder {
-    tx: Sender<String>,
+    tx: Sender<RolloutCmd>,
+}
+
+enum RolloutCmd {
+    AddItems(Vec<ResponseItem>),
+    UpdateState(SessionStateSnapshot),
+    Shutdown { ack: oneshot::Sender<()> },
 }

 impl RolloutRecorder {
@@ -59,7 +89,6 @@ impl RolloutRecorder {
            timestamp,
        } = create_log_file(config, uuid)?;

-        // Build the static session metadata JSON first.
        let timestamp_format: &[FormatItem] = format_description!(
            "[year]-[month]-[day]T[hour]:[minute]:[second].[subsecond digits:3]Z"
        );
@@ -67,48 +96,33 @@ impl RolloutRecorder {
            .format(timestamp_format)
            .map_err(|e| IoError::other(format!("failed to format timestamp: {e}")))?;

-        let meta = SessionMeta {
-            timestamp,
-            id: session_id.to_string(),
-            instructions,
-        };
+        // Clone the cwd for the spawned task to collect git info asynchronously
+        let cwd = config.cwd.clone();

        // A reasonably-sized bounded channel. If the buffer fills up the send
        // future will yield, which is fine – we only need to ensure we do not
-        // perform *blocking* I/O on the caller’s thread.
-        let (tx, mut rx) = mpsc::channel::<String>(256);
+        // perform *blocking* I/O on the caller's thread.
+        let (tx, rx) = mpsc::channel::<RolloutCmd>(256);

        // Spawn a Tokio task that owns the file handle and performs async
        // writes. Using `tokio::fs::File` keeps everything on the async I/O
        // driver instead of blocking the runtime.
-        tokio::task::spawn(async move {
-            let mut file = tokio::fs::File::from_std(file);
+        tokio::task::spawn(rollout_writer(
+            tokio::fs::File::from_std(file),
+            rx,
+            Some(SessionMeta {
+                timestamp,
+                id: session_id,
+                instructions,
+            }),
+            cwd,
+        ));

-            while let Some(line) = rx.recv().await {
-                // Write line + newline, then flush to disk.
-                if let Err(e) = file.write_all(line.as_bytes()).await {
-                    tracing::warn!("rollout writer: failed to write line: {e}");
-                    break;
-                }
-                if let Err(e) = file.write_all(b"\n").await {
-                    tracing::warn!("rollout writer: failed to write newline: {e}");
-                    break;
-                }
-                if let Err(e) = file.flush().await {
-                    tracing::warn!("rollout writer: failed to flush: {e}");
-                    break;
-                }
-            }
-        });
-
-        let recorder = Self { tx };
-        // Ensure SessionMeta is the first item in the file.
-        recorder.record_item(&meta).await?;
-        Ok(recorder)
+        Ok(Self { tx })
    }

-    /// Append `items` to the rollout file.
    pub(crate) async fn record_items(&self, items: &[ResponseItem]) -> std::io::Result<()> {
+        let mut filtered = Vec::new();
        for item in items {
            match item {
                // Note that function calls may look a bit strange if they are
@@ -117,27 +131,114 @@ impl RolloutRecorder {
                ResponseItem::Message { .. }
                | ResponseItem::LocalShellCall { .. }
                | ResponseItem::FunctionCall { .. }
-                | ResponseItem::FunctionCallOutput { .. } => {}
-                ResponseItem::Reasoning { .. } | ResponseItem::Other => {
+                | ResponseItem::FunctionCallOutput { .. }
+                | ResponseItem::Reasoning { .. } => filtered.push(item.clone()),
+                ResponseItem::Other => {
                    // These should never be serialized.
                    continue;
                }
            }
-            self.record_item(item).await?;
        }
-        Ok(())
+        if filtered.is_empty() {
+            return Ok(());
+        }
+        self.tx
+            .send(RolloutCmd::AddItems(filtered))
+            .await
+            .map_err(|e| IoError::other(format!("failed to queue rollout items: {e}")))
    }

-    async fn record_item(&self, item: &impl Serialize) -> std::io::Result<()> {
-        // Serialize the item to JSON first so that the writer thread only has
-        // to perform the actual write.
-        let json = serde_json::to_string(item)
-            .map_err(|e| IoError::other(format!("failed to serialize response items: {e}")))?;
-
+    pub(crate) async fn record_state(&self, state: SessionStateSnapshot) -> std::io::Result<()> {
        self.tx
-            .send(json)
+            .send(RolloutCmd::UpdateState(state))
            .await
-            .map_err(|e| IoError::other(format!("failed to queue rollout item: {e}")))
+            .map_err(|e| IoError::other(format!("failed to queue rollout state: {e}")))
+    }
+
+    /// Reopens the rollout file at `path` and resumes recording into it.
+    ///
+    /// The first line of the file must parse as a [`SessionMeta`]. Every
+    /// following non-empty line is read either as a state record (a JSON object
+    /// whose `record_type` is `"state"`) or as a [`ResponseItem`]. Lines that
+    /// are not valid JSON, or that fail to deserialize, are skipped; when
+    /// several state records parse, the last one wins.
+    ///
+    /// On success returns a recorder whose writer task appends to the same
+    /// file (no new session-meta line is written) together with the
+    /// [`SavedSession`] reconstructed from the file contents.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the file cannot be read, is empty, its first line is not a
+    /// valid `SessionMeta`, or it cannot be reopened for appending.
+    pub async fn resume(
+        path: &Path,
+        cwd: std::path::PathBuf,
+    ) -> std::io::Result<(Self, SavedSession)> {
+        info!("Resuming rollout from {path:?}");
+        let text = tokio::fs::read_to_string(path).await?;
+        let mut lines = text.lines();
+        let meta_line = lines
+            .next()
+            .ok_or_else(|| IoError::other("empty session file"))?;
+        let session: SessionMeta = serde_json::from_str(meta_line)
+            .map_err(|e| IoError::other(format!("failed to parse session meta: {e}")))?;
+        let mut items = Vec::new();
+        let mut state = SessionStateSnapshot::default();
+
+        for line in lines {
+            if line.trim().is_empty() {
+                continue;
+            }
+            let v: Value = match serde_json::from_str(line) {
+                Ok(v) => v,
+                Err(e) => {
+                    // Best effort: one corrupt line must not make the whole
+                    // session unrecoverable, but leave a trace in the logs.
+                    warn!("skipping malformed rollout line: {e}");
+                    continue;
+                }
+            };
+            let is_state_record = v.get("record_type").and_then(Value::as_str) == Some("state");
+            if is_state_record {
+                // `v` is not used again on this path, so move it instead of
+                // cloning the whole JSON tree.
+                if let Ok(s) = serde_json::from_value::<SessionStateSnapshot>(v) {
+                    state = s;
+                }
+                continue;
+            }
+            // The clone is required here: `v` is still needed for the warning
+            // below when deserialization fails.
+            match serde_json::from_value::<ResponseItem>(v.clone()) {
+                Ok(item) => match item {
+                    ResponseItem::Message { .. }
+                    | ResponseItem::LocalShellCall { .. }
+                    | ResponseItem::FunctionCall { .. }
+                    | ResponseItem::FunctionCallOutput { .. }
+                    | ResponseItem::Reasoning { .. } => items.push(item),
+                    ResponseItem::Other => {}
+                },
+                Err(e) => {
+                    warn!("failed to parse item: {v:?}, error: {e}");
+                }
+            }
+        }
+
+        // Nothing below reads `session`, `items` or `state` again, so they are
+        // moved into the result rather than cloned.
+        let saved = SavedSession {
+            session_id: session.id,
+            session,
+            items,
+            state,
+        };
+
+        let file = std::fs::OpenOptions::new()
+            .append(true)
+            .read(true)
+            .open(path)?;
+
+        let (tx, rx) = mpsc::channel::<RolloutCmd>(256);
+        // `None` for the meta argument: the file already starts with its
+        // session-meta line.
+        tokio::task::spawn(rollout_writer(
+            tokio::fs::File::from_std(file),
+            rx,
+            None,
+            cwd,
+        ));
+        info!("Resumed rollout successfully from {path:?}");
+        Ok((Self { tx }, saved))
+    }
+
+    /// Sends a shutdown command to the writer task and waits for its
+    /// acknowledgement.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the command cannot be queued (the failure is also
+    /// logged) or if the acknowledgement channel is dropped before a reply.
+    pub async fn shutdown(&self) -> std::io::Result<()> {
+        let (ack, acknowledged) = oneshot::channel();
+        if let Err(e) = self.tx.send(RolloutCmd::Shutdown { ack }).await {
+            warn!("failed to send rollout shutdown command: {e}");
+            return Err(IoError::other(format!(
+                "failed to send rollout shutdown command: {e}"
+            )));
+        }
+        acknowledged
+            .await
+            .map_err(|e| IoError::other(format!("failed waiting for rollout shutdown: {e}")))
     }
 }

@@ -153,13 +254,15 @@ struct LogFileInfo {
 }

 fn create_log_file(config: &Config, session_id: Uuid) -> std::io::Result<LogFileInfo> {
-    // Resolve ~/.codex/sessions and create it if missing.
-    let mut dir = config.codex_home.clone();
-    dir.push(SESSIONS_SUBDIR);
-    fs::create_dir_all(&dir)?;
-
+    // Resolve ~/.codex/sessions/YYYY/MM/DD and create it if missing.
    let timestamp = OffsetDateTime::now_local()
        .map_err(|e| IoError::other(format!("failed to get local time: {e}")))?;
+    let mut dir = config.codex_home.clone();
+    dir.push(SESSIONS_SUBDIR);
+    dir.push(timestamp.year().to_string());
+    dir.push(format!("{:02}", u8::from(timestamp.month())));
+    dir.push(format!("{:02}", timestamp.day()));
+    fs::create_dir_all(&dir)?;

    // Custom format for YYYY-MM-DDThh-mm-ss. Use `-` instead of `:` for
    // compatibility with filesystems that do not allow colons in filenames.
@@ -183,3 +286,77 @@ fn create_log_file(config: &Config, session_id: Uuid) -> std::io::Result<LogFile
        timestamp,
    })
 }
+
+/// Task body that owns the rollout file and performs every write to it.
+///
+/// When `meta` is provided, git information for `cwd` is collected and the
+/// combined record is written as the first line. Afterwards each command
+/// received on `rx` is handled in arrival order until the channel closes:
+/// items are appended one JSON line each, state snapshots are written as a
+/// line tagged with `record_type: "state"`, and shutdown requests are
+/// acknowledged (the loop keeps running after an acknowledgement). The first
+/// write error ends the task with that error.
+async fn rollout_writer(
+    file: tokio::fs::File,
+    mut rx: mpsc::Receiver<RolloutCmd>,
+    meta: Option<SessionMeta>,
+    cwd: std::path::PathBuf,
+) -> std::io::Result<()> {
+    // Serialized shape of a state record: the snapshot's own fields plus a
+    // `record_type` discriminator.
+    #[derive(Serialize)]
+    struct StateLine<'a> {
+        record_type: &'static str,
+        #[serde(flatten)]
+        state: &'a SessionStateSnapshot,
+    }
+
+    let mut writer = JsonlWriter { file };
+
+    // A fresh session starts with its metadata (enriched with git info) as
+    // the first line of the file.
+    if let Some(meta) = meta {
+        let header = SessionMetaWithGit {
+            meta,
+            git: collect_git_info(&cwd).await,
+        };
+        writer.write_line(&header).await?;
+    }
+
+    while let Some(cmd) = rx.recv().await {
+        match cmd {
+            RolloutCmd::AddItems(items) => {
+                for item in &items {
+                    let persist = match item {
+                        ResponseItem::Message { .. }
+                        | ResponseItem::LocalShellCall { .. }
+                        | ResponseItem::FunctionCall { .. }
+                        | ResponseItem::FunctionCallOutput { .. }
+                        | ResponseItem::Reasoning { .. } => true,
+                        ResponseItem::Other => false,
+                    };
+                    if persist {
+                        writer.write_line(item).await?;
+                    }
+                }
+            }
+            RolloutCmd::UpdateState(state) => {
+                let line = StateLine {
+                    record_type: "state",
+                    state: &state,
+                };
+                writer.write_line(&line).await?;
+            }
+            RolloutCmd::Shutdown { ack } => {
+                // The requester may have stopped waiting; that is not an error.
+                let _ = ack.send(());
+            }
+        }
+    }
+
+    Ok(())
+}
+
+/// Appends JSON values to a file, one value per line.
+struct JsonlWriter {
+    file: tokio::fs::File,
+}
+
+impl JsonlWriter {
+    /// Serializes `item` to JSON, appends it followed by `\n`, then flushes.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if serialization, the write, or the flush fails.
+    async fn write_line(&mut self, item: &impl serde::Serialize) -> std::io::Result<()> {
+        let mut json = serde_json::to_string(item)?;
+        json.push('\n');
+        // Propagate write failures: discarding them would report success for a
+        // record that never reached the file.
+        self.file.write_all(json.as_bytes()).await?;
+        self.file.flush().await?;
+        Ok(())
+    }
+}
--- a/codex-rs/core/src/safety.rs
+++ b/codex-rs/core/src/safety.rs
@@ -41,11 +41,13 @@ pub fn assess_patch_safety(
        }
    }

-    if is_write_patch_constrained_to_writable_paths(action, writable_roots, cwd) {
-        SafetyCheck::AutoApprove {
-            sandbox_type: SandboxType::None,
-        }
-    } else if policy == AskForApproval::OnFailure {
+    // Even though the patch *appears* to be constrained to writable paths, it
+    // is possible that paths in the patch are hard links to files outside the
+    // writable roots, so we should still run `apply_patch` in a sandbox in that
+    // case.
+    if is_write_patch_constrained_to_writable_paths(action, writable_roots, cwd)
+        || policy == AskForApproval::OnFailure
+    {
        // Only auto‑approve when we can actually enforce a sandbox. Otherwise
        // fall back to asking the user because the patch may touch arbitrary
        // paths outside the project.
@@ -75,9 +77,6 @@ pub fn assess_command_safety(
    sandbox_policy: &SandboxPolicy,
    approved: &HashSet<Vec<String>>,
 ) -> SafetyCheck {
-    use AskForApproval::*;
-    use SandboxPolicy::*;
-
    // A command is "trusted" because either:
    // - it belongs to a set of commands we consider "safe" by default, or
    // - the user has explicitly approved the command for this session
@@ -97,6 +96,16 @@ pub fn assess_command_safety(
        };
    }

+    assess_safety_for_untrusted_command(approval_policy, sandbox_policy)
+}
+
+pub(crate) fn assess_safety_for_untrusted_command(
+    approval_policy: AskForApproval,
+    sandbox_policy: &SandboxPolicy,
+) -> SafetyCheck {
+    use AskForApproval::*;
+    use SandboxPolicy::*;
+
    match (approval_policy, sandbox_policy) {
        (UnlessTrusted, _) => {
            // Even though the user may have opted into DangerFullAccess,
--- a/codex-rs/core/src/seatbelt.rs
+++ b/codex-rs/core/src/seatbelt.rs
@@ -0,0 +1,312 @@
+use std::collections::HashMap;
+use std::path::Path;
+use std::path::PathBuf;
+use tokio::process::Child;
+
+use crate::protocol::SandboxPolicy;
+use crate::spawn::CODEX_SANDBOX_ENV_VAR;
+use crate::spawn::StdioPolicy;
+use crate::spawn::spawn_child_async;
+
+const MACOS_SEATBELT_BASE_POLICY: &str = include_str!("seatbelt_base_policy.sbpl");
+
+/// When working with `sandbox-exec`, only consider `sandbox-exec` in `/usr/bin`
+/// to defend against an attacker trying to inject a malicious version on the
+/// PATH. If /usr/bin/sandbox-exec has been tampered with, then the attacker
+/// already has root access.
+const MACOS_PATH_TO_SEATBELT_EXECUTABLE: &str = "/usr/bin/sandbox-exec";
+
+/// Spawns `command` through `/usr/bin/sandbox-exec` using arguments built by
+/// `create_seatbelt_command_args` for `sandbox_policy` and `cwd`.
+///
+/// The child's environment is `env` with `CODEX_SANDBOX_ENV_VAR` set to
+/// `"seatbelt"`.
+pub async fn spawn_command_under_seatbelt(
+    command: Vec<String>,
+    sandbox_policy: &SandboxPolicy,
+    cwd: PathBuf,
+    stdio_policy: StdioPolicy,
+    mut env: HashMap<String, String>,
+) -> std::io::Result<Child> {
+    // Mark the child as sandboxed before handing the environment over.
+    env.insert(CODEX_SANDBOX_ENV_VAR.to_string(), "seatbelt".to_string());
+
+    let seatbelt_args = create_seatbelt_command_args(command, sandbox_policy, &cwd);
+    let program = PathBuf::from(MACOS_PATH_TO_SEATBELT_EXECUTABLE);
+    spawn_child_async(
+        program,
+        seatbelt_args,
+        None,
+        cwd,
+        sandbox_policy,
+        stdio_policy,
+        env,
+    )
+    .await
+}
+
+/// Builds the argument vector passed to `sandbox-exec` for running `command`.
+///
+/// The result has the shape `-p <policy> [-D<param>=<path>...] -- <command...>`.
+/// The policy text is the base policy followed by file-read, file-write and
+/// network sections, each of which may be empty depending on `sandbox_policy`.
+/// Writable roots (as reported by `get_writable_roots_with_cwd(cwd)`) are not
+/// embedded in the policy text; the policy refers to them as
+/// `WRITABLE_ROOT_<n>` parameters whose values are supplied via `-D` arguments.
+fn create_seatbelt_command_args(
+    command: Vec<String>,
+    sandbox_policy: &SandboxPolicy,
+    cwd: &Path,
+) -> Vec<String> {
+    let (file_write_policy, extra_cli_args) = {
+        if sandbox_policy.has_full_disk_write_access() {
+            // Allegedly, this is more permissive than `(allow file-write*)`.
+            (
+                r#"(allow file-write* (regex #"^/"))"#.to_string(),
+                Vec::<String>::new(),
+            )
+        } else {
+            let writable_roots = sandbox_policy.get_writable_roots_with_cwd(cwd);
+
+            let mut writable_folder_policies: Vec<String> = Vec::new();
+            let mut cli_args: Vec<String> = Vec::new();
+
+            for (index, wr) in writable_roots.iter().enumerate() {
+                // Canonicalize to avoid mismatches like /var vs /private/var on macOS.
+                // If canonicalization fails, the path is used as given.
+                let canonical_root = wr.root.canonicalize().unwrap_or_else(|_| wr.root.clone());
+                let root_param = format!("WRITABLE_ROOT_{index}");
+                cli_args.push(format!(
+                    "-D{root_param}={}",
+                    canonical_root.to_string_lossy()
+                ));
+
+                if wr.read_only_subpaths.is_empty() {
+                    writable_folder_policies.push(format!("(subpath (param \"{root_param}\"))"));
+                } else {
+                    // Add parameters for each read-only subpath and generate
+                    // the `(require-not ...)` clauses.
+                    let mut require_parts: Vec<String> = Vec::new();
+                    require_parts.push(format!("(subpath (param \"{root_param}\"))"));
+                    for (subpath_index, ro) in wr.read_only_subpaths.iter().enumerate() {
+                        let canonical_ro = ro.canonicalize().unwrap_or_else(|_| ro.clone());
+                        let ro_param = format!("WRITABLE_ROOT_{index}_RO_{subpath_index}");
+                        cli_args.push(format!("-D{ro_param}={}", canonical_ro.to_string_lossy()));
+                        require_parts
+                            .push(format!("(require-not (subpath (param \"{ro_param}\")))"));
+                    }
+                    let policy_component = format!("(require-all {} )", require_parts.join(" "));
+                    writable_folder_policies.push(policy_component);
+                }
+            }
+
+            // With no writable roots, no file-write section is emitted at all
+            // and no `-D` parameters are passed.
+            if writable_folder_policies.is_empty() {
+                ("".to_string(), Vec::<String>::new())
+            } else {
+                let file_write_policy = format!(
+                    "(allow file-write*\n{}\n)",
+                    writable_folder_policies.join(" ")
+                );
+                (file_write_policy, cli_args)
+            }
+        }
+    };
+
+    let file_read_policy = if sandbox_policy.has_full_disk_read_access() {
+        "; allow read-only file operations\n(allow file-read*)"
+    } else {
+        ""
+    };
+
+    // TODO(mbolin): apply_patch calls must also honor the SandboxPolicy.
+    let network_policy = if sandbox_policy.has_full_network_access() {
+        "(allow network-outbound)\n(allow network-inbound)\n(allow system-socket)"
+    } else {
+        ""
+    };
+
+    // Sections are always joined with single newlines, even when empty, so the
+    // exact text (including blank lines) is part of what the tests compare.
+    let full_policy = format!(
+        "{MACOS_SEATBELT_BASE_POLICY}\n{file_read_policy}\n{file_write_policy}\n{network_policy}"
+    );
+
+    // `-D` definitions go between the policy and `--`; everything after `--`
+    // is the command to run.
+    let mut seatbelt_args: Vec<String> = vec!["-p".to_string(), full_policy];
+    seatbelt_args.extend(extra_cli_args);
+    seatbelt_args.push("--".to_string());
+    seatbelt_args.extend(command);
+    seatbelt_args
+}
+
+#[cfg(test)]
+mod tests {
+    #![expect(clippy::expect_used)]
+    use super::MACOS_SEATBELT_BASE_POLICY;
+    use super::create_seatbelt_command_args;
+    use crate::protocol::SandboxPolicy;
+    use pretty_assertions::assert_eq;
+    use std::fs;
+    use std::path::Path;
+    use std::path::PathBuf;
+    use tempfile::TempDir;
+
+    /// With two explicit writable roots and defaults disabled, the root that
+    /// contains a top-level `.git` directory is expected to get a
+    /// `require-not` carve-out for that directory, while the other root is
+    /// writable as a whole.
+    #[test]
+    fn create_seatbelt_args_with_read_only_git_subpath() {
+        // Create a temporary workspace with two writable roots: one containing
+        // a top-level .git directory and one without it.
+        let tmp = TempDir::new().expect("tempdir");
+        let PopulatedTmp {
+            root_with_git,
+            root_without_git,
+            root_with_git_canon,
+            root_with_git_git_canon,
+            root_without_git_canon,
+        } = populate_tmpdir(tmp.path());
+
+        // Build a policy that only includes the two test roots as writable and
+        // does not automatically include defaults like cwd or TMPDIR.
+        let policy = SandboxPolicy::WorkspaceWrite {
+            writable_roots: vec![root_with_git.clone(), root_without_git.clone()],
+            network_access: false,
+            include_default_writable_roots: false,
+        };
+
+        let args = create_seatbelt_command_args(
+            vec!["/bin/echo".to_string(), "hello".to_string()],
+            &policy,
+            tmp.path(),
+        );
+
+        // Build the expected policy text using a raw string for readability.
+        // Note that the policy includes:
+        // - the base policy,
+        // - read-only access to the filesystem,
+        // - write access to WRITABLE_ROOT_0 (but not its .git) and WRITABLE_ROOT_1.
+        // The trailing newline comes from the empty network section.
+        let expected_policy = format!(
+            r#"{MACOS_SEATBELT_BASE_POLICY}
+; allow read-only file operations
+(allow file-read*)
+(allow file-write*
+(require-all (subpath (param "WRITABLE_ROOT_0")) (require-not (subpath (param "WRITABLE_ROOT_0_RO_0"))) ) (subpath (param "WRITABLE_ROOT_1"))
+)
+"#,
+        );
+
+        let expected_args = vec![
+            "-p".to_string(),
+            expected_policy,
+            format!(
+                "-DWRITABLE_ROOT_0={}",
+                root_with_git_canon.to_string_lossy()
+            ),
+            format!(
+                "-DWRITABLE_ROOT_0_RO_0={}",
+                root_with_git_git_canon.to_string_lossy()
+            ),
+            format!(
+                "-DWRITABLE_ROOT_1={}",
+                root_without_git_canon.to_string_lossy()
+            ),
+            "--".to_string(),
+            "/bin/echo".to_string(),
+            "hello".to_string(),
+        ];
+
+        assert_eq!(args, expected_args);
+    }
+
+    /// With no explicit writable roots but defaults enabled, a cwd that
+    /// contains a top-level `.git` directory is expected to be writable except
+    /// for that directory.
+    #[test]
+    fn create_seatbelt_args_for_cwd_as_git_repo() {
+        // Create a temporary workspace and use the directory that contains a
+        // top-level .git directory as the cwd for this test.
+        let tmp = TempDir::new().expect("tempdir");
+        let PopulatedTmp {
+            root_with_git,
+            root_with_git_canon,
+            root_with_git_git_canon,
+            ..
+        } = populate_tmpdir(tmp.path());
+
+        // Build a policy that does not specify any writable_roots, but does
+        // use the default ones (cwd and TMPDIR) and verifies the `.git` check
+        // is done properly for cwd.
+        let policy = SandboxPolicy::WorkspaceWrite {
+            writable_roots: vec![],
+            network_access: false,
+            include_default_writable_roots: true,
+        };
+
+        let args = create_seatbelt_command_args(
+            vec!["/bin/echo".to_string(), "hello".to_string()],
+            &policy,
+            root_with_git.as_path(),
+        );
+
+        // TMPDIR is only expected as an extra writable root on macOS, and only
+        // when the variable is set and its value can be canonicalized.
+        let tmpdir_env_var = if cfg!(target_os = "macos") {
+            std::env::var("TMPDIR")
+                .ok()
+                .map(PathBuf::from)
+                .and_then(|p| p.canonicalize().ok())
+                .map(|p| p.to_string_lossy().to_string())
+        } else {
+            None
+        };
+        let tempdir_policy_entry = if tmpdir_env_var.is_some() {
+            " (subpath (param \"WRITABLE_ROOT_1\"))"
+        } else {
+            ""
+        };
+
+        // Build the expected policy text using a raw string for readability.
+        // Note that the policy includes:
+        // - the base policy,
+        // - read-only access to the filesystem,
+        // - write access to WRITABLE_ROOT_0 (cwd, but not its .git) and, when a
+        //   TMPDIR entry is expected, WRITABLE_ROOT_1.
+        let expected_policy = format!(
+            r#"{MACOS_SEATBELT_BASE_POLICY}
+; allow read-only file operations
+(allow file-read*)
+(allow file-write*
+(require-all (subpath (param "WRITABLE_ROOT_0")) (require-not (subpath (param "WRITABLE_ROOT_0_RO_0"))) ){tempdir_policy_entry}
+)
+"#,
+        );
+
+        let mut expected_args = vec![
+            "-p".to_string(),
+            expected_policy,
+            format!(
+                "-DWRITABLE_ROOT_0={}",
+                root_with_git_canon.to_string_lossy()
+            ),
+            format!(
+                "-DWRITABLE_ROOT_0_RO_0={}",
+                root_with_git_git_canon.to_string_lossy()
+            ),
+        ];
+
+        if let Some(p) = tmpdir_env_var {
+            expected_args.push(format!("-DWRITABLE_ROOT_1={p}"));
+        }
+
+        expected_args.extend(vec![
+            "--".to_string(),
+            "/bin/echo".to_string(),
+            "hello".to_string(),
+        ]);
+
+        assert_eq!(args, expected_args);
+    }
+
+    /// Paths created by `populate_tmpdir`, in both as-created and canonical form.
+    struct PopulatedTmp {
+        // `<tmp>/with_git`, which contains a `.git` directory.
+        root_with_git: PathBuf,
+        // `<tmp>/no_git`, which has no `.git` directory.
+        root_without_git: PathBuf,
+        // Canonicalized `root_with_git`.
+        root_with_git_canon: PathBuf,
+        // `.git` joined onto `root_with_git_canon`.
+        root_with_git_git_canon: PathBuf,
+        // Canonicalized `root_without_git`.
+        root_without_git_canon: PathBuf,
+    }
+
+    /// Creates `with_git/` (with a `.git/` directory inside) and `no_git/`
+    /// under `tmp`, and returns their paths together with canonical forms
+    /// suitable for comparing against `-D` parameter values.
+    fn populate_tmpdir(tmp: &Path) -> PopulatedTmp {
+        let root_with_git = tmp.join("with_git");
+        let root_without_git = tmp.join("no_git");
+        let git_dir = root_with_git.join(".git");
+
+        fs::create_dir_all(&root_with_git).expect("create with_git");
+        fs::create_dir_all(&root_without_git).expect("create no_git");
+        fs::create_dir_all(&git_dir).expect("create .git");
+
+        // Canonical forms are computed before the originals are moved into
+        // the result.
+        let root_with_git_canon = root_with_git.canonicalize().expect("canonicalize with_git");
+        let root_without_git_canon = root_without_git
+            .canonicalize()
+            .expect("canonicalize no_git");
+
+        PopulatedTmp {
+            root_with_git_git_canon: root_with_git_canon.join(".git"),
+            root_with_git_canon,
+            root_without_git_canon,
+            root_with_git,
+            root_without_git,
+        }
+    }
+}
--- a/codex-rs/core/src/seatbelt_base_policy.sbpl
+++ b/codex-rs/core/src/seatbelt_base_policy.sbpl
@@ -65,3 +65,7 @@
  (sysctl-name "sysctl.proc_cputype")
  (sysctl-name-prefix "hw.perflevel")
 )
+
+; Added on top of Chrome profile
+; Needed for python multiprocessing on MacOS for the SemLock
+(allow ipc-posix-sem)
--- a/codex-rs/core/src/shell.rs
+++ b/codex-rs/core/src/shell.rs
@@ -0,0 +1,237 @@
+use shlex;
+
+/// A zsh executable together with the profile file to source before running
+/// a command.
+#[derive(Debug, PartialEq, Eq)]
+pub struct ZshShell {
+    // Path of the zsh executable; becomes the first element of the generated
+    // invocation.
+    shell_path: String,
+    // Path of the `.zshrc` file that the generated invocation sources.
+    zshrc_path: String,
+}
+
+/// The user's default shell, as far as it could be determined.
+#[derive(Debug, PartialEq, Eq)]
+pub enum Shell {
+    /// zsh, with the paths needed to build a profile-loading invocation.
+    Zsh(ZshShell),
+    /// No recognized shell; no invocation is generated for it.
+    Unknown,
+}
+
+impl Shell {
+    /// Rewrites `command` so that it runs in the user's default shell with
+    /// their profile loaded.
+    ///
+    /// For zsh the result is `[<shell_path>, "-lc", "source <zshrc> && (<cmd>)"]`.
+    /// `<cmd>` is the script of a `bash -lc <script>` command taken verbatim,
+    /// or otherwise `command` joined with shell quoting. `<zshrc>` is the
+    /// shell-quoted profile path.
+    ///
+    /// Returns `None` when the shell is unknown, the zshrc file does not
+    /// exist, or the command or the zshrc path cannot be quoted.
+    pub fn format_default_shell_invocation(&self, command: Vec<String>) -> Option<Vec<String>> {
+        match self {
+            Shell::Zsh(zsh) => {
+                if !std::path::Path::new(&zsh.zshrc_path).exists() {
+                    return None;
+                }
+
+                let joined = strip_bash_lc(&command)
+                    .or_else(|| shlex::try_join(command.iter().map(String::as_str)).ok())?;
+                // Quote the profile path so that a path containing spaces or
+                // shell metacharacters is sourced as a single word. Paths made
+                // only of ordinary characters are left unchanged.
+                let zshrc = shlex::try_quote(&zsh.zshrc_path).ok()?;
+
+                Some(vec![
+                    zsh.shell_path.clone(),
+                    "-lc".to_string(),
+                    format!("source {zshrc} && ({joined})"),
+                ])
+            }
+            Shell::Unknown => None,
+        }
+    }
+}
+
+/// Returns the script of a command of the exact form `["bash", "-lc", script]`.
+///
+/// Any other shape (different length, program or flag) yields `None`.
+fn strip_bash_lc(command: &[String]) -> Option<String> {
+    match command {
+        // Exactly three items, the first two being "bash" and "-lc".
+        [first, second, third] if first == "bash" && second == "-lc" => Some(third.clone()),
+        _ => None,
+    }
+}
+
+/// Determines the current user's login shell on macOS.
+///
+/// Runs `dscl . -read /Users/<user> UserShell` and looks for a
+/// `UserShell: ` line whose value ends in `/zsh`. On a match the result is
+/// [`Shell::Zsh`] with the profile assumed to be at `/Users/<user>/.zshrc`;
+/// if `dscl` cannot be run, exits unsuccessfully, or reports no zsh shell,
+/// the result is [`Shell::Unknown`].
+#[cfg(target_os = "macos")]
+pub async fn default_user_shell() -> Shell {
+    use tokio::process::Command;
+    use whoami;
+
+    let user = whoami::username();
+    let home = format!("/Users/{user}");
+    let lookup = Command::new("dscl")
+        .args([".", "-read", &home, "UserShell"])
+        .output()
+        .await;
+    let Ok(output) = lookup else {
+        return Shell::Unknown;
+    };
+    if !output.status.success() {
+        return Shell::Unknown;
+    }
+
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    let zsh_path = stdout
+        .lines()
+        .filter_map(|line| line.strip_prefix("UserShell: "))
+        .find(|shell_path| shell_path.ends_with("/zsh"));
+    match zsh_path {
+        Some(shell_path) => Shell::Zsh(ZshShell {
+            shell_path: shell_path.to_string(),
+            zshrc_path: format!("{home}/.zshrc"),
+        }),
+        None => Shell::Unknown,
+    }
+}
+
+/// On platforms other than macOS no shell detection is attempted; the result
+/// is always [`Shell::Unknown`].
+#[cfg(not(target_os = "macos"))]
+pub async fn default_user_shell() -> Shell {
+    Shell::Unknown
+}
+
+#[cfg(test)]
+#[cfg(target_os = "macos")]
+mod tests {
+    use super::*;
+    use std::process::Command;
+
+    /// When `$SHELL` reports a zsh binary, `default_user_shell` must return
+    /// that shell paired with `$HOME/.zshrc`.
+    #[tokio::test]
+    #[expect(clippy::unwrap_used)]
+    async fn test_current_shell_detects_zsh() {
+        let echo_shell = Command::new("sh")
+            .arg("-c")
+            .arg("echo $SHELL")
+            .output()
+            .unwrap();
+        let home = std::env::var("HOME").unwrap();
+        let shell_path = String::from_utf8_lossy(&echo_shell.stdout)
+            .trim()
+            .to_string();
+
+        // Nothing to verify on machines whose login shell is not zsh.
+        if !shell_path.ends_with("/zsh") {
+            return;
+        }
+
+        let detected = default_user_shell().await;
+        let expected = Shell::Zsh(ZshShell {
+            shell_path,
+            zshrc_path: format!("{home}/.zshrc"),
+        });
+        assert_eq!(detected, expected);
+    }
+
+    /// A zsh shell whose zshrc file is missing must not produce an invocation.
+    #[tokio::test]
+    async fn test_run_with_profile_zshrc_not_exists() {
+        let zsh_without_profile = ZshShell {
+            shell_path: "/bin/zsh".to_string(),
+            zshrc_path: "/does/not/exist/.zshrc".to_string(),
+        };
+        let command = vec!["myecho".to_string()];
+        assert_eq!(
+            Shell::Zsh(zsh_without_profile).format_default_shell_invocation(command),
+            None
+        );
+    }
+
+    /// For several argv shapes, checks both the command line produced by
+    /// `format_default_shell_invocation` and the result of actually running it
+    /// against a temporary `.zshrc` that defines `myecho`.
+    #[expect(clippy::unwrap_used)]
+    #[tokio::test]
+    async fn test_run_with_profile_escaping_and_execution() {
+        use std::collections::HashMap;
+        use std::path::PathBuf;
+        use std::sync::Arc;
+
+        use tokio::sync::Notify;
+
+        use crate::exec::ExecParams;
+        use crate::exec::SandboxType;
+        use crate::exec::process_exec_tool_call;
+        use crate::protocol::SandboxPolicy;
+
+        let shell_path = "/bin/zsh";
+
+        // Each case is (input argv, expected invocation, expected stdout).
+        // `ZSHRC_PATH` is replaced with the temporary profile path below.
+        let cases = vec![
+            // A function defined only in the profile must be callable.
+            (
+                vec!["myecho"],
+                vec![shell_path, "-lc", "source ZSHRC_PATH && (myecho)"],
+                Some("It works!\n"),
+            ),
+            // A generic argv is joined with shell quoting.
+            (
+                vec!["bash", "-c", "echo 'single' \"double\""],
+                vec![
+                    shell_path,
+                    "-lc",
+                    "source ZSHRC_PATH && (bash -c \"echo 'single' \\\"double\\\"\")",
+                ],
+                Some("single double\n"),
+            ),
+            // `bash -lc <script>` is unwrapped and the script used verbatim.
+            (
+                vec!["bash", "-lc", "echo 'single' \"double\""],
+                vec![
+                    shell_path,
+                    "-lc",
+                    "source ZSHRC_PATH && (echo 'single' \"double\")",
+                ],
+                Some("single double\n"),
+            ),
+        ];
+        for (input, expected_cmd, expected_output) in cases {
+            // create a temp directory with a zshrc file in it
+            let temp_home = tempfile::tempdir().unwrap();
+            let zshrc_path = temp_home.path().join(".zshrc");
+            std::fs::write(
+                &zshrc_path,
+                r#"
+                    set -x
+                    function myecho {
+                        echo 'It works!'
+                    }
+                    "#,
+            )
+            .unwrap();
+            let shell = Shell::Zsh(ZshShell {
+                shell_path: shell_path.to_string(),
+                zshrc_path: zshrc_path.to_str().unwrap().to_string(),
+            });
+
+            let actual_cmd = shell
+                .format_default_shell_invocation(input.iter().map(|s| s.to_string()).collect());
+            let expected_cmd: Vec<String> = expected_cmd
+                .iter()
+                .map(|s| s.replace("ZSHRC_PATH", zshrc_path.to_str().unwrap()))
+                .collect();
+
+            assert_eq!(actual_cmd, Some(expected_cmd));
+            // Actually run the command and check output/exit code
+            let output = process_exec_tool_call(
+                ExecParams {
+                    command: actual_cmd.unwrap(),
+                    cwd: PathBuf::from(temp_home.path()),
+                    timeout_ms: None,
+                    env: HashMap::from([(
+                        "HOME".to_string(),
+                        temp_home.path().to_str().unwrap().to_string(),
+                    )]),
+                },
+                SandboxType::None,
+                Arc::new(Notify::new()),
+                &SandboxPolicy::DangerFullAccess,
+                &None,
+                None,
+            )
+            .await
+            .unwrap();
+
+            assert_eq!(output.exit_code, 0, "input: {input:?} output: {output:?}");
+            if let Some(expected) = expected_output {
+                assert_eq!(
+                    output.stdout, expected,
+                    "input: {input:?} output: {output:?}"
+                );
+            }
+        }
+    }
+}
--- a/codex-rs/core/src/spawn.rs
+++ b/codex-rs/core/src/spawn.rs
@@ -0,0 +1,107 @@
+use std::collections::HashMap;
+use std::path::PathBuf;
+use std::process::Stdio;
+use tokio::process::Child;
+use tokio::process::Command;
+use tracing::trace;
+
+use crate::protocol::SandboxPolicy;
+
+/// Experimental environment variable that will be set to some non-empty value
+/// (`spawn_child_async` currently uses "1") if both of the following are true:
+///
+/// 1. The process was spawned by Codex as part of a shell tool call.
+/// 2. `SandboxPolicy::has_full_network_access()` was false for the tool call.
+///
+/// We may try to have just one environment variable for all sandboxing
+/// attributes, so this may change in the future.
+pub const CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR: &str = "CODEX_SANDBOX_NETWORK_DISABLED";
+
+/// Name of the variable that should be set when the process is spawned under a
+/// sandbox. Currently, the value is "seatbelt" for macOS, but it may change in
+/// the future to accommodate sandboxing configuration and other mechanisms.
+pub const CODEX_SANDBOX_ENV_VAR: &str = "CODEX_SANDBOX";
+
+#[derive(Debug, Clone, Copy)]
+pub enum StdioPolicy { // How `spawn_child_async` wires up the child's stdio.
+    RedirectForShellTool, // stdin is null; stdout and stderr are piped
+    Inherit, // stdin, stdout, and stderr are inherited from the parent
+}
+
+/// Spawns `program` with `args` in `cwd`, using only `env` as its environment
+/// (the inherited environment is cleared) and wiring stdio per `stdio_policy`.
+/// On Unix, `arg0` overrides argv[0]; it defaults to the program path.
+///
+/// `sandbox_policy` is taken for now only to decide whether to set the
+/// `CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR` environment variable. The child is
+/// spawned with `kill_on_drop(true)`; spawn errors are returned as `io::Error`.
+pub(crate) async fn spawn_child_async(
+    program: PathBuf,
+    args: Vec<String>,
+    #[cfg_attr(not(unix), allow(unused_variables))] arg0: Option<&str>,
+    cwd: PathBuf,
+    sandbox_policy: &SandboxPolicy,
+    stdio_policy: StdioPolicy,
+    env: HashMap<String, String>,
+) -> std::io::Result<Child> {
+    trace!(
+        "spawn_child_async: {program:?} {args:?} {arg0:?} {cwd:?} {sandbox_policy:?} {stdio_policy:?} {env:?}"
+    );
+
+    let mut cmd = Command::new(&program);
+    #[cfg(unix)]
+    cmd.arg0(arg0.map_or_else(|| program.to_string_lossy().to_string(), String::from));
+    cmd.args(args);
+    cmd.current_dir(cwd);
+    cmd.env_clear(); // start from an empty environment; only `env` (plus the flag below) is passed on
+    cmd.envs(env);
+
+    if !sandbox_policy.has_full_network_access() {
+        cmd.env(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR, "1");
+    }
+
+    // If this Codex process dies (including being killed via SIGKILL), we want
+    // any child processes that were spawned as part of a `"shell"` tool call
+    // to also be terminated.
+
+    // This relies on prctl(2), so it only works on Linux.
+    #[cfg(target_os = "linux")]
+    unsafe { // `pre_exec` is unsafe: the closure runs in the forked child before exec
+        cmd.pre_exec(|| {
+            // This prctl call effectively requests, "deliver SIGTERM when my
+            // current parent dies."
+            if libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM) == -1 {
+                return Err(std::io::Error::last_os_error());
+            }
+
+            // Though if there was a race condition and this pre_exec() block is
+            // run _after_ the parent (i.e., the Codex process) has already
+            // exited, then the parent is the _init_ process (which will never
+            // die), so we should just terminate the child process now.
+            if libc::getppid() == 1 {
+                libc::raise(libc::SIGTERM);
+            }
+            Ok(())
+        });
+    }
+
+    match stdio_policy {
+        StdioPolicy::RedirectForShellTool => {
+            // Do not create a file descriptor for stdin because otherwise some
+            // commands may hang forever waiting for input. For example, ripgrep has
+            // a heuristic where it may try to read from stdin as explained here:
+            // https://github.com/BurntSushi/ripgrep/blob/e2362d4d5185d02fa857bf381e7bd52e66fafc73/crates/core/flags/hiargs.rs#L1101-L1103
+            cmd.stdin(Stdio::null());
+
+            cmd.stdout(Stdio::piped()).stderr(Stdio::piped());
+        }
+        StdioPolicy::Inherit => {
+            // Inherit stdin, stdout, and stderr from the parent process.
+            cmd.stdin(Stdio::inherit())
+                .stdout(Stdio::inherit())
+                .stderr(Stdio::inherit());
+        }
+    }
+
+    cmd.kill_on_drop(true).spawn() // dropping the returned `Child` kills the process
+}
--- a/codex-rs/core/tests/cli_stream.rs
+++ b/codex-rs/core/tests/cli_stream.rs
@@ -1,8 +1,12 @@
 #![expect(clippy::unwrap_used)]

 use assert_cmd::Command as AssertCommand;
-use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use std::time::Duration;
+use std::time::Instant;
 use tempfile::TempDir;
+use uuid::Uuid;
+use walkdir::WalkDir;
 use wiremock::Mock;
 use wiremock::MockServer;
 use wiremock::ResponseTemplate;
@@ -71,12 +75,102 @@ async fn chat_mode_stream_cli() {
    println!("Stderr:\n{}", String::from_utf8_lossy(&output.stderr));
    assert!(output.status.success());
    let stdout = String::from_utf8_lossy(&output.stdout);
-    assert!(stdout.contains("hi"));
-    assert_eq!(stdout.matches("hi").count(), 1);
+    let hi_lines = stdout.lines().filter(|line| line.trim() == "hi").count();
+    assert_eq!(hi_lines, 1, "Expected exactly one line with 'hi'");

    server.verify().await;
 }

+/// Verify that passing `-c experimental_instructions_file=...` to the CLI puts
+/// the file's contents into the `instructions` field of the request body
+/// received by a mock OpenAI Responses endpoint.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn exec_cli_applies_experimental_instructions_file() {
+    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+        );
+        return;
+    }
+
+    // Start mock server which will capture the request and return a minimal
+    // SSE stream for a single turn.
+    let server = MockServer::start().await;
+    let sse = concat!(
+        "data: {\"type\":\"response.created\",\"response\":{}}\n\n",
+        "data: {\"type\":\"response.completed\",\"response\":{\"id\":\"r1\"}}\n\n"
+    );
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(
+            ResponseTemplate::new(200)
+                .insert_header("content-type", "text/event-stream")
+                .set_body_raw(sse, "text/event-stream"),
+        )
+        .expect(1)
+        .mount(&server)
+        .await;
+
+    // Create a temporary instructions file with a unique marker we can assert
+    // appears in the outbound request payload.
+    let custom = TempDir::new().unwrap();
+    let marker = "cli-experimental-instructions-marker";
+    let custom_path = custom.path().join("instr.md");
+    std::fs::write(&custom_path, marker).unwrap();
+    let custom_path_str = custom_path.to_string_lossy().replace('\\', "/"); // forward slashes: the path is embedded in a quoted `-c` value below
+
+    // Build a provider override that points at the mock server and instructs
+    // Codex to use the Responses API; `env_key = "PATH"` is the dummy env var.
+    let provider_override = format!(
+        "model_providers.mock={{ name = \"mock\", base_url = \"{}/v1\", env_key = \"PATH\", wire_api = \"responses\" }}",
+        server.uri()
+    );
+
+    let home = TempDir::new().unwrap();
+    let mut cmd = AssertCommand::new("cargo");
+    cmd.arg("run")
+        .arg("-p")
+        .arg("codex-cli")
+        .arg("--quiet")
+        .arg("--")
+        .arg("exec")
+        .arg("--skip-git-repo-check")
+        .arg("-c")
+        .arg(&provider_override)
+        .arg("-c")
+        .arg("model_provider=\"mock\"")
+        .arg("-c")
+        .arg(format!(
+            "experimental_instructions_file=\"{custom_path_str}\""
+        ))
+        .arg("-C")
+        .arg(env!("CARGO_MANIFEST_DIR"))
+        .arg("hello?\n");
+    cmd.env("CODEX_HOME", home.path())
+        .env("OPENAI_API_KEY", "dummy")
+        .env("OPENAI_BASE_URL", format!("{}/v1", server.uri()));
+
+    let output = cmd.output().unwrap();
+    println!("Status: {}", output.status);
+    println!("Stdout:\n{}", String::from_utf8_lossy(&output.stdout));
+    println!("Stderr:\n{}", String::from_utf8_lossy(&output.stderr));
+    assert!(output.status.success());
+
+    // Inspect the first captured request and verify our custom marker was
+    // included in the `instructions` field.
+    let request = &server.received_requests().await.unwrap()[0];
+    let body = request.body_json::<serde_json::Value>().unwrap();
+    let instructions = body
+        .get("instructions")
+        .and_then(|v| v.as_str())
+        .unwrap_or_default() // a missing or non-string field becomes "" and fails the assert below
+        .to_string();
+    assert!(
+        instructions.contains(marker),
+        "instructions did not contain custom marker; got: {instructions}"
+    );
+}
+
 /// Tests streaming responses through the CLI using a local SSE fixture file.
 /// This test:
 /// 1. Uses a pre-recorded SSE response fixture instead of a live server
@@ -117,3 +211,375 @@ async fn responses_api_stream_cli() {
    let stdout = String::from_utf8_lossy(&output.stdout);
    assert!(stdout.contains("fixture hello"));
 }
+
+/// End-to-end: create a session (writes rollout), verify the file, then resume and confirm append.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn integration_creates_and_checks_session_file() {
+    // Honor sandbox network restrictions for CI parity with the other tests.
+    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+        );
+        return;
+    }
+
+    // 1. Temp home so we read/write isolated session files.
+    let home = TempDir::new().unwrap();
+
+    // 2. Unique marker we'll look for in the session log.
+    let marker = format!("integration-test-{}", Uuid::new_v4());
+    let prompt = format!("echo {marker}");
+
+    // 3. Use the same offline SSE fixture as responses_api_stream_cli so the test is hermetic.
+    let fixture =
+        std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/cli_responses_fixture.sse");
+
+    // 4. Run the codex CLI through cargo (ensures the right bin is built) and invoke `exec`,
+    //    which is what records a session.
+    let mut cmd = AssertCommand::new("cargo");
+    cmd.arg("run")
+        .arg("-p")
+        .arg("codex-cli")
+        .arg("--quiet")
+        .arg("--")
+        .arg("exec")
+        .arg("--skip-git-repo-check")
+        .arg("-C")
+        .arg(env!("CARGO_MANIFEST_DIR"))
+        .arg(&prompt);
+    cmd.env("CODEX_HOME", home.path())
+        .env("OPENAI_API_KEY", "dummy")
+        .env("CODEX_RS_SSE_FIXTURE", &fixture)
+        // Required for CLI arg parsing even though fixture short-circuits network usage.
+        .env("OPENAI_BASE_URL", "http://unused.local");
+
+    let output = cmd.output().unwrap();
+    assert!(
+        output.status.success(),
+        "codex-cli exec failed: {}",
+        String::from_utf8_lossy(&output.stderr)
+    );
+
+    // Wait (up to 5s, polling every 50ms) for the sessions dir to appear.
+    let sessions_dir = home.path().join("sessions");
+    let dir_deadline = Instant::now() + Duration::from_secs(5);
+    while !sessions_dir.exists() && Instant::now() < dir_deadline {
+        std::thread::sleep(Duration::from_millis(50));
+    }
+    assert!(sessions_dir.exists(), "sessions directory never appeared");
+
+    // Find the `.jsonl` session file that contains `marker` (poll for up to 10s).
+    let deadline = Instant::now() + Duration::from_secs(10);
+    let mut matching_path: Option<std::path::PathBuf> = None;
+    while Instant::now() < deadline && matching_path.is_none() {
+        for entry in WalkDir::new(&sessions_dir) {
+            let entry = match entry {
+                Ok(e) => e,
+                Err(_) => continue,
+            };
+            if !entry.file_type().is_file() {
+                continue;
+            }
+            if !entry.file_name().to_string_lossy().ends_with(".jsonl") {
+                continue;
+            }
+            let path = entry.path();
+            let Ok(content) = std::fs::read_to_string(path) else {
+                continue;
+            };
+            let mut lines = content.lines();
+            if lines.next().is_none() { // consumes the first line (parsed as session meta later); empty files are ignored
+                continue;
+            }
+            for line in lines {
+                if line.trim().is_empty() {
+                    continue;
+                }
+                let item: serde_json::Value = match serde_json::from_str(line) {
+                    Ok(v) => v,
+                    Err(_) => continue,
+                };
+                if item.get("type").and_then(|t| t.as_str()) == Some("message") {
+                    if let Some(c) = item.get("content") {
+                        if c.to_string().contains(&marker) {
+                            matching_path = Some(path.to_path_buf());
+                            break; // exits only the per-line loop; the walk over remaining entries continues
+                        }
+                    }
+                }
+            }
+        }
+        if matching_path.is_none() {
+            std::thread::sleep(Duration::from_millis(50));
+        }
+    }
+
+    let path = match matching_path {
+        Some(p) => p,
+        None => panic!("No session file containing the marker was found"),
+    };
+
+    // Basic sanity checks on location and metadata.
+    let rel = match path.strip_prefix(&sessions_dir) {
+        Ok(r) => r,
+        Err(_) => panic!("session file should live under sessions/"),
+    };
+    let comps: Vec<String> = rel
+        .components()
+        .map(|c| c.as_os_str().to_string_lossy().into_owned())
+        .collect();
+    assert_eq!(
+        comps.len(),
+        4,
+        "Expected sessions/YYYY/MM/DD/<file>, got {rel:?}"
+    );
+    let year = &comps[0];
+    let month = &comps[1];
+    let day = &comps[2];
+    assert!(
+        year.len() == 4 && year.chars().all(|c| c.is_ascii_digit()),
+        "Year dir not 4-digit numeric: {year}"
+    );
+    assert!(
+        month.len() == 2 && month.chars().all(|c| c.is_ascii_digit()),
+        "Month dir not zero-padded 2-digit numeric: {month}"
+    );
+    assert!(
+        day.len() == 2 && day.chars().all(|c| c.is_ascii_digit()),
+        "Day dir not zero-padded 2-digit numeric: {day}"
+    );
+    if let Ok(m) = month.parse::<u8>() { // parse cannot fail here: two ASCII digits were asserted above
+        assert!((1..=12).contains(&m), "Month out of range: {m}");
+    }
+    if let Ok(d) = day.parse::<u8>() { // parse cannot fail here: two ASCII digits were asserted above
+        assert!((1..=31).contains(&d), "Day out of range: {d}");
+    }
+
+    let content =
+        std::fs::read_to_string(&path).unwrap_or_else(|_| panic!("Failed to read session file"));
+    let mut lines = content.lines();
+    let meta_line = lines
+        .next()
+        .ok_or("missing session meta line")
+        .unwrap_or_else(|_| panic!("missing session meta line"));
+    let meta: serde_json::Value = serde_json::from_str(meta_line)
+        .unwrap_or_else(|_| panic!("Failed to parse session meta line as JSON"));
+    assert!(meta.get("id").is_some(), "SessionMeta missing id");
+    assert!(
+        meta.get("timestamp").is_some(),
+        "SessionMeta missing timestamp"
+    );
+
+    let mut found_message = false;
+    for line in lines {
+        if line.trim().is_empty() {
+            continue;
+        }
+        let Ok(item) = serde_json::from_str::<serde_json::Value>(line) else {
+            continue;
+        };
+        if item.get("type").and_then(|t| t.as_str()) == Some("message") {
+            if let Some(c) = item.get("content") {
+                if c.to_string().contains(&marker) {
+                    found_message = true;
+                    break;
+                }
+            }
+        }
+    }
+    assert!(
+        found_message,
+        "No message found in session file containing the marker"
+    );
+
+    // Second run: resume and append.
+    let orig_len = content.lines().count();
+    let marker2 = format!("integration-resume-{}", Uuid::new_v4());
+    let prompt2 = format!("echo {marker2}");
+    // Cross-platform safe resume override. On Windows, backslashes in a TOML string must be escaped
+    // or the parse will fail and the raw literal (including quotes) may be preserved all the way down
+    // to Config, which in turn breaks resume because the path is invalid. Normalize to forward slashes
+    // to sidestep the issue.
+    let resume_path_str = path.to_string_lossy().replace('\\', "/");
+    let resume_override = format!("experimental_resume=\"{resume_path_str}\"");
+    let mut cmd2 = AssertCommand::new("cargo");
+    cmd2.arg("run")
+        .arg("-p")
+        .arg("codex-cli")
+        .arg("--quiet")
+        .arg("--")
+        .arg("exec")
+        .arg("--skip-git-repo-check")
+        .arg("-c")
+        .arg(&resume_override)
+        .arg("-C")
+        .arg(env!("CARGO_MANIFEST_DIR"))
+        .arg(&prompt2);
+    cmd2.env("CODEX_HOME", home.path())
+        .env("OPENAI_API_KEY", "dummy")
+        .env("CODEX_RS_SSE_FIXTURE", &fixture)
+        .env("OPENAI_BASE_URL", "http://unused.local");
+
+    let output2 = cmd2.output().unwrap();
+    assert!(output2.status.success(), "resume codex-cli run failed");
+
+    // The rollout writer runs on a background async task; give it a moment to flush.
+    let mut new_len = orig_len;
+    let deadline = Instant::now() + Duration::from_secs(5);
+    let mut content2 = String::new();
+    while Instant::now() < deadline {
+        if let Ok(c) = std::fs::read_to_string(&path) {
+            let count = c.lines().count();
+            if count > orig_len {
+                content2 = c;
+                new_len = count;
+                break;
+            }
+        }
+        std::thread::sleep(Duration::from_millis(50));
+    }
+    if content2.is_empty() {
+        // last attempt: the polling loop above ended without seeing the file grow
+        content2 = std::fs::read_to_string(&path).unwrap();
+        new_len = content2.lines().count();
+    }
+    assert!(new_len > orig_len, "rollout file did not grow after resume");
+    assert!(content2.contains(&marker), "rollout lost original marker");
+    assert!(
+        content2.contains(&marker2),
+        "rollout missing resumed marker"
+    );
+}
+
+/// Verifies `collect_git_info` on a fresh repo (commit, branch, remote URL) and that `GitInfo` round-trips through JSON.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn integration_git_info_unit_test() {
+    // This test verifies git info collection works independently
+    // without depending on the full CLI integration
+
+    // 1. Create temp directory for git repo
+    let temp_dir = TempDir::new().unwrap();
+    let git_repo = temp_dir.path().to_path_buf();
+    let envs = vec![
+        ("GIT_CONFIG_GLOBAL", "/dev/null"), // keep the user's global git config out of the test
+        ("GIT_CONFIG_NOSYSTEM", "1"), // keep the system-wide git config out of the test
+    ];
+
+    // 2. Initialize a git repository with some content
+    let init_output = std::process::Command::new("git")
+        .envs(envs.clone())
+        .args(["init"])
+        .current_dir(&git_repo)
+        .output()
+        .unwrap();
+    assert!(init_output.status.success(), "git init failed");
+
+    // Configure git user (required for commits); exit status is not checked here
+    std::process::Command::new("git")
+        .envs(envs.clone())
+        .args(["config", "user.name", "Integration Test"])
+        .current_dir(&git_repo)
+        .output()
+        .unwrap();
+
+    std::process::Command::new("git")
+        .envs(envs.clone())
+        .args(["config", "user.email", "test@example.com"])
+        .current_dir(&git_repo)
+        .output()
+        .unwrap();
+
+    // Create a test file and commit it
+    let test_file = git_repo.join("test.txt");
+    std::fs::write(&test_file, "integration test content").unwrap();
+
+    std::process::Command::new("git")
+        .envs(envs.clone())
+        .args(["add", "."])
+        .current_dir(&git_repo)
+        .output()
+        .unwrap();
+
+    let commit_output = std::process::Command::new("git")
+        .envs(envs.clone())
+        .args(["commit", "-m", "Integration test commit"])
+        .current_dir(&git_repo)
+        .output()
+        .unwrap();
+    assert!(commit_output.status.success(), "git commit failed");
+
+    // Create a branch to test branch detection (exit status not checked)
+    std::process::Command::new("git")
+        .envs(envs.clone())
+        .args(["checkout", "-b", "integration-test-branch"])
+        .current_dir(&git_repo)
+        .output()
+        .unwrap();
+
+    // Add a remote to test repository URL detection (exit status not checked)
+    std::process::Command::new("git")
+        .envs(envs.clone())
+        .args([
+            "remote",
+            "add",
+            "origin",
+            "https://github.com/example/integration-test.git",
+        ])
+        .current_dir(&git_repo)
+        .output()
+        .unwrap();
+
+    // 3. Test git info collection directly
+    let git_info = codex_core::git_info::collect_git_info(&git_repo).await;
+
+    // 4. Verify git info is present and contains expected data
+    assert!(git_info.is_some(), "Git info should be collected");
+
+    let git_info = git_info.unwrap();
+
+    // Check that we have a commit hash
+    assert!(
+        git_info.commit_hash.is_some(),
+        "Git info should contain commit_hash"
+    );
+    let commit_hash = git_info.commit_hash.as_ref().unwrap();
+    assert_eq!(commit_hash.len(), 40, "Commit hash should be 40 characters");
+    assert!(
+        commit_hash.chars().all(|c| c.is_ascii_hexdigit()),
+        "Commit hash should be hexadecimal"
+    );
+
+    // Check that we have the correct branch
+    assert!(git_info.branch.is_some(), "Git info should contain branch");
+    let branch = git_info.branch.as_ref().unwrap();
+    assert_eq!(
+        branch, "integration-test-branch",
+        "Branch should match what we created"
+    );
+
+    // Check that we have the repository URL
+    assert!(
+        git_info.repository_url.is_some(),
+        "Git info should contain repository_url"
+    );
+    let repo_url = git_info.repository_url.as_ref().unwrap();
+    assert_eq!(
+        repo_url, "https://github.com/example/integration-test.git",
+        "Repository URL should match what we configured"
+    );
+
+    println!("✅ Git info collection test passed!");
+    println!("   Commit: {commit_hash}");
+    println!("   Branch: {branch}");
+    println!("   Repo: {repo_url}");
+
+    // 5. Round-trip through JSON and compare the three fields checked above
+    let serialized = serde_json::to_string(&git_info).unwrap();
+    let deserialized: codex_core::git_info::GitInfo = serde_json::from_str(&serialized).unwrap();
+
+    assert_eq!(git_info.commit_hash, deserialized.commit_hash);
+    assert_eq!(git_info.branch, deserialized.branch);
+    assert_eq!(git_info.repository_url, deserialized.repository_url);
+
+    println!("✅ Git info serialization test passed!");
+}
--- a/codex-rs/core/tests/client.rs
+++ b/codex-rs/core/tests/client.rs
@@ -0,0 +1,395 @@
+use std::path::PathBuf;
+
+use chrono::Utc;
+use codex_core::Codex;
+use codex_core::CodexSpawnOk;
+use codex_core::ModelProviderInfo;
+use codex_core::built_in_model_providers;
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::InputItem;
+use codex_core::protocol::Op;
+use codex_core::protocol::SessionConfiguredEvent;
+use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use codex_login::AuthDotJson;
+use codex_login::AuthMode;
+use codex_login::CodexAuth;
+use codex_login::TokenData;
+use core_test_support::load_default_config_for_test;
+use core_test_support::load_sse_fixture_with_id;
+use core_test_support::wait_for_event;
+use tempfile::TempDir;
+use wiremock::Mock;
+use wiremock::MockServer;
+use wiremock::ResponseTemplate;
+use wiremock::matchers::method;
+use wiremock::matchers::path;
+
+/// Returns the SSE body built by `load_sse_fixture_with_id` from `completed_template.json` for the given `id`.
+fn sse_completed(id: &str) -> String {
+    load_sse_fixture_with_id("tests/fixtures/completed_template.json", id)
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn includes_session_id_and_model_headers_in_request() {
+    #![allow(clippy::unwrap_used)]
+
+    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+        );
+        return;
+    }
+
+    // Mock server
+    let server = MockServer::start().await;
+
+    // Canned SSE response for the single request this test expects.
+    let first = ResponseTemplate::new(200)
+        .insert_header("content-type", "text/event-stream")
+        .set_body_raw(sse_completed("resp1"), "text/event-stream");
+
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(first)
+        .expect(1)
+        .mount(&server)
+        .await;
+
+    let model_provider = ModelProviderInfo {
+        base_url: Some(format!("{}/v1", server.uri())),
+        ..built_in_model_providers()["openai"].clone()
+    };
+
+    // Init session
+    let codex_home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&codex_home);
+    config.model_provider = model_provider;
+
+    let ctrl_c = std::sync::Arc::new(tokio::sync::Notify::new());
+    let CodexSpawnOk { codex, .. } = Codex::spawn(
+        config,
+        Some(CodexAuth::from_api_key("Test API Key".to_string())),
+        ctrl_c.clone(),
+    )
+    .await
+    .unwrap();
+
+    codex
+        .submit(Op::UserInput {
+            items: vec![InputItem::Text {
+                text: "hello".into(),
+            }],
+        })
+        .await
+        .unwrap();
+
+    let EventMsg::SessionConfigured(SessionConfiguredEvent { session_id, .. }) =
+        wait_for_event(&codex, |ev| matches!(ev, EventMsg::SessionConfigured(_))).await
+    else {
+        unreachable!()
+    };
+
+    let current_session_id = Some(session_id.to_string());
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    // Inspect the first request captured by the mock server.
+    let request = &server.received_requests().await.unwrap()[0];
+    let request_session_id = request.headers.get("session_id").unwrap();
+    let request_authorization = request.headers.get("authorization").unwrap();
+    let request_originator = request.headers.get("originator").unwrap();
+
+    assert!(current_session_id.is_some()); // always true: built with `Some(..)` above
+    assert_eq!(
+        request_session_id.to_str().unwrap(),
+        current_session_id.as_ref().unwrap()
+    );
+    assert_eq!(request_originator.to_str().unwrap(), "codex_cli_rs");
+    assert_eq!(
+        request_authorization.to_str().unwrap(),
+        "Bearer Test API Key"
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn includes_base_instructions_override_in_request() {
+    #![allow(clippy::unwrap_used)]
+
+    // Mock server. NOTE(review): unlike sibling tests, there is no CODEX_SANDBOX_NETWORK_DISABLED skip guard here; confirm.
+    let server = MockServer::start().await;
+
+    // Canned SSE response for the single request this test expects.
+    let first = ResponseTemplate::new(200)
+        .insert_header("content-type", "text/event-stream")
+        .set_body_raw(sse_completed("resp1"), "text/event-stream");
+
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(first)
+        .expect(1)
+        .mount(&server)
+        .await;
+
+    let model_provider = ModelProviderInfo {
+        base_url: Some(format!("{}/v1", server.uri())),
+        ..built_in_model_providers()["openai"].clone()
+    };
+    let codex_home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&codex_home);
+
+    config.base_instructions = Some("test instructions".to_string()); // the override under test
+    config.model_provider = model_provider;
+
+    let ctrl_c = std::sync::Arc::new(tokio::sync::Notify::new());
+    let CodexSpawnOk { codex, .. } = Codex::spawn(
+        config,
+        Some(CodexAuth::from_api_key("Test API Key".to_string())),
+        ctrl_c.clone(),
+    )
+    .await
+    .unwrap();
+
+    codex
+        .submit(Op::UserInput {
+            items: vec![InputItem::Text {
+                text: "hello".into(),
+            }],
+        })
+        .await
+        .unwrap();
+
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    let request = &server.received_requests().await.unwrap()[0];
+    let request_body = request.body_json::<serde_json::Value>().unwrap();
+
+    assert!(
+        request_body["instructions"]
+            .as_str()
+            .unwrap()
+            .contains("test instructions")
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn originator_config_override_is_used() {
+    #![allow(clippy::unwrap_used)]
+
+    // Mock server. NOTE(review): unlike sibling tests, there is no CODEX_SANDBOX_NETWORK_DISABLED skip guard here; confirm.
+    let server = MockServer::start().await;
+
+    let first = ResponseTemplate::new(200)
+        .insert_header("content-type", "text/event-stream")
+        .set_body_raw(sse_completed("resp1"), "text/event-stream");
+
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(first)
+        .expect(1)
+        .mount(&server)
+        .await;
+
+    let model_provider = ModelProviderInfo {
+        base_url: Some(format!("{}/v1", server.uri())),
+        ..built_in_model_providers()["openai"].clone()
+    };
+
+    let codex_home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&codex_home);
+    config.model_provider = model_provider;
+    config.internal_originator = Some("my_override".to_string()); // the override under test
+
+    let ctrl_c = std::sync::Arc::new(tokio::sync::Notify::new());
+    let CodexSpawnOk { codex, .. } = Codex::spawn(
+        config,
+        Some(CodexAuth::from_api_key("Test API Key".to_string())),
+        ctrl_c.clone(),
+    )
+    .await
+    .unwrap();
+
+    codex
+        .submit(Op::UserInput {
+            items: vec![InputItem::Text {
+                text: "hello".into(),
+            }],
+        })
+        .await
+        .unwrap();
+
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    let request = &server.received_requests().await.unwrap()[0];
+    let request_originator = request.headers.get("originator").unwrap();
+    assert_eq!(request_originator.to_str().unwrap(), "my_override"); // header carries the configured value
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn chatgpt_auth_sends_correct_request() {
+    #![allow(clippy::unwrap_used)]
+
+    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+        );
+        return;
+    }
+
+    // Mock server
+    let server = MockServer::start().await;
+
+    // Canned SSE response for the single request this test expects.
+    let first = ResponseTemplate::new(200)
+        .insert_header("content-type", "text/event-stream")
+        .set_body_raw(sse_completed("resp1"), "text/event-stream");
+
+    Mock::given(method("POST"))
+        .and(path("/api/codex/responses"))
+        .respond_with(first)
+        .expect(1)
+        .mount(&server)
+        .await;
+
+    let model_provider = ModelProviderInfo {
+        base_url: Some(format!("{}/api/codex", server.uri())),
+        ..built_in_model_providers()["openai"].clone()
+    };
+
+    // Init session
+    let codex_home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&codex_home);
+    config.model_provider = model_provider;
+    let ctrl_c = std::sync::Arc::new(tokio::sync::Notify::new());
+    let CodexSpawnOk { codex, .. } = Codex::spawn(
+        config,
+        Some(auth_from_token("Access Token".to_string())), // argument becomes `id_token`; `auth_from_token` hardcodes the access token
+        ctrl_c.clone(),
+    )
+    .await
+    .unwrap();
+
+    codex
+        .submit(Op::UserInput {
+            items: vec![InputItem::Text {
+                text: "hello".into(),
+            }],
+        })
+        .await
+        .unwrap();
+
+    let EventMsg::SessionConfigured(SessionConfiguredEvent { session_id, .. }) =
+        wait_for_event(&codex, |ev| matches!(ev, EventMsg::SessionConfigured(_))).await
+    else {
+        unreachable!()
+    };
+
+    let current_session_id = Some(session_id.to_string());
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    // Inspect the first request captured by the mock server.
+    let request = &server.received_requests().await.unwrap()[0];
+    let request_session_id = request.headers.get("session_id").unwrap();
+    let request_authorization = request.headers.get("authorization").unwrap();
+    let request_originator = request.headers.get("originator").unwrap();
+    let request_chatgpt_account_id = request.headers.get("chatgpt-account-id").unwrap();
+    let request_body = request.body_json::<serde_json::Value>().unwrap();
+
+    assert!(current_session_id.is_some()); // always true: built with `Some(..)` above
+    assert_eq!(
+        request_session_id.to_str().unwrap(),
+        current_session_id.as_ref().unwrap()
+    );
+    assert_eq!(request_originator.to_str().unwrap(), "codex_cli_rs");
+    assert_eq!(
+        request_authorization.to_str().unwrap(),
+        "Bearer Access Token"
+    );
+    assert_eq!(request_chatgpt_account_id.to_str().unwrap(), "account_id"); // matches `account_id` set in `auth_from_token`
+    assert!(!request_body["store"].as_bool().unwrap());
+    assert!(request_body["stream"].as_bool().unwrap());
+    assert_eq!(
+        request_body["include"][0].as_str().unwrap(),
+        "reasoning.encrypted_content"
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn includes_user_instructions_message_in_request() {
+    #![allow(clippy::unwrap_used)]
+
+    let server = MockServer::start().await; // NOTE(review): no CODEX_SANDBOX_NETWORK_DISABLED skip guard, unlike sibling tests; confirm
+
+    let first = ResponseTemplate::new(200)
+        .insert_header("content-type", "text/event-stream")
+        .set_body_raw(sse_completed("resp1"), "text/event-stream");
+
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(first)
+        .expect(1)
+        .mount(&server)
+        .await;
+
+    let model_provider = ModelProviderInfo {
+        base_url: Some(format!("{}/v1", server.uri())),
+        ..built_in_model_providers()["openai"].clone()
+    };
+
+    let codex_home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&codex_home);
+    config.model_provider = model_provider;
+    config.user_instructions = Some("be nice".to_string()); // the setting under test
+
+    let ctrl_c = std::sync::Arc::new(tokio::sync::Notify::new());
+    let CodexSpawnOk { codex, .. } = Codex::spawn(
+        config,
+        Some(CodexAuth::from_api_key("Test API Key".to_string())),
+        ctrl_c.clone(),
+    )
+    .await
+    .unwrap();
+
+    codex
+        .submit(Op::UserInput {
+            items: vec![InputItem::Text {
+                text: "hello".into(),
+            }],
+        })
+        .await
+        .unwrap();
+
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    let request = &server.received_requests().await.unwrap()[0];
+    let request_body = request.body_json::<serde_json::Value>().unwrap();
+
+    assert!( // user instructions must not appear in the `instructions` field
+        !request_body["instructions"]
+            .as_str()
+            .unwrap()
+            .contains("be nice")
+    );
+    assert_eq!(request_body["input"][0]["role"], "user"); // the first input item is a user message...
+    assert!( // ...whose first content text starts with the user instructions
+        request_body["input"][0]["content"][0]["text"]
+            .as_str()
+            .unwrap()
+            .starts_with("be nice")
+    );
+}
+fn auth_from_token(id_token: String) -> CodexAuth { // Test helper: ChatGPT-mode `CodexAuth` with canned tokens; only `id_token` is caller-supplied.
+    CodexAuth::new(
+        None,
+        AuthMode::ChatGPT,
+        PathBuf::new(), // empty path
+        Some(AuthDotJson {
+            openai_api_key: None,
+            tokens: Some(TokenData {
+                id_token,
+                access_token: "Access Token".to_string(), // asserted as "Bearer Access Token" in `chatgpt_auth_sends_correct_request`
+                refresh_token: "test".to_string(),
+                account_id: Some("account_id".to_string()), // asserted via the `chatgpt-account-id` header in the same test
+            }),
+            last_refresh: Some(Utc::now()),
+        }),
+    )
+}
--- a/codex-rs/core/tests/common/Cargo.toml
+++ b/codex-rs/core/tests/common/Cargo.toml
@@ -0,0 +1,13 @@
+[package]
+name = "core_test_support"
+version = { workspace = true }
+edition = "2024"
+
+[lib]
+path = "lib.rs"
+
+[dependencies]
+codex-core = { path = "../.." }
+serde_json = "1"
+tempfile = "3"
+tokio = { version = "1", features = ["time"] }
--- a/codex-rs/core/tests/test_support.rs
+++ b/codex-rs/core/tests/test_support.rs
@@ -1,9 +1,5 @@
 #![allow(clippy::expect_used)]

-// Helpers shared by the integration tests.  These are located inside the
-// `tests/` tree on purpose so they never become part of the public API surface
-// of the `codex-core` crate.
-
 use tempfile::TempDir;

 use codex_core::config::Config;
@@ -30,7 +26,6 @@ pub fn load_default_config_for_test(codex_home: &TempDir) -> Config {
 /// with only a `type` field results in an event with no `data:` section. This
 /// makes it trivial to extend the fixtures as OpenAI adds new event kinds or
 /// fields.
-#[allow(dead_code)]
 pub fn load_sse_fixture(path: impl AsRef<std::path::Path>) -> String {
    let events: Vec<serde_json::Value> =
        serde_json::from_reader(std::fs::File::open(path).expect("read fixture"))
@@ -55,7 +50,6 @@ pub fn load_sse_fixture(path: impl AsRef<std::path::Path>) -> String {
 /// fixture template with the supplied identifier before parsing. This lets a
 /// single JSON template be reused by multiple tests that each need a unique
 /// `response_id`.
-#[allow(dead_code)]
 pub fn load_sse_fixture_with_id(path: impl AsRef<std::path::Path>, id: &str) -> String {
    let raw = std::fs::read_to_string(path).expect("read fixture template");
    let replaced = raw.replace("__ID__", id);
@@ -76,3 +70,23 @@ pub fn load_sse_fixture_with_id(path: impl AsRef<std::path::Path>, id: &str) ->
        })
        .collect()
 }
+
+/// Pulls events from `codex` until one's message satisfies `predicate`, then
+/// returns that message; non-matching events are dropped.
+///
+/// Panics if any single `next_event()` wait exceeds one second or fails.
+pub async fn wait_for_event<F: FnMut(&codex_core::protocol::EventMsg) -> bool>(
+    codex: &codex_core::Codex,
+    mut predicate: F,
+) -> codex_core::protocol::EventMsg {
+    let per_event = tokio::time::Duration::from_secs(1);
+    loop {
+        let event = tokio::time::timeout(per_event, codex.next_event())
+            .await
+            .expect("timeout waiting for event")
+            .expect("stream ended unexpectedly");
+        if predicate(&event.msg) {
+            return event.msg;
+        }
+    }
+}
--- a/codex-rs/core/tests/compact.rs
+++ b/codex-rs/core/tests/compact.rs
@@ -0,0 +1,254 @@
+#![expect(clippy::unwrap_used)]
+
+use codex_core::Codex;
+use codex_core::CodexSpawnOk;
+use codex_core::ModelProviderInfo;
+use codex_core::built_in_model_providers;
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::InputItem;
+use codex_core::protocol::Op;
+use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use codex_login::CodexAuth;
+use core_test_support::load_default_config_for_test;
+use core_test_support::wait_for_event;
+use serde_json::Value;
+use tempfile::TempDir;
+use wiremock::Mock;
+use wiremock::MockServer;
+use wiremock::ResponseTemplate;
+use wiremock::matchers::method;
+use wiremock::matchers::path;
+
+use pretty_assertions::assert_eq;
+
+// --- Test helpers -----------------------------------------------------------
+
+/// Render each JSON event as an SSE record named after the event's `type` field.
+fn sse(events: Vec<Value>) -> String {
+    use std::fmt::Write as _;
+    let mut out = String::new();
+    for ev in events {
+        let kind = ev["type"].as_str().unwrap();
+        writeln!(&mut out, "event: {kind}").unwrap();
+        // An object holding nothing but `type` is emitted without a `data:` line.
+        match ev.as_object() {
+            Some(fields) if fields.len() == 1 => out.push('\n'),
+            _ => write!(&mut out, "data: {ev}\n\n").unwrap(),
+        }
+    }
+    out
+}
+
+/// Builds a `response.completed` event for response `id` with zeroed token counts.
+fn ev_completed(id: &str) -> Value {
+    serde_json::json!({
+        "type": "response.completed",
+        "response": {
+            "id": id,
+            "usage": {"input_tokens":0,"input_tokens_details":null,"output_tokens":0,"output_tokens_details":null,"total_tokens":0}
+        }
+    })
+}
+
+/// Builds a `response.output_item.done` event wrapping an assistant message with body `text`.
+fn ev_assistant_message(id: &str, text: &str) -> Value {
+    serde_json::json!({
+        "type": "response.output_item.done",
+        "item": {
+            "type": "message",
+            "role": "assistant",
+            "id": id,
+            "content": [{"type": "output_text", "text": text}]
+        }
+    })
+}
+
+fn sse_response(body: String) -> ResponseTemplate {
+    ResponseTemplate::new(200)
+        .insert_header("content-type", "text/event-stream")
+        .set_body_raw(body, "text/event-stream")
+}
+
+async fn mount_sse_once<M>(server: &MockServer, matcher: M, body: String)
+where
+    M: wiremock::Match + Send + Sync + 'static,
+{
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .and(matcher)
+        .respond_with(sse_response(body))
+        .expect(1)
+        .mount(server)
+        .await;
+}
+
+const FIRST_REPLY: &str = "FIRST_REPLY";
+const SUMMARY_TEXT: &str = "SUMMARY_ONLY_CONTEXT";
+const SUMMARIZE_TRIGGER: &str = "Start Summarization";
+const THIRD_USER_MSG: &str = "next turn";
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn summarize_context_three_requests_and_instructions() {
+    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+        );
+        return;
+    }
+
+    // Set up a mock server that we can inspect after the run.
+    let server = MockServer::start().await;
+
+    // SSE 1: assistant replies normally so it is recorded in history.
+    let sse1 = sse(vec![
+        ev_assistant_message("m1", FIRST_REPLY),
+        ev_completed("r1"),
+    ]);
+
+    // SSE 2: summarizer returns a summary message.
+    let sse2 = sse(vec![
+        ev_assistant_message("m2", SUMMARY_TEXT),
+        ev_completed("r2"),
+    ]);
+
+    // SSE 3: minimal completed; we only need to capture the request body.
+    let sse3 = sse(vec![ev_completed("r3")]);
+
+    // Mount three expectations, one per request, matched by body content.
+    let first_matcher = |req: &wiremock::Request| {
+        let body = std::str::from_utf8(&req.body).unwrap_or("");
+        body.contains("\"text\":\"hello world\"")
+            && !body.contains(&format!("\"text\":\"{SUMMARIZE_TRIGGER}\""))
+    };
+    mount_sse_once(&server, first_matcher, sse1).await;
+
+    let second_matcher = |req: &wiremock::Request| {
+        let body = std::str::from_utf8(&req.body).unwrap_or("");
+        body.contains(&format!("\"text\":\"{SUMMARIZE_TRIGGER}\""))
+    };
+    mount_sse_once(&server, second_matcher, sse2).await;
+
+    let third_matcher = |req: &wiremock::Request| {
+        let body = std::str::from_utf8(&req.body).unwrap_or("");
+        body.contains(&format!("\"text\":\"{THIRD_USER_MSG}\""))
+    };
+    mount_sse_once(&server, third_matcher, sse3).await;
+
+    // Build config pointing to the mock server and spawn Codex.
+    let model_provider = ModelProviderInfo {
+        base_url: Some(format!("{}/v1", server.uri())),
+        ..built_in_model_providers()["openai"].clone()
+    };
+    let home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&home);
+    config.model_provider = model_provider;
+    let ctrl_c = std::sync::Arc::new(tokio::sync::Notify::new());
+    let CodexSpawnOk { codex, .. } = Codex::spawn(
+        config,
+        Some(CodexAuth::from_api_key("dummy".to_string())),
+        ctrl_c.clone(),
+    )
+    .await
+    .unwrap();
+
+    // 1) Normal user input – should hit server once.
+    codex
+        .submit(Op::UserInput {
+            items: vec![InputItem::Text {
+                text: "hello world".into(),
+            }],
+        })
+        .await
+        .unwrap();
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    // 2) Summarize – second hit with summarization instructions.
+    codex.submit(Op::Compact).await.unwrap();
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    // 3) Next user input – third hit; history should include only the summary.
+    codex
+        .submit(Op::UserInput {
+            items: vec![InputItem::Text {
+                text: THIRD_USER_MSG.into(),
+            }],
+        })
+        .await
+        .unwrap();
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    // Inspect the three captured requests.
+    let requests = server.received_requests().await.unwrap();
+    assert_eq!(requests.len(), 3, "expected exactly three requests");
+
+    let req1 = &requests[0];
+    let req2 = &requests[1];
+    let req3 = &requests[2];
+
+    let body1 = req1.body_json::<serde_json::Value>().unwrap();
+    let body2 = req2.body_json::<serde_json::Value>().unwrap();
+    let body3 = req3.body_json::<serde_json::Value>().unwrap();
+
+    // System instructions should change for the summarization turn.
+    let instr1 = body1.get("instructions").and_then(|v| v.as_str()).unwrap();
+    let instr2 = body2.get("instructions").and_then(|v| v.as_str()).unwrap();
+    assert_ne!(
+        instr1, instr2,
+        "summarization should override base instructions"
+    );
+    assert!(
+        instr2.contains("You are a summarization assistant"),
+        "summarization instructions not applied"
+    );
+
+    // The summarization request should include the injected user input marker.
+    let input2 = body2.get("input").and_then(|v| v.as_array()).unwrap();
+    // The last item is the user message created from the injected input.
+    let last2 = input2.last().unwrap();
+    assert_eq!(last2.get("type").unwrap().as_str().unwrap(), "message");
+    assert_eq!(last2.get("role").unwrap().as_str().unwrap(), "user");
+    let text2 = last2["content"][0]["text"].as_str().unwrap();
+    assert!(text2.contains(SUMMARIZE_TRIGGER));
+
+    // Third request must contain only the summary from step 2 as prior history plus new user msg.
+    let input3 = body3.get("input").and_then(|v| v.as_array()).unwrap();
+    println!("third request body: {body3}");
+    assert!(
+        input3.len() >= 2,
+        "expected summary + new user message in third request"
+    );
+
+    // Collect all (role, text) message tuples.
+    let mut messages: Vec<(String, String)> = Vec::new();
+    for item in input3 {
+        if item["type"].as_str() == Some("message") {
+            let role = item["role"].as_str().unwrap_or_default().to_string();
+            let text = item["content"][0]["text"]
+                .as_str()
+                .unwrap_or_default()
+                .to_string();
+            messages.push((role, text));
+        }
+    }
+
+    // Exactly one assistant message should remain after compaction and the new user message is present.
+    let assistant_count = messages.iter().filter(|(r, _)| r == "assistant").count();
+    assert_eq!(
+        assistant_count, 1,
+        "exactly one assistant message should remain after compaction"
+    );
+    assert!(
+        messages
+            .iter()
+            .any(|(r, t)| r == "user" && t == THIRD_USER_MSG),
+        "third request should include the new user message"
+    );
+    assert!(
+        !messages.iter().any(|(_, t)| t.contains("hello world")),
+        "third request should not include the original user input"
+    );
+    assert!(
+        !messages.iter().any(|(_, t)| t.contains(SUMMARIZE_TRIGGER)),
+        "third request should not include the summarize trigger"
+    );
+}
--- a/codex-rs/core/tests/exec_stream_events.rs
+++ b/codex-rs/core/tests/exec_stream_events.rs
@@ -0,0 +1,143 @@
+#![cfg(unix)]
+
+use std::collections::HashMap;
+use std::path::PathBuf;
+use std::sync::Arc;
+
+use async_channel::Receiver;
+use codex_core::exec::ExecParams;
+use codex_core::exec::SandboxType;
+use codex_core::exec::StdoutStream;
+use codex_core::exec::process_exec_tool_call;
+use codex_core::protocol::Event;
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::ExecCommandOutputDeltaEvent;
+use codex_core::protocol::ExecOutputStream;
+use codex_core::protocol::SandboxPolicy;
+use tokio::sync::Notify;
+
+/// Drains every event already queued on `rx` without blocking and concatenates
+/// the chunks of the stdout output deltas, in arrival order.
+fn collect_stdout_events(rx: Receiver<Event>) -> Vec<u8> {
+    let mut bytes = Vec::new();
+    while let Ok(Event { msg, .. }) = rx.try_recv() {
+        let EventMsg::ExecCommandOutputDelta(delta) = msg else {
+            continue;
+        };
+        if matches!(delta.stream, ExecOutputStream::Stdout) {
+            bytes.extend_from_slice(&delta.chunk);
+        }
+    }
+    bytes
+}
+
+#[tokio::test]
+async fn test_exec_stdout_stream_events_echo() {
+    let (tx, rx) = async_channel::unbounded::<Event>();
+
+    let stdout_stream = StdoutStream {
+        sub_id: "test-sub".to_string(),
+        call_id: "call-1".to_string(),
+        tx_event: tx,
+    };
+
+    let cmd = vec![
+        "/bin/sh".to_string(),
+        "-c".to_string(),
+        // Use printf for predictable behavior across shells
+        "printf 'hello-world\n'".to_string(),
+    ];
+
+    let params = ExecParams {
+        command: cmd,
+        cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
+        timeout_ms: Some(5_000),
+        env: HashMap::new(),
+    };
+
+    let ctrl_c = Arc::new(Notify::new());
+    let policy = SandboxPolicy::new_read_only_policy();
+
+    let result = process_exec_tool_call(
+        params,
+        SandboxType::None,
+        ctrl_c,
+        &policy,
+        &None,
+        Some(stdout_stream),
+    )
+    .await;
+
+    let result = match result {
+        Ok(r) => r,
+        Err(e) => panic!("process_exec_tool_call failed: {e}"),
+    };
+
+    assert_eq!(result.exit_code, 0);
+    assert_eq!(result.stdout, "hello-world\n");
+
+    let streamed = collect_stdout_events(rx);
+    // The concatenated stdout deltas must reproduce the output exactly, however it was chunked.
+    assert_eq!(String::from_utf8_lossy(&streamed), "hello-world\n");
+}
+
+#[tokio::test]
+async fn test_exec_stderr_stream_events_echo() {
+    let (tx, rx) = async_channel::unbounded::<Event>();
+
+    let stdout_stream = StdoutStream {
+        sub_id: "test-sub".to_string(),
+        call_id: "call-2".to_string(),
+        tx_event: tx,
+    };
+
+    let cmd = vec![
+        "/bin/sh".to_string(),
+        "-c".to_string(),
+        // Write to stderr explicitly
+        "printf 'oops\n' 1>&2".to_string(),
+    ];
+
+    let params = ExecParams {
+        command: cmd,
+        cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
+        timeout_ms: Some(5_000),
+        env: HashMap::new(),
+    };
+
+    let ctrl_c = Arc::new(Notify::new());
+    let policy = SandboxPolicy::new_read_only_policy();
+
+    let result = process_exec_tool_call(
+        params,
+        SandboxType::None,
+        ctrl_c,
+        &policy,
+        &None,
+        Some(stdout_stream),
+    )
+    .await;
+
+    let result = match result {
+        Ok(r) => r,
+        Err(e) => panic!("process_exec_tool_call failed: {e}"),
+    };
+
+    assert_eq!(result.exit_code, 0);
+    assert_eq!(result.stdout, "");
+    assert_eq!(result.stderr, "oops\n");
+
+    // Collect only stderr delta events
+    let mut err = Vec::new();
+    while let Ok(ev) = rx.try_recv() {
+        if let EventMsg::ExecCommandOutputDelta(ExecCommandOutputDeltaEvent {
+            stream: ExecOutputStream::Stderr,
+            chunk,
+            ..
+        }) = ev.msg
+        {
+            err.extend_from_slice(&chunk);
+        }
+    }
+    assert_eq!(String::from_utf8_lossy(&err), "oops\n");
+}
--- a/codex-rs/core/tests/live_agent.rs
+++ b/codex-rs/core/tests/live_agent.rs
@@ -20,15 +20,15 @@
 use std::time::Duration;

 use codex_core::Codex;
+use codex_core::CodexSpawnOk;
 use codex_core::error::CodexErr;
 use codex_core::protocol::AgentMessageEvent;
 use codex_core::protocol::ErrorEvent;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::InputItem;
 use codex_core::protocol::Op;
-mod test_support;
+use core_test_support::load_default_config_for_test;
 use tempfile::TempDir;
-use test_support::load_default_config_for_test;
 use tokio::sync::Notify;
 use tokio::time::timeout;

@@ -45,23 +45,12 @@ async fn spawn_codex() -> Result<Codex, CodexErr> {
        "OPENAI_API_KEY must be set for live tests"
    );

-    // Environment tweaks to keep the tests snappy and inexpensive while still
-    // exercising retry/robustness logic.
-    //
-    // NOTE: Starting with the 2024 edition `std::env::set_var` is `unsafe`
-    // because changing the process environment races with any other threads
-    // that might be performing environment look-ups at the same time.
-    // Restrict the unsafety to this tiny block that happens at the very
-    // beginning of the test, before we spawn any background tasks that could
-    // observe the environment.
-    unsafe {
-        std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "2");
-        std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "2");
-    }
-
    let codex_home = TempDir::new().unwrap();
-    let config = load_default_config_for_test(&codex_home);
-    let (agent, _init_id) = Codex::spawn(config, std::sync::Arc::new(Notify::new())).await?;
+    let mut config = load_default_config_for_test(&codex_home);
+    config.model_provider.request_max_retries = Some(2);
+    config.model_provider.stream_max_retries = Some(2);
+    let CodexSpawnOk { codex: agent, .. } =
+        Codex::spawn(config, None, std::sync::Arc::new(Notify::new())).await?;

    Ok(agent)
 }
@@ -79,7 +68,7 @@ async fn live_streaming_and_prev_id_reset() {

    let codex = spawn_codex().await.unwrap();

-    // ---------- Task 1 ----------
+    // ---------- Task 1 ----------
    codex
        .submit(Op::UserInput {
            items: vec![InputItem::Text {
@@ -113,7 +102,7 @@ async fn live_streaming_and_prev_id_reset() {
        "Agent did not stream any AgentMessage before TaskComplete"
    );

-    // ---------- Task 2 (same session) ----------
+    // ---------- Task 2 (same session) ----------
    codex
        .submit(Op::UserInput {
            items: vec![InputItem::Text {
@@ -188,8 +177,7 @@ async fn live_shell_function_call() {
        match ev.msg {
            EventMsg::ExecCommandBegin(codex_core::protocol::ExecCommandBeginEvent {
                command,
-                call_id: _,
-                cwd: _,
+                ..
            }) => {
                assert_eq!(command, vec!["echo", MARKER]);
                saw_begin = true;
@@ -197,8 +185,7 @@ async fn live_shell_function_call() {
            EventMsg::ExecCommandEnd(codex_core::protocol::ExecCommandEndEvent {
                stdout,
                exit_code,
-                call_id: _,
-                stderr: _,
+                ..
            }) => {
                assert_eq!(exit_code, 0, "echo returned non‑zero exit code");
                assert!(stdout.contains(MARKER));
--- a/codex-rs/core/tests/previous_response_id.rs
+++ b/codex-rs/core/tests/previous_response_id.rs
@@ -1,167 +0,0 @@
-use std::time::Duration;
-
-use codex_core::Codex;
-use codex_core::ModelProviderInfo;
-use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
-use codex_core::protocol::ErrorEvent;
-use codex_core::protocol::EventMsg;
-use codex_core::protocol::InputItem;
-use codex_core::protocol::Op;
-mod test_support;
-use serde_json::Value;
-use tempfile::TempDir;
-use test_support::load_default_config_for_test;
-use test_support::load_sse_fixture_with_id;
-use tokio::time::timeout;
-use wiremock::Match;
-use wiremock::Mock;
-use wiremock::MockServer;
-use wiremock::Request;
-use wiremock::ResponseTemplate;
-use wiremock::matchers::method;
-use wiremock::matchers::path;
-
-/// Matcher asserting that JSON body has NO `previous_response_id` field.
-struct NoPrevId;
-
-impl Match for NoPrevId {
-    fn matches(&self, req: &Request) -> bool {
-        serde_json::from_slice::<Value>(&req.body)
-            .map(|v| v.get("previous_response_id").is_none())
-            .unwrap_or(false)
-    }
-}
-
-/// Matcher asserting that JSON body HAS a `previous_response_id` field.
-struct HasPrevId;
-
-impl Match for HasPrevId {
-    fn matches(&self, req: &Request) -> bool {
-        serde_json::from_slice::<Value>(&req.body)
-            .map(|v| v.get("previous_response_id").is_some())
-            .unwrap_or(false)
-    }
-}
-
-/// Build minimal SSE stream with completed marker using the JSON fixture.
-fn sse_completed(id: &str) -> String {
-    load_sse_fixture_with_id("tests/fixtures/completed_template.json", id)
-}
-
-#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn keeps_previous_response_id_between_tasks() {
-    #![allow(clippy::unwrap_used)]
-
-    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
-        println!(
-            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
-        );
-        return;
-    }
-
-    // Mock server
-    let server = MockServer::start().await;
-
-    // First request – must NOT include `previous_response_id`.
-    let first = ResponseTemplate::new(200)
-        .insert_header("content-type", "text/event-stream")
-        .set_body_raw(sse_completed("resp1"), "text/event-stream");
-
-    Mock::given(method("POST"))
-        .and(path("/v1/responses"))
-        .and(NoPrevId)
-        .respond_with(first)
-        .expect(1)
-        .mount(&server)
-        .await;
-
-    // Second request – MUST include `previous_response_id`.
-    let second = ResponseTemplate::new(200)
-        .insert_header("content-type", "text/event-stream")
-        .set_body_raw(sse_completed("resp2"), "text/event-stream");
-
-    Mock::given(method("POST"))
-        .and(path("/v1/responses"))
-        .and(HasPrevId)
-        .respond_with(second)
-        .expect(1)
-        .mount(&server)
-        .await;
-
-    // Environment
-    // Update environment – `set_var` is `unsafe` starting with the 2024
-    // edition so we group the calls into a single `unsafe { … }` block.
-    unsafe {
-        std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0");
-        std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "0");
-    }
-    let model_provider = ModelProviderInfo {
-        name: "openai".into(),
-        base_url: format!("{}/v1", server.uri()),
-        // Environment variable that should exist in the test environment.
-        // ModelClient will return an error if the environment variable for the
-        // provider is not set.
-        env_key: Some("PATH".into()),
-        env_key_instructions: None,
-        wire_api: codex_core::WireApi::Responses,
-        query_params: None,
-        http_headers: None,
-        env_http_headers: None,
-        supports_temperature: true,
-    };
-
-    // Init session
-    let codex_home = TempDir::new().unwrap();
-    let mut config = load_default_config_for_test(&codex_home);
-    config.model_provider = model_provider;
-    let ctrl_c = std::sync::Arc::new(tokio::sync::Notify::new());
-    let (codex, _init_id) = Codex::spawn(config, ctrl_c.clone()).await.unwrap();
-
-    // Task 1 – triggers first request (no previous_response_id)
-    codex
-        .submit(Op::UserInput {
-            items: vec![InputItem::Text {
-                text: "hello".into(),
-            }],
-        })
-        .await
-        .unwrap();
-
-    // Wait for TaskComplete
-    loop {
-        let ev = timeout(Duration::from_secs(1), codex.next_event())
-            .await
-            .unwrap()
-            .unwrap();
-        if matches!(ev.msg, EventMsg::TaskComplete(_)) {
-            break;
-        }
-    }
-
-    // Task 2 – should include `previous_response_id` (triggers second request)
-    codex
-        .submit(Op::UserInput {
-            items: vec![InputItem::Text {
-                text: "again".into(),
-            }],
-        })
-        .await
-        .unwrap();
-
-    // Wait for TaskComplete or error
-    loop {
-        let ev = timeout(Duration::from_secs(1), codex.next_event())
-            .await
-            .unwrap()
-            .unwrap();
-        match ev.msg {
-            EventMsg::TaskComplete(_) => break,
-            EventMsg::Error(ErrorEvent { message }) => {
-                panic!("unexpected error: {message}")
-            }
-            _ => {
-                // Ignore other events.
-            }
-        }
-    }
-}
--- a/codex-rs/core/tests/sandbox.rs
+++ b/codex-rs/core/tests/sandbox.rs
@@ -0,0 +1,195 @@
+#![cfg(target_os = "macos")]
+#![expect(clippy::expect_used)]
+
+use std::collections::HashMap;
+use std::path::Path;
+use std::path::PathBuf;
+
+use codex_core::protocol::SandboxPolicy;
+use codex_core::seatbelt::spawn_command_under_seatbelt;
+use codex_core::spawn::CODEX_SANDBOX_ENV_VAR;
+use codex_core::spawn::StdioPolicy;
+use tempfile::TempDir;
+
+/// Paths laid out by `create_test_scenario` for probing sandbox write access.
+struct TestScenario {
+    /// Directory containing the repo (the temp dir itself).
+    repo_parent: PathBuf,
+    /// Probe file directly under `repo_parent`, outside the repo.
+    file_outside_repo: PathBuf,
+    /// `repo_parent/repo`; holds a `.git` directory.
+    repo_root: PathBuf,
+    /// Probe file directly under `repo_root`.
+    file_in_repo_root: PathBuf,
+    /// Probe file inside `repo_root/.git`.
+    file_in_dot_git_dir: PathBuf,
+}
+
+/// Expected outcome of touching each probe file under a given policy:
+/// `true` means the touch should succeed and the file should then exist.
+struct TestExpectations {
+    file_outside_repo_is_writable: bool,
+    file_in_repo_root_is_writable: bool,
+    file_in_dot_git_dir_is_writable: bool,
+}
+
+impl TestScenario {
+    /// Touches every probe file under `policy` and asserts that both the
+    /// command's success and the file's existence afterwards match
+    /// `expectations`.
+    ///
+    /// Returns without asserting anything when `CODEX_SANDBOX_ENV_VAR` is
+    /// `seatbelt`.
+    async fn run_test(&self, policy: &SandboxPolicy, expectations: TestExpectations) {
+        // `as_deref` lets us compare against a `&str` without allocating.
+        if std::env::var(CODEX_SANDBOX_ENV_VAR).as_deref() == Ok("seatbelt") {
+            eprintln!("{CODEX_SANDBOX_ENV_VAR} is set to 'seatbelt', skipping test.");
+            return;
+        }
+
+        // Probed in this order: outside the repo, repo root, `.git` directory.
+        let probes = [
+            (&self.file_outside_repo, expectations.file_outside_repo_is_writable),
+            (&self.file_in_repo_root, expectations.file_in_repo_root_is_writable),
+            (&self.file_in_dot_git_dir, expectations.file_in_dot_git_dir_is_writable),
+        ];
+        for (file, writable) in probes {
+            let touched = touch(file, policy).await;
+            assert_eq!(touched, writable, "touch success for {file:?}");
+            assert_eq!(file.exists(), writable, "existence of {file:?}");
+        }
+    }
+}
+
+/// If the user has added a workspace root that is not a Git repo root, then
+/// the user has to specify `--skip-git-repo-check` or go through some
+/// interstitial that indicates they are taking on some risk because Git
+/// cannot be used to back up their work before the agent begins.
+///
+/// Because the user has agreed to this risk, we do not try to find all .git
+/// folders in the workspace and block them (though we could change our
+/// position on this in the future).
+#[tokio::test]
+async fn if_parent_of_repo_is_writable_then_dot_git_folder_is_writable() {
+    let tmp = TempDir::new().expect("should be able to create temp dir");
+    let test_scenario = create_test_scenario(&tmp);
+    let policy = SandboxPolicy::WorkspaceWrite {
+        writable_roots: vec![test_scenario.repo_parent.clone()],
+        network_access: false,
+        include_default_writable_roots: false,
+    };
+
+    test_scenario
+        .run_test(
+            &policy,
+            TestExpectations {
+                file_outside_repo_is_writable: true,
+                file_in_repo_root_is_writable: true,
+                file_in_dot_git_dir_is_writable: true,
+            },
+        )
+        .await;
+}
+
+/// When the writable root is the root of a Git repository (as evidenced by the
+/// presence of a .git folder), then the .git folder should be read-only if
+/// the policy is `WorkspaceWrite`.
+#[tokio::test]
+async fn if_git_repo_is_writable_root_then_dot_git_folder_is_read_only() {
+    let tmp = TempDir::new().expect("should be able to create temp dir");
+    let test_scenario = create_test_scenario(&tmp);
+    let policy = SandboxPolicy::WorkspaceWrite {
+        writable_roots: vec![test_scenario.repo_root.clone()],
+        network_access: false,
+        include_default_writable_roots: false,
+    };
+
+    test_scenario
+        .run_test(
+            &policy,
+            TestExpectations {
+                file_outside_repo_is_writable: false,
+                file_in_repo_root_is_writable: true,
+                file_in_dot_git_dir_is_writable: false,
+            },
+        )
+        .await;
+}
+
+/// Under DangerFullAccess, all writes should be permitted anywhere on disk,
+/// including inside the .git folder.
+#[tokio::test]
+async fn danger_full_access_allows_all_writes() {
+    let tmp = TempDir::new().expect("should be able to create temp dir");
+    let test_scenario = create_test_scenario(&tmp);
+    let policy = SandboxPolicy::DangerFullAccess;
+
+    test_scenario
+        .run_test(
+            &policy,
+            TestExpectations {
+                file_outside_repo_is_writable: true,
+                file_in_repo_root_is_writable: true,
+                file_in_dot_git_dir_is_writable: true,
+            },
+        )
+        .await;
+}
+
+/// Under ReadOnly, writes should not be permitted anywhere on disk.
+#[tokio::test]
+async fn read_only_forbids_all_writes() {
+    let tmp = TempDir::new().expect("should be able to create temp dir");
+    let test_scenario = create_test_scenario(&tmp);
+    let policy = SandboxPolicy::ReadOnly;
+
+    test_scenario
+        .run_test(
+            &policy,
+            TestExpectations {
+                file_outside_repo_is_writable: false,
+                file_in_repo_root_is_writable: false,
+                file_in_dot_git_dir_is_writable: false,
+            },
+        )
+        .await;
+}
+
+fn create_test_scenario(tmp: &TempDir) -> TestScenario {
+    let repo_parent = tmp.path().to_path_buf();
+    let repo_root = repo_parent.join("repo");
+    let dot_git_dir = repo_root.join(".git");
+
+    std::fs::create_dir(&repo_root).expect("should be able to create repo root");
+    std::fs::create_dir(&dot_git_dir).expect("should be able to create .git dir");
+
+    TestScenario {
+        file_outside_repo: repo_parent.join("outside.txt"),
+        repo_parent,
+        file_in_repo_root: repo_root.join("repo_file.txt"),
+        repo_root,
+        file_in_dot_git_dir: dot_git_dir.join("dot_git_file.txt"),
+    }
+}
+
+/// Touches `path` (must be absolute) under seatbelt with `policy`; true iff the command succeeds.
+async fn touch(path: &Path, policy: &SandboxPolicy) -> bool {
+    assert!(path.is_absolute(), "Path must be absolute: {path:?}");
+    let mut child = spawn_command_under_seatbelt(
+        vec![
+            "/usr/bin/touch".to_string(),
+            path.to_string_lossy().to_string(),
+        ],
+        policy,
+        std::env::current_dir().expect("should be able to get current dir"),
+        StdioPolicy::RedirectForShellTool,
+        HashMap::new(),
+    )
+    .await
+    .expect("should be able to spawn command under seatbelt");
+    child
+        .wait()
+        .await
+        .expect("should be able to wait for child process")
+        .success()
+}
--- a/codex-rs/core/tests/stream_no_completed.rs
+++ b/codex-rs/core/tests/stream_no_completed.rs
@@ -4,16 +4,17 @@
 use std::time::Duration;

 use codex_core::Codex;
+use codex_core::CodexSpawnOk;
 use codex_core::ModelProviderInfo;
-use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::InputItem;
 use codex_core::protocol::Op;
-mod test_support;
+use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use codex_login::CodexAuth;
+use core_test_support::load_default_config_for_test;
+use core_test_support::load_sse_fixture;
+use core_test_support::load_sse_fixture_with_id;
 use tempfile::TempDir;
-use test_support::load_default_config_for_test;
-use test_support::load_sse_fixture;
-use test_support::load_sse_fixture_with_id;
 use tokio::time::timeout;
 use wiremock::Mock;
 use wiremock::MockServer;
@@ -70,23 +71,12 @@ async fn retries_on_early_close() {
        .mount(&server)
        .await;

-    // Environment
-    //
-    // As of Rust 2024 `std::env::set_var` has been made `unsafe` because
-    // mutating the process environment is inherently racy when other threads
-    // are running.  We therefore have to wrap every call in an explicit
-    // `unsafe` block.  These are limited to the test-setup section so the
-    // scope is very small and clearly delineated.
-
-    unsafe {
-        std::env::set_var("OPENAI_REQUEST_MAX_RETRIES", "0");
-        std::env::set_var("OPENAI_STREAM_MAX_RETRIES", "1");
-        std::env::set_var("OPENAI_STREAM_IDLE_TIMEOUT_MS", "2000");
-    }
+    // Configure retry behavior explicitly to avoid mutating process-wide
+    // environment variables.

    let model_provider = ModelProviderInfo {
        name: "openai".into(),
-        base_url: format!("{}/v1", server.uri()),
+        base_url: Some(format!("{}/v1", server.uri())),
        // Environment variable that should exist in the test environment.
        // ModelClient will return an error if the environment variable for the
        // provider is not set.
@@ -96,14 +86,24 @@ async fn retries_on_early_close() {
        query_params: None,
        http_headers: None,
        env_http_headers: None,
-        supports_temperature: true,
+        // exercise retry path: first attempt yields incomplete stream, so allow 1 retry
+        request_max_retries: Some(0),
+        stream_max_retries: Some(1),
+        stream_idle_timeout_ms: Some(2000),
+        requires_auth: false,
    };

    let ctrl_c = std::sync::Arc::new(tokio::sync::Notify::new());
    let codex_home = TempDir::new().unwrap();
    let mut config = load_default_config_for_test(&codex_home);
    config.model_provider = model_provider;
-    let (codex, _init_id) = Codex::spawn(config, ctrl_c).await.unwrap();
+    let CodexSpawnOk { codex, .. } = Codex::spawn(
+        config,
+        Some(CodexAuth::from_api_key("Test API Key".to_string())),
+        ctrl_c,
+    )
+    .await
+    .unwrap();

    codex
        .submit(Op::UserInput {
--- a/codex-rs/exec/Cargo.toml
+++ b/codex-rs/exec/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
+edition = "2024"
 name = "codex-exec"
 version = { workspace = true }
-edition = "2024"

 [[bin]]
 name = "codex-exec"
@@ -18,13 +18,13 @@ workspace = true
 anyhow = "1"
 chrono = "0.4.40"
 clap = { version = "4", features = ["derive"] }
-codex-core = { path = "../core" }
+codex-arg0 = { path = "../arg0" }
 codex-common = { path = "../common", features = [
    "cli",
    "elapsed",
    "sandbox_summary",
 ] }
-codex-linux-sandbox = { path = "../linux-sandbox" }
+codex-core = { path = "../core" }
 owo-colors = "4.2.0"
 serde_json = "1"
 shlex = "1.3.0"
@@ -37,3 +37,8 @@ tokio = { version = "1", features = [
 ] }
 tracing = { version = "0.1.41", features = ["log"] }
 tracing-subscriber = { version = "0.3.19", features = ["env-filter"] }
+
+[dev-dependencies]
+assert_cmd = "2"
+predicates = "3"
+tempfile = "3.13.0"
--- a/codex-rs/exec/src/cli.rs
+++ b/codex-rs/exec/src/cli.rs
@@ -51,6 +51,10 @@ pub struct Cli {
    #[arg(long = "color", value_enum, default_value_t = Color::Auto)]
    pub color: Color,

+    /// Print events to stdout as JSONL.
+    #[arg(long = "json", default_value_t = false)]
+    pub json: bool,
+
    /// Specifies file where the last message from the agent should be written.
    #[arg(long = "output-last-message")]
    pub last_message_file: Option<PathBuf>,
--- a/codex-rs/exec/src/event_processor.rs
+++ b/codex-rs/exec/src/event_processor.rs
@@ -1,492 +1,70 @@
-use codex_common::elapsed::format_elapsed;
+use std::path::Path;
+
 use codex_common::summarize_sandbox_policy;
 use codex_core::WireApi;
 use codex_core::config::Config;
 use codex_core::model_supports_reasoning_summaries;
-use codex_core::protocol::AgentMessageEvent;
-use codex_core::protocol::BackgroundEventEvent;
-use codex_core::protocol::ErrorEvent;
 use codex_core::protocol::Event;
-use codex_core::protocol::EventMsg;
-use codex_core::protocol::ExecCommandBeginEvent;
-use codex_core::protocol::ExecCommandEndEvent;
-use codex_core::protocol::FileChange;
-use codex_core::protocol::McpToolCallBeginEvent;
-use codex_core::protocol::McpToolCallEndEvent;
-use codex_core::protocol::PatchApplyBeginEvent;
-use codex_core::protocol::PatchApplyEndEvent;
-use codex_core::protocol::SessionConfiguredEvent;
-use codex_core::protocol::TokenUsage;
-use owo_colors::OwoColorize;
-use owo_colors::Style;
-use shlex::try_join;
-use std::collections::HashMap;
-use std::time::Instant;

-/// This should be configurable. When used in CI, users may not want to impose
-/// a limit so they can see the full transcript.
-const MAX_OUTPUT_LINES_FOR_EXEC_TOOL_CALL: usize = 20;
-
-pub(crate) struct EventProcessor {
-    call_id_to_command: HashMap<String, ExecCommandBegin>,
-    call_id_to_patch: HashMap<String, PatchApplyBegin>,
-
-    /// Tracks in-flight MCP tool calls so we can calculate duration and print
-    /// a concise summary when the corresponding `McpToolCallEnd` event is
-    /// received.
-    call_id_to_tool_call: HashMap<String, McpToolCallBegin>,
-
-    // To ensure that --color=never is respected, ANSI escapes _must_ be added
-    // using .style() with one of these fields. If you need a new style, add a
-    // new field here.
-    bold: Style,
-    italic: Style,
-    dimmed: Style,
-
-    magenta: Style,
-    red: Style,
-    green: Style,
-    cyan: Style,
-
-    /// Whether to include `AgentReasoning` events in the output.
-    show_agent_reasoning: bool,
+pub(crate) enum CodexStatus {
+    Running,
+    InitiateShutdown,
+    Shutdown,
 }

-impl EventProcessor {
-    pub(crate) fn create_with_ansi(with_ansi: bool, show_agent_reasoning: bool) -> Self {
-        let call_id_to_command = HashMap::new();
-        let call_id_to_patch = HashMap::new();
-        let call_id_to_tool_call = HashMap::new();
+pub(crate) trait EventProcessor {
+    /// Print summary of effective configuration and user prompt.
+    fn print_config_summary(&mut self, config: &Config, prompt: &str);

-        if with_ansi {
-            Self {
-                call_id_to_command,
-                call_id_to_patch,
-                bold: Style::new().bold(),
-                italic: Style::new().italic(),
-                dimmed: Style::new().dimmed(),
-                magenta: Style::new().magenta(),
-                red: Style::new().red(),
-                green: Style::new().green(),
-                cyan: Style::new().cyan(),
-                call_id_to_tool_call,
-                show_agent_reasoning,
-            }
-        } else {
-            Self {
-                call_id_to_command,
-                call_id_to_patch,
-                bold: Style::new(),
-                italic: Style::new(),
-                dimmed: Style::new(),
-                magenta: Style::new(),
-                red: Style::new(),
-                green: Style::new(),
-                cyan: Style::new(),
-                call_id_to_tool_call,
-                show_agent_reasoning,
-            }
+    /// Handle a single event emitted by the agent.
+    fn process_event(&mut self, event: Event) -> CodexStatus;
+}
+
+/// Builds the `(label, value)` rows of the config summary, in display order.
+/// The two reasoning rows are appended only when the provider's wire API is
+/// `WireApi::Responses` and `model_supports_reasoning_summaries(config)` holds.
+pub(crate) fn create_config_summary_entries(config: &Config) -> Vec<(&'static str, String)> {
+    let mut rows = vec![
+        ("workdir", config.cwd.display().to_string()),
+        ("model", config.model.clone()),
+        ("provider", config.model_provider_id.clone()),
+        ("approval", config.approval_policy.to_string()),
+        ("sandbox", summarize_sandbox_policy(&config.sandbox_policy)),
+    ];
+    let uses_responses_api = config.model_provider.wire_api == WireApi::Responses;
+    if uses_responses_api && model_supports_reasoning_summaries(config) {
+        let effort = config.model_reasoning_effort.to_string();
+        let summaries = config.model_reasoning_summary.to_string();
+        rows.extend([
+            ("reasoning effort", effort),
+            ("reasoning summaries", summaries),
+        ]);
+    }
+
+    rows
+}
+
+/// Writes `last_agent_message` (or empty content, with a warning, when it is
+/// `None`) to `last_message_path`; does nothing when no path was requested.
+pub(crate) fn handle_last_message(
+    last_agent_message: Option<&str>,
+    last_message_path: Option<&Path>,
+) {
+    if let Some(path) = last_message_path {
+        write_last_message_file(last_agent_message.unwrap_or(""), Some(path));
+        if last_agent_message.is_none() {
+            eprintln!(
+                "Warning: no last agent message; wrote empty content to {}",
+                path.display()
+            );
         }
     }
 }

-struct ExecCommandBegin {
-    command: Vec<String>,
-    start_time: Instant,
-}
-
-/// Metadata captured when an `McpToolCallBegin` event is received.
-struct McpToolCallBegin {
-    /// Formatted invocation string, e.g. `server.tool({"city":"sf"})`.
-    invocation: String,
-    /// Timestamp when the call started so we can compute duration later.
-    start_time: Instant,
-}
-
-struct PatchApplyBegin {
-    start_time: Instant,
-    auto_approved: bool,
-}
-
-// Timestamped println helper. The timestamp is styled with self.dimmed.
-#[macro_export]
-macro_rules! ts_println {
-    ($self:ident, $($arg:tt)*) => {{
-        let now = chrono::Utc::now();
-        let formatted = now.format("[%Y-%m-%dT%H:%M:%S]");
-        print!("{} ", formatted.style($self.dimmed));
-        println!($($arg)*);
-    }};
-}
-
-impl EventProcessor {
-    /// Print a concise summary of the effective configuration that will be used
-    /// for the session. This mirrors the information shown in the TUI welcome
-    /// screen.
-    pub(crate) fn print_config_summary(&mut self, config: &Config, prompt: &str) {
-        const VERSION: &str = env!("CARGO_PKG_VERSION");
-        ts_println!(
-            self,
-            "OpenAI Codex v{} (research preview)\n--------",
-            VERSION
-        );
-
-        let mut entries = vec![
-            ("workdir", config.cwd.display().to_string()),
-            ("model", config.model.clone()),
-            ("provider", config.model_provider_id.clone()),
-            ("approval", format!("{:?}", config.approval_policy)),
-            ("sandbox", summarize_sandbox_policy(&config.sandbox_policy)),
-        ];
-        if config.model_provider.wire_api == WireApi::Responses
-            && model_supports_reasoning_summaries(config)
-        {
-            entries.push((
-                "reasoning effort",
-                config.model_reasoning_effort.to_string(),
-            ));
-            entries.push((
-                "reasoning summaries",
-                config.model_reasoning_summary.to_string(),
-            ));
-        }
-
-        for (key, value) in entries {
-            println!("{} {}", format!("{key}:").style(self.bold), value);
-        }
-
-        println!("--------");
-
-        // Echo the prompt that will be sent to the agent so it is visible in the
-        // transcript/logs before any events come in. Note the prompt may have been
-        // read from stdin, so it may not be visible in the terminal otherwise.
-        ts_println!(
-            self,
-            "{}\n{}",
-            "User instructions:".style(self.bold).style(self.cyan),
-            prompt
-        );
-    }
-
-    pub(crate) fn process_event(&mut self, event: Event) {
-        let Event { id: _, msg } = event;
-        match msg {
-            EventMsg::Error(ErrorEvent { message }) => {
-                let prefix = "ERROR:".style(self.red);
-                ts_println!(self, "{prefix} {message}");
-            }
-            EventMsg::BackgroundEvent(BackgroundEventEvent { message }) => {
-                ts_println!(self, "{}", message.style(self.dimmed));
-            }
-            EventMsg::TaskStarted | EventMsg::TaskComplete(_) => {
-                // Ignore.
-            }
-            EventMsg::TokenCount(TokenUsage { total_tokens, .. }) => {
-                ts_println!(self, "tokens used: {total_tokens}");
-            }
-            EventMsg::AgentMessage(AgentMessageEvent { message }) => {
-                ts_println!(
-                    self,
-                    "{}\n{message}",
-                    "codex".style(self.bold).style(self.magenta)
-                );
-            }
-            EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
-                call_id,
-                command,
-                cwd,
-            }) => {
-                self.call_id_to_command.insert(
-                    call_id.clone(),
-                    ExecCommandBegin {
-                        command: command.clone(),
-                        start_time: Instant::now(),
-                    },
-                );
-                ts_println!(
-                    self,
-                    "{} {} in {}",
-                    "exec".style(self.magenta),
-                    escape_command(&command).style(self.bold),
-                    cwd.to_string_lossy(),
-                );
-            }
-            EventMsg::ExecCommandEnd(ExecCommandEndEvent {
-                call_id,
-                stdout,
-                stderr,
-                exit_code,
-            }) => {
-                let exec_command = self.call_id_to_command.remove(&call_id);
-                let (duration, call) = if let Some(ExecCommandBegin {
-                    command,
-                    start_time,
-                }) = exec_command
-                {
-                    (
-                        format!(" in {}", format_elapsed(start_time)),
-                        format!("{}", escape_command(&command).style(self.bold)),
-                    )
-                } else {
-                    ("".to_string(), format!("exec('{call_id}')"))
-                };
-
-                let output = if exit_code == 0 { stdout } else { stderr };
-                let truncated_output = output
-                    .lines()
-                    .take(MAX_OUTPUT_LINES_FOR_EXEC_TOOL_CALL)
-                    .collect::<Vec<_>>()
-                    .join("\n");
-                match exit_code {
-                    0 => {
-                        let title = format!("{call} succeeded{duration}:");
-                        ts_println!(self, "{}", title.style(self.green));
-                    }
-                    _ => {
-                        let title = format!("{call} exited {exit_code}{duration}:");
-                        ts_println!(self, "{}", title.style(self.red));
-                    }
-                }
-                println!("{}", truncated_output.style(self.dimmed));
-            }
-            EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
-                call_id,
-                server,
-                tool,
-                arguments,
-            }) => {
-                // Build fully-qualified tool name: server.tool
-                let fq_tool_name = format!("{server}.{tool}");
-
-                // Format arguments as compact JSON so they fit on one line.
-                let args_str = arguments
-                    .as_ref()
-                    .map(|v: &serde_json::Value| {
-                        serde_json::to_string(v).unwrap_or_else(|_| v.to_string())
-                    })
-                    .unwrap_or_default();
-
-                let invocation = if args_str.is_empty() {
-                    format!("{fq_tool_name}()")
-                } else {
-                    format!("{fq_tool_name}({args_str})")
-                };
-
-                self.call_id_to_tool_call.insert(
-                    call_id.clone(),
-                    McpToolCallBegin {
-                        invocation: invocation.clone(),
-                        start_time: Instant::now(),
-                    },
-                );
-
-                ts_println!(
-                    self,
-                    "{} {}",
-                    "tool".style(self.magenta),
-                    invocation.style(self.bold),
-                );
-            }
-            EventMsg::McpToolCallEnd(tool_call_end_event) => {
-                let is_success = tool_call_end_event.is_success();
-                let McpToolCallEndEvent { call_id, result } = tool_call_end_event;
-                // Retrieve start time and invocation for duration calculation and labeling.
-                let info = self.call_id_to_tool_call.remove(&call_id);
-
-                let (duration, invocation) = if let Some(McpToolCallBegin {
-                    invocation,
-                    start_time,
-                    ..
-                }) = info
-                {
-                    (format!(" in {}", format_elapsed(start_time)), invocation)
-                } else {
-                    (String::new(), format!("tool('{call_id}')"))
-                };
-
-                let status_str = if is_success { "success" } else { "failed" };
-                let title_style = if is_success { self.green } else { self.red };
-                let title = format!("{invocation} {status_str}{duration}:");
-
-                ts_println!(self, "{}", title.style(title_style));
-
-                if let Ok(res) = result {
-                    let val: serde_json::Value = res.into();
-                    let pretty =
-                        serde_json::to_string_pretty(&val).unwrap_or_else(|_| val.to_string());
-
-                    for line in pretty.lines().take(MAX_OUTPUT_LINES_FOR_EXEC_TOOL_CALL) {
-                        println!("{}", line.style(self.dimmed));
-                    }
-                }
-            }
-            EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
-                call_id,
-                auto_approved,
-                changes,
-            }) => {
-                // Store metadata so we can calculate duration later when we
-                // receive the corresponding PatchApplyEnd event.
-                self.call_id_to_patch.insert(
-                    call_id.clone(),
-                    PatchApplyBegin {
-                        start_time: Instant::now(),
-                        auto_approved,
-                    },
-                );
-
-                ts_println!(
-                    self,
-                    "{} auto_approved={}:",
-                    "apply_patch".style(self.magenta),
-                    auto_approved,
-                );
-
-                // Pretty-print the patch summary with colored diff markers so
-                // it’s easy to scan in the terminal output.
-                for (path, change) in changes.iter() {
-                    match change {
-                        FileChange::Add { content } => {
-                            let header = format!(
-                                "{} {}",
-                                format_file_change(change),
-                                path.to_string_lossy()
-                            );
-                            println!("{}", header.style(self.magenta));
-                            for line in content.lines() {
-                                println!("{}", line.style(self.green));
-                            }
-                        }
-                        FileChange::Delete => {
-                            let header = format!(
-                                "{} {}",
-                                format_file_change(change),
-                                path.to_string_lossy()
-                            );
-                            println!("{}", header.style(self.magenta));
-                        }
-                        FileChange::Update {
-                            unified_diff,
-                            move_path,
-                        } => {
-                            let header = if let Some(dest) = move_path {
-                                format!(
-                                    "{} {} -> {}",
-                                    format_file_change(change),
-                                    path.to_string_lossy(),
-                                    dest.to_string_lossy()
-                                )
-                            } else {
-                                format!("{} {}", format_file_change(change), path.to_string_lossy())
-                            };
-                            println!("{}", header.style(self.magenta));
-
-                            // Colorize diff lines. We keep file header lines
-                            // (--- / +++) without extra coloring so they are
-                            // still readable.
-                            for diff_line in unified_diff.lines() {
-                                if diff_line.starts_with('+') && !diff_line.starts_with("+++") {
-                                    println!("{}", diff_line.style(self.green));
-                                } else if diff_line.starts_with('-')
-                                    && !diff_line.starts_with("---")
-                                {
-                                    println!("{}", diff_line.style(self.red));
-                                } else {
-                                    println!("{diff_line}");
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-            EventMsg::PatchApplyEnd(PatchApplyEndEvent {
-                call_id,
-                stdout,
-                stderr,
-                success,
-            }) => {
-                let patch_begin = self.call_id_to_patch.remove(&call_id);
-
-                // Compute duration and summary label similar to exec commands.
-                let (duration, label) = if let Some(PatchApplyBegin {
-                    start_time,
-                    auto_approved,
-                }) = patch_begin
-                {
-                    (
-                        format!(" in {}", format_elapsed(start_time)),
-                        format!("apply_patch(auto_approved={auto_approved})"),
-                    )
-                } else {
-                    (String::new(), format!("apply_patch('{call_id}')"))
-                };
-
-                let (exit_code, output, title_style) = if success {
-                    (0, stdout, self.green)
-                } else {
-                    (1, stderr, self.red)
-                };
-
-                let title = format!("{label} exited {exit_code}{duration}:");
-                ts_println!(self, "{}", title.style(title_style));
-                for line in output.lines() {
-                    println!("{}", line.style(self.dimmed));
-                }
-            }
-            EventMsg::ExecApprovalRequest(_) => {
-                // Should we exit?
-            }
-            EventMsg::ApplyPatchApprovalRequest(_) => {
-                // Should we exit?
-            }
-            EventMsg::AgentReasoning(agent_reasoning_event) => {
-                if self.show_agent_reasoning {
-                    ts_println!(
-                        self,
-                        "{}\n{}",
-                        "thinking".style(self.italic).style(self.magenta),
-                        agent_reasoning_event.text
-                    );
-                }
-            }
-            EventMsg::SessionConfigured(session_configured_event) => {
-                let SessionConfiguredEvent {
-                    session_id,
-                    model,
-                    history_log_id: _,
-                    history_entry_count: _,
-                } = session_configured_event;
-
-                ts_println!(
-                    self,
-                    "{} {}",
-                    "codex session".style(self.magenta).style(self.bold),
-                    session_id.to_string().style(self.dimmed)
-                );
-
-                ts_println!(self, "model: {}", model);
-                println!();
-            }
-            EventMsg::GetHistoryEntryResponse(_) => {
-                // Currently ignored in exec output.
-            }
+fn write_last_message_file(contents: &str, last_message_path: Option<&Path>) {
+    if let Some(path) = last_message_path {
+        if let Err(e) = std::fs::write(path, contents) {
+            eprintln!("Failed to write last message file {path:?}: {e}");
        }
    }
 }
-
-fn escape_command(command: &[String]) -> String {
-    try_join(command.iter().map(|s| s.as_str())).unwrap_or_else(|_| command.join(" "))
-}
-
-fn format_file_change(change: &FileChange) -> &'static str {
-    match change {
-        FileChange::Add { .. } => "A",
-        FileChange::Delete => "D",
-        FileChange::Update {
-            move_path: Some(_), ..
-        } => "R",
-        FileChange::Update {
-            move_path: None, ..
-        } => "M",
-    }
-}
--- a/codex-rs/exec/src/event_processor_with_human_output.rs
+++ b/codex-rs/exec/src/event_processor_with_human_output.rs
@@ -0,0 +1,517 @@
+use codex_common::elapsed::format_duration;
+use codex_common::elapsed::format_elapsed;
+use codex_core::config::Config;
+use codex_core::plan_tool::UpdatePlanArgs;
+use codex_core::protocol::AgentMessageDeltaEvent;
+use codex_core::protocol::AgentMessageEvent;
+use codex_core::protocol::AgentReasoningDeltaEvent;
+use codex_core::protocol::BackgroundEventEvent;
+use codex_core::protocol::ErrorEvent;
+use codex_core::protocol::Event;
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::ExecCommandBeginEvent;
+use codex_core::protocol::ExecCommandEndEvent;
+use codex_core::protocol::FileChange;
+use codex_core::protocol::McpInvocation;
+use codex_core::protocol::McpToolCallBeginEvent;
+use codex_core::protocol::McpToolCallEndEvent;
+use codex_core::protocol::PatchApplyBeginEvent;
+use codex_core::protocol::PatchApplyEndEvent;
+use codex_core::protocol::SessionConfiguredEvent;
+use codex_core::protocol::TaskCompleteEvent;
+use codex_core::protocol::TokenUsage;
+use owo_colors::OwoColorize;
+use owo_colors::Style;
+use shlex::try_join;
+use std::collections::HashMap;
+use std::io::Write;
+use std::path::PathBuf;
+use std::time::Instant;
+
+use crate::event_processor::CodexStatus;
+use crate::event_processor::EventProcessor;
+use crate::event_processor::create_config_summary_entries;
+use crate::event_processor::handle_last_message;
+
+/// This should be configurable. When used in CI, users may not want to impose
+/// a limit so they can see the full transcript.
+const MAX_OUTPUT_LINES_FOR_EXEC_TOOL_CALL: usize = 20;
+pub(crate) struct EventProcessorWithHumanOutput {
+    call_id_to_command: HashMap<String, ExecCommandBegin>,
+    call_id_to_patch: HashMap<String, PatchApplyBegin>,
+
+    // To ensure that --color=never is respected, ANSI escapes _must_ be added
+    // using .style() with one of these fields. If you need a new style, add a
+    // new field here.
+    bold: Style,
+    italic: Style,
+    dimmed: Style,
+
+    magenta: Style,
+    red: Style,
+    green: Style,
+    cyan: Style,
+
+    /// Whether to include `AgentReasoning` events in the output.
+    show_agent_reasoning: bool,
+    answer_started: bool,
+    reasoning_started: bool,
+    last_message_path: Option<PathBuf>,
+}
+
+impl EventProcessorWithHumanOutput {
+    /// Builds a processor that prints events in human-readable form.
+    ///
+    /// * `with_ansi` - when `true`, the style fields carry their bold, italic,
+    ///   dimmed, or color attribute; when `false`, every style field is a plain
+    ///   `Style::new()`.
+    /// * `config` - only `hide_agent_reasoning` is read; its negation becomes
+    ///   `show_agent_reasoning`.
+    /// * `last_message_path` - stored unchanged on the returned processor.
+    ///
+    /// Both call-id maps start empty and both "started" flags start `false`.
+    pub(crate) fn create_with_ansi(
+        with_ansi: bool,
+        config: &Config,
+        last_message_path: Option<PathBuf>,
+    ) -> Self {
+        // Resolve each style through one helper so the ANSI and plain modes
+        // share a single constructor and cannot drift apart when a field is
+        // added.
+        let resolve = |decorated_style: Style| {
+            if with_ansi {
+                decorated_style
+            } else {
+                Style::new()
+            }
+        };
+
+        Self {
+            call_id_to_command: HashMap::new(),
+            call_id_to_patch: HashMap::new(),
+            bold: resolve(Style::new().bold()),
+            italic: resolve(Style::new().italic()),
+            dimmed: resolve(Style::new().dimmed()),
+            magenta: resolve(Style::new().magenta()),
+            red: resolve(Style::new().red()),
+            green: resolve(Style::new().green()),
+            cyan: resolve(Style::new().cyan()),
+            show_agent_reasoning: !config.hide_agent_reasoning,
+            answer_started: false,
+            reasoning_started: false,
+            last_message_path,
+        }
+    }
+}
+
+struct ExecCommandBegin {
+    command: Vec<String>,
+}
+
+struct PatchApplyBegin {
+    start_time: Instant,
+    auto_approved: bool,
+}
+
+// `println!` prefixed with the current UTC time, `[%Y-%m-%dT%H:%M:%S]`, styled with `$self.dimmed`.
+#[macro_export]
+macro_rules! ts_println {
+    ($self:ident, $($arg:tt)*) => {{
+        let now = chrono::Utc::now();
+        let formatted = now.format("[%Y-%m-%dT%H:%M:%S]");
+        print!("{} ", formatted.style($self.dimmed));
+        println!($($arg)*);
+    }};
+}
+
+impl EventProcessor for EventProcessorWithHumanOutput {
+    /// Print a concise summary of the effective configuration that will be used
+    /// for the session. This mirrors the information shown in the TUI welcome
+    /// screen.
+    fn print_config_summary(&mut self, config: &Config, prompt: &str) {
+        const VERSION: &str = env!("CARGO_PKG_VERSION");
+        ts_println!(
+            self,
+            "OpenAI Codex v{} (research preview)\n--------",
+            VERSION
+        );
+
+        let entries = create_config_summary_entries(config);
+
+        for (key, value) in entries {
+            println!("{} {}", format!("{key}:").style(self.bold), value);
+        }
+
+        println!("--------");
+
+        // Echo the prompt that will be sent to the agent so it is visible in the
+        // transcript/logs before any events come in. Note the prompt may have been
+        // read from stdin, so it may not be visible in the terminal otherwise.
+        ts_println!(
+            self,
+            "{}\n{}",
+            "User instructions:".style(self.bold).style(self.cyan),
+            prompt
+        );
+    }
+
+    /// Renders a single protocol event as human-readable terminal output.
+    ///
+    /// Returns `CodexStatus::InitiateShutdown` after a `TaskComplete` event
+    /// (once the last agent message has been handed to `handle_last_message`),
+    /// `CodexStatus::Shutdown` on `ShutdownComplete`, and
+    /// `CodexStatus::Running` for every other event.
+    fn process_event(&mut self, event: Event) -> CodexStatus {
+        let Event { id: _, msg } = event;
+        match msg {
+            EventMsg::Error(ErrorEvent { message }) => {
+                let prefix = "ERROR:".style(self.red);
+                ts_println!(self, "{prefix} {message}");
+            }
+            EventMsg::BackgroundEvent(BackgroundEventEvent { message }) => {
+                ts_println!(self, "{}", message.style(self.dimmed));
+            }
+            EventMsg::TaskStarted => {
+                // Ignore.
+            }
+            EventMsg::TaskComplete(TaskCompleteEvent { last_agent_message }) => {
+                handle_last_message(
+                    last_agent_message.as_deref(),
+                    self.last_message_path.as_deref(),
+                );
+                return CodexStatus::InitiateShutdown;
+            }
+            EventMsg::TokenCount(TokenUsage { total_tokens, .. }) => {
+                ts_println!(self, "tokens used: {total_tokens}");
+            }
+            EventMsg::AgentMessageDelta(AgentMessageDeltaEvent { delta }) => {
+                // Print the header once, before the first streamed chunk.
+                if !self.answer_started {
+                    ts_println!(self, "{}\n", "codex".style(self.italic).style(self.magenta));
+                    self.answer_started = true;
+                }
+                // Deltas are printed without a trailing newline, so flush
+                // explicitly after each one.
+                print!("{delta}");
+                #[allow(clippy::expect_used)]
+                std::io::stdout().flush().expect("could not flush stdout");
+            }
+            EventMsg::AgentReasoningDelta(AgentReasoningDeltaEvent { delta }) => {
+                if !self.show_agent_reasoning {
+                    return CodexStatus::Running;
+                }
+                if !self.reasoning_started {
+                    ts_println!(
+                        self,
+                        "{}\n",
+                        "thinking".style(self.italic).style(self.magenta),
+                    );
+                    self.reasoning_started = true;
+                }
+                print!("{delta}");
+                #[allow(clippy::expect_used)]
+                std::io::stdout().flush().expect("could not flush stdout");
+            }
+            EventMsg::AgentMessage(AgentMessageEvent { message }) => {
+                // if answer_started is false, this means we haven't received any
+                // delta. Thus, we need to print the message as a new answer.
+                if !self.answer_started {
+                    ts_println!(
+                        self,
+                        "{}\n{}",
+                        "codex".style(self.italic).style(self.magenta),
+                        message,
+                    );
+                } else {
+                    // The text was already streamed; terminate the line and
+                    // reset the flag for the next answer.
+                    println!();
+                    self.answer_started = false;
+                }
+            }
+            EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
+                call_id,
+                command,
+                cwd,
+            }) => {
+                // Remember the command so the matching ExecCommandEnd event
+                // can print it again.
+                self.call_id_to_command.insert(
+                    call_id.clone(),
+                    ExecCommandBegin {
+                        command: command.clone(),
+                    },
+                );
+                ts_println!(
+                    self,
+                    "{} {} in {}",
+                    "exec".style(self.magenta),
+                    escape_command(&command).style(self.bold),
+                    cwd.to_string_lossy(),
+                );
+            }
+            EventMsg::ExecCommandOutputDelta(_) => {}
+            EventMsg::ExecCommandEnd(ExecCommandEndEvent {
+                call_id,
+                stdout,
+                stderr,
+                duration,
+                exit_code,
+            }) => {
+                let exec_command = self.call_id_to_command.remove(&call_id);
+                let (duration, call) = if let Some(ExecCommandBegin { command, .. }) = exec_command
+                {
+                    (
+                        format!(" in {}", format_duration(duration)),
+                        format!("{}", escape_command(&command).style(self.bold)),
+                    )
+                } else {
+                    ("".to_string(), format!("exec('{call_id}')"))
+                };
+
+                // Show stdout on success and stderr on failure, capped at
+                // MAX_OUTPUT_LINES_FOR_EXEC_TOOL_CALL lines.
+                let output = if exit_code == 0 { stdout } else { stderr };
+                let truncated_output = output
+                    .lines()
+                    .take(MAX_OUTPUT_LINES_FOR_EXEC_TOOL_CALL)
+                    .collect::<Vec<_>>()
+                    .join("\n");
+                match exit_code {
+                    0 => {
+                        let title = format!("{call} succeeded{duration}:");
+                        ts_println!(self, "{}", title.style(self.green));
+                    }
+                    _ => {
+                        let title = format!("{call} exited {exit_code}{duration}:");
+                        ts_println!(self, "{}", title.style(self.red));
+                    }
+                }
+                println!("{}", truncated_output.style(self.dimmed));
+            }
+            EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
+                call_id: _,
+                invocation,
+            }) => {
+                ts_println!(
+                    self,
+                    "{} {}",
+                    "tool".style(self.magenta),
+                    format_mcp_invocation(&invocation).style(self.bold),
+                );
+            }
+            EventMsg::McpToolCallEnd(tool_call_end_event) => {
+                // Query success before the event is destructured (and moved).
+                let is_success = tool_call_end_event.is_success();
+                let McpToolCallEndEvent {
+                    call_id: _,
+                    result,
+                    invocation,
+                    duration,
+                } = tool_call_end_event;
+
+                let duration = format!(" in {}", format_duration(duration));
+
+                let status_str = if is_success { "success" } else { "failed" };
+                let title_style = if is_success { self.green } else { self.red };
+                let title = format!(
+                    "{} {status_str}{duration}:",
+                    format_mcp_invocation(&invocation)
+                );
+
+                ts_println!(self, "{}", title.style(title_style));
+
+                if let Ok(res) = result {
+                    let val: serde_json::Value = res.into();
+                    let pretty =
+                        serde_json::to_string_pretty(&val).unwrap_or_else(|_| val.to_string());
+
+                    for line in pretty.lines().take(MAX_OUTPUT_LINES_FOR_EXEC_TOOL_CALL) {
+                        println!("{}", line.style(self.dimmed));
+                    }
+                }
+            }
+            EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
+                call_id,
+                auto_approved,
+                changes,
+            }) => {
+                // Store metadata so we can calculate duration later when we
+                // receive the corresponding PatchApplyEnd event.
+                self.call_id_to_patch.insert(
+                    call_id.clone(),
+                    PatchApplyBegin {
+                        start_time: Instant::now(),
+                        auto_approved,
+                    },
+                );
+
+                ts_println!(
+                    self,
+                    "{} auto_approved={}:",
+                    "apply_patch".style(self.magenta),
+                    auto_approved,
+                );
+
+                // Pretty-print the patch summary with colored diff markers so
+                // it's easy to scan in the terminal output.
+                for (path, change) in changes.iter() {
+                    match change {
+                        FileChange::Add { content } => {
+                            let header = format!(
+                                "{} {}",
+                                format_file_change(change),
+                                path.to_string_lossy()
+                            );
+                            println!("{}", header.style(self.magenta));
+                            for line in content.lines() {
+                                println!("{}", line.style(self.green));
+                            }
+                        }
+                        FileChange::Delete => {
+                            let header = format!(
+                                "{} {}",
+                                format_file_change(change),
+                                path.to_string_lossy()
+                            );
+                            println!("{}", header.style(self.magenta));
+                        }
+                        FileChange::Update {
+                            unified_diff,
+                            move_path,
+                        } => {
+                            let header = if let Some(dest) = move_path {
+                                format!(
+                                    "{} {} -> {}",
+                                    format_file_change(change),
+                                    path.to_string_lossy(),
+                                    dest.to_string_lossy()
+                                )
+                            } else {
+                                format!("{} {}", format_file_change(change), path.to_string_lossy())
+                            };
+                            println!("{}", header.style(self.magenta));
+
+                            // Colorize diff lines. We keep file header lines
+                            // (--- / +++) without extra coloring so they are
+                            // still readable.
+                            for diff_line in unified_diff.lines() {
+                                if diff_line.starts_with('+') && !diff_line.starts_with("+++") {
+                                    println!("{}", diff_line.style(self.green));
+                                } else if diff_line.starts_with('-')
+                                    && !diff_line.starts_with("---")
+                                {
+                                    println!("{}", diff_line.style(self.red));
+                                } else {
+                                    println!("{diff_line}");
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+            EventMsg::PatchApplyEnd(PatchApplyEndEvent {
+                call_id,
+                stdout,
+                stderr,
+                success,
+            }) => {
+                let patch_begin = self.call_id_to_patch.remove(&call_id);
+
+                // Compute duration and summary label similar to exec commands.
+                let (duration, label) = if let Some(PatchApplyBegin {
+                    start_time,
+                    auto_approved,
+                }) = patch_begin
+                {
+                    (
+                        format!(" in {}", format_elapsed(start_time)),
+                        format!("apply_patch(auto_approved={auto_approved})"),
+                    )
+                } else {
+                    (String::new(), format!("apply_patch('{call_id}')"))
+                };
+
+                // The event only carries a success flag, so the printed exit
+                // code is synthesized as 0 or 1.
+                let (exit_code, output, title_style) = if success {
+                    (0, stdout, self.green)
+                } else {
+                    (1, stderr, self.red)
+                };
+
+                let title = format!("{label} exited {exit_code}{duration}:");
+                ts_println!(self, "{}", title.style(title_style));
+                for line in output.lines() {
+                    println!("{}", line.style(self.dimmed));
+                }
+            }
+            EventMsg::ExecApprovalRequest(_) => {
+                // Should we exit?
+            }
+            EventMsg::ApplyPatchApprovalRequest(_) => {
+                // Should we exit?
+            }
+            EventMsg::AgentReasoning(agent_reasoning_event) => {
+                if self.show_agent_reasoning {
+                    if !self.reasoning_started {
+                        // No deltas were streamed, so print the whole text
+                        // under the same "thinking" header the streamed path
+                        // uses; "codex" is reserved for agent answers.
+                        ts_println!(
+                            self,
+                            "{}\n{}",
+                            "thinking".style(self.italic).style(self.magenta),
+                            agent_reasoning_event.text,
+                        );
+                    } else {
+                        println!();
+                        self.reasoning_started = false;
+                    }
+                }
+            }
+            EventMsg::SessionConfigured(session_configured_event) => {
+                let SessionConfiguredEvent {
+                    session_id,
+                    model,
+                    history_log_id: _,
+                    history_entry_count: _,
+                } = session_configured_event;
+
+                ts_println!(
+                    self,
+                    "{} {}",
+                    "codex session".style(self.magenta).style(self.bold),
+                    session_id.to_string().style(self.dimmed)
+                );
+
+                ts_println!(self, "model: {}", model);
+                println!();
+            }
+            EventMsg::PlanUpdate(plan_update_event) => {
+                let UpdatePlanArgs { explanation, plan } = plan_update_event;
+                ts_println!(self, "explanation: {explanation:?}");
+                ts_println!(self, "plan: {plan:?}");
+            }
+            EventMsg::GetHistoryEntryResponse(_) => {
+                // Currently ignored in exec output.
+            }
+            EventMsg::ShutdownComplete => return CodexStatus::Shutdown,
+        }
+        CodexStatus::Running
+    }
+}
+
+/// Joins `command` into a single shell-quoted string, falling back to a
+/// plain space-separated join when `try_join` reports an error.
+fn escape_command(command: &[String]) -> String {
+    match try_join(command.iter().map(String::as_str)) {
+        Ok(quoted) => quoted,
+        Err(_) => command.join(" "),
+    }
+}
+
+/// Maps a file change to its one-letter status code: `A` for an addition,
+/// `D` for a deletion, `R` for an update that carries a move path and `M`
+/// for an update without one.
+fn format_file_change(change: &FileChange) -> &'static str {
+    match change {
+        FileChange::Add { .. } => "A",
+        FileChange::Delete => "D",
+        FileChange::Update { move_path, .. } => {
+            if move_path.is_some() {
+                "R"
+            } else {
+                "M"
+            }
+        }
+    }
+}
+
+/// Formats an MCP invocation as `server.tool(args)`, where `args` is the
+/// compact single-line JSON form of the arguments, or nothing at all when
+/// the invocation carries no arguments.
+fn format_mcp_invocation(invocation: &McpInvocation) -> String {
+    // Compact JSON keeps the whole call on a single line.
+    let rendered_args = match invocation.arguments.as_ref() {
+        Some(value) => serde_json::to_string(value).unwrap_or_else(|_| value.to_string()),
+        None => String::new(),
+    };
+
+    // An empty argument string naturally yields `server.tool()`.
+    format!(
+        "{}.{}({rendered_args})",
+        invocation.server, invocation.tool
+    )
+}
--- a/codex-rs/exec/src/event_processor_with_json_output.rs
+++ b/codex-rs/exec/src/event_processor_with_json_output.rs
@@ -0,0 +1,64 @@
+use std::collections::HashMap;
+use std::path::PathBuf;
+
+use codex_core::config::Config;
+use codex_core::protocol::Event;
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::TaskCompleteEvent;
+use serde_json::json;
+
+use crate::event_processor::CodexStatus;
+use crate::event_processor::EventProcessor;
+use crate::event_processor::create_config_summary_entries;
+use crate::event_processor::handle_last_message;
+
+/// `EventProcessor` implementation that emits JSON lines on stdout instead of
+/// human-readable text.
+pub(crate) struct EventProcessorWithJsonOutput {
+    // Passed to `handle_last_message` when the task completes; presumably the
+    // file that receives the final agent message (confirm in event_processor).
+    last_message_path: Option<PathBuf>,
+}
+
+impl EventProcessorWithJsonOutput {
+    /// Creates a processor that stores `last_message_path` for use when the
+    /// `TaskComplete` event is processed.
+    pub fn new(last_message_path: Option<PathBuf>) -> Self {
+        Self { last_message_path }
+    }
+}
+
+impl EventProcessor for EventProcessorWithJsonOutput {
+    /// Prints two JSON lines on stdout: the configuration summary as a flat
+    /// string-to-string object, followed by `{"prompt": ...}`.
+    fn print_config_summary(&mut self, config: &Config, prompt: &str) {
+        let entries = create_config_summary_entries(config)
+            .into_iter()
+            .map(|(key, value)| (key.to_string(), value))
+            .collect::<HashMap<String, String>>();
+        #[allow(clippy::expect_used)]
+        let config_json =
+            serde_json::to_string(&entries).expect("Failed to serialize config summary to JSON");
+        println!("{config_json}");
+
+        let prompt_json = json!({
+            "prompt": prompt,
+        });
+        println!("{prompt_json}");
+    }
+
+    /// Emits `event` as one JSON line on stdout.
+    ///
+    /// Streaming delta events are suppressed. `TaskComplete` hands the last
+    /// agent message to `handle_last_message` and returns
+    /// `CodexStatus::InitiateShutdown`; `ShutdownComplete` returns
+    /// `CodexStatus::Shutdown`; everything else returns `CodexStatus::Running`.
+    fn process_event(&mut self, event: Event) -> CodexStatus {
+        match event.msg {
+            EventMsg::AgentMessageDelta(_) | EventMsg::AgentReasoningDelta(_) => {
+                // Suppress streaming events in JSON mode.
+                CodexStatus::Running
+            }
+            EventMsg::TaskComplete(TaskCompleteEvent { last_agent_message }) => {
+                handle_last_message(
+                    last_agent_message.as_deref(),
+                    self.last_message_path.as_deref(),
+                );
+                CodexStatus::InitiateShutdown
+            }
+            EventMsg::ShutdownComplete => CodexStatus::Shutdown,
+            _ => {
+                match serde_json::to_string(&event) {
+                    Ok(line) => println!("{line}"),
+                    // Report the failure on stderr rather than dropping the
+                    // event silently; stdout stays reserved for JSON lines.
+                    Err(err) => eprintln!("Failed to serialize event to JSON: {err}"),
+                }
+                CodexStatus::Running
+            }
+        }
+    }
+}
--- a/codex-rs/exec/src/lib.rs
+++ b/codex-rs/exec/src/lib.rs
@@ -1,14 +1,16 @@
 mod cli;
 mod event_processor;
+mod event_processor_with_human_output;
+mod event_processor_with_json_output;

 use std::io::IsTerminal;
 use std::io::Read;
-use std::path::Path;
 use std::path::PathBuf;
 use std::sync::Arc;

 pub use cli::Cli;
-use codex_core::codex_wrapper;
+use codex_core::codex_wrapper::CodexConversation;
+use codex_core::codex_wrapper::{self};
 use codex_core::config::Config;
 use codex_core::config::ConfigOverrides;
 use codex_core::config_types::SandboxMode;
@@ -19,12 +21,16 @@ use codex_core::protocol::InputItem;
 use codex_core::protocol::Op;
 use codex_core::protocol::TaskCompleteEvent;
 use codex_core::util::is_inside_git_repo;
-use event_processor::EventProcessor;
+use event_processor_with_human_output::EventProcessorWithHumanOutput;
+use event_processor_with_json_output::EventProcessorWithJsonOutput;
 use tracing::debug;
 use tracing::error;
 use tracing::info;
 use tracing_subscriber::EnvFilter;

+use crate::event_processor::CodexStatus;
+use crate::event_processor::EventProcessor;
+
 pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()> {
    let Cli {
        images,
@@ -36,6 +42,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
        skip_git_repo_check,
        color,
        last_message_file,
+        json: json_mode,
        sandbox_mode: sandbox_mode_cli_arg,
        prompt,
        config_overrides,
@@ -85,6 +92,20 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
        ),
    };

+    // TODO(mbolin): Take a more thoughtful approach to logging.
+    let default_level = "error";
+    let _ = tracing_subscriber::fmt()
+        // Fallback to the `default_level` log filter if the environment
+        // variable is not set _or_ contains an invalid value
+        .with_env_filter(
+            EnvFilter::try_from_default_env()
+                .or_else(|_| EnvFilter::try_new(default_level))
+                .unwrap_or_else(|_| EnvFilter::new(default_level)),
+        )
+        .with_ansi(stderr_with_ansi)
+        .with_writer(std::io::stderr)
+        .try_init();
+
    let sandbox_mode = if full_auto {
        Some(SandboxMode::WorkspaceWrite)
    } else if dangerously_bypass_approvals_and_sandbox {
@@ -104,6 +125,8 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
        cwd: cwd.map(|p| p.canonicalize().unwrap_or(p)),
        model_provider: None,
        codex_linux_sandbox_exe,
+        base_instructions: None,
+        include_plan_tool: None,
    };
    // Parse `-c` overrides.
    let cli_kv_overrides = match config_overrides.parse_overrides() {
@@ -115,8 +138,16 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
    };

    let config = Config::load_with_cli_overrides(cli_kv_overrides, overrides)?;
-    let mut event_processor =
-        EventProcessor::create_with_ansi(stdout_with_ansi, !config.hide_agent_reasoning);
+    let mut event_processor: Box<dyn EventProcessor> = if json_mode {
+        Box::new(EventProcessorWithJsonOutput::new(last_message_file.clone()))
+    } else {
+        Box::new(EventProcessorWithHumanOutput::create_with_ansi(
+            stdout_with_ansi,
+            &config,
+            last_message_file.clone(),
+        ))
+    };
+
    // Print the effective configuration and prompt so users can see what Codex
    // is using.
    event_processor.print_config_summary(&config, &prompt);
@@ -126,23 +157,14 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
        std::process::exit(1);
    }

-    // TODO(mbolin): Take a more thoughtful approach to logging.
-    let default_level = "error";
-    let _ = tracing_subscriber::fmt()
-        // Fallback to the `default_level` log filter if the environment
-        // variable is not set _or_ contains an invalid value
-        .with_env_filter(
-            EnvFilter::try_from_default_env()
-                .or_else(|_| EnvFilter::try_new(default_level))
-                .unwrap_or_else(|_| EnvFilter::new(default_level)),
-        )
-        .with_ansi(stderr_with_ansi)
-        .with_writer(std::io::stderr)
-        .try_init();
-
-    let (codex_wrapper, event, ctrl_c) = codex_wrapper::init_codex(config).await?;
+    let CodexConversation {
+        codex: codex_wrapper,
+        session_configured,
+        ctrl_c,
+        ..
+    } = codex_wrapper::init_codex(config).await?;
    let codex = Arc::new(codex_wrapper);
-    info!("Codex initialized with event: {event:?}");
+    info!("Codex initialized with event: {session_configured:?}");

    let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel::<Event>();
    {
@@ -210,40 +232,17 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any

    // Run the loop until the task is complete.
    while let Some(event) = rx.recv().await {
-        let (is_last_event, last_assistant_message) = match &event.msg {
-            EventMsg::TaskComplete(TaskCompleteEvent { last_agent_message }) => {
-                (true, last_agent_message.clone())
+        let shutdown: CodexStatus = event_processor.process_event(event);
+        match shutdown {
+            CodexStatus::Running => continue,
+            CodexStatus::InitiateShutdown => {
+                codex.submit(Op::Shutdown).await?;
+            }
+            CodexStatus::Shutdown => {
+                break;
            }
-            _ => (false, None),
-        };
-        event_processor.process_event(event);
-        if is_last_event {
-            handle_last_message(last_assistant_message, last_message_file.as_deref())?;
-            break;
        }
    }

    Ok(())
 }
-
-fn handle_last_message(
-    last_agent_message: Option<String>,
-    last_message_file: Option<&Path>,
-) -> std::io::Result<()> {
-    match (last_agent_message, last_message_file) {
-        (Some(last_agent_message), Some(last_message_file)) => {
-            // Last message and a file to write to.
-            std::fs::write(last_message_file, last_agent_message)?;
-        }
-        (None, Some(last_message_file)) => {
-            eprintln!(
-                "Warning: No last message to write to file: {}",
-                last_message_file.to_string_lossy()
-            );
-        }
-        (_, None) => {
-            // No last message and no file to write to.
-        }
-    }
-    Ok(())
-}
--- a/codex-rs/exec/src/main.rs
+++ b/codex-rs/exec/src/main.rs
@@ -10,6 +10,7 @@
 //! This allows us to ship a completely separate set of functionality as part
 //! of the `codex-exec` binary.
 use clap::Parser;
+use codex_arg0::arg0_dispatch_or_else;
 use codex_common::CliConfigOverrides;
 use codex_exec::Cli;
 use codex_exec::run_main;
@@ -24,7 +25,7 @@ struct TopCli {
 }

 fn main() -> anyhow::Result<()> {
-    codex_linux_sandbox::run_with_sandbox(|codex_linux_sandbox_exe| async move {
+    arg0_dispatch_or_else(|codex_linux_sandbox_exe| async move {
        let top_cli = TopCli::parse();
        // Merge root-level overrides into inner CLI struct so downstream logic remains unchanged.
        let mut inner = top_cli.inner;
--- a/codex-rs/exec/tests/apply_patch.rs
+++ b/codex-rs/exec/tests/apply_patch.rs
@@ -0,0 +1,39 @@
+use anyhow::Context;
+use assert_cmd::prelude::*;
+use codex_core::CODEX_APPLY_PATCH_ARG1;
+use std::fs;
+use std::process::Command;
+use tempfile::tempdir;
+
+/// While we may add an `apply-patch` subcommand to the `codex` CLI multitool
+/// at some point, we must ensure that the smaller `codex-exec` CLI can still
+/// emulate the `apply_patch` CLI.
+#[test]
+fn test_standalone_exec_cli_can_use_apply_patch() -> anyhow::Result<()> {
+    let tmp = tempdir()?;
+    let relative_path = "source.txt";
+    let absolute_path = tmp.path().join(relative_path);
+    fs::write(&absolute_path, "original content\n")?;
+
+    Command::cargo_bin("codex-exec")
+        .context("should find binary for codex-exec")?
+        .arg(CODEX_APPLY_PATCH_ARG1)
+        .arg(
+            r#"*** Begin Patch
+*** Update File: source.txt
+@@
+-original content
++modified by apply_patch
+*** End Patch"#,
+        )
+        .current_dir(tmp.path())
+        .assert()
+        .success()
+        .stdout("Success. Updated the following files:\nM source.txt\n")
+        .stderr(predicates::str::is_empty());
+    assert_eq!(
+        fs::read_to_string(absolute_path)?,
+        "modified by apply_patch\n"
+    );
+    Ok(())
+}
--- a/Show More
+++ b/Show More