Merge remote-tracking branch 'origin/main' into pakrym/redo-codemode-to-link-against

# Conflicts: # codex-rs/core/src/codex.rs # codex-rs/core/src/tools/code_mode.rs # codex-rs/core/src/tools/code_mode_bridge.js # codex-rs/core/src/tools/code_mode_runner.cjs # codex-rs/core/src/tools/context.rs # codex-rs/core/src/tools/handlers/code_mode.rs # codex-rs/core/src/tools/spec.rs # codex-rs/core/tests/suite/code_mode.rs
2026-04-23 14:14:50 +00:00 · 2026-03-11 12:55:40 -07:00 · 2026-03-11 12:52:55 -07:00 · 2026-03-11 11:54:46 -07:00 · 2026-03-11 11:41:50 -07:00 · 2026-03-11 11:22:25 -07:00
42 changed files with 12677 additions and 764 deletions
--- a/MODULE.bazel.lock
+++ b/MODULE.bazel.lock
--- a/codex-rs/Cargo.lock
+++ b/codex-rs/Cargo.lock
@@ -948,6 +948,8 @@ dependencies = [
 "cexpr",
 "clang-sys",
 "itertools 0.13.0",
+ "log",
+ "prettyplease",
 "proc-macro2",
 "quote",
 "regex",
@@ -1151,6 +1153,16 @@ version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ade8366b8bd5ba243f0a58f036cc0ca8a2f069cff1a2351ef1cac6b083e16fc0"

+[[package]]
+name = "calendrical_calculations"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3a0b39595c6ee54a8d0900204ba4c401d0ab4eb45adaf07178e8d017541529e7"
+dependencies = [
+ "core_maths",
+ "displaydoc",
+]
+
 [[package]]
 name = "cassowary"
 version = "0.3.0"
@@ -1572,7 +1584,7 @@ dependencies = [
 "thiserror 2.0.18",
 "tokio",
 "url",
- "which",
+ "which 8.0.0",
 "wiremock",
 "zip",
 ]
@@ -1764,6 +1776,15 @@ dependencies = [
 "thiserror 2.0.18",
 ]

+[[package]]
+name = "codex-code-mode"
+version = "0.0.0"
+dependencies = [
+ "serde",
+ "serde_json",
+ "v8",
+]
+
 [[package]]
 name = "codex-config"
 version = "0.0.0"
@@ -1810,6 +1831,7 @@ dependencies = [
 "codex-artifacts",
 "codex-async-utils",
 "codex-client",
+ "codex-code-mode",
 "codex-config",
 "codex-execpolicy",
 "codex-file-search",
@@ -1894,7 +1916,7 @@ dependencies = [
 "url",
 "uuid",
 "walkdir",
- "which",
+ "which 8.0.0",
 "wildmatch",
 "windows-sys 0.52.0",
 "wiremock",
@@ -2105,7 +2127,7 @@ dependencies = [
 "serde_json",
 "tokio",
 "tracing",
- "which",
+ "which 8.0.0",
 "wiremock",
 ]

@@ -2343,7 +2365,7 @@ dependencies = [
 "tracing",
 "urlencoding",
 "webbrowser",
- "which",
+ "which 8.0.0",
 ]

 [[package]]
@@ -2383,7 +2405,7 @@ dependencies = [
 "tree-sitter",
 "tree-sitter-bash",
 "url",
- "which",
+ "which 8.0.0",
 ]

 [[package]]
@@ -2542,7 +2564,7 @@ dependencies = [
 "uuid",
 "vt100",
 "webbrowser",
- "which",
+ "which 8.0.0",
 "windows-sys 0.52.0",
 "winsplit",
 ]
@@ -2916,6 +2938,15 @@ version = "0.8.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"

+[[package]]
+name = "core_maths"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77745e017f5edba1a9c1d854f6f3a52dac8a12dd5af5d2f54aecf61e43d80d30"
+dependencies = [
+ "libm",
+]
+
 [[package]]
 name = "core_test_support"
 version = "0.0.0"
@@ -3513,6 +3544,38 @@ dependencies = [
 "subtle",
 ]

+[[package]]
+name = "diplomat"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9adb46b05e2f53dcf6a7dfc242e4ce9eb60c369b6b6eb10826a01e93167f59c6"
+dependencies = [
+ "diplomat_core",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.114",
+]
+
+[[package]]
+name = "diplomat-runtime"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0569bd3caaf13829da7ee4e83dbf9197a0e1ecd72772da6d08f0b4c9285c8d29"
+
+[[package]]
+name = "diplomat_core"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "51731530ed7f2d4495019abc7df3744f53338e69e2863a6a64ae91821c763df1"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "serde",
+ "smallvec",
+ "strck",
+ "syn 2.0.114",
+]
+
 [[package]]
 name = "dirs"
 version = "6.0.0"
@@ -4119,6 +4182,16 @@ dependencies = [
 "libc",
 ]

+[[package]]
+name = "fslock"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "04412b8935272e3a9bae6f48c7bfff74c2911f60525404edfdd28e49884c3bfb"
+dependencies = [
+ "libc",
+ "winapi",
+]
+
 [[package]]
 name = "futures"
 version = "0.3.31"
@@ -4347,6 +4420,15 @@ dependencies = [
 "regex-syntax 0.8.8",
 ]

+[[package]]
+name = "gzip-header"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "95cc527b92e6029a62960ad99aa8a6660faa4555fe5f731aab13aa6a921795a2"
+dependencies = [
+ "crc32fast",
+]
+
 [[package]]
 name = "h2"
 version = "0.4.13"
@@ -4804,6 +4886,28 @@ dependencies = [
 "cc",
 ]

+[[package]]
+name = "icu_calendar"
+version = "2.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d6f0e52e009b6b16ba9c0693578796f2dd4aaa59a7f8f920423706714a89ac4e"
+dependencies = [
+ "calendrical_calculations",
+ "displaydoc",
+ "icu_calendar_data",
+ "icu_locale",
+ "icu_locale_core",
+ "icu_provider",
+ "tinystr",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_calendar_data"
+version = "2.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "527f04223b17edfe0bd43baf14a0cb1b017830db65f3950dc00224860a9a446d"
+
 [[package]]
 name = "icu_collections"
 version = "2.1.1"
@@ -5238,6 +5342,12 @@ version = "1.0.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2"

+[[package]]
+name = "ixdtf"
+version = "0.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "84de9d95a6d2547d9b77ee3f25fa0ee32e3c3a6484d47a55adebc0439c077992"
+
 [[package]]
 name = "jiff"
 version = "0.2.18"
@@ -6962,6 +7072,16 @@ dependencies = [
 "yansi",
 ]

+[[package]]
+name = "prettyplease"
+version = "0.2.37"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b"
+dependencies = [
+ "proc-macro2",
+ "syn 2.0.114",
+]
+
 [[package]]
 name = "proc-macro-crate"
 version = "3.4.0"
@@ -7794,6 +7914,16 @@ dependencies = [
 "webpki-roots 1.0.5",
 ]

+[[package]]
+name = "resb"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6a067ab3b5ca3b4dc307d0de9cf75f9f5e6ca9717b192b2f28a36c83e5de9e76"
+dependencies = [
+ "potential_utf",
+ "serde_core",
+]
+
 [[package]]
 name = "resolv-conf"
 version = "0.7.6"
@@ -9174,6 +9304,15 @@ dependencies = [
 "serde_json",
 ]

+[[package]]
+name = "strck"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42316e70da376f3d113a68d138a60d8a9883c604fe97942721ec2068dab13a9f"
+dependencies = [
+ "unicode-ident",
+]
+
 [[package]]
 name = "streaming-iterator"
 version = "0.1.9"
@@ -9403,6 +9542,39 @@ dependencies = [
 "windows-sys 0.61.2",
 ]

+[[package]]
+name = "temporal_capi"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a151e402c2bdb6a3a2a2f3f225eddaead2e7ce7dd5d3fa2090deb11b17aa4ed8"
+dependencies = [
+ "diplomat",
+ "diplomat-runtime",
+ "icu_calendar",
+ "icu_locale",
+ "num-traits",
+ "temporal_rs",
+ "timezone_provider",
+ "writeable",
+ "zoneinfo64",
+]
+
+[[package]]
+name = "temporal_rs"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "88afde3bd75d2fc68d77a914bece426aa08aa7649ffd0cdd4a11c3d4d33474d1"
+dependencies = [
+ "core_maths",
+ "icu_calendar",
+ "icu_locale",
+ "ixdtf",
+ "num-traits",
+ "timezone_provider",
+ "tinystr",
+ "writeable",
+]
+
 [[package]]
 name = "term"
 version = "0.7.0"
@@ -9610,6 +9782,18 @@ dependencies = [
 "time-core",
 ]

+[[package]]
+name = "timezone_provider"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df9ba0000e9e73862f3e7ca1ff159e2ddf915c9d8bb11e38a7874760f445d993"
+dependencies = [
+ "tinystr",
+ "zerotrie",
+ "zerovec",
+ "zoneinfo64",
+]
+
 [[package]]
 name = "tiny-keccak"
 version = "2.0.2"
@@ -10409,6 +10593,23 @@ dependencies = [
 "wasm-bindgen",
 ]

+[[package]]
+name = "v8"
+version = "146.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d97bcac5cdc5a195a4813f1855a6bc658f240452aac36caa12fd6c6f16026ab1"
+dependencies = [
+ "bindgen",
+ "bitflags 2.10.0",
+ "fslock",
+ "gzip-header",
+ "home",
+ "miniz_oxide",
+ "paste",
+ "temporal_capi",
+ "which 6.0.3",
+]
+
 [[package]]
 name = "valuable"
 version = "0.1.1"
@@ -10708,6 +10909,18 @@ version = "0.1.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a28ac98ddc8b9274cb41bb4d9d4d5c425b6020c50c46f25559911905610b4a88"

+[[package]]
+name = "which"
+version = "6.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4ee928febd44d98f2f459a4a79bd4d928591333a494a10a868418ac1b39cf1f"
+dependencies = [
+ "either",
+ "home",
+ "rustix 0.38.44",
+ "winsafe",
+]
+
 [[package]]
 name = "which"
 version = "8.0.0"
@@ -11711,6 +11924,19 @@ version = "1.0.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3ff05f8caa9038894637571ae6b9e29466c1f4f829d26c9b28f869a29cbe3445"

+[[package]]
+name = "zoneinfo64"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bb2e5597efbe7c421da8a7fd396b20b571704e787c21a272eecf35dfe9d386f0"
+dependencies = [
+ "calendrical_calculations",
+ "icu_locale_core",
+ "potential_utf",
+ "resb",
+ "serde",
+]
+
 [[package]]
 name = "zopfli"
 version = "0.8.3"
--- a/codex-rs/Cargo.toml
+++ b/codex-rs/Cargo.toml
@@ -16,6 +16,7 @@ members = [
    "cloud-tasks",
    "cloud-tasks-client",
    "cli",
+    "code-mode",
    "config",
    "shell-command",
    "shell-escalation",
@@ -98,6 +99,7 @@ codex-chatgpt = { path = "chatgpt" }
 codex-cli = { path = "cli" }
 codex-client = { path = "codex-client" }
 codex-cloud-requirements = { path = "cloud-requirements" }
+codex-code-mode = { path = "code-mode" }
 codex-config = { path = "config" }
 codex-core = { path = "core" }
 codex-exec = { path = "exec" }
@@ -234,6 +236,7 @@ regex-lite = "0.1.8"
 reqwest = "0.12"
 rmcp = { version = "0.15.0", default-features = false }
 runfiles = { git = "https://github.com/dzbarsky/rules_rust", rev = "b56cbaa8465e74127f1ea216f813cd377295ad81" }
+v8 = "146.4.0"
 rustls = { version = "0.23", default-features = false, features = [
    "ring",
    "std",
--- a/codex-rs/code-mode/BUILD.bazel
+++ b/codex-rs/code-mode/BUILD.bazel
@@ -0,0 +1,16 @@
+load("@rules_rust//rust:defs.bzl", "rust_library")
+
+rust_library(
+    name = "code-mode",
+    crate_name = "codex_code_mode",
+    crate_root = "src/bazel_stub.rs",  # Bazel builds the stub root, which reports code_mode as unsupported
+    srcs = [
+        "src/api.rs",  # shared type definitions, pulled in by the stub via `mod api;`
+        "src/bazel_stub.rs",
+    ],
+    deps = [  # only serde crates are listed; no v8 dependency in this target
+        "@crates//:serde",
+        "@crates//:serde_json",
+    ],
+    visibility = ["//visibility:public"],
+)
--- a/codex-rs/code-mode/Cargo.toml
+++ b/codex-rs/code-mode/Cargo.toml
@@ -0,0 +1,19 @@
+[package]
+edition.workspace = true
+license.workspace = true
+name = "codex-code-mode"
+version.workspace = true
+
+[lib]
+name = "codex_code_mode"
+path = "src/lib.rs"
+
+[lints]
+workspace = true
+
+[dependencies]
+serde = { workspace = true, features = ["derive"] }
+serde_json = { workspace = true }
+
+[target.'cfg(not(all(target_os = "linux", target_env = "musl")))'.dependencies] # applies to every target except linux + musl
+v8 = { workspace = true } # version comes from the workspace manifest
--- a/codex-rs/code-mode/src/api.rs
+++ b/codex-rs/code-mode/src/api.rs
@@ -0,0 +1,34 @@
+use std::collections::HashMap;
+
+use serde::Serialize;
+use serde_json::Value as JsonValue;
+
+pub type ToolCallHandler = // host callback type; arguments are presumably (tool name, optional JSON input) — confirm against callers
+    dyn FnMut(String, Option<JsonValue>) -> Result<JsonValue, String> + Send + 'static; // unsized: used as Box<ToolCallHandler>
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize)]
+#[serde(rename_all = "snake_case")] // variants serialize as "function" and "freeform"
+pub enum ToolKind { // the two kinds of tool an EnabledTool can be
+    Function,
+    Freeform,
+}
+
+#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
+pub struct EnabledTool { // description of one tool made available to a code_mode script
+    pub tool_name: String, // name passed to the tool-call binding; also used as a module export when it is a valid identifier
+    #[serde(rename = "module")] // serialized under the key "module" rather than "module_path"
+    pub module_path: String,
+    pub namespace: Vec<String>, // compared against the segments of a `tools/<ns>.js` import specifier
+    pub name: String, // property name of the tool inside a namespaced `tools` object
+    pub description: String,
+    pub kind: ToolKind,
+}
+
+#[derive(Debug)]
+pub struct ExecutionResult { // everything `execute` hands back after running a script
+    pub content_items: Vec<JsonValue>, // output items collected during the run
+    pub stored_values: HashMap<String, JsonValue>, // key/value state as it stood after the run
+    pub max_output_tokens_per_exec_call: usize, // final value of the limit; scripts may change it while running
+    pub success: bool, // false when the script failed; see error_text
+    pub error_text: Option<String>, // failure description, None on success
+}
--- a/codex-rs/code-mode/src/bazel_stub.rs
+++ b/codex-rs/code-mode/src/bazel_stub.rs
@@ -0,0 +1,32 @@
+use std::collections::HashMap;
+
+use serde_json::Value as JsonValue;
+
+mod api;
+pub use api::EnabledTool;
+pub use api::ExecutionResult;
+pub use api::ToolCallHandler;
+pub use api::ToolKind;
+
+const BAZEL_UNSUPPORTED_REASON: &str = "code_mode is unavailable in Bazel builds";
+
+pub const fn is_supported() -> bool { // always false in this stub
+    false
+}
+
+pub fn unsupported_reason() -> Option<&'static str> { // always Some: the stub never supports code_mode
+    Some(BAZEL_UNSUPPORTED_REASON)
+}
+
+pub fn execute( // stub entry point: ignores every argument and always returns Err
+    code: String,
+    enabled_tools: Vec<EnabledTool>,
+    stored_values: HashMap<String, JsonValue>,
+    on_tool_call: Box<ToolCallHandler>,
+) -> Result<ExecutionResult, String> {
+    let _ = code; // the `let _ =` lines consume the parameters so they do not count as unused
+    let _ = enabled_tools;
+    let _ = stored_values;
+    let _ = on_tool_call;
+    Err(BAZEL_UNSUPPORTED_REASON.to_string()) // same text that unsupported_reason() reports
+}
--- a/codex-rs/code-mode/src/code_mode_bridge.js
+++ b/codex-rs/code-mode/src/code_mode_bridge.js
@@ -0,0 +1,184 @@
+const __codexEnabledTools = __CODE_MODE_ENABLED_TOOLS_PLACEHOLDER__; // placeholder token; presumably replaced with a JSON array before this script runs
+const __codexEnabledToolNames = __codexEnabledTools.map((tool) => tool.tool_name);
+const __codexAllTools = Object.freeze( // frozen public listing: only module, name and description are copied
+  __codexEnabledTools.map((tool) =>
+    Object.freeze({
+      module: tool.module,
+      name: tool.name,
+      description: tool.description,
+    }),
+  ),
+);
+const __codexTools = Object.create(null); // prototype-less object, filled with one async wrapper per tool further down
+const __codexContentItems = Array.isArray(globalThis.__codexContentItems) // reuse an array already present on globalThis, else start empty
+  ? globalThis.__codexContentItems
+  : [];
+const __codexStoredValues = __CODE_MODE_STORED_VALUES_PLACEHOLDER__; // placeholder token; presumably replaced with a JSON object of stored values
+
+function __codexCloneContentItem(item) { // validate one content item and return a fresh object holding only the known fields
+  if (!item || typeof item !== 'object') {
+    throw new TypeError('content item must be an object');
+  }
+  switch (item.type) {
+    case 'input_text':
+      if (typeof item.text !== 'string') {
+        throw new TypeError('content item "input_text" requires a string text field');
+      }
+      return { type: 'input_text', text: item.text }; // extra properties on the input are dropped
+    case 'input_image':
+      if (typeof item.image_url !== 'string') {
+        throw new TypeError('content item "input_image" requires a string image_url field');
+      }
+      return { type: 'input_image', image_url: item.image_url }; // URL is not scheme-checked here
+    default:
+      throw new TypeError(`unsupported content item type "${item.type}"`);
+  }
+}
+
+function __codexNormalizeRawContentItems(value) { // flatten arbitrarily nested arrays of content items into one validated list
+  if (Array.isArray(value)) {
+    return value.flatMap((entry) => __codexNormalizeRawContentItems(entry)); // recurse so nested arrays are flattened too
+  }
+  return [__codexCloneContentItem(value)]; // throws TypeError for anything that is not a valid item
+}
+
+function __codexNormalizeContentItems(value) { // like the raw variant, but a bare string becomes one input_text item
+  if (typeof value === 'string') {
+    return [{ type: 'input_text', text: value }];
+  }
+  return __codexNormalizeRawContentItems(value); // strings nested inside arrays are NOT converted; they fail validation
+}
+
+function __codexCloneJsonValue(value) { // deep copy via a JSON round trip
+  return JSON.parse(JSON.stringify(value)); // stringify yields undefined for undefined/functions, which makes JSON.parse throw
+}
+
+function __codexSerializeOutputText(value) { // turn any value into the text of an output item
+  if (typeof value === 'string') {
+    return value; // strings pass through without JSON quoting
+  }
+  if (
+    typeof value === 'undefined' ||
+    value === null ||
+    typeof value === 'boolean' ||
+    typeof value === 'number' ||
+    typeof value === 'bigint'
+  ) {
+    return String(value); // primitives use String(); JSON.stringify would throw on bigint
+  }
+
+  const serialized = JSON.stringify(value);
+  if (typeof serialized === 'string') {
+    return serialized;
+  }
+
+  return String(value); // fallback when JSON.stringify returns undefined (e.g. functions, symbols)
+}
+
+function __codexNormalizeOutputImageUrl(value) { // accept only non-empty http(s) or data URL strings; returns the value unchanged
+  if (typeof value !== 'string' || !value) {
+    throw new TypeError('output_image expects a non-empty image URL string');
+  }
+  if (/^(?:https?:\/\/|data:)/i.test(value)) { // prefix check only, case-insensitive; the rest of the URL is not validated
+    return value;
+  }
+  throw new TypeError('output_image expects an http(s) or data URL');
+}
+
+Object.defineProperty(globalThis, '__codexContentItems', { // expose the content array on the global object
+  value: __codexContentItems,
+  configurable: true,
+  enumerable: false,
+  writable: false, // the binding cannot be reassigned, but the array itself stays mutable
+});
+Object.defineProperty(globalThis, '__codexStoredValues', { // expose the stored-values object the same way
+  value: __codexStoredValues,
+  configurable: true,
+  enumerable: false,
+  writable: false,
+});
+Object.defineProperty(globalThis, 'ALL_TOOLS', { // frozen tool listing built at the top of this script
+  value: __codexAllTools,
+  configurable: true,
+  enumerable: false,
+  writable: false,
+});
+
+globalThis.codex = {
+  enabledTools: Object.freeze(__codexEnabledToolNames.slice()), // frozen copy, so scripts cannot alter the internal name list
+};
+
+globalThis.add_content = (value) => { // append validated content items; returns the items that were added
+  const contentItems = __codexNormalizeContentItems(value);
+  __codexContentItems.push(...contentItems);
+  return contentItems;
+};
+globalThis.__codex_output_text = (value) => { // append one input_text item holding the serialized value
+  const item = {
+    type: 'input_text',
+    text: __codexSerializeOutputText(value),
+  };
+  __codexContentItems.push(item);
+  return item;
+};
+globalThis.__codex_output_image = (value) => { // append one input_image item; throws on a non-http(s)/data URL
+  const item = {
+    type: 'input_image',
+    image_url: __codexNormalizeOutputImageUrl(value),
+  };
+  __codexContentItems.push(item);
+  return item;
+};
+globalThis.__codex_store = (key, value) => { // save a JSON-cloned copy of value under key
+  if (typeof key !== 'string') {
+    throw new TypeError('store key must be a string');
+  }
+  __codexStoredValues[key] = __codexCloneJsonValue(value); // NOTE(review): a "__proto__" key on a plain object would not create an own property — confirm
+};
+globalThis.__codex_load = (key) => { // return a JSON-cloned copy of the stored value, or undefined if absent
+  if (typeof key !== 'string') {
+    throw new TypeError('load key must be a string');
+  }
+  if (!Object.prototype.hasOwnProperty.call(__codexStoredValues, key)) { // own properties only; inherited names read as missing
+    return undefined;
+  }
+  return __codexCloneJsonValue(__codexStoredValues[key]);
+};
+globalThis.__codex_set_max_output_tokens_per_exec_call = (value) => { // validate in JS, then forward to the native binding
+  if (!Number.isSafeInteger(value) || value < 0) {
+    throw new TypeError('max_output_tokens_per_exec_call must be a non-negative safe integer');
+  }
+  return __codex_set_max_output_tokens_per_exec_call_native(value); // global function installed by the Rust host
+};
+
+globalThis.console = Object.freeze({ // console stub: every method is a no-op, so console output is discarded
+  log() {},
+  info() {},
+  warn() {},
+  error() {},
+  debug() {},
+});
+
+for (const name of __codexEnabledToolNames) { // install one async wrapper per enabled tool
+  Object.defineProperty(__codexTools, name, {
+    value: async (args) => __codex_tool_call(name, args), // __codex_tool_call is the native binding installed by the Rust host
+    configurable: false,
+    enumerable: true,
+    writable: false,
+  });
+  if (!(name in globalThis)) { // also expose as a global, but never shadow an existing global of the same name
+    Object.defineProperty(globalThis, name, {
+      value: __codexTools[name],
+      configurable: true,
+      enumerable: false,
+      writable: false,
+    });
+  }
+}
+
+Object.defineProperty(globalThis, 'tools', { // frozen after the loop, so no wrappers can be added or replaced later
+  value: Object.freeze(__codexTools),
+  configurable: true,
+  enumerable: false,
+  writable: false,
+});
--- a/codex-rs/code-mode/src/imp.rs
+++ b/codex-rs/code-mode/src/imp.rs
@@ -0,0 +1,725 @@
+use std::collections::HashMap;
+use std::pin::pin;
+use std::sync::Once;
+
+use crate::EnabledTool;
+use crate::ExecutionResult;
+use crate::ToolCallHandler;
+use serde_json::Value as JsonValue;
+
+const CODE_MODE_BOOTSTRAP_SOURCE: &str = include_str!("code_mode_bridge.js");
+const CODE_MODE_BOOTSTRAP_FILENAME: &str = "code_mode_bootstrap.js";
+const CODE_MODE_MAIN_FILENAME: &str = "code_mode_main.mjs";
+const CODE_MODE_TOOLS_MODULE_NAME: &str = "tools.js";
+const OPENAI_CODE_MODE_MODULE_NAME: &str = "@openai/code_mode";
+const OPENAI_CODE_MODE_LEGACY_MODULE_NAME: &str = "openai/code_mode";
+const DEFAULT_MAX_OUTPUT_TOKENS_PER_EXEC_CALL: usize = 10_000;
+
+static CODE_MODE_V8_INIT: Once = Once::new();
+
+struct RuntimeState { // per-execution state kept in the isolate slot so V8 callbacks can reach it
+    enabled_tools: Vec<EnabledTool>,
+    tools_module: Option<v8::Global<v8::Module>>, // None until `execute` has created the synthetic tools module
+    module_cache: HashMap<String, v8::Global<v8::Module>>, // compiled modules keyed by import specifier
+    on_tool_call: Box<ToolCallHandler>,
+    max_output_tokens_per_exec_call: usize, // starts at the default constant; reported back in ExecutionResult
+}
+
+pub fn execute( // run `code` as an ES module in a fresh V8 isolate and collect its outputs
+    code: String,
+    enabled_tools: Vec<EnabledTool>,
+    stored_values: HashMap<String, JsonValue>,
+    on_tool_call: Box<ToolCallHandler>,
+) -> Result<ExecutionResult, String> { // Err is for host/setup failures; script failures are reported inside Ok via `success`
+    init_v8(); // process-wide V8 setup, performed at most once
+
+    let bootstrap_source = build_bootstrap_source(&enabled_tools, &stored_values)?; // helper not shown here; presumably fills the bridge placeholders
+    let mut isolate = v8::Isolate::new(v8::CreateParams::default()); // a new isolate for every call
+    isolate.set_capture_stack_trace_for_uncaught_exceptions(true, 32);
+    isolate.set_host_import_module_dynamically_callback(code_mode_dynamic_import_callback); // handles `import()` expressions
+    isolate.set_slot(RuntimeState { // callbacks read this back with get_slot / get_slot_mut
+        enabled_tools: enabled_tools.clone(),
+        tools_module: None,
+        module_cache: HashMap::new(),
+        on_tool_call,
+        max_output_tokens_per_exec_call: DEFAULT_MAX_OUTPUT_TOKENS_PER_EXEC_CALL,
+    });
+
+    let scope = pin!(v8::HandleScope::new(&mut isolate));
+    let scope = &mut scope.init();
+    let context = v8::Context::new(scope, Default::default());
+    let scope = &mut v8::ContextScope::new(scope, context);
+
+    install_binding(scope, "__codex_tool_call", code_mode_tool_call_callback)?; // native globals must exist before the bootstrap script runs
+    install_binding(
+        scope,
+        "__codex_set_max_output_tokens_per_exec_call_native",
+        set_max_output_tokens_per_exec_call_callback,
+    )?;
+    run_script(scope, CODE_MODE_BOOTSTRAP_FILENAME, &bootstrap_source)?; // bootstrap runs as a classic script, not a module
+
+    let tools_module = create_tools_module(scope, &enabled_tools)?;
+    let tools_module = v8::Global::new(scope, tools_module); // Global handle so it can be stored in RuntimeState
+    let Some(runtime_state) = scope.get_slot_mut::<RuntimeState>() else {
+        return Err("code_mode runtime state missing".to_string());
+    };
+    runtime_state.tools_module = Some(tools_module); // resolve_module serves this for the "tools.js" specifier
+
+    let scope = pin!(v8::TryCatch::new(scope));
+    let scope = &mut scope.init();
+    let execution_outcome = execute_main_module(scope, &code); // deliberately not `?`: outputs are still collected after a script failure
+    let content_items = read_content_items(scope)?;
+    let stored_values = read_stored_values(scope)?;
+    let Some(runtime_state) = scope.get_slot::<RuntimeState>() else {
+        return Err("code_mode runtime state missing".to_string());
+    };
+    let (success, error_text) = match execution_outcome { // fold the script outcome into the result fields
+        Ok(()) => (true, None),
+        Err(error_text) => (false, Some(error_text)),
+    };
+    Ok(ExecutionResult {
+        content_items,
+        stored_values,
+        max_output_tokens_per_exec_call: runtime_state.max_output_tokens_per_exec_call, // reads the value held in RuntimeState after the run
+        success,
+        error_text,
+    })
+}
+
+fn init_v8() { // initialize the V8 platform and engine exactly once per process
+    CODE_MODE_V8_INIT.call_once(|| { // Once: later calls return without re-running the closure
+        let platform = v8::new_default_platform(0, false).make_shared(); // NOTE(review): argument meanings come from the v8 crate API — confirm
+        v8::V8::initialize_platform(platform);
+        v8::V8::initialize();
+    });
+}
+
+fn install_binding( // expose a native callback as a function property `name` on the context's global object
+    scope: &mut v8::PinScope<'_, '_>,
+    name: &str,
+    callback: impl v8::MapFnTo<v8::FunctionCallback>,
+) -> Result<(), String> {
+    let function = v8::Function::new(scope, callback)
+        .ok_or_else(|| format!("failed to install code_mode binding `{name}`"))?;
+    let key = v8_string(scope, name)?;
+    let global = scope.get_current_context().global(scope);
+    if global.set(scope, key.into(), function.into()).is_some() { // only Some/None is checked; the inner value is not inspected
+        Ok(())
+    } else {
+        Err(format!("failed to bind `{name}`"))
+    }
+}
+
+fn run_script( // compile and run `source` as a classic script; errors carry the formatted V8 exception
+    scope: &mut v8::PinScope<'_, '_>,
+    filename: &str,
+    source: &str,
+) -> Result<(), String> {
+    let scope = pin!(v8::TryCatch::new(scope)); // TryCatch scope so format_v8_exception has an exception to report
+    let scope = &mut scope.init();
+    let source = v8_string(scope, source)?;
+    let filename = v8_string(scope, filename)?;
+    let origin = script_origin(scope, filename, false); // false: not a module
+    let Some(script) = v8::Script::compile(scope, source, Some(&origin)) else {
+        return Err(format_v8_exception(scope)); // compile failure
+    };
+    if script.run(scope).is_none() {
+        return Err(format_v8_exception(scope)); // runtime failure
+    }
+    Ok(()) // the script's completion value is discarded
+}
+
+fn script_origin<'s>( // build a ScriptOrigin for `filename`; only the module flag varies between callers
+    scope: &mut v8::PinScope<'s, '_>,
+    filename: v8::Local<'s, v8::String>,
+    is_module: bool,
+) -> v8::ScriptOrigin<'s> {
+    v8::ScriptOrigin::new( // NOTE(review): positional arguments follow the v8 crate's ScriptOrigin::new — confirm order against its docs
+        scope,
+        filename.into(),
+        0,
+        0,
+        false,
+        0,
+        None,
+        false,
+        false,
+        is_module,
+        None,
+    )
+}
+
+fn compile_module<'s>( // compile `source_text` as an ES module named `identifier`; does not instantiate or evaluate it
+    scope: &mut v8::PinScope<'s, '_>,
+    identifier: &str,
+    source_text: &str,
+) -> Result<v8::Local<'s, v8::Module>, String> {
+    let source = v8_string(scope, source_text)?;
+    let identifier = v8_string(scope, identifier)?;
+    let origin = script_origin(scope, identifier, true); // true: treat as a module
+    let mut source = v8::script_compiler::Source::new(source, Some(&origin));
+    v8::script_compiler::compile_module(scope, &mut source)
+        .ok_or_else(|| "failed to compile code_mode module".to_string()) // fixed message; unlike execute_main_module, no V8 exception text is included
+}
+
+fn execute_main_module( // compile, instantiate and evaluate the user's code as the main ES module
+    try_catch: &mut v8::PinnedRef<'_, v8::TryCatch<v8::HandleScope>>,
+    code: &str,
+) -> Result<(), String> { // Err carries the formatted V8 exception or a fixed message
+    let source = v8_string(try_catch, code)?;
+    let identifier = v8_string(try_catch, CODE_MODE_MAIN_FILENAME)?;
+    let origin = script_origin(try_catch, identifier, true);
+    let mut source = v8::script_compiler::Source::new(source, Some(&origin));
+    let Some(module) = v8::script_compiler::compile_module(try_catch, &mut source) else {
+        return Err(format_v8_exception(try_catch)); // compile error
+    };
+    let Some(instantiated) = module.instantiate_module(try_catch, resolve_code_mode_module) else { // static imports resolve through resolve_code_mode_module
+        return Err(format_v8_exception(try_catch));
+    };
+    if !instantiated { // instantiation returned Some(false): no exception to format, so use a fixed message
+        return Err("failed to instantiate code_mode module".to_string());
+    }
+
+    let Some(result) = module.evaluate(try_catch) else {
+        return Err(format_v8_exception(try_catch));
+    };
+    if result.is_promise() { // when evaluation yields a promise, wait on it via wait_for_module_promise
+        let promise = v8::Local::<v8::Promise>::try_from(result)
+            .map_err(|_| "code_mode module evaluation did not return a promise".to_string())?;
+        wait_for_module_promise(try_catch, module, promise)?;
+    } else {
+        try_catch.perform_microtask_checkpoint(); // non-promise result: run a microtask checkpoint instead
+    }
+
+    Ok(())
+}
+
+fn create_tools_module<'s>( // build the synthetic "tools.js" module and declare its export names
+    scope: &mut v8::PinScope<'s, '_>,
+    enabled_tools: &[EnabledTool],
+) -> Result<v8::Local<'s, v8::Module>, String> {
+    let mut export_names = vec![v8_string(scope, "tools")?, v8_string(scope, "ALL_TOOLS")?]; // always exported
+    for tool in enabled_tools {
+        if tool.tool_name != "tools" && is_valid_identifier(&tool.tool_name) { // this filter must stay in sync with evaluate_tools_module
+            export_names.push(v8_string(scope, &tool.tool_name)?);
+        }
+    }
+    let module_name = v8_string(scope, CODE_MODE_TOOLS_MODULE_NAME)?;
+    Ok(v8::Module::create_synthetic_module(
+        scope,
+        module_name,
+        &export_names,
+        evaluate_tools_module, // export values are filled in by this callback
+    ))
+}
+
+fn evaluate_tools_module<'s>( // evaluation callback of the synthetic tools module: copies globals into its exports
+    context: v8::Local<'s, v8::Context>,
+    module: v8::Local<'s, v8::Module>,
+) -> Option<v8::Local<'s, v8::Value>> { // None signals failure; the error paths go through throw_v8_exception
+    v8::callback_scope!(unsafe scope, context); // introduces `scope` for the rest of this callback
+    let Some(global_name) = v8::String::new(scope, "tools") else {
+        return throw_v8_exception(scope, "failed to allocate tools export name");
+    };
+    let global = context.global(scope);
+    let Some(tools_value) = global.get(scope, global_name.into()) else { // global `tools` is defined by the bootstrap script
+        return throw_v8_exception(scope, "code_mode tools namespace missing");
+    };
+    let Ok(tools_object) = v8::Local::<v8::Object>::try_from(tools_value) else {
+        return throw_v8_exception(scope, "code_mode tools namespace is not an object");
+    };
+    module.set_synthetic_module_export(scope, global_name, tools_object.into())?; // `?` returns None if the export cannot be set
+    let Some(all_tools_name) = v8::String::new(scope, "ALL_TOOLS") else {
+        return throw_v8_exception(scope, "failed to allocate ALL_TOOLS export name");
+    };
+    let Some(all_tools_value) = global.get(scope, all_tools_name.into()) else {
+        return throw_v8_exception(scope, "code_mode ALL_TOOLS export is unavailable");
+    };
+    module.set_synthetic_module_export(scope, all_tools_name, all_tools_value)?;
+
+    let enabled_tools = match scope.get_slot::<RuntimeState>() { // cloned so the slot borrow ends before `scope` is used again
+        Some(runtime_state) => runtime_state.enabled_tools.clone(),
+        None => return throw_v8_exception(scope, "code_mode runtime state missing"),
+    };
+    for tool in &enabled_tools {
+        if !is_valid_identifier(&tool.tool_name) || tool.tool_name == "tools" { // same filter as create_tools_module
+            continue;
+        }
+        let Some(export_name) = v8::String::new(scope, &tool.tool_name) else {
+            return throw_v8_exception(scope, "failed to allocate tool export name");
+        };
+        let Some(export_value) = tools_object.get(scope, export_name.into()) else { // the wrapper is read from the `tools` object
+            return throw_v8_exception(
+                scope,
+                &format!("code_mode tool export `{}` is unavailable", tool.tool_name),
+            );
+        };
+        module.set_synthetic_module_export(scope, export_name, export_value)?;
+    }
+
+    Some(v8::undefined(scope).into()) // success value is `undefined`
+}
+
+fn resolve_code_mode_module<'s>( // resolve callback for static imports; passed to instantiate_module
+    context: v8::Local<'s, v8::Context>,
+    specifier: v8::Local<'s, v8::String>,
+    _import_attributes: v8::Local<'s, v8::FixedArray>, // unused
+    _referrer: v8::Local<'s, v8::Module>, // unused: resolution looks only at the specifier
+) -> Option<v8::Local<'s, v8::Module>> {
+    v8::callback_scope!(unsafe scope, context);
+    let specifier = specifier.to_rust_string_lossy(scope);
+    match resolve_module(scope, &specifier) {
+        Ok(module) => Some(module),
+        Err(error) => throw_v8_exception(scope, &error), // turn the Rust error string into a thrown JS exception
+    }
+}
+
+fn code_mode_dynamic_import_callback<'s, 'i>( // host hook for `import()`; the promise is settled before this function returns
+    scope: &mut v8::PinScope<'s, 'i>,
+    _host_defined_options: v8::Local<'s, v8::Data>,
+    _resource_name: v8::Local<'s, v8::Value>,
+    specifier: v8::Local<'s, v8::String>,
+    _import_attributes: v8::Local<'s, v8::FixedArray>,
+) -> Option<v8::Local<'s, v8::Promise>> {
+    let resolver = v8::PromiseResolver::new(scope)?;
+    let promise = resolver.get_promise(scope);
+    let specifier = specifier.to_rust_string_lossy(scope);
+
+    match resolve_module(scope, &specifier).and_then(|module| { // same resolver as static imports
+        instantiate_and_evaluate_module(scope, module)?;
+        Ok(module.get_module_namespace())
+    }) {
+        Ok(namespace) => {
+            let _ = resolver.resolve(scope, namespace); // resolve with the module namespace; the resolve() result is ignored
+        }
+        Err(error) => {
+            let error = v8_string(scope, &error).ok()?;
+            let _ = resolver.reject(scope, error.into()); // the rejection value is a plain string, not an Error object
+        }
+    }
+
+    Some(promise)
+}
+
+fn resolve_module<'s>( // map an import specifier to a module: tools module, cached module, or a freshly compiled one
+    scope: &mut v8::PinScope<'s, '_>,
+    specifier: &str,
+) -> Result<v8::Local<'s, v8::Module>, String> {
+    if specifier == CODE_MODE_TOOLS_MODULE_NAME { // "tools.js" is the synthetic module stored in RuntimeState
+        let Some(runtime_state) = scope.get_slot::<RuntimeState>() else {
+            return Err("code_mode runtime state missing".to_string());
+        };
+        let Some(tools_module) = runtime_state.tools_module.as_ref() else {
+            return Err("code_mode tools module missing".to_string());
+        };
+        return Ok(v8::Local::new(scope, tools_module));
+    }
+
+    if let Some(runtime_state) = scope.get_slot::<RuntimeState>() // cache hit: return the module compiled earlier for this specifier
+        && let Some(module) = runtime_state.module_cache.get(specifier)
+    {
+        return Ok(v8::Local::new(scope, module));
+    }
+
+    let enabled_tools = scope // cloned so the slot borrow ends before `scope` is used for compilation
+        .get_slot::<RuntimeState>()
+        .ok_or_else(|| "code_mode runtime state missing".to_string())?
+        .enabled_tools
+        .clone();
+    let source = build_module_source(specifier, &enabled_tools)?; // Err for unsupported specifiers
+    let module = compile_module(scope, specifier, &source)?;
+    let module_handle = v8::Global::new(scope, module);
+    let Some(runtime_state) = scope.get_slot_mut::<RuntimeState>() else {
+        return Err("code_mode runtime state missing".to_string());
+    };
+    runtime_state // cached under the exact specifier string
+        .module_cache
+        .insert(specifier.to_string(), module_handle);
+    Ok(module)
+}
+
+fn build_module_source(specifier: &str, enabled_tools: &[EnabledTool]) -> Result<String, String> { // generate JS source for a supported import specifier
+    match specifier {
+        OPENAI_CODE_MODE_MODULE_NAME | OPENAI_CODE_MODE_LEGACY_MODULE_NAME => { // "@openai/code_mode" and "openai/code_mode" yield the same module
+            Ok(build_code_mode_module_source())
+        }
+        _ => {
+            let Some(namespace) = parse_namespaced_tools_specifier(specifier) else { // anything that is not tools/<ns>.js is rejected
+                return Err(format!("Unsupported import in code_mode: {specifier}"));
+            };
+            build_namespaced_tools_module_source(enabled_tools, &namespace)
+        }
+    }
+}
+
+/// Splits a `tools/<namespace...>.js` specifier into its namespace segments.
+///
+/// Returns `None` when the `tools/` prefix or `.js` suffix is absent, or when
+/// no non-empty segment remains. Empty segments (from repeated `/`) are
+/// dropped.
+fn parse_namespaced_tools_specifier(specifier: &str) -> Option<Vec<&str>> {
+    let without_prefix = specifier.strip_prefix("tools/")?;
+    let path = without_prefix.strip_suffix(".js")?;
+
+    let mut segments = Vec::new();
+    for part in path.split('/') {
+        if !part.is_empty() {
+            segments.push(part);
+        }
+    }
+
+    if segments.is_empty() {
+        None
+    } else {
+        Some(segments)
+    }
+}
+
+/// Returns the source of the code-mode helper module: one `export const`
+/// line per helper, each re-exporting a `globalThis.__codex_*` value, joined
+/// with newlines and without a trailing newline.
+fn build_code_mode_module_source() -> String {
+    let export_lines = [
+        "export const load = globalThis.__codex_load;",
+        "export const output_text = globalThis.__codex_output_text;",
+        "export const output_image = globalThis.__codex_output_image;",
+        "export const set_max_output_tokens_per_exec_call = globalThis.__codex_set_max_output_tokens_per_exec_call;",
+        "export const store = globalThis.__codex_store;",
+    ];
+
+    let mut source = String::new();
+    for (index, line) in export_lines.iter().enumerate() {
+        if index > 0 {
+            source.push('\n');
+        }
+        source.push_str(line);
+    }
+    source
+}
+
+/// Generates the source of a `tools/<namespace...>.js` module.
+///
+/// The module builds a frozen, prototype-less `tools` object holding one
+/// read-only async wrapper per enabled tool whose namespace equals
+/// `namespace`; each wrapper forwards to `globalThis.__codex_tool_call` with
+/// the tool's `tool_name`. `tools` itself is exported, and every tool whose
+/// `name` passes `is_valid_identifier` additionally gets a named export.
+///
+/// A tool named `tools` gets no named export because that binding is already
+/// taken by the object itself; it stays reachable as `tools["tools"]`.
+///
+/// Returns an error string if a tool name cannot be serialized as a
+/// JavaScript string literal.
+fn build_namespaced_tools_module_source(
+    enabled_tools: &[EnabledTool],
+    namespace: &[&str],
+) -> Result<String, String> {
+    let mut source = String::from("const tools = Object.create(null);\n");
+    for tool in enabled_tools {
+        if !namespaces_match(&tool.namespace, namespace) {
+            continue;
+        }
+        let export_name = js_string_literal(&tool.name)?;
+        let tool_name = js_string_literal(&tool.tool_name)?;
+        source.push_str(&format!(
+            "Object.defineProperty(tools, {export_name}, {{ value: async (args) => globalThis.__codex_tool_call({tool_name}, args), configurable: false, enumerable: true, writable: false }});\n"
+        ));
+    }
+    source.push_str("Object.freeze(tools);\nexport { tools };\n");
+    for tool in enabled_tools {
+        // `export const tools = ...` would redeclare the module-level `tools`
+        // binding, which is a SyntaxError for the whole generated module.
+        if namespaces_match(&tool.namespace, namespace)
+            && is_valid_identifier(&tool.name)
+            && tool.name != "tools"
+        {
+            let export_name = js_string_literal(&tool.name)?;
+            source.push_str(&format!(
+                "export const {} = tools[{export_name}];\n",
+                tool.name
+            ));
+        }
+    }
+    Ok(source)
+}
+
+/// Returns `true` when both namespaces have the same number of segments and
+/// every pair of corresponding segments is equal.
+fn namespaces_match(left: &[String], right: &[&str]) -> bool {
+    // `Iterator::eq` compares element-wise and also requires equal lengths.
+    left.iter().map(String::as_str).eq(right.iter().copied())
+}
+
+/// Encodes `value` as a quoted string literal by serializing it with
+/// `serde_json`, mapping a serialization failure to an error string.
+fn js_string_literal(value: &str) -> Result<String, String> {
+    match serde_json::to_string(value) {
+        Ok(literal) => Ok(literal),
+        Err(err) => Err(format!("failed to serialize code_mode string: {err}")),
+    }
+}
+
+/// Instantiates `module` if necessary and then evaluates it.
+///
+/// The function inspects the module status twice: first to instantiate an
+/// uninstantiated module (using `resolve_code_mode_module` as the import
+/// resolver), then to evaluate a module that is in the instantiated state.
+/// When evaluation yields a promise, one microtask checkpoint is performed and
+/// the promise must be fulfilled afterwards.
+///
+/// Returns an error string when instantiation or evaluation fails, when the
+/// module is already instantiating or evaluating, when it is in the errored
+/// state, or when the evaluation promise is rejected or still pending.
+fn instantiate_and_evaluate_module(
+    scope: &mut v8::PinScope<'_, '_>,
+    module: v8::Local<'_, v8::Module>,
+) -> Result<(), String> {
+    // Phase 1: make sure the module is instantiated.
+    match module.get_status() {
+        v8::ModuleStatus::Uninstantiated => {
+            let Some(instantiated) = module.instantiate_module(scope, resolve_code_mode_module)
+            else {
+                return Err("failed to instantiate code_mode module".to_string());
+            };
+            if !instantiated {
+                return Err("failed to instantiate code_mode module".to_string());
+            }
+        }
+        v8::ModuleStatus::Instantiating => {
+            return Err("code_mode module is already instantiating".to_string());
+        }
+        // Already past instantiation; nothing to do in this phase.
+        v8::ModuleStatus::Instantiated
+        | v8::ModuleStatus::Evaluating
+        | v8::ModuleStatus::Evaluated => {}
+        v8::ModuleStatus::Errored => {
+            return Err(format_v8_value(scope, module.get_exception()));
+        }
+    }
+
+    // Phase 2: re-read the status (phase 1 may have changed it) and evaluate.
+    match module.get_status() {
+        v8::ModuleStatus::Instantiated => {
+            let Some(result) = module.evaluate(scope) else {
+                return Err("failed to evaluate code_mode module".to_string());
+            };
+            if result.is_promise() {
+                let promise = v8::Local::<v8::Promise>::try_from(result).map_err(|_| {
+                    "code_mode module evaluation did not return a promise".to_string()
+                })?;
+                // Run queued microtasks once so the promise gets a chance to settle.
+                scope.perform_microtask_checkpoint();
+                match promise.state() {
+                    v8::PromiseState::Fulfilled => {}
+                    v8::PromiseState::Rejected => {
+                        return Err(format_v8_value(scope, promise.result(scope)));
+                    }
+                    v8::PromiseState::Pending => {
+                        return Err("code_mode module evaluation did not settle".to_string());
+                    }
+                }
+            }
+        }
+        v8::ModuleStatus::Evaluated => {}
+        v8::ModuleStatus::Evaluating => {
+            return Err("code_mode module is already evaluating".to_string());
+        }
+        v8::ModuleStatus::Errored => {
+            return Err(format_v8_value(scope, module.get_exception()));
+        }
+        v8::ModuleStatus::Uninstantiated | v8::ModuleStatus::Instantiating => {}
+    }
+
+    Ok(())
+}
+
+/// V8 function callback that performs a tool call and returns a promise.
+///
+/// The promise is installed as the return value before the tool runs. The
+/// tool call itself (`run_tool_call`) executes inside this callback; its JSON
+/// result is converted with `json_to_v8` and used to resolve the promise, and
+/// an error string is used to reject it.
+///
+/// If the promise resolver cannot be created the callback returns without
+/// setting a return value.
+fn code_mode_tool_call_callback(
+    scope: &mut v8::PinScope<'_, '_>,
+    args: v8::FunctionCallbackArguments,
+    mut rv: v8::ReturnValue<v8::Value>,
+) {
+    let Some(resolver) = v8::PromiseResolver::new(scope) else {
+        return;
+    };
+    let promise = resolver.get_promise(scope);
+    rv.set(promise.into());
+
+    let result = run_tool_call(scope, &args).and_then(|value| json_to_v8(scope, &value));
+    match result {
+        Ok(value) => {
+            let _ = resolver.resolve(scope, value);
+        }
+        Err(error) => {
+            // The rejection reason is the error message as a plain V8 string.
+            // If that string cannot be allocated the promise is left unsettled.
+            if let Some(error) = v8::String::new(scope, &error) {
+                let _ = resolver.reject(scope, error.into());
+            }
+        }
+    }
+}
+
+/// V8 function callback that stores a new `max_output_tokens_per_exec_call`
+/// in the `RuntimeState` slot and returns the stored value as a number.
+///
+/// The first argument must be a JavaScript number holding a non-negative safe
+/// integer. Non-number arguments, fractional values, `NaN`, infinities,
+/// negative values, and values that do not fit in `usize` are rejected by
+/// throwing an exception rather than being coerced. An exception is also
+/// thrown when the runtime state slot is missing.
+fn set_max_output_tokens_per_exec_call_callback(
+    scope: &mut v8::PinScope<'_, '_>,
+    args: v8::FunctionCallbackArguments,
+    mut rv: v8::ReturnValue<v8::Value>,
+) {
+    const INVALID_VALUE_MESSAGE: &str =
+        "max_output_tokens_per_exec_call must be a non-negative safe integer";
+    // Largest integer an f64 represents exactly (JavaScript's Number.MAX_SAFE_INTEGER).
+    const MAX_SAFE_INTEGER: f64 = 9_007_199_254_740_991.0;
+
+    // Only genuine numbers are accepted; `integer_value` alone would coerce
+    // strings, `undefined`, and fractions into an integer without complaint.
+    let argument = args.get(0);
+    let number = if argument.is_number() {
+        argument.number_value(scope)
+    } else {
+        None
+    };
+    let value = match number {
+        Some(number)
+            if number.fract() == 0.0 && (0.0..=MAX_SAFE_INTEGER).contains(&number) =>
+        {
+            // Exact conversion: the guard limits `number` to integral values
+            // that fit in an i64; `try_from` still guards narrower `usize`.
+            usize::try_from(number as i64).ok()
+        }
+        _ => None,
+    };
+    let Some(value) = value else {
+        let _ = throw_v8_exception::<v8::Value>(scope, INVALID_VALUE_MESSAGE);
+        return;
+    };
+
+    let Some(runtime_state) = scope.get_slot_mut::<RuntimeState>() else {
+        let _ = throw_v8_exception::<v8::Value>(scope, "code_mode runtime state missing");
+        return;
+    };
+    runtime_state.max_output_tokens_per_exec_call = value;
+    rv.set(v8::Number::new(scope, value as f64).into());
+}
+
+/// Extracts the tool name and input from the callback arguments and invokes
+/// the `on_tool_call` handler stored in `RuntimeState`.
+///
+/// Argument 0 is converted to a string and used as the tool name; argument 1
+/// is converted to JSON with `json_from_v8` (so `undefined` becomes `None`).
+///
+/// Returns the handler's result, or an error string when the name cannot be
+/// converted, the input is not JSON-serializable, or the runtime state slot
+/// is missing.
+fn run_tool_call(
+    scope: &mut v8::PinScope<'_, '_>,
+    args: &v8::FunctionCallbackArguments,
+) -> Result<JsonValue, String> {
+    let tool_name = args
+        .get(0)
+        .to_string(scope)
+        .ok_or_else(|| "code_mode tool call requires a tool name".to_string())?
+        .to_rust_string_lossy(scope);
+    let input = json_from_v8(scope, args.get(1))?;
+
+    let Some(runtime_state) = scope.get_slot_mut::<RuntimeState>() else {
+        return Err("code_mode runtime state missing".to_string());
+    };
+    (runtime_state.on_tool_call)(tool_name, input)
+}
+
+/// Drives the microtask queue until the module evaluation `promise` settles.
+///
+/// Up to 32 microtask checkpoints are performed while the promise is pending,
+/// and the promise state is inspected once more after the last checkpoint so
+/// that a promise settling on the final pass is still honoured.
+///
+/// Returns `Ok(())` when the promise is fulfilled. Returns an error string
+/// when it is rejected (formatted with `format_v8_value`) or when it is still
+/// pending afterwards; in the latter case the first stalled top-level-await
+/// message reported for `module` is used as `filename:line: message`, falling
+/// back to a generic message when none is reported.
+fn wait_for_module_promise(
+    scope: &mut v8::PinScope<'_, '_>,
+    module: v8::Local<'_, v8::Module>,
+    promise: v8::Local<'_, v8::Promise>,
+) -> Result<(), String> {
+    for _ in 0..32 {
+        if !matches!(promise.state(), v8::PromiseState::Pending) {
+            break;
+        }
+        scope.perform_microtask_checkpoint();
+    }
+
+    // Checked after the loop so the outcome of the final checkpoint counts.
+    match promise.state() {
+        v8::PromiseState::Fulfilled => return Ok(()),
+        v8::PromiseState::Rejected => {
+            return Err(format_v8_value(scope, promise.result(scope)));
+        }
+        v8::PromiseState::Pending => {}
+    }
+
+    let stalled = module.get_stalled_top_level_await_message(scope);
+    if let Some((_module, message)) = stalled.into_iter().next() {
+        let pending = message.get(scope).to_rust_string_lossy(scope);
+        let filename = message
+            .get_script_resource_name(scope)
+            .map(|name| name.to_rust_string_lossy(scope))
+            .unwrap_or_else(|| CODE_MODE_MAIN_FILENAME.to_string());
+        let line = message.get_line_number(scope).unwrap_or_default();
+        return Err(format!("{filename}:{line}: {pending}"));
+    }
+
+    Err("code_mode top-level await did not settle".to_string())
+}
+
+/// Reads `globalThis.__codexContentItems` as a list of JSON values, using an
+/// empty array when the global is null or undefined.
+fn read_content_items(scope: &mut v8::PinScope<'_, '_>) -> Result<Vec<JsonValue>, String> {
+    let expression = "globalThis.__codexContentItems ?? []";
+    read_json_global(scope, expression)
+}
+
+/// Reads `globalThis.__codexStoredValues` as a string-keyed JSON map, using an
+/// empty object when the global is null or undefined.
+fn read_stored_values(
+    scope: &mut v8::PinScope<'_, '_>,
+) -> Result<HashMap<String, JsonValue>, String> {
+    let expression = "globalThis.__codexStoredValues ?? {}";
+    read_json_global(scope, expression)
+}
+
+/// Evaluates a JavaScript expression and deserializes its JSON form into `T`.
+///
+/// The expression is wrapped as `JSON.stringify(<expression>)`, compiled and
+/// run as a script, and the resulting string is parsed with `serde_json`.
+///
+/// Returns an error string naming the expression when compiling, running,
+/// stringifying, or deserializing fails.
+fn read_json_global<T>(scope: &mut v8::PinScope<'_, '_>, expression: &str) -> Result<T, String>
+where
+    T: serde::de::DeserializeOwned,
+{
+    let source = v8_string(scope, &format!("JSON.stringify({expression})"))?;
+    let script = v8::Script::compile(scope, source, None)
+        .ok_or_else(|| format!("failed to read {expression}"))?;
+    let value = script
+        .run(scope)
+        .ok_or_else(|| format!("failed to evaluate {expression}"))?;
+    // Convert the script result to a Rust string before handing it to serde.
+    let serialized = value
+        .to_string(scope)
+        .ok_or_else(|| format!("failed to serialize {expression}"))?
+        .to_rust_string_lossy(scope);
+    serde_json::from_str(&serialized).map_err(|err| format!("invalid {expression}: {err}"))
+}
+
+/// Produces the bootstrap script by substituting the JSON-serialized enabled
+/// tools and stored values into `CODE_MODE_BOOTSTRAP_SOURCE`.
+///
+/// Returns an error string when either value fails to serialize.
+fn build_bootstrap_source(
+    enabled_tools: &[EnabledTool],
+    stored_values: &HashMap<String, JsonValue>,
+) -> Result<String, String> {
+    let tools_json = match serde_json::to_string(enabled_tools) {
+        Ok(json) => json,
+        Err(err) => return Err(format!("failed to serialize enabled tools: {err}")),
+    };
+    let values_json = match serde_json::to_string(stored_values) {
+        Ok(json) => json,
+        Err(err) => {
+            return Err(format!(
+                "failed to serialize code_mode stored values: {err}"
+            ));
+        }
+    };
+
+    let with_tools =
+        CODE_MODE_BOOTSTRAP_SOURCE.replace("__CODE_MODE_ENABLED_TOOLS_PLACEHOLDER__", &tools_json);
+    // The second substitution runs over the output of the first one.
+    let with_values = with_tools.replace("__CODE_MODE_STORED_VALUES_PLACEHOLDER__", &values_json);
+    Ok(with_values)
+}
+
+/// Creates a V8 string from `text`, turning an allocation failure into an
+/// error string.
+fn v8_string<'s>(
+    scope: &mut v8::PinScope<'s, '_>,
+    text: &str,
+) -> Result<v8::Local<'s, v8::String>, String> {
+    match v8::String::new(scope, text) {
+        Some(string) => Ok(string),
+        None => Err("failed to allocate V8 string".to_string()),
+    }
+}
+
+/// Converts a V8 value into a `serde_json` value.
+///
+/// `undefined` maps to `Ok(None)`. Any other value is stringified with
+/// `v8::json::stringify` and the resulting text is parsed with `serde_json`.
+///
+/// Returns an error string when the value cannot be stringified or when the
+/// stringified text is not valid JSON.
+fn json_from_v8(
+    scope: &mut v8::PinScope<'_, '_>,
+    value: v8::Local<'_, v8::Value>,
+) -> Result<Option<JsonValue>, String> {
+    if value.is_undefined() {
+        return Ok(None);
+    }
+
+    let Some(serialized) = v8::json::stringify(scope, value) else {
+        return Err("code_mode tool arguments must be JSON-serializable".to_string());
+    };
+    let serialized = serialized.to_rust_string_lossy(scope);
+    serde_json::from_str(&serialized)
+        .map(Some)
+        .map_err(|err| format!("invalid code_mode tool arguments: {err}"))
+}
+
+/// Converts a `serde_json` value into a V8 value by serializing it to text
+/// and parsing that text with `v8::json::parse`.
+///
+/// Returns an error string when serialization, V8 string allocation, or
+/// parsing fails.
+fn json_to_v8<'s>(
+    scope: &mut v8::PinScope<'s, '_>,
+    value: &JsonValue,
+) -> Result<v8::Local<'s, v8::Value>, String> {
+    let serialized = serde_json::to_string(value)
+        .map_err(|err| format!("failed to serialize code_mode tool response: {err}"))?;
+    let serialized = v8_string(scope, &serialized)?;
+    v8::json::parse(scope, serialized)
+        .ok_or_else(|| "failed to deserialize code_mode tool response into V8".to_string())
+}
+
+/// Throws `message` as a JavaScript exception whose value is a plain string.
+///
+/// Always returns `None`, so a caller can return the result directly from a
+/// function that yields `Option<v8::Local<T>>`. If the message string cannot
+/// be allocated, nothing is thrown.
+fn throw_v8_exception<'s, T>(
+    scope: &mut v8::PinScope<'s, '_>,
+    message: &str,
+) -> Option<v8::Local<'s, T>> {
+    if let Some(message) = v8::String::new(scope, message) {
+        scope.throw_exception(message.into());
+    }
+    None
+}
+
+/// Builds a human-readable description of the exception held by `try_catch`.
+///
+/// Preference order:
+/// 1. the stack trace, when one is available and not blank;
+/// 2. `filename:line: <exception>` when a message object is available;
+/// 3. the exception converted to a string.
+///
+/// Falls back to "JavaScript execution failed" when there is no exception or
+/// it cannot be converted to a string.
+fn format_v8_exception(try_catch: &mut v8::PinnedRef<'_, v8::TryCatch<v8::HandleScope>>) -> String {
+    let Some(exception) = try_catch.exception() else {
+        return "JavaScript execution failed".to_string();
+    };
+
+    if let Some(stack_trace) = try_catch.stack_trace()
+        && let Some(stack_trace) = stack_trace.to_string(try_catch)
+    {
+        let stack_trace = stack_trace.to_rust_string_lossy(try_catch);
+        if !stack_trace.trim().is_empty() {
+            return stack_trace;
+        }
+    }
+
+    let exception_string = exception
+        .to_string(try_catch)
+        .map(|value| value.to_rust_string_lossy(try_catch))
+        .unwrap_or_else(|| "JavaScript execution failed".to_string());
+    let Some(message) = try_catch.message() else {
+        return exception_string;
+    };
+
+    // Prefix the exception text with the script location taken from the message.
+    let filename = message
+        .get_script_resource_name(try_catch)
+        .and_then(|value| value.to_string(try_catch))
+        .map(|value| value.to_rust_string_lossy(try_catch))
+        .unwrap_or_else(|| "(unknown)".to_string());
+    let line = message.get_line_number(try_catch).unwrap_or_default();
+    format!("{filename}:{line}: {exception_string}")
+}
+
+fn format_v8_value(scope: &mut v8::PinScope<'_, '_>, value: v8::Local<'_, v8::Value>) -> String {
+    if value.is_object()
+        && let Ok(object) = v8::Local::<v8::Object>::try_from(value)
+        && let Some(stack_key) = v8::String::new(scope, "stack")
+        && let Some(stack_value) = object.get(scope, stack_key.into())
+        && let Some(stack_value) = stack_value.to_string(scope)
+    {
+        let stack_value = stack_value.to_rust_string_lossy(scope);
+        if !stack_value.trim().is_empty() {
+            return stack_value;
+        }
+    }
+
+    value
+        .to_string(scope)
+        .map(|value| value.to_rust_string_lossy(scope))
+        .unwrap_or_else(|| "JavaScript execution failed".to_string())
+}
+
+/// Returns `true` when `name` can be used as a binding name in generated
+/// module source such as `export const <name> = ...`.
+///
+/// A valid name starts with an ASCII letter, `_`, or `$`, continues with
+/// ASCII letters, digits, `_`, or `$`, and is not a word that JavaScript
+/// module code forbids as a binding name (reserved words, strict-mode
+/// reserved words, `await`, `eval`, and `arguments`).
+fn is_valid_identifier(name: &str) -> bool {
+    // Declaring any of these with `const` in a module is a SyntaxError, which
+    // would make the whole generated module fail to compile.
+    const RESERVED_WORDS: &[&str] = &[
+        "arguments",
+        "await",
+        "break",
+        "case",
+        "catch",
+        "class",
+        "const",
+        "continue",
+        "debugger",
+        "default",
+        "delete",
+        "do",
+        "else",
+        "enum",
+        "eval",
+        "export",
+        "extends",
+        "false",
+        "finally",
+        "for",
+        "function",
+        "if",
+        "implements",
+        "import",
+        "in",
+        "instanceof",
+        "interface",
+        "let",
+        "new",
+        "null",
+        "package",
+        "private",
+        "protected",
+        "public",
+        "return",
+        "static",
+        "super",
+        "switch",
+        "this",
+        "throw",
+        "true",
+        "try",
+        "typeof",
+        "var",
+        "void",
+        "while",
+        "with",
+        "yield",
+    ];
+
+    let mut chars = name.chars();
+    match chars.next() {
+        Some(c) if c == '_' || c == '$' || c.is_ascii_alphabetic() => {}
+        _ => return false,
+    }
+    chars.all(|c| c == '_' || c == '$' || c.is_ascii_alphanumeric())
+        && !RESERVED_WORDS.contains(&name)
+}
--- a/codex-rs/code-mode/src/lib.rs
+++ b/codex-rs/code-mode/src/lib.rs
@@ -0,0 +1,39 @@
+mod api;
+pub use api::EnabledTool;
+pub use api::ExecutionResult;
+pub use api::ToolCallHandler;
+pub use api::ToolKind;
+
+/// Message returned by `unsupported_reason` and by the stub `execute` on
+/// musl Linux targets.
+const MUSL_UNSUPPORTED_REASON: &str = "code_mode is unavailable on musl Linux";
+
+/// Reports whether code mode is available for the compilation target: every
+/// target except Linux with the musl environment.
+pub const fn is_supported() -> bool {
+    cfg!(not(all(target_os = "linux", target_env = "musl")))
+}
+
+/// Returns why code mode is unavailable, or `None` when `is_supported()` is
+/// true.
+pub fn unsupported_reason() -> Option<&'static str> {
+    (!is_supported()).then_some(MUSL_UNSUPPORTED_REASON)
+}
+
+#[cfg(not(all(target_os = "linux", target_env = "musl")))]
+mod imp;
+
+#[cfg(not(all(target_os = "linux", target_env = "musl")))]
+pub use imp::execute;
+
+/// Stub compiled only for musl Linux targets, where the `imp` module is
+/// excluded: ignores every argument and always fails with
+/// `MUSL_UNSUPPORTED_REASON`.
+#[cfg(all(target_os = "linux", target_env = "musl"))]
+pub fn execute(
+    code: String,
+    enabled_tools: Vec<EnabledTool>,
+    stored_values: std::collections::HashMap<String, serde_json::Value>,
+    on_tool_call: Box<ToolCallHandler>,
+) -> Result<ExecutionResult, String> {
+    // Explicitly mark the parameters as intentionally unused.
+    let _ = code;
+    let _ = enabled_tools;
+    let _ = stored_values;
+    let _ = on_tool_call;
+    Err(MUSL_UNSUPPORTED_REASON.to_string())
+}
--- a/codex-rs/core/Cargo.toml
+++ b/codex-rs/core/Cargo.toml
@@ -32,6 +32,7 @@ codex-app-server-protocol = { workspace = true }
 codex-apply-patch = { workspace = true }
 codex-async-utils = { workspace = true }
 codex-client = { workspace = true }
+codex-code-mode = { workspace = true }
 codex-config = { workspace = true }
 codex-shell-command = { workspace = true }
 codex-skills = { workspace = true }
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -390,6 +390,18 @@ impl Codex {
            let _ = config.features.disable(Feature::Collab);
        }

+        if config.features.enabled(Feature::CodeMode)
+            && let Some(reason) = codex_code_mode::unsupported_reason()
+        {
+            let _ = config.features.disable(Feature::CodeMode);
+            let message = format!(
+                "Disabled `{}` for this session because it is unavailable: {reason}",
+                crate::tools::code_mode::PUBLIC_TOOL_NAME
+            );
+            warn!("{message}");
+            config.startup_warnings.push(message);
+        }
+
        if config.features.enabled(Feature::JsRepl)
            && let Err(err) = resolve_compatible_node(config.js_repl_node_path.as_deref()).await
        {
@@ -407,17 +419,6 @@ impl Codex {
            warn!("{message}");
            config.startup_warnings.push(message);
        }
-        if config.features.enabled(Feature::CodeMode)
-            && let Err(err) = resolve_compatible_node(config.js_repl_node_path.as_deref()).await
-        {
-            let message = format!(
-                "Disabled `exec` for this session because the configured Node runtime is unavailable or incompatible. {err}"
-            );
-            warn!("{message}");
-            let _ = config.features.disable(Feature::CodeMode);
-            config.startup_warnings.push(message);
-        }
-
        let allowed_skills_for_implicit_invocation =
            loaded_skills.allowed_skills_for_implicit_invocation();
        let user_instructions = get_user_instructions(
--- a/codex-rs/core/src/tools/code_mode.rs
+++ b/codex-rs/core/src/tools/code_mode.rs
@@ -1,4 +1,3 @@
-use std::collections::HashMap;
 use std::sync::Arc;
 use std::time::Duration;

@@ -6,7 +5,6 @@ use crate::client_common::tools::ToolSpec;
 use crate::codex::Session;
 use crate::codex::TurnContext;
 use crate::config::Config;
-use crate::exec_env::create_env;
 use crate::features::Feature;
 use crate::function_tool::FunctionCallError;
 use crate::tools::ToolRouter;
@@ -15,24 +13,20 @@ use crate::tools::code_mode_description::code_mode_tool_reference;
 use crate::tools::context::FunctionToolOutput;
 use crate::tools::context::SharedTurnDiffTracker;
 use crate::tools::context::ToolPayload;
-use crate::tools::js_repl::resolve_compatible_node;
 use crate::tools::router::ToolCall;
 use crate::tools::router::ToolCallSource;
 use crate::truncate::TruncationPolicy;
 use crate::truncate::formatted_truncate_text_content_items_with_policy;
 use crate::truncate::truncate_function_output_items_with_policy;
 use crate::unified_exec::resolve_max_tokens;
+use codex_code_mode::EnabledTool;
+use codex_code_mode::ToolKind as CodeModeToolKind;
+use codex_code_mode::execute as execute_code_mode;
 use codex_protocol::models::FunctionCallOutputContentItem;
-use serde::Deserialize;
-use serde::Serialize;
 use serde_json::Value as JsonValue;
-use tokio::io::AsyncBufReadExt;
-use tokio::io::AsyncReadExt;
-use tokio::io::AsyncWriteExt;
-use tokio::io::BufReader;
+use tokio::runtime::Handle;
+use tokio::runtime::RuntimeFlavor;

-const CODE_MODE_RUNNER_SOURCE: &str = include_str!("code_mode_runner.cjs");
-const CODE_MODE_BRIDGE_SOURCE: &str = include_str!("code_mode_bridge.js");
 pub(crate) const PUBLIC_TOOL_NAME: &str = "exec";

 #[derive(Clone)]
@@ -41,66 +35,14 @@ struct ExecContext {
    turn: Arc<TurnContext>,
    tracker: SharedTurnDiffTracker,
 }
-
-#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
-#[serde(rename_all = "snake_case")]
-enum CodeModeToolKind {
-    Function,
-    Freeform,
-}
-
-#[derive(Clone, Debug, Serialize)]
-struct EnabledTool {
-    tool_name: String,
-    #[serde(rename = "module")]
-    module_path: String,
-    namespace: Vec<String>,
-    name: String,
-    description: String,
-    kind: CodeModeToolKind,
-}
-
-#[derive(Serialize)]
-#[serde(tag = "type", rename_all = "snake_case")]
-enum HostToNodeMessage {
-    Init {
-        enabled_tools: Vec<EnabledTool>,
-        stored_values: HashMap<String, JsonValue>,
-        source: String,
-    },
-    Response {
-        id: String,
-        code_mode_result: JsonValue,
-    },
-}
-
-#[derive(Deserialize)]
-#[serde(tag = "type", rename_all = "snake_case")]
-enum NodeToHostMessage {
-    ToolCall {
-        id: String,
-        name: String,
-        #[serde(default)]
-        input: Option<JsonValue>,
-    },
-    Result {
-        content_items: Vec<JsonValue>,
-        stored_values: HashMap<String, JsonValue>,
-        #[serde(default)]
-        error_text: Option<String>,
-        #[serde(default)]
-        max_output_tokens_per_exec_call: Option<usize>,
-    },
-}
-
 pub(crate) fn instructions(config: &Config) -> Option<String> {
-    if !config.features.enabled(Feature::CodeMode) {
+    if !config.features.enabled(Feature::CodeMode) || !codex_code_mode::is_supported() {
        return None;
    }

    let mut section = String::from("## Exec\n");
    section.push_str(&format!(
-        "- Use `{PUBLIC_TOOL_NAME}` for JavaScript execution in a Node-backed `node:vm` context.\n",
+        "- Use `{PUBLIC_TOOL_NAME}` for JavaScript execution in an embedded V8 runtime.\n",
    ));
    section.push_str(&format!(
        "- `{PUBLIC_TOOL_NAME}` is a freeform/custom tool. Direct `{PUBLIC_TOOL_NAME}` calls must send raw JavaScript tool input. Do not wrap code in JSON, quotes, or markdown code fences.\n",
@@ -108,10 +50,7 @@ pub(crate) fn instructions(config: &Config) -> Option<String> {
    section.push_str(&format!(
        "- Direct tool calls remain available while `{PUBLIC_TOOL_NAME}` is enabled.\n",
    ));
-    section.push_str(&format!(
-        "- `{PUBLIC_TOOL_NAME}` uses the same Node runtime resolution as `js_repl`. If needed, point `js_repl_node_path` at the Node binary you want Codex to use.\n",
-    ));
-    section.push_str("- Import nested tools from `tools.js`, for example `import { exec_command } from \"tools.js\"` or `import { ALL_TOOLS } from \"tools.js\"` to inspect the available `{ module, name, description }` entries. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import { append_notebook_logs_chart } from \"tools/mcp/ologs.js\"`. Nested tool calls resolve to their code-mode result values.\n");
+    section.push_str("- Import nested tools from `tools.js`, for example `import { exec_command } from \"tools.js\"`, `import { tools } from \"tools.js\"`, or `import { ALL_TOOLS } from \"tools.js\"` to inspect the available `{ module, name, description }` entries. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import { append_notebook_logs_chart } from \"tools/mcp/ologs.js\"`. `tools[name]` and identifier wrappers like `await exec_command(args)` remain available for compatibility. Nested tool calls resolve to their code-mode result values.\n");
    section.push_str(&format!(
        "- Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call, store, load }}` from `@openai/code_mode` (or `\"openai/code_mode\"`). `output_text(value)` surfaces text back to the model and stringifies non-string objects with `JSON.stringify(...)` when possible. `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs. `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, and `load(key)` returns a cloned stored value or `undefined`. `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `{PUBLIC_TOOL_NAME}` execution; the default is `10000`. This guards the overall `{PUBLIC_TOOL_NAME}` output, not individual nested tool invocations. The returned content starts with a separate `Script completed` or `Script failed` text item that includes wall time. When truncation happens, the final text may include `Total output lines:` and the usual `…N tokens truncated…` marker.\n",
    ));
@@ -137,173 +76,38 @@ pub(crate) async fn execute(
    };
    let enabled_tools = build_enabled_tools(&exec).await;
    let stored_values = exec.session.services.code_mode_store.stored_values().await;
-    let source = build_source(&code, &enabled_tools).map_err(FunctionCallError::RespondToModel)?;
-    execute_node(exec, source, enabled_tools, stored_values)
-        .await
-        .map_err(FunctionCallError::RespondToModel)
-}
-
-async fn execute_node(
-    exec: ExecContext,
-    source: String,
-    enabled_tools: Vec<EnabledTool>,
-    stored_values: HashMap<String, JsonValue>,
-) -> Result<FunctionToolOutput, String> {
-    let node_path = resolve_compatible_node(exec.turn.config.js_repl_node_path.as_deref()).await?;
    let started_at = std::time::Instant::now();
-
-    let env = create_env(&exec.turn.shell_environment_policy, None);
-    let mut cmd = tokio::process::Command::new(&node_path);
-    cmd.arg("--experimental-vm-modules");
-    cmd.arg("--eval");
-    cmd.arg(CODE_MODE_RUNNER_SOURCE);
-    cmd.current_dir(&exec.turn.cwd);
-    cmd.env_clear();
-    cmd.envs(env);
-    cmd.stdin(std::process::Stdio::piped())
-        .stdout(std::process::Stdio::piped())
-        .stderr(std::process::Stdio::piped())
-        .kill_on_drop(true);
-
-    let mut child = cmd
-        .spawn()
-        .map_err(|err| format!("failed to start {PUBLIC_TOOL_NAME} Node runtime: {err}"))?;
-    let stdout = child
-        .stdout
-        .take()
-        .ok_or_else(|| format!("{PUBLIC_TOOL_NAME} runner missing stdout"))?;
-    let stderr = child
-        .stderr
-        .take()
-        .ok_or_else(|| format!("{PUBLIC_TOOL_NAME} runner missing stderr"))?;
-    let mut stdin = child
-        .stdin
-        .take()
-        .ok_or_else(|| format!("{PUBLIC_TOOL_NAME} runner missing stdin"))?;
-
-    let stderr_task = tokio::spawn(async move {
-        let mut reader = BufReader::new(stderr);
-        let mut buf = Vec::new();
-        let _ = reader.read_to_end(&mut buf).await;
-        String::from_utf8_lossy(&buf).trim().to_string()
-    });
-
-    write_message(
-        &mut stdin,
-        &HostToNodeMessage::Init {
-            enabled_tools: enabled_tools.clone(),
-            stored_values,
-            source,
-        },
+    let callback_exec = exec.clone();
+    let result = execute_code_mode(
+        code,
+        enabled_tools,
+        stored_values,
+        Box::new(move |tool_name, input| run_tool_call(&callback_exec, tool_name, input)),
    )
-    .await?;
-
-    let mut stdout_lines = BufReader::new(stdout).lines();
-    let mut pending_result = None;
-    while let Some(line) = stdout_lines
-        .next_line()
-        .await
-        .map_err(|err| format!("failed to read {PUBLIC_TOOL_NAME} runner stdout: {err}"))?
-    {
-        if line.trim().is_empty() {
-            continue;
-        }
-        let message: NodeToHostMessage = serde_json::from_str(&line).map_err(|err| {
-            format!("invalid {PUBLIC_TOOL_NAME} runner message: {err}; line={line}")
-        })?;
-        match message {
-            NodeToHostMessage::ToolCall { id, name, input } => {
-                let response = HostToNodeMessage::Response {
-                    id,
-                    code_mode_result: call_nested_tool(exec.clone(), name, input).await,
-                };
-                write_message(&mut stdin, &response).await?;
-            }
-            NodeToHostMessage::Result {
-                content_items,
-                stored_values,
-                error_text,
-                max_output_tokens_per_exec_call,
-            } => {
-                exec.session
-                    .services
-                    .code_mode_store
-                    .replace_stored_values(stored_values)
-                    .await;
-                pending_result = Some((
-                    output_content_items_from_json_values(content_items)?,
-                    error_text,
-                    max_output_tokens_per_exec_call,
-                ));
-                break;
-            }
-        }
-    }
-
-    drop(stdin);
-
-    let status = child
-        .wait()
-        .await
-        .map_err(|err| format!("failed to wait for {PUBLIC_TOOL_NAME} runner: {err}"))?;
-    let stderr = stderr_task
-        .await
-        .map_err(|err| format!("failed to collect {PUBLIC_TOOL_NAME} stderr: {err}"))?;
-    let wall_time = started_at.elapsed();
-    let success = status.success();
-
-    let Some((mut content_items, error_text, max_output_tokens_per_exec_call)) = pending_result
-    else {
-        let message = if stderr.is_empty() {
-            format!("{PUBLIC_TOOL_NAME} runner exited without returning a result (status {status})")
-        } else {
-            stderr
-        };
-        return Err(message);
-    };
-
-    if !success {
-        let error_text = error_text.unwrap_or_else(|| {
-            if stderr.is_empty() {
-                format!("Process exited with status {status}")
-            } else {
-                stderr
-            }
-        });
-        content_items.push(FunctionCallOutputContentItem::InputText {
+    .map_err(FunctionCallError::RespondToModel)?;
+    exec.session
+        .services
+        .code_mode_store
+        .replace_stored_values(result.stored_values)
+        .await;
+    let mut items = output_content_items_from_json_values(result.content_items)
+        .map_err(FunctionCallError::RespondToModel)?;
+    if !result.success {
+        let error_text = result
+            .error_text
+            .unwrap_or_else(|| "JavaScript execution failed".to_string());
+        items.push(FunctionCallOutputContentItem::InputText {
            text: format!("Script error:\n{error_text}"),
        });
    }
-
-    let mut content_items =
-        truncate_code_mode_result(content_items, max_output_tokens_per_exec_call);
-    prepend_script_status(&mut content_items, success, wall_time);
+    let mut items = truncate_code_mode_result(items, Some(result.max_output_tokens_per_exec_call));
+    prepend_script_status(&mut items, result.success, started_at.elapsed());
    Ok(FunctionToolOutput::from_content(
-        content_items,
-        Some(success),
+        items,
+        Some(result.success),
    ))
 }

-async fn write_message(
-    stdin: &mut tokio::process::ChildStdin,
-    message: &HostToNodeMessage,
-) -> Result<(), String> {
-    let line = serde_json::to_string(message)
-        .map_err(|err| format!("failed to serialize {PUBLIC_TOOL_NAME} message: {err}"))?;
-    stdin
-        .write_all(line.as_bytes())
-        .await
-        .map_err(|err| format!("failed to write {PUBLIC_TOOL_NAME} message: {err}"))?;
-    stdin
-        .write_all(b"\n")
-        .await
-        .map_err(|err| format!("failed to write {PUBLIC_TOOL_NAME} message newline: {err}"))?;
-    stdin
-        .flush()
-        .await
-        .map_err(|err| format!("failed to flush {PUBLIC_TOOL_NAME} message: {err}"))
-}
-
 fn prepend_script_status(
    content_items: &mut Vec<FunctionCallOutputContentItem>,
    success: bool,
@@ -321,17 +125,6 @@ fn prepend_script_status(
    content_items.insert(0, FunctionCallOutputContentItem::InputText { text: header });
 }

-fn build_source(user_code: &str, enabled_tools: &[EnabledTool]) -> Result<String, String> {
-    let enabled_tools_json = serde_json::to_string(enabled_tools)
-        .map_err(|err| format!("failed to serialize enabled tools: {err}"))?;
-    Ok(CODE_MODE_BRIDGE_SOURCE
-        .replace(
-            "__CODE_MODE_ENABLED_TOOLS_PLACEHOLDER__",
-            &enabled_tools_json,
-        )
-        .replace("__CODE_MODE_USER_CODE_PLACEHOLDER__", user_code))
-}
-
 fn truncate_code_mode_result(
    items: Vec<FunctionCallOutputContentItem>,
    max_output_tokens_per_exec_call: Option<usize>,
@@ -411,32 +204,45 @@ async fn build_nested_router(exec: &ExecContext) -> ToolRouter {
    )
 }

+fn run_tool_call(
+    exec: &ExecContext,
+    tool_name: String,
+    input: Option<JsonValue>,
+) -> Result<JsonValue, String> {
+    match Handle::current().runtime_flavor() {
+        RuntimeFlavor::MultiThread => tokio::task::block_in_place(|| {
+            Handle::current().block_on(call_nested_tool(exec.clone(), tool_name, input))
+        }),
+        RuntimeFlavor::CurrentThread => Err(format!(
+            "{PUBLIC_TOOL_NAME} tool calls require a multi-thread Tokio runtime"
+        )),
+        _ => Err(format!(
+            "{PUBLIC_TOOL_NAME} tool calls require a supported Tokio runtime"
+        )),
+    }
+}
+
 async fn call_nested_tool(
    exec: ExecContext,
    tool_name: String,
    input: Option<JsonValue>,
-) -> JsonValue {
+) -> Result<JsonValue, String> {
    if tool_name == PUBLIC_TOOL_NAME {
-        return JsonValue::String(format!("{PUBLIC_TOOL_NAME} cannot invoke itself"));
+        return Ok(JsonValue::String(format!(
+            "{PUBLIC_TOOL_NAME} cannot invoke itself"
+        )));
    }

    let router = build_nested_router(&exec).await;
-
    let specs = router.specs();
    let payload = if let Some((server, tool)) = exec.session.parse_mcp_tool_name(&tool_name).await {
-        match serialize_function_tool_arguments(&tool_name, input) {
-            Ok(raw_arguments) => ToolPayload::Mcp {
-                server,
-                tool,
-                raw_arguments,
-            },
-            Err(error) => return JsonValue::String(error),
+        ToolPayload::Mcp {
+            server,
+            tool,
+            raw_arguments: serialize_function_tool_arguments(&tool_name, input)?,
        }
    } else {
-        match build_nested_tool_payload(&specs, &tool_name, input) {
-            Ok(payload) => payload,
-            Err(error) => return JsonValue::String(error),
-        }
+        build_nested_tool_payload(&specs, &tool_name, input)?
    };

    let call = ToolCall {
@@ -452,12 +258,10 @@ async fn call_nested_tool(
            call,
            ToolCallSource::CodeMode,
        )
-        .await;
+        .await
+        .map_err(|error| error.to_string())?;

-    match result {
-        Ok(result) => result.code_mode_result(),
-        Err(error) => JsonValue::String(error.to_string()),
-    }
+    Ok(result.code_mode_result())
 }

 fn tool_kind_for_spec(spec: &ToolSpec) -> CodeModeToolKind {
@@ -481,8 +285,7 @@ fn build_nested_tool_payload(
    tool_name: &str,
    input: Option<JsonValue>,
 ) -> Result<ToolPayload, String> {
-    let actual_kind = tool_kind_for_name(specs, tool_name)?;
-    match actual_kind {
+    match tool_kind_for_name(specs, tool_name)? {
        CodeModeToolKind::Function => build_function_tool_payload(tool_name, input),
        CodeModeToolKind::Freeform => build_freeform_tool_payload(tool_name, input),
    }
--- a/codex-rs/core/src/tools/code_mode_bridge.js
+++ b/codex-rs/core/src/tools/code_mode_bridge.js
@@ -1,77 +0,0 @@
-const __codexEnabledTools = __CODE_MODE_ENABLED_TOOLS_PLACEHOLDER__;
-const __codexEnabledToolNames = __codexEnabledTools.map((tool) => tool.tool_name);
-const __codexContentItems = Array.isArray(globalThis.__codexContentItems)
-  ? globalThis.__codexContentItems
-  : [];
-
-function __codexCloneContentItem(item) {
-  if (!item || typeof item !== 'object') {
-    throw new TypeError('content item must be an object');
-  }
-  switch (item.type) {
-    case 'input_text':
-      if (typeof item.text !== 'string') {
-        throw new TypeError('content item "input_text" requires a string text field');
-      }
-      return { type: 'input_text', text: item.text };
-    case 'input_image':
-      if (typeof item.image_url !== 'string') {
-        throw new TypeError('content item "input_image" requires a string image_url field');
-      }
-      return { type: 'input_image', image_url: item.image_url };
-    default:
-      throw new TypeError(`unsupported content item type "${item.type}"`);
-  }
-}
-
-function __codexNormalizeRawContentItems(value) {
-  if (Array.isArray(value)) {
-    return value.flatMap((entry) => __codexNormalizeRawContentItems(entry));
-  }
-  return [__codexCloneContentItem(value)];
-}
-
-function __codexNormalizeContentItems(value) {
-  if (typeof value === 'string') {
-    return [{ type: 'input_text', text: value }];
-  }
-  return __codexNormalizeRawContentItems(value);
-}
-
-Object.defineProperty(globalThis, '__codexContentItems', {
-  value: __codexContentItems,
-  configurable: true,
-  enumerable: false,
-  writable: false,
-});
-
-globalThis.codex = {
-  enabledTools: Object.freeze(__codexEnabledToolNames.slice()),
-};
-
-globalThis.add_content = (value) => {
-  const contentItems = __codexNormalizeContentItems(value);
-  __codexContentItems.push(...contentItems);
-  return contentItems;
-};
-
-globalThis.console = Object.freeze({
-  log() {},
-  info() {},
-  warn() {},
-  error() {},
-  debug() {},
-});
-
-for (const name of __codexEnabledToolNames) {
-  if (!(name in globalThis)) {
-    Object.defineProperty(globalThis, name, {
-      value: async (args) => __codex_tool_call(name, args),
-      configurable: true,
-      enumerable: false,
-      writable: false,
-    });
-  }
-}
-
-__CODE_MODE_USER_CODE_PLACEHOLDER__
--- a/codex-rs/core/src/tools/code_mode_runner.cjs
+++ b/codex-rs/core/src/tools/code_mode_runner.cjs
@@ -1,408 +0,0 @@
-'use strict';
-
-const readline = require('node:readline');
-const vm = require('node:vm');
-
-const { SourceTextModule, SyntheticModule } = vm;
-const DEFAULT_MAX_OUTPUT_TOKENS_PER_EXEC_CALL = 10000;
-
-function normalizeMaxOutputTokensPerExecCall(value) {
-  if (!Number.isSafeInteger(value) || value < 0) {
-    throw new TypeError('max_output_tokens_per_exec_call must be a non-negative safe integer');
-  }
-  return value;
-}
-
-function createProtocol() {
-  const rl = readline.createInterface({
-    input: process.stdin,
-    crlfDelay: Infinity,
-  });
-
-  let nextId = 0;
-  const pending = new Map();
-  let initResolve;
-  let initReject;
-  const init = new Promise((resolve, reject) => {
-    initResolve = resolve;
-    initReject = reject;
-  });
-
-  rl.on('line', (line) => {
-    if (!line.trim()) {
-      return;
-    }
-
-    let message;
-    try {
-      message = JSON.parse(line);
-    } catch (error) {
-      initReject(error);
-      return;
-    }
-
-    if (message.type === 'init') {
-      initResolve(message);
-      return;
-    }
-
-    if (message.type === 'response') {
-      const entry = pending.get(message.id);
-      if (!entry) {
-        return;
-      }
-      pending.delete(message.id);
-      entry.resolve(message.code_mode_result ?? '');
-      return;
-    }
-
-    initReject(new Error(`Unknown protocol message type: ${message.type}`));
-  });
-
-  rl.on('close', () => {
-    const error = new Error('stdin closed');
-    initReject(error);
-    for (const entry of pending.values()) {
-      entry.reject(error);
-    }
-    pending.clear();
-  });
-
-  function send(message) {
-    return new Promise((resolve, reject) => {
-      process.stdout.write(`${JSON.stringify(message)}\n`, (error) => {
-        if (error) {
-          reject(error);
-        } else {
-          resolve();
-        }
-      });
-    });
-  }
-
-  function request(type, payload) {
-    const id = `msg-${++nextId}`;
-    return new Promise((resolve, reject) => {
-      pending.set(id, { resolve, reject });
-      void send({ type, id, ...payload }).catch((error) => {
-        pending.delete(id);
-        reject(error);
-      });
-    });
-  }
-
-  return { init, request, send };
-}
-
-function readContentItems(context) {
-  try {
-    const serialized = vm.runInContext('JSON.stringify(globalThis.__codexContentItems ?? [])', context);
-    const contentItems = JSON.parse(serialized);
-    return Array.isArray(contentItems) ? contentItems : [];
-  } catch {
-    return [];
-  }
-}
-
-function formatErrorText(error) {
-  return String(error && error.stack ? error.stack : error);
-}
-
-function cloneJsonValue(value) {
-  return JSON.parse(JSON.stringify(value));
-}
-
-function createToolCaller(protocol) {
-  return (name, input) =>
-    protocol.request('tool_call', {
-      name: String(name),
-      input,
-    });
-}
-
-function createToolsNamespace(callTool, enabledTools) {
-  const tools = Object.create(null);
-
-  for (const { tool_name } of enabledTools) {
-    Object.defineProperty(tools, tool_name, {
-      value: async (args) => callTool(tool_name, args),
-      configurable: false,
-      enumerable: true,
-      writable: false,
-    });
-  }
-
-  return Object.freeze(tools);
-}
-
-function createAllToolsMetadata(enabledTools) {
-  return Object.freeze(
-    enabledTools.map(({ module: modulePath, name, description }) =>
-      Object.freeze({
-        module: modulePath,
-        name,
-        description,
-      })
-    )
-  );
-}
-
-function createToolsModule(context, callTool, enabledTools) {
-  const tools = createToolsNamespace(callTool, enabledTools);
-  const allTools = createAllToolsMetadata(enabledTools);
-  const exportNames = ['ALL_TOOLS'];
-
-  for (const { tool_name } of enabledTools) {
-    if (tool_name !== 'ALL_TOOLS') {
-      exportNames.push(tool_name);
-    }
-  }
-
-  const uniqueExportNames = [...new Set(exportNames)];
-
-  return new SyntheticModule(
-    uniqueExportNames,
-    function initToolsModule() {
-      this.setExport('ALL_TOOLS', allTools);
-      for (const exportName of uniqueExportNames) {
-        if (exportName !== 'ALL_TOOLS') {
-          this.setExport(exportName, tools[exportName]);
-        }
-      }
-    },
-    { context }
-  );
-}
-
-function ensureContentItems(context) {
-  if (!Array.isArray(context.__codexContentItems)) {
-    context.__codexContentItems = [];
-  }
-  return context.__codexContentItems;
-}
-
-function serializeOutputText(value) {
-  if (typeof value === 'string') {
-    return value;
-  }
-  if (
-    typeof value === 'undefined' ||
-    value === null ||
-    typeof value === 'boolean' ||
-    typeof value === 'number' ||
-    typeof value === 'bigint'
-  ) {
-    return String(value);
-  }
-
-  const serialized = JSON.stringify(value);
-  if (typeof serialized === 'string') {
-    return serialized;
-  }
-
-  return String(value);
-}
-
-function normalizeOutputImageUrl(value) {
-  if (typeof value !== 'string' || !value) {
-    throw new TypeError('output_image expects a non-empty image URL string');
-  }
-  if (/^(?:https?:\/\/|data:)/i.test(value)) {
-    return value;
-  }
-  throw new TypeError('output_image expects an http(s) or data URL');
-}
-
-function createCodeModeModule(context, state) {
-  const load = (key) => {
-    if (typeof key !== 'string') {
-      throw new TypeError('load key must be a string');
-    }
-    if (!Object.prototype.hasOwnProperty.call(state.storedValues, key)) {
-      return undefined;
-    }
-    return cloneJsonValue(state.storedValues[key]);
-  };
-  const store = (key, value) => {
-    if (typeof key !== 'string') {
-      throw new TypeError('store key must be a string');
-    }
-    state.storedValues[key] = cloneJsonValue(value);
-  };
-  const outputText = (value) => {
-    const item = {
-      type: 'input_text',
-      text: serializeOutputText(value),
-    };
-    ensureContentItems(context).push(item);
-    return item;
-  };
-  const outputImage = (value) => {
-    const item = {
-      type: 'input_image',
-      image_url: normalizeOutputImageUrl(value),
-    };
-    ensureContentItems(context).push(item);
-    return item;
-  };
-
-  return new SyntheticModule(
-    ['load', 'output_text', 'output_image', 'set_max_output_tokens_per_exec_call', 'store'],
-    function initCodeModeModule() {
-      this.setExport('load', load);
-      this.setExport('output_text', outputText);
-      this.setExport('output_image', outputImage);
-      this.setExport('set_max_output_tokens_per_exec_call', (value) => {
-        const normalized = normalizeMaxOutputTokensPerExecCall(value);
-        state.maxOutputTokensPerExecCall = normalized;
-        return normalized;
-      });
-      this.setExport('store', store);
-    },
-    { context }
-  );
-}
-
-function namespacesMatch(left, right) {
-  if (left.length !== right.length) {
-    return false;
-  }
-  return left.every((segment, index) => segment === right[index]);
-}
-
-function createNamespacedToolsNamespace(callTool, enabledTools, namespace) {
-  const tools = Object.create(null);
-
-  for (const tool of enabledTools) {
-    const toolNamespace = Array.isArray(tool.namespace) ? tool.namespace : [];
-    if (!namespacesMatch(toolNamespace, namespace)) {
-      continue;
-    }
-
-    Object.defineProperty(tools, tool.name, {
-      value: async (args) => callTool(tool.tool_name, args),
-      configurable: false,
-      enumerable: true,
-      writable: false,
-    });
-  }
-
-  return Object.freeze(tools);
-}
-
-function createNamespacedToolsModule(context, callTool, enabledTools, namespace) {
-  const tools = createNamespacedToolsNamespace(callTool, enabledTools, namespace);
-  const exportNames = [];
-
-  for (const exportName of Object.keys(tools)) {
-    if (exportName !== 'ALL_TOOLS') {
-      exportNames.push(exportName);
-    }
-  }
-
-  const uniqueExportNames = [...new Set(exportNames)];
-
-  return new SyntheticModule(
-    uniqueExportNames,
-    function initNamespacedToolsModule() {
-      for (const exportName of uniqueExportNames) {
-        this.setExport(exportName, tools[exportName]);
-      }
-    },
-    { context }
-  );
-}
-
-function createModuleResolver(context, callTool, enabledTools, state) {
-  const toolsModule = createToolsModule(context, callTool, enabledTools);
-  const codeModeModule = createCodeModeModule(context, state);
-  const namespacedModules = new Map();
-
-  return function resolveModule(specifier) {
-    if (specifier === 'tools.js') {
-      return toolsModule;
-    }
-    if (specifier === '@openai/code_mode' || specifier === 'openai/code_mode') {
-      return codeModeModule;
-    }
-    const namespacedMatch = /^tools\/(.+)\.js$/.exec(specifier);
-    if (!namespacedMatch) {
-      throw new Error(`Unsupported import in exec: ${specifier}`);
-    }
-
-    const namespace = namespacedMatch[1]
-      .split('/')
-      .filter((segment) => segment.length > 0);
-    if (namespace.length === 0) {
-      throw new Error(`Unsupported import in exec: ${specifier}`);
-    }
-
-    const cacheKey = namespace.join('/');
-    if (!namespacedModules.has(cacheKey)) {
-      namespacedModules.set(
-        cacheKey,
-        createNamespacedToolsModule(context, callTool, enabledTools, namespace)
-      );
-    }
-    return namespacedModules.get(cacheKey);
-  };
-}
-
-async function runModule(context, request, state, callTool) {
-  const resolveModule = createModuleResolver(
-    context,
-    callTool,
-    request.enabled_tools ?? [],
-    state
-  );
-  const mainModule = new SourceTextModule(request.source, {
-    context,
-    identifier: 'exec_main.mjs',
-    importModuleDynamically: async (specifier) => resolveModule(specifier),
-  });
-
-  await mainModule.link(resolveModule);
-  await mainModule.evaluate();
-}
-
-async function main() {
-  const protocol = createProtocol();
-  const request = await protocol.init;
-  const state = {
-    maxOutputTokensPerExecCall: DEFAULT_MAX_OUTPUT_TOKENS_PER_EXEC_CALL,
-    storedValues: cloneJsonValue(request.stored_values ?? {}),
-  };
-  const callTool = createToolCaller(protocol);
-  const context = vm.createContext({
-    __codexContentItems: [],
-    __codex_tool_call: callTool,
-  });
-
-  try {
-    await runModule(context, request, state, callTool);
-    await protocol.send({
-      type: 'result',
-      content_items: readContentItems(context),
-      stored_values: state.storedValues,
-      max_output_tokens_per_exec_call: state.maxOutputTokensPerExecCall,
-    });
-    process.exit(0);
-  } catch (error) {
-    await protocol.send({
-      type: 'result',
-      content_items: readContentItems(context),
-      stored_values: state.storedValues,
-      error_text: formatErrorText(error),
-      max_output_tokens_per_exec_call: state.maxOutputTokensPerExecCall,
-    });
-    process.exit(1);
-  }
-}
-
-void main().catch(async (error) => {
-  try {
-    process.stderr.write(`${formatErrorText(error)}\n`);
-  } finally {
-    process.exitCode = 1;
-  }
-});
--- a/codex-rs/core/src/tools/context.rs
+++ b/codex-rs/core/src/tools/context.rs
@@ -307,7 +307,6 @@ fn content_items_to_code_mode_result(items: &[FunctionCallOutputContentItem]) ->
            .join("\n"),
    )
 }
-
 fn function_tool_response(
    call_id: &str,
    payload: &ToolPayload,
--- a/codex-rs/core/src/tools/handlers/code_mode.rs
+++ b/codex-rs/core/src/tools/handlers/code_mode.rs
@@ -38,6 +38,9 @@ impl ToolHandler for CodeModeHandler {
                "{PUBLIC_TOOL_NAME} is disabled by feature flag"
            )));
        }
+        if let Some(reason) = codex_code_mode::unsupported_reason() {
+            return Err(FunctionCallError::RespondToModel(reason.to_string()));
+        }

        let code = match payload {
            ToolPayload::Custom { input } => input,
--- a/codex-rs/core/src/tools/spec.rs
+++ b/codex-rs/core/src/tools/spec.rs
@@ -140,7 +140,6 @@ impl ToolsConfig {
        let include_js_repl_tools_only =
            include_js_repl && features.enabled(Feature::JsReplToolsOnly);
        let include_collab_tools = features.enabled(Feature::Collab);
-        let include_agent_jobs = features.enabled(Feature::SpawnCsv);
        let include_request_user_input = !matches!(session_source, SessionSource::SubAgent(_));
        let include_default_mode_request_user_input =
            include_request_user_input && features.enabled(Feature::DefaultModeRequestUserInput);
@@ -149,6 +148,7 @@ impl ToolsConfig {
            features.enabled(Feature::Artifact) && codex_artifacts::can_manage_artifact_runtime();
        let include_image_gen_tool =
            features.enabled(Feature::ImageGeneration) && supports_image_generation(model_info);
+        let include_agent_jobs = features.enabled(Feature::SpawnCsv);
        let request_permission_enabled = features.enabled(Feature::RequestPermissions);
        let request_permissions_tool_enabled = features.enabled(Feature::RequestPermissionsTool);
        let shell_command_backend =
@@ -1660,7 +1660,7 @@ source: /[\s\S]+/
        enabled_tool_names.join(", ")
    };
    let description = format!(
-        "Runs JavaScript in a Node-backed `node:vm` context. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `{PUBLIC_TOOL_NAME}` is enabled. Inside JavaScript, import nested tools from `tools.js`, for example `import {{ exec_command }} from \"tools.js\"` or `import {{ ALL_TOOLS }} from \"tools.js\"` to inspect the available `{{ module, name, description }}` entries. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import {{ append_notebook_logs_chart }} from \"tools/mcp/ologs.js\"`. Nested tool calls resolve to their code-mode result values. Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call, store, load }}` from `\"@openai/code_mode\"` (or `\"openai/code_mode\"`); `output_text(value)` surfaces text back to the model and stringifies non-string objects when possible, `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs, `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, `load(key)` returns a cloned stored value or `undefined`, and `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `{PUBLIC_TOOL_NAME}` execution. The default is `10000`. This guards the overall `{PUBLIC_TOOL_NAME}` output, not individual nested tool invocations. The returned content starts with a separate `Script completed` or `Script failed` text item that includes wall time. When truncation happens, the final text may include `Total output lines:` and the usual `…N tokens truncated…` marker. Function tools require JSON object arguments. Freeform tools require raw strings. `add_content(value)` remains available for compatibility with a content item, content-item array, or string. 
Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`. Only content passed to `output_text(...)`, `output_image(...)`, or `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}."
+        "Runs JavaScript in an embedded V8 runtime. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `{PUBLIC_TOOL_NAME}` is enabled. Inside JavaScript, import nested tools from `tools.js`, for example `import {{ exec_command }} from \"tools.js\"`, `import {{ tools }} from \"tools.js\"`, or `import {{ ALL_TOOLS }} from \"tools.js\"` to inspect the available `{{ module, name, description }}` entries. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import {{ append_notebook_logs_chart }} from \"tools/mcp/ologs.js\"`. `tools[name]` and identifier wrappers like `await shell(args)` remain available for compatibility when the tool name is a valid JS identifier. Nested tool calls resolve to their code-mode result values. Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call, store, load }}` from `\"@openai/code_mode\"` (or `\"openai/code_mode\"`); `output_text(value)` surfaces text back to the model and stringifies non-string objects when possible, `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs, `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, `load(key)` returns a cloned stored value or `undefined`, and `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `{PUBLIC_TOOL_NAME}` execution. The default is `10000`. This guards the overall `{PUBLIC_TOOL_NAME}` output, not individual nested tool invocations. The returned content starts with a separate `Script completed` or `Script failed` text item that includes wall time. When truncation happens, the final text may include `Total output lines:` and the usual `…N tokens truncated…` marker. Function tools require JSON object arguments. Freeform tools require raw strings. 
`add_content(value)` remains available for compatibility with a content item, content-item array, or string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`. Only content passed to `output_text(...)`, `output_image(...)`, or `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}."
    );

    ToolSpec::Freeform(FreeformTool {
--- a/codex-rs/core/tests/suite/code_mode.rs
+++ b/codex-rs/core/tests/suite/code_mode.rs
@@ -24,6 +24,14 @@ use std::fs;
 use std::time::Duration;
 use wiremock::MockServer;

+macro_rules! skip_on_bazel {
+    ($result:expr) => {
+        if option_env!("BAZEL_PACKAGE").is_some() {
+            return $result;
+        }
+    };
+}
+
 fn custom_tool_output_items(req: &ResponsesRequest, call_id: &str) -> Vec<Value> {
    req.custom_tool_call_output(call_id)
        .get("output")
@@ -157,6 +165,7 @@ async fn run_code_mode_turn_with_rmcp(
 #[cfg_attr(windows, ignore = "no exec_command on Windows")]
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn code_mode_can_return_exec_command_output() -> Result<()> {
+    skip_on_bazel!(Ok(()));
    skip_if_no_network!(Ok(()));

    let server = responses::start_mock_server().await;
@@ -203,6 +212,7 @@ add_content(JSON.stringify(await exec_command({ cmd: "printf code_mode_exec_mark
 #[cfg_attr(windows, ignore = "no exec_command on Windows")]
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn code_mode_can_truncate_final_result_with_configured_budget() -> Result<()> {
+    skip_on_bazel!(Ok(()));
    skip_if_no_network!(Ok(()));

    let server = responses::start_mock_server().await;
@@ -248,6 +258,7 @@ Total\ output\ lines:\ 1\n

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn code_mode_returns_accumulated_output_when_script_fails() -> Result<()> {
+    skip_on_bazel!(Ok(()));
    skip_if_no_network!(Ok(()));

    let server = responses::start_mock_server().await;
@@ -291,6 +302,7 @@ Error:\ boom\n

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn code_mode_can_output_serialized_text_via_openai_code_mode_module() -> Result<()> {
+    skip_on_bazel!(Ok(()));
    skip_if_no_network!(Ok(()));

    let server = responses::start_mock_server().await;
@@ -320,6 +332,7 @@ output_text({ json: true });

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn code_mode_surfaces_output_text_stringify_errors() -> Result<()> {
+    skip_on_bazel!(Ok(()));
    skip_if_no_network!(Ok(()));

    let server = responses::start_mock_server().await;
@@ -363,6 +376,7 @@ output_text(circular);

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn code_mode_can_output_images_via_openai_code_mode_module() -> Result<()> {
+    skip_on_bazel!(Ok(()));
    skip_if_no_network!(Ok(()));

    let server = responses::start_mock_server().await;
@@ -415,6 +429,7 @@ output_image("data:image/png;base64,AAA");

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn code_mode_can_apply_patch_via_nested_tool() -> Result<()> {
+    skip_on_bazel!(Ok(()));
    skip_if_no_network!(Ok(()));

    let server = responses::start_mock_server().await;
@@ -456,6 +471,7 @@ async fn code_mode_can_apply_patch_via_nested_tool() -> Result<()> {

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn code_mode_can_print_structured_mcp_tool_result_fields() -> Result<()> {
+    skip_on_bazel!(Ok(()));
    skip_if_no_network!(Ok(()));

    let server = responses::start_mock_server().await;
@@ -496,6 +512,7 @@ contentLength=0"

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn code_mode_exports_all_tools_metadata_for_builtin_tools() -> Result<()> {
+    skip_on_bazel!(Ok(()));
    skip_if_no_network!(Ok(()));

    let server = responses::start_mock_server().await;
@@ -530,8 +547,48 @@ add_content(JSON.stringify(tool));
    Ok(())
 }

+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn code_mode_can_access_namespaced_mcp_tool_from_flat_tools_namespace() -> Result<()> {
+    skip_on_bazel!(Ok(()));
+    skip_if_no_network!(Ok(()));
+
+    let server = responses::start_mock_server().await;
+    let code = r#"
+import { tools } from "tools.js";
+
+const { structuredContent, isError } = await tools["mcp__rmcp__echo"]({
+  message: "ping",
+});
+add_content(
+  `echo=${structuredContent?.echo ?? "missing"}\n` +
+    `env=${structuredContent?.env ?? "missing"}\n` +
+    `isError=${String(isError)}`
+);
+"#;
+
+    let (_test, second_mock) =
+        run_code_mode_turn_with_rmcp(&server, "use exec to run the rmcp echo tool", code).await?;
+
+    let req = second_mock.single_request();
+    let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
+    assert_ne!(
+        success,
+        Some(false),
+        "exec rmcp echo call failed unexpectedly: {output}"
+    );
+    assert_eq!(
+        output,
+        "echo=ECHOING: ping
+env=propagated-env
+isError=false"
+    );
+
+    Ok(())
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn code_mode_exports_all_tools_metadata_for_namespaced_mcp_tools() -> Result<()> {
+    skip_on_bazel!(Ok(()));
    skip_if_no_network!(Ok(()));

    let server = responses::start_mock_server().await;
@@ -570,6 +627,7 @@ add_content(JSON.stringify(tool));

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn code_mode_can_print_content_only_mcp_tool_result_fields() -> Result<()> {
+    skip_on_bazel!(Ok(()));
    skip_if_no_network!(Ok(()));

    let server = responses::start_mock_server().await;
@@ -615,6 +673,7 @@ isError=false"

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn code_mode_can_print_error_mcp_tool_result_fields() -> Result<()> {
+    skip_on_bazel!(Ok(()));
    skip_if_no_network!(Ok(()));

    let server = responses::start_mock_server().await;
@@ -655,6 +714,7 @@ structuredContent=null"
 }
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn code_mode_can_store_and_load_values_across_turns() -> Result<()> {
+    skip_on_bazel!(Ok(()));
    skip_if_no_network!(Ok(()));

    let server = responses::start_mock_server().await;
--- a/sdk/python-runtime/README.md
+++ b/sdk/python-runtime/README.md
@@ -0,0 +1,9 @@
+# Codex CLI Runtime for Python SDK
+
+Platform-specific runtime package consumed by the published `codex-app-server-sdk`.
+
+This package is staged during release so the SDK can pin an exact Codex CLI
+version without checking platform binaries into the repo.
+
+`codex-cli-bin` is intentionally wheel-only. Do not build or publish an sdist
+for this package.
--- a/sdk/python-runtime/hatch_build.py
+++ b/sdk/python-runtime/hatch_build.py
@@ -0,0 +1,15 @@
+from __future__ import annotations
+
+from hatchling.builders.hooks.plugin.interface import BuildHookInterface
+
+
+class RuntimeBuildHook(BuildHookInterface):
+    def initialize(self, version: str, build_data: dict[str, object]) -> None:
+        del version
+        if self.target_name == "sdist":
+            raise RuntimeError(
+                "codex-cli-bin is wheel-only; build and publish platform wheels only."
+            )
+
+        build_data["pure_python"] = False
+        build_data["infer_tag"] = True
--- a/sdk/python-runtime/pyproject.toml
+++ b/sdk/python-runtime/pyproject.toml
@@ -0,0 +1,45 @@
+[build-system]
+requires = ["hatchling>=1.24.0"]
+build-backend = "hatchling.build"
+
+[project]
+name = "codex-cli-bin"
+version = "0.0.0-dev"
+description = "Pinned Codex CLI runtime for the Python SDK"
+readme = "README.md"
+requires-python = ">=3.10"
+license = { text = "Apache-2.0" }
+authors = [{ name = "OpenAI" }]
+classifiers = [
+  "Development Status :: 4 - Beta",
+  "Intended Audience :: Developers",
+  "License :: OSI Approved :: Apache Software License",
+  "Programming Language :: Python :: 3",
+  "Programming Language :: Python :: 3.10",
+  "Programming Language :: Python :: 3.11",
+  "Programming Language :: Python :: 3.12",
+  "Programming Language :: Python :: 3.13",
+]
+
+[project.urls]
+Homepage = "https://github.com/openai/codex"
+Repository = "https://github.com/openai/codex"
+Issues = "https://github.com/openai/codex/issues"
+
+[tool.hatch.build]
+exclude = [
+  ".venv/**",
+  ".pytest_cache/**",
+  "dist/**",
+  "build/**",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["src/codex_cli_bin"]
+include = ["src/codex_cli_bin/bin/**"]
+
+[tool.hatch.build.targets.wheel.hooks.custom]
+
+[tool.hatch.build.targets.sdist]
+
+[tool.hatch.build.targets.sdist.hooks.custom]
--- a/sdk/python-runtime/src/codex_cli_bin/__init__.py
+++ b/sdk/python-runtime/src/codex_cli_bin/__init__.py
@@ -0,0 +1,19 @@
+from __future__ import annotations
+
+import os
+from pathlib import Path
+
+PACKAGE_NAME = "codex-cli-bin"
+
+
+def bundled_codex_path() -> Path:
+    exe = "codex.exe" if os.name == "nt" else "codex"
+    path = Path(__file__).resolve().parent / "bin" / exe
+    if not path.is_file():
+        raise FileNotFoundError(
+            f"{PACKAGE_NAME} is installed but missing its packaged codex binary at {path}"
+        )
+    return path
+
+
+__all__ = ["PACKAGE_NAME", "bundled_codex_path"]
--- a/sdk/python/README.md
+++ b/sdk/python/README.md
@@ -0,0 +1,95 @@
+# Codex App Server Python SDK (Experimental)
+
+Experimental Python SDK for `codex app-server` JSON-RPC v2 over stdio, with a small default surface optimized for real scripts and apps.
+
+The generated wire-model layer is currently sourced from the bundled v2 schema and exposed as Pydantic models with snake_case Python fields that serialize back to the app-server’s camelCase wire format.
+
+## Install
+
+```bash
+cd sdk/python
+python -m pip install -e .
+```
+
+Published SDK builds pin an exact `codex-cli-bin` runtime dependency. For local
+repo development, pass `AppServerConfig(codex_bin=...)` to point at a local
+build explicitly.
+
+## Quickstart
+
+```python
+from codex_app_server import Codex, TextInput
+
+with Codex() as codex:
+    thread = codex.thread_start(model="gpt-5")
+    result = thread.turn(TextInput("Say hello in one sentence.")).run()
+    print(result.text)
+```
+
+## Docs map
+
+- Golden path tutorial: `docs/getting-started.md`
+- API reference (signatures + behavior): `docs/api-reference.md`
+- Common decisions and pitfalls: `docs/faq.md`
+- Runnable examples index: `examples/README.md`
+- Jupyter walkthrough notebook: `notebooks/sdk_walkthrough.ipynb`
+
+## Examples
+
+Start here:
+
+```bash
+cd sdk/python
+python examples/01_quickstart_constructor/sync.py
+python examples/01_quickstart_constructor/async.py
+```
+
+## Runtime packaging
+
+The repo no longer checks `codex` binaries into `sdk/python`.
+
+Published SDK builds are pinned to an exact `codex-cli-bin` package version,
+and that runtime package carries the platform-specific binary for the target
+wheel.
+
+For local repo development, the checked-in `sdk/python-runtime` package is only
+a template for staged release artifacts. Editable installs should use an
+explicit `codex_bin` override instead.
+
+## Maintainer workflow
+
+```bash
+cd sdk/python
+python scripts/update_sdk_artifacts.py generate-types
+python scripts/update_sdk_artifacts.py \
+  stage-sdk \
+  /tmp/codex-python-release/codex-app-server-sdk \
+  --runtime-version 1.2.3
+python scripts/update_sdk_artifacts.py \
+  stage-runtime \
+  /tmp/codex-python-release/codex-cli-bin \
+  /path/to/codex \
+  --runtime-version 1.2.3
+```
+
+This supports the CI release flow:
+
+- run `generate-types` before packaging
+- stage `codex-app-server-sdk` once with an exact `codex-cli-bin==...` dependency
+- stage `codex-cli-bin` on each supported platform runner with the same pinned runtime version
+- build and publish `codex-cli-bin` as platform wheels only; do not publish an sdist
+
+## Compatibility and versioning
+
+- Package: `codex-app-server-sdk`
+- Runtime package: `codex-cli-bin`
+- Current SDK version in this repo: `0.2.0`
+- Python: `>=3.10`
+- Target protocol: Codex `app-server` JSON-RPC v2
+- Recommendation: keep SDK and `codex` CLI reasonably up to date together
+
+## Notes
+
+- `Codex()` is eager and performs startup + `initialize` in the constructor.
+- Use context managers (`with Codex() as codex:`) to ensure shutdown.
+- For transient overload, use `codex_app_server.retry.retry_on_overload`.
--- a/sdk/python/docs/faq.md
+++ b/sdk/python/docs/faq.md
@@ -0,0 +1,77 @@
+# FAQ
+
+## Thread vs turn
+
+- A `Thread` is conversation state.
+- A `Turn` is one model execution inside that thread.
+- Multi-turn chat means multiple turns on the same `Thread`.
+
+## `run()` vs `stream()`
+
+- `Turn.run()` is the easiest path. It consumes events until completion and returns `TurnResult`.
+- `Turn.stream()` yields raw notifications (`Notification`) so you can react event-by-event.
+
+Choose `run()` for most apps. Choose `stream()` for progress UIs, custom timeout logic, or custom parsing.
+
+## Sync vs async clients
+
+- `Codex` is the minimal sync SDK and best default.
+- `AsyncAppServerClient` wraps the sync transport with `asyncio.to_thread(...)` for async-friendly call sites.
+
+If your app is not already async, stay with `Codex`.
+
+## `thread(...)` vs `thread_resume(...)`
+
+- `codex.thread(thread_id)` only binds a local helper to an existing thread ID.
+- `codex.thread_resume(thread_id, ...)` performs a `thread/resume` RPC and can apply overrides (model, instructions, sandbox, etc.).
+
+Use `thread(...)` for simple continuation. Use `thread_resume(...)` when you need explicit resume semantics or override fields.
+
+## Why does the constructor fail?
+
+`Codex()` is eager: it starts transport and calls `initialize` in `__init__`.
+
+Common causes:
+
+- published runtime package (`codex-cli-bin`) is not installed
+- local `codex_bin` override points to a missing file
+- local auth/session is missing
+- incompatible/old app-server
+
+Maintainers stage releases by building the SDK once and the runtime once per
+platform with the same pinned runtime version. Publish `codex-cli-bin` as
+platform wheels only; do not publish an sdist:
+
+```bash
+cd sdk/python
+python scripts/update_sdk_artifacts.py generate-types
+python scripts/update_sdk_artifacts.py \
+  stage-sdk \
+  /tmp/codex-python-release/codex-app-server-sdk \
+  --runtime-version 1.2.3
+python scripts/update_sdk_artifacts.py \
+  stage-runtime \
+  /tmp/codex-python-release/codex-cli-bin \
+  /path/to/codex \
+  --runtime-version 1.2.3
+```
+
+## Why does a turn "hang"?
+
+A turn is complete only when `turn/completed` arrives for that turn ID.
+
+- `run()` waits for this automatically.
+- With `stream()`, make sure you keep consuming notifications until completion.
+
+## How do I retry safely?
+
+Use `retry_on_overload(...)` for transient overload failures (`ServerBusyError`).
+
+Do not blindly retry all errors. For `InvalidParamsError` or `MethodNotFoundError`, fix inputs/version compatibility instead.
+
+## Common pitfalls
+
+- Starting a new thread for every prompt when you wanted continuity.
+- Forgetting to `close()` (or not using `with Codex() as codex:`).
+- Ignoring `TurnResult.status` and `TurnResult.error`.
+- Mixing SDK input classes with raw dicts incorrectly in minimal API paths.
--- a/sdk/python/docs/getting-started.md
+++ b/sdk/python/docs/getting-started.md
@@ -0,0 +1,75 @@
+# Getting Started
+
+This is the fastest path from install to a multi-turn thread using the minimal SDK surface.
+
+## 1) Install
+
+From repo root:
+
+```bash
+cd sdk/python
+python -m pip install -e .
+```
+
+Requirements:
+
+- Python `>=3.10`
+- Installed `codex-cli-bin` runtime package, or an explicit `codex_bin` override
+- Local Codex auth/session configured
+
+## 2) Run your first turn
+
+```python
+from codex_app_server import Codex, TextInput
+
+with Codex() as codex:
+    print("Server:", codex.metadata.server_name, codex.metadata.server_version)
+
+    thread = codex.thread_start(model="gpt-5")
+    result = thread.turn(TextInput("Say hello in one sentence.")).run()
+
+    print("Thread:", result.thread_id)
+    print("Turn:", result.turn_id)
+    print("Status:", result.status)
+    print("Text:", result.text)
+```
+
+What happened:
+
+- `Codex()` started and initialized `codex app-server`.
+- `thread_start(...)` created a thread.
+- `turn(...).run()` consumed events until `turn/completed` and returned a `TurnResult`.
+
+## 3) Continue the same thread (multi-turn)
+
+```python
+from codex_app_server import Codex, TextInput
+
+with Codex() as codex:
+    thread = codex.thread_start(model="gpt-5")
+
+    first = thread.turn(TextInput("Summarize Rust ownership in 2 bullets.")).run()
+    second = thread.turn(TextInput("Now explain it to a Python developer.")).run()
+
+    print("first:", first.text)
+    print("second:", second.text)
+```
+
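+## 4) Resume an existing thread
+
+`codex.thread(...)` only binds a local helper to an existing thread ID; use `codex.thread_resume(...)` instead when you need an explicit `thread/resume` RPC or override fields.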
+
+```python
+from codex_app_server import Codex, TextInput
+
+THREAD_ID = "thr_123"  # replace with a real id
+
+with Codex() as codex:
+    thread = codex.thread(THREAD_ID)
+    result = thread.turn(TextInput("Continue where we left off.")).run()
+    print(result.text)
+```
+
+## 5) Next stops
+
+- API surface and signatures: `docs/api-reference.md`
+- Common decisions/pitfalls: `docs/faq.md`
+- End-to-end runnable examples: `examples/README.md`
--- a/sdk/python/pyproject.toml
+++ b/sdk/python/pyproject.toml
@@ -0,0 +1,62 @@
+[build-system]
+requires = ["hatchling>=1.24.0"]
+build-backend = "hatchling.build"
+
+[project]
+name = "codex-app-server-sdk"
+version = "0.2.0"
+description = "Python SDK for Codex app-server v2"
+readme = "README.md"
+requires-python = ">=3.10"
+license = { text = "Apache-2.0" }
+authors = [{ name = "OpenClaw Assistant" }]
+keywords = ["codex", "json-rpc", "sdk", "llm", "app-server"]
+classifiers = [
+  "Development Status :: 4 - Beta",
+  "Intended Audience :: Developers",
+  "License :: OSI Approved :: Apache Software License",
+  "Programming Language :: Python :: 3",
+  "Programming Language :: Python :: 3.10",
+  "Programming Language :: Python :: 3.11",
+  "Programming Language :: Python :: 3.12",
+  "Programming Language :: Python :: 3.13",
+  "Topic :: Software Development :: Libraries :: Python Modules",
+]
+dependencies = ["pydantic>=2.12"]
+
+[project.urls]
+Homepage = "https://github.com/openai/codex"
+Repository = "https://github.com/openai/codex"
+Issues = "https://github.com/openai/codex/issues"
+
+[project.optional-dependencies]
+dev = ["pytest>=8.0", "datamodel-code-generator==0.31.2", "ruff>=0.11"]
+
+[tool.hatch.build]
+exclude = [
+  ".venv/**",
+  ".venv2/**",
+  ".pytest_cache/**",
+  "dist/**",
+  "build/**",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["src/codex_app_server"]
+include = [
+  "src/codex_app_server/py.typed",
+]
+
+[tool.hatch.build.targets.sdist]
+include = [
+  "src/codex_app_server/**",
+  "README.md",
+  "CHANGELOG.md",
+  "CONTRIBUTING.md",
+  "RELEASE_CHECKLIST.md",
+  "pyproject.toml",
+]
+
+[tool.pytest.ini_options]
+addopts = "-q"
+testpaths = ["tests"]
--- a/sdk/python/scripts/update_sdk_artifacts.py
+++ b/sdk/python/scripts/update_sdk_artifacts.py
@@ -0,0 +1,998 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import argparse
+import importlib
+import json
+import platform
+import re
+import shutil
+import stat
+import subprocess
+import sys
+import tempfile
+import types
+import typing
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Callable, Sequence, get_args, get_origin
+
+
+def repo_root() -> Path:
+    return Path(__file__).resolve().parents[3]
+
+
+def sdk_root() -> Path:
+    return repo_root() / "sdk" / "python"
+
+
+def python_runtime_root() -> Path:
+    return repo_root() / "sdk" / "python-runtime"
+
+
+def schema_bundle_path() -> Path:
+    return (
+        repo_root()
+        / "codex-rs"
+        / "app-server-protocol"
+        / "schema"
+        / "json"
+        / "codex_app_server_protocol.v2.schemas.json"
+    )
+
+
+def schema_root_dir() -> Path:
+    return repo_root() / "codex-rs" / "app-server-protocol" / "schema" / "json"
+
+
+def _is_windows() -> bool:
+    return platform.system().lower().startswith("win")
+
+
+def runtime_binary_name() -> str:
+    return "codex.exe" if _is_windows() else "codex"
+
+
+def staged_runtime_bin_path(root: Path) -> Path:
+    return root / "src" / "codex_cli_bin" / "bin" / runtime_binary_name()
+
+
+def run(cmd: list[str], cwd: Path) -> None:
+    subprocess.run(cmd, cwd=str(cwd), check=True)
+
+
+def run_python_module(module: str, args: list[str], cwd: Path) -> None:
+    run([sys.executable, "-m", module, *args], cwd)
+
+
+def current_sdk_version() -> str:
+    match = re.search(
+        r'^version = "([^"]+)"$',
+        (sdk_root() / "pyproject.toml").read_text(),
+        flags=re.MULTILINE,
+    )
+    if match is None:
+        raise RuntimeError("Could not determine Python SDK version from pyproject.toml")
+    return match.group(1)
+
+
+def _copy_package_tree(src: Path, dst: Path) -> None:
+    if dst.exists():
+        if dst.is_dir():
+            shutil.rmtree(dst)
+        else:
+            dst.unlink()
+    shutil.copytree(
+        src,
+        dst,
+        ignore=shutil.ignore_patterns(
+            ".venv",
+            ".venv2",
+            ".pytest_cache",
+            "__pycache__",
+            "build",
+            "dist",
+            "*.pyc",
+        ),
+    )
+
+
+def _rewrite_project_version(pyproject_text: str, version: str) -> str:
+    updated, count = re.subn(
+        r'^version = "[^"]+"$',
+        f'version = "{version}"',
+        pyproject_text,
+        count=1,
+        flags=re.MULTILINE,
+    )
+    if count != 1:
+        raise RuntimeError("Could not rewrite project version in pyproject.toml")
+    return updated
+
+
+def _rewrite_sdk_runtime_dependency(pyproject_text: str, runtime_version: str) -> str:
+    match = re.search(r"^dependencies = \[(.*?)\]$", pyproject_text, flags=re.MULTILINE)
+    if match is None:
+        raise RuntimeError(
+            "Could not find dependencies array in sdk/python/pyproject.toml"
+        )
+
+    raw_items = [item.strip() for item in match.group(1).split(",") if item.strip()]
+    raw_items = [item for item in raw_items if "codex-cli-bin" not in item]
+    raw_items.append(f'"codex-cli-bin=={runtime_version}"')
+    replacement = "dependencies = [\n  " + ",\n  ".join(raw_items) + ",\n]"
+    return pyproject_text[: match.start()] + replacement + pyproject_text[match.end() :]
+
+
+def stage_python_sdk_package(
+    staging_dir: Path, sdk_version: str, runtime_version: str
+) -> Path:
+    _copy_package_tree(sdk_root(), staging_dir)
+    sdk_bin_dir = staging_dir / "src" / "codex_app_server" / "bin"
+    if sdk_bin_dir.exists():
+        shutil.rmtree(sdk_bin_dir)
+
+    pyproject_path = staging_dir / "pyproject.toml"
+    pyproject_text = pyproject_path.read_text()
+    pyproject_text = _rewrite_project_version(pyproject_text, sdk_version)
+    pyproject_text = _rewrite_sdk_runtime_dependency(pyproject_text, runtime_version)
+    pyproject_path.write_text(pyproject_text)
+    return staging_dir
+
+
+def stage_python_runtime_package(
+    staging_dir: Path, runtime_version: str, binary_path: Path
+) -> Path:
+    _copy_package_tree(python_runtime_root(), staging_dir)
+
+    pyproject_path = staging_dir / "pyproject.toml"
+    pyproject_path.write_text(
+        _rewrite_project_version(pyproject_path.read_text(), runtime_version)
+    )
+
+    out_bin = staged_runtime_bin_path(staging_dir)
+    out_bin.parent.mkdir(parents=True, exist_ok=True)
+    shutil.copy2(binary_path, out_bin)
+    if not _is_windows():
+        out_bin.chmod(
+            out_bin.stat().st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
+        )
+    return staging_dir
+
+
+def _flatten_string_enum_one_of(definition: dict[str, Any]) -> bool:
+    branches = definition.get("oneOf")
+    if not isinstance(branches, list) or not branches:
+        return False
+
+    enum_values: list[str] = []
+    for branch in branches:
+        if not isinstance(branch, dict):
+            return False
+        if branch.get("type") != "string":
+            return False
+
+        enum = branch.get("enum")
+        if not isinstance(enum, list) or len(enum) != 1 or not isinstance(enum[0], str):
+            return False
+
+        extra_keys = set(branch) - {"type", "enum", "description", "title"}
+        if extra_keys:
+            return False
+
+        enum_values.append(enum[0])
+
+    description = definition.get("description")
+    title = definition.get("title")
+    definition.clear()
+    definition["type"] = "string"
+    definition["enum"] = enum_values
+    if isinstance(description, str):
+        definition["description"] = description
+    if isinstance(title, str):
+        definition["title"] = title
+    return True
+
+
+# Property names probed, in this order, for a single-valued string literal
+# (`const` or one-element `enum`) when deriving titles for union variants.
+DISCRIMINATOR_KEYS = ("type", "method", "mode", "state", "status", "role", "reason")
+
+
+def _to_pascal_case(value: str) -> str:
+    parts = re.split(r"[^0-9A-Za-z]+", value)
+    compact = "".join(part[:1].upper() + part[1:] for part in parts if part)
+    return compact or "Value"
+
+
+def _string_literal(value: Any) -> str | None:
+    if not isinstance(value, dict):
+        return None
+    const = value.get("const")
+    if isinstance(const, str):
+        return const
+
+    enum = value.get("enum")
+    if isinstance(enum, list) and enum and len(enum) == 1 and isinstance(enum[0], str):
+        return enum[0]
+    return None
+
+
+def _enum_literals(value: Any) -> list[str] | None:
+    if not isinstance(value, dict):
+        return None
+    enum = value.get("enum")
+    if (
+        not isinstance(enum, list)
+        or not enum
+        or not all(isinstance(item, str) for item in enum)
+    ):
+        return None
+    return list(enum)
+
+
+def _literal_from_property(props: dict[str, Any], key: str) -> str | None:
+    return _string_literal(props.get(key))
+
+
+def _variant_definition_name(base: str, variant: dict[str, Any]) -> str | None:
+    # datamodel-code-generator invents numbered helper names for inline union
+    # branches unless they carry a stable, unique title up front. We derive
+    # those titles from the branch discriminator or other identifying shape.
+    props = variant.get("properties")
+    if isinstance(props, dict):
+        for key in DISCRIMINATOR_KEYS:
+            literal = _literal_from_property(props, key)
+            if literal is None:
+                continue
+            pascal = _to_pascal_case(literal)
+            if base == "ClientRequest":
+                return f"{pascal}Request"
+            if base == "ServerRequest":
+                return f"{pascal}ServerRequest"
+            if base == "ClientNotification":
+                return f"{pascal}ClientNotification"
+            if base == "ServerNotification":
+                return f"{pascal}ServerNotification"
+            if base == "EventMsg":
+                return f"{pascal}EventMsg"
+            return f"{pascal}{base}"
+
+        if len(props) == 1:
+            key = next(iter(props))
+            pascal = _string_literal(props[key])
+            return f"{_to_pascal_case(pascal or key)}{base}"
+
+    required = variant.get("required")
+    if (
+        isinstance(required, list)
+        and len(required) == 1
+        and isinstance(required[0], str)
+    ):
+        return f"{_to_pascal_case(required[0])}{base}"
+
+    enum_literals = _enum_literals(variant)
+    if enum_literals is not None:
+        if len(enum_literals) == 1:
+            return f"{_to_pascal_case(enum_literals[0])}{base}"
+        return f"{base}Value"
+
+    return None
+
+
+def _variant_collision_key(
+    base: str, variant: dict[str, Any], generated_name: str
+) -> str:
+    parts = [f"base={base}", f"generated={generated_name}"]
+    props = variant.get("properties")
+    if isinstance(props, dict):
+        for key in DISCRIMINATOR_KEYS:
+            literal = _literal_from_property(props, key)
+            if literal is not None:
+                parts.append(f"{key}={literal}")
+        if len(props) == 1:
+            parts.append(f"only_property={next(iter(props))}")
+
+    required = variant.get("required")
+    if (
+        isinstance(required, list)
+        and len(required) == 1
+        and isinstance(required[0], str)
+    ):
+        parts.append(f"required_only={required[0]}")
+
+    enum_literals = _enum_literals(variant)
+    if enum_literals is not None:
+        parts.append(f"enum={'|'.join(enum_literals)}")
+
+    return "|".join(parts)
+
+
+def _set_discriminator_titles(props: dict[str, Any], owner: str) -> None:
+    for key in DISCRIMINATOR_KEYS:
+        prop = props.get(key)
+        if not isinstance(prop, dict):
+            continue
+        if _string_literal(prop) is None or "title" in prop:
+            continue
+        prop["title"] = f"{owner}{_to_pascal_case(key)}"
+
+
+def _annotate_variant_list(variants: list[Any], base: str | None) -> None:
+    """Give each dict branch of a ``oneOf``/``anyOf`` list a stable title, in place.
+
+    When *base* is truthy a name is derived with `_variant_definition_name`;
+    it replaces the branch title when that title is missing or not a string,
+    contains "/", or differs from the derived name. Branches that end up with
+    a string title also get their discriminator properties titled, and every
+    dict branch is then handed to `_annotate_schema` for nested processing.
+
+    Raises:
+        RuntimeError: if a derived name is already used as a title by another
+            branch of the same list.
+    """
+    # Titles already present in this list; derived names must not reuse them.
+    seen = {
+        variant["title"]
+        for variant in variants
+        if isinstance(variant, dict) and isinstance(variant.get("title"), str)
+    }
+
+    for variant in variants:
+        if not isinstance(variant, dict):
+            continue
+
+        variant_name = variant.get("title")
+        # Without a base there is nothing to derive a name from.
+        generated_name = _variant_definition_name(base, variant) if base else None
+        if generated_name is not None and (
+            not isinstance(variant_name, str)
+            or "/" in variant_name
+            or variant_name != generated_name
+        ):
+            # Titles like `Thread/startedNotification` sanitize poorly in
+            # Python, and envelope titles like `ErrorNotification` collide
+            # with their payload model names. Rewrite them before codegen so
+            # we get `ThreadStartedServerNotification` instead of `...1`.
+            if generated_name in seen and variant_name != generated_name:
+                raise RuntimeError(
+                    "Variant title naming collision detected: "
+                    f"{_variant_collision_key(base or '<root>', variant, generated_name)}"
+                )
+            variant["title"] = generated_name
+            seen.add(generated_name)
+            variant_name = generated_name
+
+        if isinstance(variant_name, str):
+            props = variant.get("properties")
+            if isinstance(props, dict):
+                _set_discriminator_titles(props, variant_name)
+
+        # Recurse so unions nested inside this branch are normalized too.
+        _annotate_schema(variant, base)
+
+
+def _annotate_schema(value: Any, base: str | None = None) -> None:
+    """Recursively normalize titles throughout a JSON-schema fragment, in place.
+
+    Lists are walked item by item and non-dict scalars are ignored. For a
+    dict: discriminator properties are titled when the dict has a string
+    ``title``, ``oneOf``/``anyOf`` branches go through
+    `_annotate_variant_list`, and each ``definitions``/``$defs`` entry is
+    processed with its own name as the new *base*. All remaining keys are
+    recursed into with the current *base*.
+    """
+    if isinstance(value, list):
+        for item in value:
+            _annotate_schema(item, base)
+        return
+
+    if not isinstance(value, dict):
+        return
+
+    owner = value.get("title")
+    props = value.get("properties")
+    if isinstance(owner, str) and isinstance(props, dict):
+        _set_discriminator_titles(props, owner)
+
+    one_of = value.get("oneOf")
+    if isinstance(one_of, list):
+        # Walk nested unions recursively so every inline branch gets the same
+        # title normalization treatment before we hand the bundle to Python
+        # codegen.
+        _annotate_variant_list(one_of, base)
+
+    any_of = value.get("anyOf")
+    if isinstance(any_of, list):
+        _annotate_variant_list(any_of, base)
+
+    definitions = value.get("definitions")
+    if isinstance(definitions, dict):
+        # Each named definition becomes the base for the unions it contains.
+        for name, schema in definitions.items():
+            _annotate_schema(schema, name if isinstance(name, str) else base)
+
+    defs = value.get("$defs")
+    if isinstance(defs, dict):
+        for name, schema in defs.items():
+            _annotate_schema(schema, name if isinstance(name, str) else base)
+
+    # The four keys handled above are skipped here to avoid a second pass.
+    for key, child in value.items():
+        if key in {"oneOf", "anyOf", "definitions", "$defs"}:
+            continue
+        _annotate_schema(child, base)
+
+
+def _normalized_schema_bundle_text() -> str:
+    schema = json.loads(schema_bundle_path().read_text())
+    definitions = schema.get("definitions", {})
+    if isinstance(definitions, dict):
+        for definition in definitions.values():
+            if isinstance(definition, dict):
+                _flatten_string_enum_one_of(definition)
+    # Normalize the schema into something datamodel-code-generator can map to
+    # stable class names instead of anonymous numbered helpers.
+    _annotate_schema(schema)
+    return json.dumps(schema, indent=2, sort_keys=True) + "\n"
+
+
+def generate_v2_all() -> None:
+    """Regenerate ``generated/v2_all.py`` from the normalized schema bundle.
+
+    Removes a ``generated/v2_all`` directory if one exists, writes the
+    normalized schema to a temporary file, runs ``datamodel_code_generator``
+    on it as a module of the current interpreter, then rewrites any timestamp
+    header in the output to a fixed placeholder.
+    """
+    out_path = sdk_root() / "src" / "codex_app_server" / "generated" / "v2_all.py"
+    out_dir = out_path.parent
+    # A directory named like the output module would sit beside `v2_all.py`.
+    old_package_dir = out_dir / "v2_all"
+    if old_package_dir.exists():
+        shutil.rmtree(old_package_dir)
+    out_dir.mkdir(parents=True, exist_ok=True)
+    with tempfile.TemporaryDirectory() as td:
+        # The temporary copy keeps the bundle's original file name.
+        normalized_bundle = Path(td) / schema_bundle_path().name
+        normalized_bundle.write_text(_normalized_schema_bundle_text())
+        run_python_module(
+            "datamodel_code_generator",
+            [
+                "--input",
+                str(normalized_bundle),
+                "--input-file-type",
+                "jsonschema",
+                "--output",
+                str(out_path),
+                "--output-model-type",
+                "pydantic_v2.BaseModel",
+                "--target-python-version",
+                "3.11",
+                "--use-standard-collections",
+                "--enum-field-as-literal",
+                "one",
+                "--field-constraints",
+                "--use-default-kwarg",
+                "--snake-case-field",
+                "--allow-population-by-field-name",
+                # Once the schema prepass has assigned stable titles, tell the
+                # generator to prefer those titles as the emitted class names.
+                "--use-title-as-name",
+                "--use-annotated",
+                "--use-union-operator",
+                "--disable-timestamp",
+                # Keep the generated file formatted deterministically so the
+                # checked-in artifact only changes when the schema does.
+                "--formatters",
+                "ruff-format",
+            ],
+            cwd=sdk_root(),
+        )
+    # Runs even though --disable-timestamp is passed above; it only rewrites
+    # the file if a timestamp header line is actually present.
+    _normalize_generated_timestamps(out_path)
+
+
+def _notification_specs() -> list[tuple[str, str]]:
+    server_notifications = json.loads(
+        (schema_root_dir() / "ServerNotification.json").read_text()
+    )
+    one_of = server_notifications.get("oneOf", [])
+    generated_source = (
+        sdk_root() / "src" / "codex_app_server" / "generated" / "v2_all.py"
+    ).read_text()
+
+    specs: list[tuple[str, str]] = []
+
+    for variant in one_of:
+        props = variant.get("properties", {})
+        method_meta = props.get("method", {})
+        params_meta = props.get("params", {})
+
+        methods = method_meta.get("enum", [])
+        if len(methods) != 1:
+            continue
+        method = methods[0]
+        if not isinstance(method, str):
+            continue
+
+        ref = params_meta.get("$ref")
+        if not isinstance(ref, str) or not ref.startswith("#/definitions/"):
+            continue
+        class_name = ref.split("/")[-1]
+        if (
+            f"class {class_name}(" not in generated_source
+            and f"{class_name} =" not in generated_source
+        ):
+            # Skip schema variants that are not emitted into the generated v2 surface.
+            continue
+        specs.append((method, class_name))
+
+    specs.sort()
+    return specs
+
+
+def generate_notification_registry() -> None:
+    out = (
+        sdk_root()
+        / "src"
+        / "codex_app_server"
+        / "generated"
+        / "notification_registry.py"
+    )
+    specs = _notification_specs()
+    class_names = sorted({class_name for _, class_name in specs})
+
+    lines = [
+        "# Auto-generated by scripts/update_sdk_artifacts.py",
+        "# DO NOT EDIT MANUALLY.",
+        "",
+        "from __future__ import annotations",
+        "",
+        "from pydantic import BaseModel",
+        "",
+    ]
+
+    for class_name in class_names:
+        lines.append(f"from .v2_all import {class_name}")
+    lines.extend(
+        [
+            "",
+            "NOTIFICATION_MODELS: dict[str, type[BaseModel]] = {",
+        ]
+    )
+    for method, class_name in specs:
+        lines.append(f'    "{method}": {class_name},')
+    lines.extend(["}", ""])
+
+    out.write_text("\n".join(lines))
+
+
+def _normalize_generated_timestamps(root: Path) -> None:
+    timestamp_re = re.compile(r"^#\s+timestamp:\s+.+$", flags=re.MULTILINE)
+    py_files = [root] if root.is_file() else sorted(root.rglob("*.py"))
+    for py_file in py_files:
+        content = py_file.read_text()
+        normalized = timestamp_re.sub("#   timestamp: <normalized>", content)
+        if normalized != content:
+            py_file.write_text(normalized)
+
+
+# Maps a model field name to the annotation source text that
+# `_load_public_fields` emits instead of the introspected annotation
+# (with ` | None` appended when the field is not required).
+FIELD_ANNOTATION_OVERRIDES: dict[str, str] = {
+    # Keep public API typed without falling back to `Any`.
+    "config": "JsonObject",
+    "output_schema": "JsonObject",
+}
+
+
+@dataclass(slots=True)
+class PublicFieldSpec:
+    """One model field as rendered into generated public method code."""
+
+    # Keyword name used when constructing the params model.
+    wire_name: str
+    # Parameter name used in the generated method signature.
+    py_name: str
+    # Annotation rendered as Python source text.
+    annotation: str
+    # False makes the generated parameter default to `None`.
+    required: bool
+
+
+@dataclass(frozen=True)
+class CliOps:
+    """Immutable bundle of the callables the command-line entry points invoke.
+
+    NOTE(review): presumably exists so callers can substitute these
+    operations; the consumer is not visible in this part of the file.
+    """
+
+    # Takes no arguments and returns nothing.
+    generate_types: Callable[[], None]
+    # Same signature as the module-level `stage_python_sdk_package`.
+    stage_python_sdk_package: Callable[[Path, str, str], Path]
+    # Same signature as the module-level `stage_python_runtime_package`.
+    stage_python_runtime_package: Callable[[Path, str, Path], Path]
+    # Same signature as the module-level `current_sdk_version`.
+    current_sdk_version: Callable[[], str]
+
+
+def _annotation_to_source(annotation: Any) -> str:
+    origin = get_origin(annotation)
+    if origin is typing.Annotated:
+        return _annotation_to_source(get_args(annotation)[0])
+    if origin in (typing.Union, types.UnionType):
+        parts: list[str] = []
+        for arg in get_args(annotation):
+            rendered = _annotation_to_source(arg)
+            if rendered not in parts:
+                parts.append(rendered)
+        return " | ".join(parts)
+    if origin is list:
+        args = get_args(annotation)
+        item = _annotation_to_source(args[0]) if args else "Any"
+        return f"list[{item}]"
+    if origin is dict:
+        args = get_args(annotation)
+        key = _annotation_to_source(args[0]) if args else "str"
+        val = _annotation_to_source(args[1]) if len(args) > 1 else "Any"
+        return f"dict[{key}, {val}]"
+    if annotation is Any or annotation is typing.Any:
+        return "Any"
+    if annotation is None or annotation is type(None):
+        return "None"
+    if isinstance(annotation, type):
+        if annotation.__module__ == "builtins":
+            return annotation.__name__
+        return annotation.__name__
+    return repr(annotation)
+
+
+def _camel_to_snake(name: str) -> str:
+    head = re.sub(r"(.)([A-Z][a-z]+)", r"\1_\2", name)
+    return re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", head).lower()
+
+
+def _load_public_fields(
+    module_name: str, class_name: str, *, exclude: set[str] | None = None
+) -> list[PublicFieldSpec]:
+    exclude = exclude or set()
+    module = importlib.import_module(module_name)
+    model = getattr(module, class_name)
+    fields: list[PublicFieldSpec] = []
+    for name, field in model.model_fields.items():
+        if name in exclude:
+            continue
+        required = field.is_required()
+        annotation = _annotation_to_source(field.annotation)
+        override = FIELD_ANNOTATION_OVERRIDES.get(name)
+        if override is not None:
+            annotation = override if required else f"{override} | None"
+        fields.append(
+            PublicFieldSpec(
+                wire_name=name,
+                py_name=name,
+                annotation=annotation,
+                required=required,
+            )
+        )
+    return fields
+
+
+def _kw_signature_lines(fields: list[PublicFieldSpec]) -> list[str]:
+    lines: list[str] = []
+    for field in fields:
+        default = "" if field.required else " = None"
+        lines.append(f"        {field.py_name}: {field.annotation}{default},")
+    return lines
+
+
+def _model_arg_lines(
+    fields: list[PublicFieldSpec], *, indent: str = "            "
+) -> list[str]:
+    return [f"{indent}{field.wire_name}={field.py_name}," for field in fields]
+
+
+def _replace_generated_block(source: str, block_name: str, body: str) -> str:
+    start_tag = f"    # BEGIN GENERATED: {block_name}"
+    end_tag = f"    # END GENERATED: {block_name}"
+    pattern = re.compile(rf"(?s){re.escape(start_tag)}\n.*?\n{re.escape(end_tag)}")
+    replacement = f"{start_tag}\n{body.rstrip()}\n{end_tag}"
+    updated, count = pattern.subn(replacement, source, count=1)
+    if count != 1:
+        raise RuntimeError(f"Could not update generated block: {block_name}")
+    return updated
+
+
+def _render_codex_block(
+    thread_start_fields: list[PublicFieldSpec],
+    thread_list_fields: list[PublicFieldSpec],
+    resume_fields: list[PublicFieldSpec],
+    fork_fields: list[PublicFieldSpec],
+) -> str:
+    """Render the synchronous ``Codex`` thread-management methods as source text.
+
+    Each ``*_fields`` list supplies the keyword parameters of the matching
+    generated method. Methods are separated by one blank line and the
+    returned text has no trailing newline.
+    """
+    start_method = [
+        "    def thread_start(",
+        "        self,",
+        "        *,",
+        *_kw_signature_lines(thread_start_fields),
+        "    ) -> Thread:",
+        "        params = ThreadStartParams(",
+        *_model_arg_lines(thread_start_fields),
+        "        )",
+        "        started = self._client.thread_start(params)",
+        "        return Thread(self._client, started.thread.id)",
+    ]
+    list_method = [
+        "    def thread_list(",
+        "        self,",
+        "        *,",
+        *_kw_signature_lines(thread_list_fields),
+        "    ) -> ThreadListResponse:",
+        "        params = ThreadListParams(",
+        *_model_arg_lines(thread_list_fields),
+        "        )",
+        "        return self._client.thread_list(params)",
+    ]
+    resume_method = [
+        "    def thread_resume(",
+        "        self,",
+        "        thread_id: str,",
+        "        *,",
+        *_kw_signature_lines(resume_fields),
+        "    ) -> Thread:",
+        "        params = ThreadResumeParams(",
+        "            thread_id=thread_id,",
+        *_model_arg_lines(resume_fields),
+        "        )",
+        "        resumed = self._client.thread_resume(thread_id, params)",
+        "        return Thread(self._client, resumed.thread.id)",
+    ]
+    fork_method = [
+        "    def thread_fork(",
+        "        self,",
+        "        thread_id: str,",
+        "        *,",
+        *_kw_signature_lines(fork_fields),
+        "    ) -> Thread:",
+        "        params = ThreadForkParams(",
+        "            thread_id=thread_id,",
+        *_model_arg_lines(fork_fields),
+        "        )",
+        "        forked = self._client.thread_fork(thread_id, params)",
+        "        return Thread(self._client, forked.thread.id)",
+    ]
+    archive_method = [
+        "    def thread_archive(self, thread_id: str) -> ThreadArchiveResponse:",
+        "        return self._client.thread_archive(thread_id)",
+    ]
+    unarchive_method = [
+        "    def thread_unarchive(self, thread_id: str) -> Thread:",
+        "        unarchived = self._client.thread_unarchive(thread_id)",
+        "        return Thread(self._client, unarchived.thread.id)",
+    ]
+    methods = [
+        start_method,
+        list_method,
+        resume_method,
+        fork_method,
+        archive_method,
+        unarchive_method,
+    ]
+    # A blank line between methods is the same as joining them on "\n\n".
+    return "\n\n".join("\n".join(method) for method in methods)
+
+
+def _render_async_codex_block(
+    thread_start_fields: list[PublicFieldSpec],
+    thread_list_fields: list[PublicFieldSpec],
+    resume_fields: list[PublicFieldSpec],
+    fork_fields: list[PublicFieldSpec],
+) -> str:
+    """Render the ``AsyncCodex`` thread-management coroutines as source text.
+
+    Same method set as the synchronous block, except every generated method
+    first awaits ``self._ensure_initialized()`` and awaits the client call.
+    Methods are separated by one blank line; no trailing newline.
+    """
+    start_method = [
+        "    async def thread_start(",
+        "        self,",
+        "        *,",
+        *_kw_signature_lines(thread_start_fields),
+        "    ) -> AsyncThread:",
+        "        await self._ensure_initialized()",
+        "        params = ThreadStartParams(",
+        *_model_arg_lines(thread_start_fields),
+        "        )",
+        "        started = await self._client.thread_start(params)",
+        "        return AsyncThread(self, started.thread.id)",
+    ]
+    list_method = [
+        "    async def thread_list(",
+        "        self,",
+        "        *,",
+        *_kw_signature_lines(thread_list_fields),
+        "    ) -> ThreadListResponse:",
+        "        await self._ensure_initialized()",
+        "        params = ThreadListParams(",
+        *_model_arg_lines(thread_list_fields),
+        "        )",
+        "        return await self._client.thread_list(params)",
+    ]
+    resume_method = [
+        "    async def thread_resume(",
+        "        self,",
+        "        thread_id: str,",
+        "        *,",
+        *_kw_signature_lines(resume_fields),
+        "    ) -> AsyncThread:",
+        "        await self._ensure_initialized()",
+        "        params = ThreadResumeParams(",
+        "            thread_id=thread_id,",
+        *_model_arg_lines(resume_fields),
+        "        )",
+        "        resumed = await self._client.thread_resume(thread_id, params)",
+        "        return AsyncThread(self, resumed.thread.id)",
+    ]
+    fork_method = [
+        "    async def thread_fork(",
+        "        self,",
+        "        thread_id: str,",
+        "        *,",
+        *_kw_signature_lines(fork_fields),
+        "    ) -> AsyncThread:",
+        "        await self._ensure_initialized()",
+        "        params = ThreadForkParams(",
+        "            thread_id=thread_id,",
+        *_model_arg_lines(fork_fields),
+        "        )",
+        "        forked = await self._client.thread_fork(thread_id, params)",
+        "        return AsyncThread(self, forked.thread.id)",
+    ]
+    archive_method = [
+        "    async def thread_archive(self, thread_id: str) -> ThreadArchiveResponse:",
+        "        await self._ensure_initialized()",
+        "        return await self._client.thread_archive(thread_id)",
+    ]
+    unarchive_method = [
+        "    async def thread_unarchive(self, thread_id: str) -> AsyncThread:",
+        "        await self._ensure_initialized()",
+        "        unarchived = await self._client.thread_unarchive(thread_id)",
+        "        return AsyncThread(self, unarchived.thread.id)",
+    ]
+    methods = [
+        start_method,
+        list_method,
+        resume_method,
+        fork_method,
+        archive_method,
+        unarchive_method,
+    ]
+    # A blank line between methods is the same as joining them on "\n\n".
+    return "\n\n".join("\n".join(method) for method in methods)
+
+
+def _render_thread_block(
+    turn_fields: list[PublicFieldSpec],
+) -> str:
+    """Render the synchronous ``Thread.turn`` method as source text.
+
+    ``turn_fields`` become keyword-only parameters that are forwarded to
+    ``TurnStartParams`` next to the fixed ``thread_id`` and ``input``.
+    """
+    signature = [
+        "    def turn(",
+        "        self,",
+        "        input: Input,",
+        "        *,",
+        *_kw_signature_lines(turn_fields),
+        "    ) -> Turn:",
+    ]
+    body = [
+        "        wire_input = _to_wire_input(input)",
+        "        params = TurnStartParams(",
+        "            thread_id=self.id,",
+        "            input=wire_input,",
+        *_model_arg_lines(turn_fields),
+        "        )",
+        "        turn = self._client.turn_start(self.id, wire_input, params=params)",
+        "        return Turn(self._client, self.id, turn.turn.id)",
+    ]
+    return "\n".join(signature + body)
+
+
+def _render_async_thread_block(
+    turn_fields: list[PublicFieldSpec],
+) -> str:
+    """Render the ``AsyncThread.turn`` coroutine as source text.
+
+    Mirrors the synchronous ``turn`` rendering, but the generated code awaits
+    ``self._codex._ensure_initialized()`` first and awaits the client call.
+    """
+    signature = [
+        "    async def turn(",
+        "        self,",
+        "        input: Input,",
+        "        *,",
+        *_kw_signature_lines(turn_fields),
+        "    ) -> AsyncTurn:",
+    ]
+    body = [
+        "        await self._codex._ensure_initialized()",
+        "        wire_input = _to_wire_input(input)",
+        "        params = TurnStartParams(",
+        "            thread_id=self.id,",
+        "            input=wire_input,",
+        *_model_arg_lines(turn_fields),
+        "        )",
+        "        turn = await self._codex._client.turn_start(",
+        "            self.id,",
+        "            wire_input,",
+        "            params=params,",
+        "        )",
+        "        return AsyncTurn(self._codex, self.id, turn.turn.id)",
+    ]
+    return "\n".join(signature + body)
+
+
+def generate_public_api_flat_methods() -> None:
+    """Regenerate the flat keyword-argument methods inside ``public_api.py``.
+
+    Loads the field lists of the generated v2 params models, renders the
+    ``Codex``, ``AsyncCodex``, ``Thread`` and ``AsyncThread`` method blocks,
+    and splices each one between its BEGIN/END GENERATED markers. Does
+    nothing when ``public_api.py`` does not exist.
+
+    Raises:
+        RuntimeError: If a marker pair is missing from ``public_api.py``.
+    """
+    src_dir = sdk_root() / "src"
+    public_api_path = src_dir / "codex_app_server" / "public_api.py"
+    if not public_api_path.exists():
+        # PR2 can run codegen before the ergonomic public API layer is added.
+        return
+    # The generated models are imported by module name, so the SDK source
+    # directory has to be importable.
+    src_dir_str = str(src_dir)
+    if src_dir_str not in sys.path:
+        sys.path.insert(0, src_dir_str)
+
+    models_module = "codex_app_server.generated.v2_all"
+    thread_start_fields = _load_public_fields(models_module, "ThreadStartParams")
+    thread_list_fields = _load_public_fields(models_module, "ThreadListParams")
+    # Fields the generated methods pass explicitly are excluded so they are
+    # not emitted a second time as keyword parameters.
+    thread_resume_fields = _load_public_fields(
+        models_module,
+        "ThreadResumeParams",
+        exclude={"thread_id"},
+    )
+    thread_fork_fields = _load_public_fields(
+        models_module,
+        "ThreadForkParams",
+        exclude={"thread_id"},
+    )
+    turn_start_fields = _load_public_fields(
+        models_module,
+        "TurnStartParams",
+        exclude={"thread_id", "input"},
+    )
+
+    # Explicit UTF-8: the locale default differs across platforms and could
+    # corrupt non-ASCII text in the file on a round trip.
+    source = public_api_path.read_text(encoding="utf-8")
+    rendered_blocks = (
+        (
+            "Codex.flat_methods",
+            _render_codex_block(
+                thread_start_fields,
+                thread_list_fields,
+                thread_resume_fields,
+                thread_fork_fields,
+            ),
+        ),
+        (
+            "AsyncCodex.flat_methods",
+            _render_async_codex_block(
+                thread_start_fields,
+                thread_list_fields,
+                thread_resume_fields,
+                thread_fork_fields,
+            ),
+        ),
+        ("Thread.flat_methods", _render_thread_block(turn_start_fields)),
+        ("AsyncThread.flat_methods", _render_async_thread_block(turn_start_fields)),
+    )
+    for block_name, body in rendered_blocks:
+        source = _replace_generated_block(source, block_name, body)
+    public_api_path.write_text(source, encoding="utf-8")
+
+
+def generate_types() -> None:
+    """Run every code-generation step in dependency order."""
+    # v2_all is the authoritative generated surface.
+    steps = (
+        generate_v2_all,
+        generate_notification_registry,
+        generate_public_api_flat_methods,
+    )
+    for step in steps:
+        step()
+
+
+def build_parser() -> argparse.ArgumentParser:
+    """Build the CLI parser with its three subcommands.
+
+    ``generate-types`` takes no arguments; ``stage-sdk`` and ``stage-runtime``
+    each take a staging directory and a mandatory ``--runtime-version``. The
+    chosen subcommand is stored on the namespace as ``command``.
+    """
+    root = argparse.ArgumentParser(description="Single SDK maintenance entrypoint")
+    commands = root.add_subparsers(dest="command", required=True)
+
+    commands.add_parser(
+        "generate-types", help="Regenerate Python protocol-derived types"
+    )
+
+    sdk_cmd = commands.add_parser(
+        "stage-sdk",
+        help="Stage a releasable SDK package pinned to a runtime version",
+    )
+    sdk_cmd.add_argument(
+        "staging_dir",
+        type=Path,
+        help="Output directory for the staged SDK package",
+    )
+    sdk_cmd.add_argument(
+        "--runtime-version",
+        required=True,
+        help="Pinned codex-cli-bin version for the staged SDK package",
+    )
+    sdk_cmd.add_argument(
+        "--sdk-version",
+        help="Version to write into the staged SDK package (defaults to sdk/python current version)",
+    )
+
+    runtime_cmd = commands.add_parser(
+        "stage-runtime",
+        help="Stage a releasable runtime package for the current platform",
+    )
+    runtime_cmd.add_argument(
+        "staging_dir",
+        type=Path,
+        help="Output directory for the staged runtime package",
+    )
+    runtime_cmd.add_argument(
+        "runtime_binary",
+        type=Path,
+        help="Path to the codex binary to package for this platform",
+    )
+    runtime_cmd.add_argument(
+        "--runtime-version",
+        required=True,
+        help="Version to write into the staged runtime package",
+    )
+    return root
+
+
+def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
+    """Parse ``argv``, or the process arguments when ``argv`` is ``None``."""
+    cli_args = None if argv is None else list(argv)
+    return build_parser().parse_args(cli_args)
+
+
+def default_cli_ops() -> CliOps:
+    """Return the ``CliOps`` bundle wired to this module's real implementations."""
+    implementations = {
+        "generate_types": generate_types,
+        "stage_python_sdk_package": stage_python_sdk_package,
+        "stage_python_runtime_package": stage_python_runtime_package,
+        "current_sdk_version": current_sdk_version,
+    }
+    return CliOps(**implementations)
+
+
+def run_command(args: argparse.Namespace, ops: CliOps) -> None:
+    """Dispatch the parsed subcommand to the matching ``ops`` callables.
+
+    Args:
+        args: Parsed CLI namespace; ``args.command`` selects the action.
+        ops: Operations bundle that performs the actual work.
+
+    Raises:
+        ValueError: If ``args.command`` is not a known subcommand. Without
+            this, a hand-built namespace with a bad command would silently
+            do nothing and still be reported as done.
+    """
+    command = args.command
+    if command == "generate-types":
+        ops.generate_types()
+        return
+    if command == "stage-sdk":
+        # Types are regenerated before the SDK package is staged.
+        ops.generate_types()
+        ops.stage_python_sdk_package(
+            args.staging_dir,
+            args.sdk_version or ops.current_sdk_version(),
+            args.runtime_version,
+        )
+        return
+    if command == "stage-runtime":
+        ops.stage_python_runtime_package(
+            args.staging_dir,
+            args.runtime_version,
+            args.runtime_binary.resolve(),
+        )
+        return
+    raise ValueError(f"Unknown command: {command!r}")
+
+
+def main(argv: Sequence[str] | None = None, ops: CliOps | None = None) -> None:
+    """CLI entry point: parse arguments, run the command, then print ``Done.``.
+
+    ``ops`` lets callers substitute the operations bundle; the module's real
+    implementations are used when it is omitted.
+    """
+    parsed = parse_args(argv)
+    active_ops = ops if ops else default_cli_ops()
+    run_command(parsed, active_ops)
+    print("Done.")
+
+
+# Run the CLI only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()
--- a/sdk/python/src/codex_app_server/__init__.py
+++ b/sdk/python/src/codex_app_server/__init__.py
@@ -0,0 +1,10 @@
+from .client import AppServerClient, AppServerConfig
+from .errors import AppServerError, JsonRpcError, TransportClosedError
+
+# Names re-exported as the package's public surface.
+__all__ = [
+    "AppServerClient",
+    "AppServerConfig",
+    "AppServerError",
+    "JsonRpcError",
+    "TransportClosedError",
+]
--- a/sdk/python/src/codex_app_server/client.py
+++ b/sdk/python/src/codex_app_server/client.py
@@ -0,0 +1,540 @@
+from __future__ import annotations
+
+import json
+import os
+import subprocess
+import threading
+import uuid
+from collections import deque
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Callable, Iterable, Iterator, TypeVar
+
+from pydantic import BaseModel
+
+from .errors import AppServerError, TransportClosedError, map_jsonrpc_error
+from .generated.notification_registry import NOTIFICATION_MODELS
+from .generated.v2_all import (
+    AgentMessageDeltaNotification,
+    ModelListResponse,
+    ThreadArchiveResponse,
+    ThreadCompactStartResponse,
+    ThreadForkParams as V2ThreadForkParams,
+    ThreadForkResponse,
+    ThreadListParams as V2ThreadListParams,
+    ThreadListResponse,
+    ThreadReadResponse,
+    ThreadResumeParams as V2ThreadResumeParams,
+    ThreadResumeResponse,
+    ThreadSetNameResponse,
+    ThreadStartParams as V2ThreadStartParams,
+    ThreadStartResponse,
+    ThreadUnarchiveResponse,
+    TurnCompletedNotification,
+    TurnInterruptResponse,
+    TurnStartParams as V2TurnStartParams,
+    TurnStartResponse,
+    TurnSteerResponse,
+)
+from .models import (
+    InitializeResponse,
+    JsonObject,
+    JsonValue,
+    Notification,
+    UnknownNotification,
+)
+from .retry import retry_on_overload
+
+# Response model type accepted by the `response_model=` parameter of request().
+ModelT = TypeVar("ModelT", bound=BaseModel)
+# Callback answering a server-initiated request: receives the request method
+# and its params (None when params is not an object) and returns the result.
+ApprovalHandler = Callable[[str, JsonObject | None], JsonObject]
+# Distribution name quoted in the "runtime not found" error message.
+RUNTIME_PKG_NAME = "codex-cli-bin"
+
+
+def _params_dict(
+    params: (
+        V2ThreadStartParams
+        | V2ThreadResumeParams
+        | V2ThreadListParams
+        | V2ThreadForkParams
+        | V2TurnStartParams
+        | JsonObject
+        | None
+    ),
+) -> JsonObject:
+    """Convert request params to a plain JSON object.
+
+    ``None`` becomes an empty dict. Anything exposing ``model_dump`` is dumped
+    by alias in JSON mode with ``None`` values dropped. A dict is returned
+    as-is (the same object, not a copy).
+
+    Raises:
+        TypeError: If ``params`` is none of the above, or ``model_dump()``
+            does not return a dict.
+    """
+    if params is None:
+        return {}
+    if not hasattr(params, "model_dump"):
+        if isinstance(params, dict):
+            return params
+        raise TypeError(f"Expected generated params model or dict, got {type(params).__name__}")
+    dumped = params.model_dump(by_alias=True, exclude_none=True, mode="json")
+    if isinstance(dumped, dict):
+        return dumped
+    raise TypeError("Expected model_dump() to return dict")
+
+
+def _installed_codex_path() -> Path:
+    """Return the Codex binary path reported by the ``codex_cli_bin`` package.
+
+    Raises:
+        FileNotFoundError: If ``codex_cli_bin`` cannot be imported.
+    """
+    try:
+        from codex_cli_bin import bundled_codex_path
+    except ImportError as exc:
+        message = (
+            "Unable to locate the pinned Codex runtime. Install the published SDK build "
+            f"with its {RUNTIME_PKG_NAME} dependency, or set AppServerConfig.codex_bin "
+            "explicitly."
+        )
+        raise FileNotFoundError(message) from exc
+    else:
+        return bundled_codex_path()
+
+
+@dataclass(frozen=True)
+class CodexBinResolverOps:
+    """Injectable operations used by ``resolve_codex_bin``."""
+
+    # Returns the path of the installed runtime binary; called only when the
+    # config does not name a binary explicitly.
+    installed_codex_path: Callable[[], Path]
+    # Reports whether an explicitly configured binary path exists.
+    path_exists: Callable[[Path], bool]
+
+
+def _default_codex_bin_resolver_ops() -> CodexBinResolverOps:
+    """Return resolver operations backed by the real package lookup and filesystem."""
+
+    def _exists(candidate: Path) -> bool:
+        return candidate.exists()
+
+    return CodexBinResolverOps(
+        installed_codex_path=_installed_codex_path,
+        path_exists=_exists,
+    )
+
+
+def resolve_codex_bin(config: "AppServerConfig", ops: CodexBinResolverOps) -> Path:
+    """Pick the Codex binary: the configured path if set, else the installed one.
+
+    Raises:
+        FileNotFoundError: If ``config.codex_bin`` is set but ``ops`` reports
+            that the path does not exist.
+    """
+    configured = config.codex_bin
+    if configured is None:
+        return ops.installed_codex_path()
+
+    candidate = Path(configured)
+    if ops.path_exists(candidate):
+        return candidate
+    raise FileNotFoundError(
+        f"Codex binary not found at {candidate}. Set AppServerConfig.codex_bin "
+        "to a valid binary path."
+    )
+
+
+def _resolve_codex_bin(config: "AppServerConfig") -> Path:
+    """Resolve the Codex binary for ``config`` using the default resolver operations."""
+    default_ops = _default_codex_bin_resolver_ops()
+    return resolve_codex_bin(config, default_ops)
+
+
+@dataclass(slots=True)
+class AppServerConfig:
+    """Launch and handshake settings for ``AppServerClient``."""
+
+    # Explicit path to the codex binary; when None the installed runtime
+    # package is used instead.
+    codex_bin: str | None = None
+    # Full argv that replaces the default launch command when set (codex_bin
+    # and config_overrides are then not consulted).
+    launch_args_override: tuple[str, ...] | None = None
+    # Each entry is passed to the binary as `--config <entry>`.
+    config_overrides: tuple[str, ...] = ()
+    # Working directory for the child process.
+    cwd: str | None = None
+    # Extra environment variables layered on top of the current environment.
+    env: dict[str, str] | None = None
+    # Sent as clientInfo.name / title / version in the initialize request.
+    client_name: str = "codex_python_sdk"
+    client_title: str = "Codex Python SDK"
+    client_version: str = "0.2.0"
+    # Sent as capabilities.experimentalApi in the initialize request.
+    experimental_api: bool = True
+
+
+class AppServerClient:
+    """Synchronous typed JSON-RPC client for `codex app-server` over stdio.
+
+    The client launches the server as a child process and exchanges one JSON
+    message per line over its stdin/stdout. There is no background reader:
+    whichever call is waiting (a request or ``next_notification``) reads
+    stdout inline and answers server-initiated requests as they arrive.
+    Writes to stdin are serialized with a lock. stderr is drained on a
+    daemon thread into a bounded buffer that is quoted when stdout closes.
+    """
+
+    def __init__(
+        self,
+        config: AppServerConfig | None = None,
+        approval_handler: ApprovalHandler | None = None,
+    ) -> None:
+        """Store settings; the child process is not spawned until ``start()``.
+
+        Args:
+            config: Launch/handshake settings; defaults to ``AppServerConfig()``.
+            approval_handler: Callback answering server-initiated requests;
+                defaults to ``_default_approval_handler``.
+        """
+        self.config = config or AppServerConfig()
+        self._approval_handler = approval_handler or self._default_approval_handler
+        self._proc: subprocess.Popen[str] | None = None
+        # Serializes writes to the child's stdin.
+        self._lock = threading.Lock()
+        # Guards _active_turn_consumer.
+        self._turn_consumer_lock = threading.Lock()
+        self._active_turn_consumer: str | None = None
+        # Notifications read while waiting for a response; replayed first by
+        # next_notification().
+        self._pending_notifications: deque[Notification] = deque()
+        # Most recent stderr lines from the child (bounded).
+        self._stderr_lines: deque[str] = deque(maxlen=400)
+        self._stderr_thread: threading.Thread | None = None
+
+    def __enter__(self) -> "AppServerClient":
+        """Start the server process and return the client."""
+        self.start()
+        return self
+
+    def __exit__(self, _exc_type, _exc, _tb) -> None:
+        """Close the client; exceptions from the ``with`` body are not suppressed."""
+        self.close()
+
+    def start(self) -> None:
+        """Spawn the app-server child process; no-op if one is already running.
+
+        ``config.launch_args_override`` is used verbatim as argv when set;
+        otherwise the command is ``<codex> [--config kv ...] app-server
+        --listen stdio://``. ``config.env`` is layered on top of the current
+        process environment.
+
+        Raises:
+            FileNotFoundError: If no Codex binary can be resolved.
+        """
+        if self._proc is not None:
+            return
+
+        if self.config.launch_args_override is not None:
+            args = list(self.config.launch_args_override)
+        else:
+            codex_bin = _resolve_codex_bin(self.config)
+            args = [str(codex_bin)]
+            for kv in self.config.config_overrides:
+                args.extend(["--config", kv])
+            args.extend(["app-server", "--listen", "stdio://"])
+
+        env = os.environ.copy()
+        if self.config.env:
+            env.update(self.config.env)
+
+        self._proc = subprocess.Popen(
+            args,
+            stdin=subprocess.PIPE,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True,
+            cwd=self.config.cwd,
+            env=env,
+            bufsize=1,
+        )
+
+        self._start_stderr_drain_thread()
+
+    def close(self) -> None:
+        """Stop the child process; no-op if it was never started.
+
+        Closes stdin, terminates the process, and escalates to ``kill()`` if
+        it has not exited within two seconds. The process is reaped in both
+        cases so it does not linger as a zombie.
+        """
+        if self._proc is None:
+            return
+        proc = self._proc
+        self._proc = None
+        self._active_turn_consumer = None
+
+        if proc.stdin:
+            try:
+                proc.stdin.close()
+            except OSError:
+                # close() flushes; the pipe is already broken when the child
+                # has exited. That must not prevent terminating/reaping it.
+                pass
+        try:
+            proc.terminate()
+            proc.wait(timeout=2)
+        except Exception:  # noqa: BLE001 - best-effort shutdown
+            proc.kill()
+            try:
+                # Reap the killed process; bounded so close() cannot hang.
+                proc.wait(timeout=2)
+            except subprocess.TimeoutExpired:
+                pass
+
+        if self._stderr_thread and self._stderr_thread.is_alive():
+            self._stderr_thread.join(timeout=0.5)
+
+    def initialize(self) -> InitializeResponse:
+        """Send ``initialize`` with client info, then the ``initialized`` notification."""
+        result = self.request(
+            "initialize",
+            {
+                "clientInfo": {
+                    "name": self.config.client_name,
+                    "title": self.config.client_title,
+                    "version": self.config.client_version,
+                },
+                "capabilities": {
+                    "experimentalApi": self.config.experimental_api,
+                },
+            },
+            response_model=InitializeResponse,
+        )
+        self.notify("initialized", None)
+        return result
+
+    def request(
+        self,
+        method: str,
+        params: JsonObject | None,
+        *,
+        response_model: type[ModelT],
+    ) -> ModelT:
+        """Send a request and validate its result with ``response_model``.
+
+        Raises:
+            AppServerError: If the result is not a JSON object, or the
+                server answered with an error.
+            TransportClosedError: If the server is not running or closes
+                stdout before responding.
+        """
+        result = self._request_raw(method, params)
+        if not isinstance(result, dict):
+            raise AppServerError(f"{method} response must be a JSON object")
+        return response_model.model_validate(result)
+
+    def _request_raw(self, method: str, params: JsonObject | None = None) -> JsonValue:
+        """Send a request and read messages until its response arrives.
+
+        While waiting, server-initiated requests are answered through the
+        approval handler and notifications are queued for
+        ``next_notification``. Responses carrying a different id are dropped.
+        """
+        request_id = str(uuid.uuid4())
+        self._write_message({"id": request_id, "method": method, "params": params or {}})
+
+        while True:
+            msg = self._read_message()
+
+            # Has both method and id: a request from the server to us.
+            if "method" in msg and "id" in msg:
+                response = self._handle_server_request(msg)
+                self._write_message({"id": msg["id"], "result": response})
+                continue
+
+            # Has a method but no id: a notification.
+            if "method" in msg and "id" not in msg:
+                self._pending_notifications.append(
+                    self._coerce_notification(msg["method"], msg.get("params"))
+                )
+                continue
+
+            if msg.get("id") != request_id:
+                continue
+
+            if "error" in msg:
+                err = msg["error"]
+                if isinstance(err, dict):
+                    raise map_jsonrpc_error(
+                        int(err.get("code", -32000)),
+                        str(err.get("message", "unknown")),
+                        err.get("data"),
+                    )
+                raise AppServerError("Malformed JSON-RPC error response")
+
+            return msg.get("result")
+
+    def notify(self, method: str, params: JsonObject | None = None) -> None:
+        """Send a notification (a message without an id); no response is read."""
+        self._write_message({"method": method, "params": params or {}})
+
+    def next_notification(self) -> Notification:
+        """Return the next notification, blocking on stdout if none is queued.
+
+        Queued notifications are returned first. Server-initiated requests
+        read while waiting are answered; any other message is discarded.
+        """
+        if self._pending_notifications:
+            return self._pending_notifications.popleft()
+
+        while True:
+            msg = self._read_message()
+            if "method" in msg and "id" in msg:
+                response = self._handle_server_request(msg)
+                self._write_message({"id": msg["id"], "result": response})
+                continue
+            if "method" in msg and "id" not in msg:
+                return self._coerce_notification(msg["method"], msg.get("params"))
+
+    def acquire_turn_consumer(self, turn_id: str) -> None:
+        """Register ``turn_id`` as the single active turn consumer.
+
+        Raises:
+            RuntimeError: If another turn consumer is already registered.
+        """
+        with self._turn_consumer_lock:
+            if self._active_turn_consumer is not None:
+                raise RuntimeError(
+                    "Concurrent turn consumers are not yet supported in the experimental SDK. "
+                    f"Client is already streaming turn {self._active_turn_consumer!r}; "
+                    f"cannot start turn {turn_id!r} until the active consumer finishes."
+                )
+            self._active_turn_consumer = turn_id
+
+    def release_turn_consumer(self, turn_id: str) -> None:
+        """Clear the active turn consumer if it is ``turn_id``; otherwise do nothing."""
+        with self._turn_consumer_lock:
+            if self._active_turn_consumer == turn_id:
+                self._active_turn_consumer = None
+
+    def thread_start(self, params: V2ThreadStartParams | JsonObject | None = None) -> ThreadStartResponse:
+        """Call ``thread/start``."""
+        return self.request("thread/start", _params_dict(params), response_model=ThreadStartResponse)
+
+    def thread_resume(
+        self,
+        thread_id: str,
+        params: V2ThreadResumeParams | JsonObject | None = None,
+    ) -> ThreadResumeResponse:
+        """Call ``thread/resume`` for ``thread_id``."""
+        # The explicit thread_id is applied last so it wins over any threadId
+        # inside params, the same precedence turn_start uses.
+        payload = {**_params_dict(params), "threadId": thread_id}
+        return self.request("thread/resume", payload, response_model=ThreadResumeResponse)
+
+    def thread_list(self, params: V2ThreadListParams | JsonObject | None = None) -> ThreadListResponse:
+        """Call ``thread/list``."""
+        return self.request("thread/list", _params_dict(params), response_model=ThreadListResponse)
+
+    def thread_read(self, thread_id: str, include_turns: bool = False) -> ThreadReadResponse:
+        """Call ``thread/read``; ``include_turns`` is sent as ``includeTurns``."""
+        return self.request(
+            "thread/read",
+            {"threadId": thread_id, "includeTurns": include_turns},
+            response_model=ThreadReadResponse,
+        )
+
+    def thread_fork(
+        self,
+        thread_id: str,
+        params: V2ThreadForkParams | JsonObject | None = None,
+    ) -> ThreadForkResponse:
+        """Call ``thread/fork`` for ``thread_id``."""
+        # The explicit thread_id is applied last so it wins over any threadId
+        # inside params, the same precedence turn_start uses.
+        payload = {**_params_dict(params), "threadId": thread_id}
+        return self.request("thread/fork", payload, response_model=ThreadForkResponse)
+
+    def thread_archive(self, thread_id: str) -> ThreadArchiveResponse:
+        """Call ``thread/archive``."""
+        return self.request("thread/archive", {"threadId": thread_id}, response_model=ThreadArchiveResponse)
+
+    def thread_unarchive(self, thread_id: str) -> ThreadUnarchiveResponse:
+        """Call ``thread/unarchive``."""
+        return self.request("thread/unarchive", {"threadId": thread_id}, response_model=ThreadUnarchiveResponse)
+
+    def thread_set_name(self, thread_id: str, name: str) -> ThreadSetNameResponse:
+        """Call ``thread/name/set``."""
+        return self.request(
+            "thread/name/set",
+            {"threadId": thread_id, "name": name},
+            response_model=ThreadSetNameResponse,
+        )
+
+    def thread_compact(self, thread_id: str) -> ThreadCompactStartResponse:
+        """Call ``thread/compact/start``."""
+        return self.request(
+            "thread/compact/start",
+            {"threadId": thread_id},
+            response_model=ThreadCompactStartResponse,
+        )
+
+    def turn_start(
+        self,
+        thread_id: str,
+        input_items: list[JsonObject] | JsonObject | str,
+        params: V2TurnStartParams | JsonObject | None = None,
+    ) -> TurnStartResponse:
+        """Call ``turn/start``.
+
+        The explicit ``thread_id`` and normalized ``input_items`` override
+        any ``threadId``/``input`` keys that ``params`` carries.
+        """
+        payload = {
+            **_params_dict(params),
+            "threadId": thread_id,
+            "input": self._normalize_input_items(input_items),
+        }
+        return self.request("turn/start", payload, response_model=TurnStartResponse)
+
+    def turn_interrupt(self, thread_id: str, turn_id: str) -> TurnInterruptResponse:
+        """Call ``turn/interrupt``."""
+        return self.request(
+            "turn/interrupt",
+            {"threadId": thread_id, "turnId": turn_id},
+            response_model=TurnInterruptResponse,
+        )
+
+    def turn_steer(
+        self,
+        thread_id: str,
+        expected_turn_id: str,
+        input_items: list[JsonObject] | JsonObject | str,
+    ) -> TurnSteerResponse:
+        """Call ``turn/steer`` with normalized input items."""
+        return self.request(
+            "turn/steer",
+            {
+                "threadId": thread_id,
+                "expectedTurnId": expected_turn_id,
+                "input": self._normalize_input_items(input_items),
+            },
+            response_model=TurnSteerResponse,
+        )
+
+    def model_list(self, include_hidden: bool = False) -> ModelListResponse:
+        """Call ``model/list``; ``include_hidden`` is sent as ``includeHidden``."""
+        return self.request(
+            "model/list",
+            {"includeHidden": include_hidden},
+            response_model=ModelListResponse,
+        )
+
+    def request_with_retry_on_overload(
+        self,
+        method: str,
+        params: JsonObject | None,
+        *,
+        response_model: type[ModelT],
+        max_attempts: int = 3,
+        initial_delay_s: float = 0.25,
+        max_delay_s: float = 2.0,
+    ) -> ModelT:
+        """Run ``request`` through ``retry_on_overload`` with the given limits."""
+        return retry_on_overload(
+            lambda: self.request(method, params, response_model=response_model),
+            max_attempts=max_attempts,
+            initial_delay_s=initial_delay_s,
+            max_delay_s=max_delay_s,
+        )
+
+    def wait_for_turn_completed(self, turn_id: str) -> TurnCompletedNotification:
+        """Block until ``turn/completed`` arrives for ``turn_id`` and return it.
+
+        Every other notification read while waiting is consumed and dropped.
+        """
+        while True:
+            notification = self.next_notification()
+            if (
+                notification.method == "turn/completed"
+                and isinstance(notification.payload, TurnCompletedNotification)
+                and notification.payload.turn.id == turn_id
+            ):
+                return notification.payload
+
+    def stream_until_methods(self, methods: Iterable[str] | str) -> list[Notification]:
+        """Collect notifications up to and including the first with a target method.
+
+        ``methods`` may be a single method name or an iterable of names.
+        """
+        target_methods = {methods} if isinstance(methods, str) else set(methods)
+        out: list[Notification] = []
+        while True:
+            notification = self.next_notification()
+            out.append(notification)
+            if notification.method in target_methods:
+                return out
+
+    def stream_text(
+        self,
+        thread_id: str,
+        text: str,
+        params: V2TurnStartParams | JsonObject | None = None,
+    ) -> Iterator[AgentMessageDeltaNotification]:
+        """Start a turn with ``text`` and yield its agent-message deltas.
+
+        Iteration ends when ``turn/completed`` arrives for the started turn.
+        Notifications for other turns or of other kinds are dropped. Because
+        this is a generator, the turn starts on the first iteration.
+        """
+        started = self.turn_start(thread_id, text, params=params)
+        turn_id = started.turn.id
+        while True:
+            notification = self.next_notification()
+            if (
+                notification.method == "item/agentMessage/delta"
+                and isinstance(notification.payload, AgentMessageDeltaNotification)
+                and notification.payload.turn_id == turn_id
+            ):
+                yield notification.payload
+                continue
+            if (
+                notification.method == "turn/completed"
+                and isinstance(notification.payload, TurnCompletedNotification)
+                and notification.payload.turn.id == turn_id
+            ):
+                break
+
+    def _coerce_notification(self, method: str, params: object) -> Notification:
+        """Wrap raw notification params in the typed model registered for ``method``.
+
+        Falls back to ``UnknownNotification`` when no model is registered or
+        validation fails. Non-dict params are treated as an empty object.
+        """
+        params_dict = params if isinstance(params, dict) else {}
+
+        model = NOTIFICATION_MODELS.get(method)
+        if model is None:
+            return Notification(method=method, payload=UnknownNotification(params=params_dict))
+
+        try:
+            payload = model.model_validate(params_dict)
+        except Exception:  # noqa: BLE001
+            return Notification(method=method, payload=UnknownNotification(params=params_dict))
+        return Notification(method=method, payload=payload)
+
+    def _normalize_input_items(
+        self,
+        input_items: list[JsonObject] | JsonObject | str,
+    ) -> list[JsonObject]:
+        """Return input as a list of items.
+
+        A string becomes one text item, a single dict is wrapped in a list,
+        and a list is returned unchanged.
+        """
+        if isinstance(input_items, str):
+            return [{"type": "text", "text": input_items}]
+        if isinstance(input_items, dict):
+            return [input_items]
+        return input_items
+
+    def _default_approval_handler(self, method: str, params: JsonObject | None) -> JsonObject:
+        """Accept command-execution and file-change approvals; answer ``{}`` otherwise."""
+        if method == "item/commandExecution/requestApproval":
+            return {"decision": "accept"}
+        if method == "item/fileChange/requestApproval":
+            return {"decision": "accept"}
+        return {}
+
+    def _start_stderr_drain_thread(self) -> None:
+        """Start a daemon thread that copies the child's stderr into ``_stderr_lines``."""
+        proc = self._proc
+        if proc is None or proc.stderr is None:
+            return
+        # Bind the stream now: close() resets self._proc to None and may run
+        # before the thread is scheduled, so the thread must not re-read it.
+        stderr = proc.stderr
+
+        def _drain() -> None:
+            for line in stderr:
+                self._stderr_lines.append(line.rstrip("\n"))
+
+        self._stderr_thread = threading.Thread(target=_drain, daemon=True)
+        self._stderr_thread.start()
+
+    def _stderr_tail(self, limit: int = 40) -> str:
+        """Return the last ``limit`` buffered stderr lines joined by newlines."""
+        return "\n".join(list(self._stderr_lines)[-limit:])
+
+    def _handle_server_request(self, msg: dict[str, JsonValue]) -> JsonObject:
+        """Answer a server-initiated request through the approval handler.
+
+        Returns ``{}`` when the method is not a string; non-dict params are
+        passed to the handler as ``None``.
+        """
+        method = msg["method"]
+        params = msg.get("params")
+        if not isinstance(method, str):
+            return {}
+        return self._approval_handler(
+            method,
+            params if isinstance(params, dict) else None,
+        )
+
+    def _write_message(self, payload: JsonObject) -> None:
+        """Write ``payload`` as one JSON line to the child's stdin and flush.
+
+        Raises:
+            TransportClosedError: If the server process is not running.
+        """
+        if self._proc is None or self._proc.stdin is None:
+            raise TransportClosedError("app-server is not running")
+        with self._lock:
+            self._proc.stdin.write(json.dumps(payload) + "\n")
+            self._proc.stdin.flush()
+
+    def _read_message(self) -> dict[str, JsonValue]:
+        """Read and decode one JSON object line from the child's stdout.
+
+        Raises:
+            TransportClosedError: If the server is not running or stdout
+                reached end of file.
+            AppServerError: If the line is not valid JSON or not an object.
+        """
+        if self._proc is None or self._proc.stdout is None:
+            raise TransportClosedError("app-server is not running")
+
+        line = self._proc.stdout.readline()
+        if not line:
+            # Keep the end of the tail: the most recent stderr output is the
+            # part most likely to explain why the server went away.
+            raise TransportClosedError(
+                f"app-server closed stdout. stderr_tail={self._stderr_tail()[-2000:]}"
+            )
+
+        try:
+            message = json.loads(line)
+        except json.JSONDecodeError as exc:
+            raise AppServerError(f"Invalid JSON-RPC line: {line!r}") from exc
+
+        if not isinstance(message, dict):
+            raise AppServerError(f"Invalid JSON-RPC payload: {message!r}")
+        return message
+
+
+def default_codex_home() -> str:
+    """Return ``~/.codex`` for the current user as a string."""
+    home_dir = Path.home()
+    return str(home_dir.joinpath(".codex"))
--- a/sdk/python/src/codex_app_server/errors.py
+++ b/sdk/python/src/codex_app_server/errors.py
@@ -0,0 +1,125 @@
+from __future__ import annotations
+
+from typing import Any
+
+
+class AppServerError(Exception):
+    """Base exception for SDK errors.
+
+    Every other exception class in this module derives from it, so catching
+    ``AppServerError`` covers both transport and JSON-RPC failures.
+    """
+
+
+class JsonRpcError(AppServerError):
+    """Raw JSON-RPC error wrapper from the server.
+
+    The exception text is ``JSON-RPC error {code}: {message}``; the three
+    constructor arguments are also stored as attributes of the same name.
+    """
+
+    def __init__(self, code: int, message: str, data: Any = None) -> None:
+        super().__init__(f"JSON-RPC error {code}: {message}")
+        # Numeric JSON-RPC error code.
+        self.code = code
+        # Message as passed in, without the "JSON-RPC error <code>:" prefix.
+        self.message = message
+        # Optional error payload; is_retryable_error() inspects it for overload markers.
+        self.data = data
+
+
+class TransportClosedError(AppServerError):
+    """Raised when the app-server transport closes unexpectedly.
+
+    Derives directly from ``AppServerError``: it is not a JSON-RPC error and
+    carries no ``code`` or ``data`` attributes.
+    """
+
+
+class AppServerRpcError(JsonRpcError):
+    """Base typed error for JSON-RPC failures.
+
+    ``map_jsonrpc_error`` returns this class itself for codes in the
+    -32099..-32000 range that match no more specific subclass.
+    """
+
+
+class ParseError(AppServerRpcError):
+    """JSON-RPC error with code -32700, as mapped by ``map_jsonrpc_error``."""
+
+    pass
+
+
+class InvalidRequestError(AppServerRpcError):
+    """JSON-RPC error with code -32600, as mapped by ``map_jsonrpc_error``."""
+
+    pass
+
+
+class MethodNotFoundError(AppServerRpcError):
+    """JSON-RPC error with code -32601, as mapped by ``map_jsonrpc_error``."""
+
+    pass
+
+
+class InvalidParamsError(AppServerRpcError):
+    """JSON-RPC error with code -32602, as mapped by ``map_jsonrpc_error``."""
+
+    pass
+
+
+class InternalRpcError(AppServerRpcError):
+    """JSON-RPC error with code -32603, as mapped by ``map_jsonrpc_error``."""
+
+    pass
+
+
+class ServerBusyError(AppServerRpcError):
+    """Server is overloaded / unavailable and caller should retry.
+
+    ``is_retryable_error`` reports every instance of this class (including
+    subclasses) as retryable.
+    """
+
+
+class RetryLimitExceededError(ServerBusyError):
+    """Server exhausted internal retry budget for a retryable operation.
+
+    ``map_jsonrpc_error`` selects this class for -32099..-32000 errors whose
+    message contains retry-limit wording.
+    """
+
+    # NOTE(review): as a ServerBusyError subclass this is still reported as
+    # retryable by is_retryable_error() — confirm that is intended.
+
+
+def _contains_retry_limit_text(message: str) -> bool:
+    """Return True when ``message`` contains retry-limit wording, ignoring case."""
+    haystack = message.lower()
+    return any(
+        phrase in haystack for phrase in ("retry limit", "too many failed attempts")
+    )
+
+
+def _is_server_overloaded(data: Any) -> bool:
+    """Return True when ``data`` contains a ``server_overloaded`` marker.
+
+    A string matches when it equals ``server_overloaded`` ignoring case. Lists
+    and dicts are searched recursively through their values; for dicts the
+    ``codex_error_info`` / ``codexErrorInfo`` / ``errorInfo`` entry is checked
+    first. ``None`` and any other type yield False.
+    """
+    marker = "server_overloaded"
+
+    if isinstance(data, str):
+        return data.lower() == marker
+
+    if isinstance(data, list):
+        return any(_is_server_overloaded(item) for item in data)
+
+    if not isinstance(data, dict):
+        # None and every remaining scalar type carry no marker.
+        return False
+
+    # First truthy value among the known error-info keys.
+    info = (
+        data.get("codex_error_info")
+        or data.get("codexErrorInfo")
+        or data.get("errorInfo")
+    )
+    if isinstance(info, str):
+        if info.lower() == marker:
+            return True
+    elif isinstance(info, dict):
+        if any(
+            isinstance(entry, str) and entry.lower() == marker
+            for entry in info.values()
+        ):
+            return True
+
+    return any(_is_server_overloaded(entry) for entry in data.values())
+
+
+def map_jsonrpc_error(code: int, message: str, data: Any = None) -> JsonRpcError:
+    """Map a raw JSON-RPC error into a richer SDK exception class.
+
+    The five fixed codes -32700, -32600, -32601, -32602 and -32603 map to their
+    dedicated classes. Codes in -32099..-32000 become ``RetryLimitExceededError``
+    when the message has retry-limit wording, else ``ServerBusyError`` when
+    ``data`` carries an overload marker, else ``AppServerRpcError``. Any other
+    code yields a plain ``JsonRpcError``. The exception is returned, not raised.
+    """
+
+    fixed_codes: dict[int, type[JsonRpcError]] = {
+        -32700: ParseError,
+        -32600: InvalidRequestError,
+        -32601: MethodNotFoundError,
+        -32602: InvalidParamsError,
+        -32603: InternalRpcError,
+    }
+
+    error_cls = fixed_codes.get(code)
+    if error_cls is None:
+        if not (-32099 <= code <= -32000):
+            error_cls = JsonRpcError
+        elif _contains_retry_limit_text(message):
+            # Retry-limit wording wins whether or not the overload marker is set.
+            error_cls = RetryLimitExceededError
+        elif _is_server_overloaded(data):
+            error_cls = ServerBusyError
+        else:
+            error_cls = AppServerRpcError
+
+    return error_cls(code, message, data)
+
+
+def is_retryable_error(exc: BaseException) -> bool:
+    """True if the exception is a transient overload-style error.
+
+    That is: any ``ServerBusyError``, or any other ``JsonRpcError`` whose
+    ``data`` carries an overload marker.
+    """
+
+    if not isinstance(exc, JsonRpcError):
+        # ServerBusyError derives from JsonRpcError, so nothing else qualifies.
+        return False
+
+    return isinstance(exc, ServerBusyError) or _is_server_overloaded(exc.data)
--- a/sdk/python/src/codex_app_server/generated/init.py
+++ b/sdk/python/src/codex_app_server/generated/init.py
@@ -0,0 +1 @@
+"""Auto-generated Python types derived from the app-server schemas."""
--- a/sdk/python/src/codex_app_server/generated/notification_registry.py
+++ b/sdk/python/src/codex_app_server/generated/notification_registry.py
@@ -0,0 +1,102 @@
+# Auto-generated by scripts/update_sdk_artifacts.py
+# DO NOT EDIT MANUALLY.
+
+from __future__ import annotations
+
+from pydantic import BaseModel
+
+from .v2_all import AccountLoginCompletedNotification
+from .v2_all import AccountRateLimitsUpdatedNotification
+from .v2_all import AccountUpdatedNotification
+from .v2_all import AgentMessageDeltaNotification
+from .v2_all import AppListUpdatedNotification
+from .v2_all import CommandExecOutputDeltaNotification
+from .v2_all import CommandExecutionOutputDeltaNotification
+from .v2_all import ConfigWarningNotification
+from .v2_all import ContextCompactedNotification
+from .v2_all import DeprecationNoticeNotification
+from .v2_all import ErrorNotification
+from .v2_all import FileChangeOutputDeltaNotification
+from .v2_all import FuzzyFileSearchSessionCompletedNotification
+from .v2_all import FuzzyFileSearchSessionUpdatedNotification
+from .v2_all import HookCompletedNotification
+from .v2_all import HookStartedNotification
+from .v2_all import ItemCompletedNotification
+from .v2_all import ItemStartedNotification
+from .v2_all import McpServerOauthLoginCompletedNotification
+from .v2_all import McpToolCallProgressNotification
+from .v2_all import ModelReroutedNotification
+from .v2_all import PlanDeltaNotification
+from .v2_all import ReasoningSummaryPartAddedNotification
+from .v2_all import ReasoningSummaryTextDeltaNotification
+from .v2_all import ReasoningTextDeltaNotification
+from .v2_all import ServerRequestResolvedNotification
+from .v2_all import SkillsChangedNotification
+from .v2_all import TerminalInteractionNotification
+from .v2_all import ThreadArchivedNotification
+from .v2_all import ThreadClosedNotification
+from .v2_all import ThreadNameUpdatedNotification
+from .v2_all import ThreadRealtimeClosedNotification
+from .v2_all import ThreadRealtimeErrorNotification
+from .v2_all import ThreadRealtimeItemAddedNotification
+from .v2_all import ThreadRealtimeOutputAudioDeltaNotification
+from .v2_all import ThreadRealtimeStartedNotification
+from .v2_all import ThreadStartedNotification
+from .v2_all import ThreadStatusChangedNotification
+from .v2_all import ThreadTokenUsageUpdatedNotification
+from .v2_all import ThreadUnarchivedNotification
+from .v2_all import TurnCompletedNotification
+from .v2_all import TurnDiffUpdatedNotification
+from .v2_all import TurnPlanUpdatedNotification
+from .v2_all import TurnStartedNotification
+from .v2_all import WindowsSandboxSetupCompletedNotification
+from .v2_all import WindowsWorldWritableWarningNotification
+
+NOTIFICATION_MODELS: dict[str, type[BaseModel]] = {
+    "account/login/completed": AccountLoginCompletedNotification,
+    "account/rateLimits/updated": AccountRateLimitsUpdatedNotification,
+    "account/updated": AccountUpdatedNotification,
+    "app/list/updated": AppListUpdatedNotification,
+    "command/exec/outputDelta": CommandExecOutputDeltaNotification,
+    "configWarning": ConfigWarningNotification,
+    "deprecationNotice": DeprecationNoticeNotification,
+    "error": ErrorNotification,
+    "fuzzyFileSearch/sessionCompleted": FuzzyFileSearchSessionCompletedNotification,
+    "fuzzyFileSearch/sessionUpdated": FuzzyFileSearchSessionUpdatedNotification,
+    "hook/completed": HookCompletedNotification,
+    "hook/started": HookStartedNotification,
+    "item/agentMessage/delta": AgentMessageDeltaNotification,
+    "item/commandExecution/outputDelta": CommandExecutionOutputDeltaNotification,
+    "item/commandExecution/terminalInteraction": TerminalInteractionNotification,
+    "item/completed": ItemCompletedNotification,
+    "item/fileChange/outputDelta": FileChangeOutputDeltaNotification,
+    "item/mcpToolCall/progress": McpToolCallProgressNotification,
+    "item/plan/delta": PlanDeltaNotification,
+    "item/reasoning/summaryPartAdded": ReasoningSummaryPartAddedNotification,
+    "item/reasoning/summaryTextDelta": ReasoningSummaryTextDeltaNotification,
+    "item/reasoning/textDelta": ReasoningTextDeltaNotification,
+    "item/started": ItemStartedNotification,
+    "mcpServer/oauthLogin/completed": McpServerOauthLoginCompletedNotification,
+    "model/rerouted": ModelReroutedNotification,
+    "serverRequest/resolved": ServerRequestResolvedNotification,
+    "skills/changed": SkillsChangedNotification,
+    "thread/archived": ThreadArchivedNotification,
+    "thread/closed": ThreadClosedNotification,
+    "thread/compacted": ContextCompactedNotification,
+    "thread/name/updated": ThreadNameUpdatedNotification,
+    "thread/realtime/closed": ThreadRealtimeClosedNotification,
+    "thread/realtime/error": ThreadRealtimeErrorNotification,
+    "thread/realtime/itemAdded": ThreadRealtimeItemAddedNotification,
+    "thread/realtime/outputAudio/delta": ThreadRealtimeOutputAudioDeltaNotification,
+    "thread/realtime/started": ThreadRealtimeStartedNotification,
+    "thread/started": ThreadStartedNotification,
+    "thread/status/changed": ThreadStatusChangedNotification,
+    "thread/tokenUsage/updated": ThreadTokenUsageUpdatedNotification,
+    "thread/unarchived": ThreadUnarchivedNotification,
+    "turn/completed": TurnCompletedNotification,
+    "turn/diff/updated": TurnDiffUpdatedNotification,
+    "turn/plan/updated": TurnPlanUpdatedNotification,
+    "turn/started": TurnStartedNotification,
+    "windows/worldWritableWarning": WindowsWorldWritableWarningNotification,
+    "windowsSandbox/setupCompleted": WindowsSandboxSetupCompletedNotification,
+}
--- a/sdk/python/src/codex_app_server/generated/v2_all.py
+++ b/sdk/python/src/codex_app_server/generated/v2_all.py
--- a/sdk/python/src/codex_app_server/generated/v2_types.py
+++ b/sdk/python/src/codex_app_server/generated/v2_types.py
@@ -0,0 +1,25 @@
+"""Stable aliases over full v2 autogenerated models (datamodel-code-generator)."""
+
+# NOTE(review): these imports address ``v2_all`` as a package with one submodule
+# per model, whereas models.py and notification_registry.py import names straight
+# from a flat ``v2_all`` module — confirm this file is still importable.
+# NOTE(review): ``ThreadItem153`` looks like a generator-assigned numbered name;
+# verify it is stable across regenerations before relying on the alias.
+from .v2_all.ModelListResponse import ModelListResponse
+from .v2_all.ThreadCompactStartResponse import ThreadCompactStartResponse
+from .v2_all.ThreadListResponse import ThreadListResponse
+from .v2_all.ThreadReadResponse import ThreadReadResponse
+from .v2_all.ThreadTokenUsageUpdatedNotification import (
+    ThreadTokenUsageUpdatedNotification,
+)
+from .v2_all.TurnCompletedNotification import ThreadItem153 as ThreadItem
+from .v2_all.TurnCompletedNotification import (
+    TurnCompletedNotification as TurnCompletedNotificationPayload,
+)
+from .v2_all.TurnSteerResponse import TurnSteerResponse
+
+# Names re-exported by this module, under their alias where one is used.
+__all__ = [
+    "ModelListResponse",
+    "ThreadCompactStartResponse",
+    "ThreadListResponse",
+    "ThreadReadResponse",
+    "ThreadTokenUsageUpdatedNotification",
+    "TurnCompletedNotificationPayload",
+    "TurnSteerResponse",
+    "ThreadItem",
+]
--- a/sdk/python/src/codex_app_server/models.py
+++ b/sdk/python/src/codex_app_server/models.py
@@ -0,0 +1,97 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import TypeAlias
+
+from pydantic import BaseModel
+
+from .generated.v2_all import (
+    AccountLoginCompletedNotification,
+    AccountRateLimitsUpdatedNotification,
+    AccountUpdatedNotification,
+    AgentMessageDeltaNotification,
+    AppListUpdatedNotification,
+    CommandExecutionOutputDeltaNotification,
+    ConfigWarningNotification,
+    ContextCompactedNotification,
+    DeprecationNoticeNotification,
+    ErrorNotification,
+    FileChangeOutputDeltaNotification,
+    ItemCompletedNotification,
+    ItemStartedNotification,
+    McpServerOauthLoginCompletedNotification,
+    McpToolCallProgressNotification,
+    PlanDeltaNotification,
+    RawResponseItemCompletedNotification,
+    ReasoningSummaryPartAddedNotification,
+    ReasoningSummaryTextDeltaNotification,
+    ReasoningTextDeltaNotification,
+    TerminalInteractionNotification,
+    ThreadNameUpdatedNotification,
+    ThreadStartedNotification,
+    ThreadTokenUsageUpdatedNotification,
+    TurnCompletedNotification,
+    TurnDiffUpdatedNotification,
+    TurnPlanUpdatedNotification,
+    TurnStartedNotification,
+    WindowsWorldWritableWarningNotification,
+)
+
+# Type aliases for JSON-shaped data.
+JsonScalar: TypeAlias = str | int | float | bool | None
+# Recursive alias: the quoted "JsonValue" entries are forward references to itself.
+JsonValue: TypeAlias = JsonScalar | dict[str, "JsonValue"] | list["JsonValue"]
+# A JSON object: string keys mapped to arbitrary JSON values.
+JsonObject: TypeAlias = dict[str, JsonValue]
+
+
+@dataclass(slots=True)
+class UnknownNotification:
+    """Untyped notification payload that keeps the raw ``params`` object."""
+
+    # The notification's params, stored as a plain JSON object.
+    params: JsonObject
+
+
+# Union of the typed notification payload models plus the UnknownNotification
+# fallback; used as the type of Notification.payload.
+# NOTE(review): NOTIFICATION_MODELS in generated/notification_registry.py registers
+# models that are absent here (e.g. HookStartedNotification,
+# ThreadArchivedNotification), and RawResponseItemCompletedNotification appears
+# here but not in that registry — confirm whether the two lists should match.
+NotificationPayload: TypeAlias = (
+    AccountLoginCompletedNotification
+    | AccountRateLimitsUpdatedNotification
+    | AccountUpdatedNotification
+    | AgentMessageDeltaNotification
+    | AppListUpdatedNotification
+    | CommandExecutionOutputDeltaNotification
+    | ConfigWarningNotification
+    | ContextCompactedNotification
+    | DeprecationNoticeNotification
+    | ErrorNotification
+    | FileChangeOutputDeltaNotification
+    | ItemCompletedNotification
+    | ItemStartedNotification
+    | McpServerOauthLoginCompletedNotification
+    | McpToolCallProgressNotification
+    | PlanDeltaNotification
+    | RawResponseItemCompletedNotification
+    | ReasoningSummaryPartAddedNotification
+    | ReasoningSummaryTextDeltaNotification
+    | ReasoningTextDeltaNotification
+    | TerminalInteractionNotification
+    | ThreadNameUpdatedNotification
+    | ThreadStartedNotification
+    | ThreadTokenUsageUpdatedNotification
+    | TurnCompletedNotification
+    | TurnDiffUpdatedNotification
+    | TurnPlanUpdatedNotification
+    | TurnStartedNotification
+    | WindowsWorldWritableWarningNotification
+    | UnknownNotification
+)
+
+
+@dataclass(slots=True)
+class Notification:
+    """A notification: its method name paired with its payload."""
+
+    # Method string of the notification, e.g. "thread/tokenUsage/updated".
+    method: str
+    # A typed payload model, or UnknownNotification holding the raw params.
+    payload: NotificationPayload
+
+
+class ServerInfo(BaseModel):
+    """Server name and version; both fields are optional and default to None."""
+
+    name: str | None = None
+    version: str | None = None
+
+
+class InitializeResponse(BaseModel):
+    """Initialize response model; both fields are optional and default to None."""
+
+    # Field names are camelCase as written here (presumably mirroring the wire
+    # format — verify against the app-server schema).
+    serverInfo: ServerInfo | None = None
+    userAgent: str | None = None
--- a/sdk/python/src/codex_app_server/py.typed
+++ b/sdk/python/src/codex_app_server/py.typed
--- a/sdk/python/src/codex_app_server/retry.py
+++ b/sdk/python/src/codex_app_server/retry.py
@@ -0,0 +1,41 @@
+from __future__ import annotations
+
+import random
+import time
+from typing import Callable, TypeVar
+
+from .errors import is_retryable_error
+
+T = TypeVar("T")
+
+
+def retry_on_overload(
+    op: Callable[[], T],
+    *,
+    max_attempts: int = 3,
+    initial_delay_s: float = 0.25,
+    max_delay_s: float = 2.0,
+    jitter_ratio: float = 0.2,
+) -> T:
+    """Call ``op`` until it succeeds, retrying only overload-style failures.
+
+    Args:
+        op: Zero-argument callable to invoke.
+        max_attempts: Total number of calls allowed, including the first.
+        initial_delay_s: Base delay before the first retry, in seconds.
+        max_delay_s: Upper bound applied to the base delay.
+        jitter_ratio: Fraction of the current base delay used as +/- jitter.
+
+    Returns:
+        Whatever ``op`` returns on its first successful call.
+
+    Raises:
+        ValueError: ``max_attempts`` is below 1.
+        Exception: the exception from ``op`` is re-raised unchanged when it is
+            not retryable or when the attempts are used up.
+    """
+
+    if max_attempts < 1:
+        raise ValueError("max_attempts must be >= 1")
+
+    backoff = initial_delay_s
+    attempts_made = 0
+    while True:
+        attempts_made += 1
+        try:
+            return op()
+        except Exception as exc:
+            # Short-circuit: the retryability check is skipped once attempts run out.
+            give_up = attempts_made >= max_attempts or not is_retryable_error(exc)
+            if give_up:
+                raise
+
+            spread = backoff * jitter_ratio
+            pause = min(max_delay_s, backoff) + random.uniform(-spread, spread)
+            if pause > 0:
+                time.sleep(pause)
+            # Double the base delay for the next round, capped at max_delay_s.
+            backoff = min(max_delay_s, backoff * 2)
--- a/sdk/python/tests/conftest.py
+++ b/sdk/python/tests/conftest.py
@@ -0,0 +1,16 @@
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+# Directory one level above tests/, and its src/ subdirectory.
+ROOT = Path(__file__).resolve().parents[1]
+SRC = ROOT / "src"
+
+# Move src/ to the front of sys.path (dropping an existing entry first) so
+# imports resolve against this checkout before anything else on the path.
+src_str = str(SRC)
+if src_str in sys.path:
+    sys.path.remove(src_str)
+sys.path.insert(0, src_str)
+
+# Evict already-imported codex_app_server modules so later imports are
+# re-resolved with the updated sys.path. Iterate over a copy because the loop
+# mutates sys.modules.
+for module_name in list(sys.modules):
+    if module_name == "codex_app_server" or module_name.startswith("codex_app_server."):
+        sys.modules.pop(module_name)
--- a/sdk/python/tests/test_artifact_workflow_and_binaries.py
+++ b/sdk/python/tests/test_artifact_workflow_and_binaries.py
@@ -0,0 +1,411 @@
+from __future__ import annotations
+
+import ast
+import importlib.util
+import json
+import sys
+import tomllib
+from pathlib import Path
+
+import pytest
+
+ROOT = Path(__file__).resolve().parents[1]
+
+
+def _load_update_script_module():
+    """Load scripts/update_sdk_artifacts.py by path and return the module object.
+
+    The module is registered in ``sys.modules`` as ``update_sdk_artifacts``
+    before its body runs. Raises AssertionError if no import spec or loader
+    can be built for the script path.
+    """
+    script_path = ROOT / "scripts" / "update_sdk_artifacts.py"
+    spec = importlib.util.spec_from_file_location("update_sdk_artifacts", script_path)
+    loader = None if spec is None else spec.loader
+    if loader is None:
+        raise AssertionError(f"Failed to load script module: {script_path}")
+
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[spec.name] = module
+    loader.exec_module(module)
+    return module
+
+
+def test_generation_has_single_maintenance_entrypoint_script() -> None:
+    """The scripts directory holds exactly one Python file: update_sdk_artifacts.py."""
+    script_names = [entry.name for entry in (ROOT / "scripts").glob("*.py")]
+    script_names.sort()
+    assert script_names == ["update_sdk_artifacts.py"]
+
+
+def test_generate_types_wires_all_generation_steps() -> None:
+    """``generate_types`` calls the three generation steps, in a fixed order.
+
+    The script is parsed with ``ast`` rather than executed; only top-level
+    expression statements that call a bare name are counted.
+    """
+    source = (ROOT / "scripts" / "update_sdk_artifacts.py").read_text()
+    tree = ast.parse(source)
+
+    # Locate the module-level ``generate_types`` function definition.
+    generate_types_fn = next(
+        (
+            node
+            for node in tree.body
+            if isinstance(node, ast.FunctionDef) and node.name == "generate_types"
+        ),
+        None,
+    )
+    assert generate_types_fn is not None
+
+    calls: list[str] = []
+    for node in generate_types_fn.body:
+        if isinstance(node, ast.Expr) and isinstance(node.value, ast.Call):
+            fn = node.value.func
+            if isinstance(fn, ast.Name):
+                calls.append(fn.id)
+
+    assert calls == [
+        "generate_v2_all",
+        "generate_notification_registry",
+        "generate_public_api_flat_methods",
+    ]
+
+
+def test_schema_normalization_only_flattens_string_literal_oneofs() -> None:
+    """``_flatten_string_enum_one_of`` is truthy for exactly five schema definitions.
+
+    Each definition is passed as a shallow copy; the expected names are compared
+    in the iteration order of the schema's ``definitions`` mapping.
+    """
+    script = _load_update_script_module()
+    schema = json.loads(
+        (
+            ROOT.parent.parent
+            / "codex-rs"
+            / "app-server-protocol"
+            / "schema"
+            / "json"
+            / "codex_app_server_protocol.v2.schemas.json"
+        ).read_text()
+    )
+
+    definitions = schema["definitions"]
+    flattened = [
+        name
+        for name, definition in definitions.items()
+        if isinstance(definition, dict)
+        and script._flatten_string_enum_one_of(definition.copy())
+    ]
+
+    assert flattened == [
+        "AuthMode",
+        "CommandExecOutputStream",
+        "ExperimentalFeatureStage",
+        "InputModality",
+        "MessagePhase",
+    ]
+
+
+def test_python_codegen_schema_annotation_adds_stable_variant_titles() -> None:
+    """``_annotate_schema`` gives ``oneOf`` variants the expected ``title`` values.
+
+    Checks ``ServerNotification`` for presence/absence of specific titles, and
+    ``AskForApproval`` / ``ReasoningSummary`` for their exact ordered title lists.
+    """
+    script = _load_update_script_module()
+    schema = json.loads(
+        (
+            ROOT.parent.parent
+            / "codex-rs"
+            / "app-server-protocol"
+            / "schema"
+            / "json"
+            / "codex_app_server_protocol.v2.schemas.json"
+        ).read_text()
+    )
+
+    # The result is read back from ``schema`` itself, not from a return value.
+    script._annotate_schema(schema)
+    definitions = schema["definitions"]
+
+    server_notification_titles = {
+        variant.get("title")
+        for variant in definitions["ServerNotification"]["oneOf"]
+        if isinstance(variant, dict)
+    }
+    assert "ErrorServerNotification" in server_notification_titles
+    assert "ThreadStartedServerNotification" in server_notification_titles
+    assert "ErrorNotification" not in server_notification_titles
+    assert "Thread/startedNotification" not in server_notification_titles
+
+    ask_for_approval_titles = [
+        variant.get("title") for variant in definitions["AskForApproval"]["oneOf"]
+    ]
+    assert ask_for_approval_titles == [
+        "AskForApprovalValue",
+        "RejectAskForApproval",
+    ]
+
+    reasoning_summary_titles = [
+        variant.get("title") for variant in definitions["ReasoningSummary"]["oneOf"]
+    ]
+    assert reasoning_summary_titles == [
+        "ReasoningSummaryValue",
+        "NoneReasoningSummary",
+    ]
+
+
+def test_generate_v2_all_uses_titles_for_generated_names() -> None:
+    """The update script's source text mentions each expected codegen option."""
+    source = (ROOT / "scripts" / "update_sdk_artifacts.py").read_text()
+    for expected in (
+        "--use-title-as-name",
+        "--use-annotated",
+        "--formatters",
+        "ruff-format",
+    ):
+        assert expected in source
+
+
+def test_runtime_package_template_has_no_checked_in_binaries() -> None:
+    """The only file under python-runtime/src/codex_cli_bin is ``__init__.py``.
+
+    Files inside any ``__pycache__`` directory are ignored.
+    """
+    runtime_root = ROOT.parent / "python-runtime" / "src" / "codex_cli_bin"
+    assert sorted(
+        path.name
+        for path in runtime_root.rglob("*")
+        if path.is_file() and "__pycache__" not in path.parts
+    ) == ["__init__.py"]
+
+
+def test_runtime_package_is_wheel_only_and_builds_platform_specific_wheels() -> None:
+    """Check the runtime package's hatch configuration and build hook statically.
+
+    Asserts the exact wheel and sdist target tables in pyproject.toml, that the
+    hook's ``initialize`` contains an ``if self.target_name == "sdist"`` statement,
+    and that it assigns ``build_data["pure_python"] = False`` and
+    ``build_data["infer_tag"] = True``. The hook is parsed, never executed.
+    """
+    pyproject = tomllib.loads(
+        (ROOT.parent / "python-runtime" / "pyproject.toml").read_text()
+    )
+    hook_source = (ROOT.parent / "python-runtime" / "hatch_build.py").read_text()
+    hook_tree = ast.parse(hook_source)
+    # No default is given, so a missing ``initialize`` raises StopIteration here.
+    initialize_fn = next(
+        node
+        for node in ast.walk(hook_tree)
+        if isinstance(node, ast.FunctionDef) and node.name == "initialize"
+    )
+
+    # First top-level ``if`` in initialize() whose test is exactly
+    # ``self.target_name == "sdist"``.
+    sdist_guard = next(
+        (
+            node
+            for node in initialize_fn.body
+            if isinstance(node, ast.If)
+            and isinstance(node.test, ast.Compare)
+            and isinstance(node.test.left, ast.Attribute)
+            and isinstance(node.test.left.value, ast.Name)
+            and node.test.left.value.id == "self"
+            and node.test.left.attr == "target_name"
+            and len(node.test.ops) == 1
+            and isinstance(node.test.ops[0], ast.Eq)
+            and len(node.test.comparators) == 1
+            and isinstance(node.test.comparators[0], ast.Constant)
+            and node.test.comparators[0].value == "sdist"
+        ),
+        None,
+    )
+    # Collect top-level ``build_data["<key>"] = <constant>`` assignments as
+    # a {key: constant} mapping.
+    build_data_assignments = {
+        node.targets[0].slice.value: node.value.value
+        for node in initialize_fn.body
+        if isinstance(node, ast.Assign)
+        and len(node.targets) == 1
+        and isinstance(node.targets[0], ast.Subscript)
+        and isinstance(node.targets[0].value, ast.Name)
+        and node.targets[0].value.id == "build_data"
+        and isinstance(node.targets[0].slice, ast.Constant)
+        and isinstance(node.targets[0].slice.value, str)
+        and isinstance(node.value, ast.Constant)
+    }
+
+    assert pyproject["tool"]["hatch"]["build"]["targets"]["wheel"] == {
+        "packages": ["src/codex_cli_bin"],
+        "include": ["src/codex_cli_bin/bin/**"],
+        "hooks": {"custom": {}},
+    }
+    assert pyproject["tool"]["hatch"]["build"]["targets"]["sdist"] == {
+        "hooks": {"custom": {}},
+    }
+    assert sdist_guard is not None
+    assert build_data_assignments == {"pure_python": False, "infer_tag": True}
+
+
+def test_stage_runtime_release_copies_binary_and_sets_version(tmp_path: Path) -> None:
+    """Staging the runtime package copies the binary and writes the version.
+
+    The returned path is the staging directory, the staged binary has the fake
+    binary's content, and the staged pyproject.toml contains ``version = "1.2.3"``.
+    """
+    script = _load_update_script_module()
+    fake_binary = tmp_path / script.runtime_binary_name()
+    fake_binary.write_text("fake codex\n")
+
+    staged = script.stage_python_runtime_package(
+        tmp_path / "runtime-stage",
+        "1.2.3",
+        fake_binary,
+    )
+
+    assert staged == tmp_path / "runtime-stage"
+    assert script.staged_runtime_bin_path(staged).read_text() == "fake codex\n"
+    assert 'version = "1.2.3"' in (staged / "pyproject.toml").read_text()
+
+
+def test_stage_runtime_release_replaces_existing_staging_dir(tmp_path: Path) -> None:
+    """Runtime staging removes files already present in the staging directory."""
+    script = _load_update_script_module()
+    staging_dir = tmp_path / "runtime-stage"
+    # Pre-populate the staging directory with a file that must not survive.
+    old_file = staging_dir / "stale.txt"
+    old_file.parent.mkdir(parents=True)
+    old_file.write_text("stale")
+
+    fake_binary = tmp_path / script.runtime_binary_name()
+    fake_binary.write_text("fake codex\n")
+
+    staged = script.stage_python_runtime_package(
+        staging_dir,
+        "1.2.3",
+        fake_binary,
+    )
+
+    assert staged == staging_dir
+    assert not old_file.exists()
+    assert script.staged_runtime_bin_path(staged).read_text() == "fake codex\n"
+
+
+def test_stage_sdk_release_injects_exact_runtime_pin(tmp_path: Path) -> None:
+    """SDK staging writes the SDK version and an exact ``codex-cli-bin`` pin.
+
+    Also asserts nothing matches ``bin/**`` under the staged codex_app_server package.
+    """
+    script = _load_update_script_module()
+    staged = script.stage_python_sdk_package(tmp_path / "sdk-stage", "0.2.1", "1.2.3")
+
+    pyproject = (staged / "pyproject.toml").read_text()
+    assert 'version = "0.2.1"' in pyproject
+    assert '"codex-cli-bin==1.2.3"' in pyproject
+    assert not any((staged / "src" / "codex_app_server").glob("bin/**"))
+
+
+def test_stage_sdk_release_replaces_existing_staging_dir(tmp_path: Path) -> None:
+    """SDK staging removes files already present in the staging directory."""
+    script = _load_update_script_module()
+    staging_dir = tmp_path / "sdk-stage"
+    # Pre-populate the staging directory with a file that must not survive.
+    old_file = staging_dir / "stale.txt"
+    old_file.parent.mkdir(parents=True)
+    old_file.write_text("stale")
+
+    staged = script.stage_python_sdk_package(staging_dir, "0.2.1", "1.2.3")
+
+    assert staged == staging_dir
+    assert not old_file.exists()
+
+
+def test_stage_sdk_runs_type_generation_before_staging(tmp_path: Path) -> None:
+    """``stage-sdk`` runs type generation first, then SDK staging, and nothing else.
+
+    All operations are replaced by recording fakes injected through ``CliOps``;
+    the runtime-staging fake fails the test if it is ever called.
+    """
+    script = _load_update_script_module()
+    calls: list[str] = []
+    args = script.parse_args(
+        [
+            "stage-sdk",
+            str(tmp_path / "sdk-stage"),
+            "--runtime-version",
+            "1.2.3",
+        ]
+    )
+
+    def fake_generate_types() -> None:
+        calls.append("generate_types")
+
+    def fake_stage_sdk_package(
+        _staging_dir: Path, _sdk_version: str, _runtime_version: str
+    ) -> Path:
+        calls.append("stage_sdk")
+        return tmp_path / "sdk-stage"
+
+    def fake_stage_runtime_package(
+        _staging_dir: Path, _runtime_version: str, _runtime_binary: Path
+    ) -> Path:
+        raise AssertionError("runtime staging should not run for stage-sdk")
+
+    def fake_current_sdk_version() -> str:
+        return "0.2.0"
+
+    ops = script.CliOps(
+        generate_types=fake_generate_types,
+        stage_python_sdk_package=fake_stage_sdk_package,
+        stage_python_runtime_package=fake_stage_runtime_package,
+        current_sdk_version=fake_current_sdk_version,
+    )
+
+    script.run_command(args, ops)
+
+    # The recorded order is what the test pins down.
+    assert calls == ["generate_types", "stage_sdk"]
+
+
+def test_stage_runtime_stages_binary_without_type_generation(tmp_path: Path) -> None:
+    """``stage-runtime`` only stages the runtime package; no type generation runs.
+
+    All operations are replaced by recording fakes injected through ``CliOps``;
+    the SDK-staging fake fails the test if it is ever called.
+    """
+    script = _load_update_script_module()
+    fake_binary = tmp_path / script.runtime_binary_name()
+    fake_binary.write_text("fake codex\n")
+    calls: list[str] = []
+    args = script.parse_args(
+        [
+            "stage-runtime",
+            str(tmp_path / "runtime-stage"),
+            str(fake_binary),
+            "--runtime-version",
+            "1.2.3",
+        ]
+    )
+
+    def fake_generate_types() -> None:
+        calls.append("generate_types")
+
+    def fake_stage_sdk_package(
+        _staging_dir: Path, _sdk_version: str, _runtime_version: str
+    ) -> Path:
+        raise AssertionError("sdk staging should not run for stage-runtime")
+
+    def fake_stage_runtime_package(
+        _staging_dir: Path, _runtime_version: str, _runtime_binary: Path
+    ) -> Path:
+        calls.append("stage_runtime")
+        return tmp_path / "runtime-stage"
+
+    def fake_current_sdk_version() -> str:
+        return "0.2.0"
+
+    ops = script.CliOps(
+        generate_types=fake_generate_types,
+        stage_python_sdk_package=fake_stage_sdk_package,
+        stage_python_runtime_package=fake_stage_runtime_package,
+        current_sdk_version=fake_current_sdk_version,
+    )
+
+    script.run_command(args, ops)
+
+    # "generate_types" must be absent from the recorded calls.
+    assert calls == ["stage_runtime"]
+
+
+def test_default_runtime_is_resolved_from_installed_runtime_package(
+    tmp_path: Path,
+) -> None:
+    """With no ``codex_bin`` configured, the resolver returns the installed path.
+
+    The installed-path lookup and the existence check are both stubbed through
+    ``CodexBinResolverOps``.
+    """
+    from codex_app_server import client as client_module
+
+    fake_binary = tmp_path / ("codex.exe" if client_module.os.name == "nt" else "codex")
+    fake_binary.write_text("")
+    ops = client_module.CodexBinResolverOps(
+        installed_codex_path=lambda: fake_binary,
+        path_exists=lambda path: path == fake_binary,
+    )
+
+    config = client_module.AppServerConfig()
+    assert config.codex_bin is None
+    assert client_module.resolve_codex_bin(config, ops) == fake_binary
+
+
+def test_explicit_codex_bin_override_takes_priority(tmp_path: Path) -> None:
+    """An explicit ``codex_bin`` is returned without consulting the installed runtime."""
+    from codex_app_server import client as client_module
+
+    explicit_binary = tmp_path / (
+        "custom-codex.exe" if client_module.os.name == "nt" else "custom-codex"
+    )
+    explicit_binary.write_text("")
+    ops = client_module.CodexBinResolverOps(
+        # A lambda cannot contain ``raise``; throwing into an empty generator
+        # makes the stub raise if the resolver ever calls it.
+        installed_codex_path=lambda: (_ for _ in ()).throw(
+            AssertionError("packaged runtime should not be used")
+        ),
+        path_exists=lambda path: path == explicit_binary,
+    )
+
+    config = client_module.AppServerConfig(codex_bin=str(explicit_binary))
+    assert client_module.resolve_codex_bin(config, ops) == explicit_binary
+
+
+def test_missing_runtime_package_requires_explicit_codex_bin() -> None:
+    """With no ``codex_bin`` set, the lookup's FileNotFoundError propagates."""
+    from codex_app_server import client as client_module
+
+    ops = client_module.CodexBinResolverOps(
+        # Throwing into an empty generator lets the lambda raise when called.
+        installed_codex_path=lambda: (_ for _ in ()).throw(
+            FileNotFoundError("missing packaged runtime")
+        ),
+        path_exists=lambda _path: False,
+    )
+
+    with pytest.raises(FileNotFoundError, match="missing packaged runtime"):
+        client_module.resolve_codex_bin(client_module.AppServerConfig(), ops)
+
+
+def test_broken_runtime_package_does_not_fall_back() -> None:
+    """The lookup's FileNotFoundError surfaces with its message unchanged."""
+    from codex_app_server import client as client_module
+
+    ops = client_module.CodexBinResolverOps(
+        # Throwing into an empty generator lets the lambda raise when called.
+        installed_codex_path=lambda: (_ for _ in ()).throw(
+            FileNotFoundError("missing packaged binary")
+        ),
+        path_exists=lambda _path: False,
+    )
+
+    with pytest.raises(FileNotFoundError) as exc_info:
+        client_module.resolve_codex_bin(client_module.AppServerConfig(), ops)
+
+    # Exact equality: the resolver must not wrap or rewrite the message.
+    assert str(exc_info.value) == ("missing packaged binary")
--- a/sdk/python/tests/test_client_rpc_methods.py
+++ b/sdk/python/tests/test_client_rpc_methods.py
@@ -0,0 +1,95 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+from codex_app_server.client import AppServerClient, _params_dict
+from codex_app_server.generated.v2_all import ThreadListParams, ThreadTokenUsageUpdatedNotification
+from codex_app_server.models import UnknownNotification
+
+ROOT = Path(__file__).resolve().parents[1]
+
+
+def test_thread_set_name_and_compact_use_current_rpc_methods() -> None:
+    """``thread_set_name`` and ``thread_compact`` issue the expected RPC method names.
+
+    ``client.request`` is replaced by a recorder, so no server process is involved.
+    """
+    client = AppServerClient()
+    calls: list[tuple[str, dict[str, Any] | None]] = []
+
+    def fake_request(method: str, params, *, response_model):  # type: ignore[no-untyped-def]
+        calls.append((method, params))
+        # Build the response from an empty object via the requested model.
+        return response_model.model_validate({})
+
+    client.request = fake_request  # type: ignore[method-assign]
+
+    client.thread_set_name("thread-1", "sdk-name")
+    client.thread_compact("thread-1")
+
+    assert calls[0][0] == "thread/name/set"
+    assert calls[1][0] == "thread/compact/start"
+
+
+def test_generated_params_models_are_snake_case_and_dump_by_alias() -> None:
+    """Generated params use snake_case fields; ``_params_dict`` emits camelCase keys."""
+    params = ThreadListParams(search_term="needle", limit=5)
+
+    assert "search_term" in ThreadListParams.model_fields
+    dumped = _params_dict(params)
+    # Only the two fields that were set appear in the dumped dict.
+    assert dumped == {"searchTerm": "needle", "limit": 5}
+
+
+def test_generated_v2_bundle_has_single_shared_plan_type_definition() -> None:
+    """The text ``class PlanType(`` occurs exactly once in generated/v2_all.py."""
+    source = (ROOT / "src" / "codex_app_server" / "generated" / "v2_all.py").read_text()
+    assert source.count("class PlanType(") == 1
+
+
+def test_notifications_are_typed_with_canonical_v2_methods() -> None:
+    """A valid ``thread/tokenUsage/updated`` payload is coerced to its typed model.
+
+    The input uses camelCase keys; the typed payload is read back through the
+    snake_case attribute ``turn_id``.
+    """
+    client = AppServerClient()
+    event = client._coerce_notification(
+        "thread/tokenUsage/updated",
+        {
+            "threadId": "thread-1",
+            "turnId": "turn-1",
+            "tokenUsage": {
+                "last": {
+                    "cachedInputTokens": 0,
+                    "inputTokens": 1,
+                    "outputTokens": 2,
+                    "reasoningOutputTokens": 0,
+                    "totalTokens": 3,
+                },
+                "total": {
+                    "cachedInputTokens": 0,
+                    "inputTokens": 1,
+                    "outputTokens": 2,
+                    "reasoningOutputTokens": 0,
+                    "totalTokens": 3,
+                },
+            },
+        },
+    )
+
+    assert event.method == "thread/tokenUsage/updated"
+    assert isinstance(event.payload, ThreadTokenUsageUpdatedNotification)
+    assert event.payload.turn_id == "turn-1"
+
+
+def test_unknown_notifications_fall_back_to_unknown_payloads() -> None:
+    """An unrecognized method yields ``UnknownNotification`` with the raw params kept."""
+    client = AppServerClient()
+    event = client._coerce_notification(
+        "unknown/notification",
+        {
+            "id": "evt-1",
+            "conversationId": "thread-1",
+            "msg": {"type": "turn_aborted"},
+        },
+    )
+
+    assert event.method == "unknown/notification"
+    assert isinstance(event.payload, UnknownNotification)
+    assert event.payload.params["msg"] == {"type": "turn_aborted"}
+
+
+def test_invalid_notification_payload_falls_back_to_unknown() -> None:
+    """A known method whose params lack required fields yields ``UnknownNotification``.
+
+    The same method is coerced to a typed model when given a complete payload.
+    """
+    client = AppServerClient()
+    event = client._coerce_notification("thread/tokenUsage/updated", {"threadId": "missing"})
+
+    assert event.method == "thread/tokenUsage/updated"
+    assert isinstance(event.payload, UnknownNotification)
--- a/sdk/python/tests/test_contract_generation.py
+++ b/sdk/python/tests/test_contract_generation.py
@@ -0,0 +1,52 @@
+from __future__ import annotations
+
+import os
+import subprocess
+import sys
+from pathlib import Path
+
+ROOT = Path(__file__).resolve().parents[1]
+GENERATED_TARGETS = [
+    Path("src/codex_app_server/generated/notification_registry.py"),
+    Path("src/codex_app_server/generated/v2_all.py"),
+    Path("src/codex_app_server/public_api.py"),
+]
+
+
+def _snapshot_target(root: Path, rel_path: Path) -> dict[str, bytes] | bytes | None:
+    """Capture the current bytes of ``root / rel_path``.
+
+    Returns ``None`` if the path does not exist, the file's bytes if it is a
+    file, and otherwise a mapping from each contained file's path (relative to
+    the target, as a string) to its bytes. Anything under a ``__pycache__``
+    path component is left out.
+    """
+    target = root / rel_path
+    if not target.exists():
+        return None
+    if target.is_file():
+        return target.read_bytes()
+
+    return {
+        str(entry.relative_to(target)): entry.read_bytes()
+        for entry in sorted(target.rglob("*"))
+        if entry.is_file() and "__pycache__" not in entry.parts
+    }
+
+
+def _snapshot_targets(root: Path) -> dict[str, dict[str, bytes] | bytes | None]:
+    """Snapshot every path in ``GENERATED_TARGETS``, keyed by its relative path string."""
+    snapshots: dict[str, dict[str, bytes] | bytes | None] = {}
+    for rel_path in GENERATED_TARGETS:
+        snapshots[str(rel_path)] = _snapshot_target(root, rel_path)
+    return snapshots
+
+
+def test_generated_files_are_up_to_date() -> None:
+    """Regenerating the contract artifacts leaves the checked-in files unchanged.
+
+    Snapshots the generated targets, runs
+    ``scripts/update_sdk_artifacts.py generate-types`` in a subprocess, and
+    compares a second snapshot with the first. The script runs with ``cwd=ROOT``.
+    """
+    before = _snapshot_targets(ROOT)
+
+    # Regenerate contract artifacts via single maintenance entrypoint.
+    env = os.environ.copy()
+    # Put the running interpreter's directory first on PATH for the subprocess.
+    python_bin = str(Path(sys.executable).parent)
+    env["PATH"] = f"{python_bin}{os.pathsep}{env.get('PATH', '')}"
+
+    # check=True makes a non-zero exit raise CalledProcessError.
+    subprocess.run(
+        [sys.executable, "scripts/update_sdk_artifacts.py", "generate-types"],
+        cwd=ROOT,
+        check=True,
+        env=env,
+    )
+
+    after = _snapshot_targets(ROOT)
+    assert before == after, "Generated files drifted after regeneration"
				`@@ -0,0 +1 @@`
				`"""Auto-generated Python types derived from the app-server schemas."""`