Compare commits

..

4 Commits

Author SHA1 Message Date
Shaqayeq
3cf1306968 python-sdk: use pinned runtime package in real coverage (2026-03-12)
2026-03-12

Switch the repo-source Python SDK real coverage over to a pinned runtime-package flow backed by GitHub release artifacts instead of PATH or explicit binary overrides.

- add sdk/python/_runtime_setup.py to download the release codex archive for a requested CODEX_PYTHON_RUNTIME_VERSION, stage a temporary codex-cli-bin package, and install it into a target Python environment with cleanup
- refactor real integration tests to run repo-source SDK code against an isolated site-packages target that contains the staged codex-cli-bin runtime
- update examples and notebook bootstrap to install and use the runtime package, and stop consulting CODEX_PYTHON_SDK_CODEX_BIN or PATH
- switch the failing turn-run and model-selection examples to runtime-compatible model selection for the pinned release binary
- keep the main SDK runtime resolution model unchanged: explicit codex_bin or installed codex-cli-bin

Validation:
- python3 -m pytest sdk/python/tests
- RUN_REAL_CODEX_TESTS=1 CODEX_PYTHON_RUNTIME_VERSION=0.115.0-alpha.11 python3 -m pytest sdk/python/tests/test_real_app_server_integration.py

Co-authored-by: Codex <noreply@openai.com>
2026-03-12 10:15:56 -07:00
Shaqayeq
fd4beb8b37 Add Python SDK public API and examples
Co-authored-by: Codex <noreply@openai.com>
2026-03-12 10:15:53 -07:00
pakrym-oai
c0528b9bd9 Move code mode tool files under tools/code_mode and split functionality (#14476)
- **Summary**
- migrate the code mode handler, service, worker, process, runner, and
bridge assets into the `tools/code_mode` module tree
- split execution, protocol, and handler logic into dedicated files and
relocate the tool definition into `code_mode/spec.rs`
- update core references and tests to stitch the new organization
together
- **Testing**
  - Not run (not requested)
2026-03-12 09:54:11 -07:00
Ahmed Ibrahim
09aa71adb7 Fix stdio-to-uds peer-close flake (#13882)
## What changed
- `codex-stdio-to-uds` now tolerates `NotConnected` when
`shutdown(Write)` happens after the peer has already closed.
- The socket test was rewritten to send stdin from a fixture file and to
read an exact request payload length instead of waiting on EOF timing.

## Why this fixes the flake
- This flake exposed a real cross-platform runtime edge case: on macOS,
the peer can close first after a successful exchange, and
`shutdown(Write)` can report `NotConnected` even though the interaction
already succeeded.
- Treating that specific ordering as a harmless shutdown condition
removes the production-level false failure.
- The old test compounded the problem by depending on EOF timing, which
varies by platform and scheduler. Exact-length IO makes the test
deterministic and focused on the actual data exchange.

## Scope
- Production logic change with matching test rewrite.
2026-03-12 09:52:50 -07:00
60 changed files with 5912 additions and 1079 deletions

3
codex-rs/Cargo.lock generated
View File

@@ -1439,7 +1439,6 @@ dependencies = [
"codex-utils-cargo-bin",
"codex-utils-cli",
"codex-utils-json-to-toml",
"codex-utils-pty",
"core_test_support",
"futures",
"opentelemetry",
@@ -2439,6 +2438,7 @@ dependencies = [
"anyhow",
"chrono",
"clap",
"codex-otel",
"codex-protocol",
"dirs",
"log",
@@ -2458,7 +2458,6 @@ name = "codex-stdio-to-uds"
version = "0.0.0"
dependencies = [
"anyhow",
"assert_cmd",
"codex-utils-cargo-bin",
"pretty_assertions",
"tempfile",

View File

@@ -72,7 +72,7 @@ members = [
resolver = "2"
[workspace.package]
version = "0.115.0-alpha.12"
version = "0.0.0"
# Track the edition for all workspace crates in one place. Individual
# crates can still override this value, but keeping it here means new
# crates created with `cargo new -w ...` automatically inherit the 2024

View File

@@ -1,920 +0,0 @@
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use crate::client_common::tools::ToolSpec;
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::config::Config;
use crate::features::Feature;
use crate::function_tool::FunctionCallError;
use crate::tools::ToolRouter;
use crate::tools::code_mode_description::augment_tool_spec_for_code_mode;
use crate::tools::code_mode_description::code_mode_tool_reference;
use crate::tools::context::FunctionToolOutput;
use crate::tools::context::SharedTurnDiffTracker;
use crate::tools::context::ToolPayload;
use crate::tools::js_repl::resolve_compatible_node;
use crate::tools::router::ToolCall;
use crate::tools::router::ToolCallSource;
use crate::tools::router::ToolRouterParams;
use crate::truncate::TruncationPolicy;
use crate::truncate::formatted_truncate_text_content_items_with_policy;
use crate::truncate::truncate_function_output_items_with_policy;
use crate::unified_exec::resolve_max_tokens;
use codex_protocol::models::FunctionCallOutputContentItem;
use serde::Deserialize;
use serde::Serialize;
use serde_json::Value as JsonValue;
use tokio::io::AsyncBufReadExt;
use tokio::io::AsyncReadExt;
use tokio::io::AsyncWriteExt;
use tokio::io::BufReader;
use tokio::sync::Mutex;
use tokio::sync::mpsc;
use tokio::sync::oneshot;
use tokio::task::JoinHandle;
use tracing::warn;
// JavaScript sources embedded at compile time: the long-lived Node runner
// process and the per-call bridge template (see `build_source`).
const CODE_MODE_RUNNER_SOURCE: &str = include_str!("code_mode_runner.cjs");
const CODE_MODE_BRIDGE_SOURCE: &str = include_str!("code_mode_bridge.js");
// Model-facing tool names: start a script, and wait on a yielded one.
pub(crate) const PUBLIC_TOOL_NAME: &str = "exec";
pub(crate) const WAIT_TOOL_NAME: &str = "exec_wait";
// Default yield window in milliseconds (exported for callers of the wait
// tool; not referenced within this file's visible code).
pub(crate) const DEFAULT_WAIT_YIELD_TIME_MS: u64 = 10_000;
/// Per-turn context threaded through script execution and nested tool
/// dispatch: the session, the current turn, and the shared diff tracker.
#[derive(Clone)]
struct ExecContext {
    session: Arc<Session>,
    turn: Arc<TurnContext>,
    tracker: SharedTurnDiffTracker,
}
/// Handle to the spawned Node runner plus the plumbing used to exchange
/// line-delimited JSON messages with it over stdin/stdout.
pub(crate) struct CodeModeProcess {
    child: tokio::process::Child,
    // Shared so both the worker task and request senders can write.
    stdin: Arc<Mutex<tokio::process::ChildStdin>>,
    // Task that reads stdout lines and routes replies to waiters.
    stdout_task: JoinHandle<()>,
    // A set of current requests waiting for a response from code mode host,
    // keyed by request id.
    response_waiters: Arc<Mutex<HashMap<String, oneshot::Sender<NodeToHostMessage>>>>,
    // When there is an active worker it listens for tool calls from code mode
    // and processes them.
    tool_call_rx: Arc<Mutex<mpsc::UnboundedReceiver<CodeModeToolCall>>>,
}
/// Guard for the per-turn tool-call worker task; dropping it fires the
/// shutdown signal (see the `Drop` impl).
pub(crate) struct CodeModeWorker {
    shutdown_tx: Option<oneshot::Sender<()>>,
}
/// A nested tool invocation emitted by the Node runner.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "snake_case")]
struct CodeModeToolCall {
    // Correlates the eventual `Response` with the originating request.
    request_id: String,
    // Identifies this individual call within the request.
    id: String,
    name: String,
    // Absent input deserializes to `None` rather than failing.
    #[serde(default)]
    input: Option<JsonValue>,
}
impl Drop for CodeModeWorker {
    /// Signal the worker task to stop. The send result is discarded because
    /// the task may already have exited on its own.
    fn drop(&mut self) {
        let Some(tx) = self.shutdown_tx.take() else {
            return;
        };
        let _ = tx.send(());
    }
}
impl CodeModeProcess {
    /// Spawn a background task that, for the duration of one turn, drains
    /// nested tool calls from the runner and dispatches them. Dropping the
    /// returned `CodeModeWorker` signals the task to stop.
    fn worker(&self, exec: ExecContext) -> CodeModeWorker {
        let (shutdown_tx, mut shutdown_rx) = oneshot::channel();
        let stdin = Arc::clone(&self.stdin);
        let tool_call_rx = Arc::clone(&self.tool_call_rx);
        tokio::spawn(async move {
            loop {
                // Race shutdown against the next tool call; the receiver
                // lock is held only while awaiting `recv`.
                let tool_call = tokio::select! {
                    _ = &mut shutdown_rx => break,
                    tool_call = async {
                        let mut tool_call_rx = tool_call_rx.lock().await;
                        tool_call_rx.recv().await
                    } => tool_call,
                };
                let Some(tool_call) = tool_call else {
                    break;
                };
                // Run each call on its own task so a slow tool does not
                // block subsequent calls.
                let exec = exec.clone();
                let stdin = Arc::clone(&stdin);
                tokio::spawn(async move {
                    let response = HostToNodeMessage::Response {
                        request_id: tool_call.request_id,
                        id: tool_call.id,
                        code_mode_result: call_nested_tool(exec, tool_call.name, tool_call.input)
                            .await,
                    };
                    if let Err(err) = write_message(&stdin, &response).await {
                        warn!("failed to write {PUBLIC_TOOL_NAME} tool response: {err}");
                    }
                });
            }
        });
        CodeModeWorker {
            shutdown_tx: Some(shutdown_tx),
        }
    }

    /// Send one request to the runner and await its correlated response.
    ///
    /// The oneshot waiter is registered under `request_id` *before* the
    /// write so the stdout task cannot race the reply past us. Errors if
    /// the stdout task has finished (runner gone) or the waiter is dropped
    /// without a reply.
    async fn send(
        &mut self,
        request_id: &str,
        message: &HostToNodeMessage,
    ) -> Result<NodeToHostMessage, std::io::Error> {
        if self.stdout_task.is_finished() {
            return Err(std::io::Error::other(format!(
                "{PUBLIC_TOOL_NAME} runner is not available"
            )));
        }
        let (tx, rx) = oneshot::channel();
        self.response_waiters
            .lock()
            .await
            .insert(request_id.to_string(), tx);
        if let Err(err) = write_message(&self.stdin, message).await {
            // Roll back the registration so the waiter map does not leak.
            self.response_waiters.lock().await.remove(request_id);
            return Err(err);
        }
        match rx.await {
            Ok(message) => Ok(message),
            // Sender dropped: the stdout task exited and cleared the map.
            Err(_) => Err(std::io::Error::other(format!(
                "{PUBLIC_TOOL_NAME} runner is not available"
            ))),
        }
    }

    /// Non-blocking liveness probe: `Ok(true)` once the child has exited.
    fn has_exited(&mut self) -> Result<bool, std::io::Error> {
        self.child
            .try_wait()
            .map(|status| status.is_some())
            .map_err(std::io::Error::other)
    }
}
/// Session-scoped owner of the lazily spawned runner process, plus the
/// values persisted across `exec` calls via `store`/`load`.
pub(crate) struct CodeModeService {
    // Optional override for the Node binary, mirroring `js_repl`.
    js_repl_node_path: Option<PathBuf>,
    stored_values: Mutex<HashMap<String, JsonValue>>,
    process: Arc<Mutex<Option<CodeModeProcess>>>,
    next_session_id: Mutex<i32>,
}
impl CodeModeService {
    /// Build an idle service; no process is spawned until first use.
    pub(crate) fn new(js_repl_node_path: Option<PathBuf>) -> Self {
        Self {
            js_repl_node_path,
            stored_values: Mutex::new(HashMap::new()),
            process: Arc::new(Mutex::new(None)),
            next_session_id: Mutex::new(1),
        }
    }

    /// Snapshot of the values persisted via `store(...)`.
    pub(crate) async fn stored_values(&self) -> HashMap<String, JsonValue> {
        self.stored_values.lock().await.clone()
    }

    /// Replace the whole persisted map (the runner reports it back after
    /// each script run).
    pub(crate) async fn replace_stored_values(&self, values: HashMap<String, JsonValue>) {
        *self.stored_values.lock().await = values;
    }

    /// Ensure a live runner process exists, respawning if the previous one
    /// exited, and return an owned guard over the process slot.
    ///
    /// NOTE(review): the plain guard is dropped before `lock_owned`
    /// re-acquires, so another task can interleave between spawn and
    /// re-acquire; callers re-check the slot via `as_mut()`.
    async fn ensure_started(
        &self,
    ) -> Result<tokio::sync::OwnedMutexGuard<Option<CodeModeProcess>>, std::io::Error> {
        let mut process_slot = self.process.lock().await;
        // Respawn when there is no process, or probing it errored/exited.
        let needs_spawn = match process_slot.as_mut() {
            Some(process) => !matches!(process.has_exited(), Ok(false)),
            None => true,
        };
        if needs_spawn {
            let node_path = resolve_compatible_node(self.js_repl_node_path.as_deref())
                .await
                .map_err(std::io::Error::other)?;
            *process_slot = Some(spawn_code_mode_process(&node_path).await?);
        }
        drop(process_slot);
        Ok(self.process.clone().lock_owned().await)
    }

    /// Start the per-turn nested-tool worker when the CodeMode feature is
    /// enabled. Returns `None` (with a warning) when the runner cannot be
    /// started.
    pub(crate) async fn start_turn_worker(
        &self,
        session: &Arc<Session>,
        turn: &Arc<TurnContext>,
        tracker: &SharedTurnDiffTracker,
    ) -> Option<CodeModeWorker> {
        if !turn.features.enabled(Feature::CodeMode) {
            return None;
        }
        let exec = ExecContext {
            session: Arc::clone(session),
            turn: Arc::clone(turn),
            tracker: Arc::clone(tracker),
        };
        let mut process_slot = match self.ensure_started().await {
            Ok(process_slot) => process_slot,
            Err(err) => {
                warn!("failed to start {PUBLIC_TOOL_NAME} worker for turn: {err}");
                return None;
            }
        };
        let Some(process) = process_slot.as_mut() else {
            warn!(
                "failed to start {PUBLIC_TOOL_NAME} worker for turn: {PUBLIC_TOOL_NAME} runner failed to start"
            );
            return None;
        };
        Some(process.worker(exec))
    }

    /// Monotonically increasing per-service session id (saturating at
    /// `i32::MAX` rather than wrapping).
    pub(crate) async fn allocate_session_id(&self) -> i32 {
        let mut next_session_id = self.next_session_id.lock().await;
        let session_id = *next_session_id;
        *next_session_id = next_session_id.saturating_add(1);
        session_id
    }

    /// Fresh correlation id for a host-to-node request.
    pub(crate) async fn allocate_request_id(&self) -> String {
        uuid::Uuid::new_v4().to_string()
    }
}
/// Whether a nested tool takes JSON-object arguments (`Function`) or a raw
/// string (`Freeform`); serialized to the runner in snake_case.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[serde(rename_all = "snake_case")]
enum CodeModeToolKind {
    Function,
    Freeform,
}
/// Wire form of a tool exposed to the runner's `tools.js` modules.
#[derive(Clone, Debug, Serialize)]
struct EnabledTool {
    tool_name: String,
    // Serialized as `module`: the import path scripts use for this tool.
    #[serde(rename = "module")]
    module_path: String,
    namespace: Vec<String>,
    name: String,
    description: String,
    kind: CodeModeToolKind,
}
/// Messages written to the runner's stdin, one JSON object per line,
/// tagged by `type` in snake_case.
#[derive(Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
enum HostToNodeMessage {
    // Begin executing a new script session.
    Start {
        request_id: String,
        session_id: i32,
        enabled_tools: Vec<EnabledTool>,
        stored_values: HashMap<String, JsonValue>,
        source: String,
    },
    // Resume waiting on a running session for up to `yield_time_ms`.
    Poll {
        request_id: String,
        session_id: i32,
        yield_time_ms: u64,
    },
    // Stop a running session.
    Terminate {
        request_id: String,
        session_id: i32,
    },
    // Reply to a nested tool call the runner previously emitted.
    Response {
        request_id: String,
        id: String,
        code_mode_result: JsonValue,
    },
}
/// Messages read from the runner's stdout, one JSON object per line,
/// tagged by `type` in snake_case.
#[derive(Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
enum NodeToHostMessage {
    // The script wants the host to run a nested tool.
    ToolCall {
        #[serde(flatten)]
        tool_call: CodeModeToolCall,
    },
    // The script hit its yield window and is still running.
    Yielded {
        request_id: String,
        content_items: Vec<JsonValue>,
    },
    // The session was terminated at the host's request.
    Terminated {
        request_id: String,
        content_items: Vec<JsonValue>,
    },
    // The script finished (successfully or with `error_text` set).
    Result {
        request_id: String,
        content_items: Vec<JsonValue>,
        stored_values: HashMap<String, JsonValue>,
        #[serde(default)]
        error_text: Option<String>,
        // Budget the script requested via
        // `set_max_output_tokens_per_exec_call`, if any.
        #[serde(default)]
        max_output_tokens_per_exec_call: Option<usize>,
    },
}
/// Outcome of one host/runner exchange: the script finished, or it yielded
/// and can be resumed via the wait tool.
enum CodeModeSessionProgress {
    Finished(FunctionToolOutput),
    Yielded { output: FunctionToolOutput },
}
/// Status rendered into the `Script ...` header prepended to tool output.
enum CodeModeExecutionStatus {
    Completed,
    Failed,
    // Still running; carries the session id the model can wait on.
    Running(i32),
    Terminated,
}
/// Build the `## Exec` prompt section describing the code-mode tools, or
/// `None` when the CodeMode feature is disabled.
pub(crate) fn instructions(config: &Config) -> Option<String> {
    if !config.features.enabled(Feature::CodeMode) {
        return None;
    }
    let mut section = String::from("## Exec\n");
    section.push_str(&format!(
        "- Use `{PUBLIC_TOOL_NAME}` for JavaScript execution in a Node-backed `node:vm` context.\n",
    ));
    section.push_str(&format!(
        "- `{PUBLIC_TOOL_NAME}` is a freeform/custom tool. Direct `{PUBLIC_TOOL_NAME}` calls must send raw JavaScript tool input. Do not wrap code in JSON, quotes, or markdown code fences.\n",
    ));
    section.push_str(&format!(
        "- Direct tool calls remain available while `{PUBLIC_TOOL_NAME}` is enabled.\n",
    ));
    section.push_str(&format!(
        "- `{PUBLIC_TOOL_NAME}` uses the same Node runtime resolution as `js_repl`. If needed, point `js_repl_node_path` at the Node binary you want Codex to use.\n",
    ));
    // Plain literal (no format!): the `{ module, name, description }` braces
    // are intentional text, not interpolation.
    section.push_str("- Import nested tools from `tools.js`, for example `import { exec_command } from \"tools.js\"` or `import { ALL_TOOLS } from \"tools.js\"` to inspect the available `{ module, name, description }` entries. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import { append_notebook_logs_chart } from \"tools/mcp/ologs.js\"`. Nested tool calls resolve to their code-mode result values.\n");
    section.push_str(&format!(
        "- Import `{{ background, output_text, output_image, set_max_output_tokens_per_exec_call, set_yield_time, store, load }}` from `@openai/code_mode` (or `\"openai/code_mode\"`). `output_text(value)` surfaces text back to the model and stringifies non-string objects with `JSON.stringify(...)` when possible. `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs. `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, and `load(key)` returns a cloned stored value or `undefined`. `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate direct `{PUBLIC_TOOL_NAME}` returns; `{WAIT_TOOL_NAME}` uses its own `max_tokens` argument instead and defaults to `10000`. `set_yield_time(value)` asks `{PUBLIC_TOOL_NAME}` to return early if the script is still running after that many milliseconds so `{WAIT_TOOL_NAME}` can resume it later. `background()` returns a yielded `{PUBLIC_TOOL_NAME}` response immediately while the script keeps running in the background. The returned content starts with a separate `Script completed`, `Script failed`, or `Script running with session ID …` text item that includes wall time. When truncation happens, the final text may include `Total output lines:` and the usual `…N tokens truncated…` marker.\n",
    ));
    section.push_str(&format!(
        "- If `{PUBLIC_TOOL_NAME}` returns `Script running with session ID …`, call `{WAIT_TOOL_NAME}` with that `session_id` to keep waiting for more output, completion, or termination.\n",
    ));
    section.push_str(
        "- Function tools require JSON object arguments. Freeform tools require raw strings.\n",
    );
    section.push_str("- `add_content(value)` remains available for compatibility. It is synchronous and accepts a content item, an array of content items, or a string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`.\n");
    section
        .push_str("- Only content passed to `output_text(...)`, `output_image(...)`, or `add_content(value)` is surfaced back to the model.");
    Some(section)
}
/// Entry point for the freeform `exec` tool: run `code` in the Node runner
/// and convert the runner's first reply into tool output.
///
/// All failures are surfaced to the model via
/// `FunctionCallError::RespondToModel`.
pub(crate) async fn execute(
    session: Arc<Session>,
    turn: Arc<TurnContext>,
    tracker: SharedTurnDiffTracker,
    code: String,
) -> Result<FunctionToolOutput, FunctionCallError> {
    let exec = ExecContext {
        session,
        turn,
        tracker,
    };
    let enabled_tools = build_enabled_tools(&exec).await;
    let service = &exec.session.services.code_mode_service;
    let stored_values = service.stored_values().await;
    // Inline the tool manifest and user code into the bridge template.
    let source = build_source(&code, &enabled_tools).map_err(FunctionCallError::RespondToModel)?;
    let session_id = service.allocate_session_id().await;
    let request_id = service.allocate_request_id().await;
    let process_slot = service
        .ensure_started()
        .await
        .map_err(|err| FunctionCallError::RespondToModel(err.to_string()))?;
    let started_at = std::time::Instant::now();
    let message = HostToNodeMessage::Start {
        request_id: request_id.clone(),
        session_id,
        enabled_tools,
        stored_values,
        source,
    };
    // Hold the process-slot guard only for the duration of this exchange.
    let result = {
        let mut process_slot = process_slot;
        let Some(process) = process_slot.as_mut() else {
            return Err(FunctionCallError::RespondToModel(format!(
                "{PUBLIC_TOOL_NAME} runner failed to start"
            )));
        };
        let message = process
            .send(&request_id, &message)
            .await
            .map_err(|err| err.to_string());
        let message = match message {
            Ok(message) => message,
            Err(error) => return Err(FunctionCallError::RespondToModel(error)),
        };
        // `None`: direct exec call — truncation budget comes from the
        // script's own `set_max_output_tokens_per_exec_call`.
        handle_node_message(&exec, session_id, message, None, started_at).await
    };
    match result {
        Ok(CodeModeSessionProgress::Finished(output))
        | Ok(CodeModeSessionProgress::Yielded { output }) => Ok(output),
        Err(error) => Err(FunctionCallError::RespondToModel(error)),
    }
}
/// Entry point for the `exec_wait` tool: poll a yielded script session (or
/// terminate it when `terminate` is set) and convert the runner's reply
/// into tool output. `max_output_tokens` is this wait call's own
/// truncation budget.
pub(crate) async fn wait(
    session: Arc<Session>,
    turn: Arc<TurnContext>,
    tracker: SharedTurnDiffTracker,
    session_id: i32,
    yield_time_ms: u64,
    max_output_tokens: Option<usize>,
    terminate: bool,
) -> Result<FunctionToolOutput, FunctionCallError> {
    let exec = ExecContext {
        session,
        turn,
        tracker,
    };
    let request_id = exec
        .session
        .services
        .code_mode_service
        .allocate_request_id()
        .await;
    let started_at = std::time::Instant::now();
    let message = if terminate {
        HostToNodeMessage::Terminate {
            request_id: request_id.clone(),
            session_id,
        }
    } else {
        HostToNodeMessage::Poll {
            request_id: request_id.clone(),
            session_id,
            yield_time_ms,
        }
    };
    let process_slot = exec
        .session
        .services
        .code_mode_service
        .ensure_started()
        .await
        .map_err(|err| FunctionCallError::RespondToModel(err.to_string()))?;
    let result = {
        let mut process_slot = process_slot;
        let Some(process) = process_slot.as_mut() else {
            return Err(FunctionCallError::RespondToModel(format!(
                "{PUBLIC_TOOL_NAME} runner failed to start"
            )));
        };
        // NOTE(review): probes again after ensure_started — presumably to
        // reject a process that died immediately after (re)spawn, since a
        // fresh process has no session to resume. Confirm intent.
        if !matches!(process.has_exited(), Ok(false)) {
            return Err(FunctionCallError::RespondToModel(format!(
                "{PUBLIC_TOOL_NAME} runner failed to start"
            )));
        }
        let message = process
            .send(&request_id, &message)
            .await
            .map_err(|err| err.to_string());
        let message = match message {
            Ok(message) => message,
            Err(error) => return Err(FunctionCallError::RespondToModel(error)),
        };
        // `Some(...)`: the wait call's own budget overrides the script's.
        handle_node_message(
            &exec,
            session_id,
            message,
            Some(max_output_tokens),
            started_at,
        )
        .await
    };
    match result {
        Ok(CodeModeSessionProgress::Finished(output))
        | Ok(CodeModeSessionProgress::Yielded { output }) => Ok(output),
        Err(error) => Err(FunctionCallError::RespondToModel(error)),
    }
}
/// Translate one runner reply into session progress for the caller.
///
/// `poll_max_output_tokens` distinguishes the two entry points: `None`
/// means a direct exec call (the truncation budget then comes from the
/// script's reported `max_output_tokens_per_exec_call`), while
/// `Some(limit)` means a wait call using its own `max_tokens` argument.
async fn handle_node_message(
    exec: &ExecContext,
    session_id: i32,
    message: NodeToHostMessage,
    poll_max_output_tokens: Option<Option<usize>>,
    started_at: std::time::Instant,
) -> Result<CodeModeSessionProgress, String> {
    match message {
        // Tool calls flow through the worker channel, never through here.
        NodeToHostMessage::ToolCall { .. } => Err(format!(
            "{PUBLIC_TOOL_NAME} received an unexpected tool call response"
        )),
        // Script still running: yielded output with a resumable header.
        NodeToHostMessage::Yielded { content_items, .. } => {
            let mut delta_items = output_content_items_from_json_values(content_items)?;
            delta_items = truncate_code_mode_result(delta_items, poll_max_output_tokens.flatten());
            prepend_script_status(
                &mut delta_items,
                CodeModeExecutionStatus::Running(session_id),
                started_at.elapsed(),
            );
            Ok(CodeModeSessionProgress::Yielded {
                output: FunctionToolOutput::from_content(delta_items, Some(true)),
            })
        }
        // Session stopped at the host's request.
        NodeToHostMessage::Terminated { content_items, .. } => {
            let mut delta_items = output_content_items_from_json_values(content_items)?;
            delta_items = truncate_code_mode_result(delta_items, poll_max_output_tokens.flatten());
            prepend_script_status(
                &mut delta_items,
                CodeModeExecutionStatus::Terminated,
                started_at.elapsed(),
            );
            Ok(CodeModeSessionProgress::Finished(
                FunctionToolOutput::from_content(delta_items, Some(true)),
            ))
        }
        // Script finished: persist stored values, append any error text,
        // then truncate and prepend the status header.
        NodeToHostMessage::Result {
            content_items,
            stored_values,
            error_text,
            max_output_tokens_per_exec_call,
            ..
        } => {
            exec.session
                .services
                .code_mode_service
                .replace_stored_values(stored_values)
                .await;
            let mut delta_items = output_content_items_from_json_values(content_items)?;
            let success = error_text.is_none();
            if let Some(error_text) = error_text {
                delta_items.push(FunctionCallOutputContentItem::InputText {
                    text: format!("Script error:\n{error_text}"),
                });
            }
            // Wait-call budget wins; otherwise use the script's budget.
            let mut delta_items = truncate_code_mode_result(
                delta_items,
                poll_max_output_tokens.unwrap_or(max_output_tokens_per_exec_call),
            );
            prepend_script_status(
                &mut delta_items,
                if success {
                    CodeModeExecutionStatus::Completed
                } else {
                    CodeModeExecutionStatus::Failed
                },
                started_at.elapsed(),
            );
            Ok(CodeModeSessionProgress::Finished(
                FunctionToolOutput::from_content(delta_items, Some(success)),
            ))
        }
    }
}
/// Spawn the Node runner with the embedded runner script and wire up two
/// background tasks: a stderr drainer (logged as a warning on exit) and a
/// stdout router that parses line-delimited JSON and hands replies to
/// registered waiters.
async fn spawn_code_mode_process(
    node_path: &std::path::Path,
) -> Result<CodeModeProcess, std::io::Error> {
    let mut cmd = tokio::process::Command::new(node_path);
    // The runner script is passed via `--eval` and relies on `node:vm`
    // ES-module support.
    cmd.arg("--experimental-vm-modules");
    cmd.arg("--eval");
    cmd.arg(CODE_MODE_RUNNER_SOURCE);
    // All three stdio streams are piped; the child is killed if this handle
    // is dropped.
    cmd.stdin(std::process::Stdio::piped())
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::piped())
        .kill_on_drop(true);
    let mut child = cmd.spawn().map_err(std::io::Error::other)?;
    let stdout = child.stdout.take().ok_or_else(|| {
        std::io::Error::other(format!("{PUBLIC_TOOL_NAME} runner missing stdout"))
    })?;
    let stderr = child.stderr.take().ok_or_else(|| {
        std::io::Error::other(format!("{PUBLIC_TOOL_NAME} runner missing stderr"))
    })?;
    let stdin = child
        .stdin
        .take()
        .ok_or_else(|| std::io::Error::other(format!("{PUBLIC_TOOL_NAME} runner missing stdin")))?;
    let stdin = Arc::new(Mutex::new(stdin));
    let response_waiters = Arc::new(Mutex::new(HashMap::<
        String,
        oneshot::Sender<NodeToHostMessage>,
    >::new()));
    let (tool_call_tx, tool_call_rx) = mpsc::unbounded_channel();
    // Drain stderr to EOF and surface anything printed as a warning.
    tokio::spawn(async move {
        let mut reader = BufReader::new(stderr);
        let mut buf = Vec::new();
        match reader.read_to_end(&mut buf).await {
            Ok(_) => {
                let stderr = String::from_utf8_lossy(&buf).trim().to_string();
                if !stderr.is_empty() {
                    warn!("{PUBLIC_TOOL_NAME} runner stderr: {stderr}");
                }
            }
            Err(err) => {
                warn!("failed to read {PUBLIC_TOOL_NAME} stderr: {err}");
            }
        }
    });
    // Route stdout lines: tool calls go to the worker channel, everything
    // else completes the matching response waiter by request id.
    let stdout_task = tokio::spawn({
        let response_waiters = Arc::clone(&response_waiters);
        async move {
            let mut stdout_lines = BufReader::new(stdout).lines();
            loop {
                let line = match stdout_lines.next_line().await {
                    Ok(line) => line,
                    Err(err) => {
                        warn!("failed to read {PUBLIC_TOOL_NAME} stdout: {err}");
                        break;
                    }
                };
                let Some(line) = line else {
                    break;
                };
                if line.trim().is_empty() {
                    continue;
                }
                // An unparseable line stops routing entirely (breaks the
                // loop), not just that one message.
                let message: NodeToHostMessage = match serde_json::from_str(&line) {
                    Ok(message) => message,
                    Err(err) => {
                        warn!("failed to parse {PUBLIC_TOOL_NAME} stdout message: {err}");
                        break;
                    }
                };
                match message {
                    NodeToHostMessage::ToolCall { tool_call } => {
                        let _ = tool_call_tx.send(tool_call);
                    }
                    message => {
                        let request_id = message_request_id(&message).to_string();
                        if let Some(waiter) = response_waiters.lock().await.remove(&request_id) {
                            let _ = waiter.send(message);
                        }
                    }
                }
            }
            // Dropping pending waiters makes in-flight `send` calls fail
            // fast once the runner goes away.
            response_waiters.lock().await.clear();
        }
    });
    Ok(CodeModeProcess {
        child,
        stdin,
        stdout_task,
        response_waiters,
        tool_call_rx: Arc::new(Mutex::new(tool_call_rx)),
    })
}
async fn write_message(
stdin: &Arc<Mutex<tokio::process::ChildStdin>>,
message: &HostToNodeMessage,
) -> Result<(), std::io::Error> {
let line = serde_json::to_string(message).map_err(std::io::Error::other)?;
let mut stdin = stdin.lock().await;
stdin.write_all(line.as_bytes()).await?;
stdin.write_all(b"\n").await?;
stdin.flush().await?;
Ok(())
}
/// Every node-to-host message carries a correlation id; extract it
/// regardless of variant.
fn message_request_id(message: &NodeToHostMessage) -> &str {
    match message {
        NodeToHostMessage::Yielded { request_id, .. }
        | NodeToHostMessage::Terminated { request_id, .. }
        | NodeToHostMessage::Result { request_id, .. } => request_id,
        NodeToHostMessage::ToolCall { tool_call } => tool_call.request_id.as_str(),
    }
}
/// Insert the status header (`Script completed` / `Script failed` / ...)
/// plus the rounded wall time ahead of the script's own output items.
fn prepend_script_status(
    content_items: &mut Vec<FunctionCallOutputContentItem>,
    status: CodeModeExecutionStatus,
    wall_time: Duration,
) {
    let status_line = match status {
        CodeModeExecutionStatus::Completed => "Script completed".to_string(),
        CodeModeExecutionStatus::Failed => "Script failed".to_string(),
        CodeModeExecutionStatus::Terminated => "Script terminated".to_string(),
        CodeModeExecutionStatus::Running(session_id) => {
            format!("Script running with session ID {session_id}")
        }
    };
    // Round to one decimal place so the reported time is stable.
    let rounded_seconds = (wall_time.as_secs_f32() * 10.0).round() / 10.0;
    let text = format!("{status_line}\nWall time {rounded_seconds:.1} seconds\nOutput:\n");
    content_items.insert(0, FunctionCallOutputContentItem::InputText { text });
}
/// Inline the enabled-tool manifest and the user's script into the bridge
/// template via its two placeholders.
fn build_source(user_code: &str, enabled_tools: &[EnabledTool]) -> Result<String, String> {
    let enabled_tools_json = match serde_json::to_string(enabled_tools) {
        Ok(json) => json,
        Err(err) => return Err(format!("failed to serialize enabled tools: {err}")),
    };
    let with_tools = CODE_MODE_BRIDGE_SOURCE.replace(
        "__CODE_MODE_ENABLED_TOOLS_PLACEHOLDER__",
        &enabled_tools_json,
    );
    Ok(with_tools.replace("__CODE_MODE_USER_CODE_PLACEHOLDER__", user_code))
}
/// Apply the token-budget truncation policy to a result's content items.
/// Text-only results use the formatted text truncation (which can add the
/// `Total output lines:` marker); mixed content uses the generic one.
fn truncate_code_mode_result(
    items: Vec<FunctionCallOutputContentItem>,
    max_output_tokens_per_exec_call: Option<usize>,
) -> Vec<FunctionCallOutputContentItem> {
    let policy = TruncationPolicy::Tokens(resolve_max_tokens(max_output_tokens_per_exec_call));
    let text_only = items
        .iter()
        .all(|item| matches!(item, FunctionCallOutputContentItem::InputText { .. }));
    if text_only {
        let (truncated, _) = formatted_truncate_text_content_items_with_policy(&items, policy);
        truncated
    } else {
        truncate_function_output_items_with_policy(&items, policy)
    }
}
async fn build_enabled_tools(exec: &ExecContext) -> Vec<EnabledTool> {
let router = build_nested_router(exec).await;
let mut out = router
.specs()
.into_iter()
.map(|spec| augment_tool_spec_for_code_mode(spec, true))
.filter_map(enabled_tool_from_spec)
.collect::<Vec<_>>();
out.sort_by(|left, right| left.tool_name.cmp(&right.tool_name));
out.dedup_by(|left, right| left.tool_name == right.tool_name);
out
}
/// Convert a tool spec into the wire form exposed to the runner, or `None`
/// for specs that cannot be invoked from code mode.
fn enabled_tool_from_spec(spec: ToolSpec) -> Option<EnabledTool> {
    let tool_name = spec.name().to_string();
    // Never expose the exec tools to themselves.
    if tool_name == PUBLIC_TOOL_NAME || tool_name == WAIT_TOOL_NAME {
        return None;
    }
    let reference = code_mode_tool_reference(&tool_name);
    match spec {
        ToolSpec::Function(tool) => Some(EnabledTool {
            tool_name,
            module_path: reference.module_path,
            namespace: reference.namespace,
            name: reference.tool_key,
            description: tool.description,
            kind: CodeModeToolKind::Function,
        }),
        ToolSpec::Freeform(tool) => Some(EnabledTool {
            tool_name,
            module_path: reference.module_path,
            namespace: reference.namespace,
            name: reference.tool_key,
            description: tool.description,
            kind: CodeModeToolKind::Freeform,
        }),
        // Built-in specs with no callable surface in code mode.
        ToolSpec::LocalShell {}
        | ToolSpec::ImageGeneration { .. }
        | ToolSpec::ToolSearch { .. }
        | ToolSpec::WebSearch { .. } => None,
    }
}
/// Build the router used for nested tool dispatch inside code mode, seeded
/// with the turn's nested-tools config plus the currently known MCP tools.
async fn build_nested_router(exec: &ExecContext) -> ToolRouter {
    let nested_tools_config = exec.turn.tools_config.for_code_mode_nested_tools();
    // NOTE: the read guard on the MCP connection manager is a statement
    // temporary, so it is held across `list_all_tools().await`.
    let mcp_tools = exec
        .session
        .services
        .mcp_connection_manager
        .read()
        .await
        .list_all_tools()
        .await
        .into_iter()
        .map(|(name, tool_info)| (name, tool_info.tool))
        .collect();
    ToolRouter::from_config(
        &nested_tools_config,
        ToolRouterParams {
            mcp_tools: Some(mcp_tools),
            app_tools: None,
            discoverable_tools: None,
            dynamic_tools: exec.turn.dynamic_tools.as_slice(),
        },
    )
}
/// Dispatch a nested tool call coming from the runner and return its
/// code-mode result value. Errors are returned as JSON strings so the
/// script can observe them instead of crashing the session.
async fn call_nested_tool(
    exec: ExecContext,
    tool_name: String,
    input: Option<JsonValue>,
) -> JsonValue {
    // Re-entrancy guard: scripts may not call `exec` from inside `exec`.
    if tool_name == PUBLIC_TOOL_NAME {
        return JsonValue::String(format!("{PUBLIC_TOOL_NAME} cannot invoke itself"));
    }
    let router = build_nested_router(&exec).await;
    let specs = router.specs();
    // MCP tools get an `Mcp` payload; everything else is resolved against
    // the router's specs by their declared kind.
    let payload =
        if let Some((server, tool)) = exec.session.parse_mcp_tool_name(&tool_name, &None).await {
            match serialize_function_tool_arguments(&tool_name, input) {
                Ok(raw_arguments) => ToolPayload::Mcp {
                    server,
                    tool,
                    raw_arguments,
                },
                Err(error) => return JsonValue::String(error),
            }
        } else {
            match build_nested_tool_payload(&specs, &tool_name, input) {
                Ok(payload) => payload,
                Err(error) => return JsonValue::String(error),
            }
        };
    let call = ToolCall {
        tool_name: tool_name.clone(),
        // Synthesize a unique call id namespaced under the exec tool.
        call_id: format!("{PUBLIC_TOOL_NAME}-{}", uuid::Uuid::new_v4()),
        tool_namespace: None,
        payload,
    };
    let result = router
        .dispatch_tool_call_with_code_mode_result(
            Arc::clone(&exec.session),
            Arc::clone(&exec.turn),
            Arc::clone(&exec.tracker),
            call,
            ToolCallSource::CodeMode,
        )
        .await;
    match result {
        Ok(result) => result.code_mode_result(),
        Err(error) => JsonValue::String(error.to_string()),
    }
}
/// Freeform specs keep their kind; every other spec is treated as a
/// function tool for argument-validation purposes.
fn tool_kind_for_spec(spec: &ToolSpec) -> CodeModeToolKind {
    match spec {
        ToolSpec::Freeform(_) => CodeModeToolKind::Freeform,
        _ => CodeModeToolKind::Function,
    }
}
fn tool_kind_for_name(specs: &[ToolSpec], tool_name: &str) -> Result<CodeModeToolKind, String> {
specs
.iter()
.find(|spec| spec.name() == tool_name)
.map(tool_kind_for_spec)
.ok_or_else(|| format!("tool `{tool_name}` is not enabled in {PUBLIC_TOOL_NAME}"))
}
/// Build the dispatch payload for a non-MCP nested tool, validating the
/// input shape against the tool's declared kind.
fn build_nested_tool_payload(
    specs: &[ToolSpec],
    tool_name: &str,
    input: Option<JsonValue>,
) -> Result<ToolPayload, String> {
    if tool_kind_for_name(specs, tool_name)? == CodeModeToolKind::Freeform {
        build_freeform_tool_payload(tool_name, input)
    } else {
        build_function_tool_payload(tool_name, input)
    }
}
/// Serialize the arguments for a function tool into a `Function` payload.
fn build_function_tool_payload(
    tool_name: &str,
    input: Option<JsonValue>,
) -> Result<ToolPayload, String> {
    serialize_function_tool_arguments(tool_name, input)
        .map(|arguments| ToolPayload::Function { arguments })
}
/// Serialize a function tool's arguments to the JSON string the router
/// expects. `None` means "no arguments" and becomes `"{}"`; anything other
/// than a JSON object is rejected, since function tools take keyword-style
/// argument objects.
fn serialize_function_tool_arguments(
    tool_name: &str,
    input: Option<JsonValue>,
) -> Result<String, String> {
    match input {
        None => Ok("{}".to_string()),
        // Serialize the map directly instead of re-wrapping it in a
        // `JsonValue::Object`; `Value::Object` serializes via the inner
        // map, so the emitted JSON is identical.
        Some(JsonValue::Object(map)) => serde_json::to_string(&map)
            .map_err(|err| format!("failed to serialize tool `{tool_name}` arguments: {err}")),
        Some(_) => Err(format!(
            "tool `{tool_name}` expects a JSON object for arguments"
        )),
    }
}
/// Freeform tools take a raw string; anything else is an input error.
fn build_freeform_tool_payload(
    tool_name: &str,
    input: Option<JsonValue>,
) -> Result<ToolPayload, String> {
    if let Some(JsonValue::String(input)) = input {
        Ok(ToolPayload::Custom { input })
    } else {
        Err(format!("tool `{tool_name}` expects a string input"))
    }
}
/// Deserialize the runner's raw JSON content items, failing fast on the
/// first invalid item with its index in the error message.
fn output_content_items_from_json_values(
    content_items: Vec<JsonValue>,
) -> Result<Vec<FunctionCallOutputContentItem>, String> {
    let mut converted = Vec::with_capacity(content_items.len());
    for (index, item) in content_items.into_iter().enumerate() {
        match serde_json::from_value(item) {
            Ok(content_item) => converted.push(content_item),
            Err(err) => {
                return Err(format!(
                    "invalid {PUBLIC_TOOL_NAME} content item at index {index}: {err}"
                ));
            }
        }
    }
    Ok(converted)
}

View File

@@ -0,0 +1,111 @@
use async_trait::async_trait;
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::function_tool::FunctionCallError;
use crate::tools::context::FunctionToolOutput;
use crate::tools::context::SharedTurnDiffTracker;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolPayload;
use crate::tools::registry::ToolHandler;
use crate::tools::registry::ToolKind;
use super::CodeModeSessionProgress;
use super::ExecContext;
use super::PUBLIC_TOOL_NAME;
use super::build_enabled_tools;
use super::handle_node_message;
use super::protocol::HostToNodeMessage;
use super::protocol::build_source;
/// Registry handler backing the freeform `exec` tool.
pub struct CodeModeExecuteHandler;

impl CodeModeExecuteHandler {
    /// Run `code` in the shared Node runner and convert the runner's first
    /// reply into the output surfaced to the model.
    async fn execute(
        &self,
        session: std::sync::Arc<Session>,
        turn: std::sync::Arc<TurnContext>,
        tracker: SharedTurnDiffTracker,
        code: String,
    ) -> Result<FunctionToolOutput, FunctionCallError> {
        let exec = ExecContext {
            session,
            turn,
            tracker,
        };
        let enabled_tools = build_enabled_tools(&exec).await;
        let service = &exec.session.services.code_mode_service;
        let stored_values = service.stored_values().await;
        // Inline the tool manifest and user code into the bridge template.
        let source =
            build_source(&code, &enabled_tools).map_err(FunctionCallError::RespondToModel)?;
        let session_id = service.allocate_session_id().await;
        let request_id = service.allocate_request_id().await;
        let process_slot = service
            .ensure_started()
            .await
            .map_err(|err| FunctionCallError::RespondToModel(err.to_string()))?;
        let started_at = std::time::Instant::now();
        let message = HostToNodeMessage::Start {
            request_id: request_id.clone(),
            session_id,
            enabled_tools,
            stored_values,
            source,
        };
        // Hold the process-slot guard only for the duration of this exchange.
        let result = {
            let mut process_slot = process_slot;
            let Some(process) = process_slot.as_mut() else {
                return Err(FunctionCallError::RespondToModel(format!(
                    "{PUBLIC_TOOL_NAME} runner failed to start"
                )));
            };
            let message = process
                .send(&request_id, &message)
                .await
                .map_err(|err| err.to_string());
            let message = match message {
                Ok(message) => message,
                Err(error) => return Err(FunctionCallError::RespondToModel(error)),
            };
            // `None`: direct exec call — truncation budget comes from the
            // script's own `set_max_output_tokens_per_exec_call`.
            handle_node_message(&exec, session_id, message, None, started_at).await
        };
        match result {
            Ok(CodeModeSessionProgress::Finished(output))
            | Ok(CodeModeSessionProgress::Yielded { output }) => Ok(output),
            Err(error) => Err(FunctionCallError::RespondToModel(error)),
        }
    }
}
#[async_trait]
impl ToolHandler for CodeModeExecuteHandler {
    type Output = FunctionToolOutput;
    fn kind(&self) -> ToolKind {
        ToolKind::Function
    }
    // `exec` is a freeform tool, so its payload arrives as `ToolPayload::Custom`
    // (raw text) rather than JSON function arguments.
    fn matches_kind(&self, payload: &ToolPayload) -> bool {
        matches!(payload, ToolPayload::Custom { .. })
    }
    async fn handle(&self, invocation: ToolInvocation) -> Result<Self::Output, FunctionCallError> {
        let ToolInvocation {
            session,
            turn,
            tracker,
            tool_name,
            payload,
            ..
        } = invocation;
        match payload {
            // The raw input string is the JavaScript source to execute.
            ToolPayload::Custom { input } if tool_name == PUBLIC_TOOL_NAME => {
                self.execute(session, turn, tracker, input).await
            }
            _ => Err(FunctionCallError::RespondToModel(format!(
                "{PUBLIC_TOOL_NAME} expects raw JavaScript source text"
            ))),
        }
    }
}

View File

@@ -0,0 +1,399 @@
mod execute_handler;
mod process;
mod protocol;
mod service;
mod wait_handler;
mod worker;
use std::sync::Arc;
use std::time::Duration;
use codex_protocol::models::FunctionCallOutputContentItem;
use serde_json::Value as JsonValue;
use crate::client_common::tools::ToolSpec;
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::config::Config;
use crate::features::Feature;
use crate::tools::ToolRouter;
use crate::tools::code_mode_description::augment_tool_spec_for_code_mode;
use crate::tools::code_mode_description::code_mode_tool_reference;
use crate::tools::context::FunctionToolOutput;
use crate::tools::context::SharedTurnDiffTracker;
use crate::tools::context::ToolPayload;
use crate::tools::router::ToolCall;
use crate::tools::router::ToolCallSource;
use crate::tools::router::ToolRouterParams;
use crate::truncate::TruncationPolicy;
use crate::truncate::formatted_truncate_text_content_items_with_policy;
use crate::truncate::truncate_function_output_items_with_policy;
use crate::unified_exec::resolve_max_tokens;
// JavaScript sources embedded at compile time: the long-lived runner process
// and the per-session bridge that exposes tools to user code.
const CODE_MODE_RUNNER_SOURCE: &str = include_str!("runner.cjs");
const CODE_MODE_BRIDGE_SOURCE: &str = include_str!("bridge.js");
// Model-facing tool names.
pub(crate) const PUBLIC_TOOL_NAME: &str = "exec";
pub(crate) const WAIT_TOOL_NAME: &str = "exec_wait";
// Default `yield_time_ms` for `exec_wait` when the model omits it.
pub(crate) const DEFAULT_WAIT_YIELD_TIME_MS: u64 = 10_000;
/// Bundles the per-turn state every code-mode operation needs: the session,
/// the turn context, and the shared diff tracker.
#[derive(Clone)]
pub(super) struct ExecContext {
    pub(super) session: Arc<Session>,
    pub(super) turn: Arc<TurnContext>,
    pub(super) tracker: SharedTurnDiffTracker,
}
pub(crate) use execute_handler::CodeModeExecuteHandler;
pub(crate) use service::CodeModeService;
pub(crate) use wait_handler::CodeModeWaitHandler;
/// Outcome of one exchange with the runner: the script either finished or
/// yielded control back to the model while it keeps running.
enum CodeModeSessionProgress {
    Finished(FunctionToolOutput),
    Yielded { output: FunctionToolOutput },
}
/// Script status reported in the header prepended to the tool output.
enum CodeModeExecutionStatus {
    Completed,
    Failed,
    /// Still running; carries the session id the model passes to `exec_wait`.
    Running(i32),
    Terminated,
}
/// Build the "## Exec" prompt section describing the code-mode tools, or
/// `None` when the `CodeMode` feature is disabled in `config`.
pub(crate) fn instructions(config: &Config) -> Option<String> {
    if !config.features.enabled(Feature::CodeMode) {
        return None;
    }
    let mut section = String::from("## Exec\n");
    section.push_str(&format!(
        "- Use `{PUBLIC_TOOL_NAME}` for JavaScript execution in a Node-backed `node:vm` context.\n",
    ));
    section.push_str(&format!(
        "- `{PUBLIC_TOOL_NAME}` is a freeform/custom tool. Direct `{PUBLIC_TOOL_NAME}` calls must send raw JavaScript tool input. Do not wrap code in JSON, quotes, or markdown code fences.\n",
    ));
    section.push_str(&format!(
        "- Direct tool calls remain available while `{PUBLIC_TOOL_NAME}` is enabled.\n",
    ));
    section.push_str(&format!(
        "- `{PUBLIC_TOOL_NAME}` uses the same Node runtime resolution as `js_repl`. If needed, point `js_repl_node_path` at the Node binary you want Codex to use.\n",
    ));
    // Plain literal (no interpolation) — it contains `{ ... }` braces that
    // must not be treated as format placeholders.
    section.push_str("- Import nested tools from `tools.js`, for example `import { exec_command } from \"tools.js\"` or `import { ALL_TOOLS } from \"tools.js\"` to inspect the available `{ module, name, description }` entries. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import { append_notebook_logs_chart } from \"tools/mcp/ologs.js\"`. Nested tool calls resolve to their code-mode result values.\n");
    section.push_str(&format!(
        "- Import `{{ background, output_text, output_image, set_max_output_tokens_per_exec_call, set_yield_time, store, load }}` from `@openai/code_mode` (or `\"openai/code_mode\"`). `output_text(value)` surfaces text back to the model and stringifies non-string objects with `JSON.stringify(...)` when possible. `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs. `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, and `load(key)` returns a cloned stored value or `undefined`. `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate direct `{PUBLIC_TOOL_NAME}` returns; `{WAIT_TOOL_NAME}` uses its own `max_tokens` argument instead and defaults to `10000`. `set_yield_time(value)` asks `{PUBLIC_TOOL_NAME}` to return early if the script is still running after that many milliseconds so `{WAIT_TOOL_NAME}` can resume it later. `background()` returns a yielded `{PUBLIC_TOOL_NAME}` response immediately while the script keeps running in the background. The returned content starts with a separate `Script completed`, `Script failed`, or `Script running with session ID …` text item that includes wall time. When truncation happens, the final text may include `Total output lines:` and the usual `…N tokens truncated…` marker.\n",
    ));
    section.push_str(&format!(
        "- If `{PUBLIC_TOOL_NAME}` returns `Script running with session ID …`, call `{WAIT_TOOL_NAME}` with that `session_id` to keep waiting for more output, completion, or termination.\n",
    ));
    section.push_str(
        "- Function tools require JSON object arguments. Freeform tools require raw strings.\n",
    );
    section.push_str("- `add_content(value)` remains available for compatibility. It is synchronous and accepts a content item, an array of content items, or a string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`.\n");
    section
        .push_str("- Only content passed to `output_text(...)`, `output_image(...)`, or `add_content(value)` is surfaced back to the model.");
    Some(section)
}
/// Translate one runner reply into session progress, applying output
/// truncation and prepending the script-status header.
///
/// `poll_max_output_tokens` is `Some(override)` for `exec_wait` calls (the
/// wait tool's own `max_tokens` argument) and `None` for direct `exec` calls.
async fn handle_node_message(
    exec: &ExecContext,
    session_id: i32,
    message: protocol::NodeToHostMessage,
    poll_max_output_tokens: Option<Option<usize>>,
    started_at: std::time::Instant,
) -> Result<CodeModeSessionProgress, String> {
    match message {
        // Nested tool calls are serviced by the turn worker, never here.
        protocol::NodeToHostMessage::ToolCall { .. } => Err(protocol::unexpected_tool_call_error()),
        protocol::NodeToHostMessage::Yielded { content_items, .. } => {
            let mut delta_items = output_content_items_from_json_values(content_items)?;
            delta_items = truncate_code_mode_result(delta_items, poll_max_output_tokens.flatten());
            prepend_script_status(
                &mut delta_items,
                CodeModeExecutionStatus::Running(session_id),
                started_at.elapsed(),
            );
            Ok(CodeModeSessionProgress::Yielded {
                output: FunctionToolOutput::from_content(delta_items, Some(true)),
            })
        }
        protocol::NodeToHostMessage::Terminated { content_items, .. } => {
            let mut delta_items = output_content_items_from_json_values(content_items)?;
            delta_items = truncate_code_mode_result(delta_items, poll_max_output_tokens.flatten());
            prepend_script_status(
                &mut delta_items,
                CodeModeExecutionStatus::Terminated,
                started_at.elapsed(),
            );
            Ok(CodeModeSessionProgress::Finished(
                FunctionToolOutput::from_content(delta_items, Some(true)),
            ))
        }
        protocol::NodeToHostMessage::Result {
            content_items,
            stored_values,
            error_text,
            max_output_tokens_per_exec_call,
            ..
        } => {
            // Persist the script's `store(...)` snapshot for later exec calls.
            exec.session
                .services
                .code_mode_service
                .replace_stored_values(stored_values)
                .await;
            let mut delta_items = output_content_items_from_json_values(content_items)?;
            let success = error_text.is_none();
            if let Some(error_text) = error_text {
                delta_items.push(FunctionCallOutputContentItem::InputText {
                    text: format!("Script error:\n{error_text}"),
                });
            }
            // A wait-call override takes precedence over the budget the script
            // set via `set_max_output_tokens_per_exec_call`.
            let mut delta_items = truncate_code_mode_result(
                delta_items,
                poll_max_output_tokens.unwrap_or(max_output_tokens_per_exec_call),
            );
            prepend_script_status(
                &mut delta_items,
                if success {
                    CodeModeExecutionStatus::Completed
                } else {
                    CodeModeExecutionStatus::Failed
                },
                started_at.elapsed(),
            );
            Ok(CodeModeSessionProgress::Finished(
                FunctionToolOutput::from_content(delta_items, Some(success)),
            ))
        }
    }
}
/// Insert a status header ("Script completed/failed/running/terminated",
/// wall time rounded to tenths of a second, then "Output:") as the first
/// content item.
fn prepend_script_status(
    content_items: &mut Vec<FunctionCallOutputContentItem>,
    status: CodeModeExecutionStatus,
    wall_time: Duration,
) {
    // Round in f64: `as_secs_f32` loses sub-second precision for very long
    // wall times, while f64 keeps tenths exact across any realistic run.
    let wall_time_seconds = (wall_time.as_secs_f64() * 10.0).round() / 10.0;
    let header = format!(
        "{}\nWall time {wall_time_seconds:.1} seconds\nOutput:\n",
        match status {
            CodeModeExecutionStatus::Completed => "Script completed".to_string(),
            CodeModeExecutionStatus::Failed => "Script failed".to_string(),
            CodeModeExecutionStatus::Running(session_id) => {
                format!("Script running with session ID {session_id}")
            }
            CodeModeExecutionStatus::Terminated => "Script terminated".to_string(),
        }
    );
    content_items.insert(0, FunctionCallOutputContentItem::InputText { text: header });
}
/// Apply the token-budget truncation policy to a code-mode result, using the
/// text-aware truncation path when every item is plain input text.
fn truncate_code_mode_result(
    items: Vec<FunctionCallOutputContentItem>,
    max_output_tokens_per_exec_call: Option<usize>,
) -> Vec<FunctionCallOutputContentItem> {
    let policy = TruncationPolicy::Tokens(resolve_max_tokens(max_output_tokens_per_exec_call));
    let all_text = items
        .iter()
        .all(|item| matches!(item, FunctionCallOutputContentItem::InputText { .. }));
    if all_text {
        let (truncated, _) = formatted_truncate_text_content_items_with_policy(&items, policy);
        truncated
    } else {
        truncate_function_output_items_with_policy(&items, policy)
    }
}
/// Convert the runner-provided JSON values into typed output content items,
/// naming the offending index when deserialization fails.
fn output_content_items_from_json_values(
    content_items: Vec<JsonValue>,
) -> Result<Vec<FunctionCallOutputContentItem>, String> {
    let mut items = Vec::with_capacity(content_items.len());
    for (index, value) in content_items.into_iter().enumerate() {
        let item = serde_json::from_value(value).map_err(|err| {
            format!("invalid {PUBLIC_TOOL_NAME} content item at index {index}: {err}")
        })?;
        items.push(item);
    }
    Ok(items)
}
async fn build_enabled_tools(exec: &ExecContext) -> Vec<protocol::EnabledTool> {
let router = build_nested_router(exec).await;
let mut out = router
.specs()
.into_iter()
.map(|spec| augment_tool_spec_for_code_mode(spec, true))
.filter_map(enabled_tool_from_spec)
.collect::<Vec<_>>();
out.sort_by(|left, right| left.tool_name.cmp(&right.tool_name));
out.dedup_by(|left, right| left.tool_name == right.tool_name);
out
}
/// Translate a router `ToolSpec` into the wire-format `EnabledTool` consumed
/// by the Node bridge, or `None` for tools that cannot be nested.
fn enabled_tool_from_spec(spec: ToolSpec) -> Option<protocol::EnabledTool> {
    let tool_name = spec.name().to_string();
    // The code-mode tools themselves must not be callable from inside a script.
    if tool_name == PUBLIC_TOOL_NAME || tool_name == WAIT_TOOL_NAME {
        return None;
    }
    let reference = code_mode_tool_reference(&tool_name);
    let (description, kind) = match spec {
        ToolSpec::Function(tool) => (tool.description, protocol::CodeModeToolKind::Function),
        ToolSpec::Freeform(tool) => (tool.description, protocol::CodeModeToolKind::Freeform),
        // Server-side/built-in tool kinds have no nested-call representation.
        ToolSpec::LocalShell {}
        | ToolSpec::ImageGeneration { .. }
        | ToolSpec::ToolSearch { .. }
        | ToolSpec::WebSearch { .. } => {
            return None;
        }
    };
    Some(protocol::EnabledTool {
        tool_name,
        module_path: reference.module_path,
        namespace: reference.namespace,
        name: reference.tool_key,
        description,
        kind,
    })
}
/// Build the router used for nested tool calls issued from inside a code-mode
/// script, including all currently known MCP tools.
async fn build_nested_router(exec: &ExecContext) -> ToolRouter {
    let nested_tools_config = exec.turn.tools_config.for_code_mode_nested_tools();
    // Snapshot the MCP tool list under the read lock.
    let mcp_tools = exec
        .session
        .services
        .mcp_connection_manager
        .read()
        .await
        .list_all_tools()
        .await
        .into_iter()
        .map(|(name, tool_info)| (name, tool_info.tool))
        .collect();
    ToolRouter::from_config(
        &nested_tools_config,
        ToolRouterParams {
            mcp_tools: Some(mcp_tools),
            app_tools: None,
            discoverable_tools: None,
            dynamic_tools: exec.turn.dynamic_tools.as_slice(),
        },
    )
}
/// Execute a nested tool call issued from inside a running script. Errors are
/// returned as JSON strings (not propagated) so the script can observe them
/// as ordinary result values.
async fn call_nested_tool(
    exec: ExecContext,
    tool_name: String,
    input: Option<JsonValue>,
) -> JsonValue {
    // Guard against recursion into the code-mode tool itself.
    if tool_name == PUBLIC_TOOL_NAME {
        return JsonValue::String(format!("{PUBLIC_TOOL_NAME} cannot invoke itself"));
    }
    let router = build_nested_router(&exec).await;
    let specs = router.specs();
    // MCP tools get a dedicated payload; everything else is shaped according
    // to its spec kind (function vs freeform).
    let payload =
        if let Some((server, tool)) = exec.session.parse_mcp_tool_name(&tool_name, &None).await {
            match serialize_function_tool_arguments(&tool_name, input) {
                Ok(raw_arguments) => ToolPayload::Mcp {
                    server,
                    tool,
                    raw_arguments,
                },
                Err(error) => return JsonValue::String(error),
            }
        } else {
            match build_nested_tool_payload(&specs, &tool_name, input) {
                Ok(payload) => payload,
                Err(error) => return JsonValue::String(error),
            }
        };
    let call = ToolCall {
        tool_name: tool_name.clone(),
        // Synthetic call id so nested calls are distinguishable in logs.
        call_id: format!("{PUBLIC_TOOL_NAME}-{}", uuid::Uuid::new_v4()),
        tool_namespace: None,
        payload,
    };
    let result = router
        .dispatch_tool_call_with_code_mode_result(
            exec.session.clone(),
            exec.turn.clone(),
            exec.tracker.clone(),
            call,
            ToolCallSource::CodeMode,
        )
        .await;
    match result {
        Ok(result) => result.code_mode_result(),
        Err(error) => JsonValue::String(error.to_string()),
    }
}
/// Map a spec onto the wire-format tool kind expected by the Node bridge.
fn tool_kind_for_spec(spec: &ToolSpec) -> protocol::CodeModeToolKind {
    match spec {
        ToolSpec::Freeform(_) => protocol::CodeModeToolKind::Freeform,
        _ => protocol::CodeModeToolKind::Function,
    }
}
fn tool_kind_for_name(
specs: &[ToolSpec],
tool_name: &str,
) -> Result<protocol::CodeModeToolKind, String> {
specs
.iter()
.find(|spec| spec.name() == tool_name)
.map(tool_kind_for_spec)
.ok_or_else(|| format!("tool `{tool_name}` is not enabled in {PUBLIC_TOOL_NAME}"))
}
/// Shape the nested-call payload for a non-MCP tool according to its kind.
fn build_nested_tool_payload(
    specs: &[ToolSpec],
    tool_name: &str,
    input: Option<JsonValue>,
) -> Result<ToolPayload, String> {
    match tool_kind_for_name(specs, tool_name)? {
        protocol::CodeModeToolKind::Function => build_function_tool_payload(tool_name, input),
        protocol::CodeModeToolKind::Freeform => build_freeform_tool_payload(tool_name, input),
    }
}
/// Serialize the input into JSON arguments and wrap them as a function payload.
fn build_function_tool_payload(
    tool_name: &str,
    input: Option<JsonValue>,
) -> Result<ToolPayload, String> {
    serialize_function_tool_arguments(tool_name, input)
        .map(|arguments| ToolPayload::Function { arguments })
}
/// Serialize a function tool's arguments: missing input becomes `{}`, object
/// input is serialized, and anything else is rejected.
fn serialize_function_tool_arguments(
    tool_name: &str,
    input: Option<JsonValue>,
) -> Result<String, String> {
    let Some(value) = input else {
        return Ok("{}".to_string());
    };
    if let JsonValue::Object(map) = value {
        serde_json::to_string(&JsonValue::Object(map))
            .map_err(|err| format!("failed to serialize tool `{tool_name}` arguments: {err}"))
    } else {
        Err(format!(
            "tool `{tool_name}` expects a JSON object for arguments"
        ))
    }
}
/// Freeform tools take a single raw string; reject any other input shape.
fn build_freeform_tool_payload(
    tool_name: &str,
    input: Option<JsonValue>,
) -> Result<ToolPayload, String> {
    if let Some(JsonValue::String(source)) = input {
        Ok(ToolPayload::Custom { input: source })
    } else {
        Err(format!("tool `{tool_name}` expects a string input"))
    }
}

View File

@@ -0,0 +1,172 @@
use std::collections::HashMap;
use std::sync::Arc;
use tokio::io::AsyncBufReadExt;
use tokio::io::AsyncReadExt;
use tokio::io::AsyncWriteExt;
use tokio::io::BufReader;
use tokio::sync::Mutex;
use tokio::sync::mpsc;
use tokio::sync::oneshot;
use tokio::task::JoinHandle;
use tracing::warn;
use super::CODE_MODE_RUNNER_SOURCE;
use super::PUBLIC_TOOL_NAME;
use super::protocol::CodeModeToolCall;
use super::protocol::HostToNodeMessage;
use super::protocol::NodeToHostMessage;
use super::protocol::message_request_id;
/// Handle to the spawned Node runner: the child process, its serialized
/// stdin, the stdout-reader task, per-request response channels, and the
/// stream of nested tool calls emitted by running scripts.
pub(super) struct CodeModeProcess {
    pub(super) child: tokio::process::Child,
    // Mutex-wrapped so concurrent writers emit whole newline-delimited frames.
    pub(super) stdin: Arc<Mutex<tokio::process::ChildStdin>>,
    pub(super) stdout_task: JoinHandle<()>,
    // request_id -> oneshot sender, resolved by the stdout reader task.
    pub(super) response_waiters: Arc<Mutex<HashMap<String, oneshot::Sender<NodeToHostMessage>>>>,
    pub(super) tool_call_rx: Arc<Mutex<mpsc::UnboundedReceiver<CodeModeToolCall>>>,
}
impl CodeModeProcess {
    /// Send one host->node message and wait for the matching response
    /// (correlated by `request_id`) delivered by the stdout reader task.
    ///
    /// # Errors
    /// Fails if the reader task has stopped, the stdin write fails, or the
    /// waiter is dropped before a response arrives.
    pub(super) async fn send(
        &mut self,
        request_id: &str,
        message: &HostToNodeMessage,
    ) -> Result<NodeToHostMessage, std::io::Error> {
        if self.stdout_task.is_finished() {
            return Err(std::io::Error::other(format!(
                "{PUBLIC_TOOL_NAME} runner is not available"
            )));
        }
        // Register the waiter before writing so the response cannot race us.
        let (tx, rx) = oneshot::channel();
        self.response_waiters
            .lock()
            .await
            .insert(request_id.to_string(), tx);
        if let Err(err) = write_message(&self.stdin, message).await {
            // The write failed; clean up the orphaned waiter.
            self.response_waiters.lock().await.remove(request_id);
            return Err(err);
        }
        match rx.await {
            Ok(message) => Ok(message),
            // Sender dropped: the reader task exited (runner died).
            Err(_) => Err(std::io::Error::other(format!(
                "{PUBLIC_TOOL_NAME} runner is not available"
            ))),
        }
    }
    /// Whether the child process has already terminated.
    pub(super) fn has_exited(&mut self) -> Result<bool, std::io::Error> {
        // `try_wait` already yields `std::io::Error`; re-wrapping it in
        // `Error::other` would discard the original `ErrorKind`.
        self.child.try_wait().map(|status| status.is_some())
    }
}
/// Spawn the Node runner (`runner.cjs` via `--eval`), wire up stderr logging
/// and the stdout reader task, and return the assembled process handle.
///
/// # Errors
/// Fails if the process cannot be spawned or any stdio pipe is missing.
pub(super) async fn spawn_code_mode_process(
    node_path: &std::path::Path,
) -> Result<CodeModeProcess, std::io::Error> {
    let mut cmd = tokio::process::Command::new(node_path);
    cmd.arg("--experimental-vm-modules");
    cmd.arg("--eval");
    cmd.arg(CODE_MODE_RUNNER_SOURCE);
    cmd.stdin(std::process::Stdio::piped())
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::piped())
        .kill_on_drop(true);
    // `spawn` already returns `std::io::Error`; wrapping it in `Error::other`
    // would hide the real `ErrorKind` (e.g. `NotFound` for a missing binary).
    let mut child = cmd.spawn()?;
    let stdout = child.stdout.take().ok_or_else(|| {
        std::io::Error::other(format!("{PUBLIC_TOOL_NAME} runner missing stdout"))
    })?;
    let stderr = child.stderr.take().ok_or_else(|| {
        std::io::Error::other(format!("{PUBLIC_TOOL_NAME} runner missing stderr"))
    })?;
    let stdin = child
        .stdin
        .take()
        .ok_or_else(|| std::io::Error::other(format!("{PUBLIC_TOOL_NAME} runner missing stdin")))?;
    let stdin = Arc::new(Mutex::new(stdin));
    let response_waiters = Arc::new(Mutex::new(HashMap::<
        String,
        oneshot::Sender<NodeToHostMessage>,
    >::new()));
    let (tool_call_tx, tool_call_rx) = mpsc::unbounded_channel();
    // Detached task: drain stderr until EOF and log whatever arrived.
    tokio::spawn(async move {
        let mut reader = BufReader::new(stderr);
        let mut buf = Vec::new();
        match reader.read_to_end(&mut buf).await {
            Ok(_) => {
                let stderr = String::from_utf8_lossy(&buf).trim().to_string();
                if !stderr.is_empty() {
                    warn!("{PUBLIC_TOOL_NAME} runner stderr: {stderr}");
                }
            }
            Err(err) => {
                warn!("failed to read {PUBLIC_TOOL_NAME} stderr: {err}");
            }
        }
    });
    // Reader task: parse newline-delimited JSON from stdout, forwarding tool
    // calls to the worker channel and responses to their registered waiters.
    let stdout_task = tokio::spawn({
        let response_waiters = Arc::clone(&response_waiters);
        async move {
            let mut stdout_lines = BufReader::new(stdout).lines();
            loop {
                let line = match stdout_lines.next_line().await {
                    Ok(line) => line,
                    Err(err) => {
                        warn!("failed to read {PUBLIC_TOOL_NAME} stdout: {err}");
                        break;
                    }
                };
                let Some(line) = line else {
                    break;
                };
                if line.trim().is_empty() {
                    continue;
                }
                let message: NodeToHostMessage = match serde_json::from_str(&line) {
                    Ok(message) => message,
                    Err(err) => {
                        warn!("failed to parse {PUBLIC_TOOL_NAME} stdout message: {err}");
                        break;
                    }
                };
                match message {
                    NodeToHostMessage::ToolCall { tool_call } => {
                        let _ = tool_call_tx.send(tool_call);
                    }
                    message => {
                        let request_id = message_request_id(&message).to_string();
                        if let Some(waiter) = response_waiters.lock().await.remove(&request_id) {
                            let _ = waiter.send(message);
                        }
                    }
                }
            }
            // On exit, drop all pending waiters so `send` callers error out.
            response_waiters.lock().await.clear();
        }
    });
    Ok(CodeModeProcess {
        child,
        stdin,
        stdout_task,
        response_waiters,
        tool_call_rx: Arc::new(Mutex::new(tool_call_rx)),
    })
}
pub(super) async fn write_message(
stdin: &Arc<Mutex<tokio::process::ChildStdin>>,
message: &HostToNodeMessage,
) -> Result<(), std::io::Error> {
let line = serde_json::to_string(message).map_err(std::io::Error::other)?;
let mut stdin = stdin.lock().await;
stdin.write_all(line.as_bytes()).await?;
stdin.write_all(b"\n").await?;
stdin.flush().await?;
Ok(())
}

View File

@@ -0,0 +1,115 @@
use std::collections::HashMap;
use serde::Deserialize;
use serde::Serialize;
use serde_json::Value as JsonValue;
use super::CODE_MODE_BRIDGE_SOURCE;
use super::PUBLIC_TOOL_NAME;
/// Wire-format tool kind: function tools take JSON-object arguments, freeform
/// tools take a raw string.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[serde(rename_all = "snake_case")]
pub(super) enum CodeModeToolKind {
    Function,
    Freeform,
}
/// One nested tool as advertised to the Node bridge.
#[derive(Clone, Debug, Serialize)]
pub(super) struct EnabledTool {
    pub(super) tool_name: String,
    // Serialized as `module` on the wire.
    #[serde(rename = "module")]
    pub(super) module_path: String,
    pub(super) namespace: Vec<String>,
    pub(super) name: String,
    pub(super) description: String,
    pub(super) kind: CodeModeToolKind,
}
/// A nested tool invocation emitted by a running script.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "snake_case")]
pub(super) struct CodeModeToolCall {
    // Correlation id, echoed back in the host's `Response` message.
    pub(super) request_id: String,
    // Identifies this individual call within the request.
    pub(super) id: String,
    pub(super) name: String,
    #[serde(default)]
    pub(super) input: Option<JsonValue>,
}
/// Messages written to the runner's stdin, tagged with a snake_case `type`.
#[derive(Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub(super) enum HostToNodeMessage {
    /// Begin executing `source` in a new session.
    Start {
        request_id: String,
        session_id: i32,
        enabled_tools: Vec<EnabledTool>,
        stored_values: HashMap<String, JsonValue>,
        source: String,
    },
    /// Wait up to `yield_time_ms` for more output from a running session.
    Poll {
        request_id: String,
        session_id: i32,
        yield_time_ms: u64,
    },
    /// Stop a running session.
    Terminate {
        request_id: String,
        session_id: i32,
    },
    /// Result of a nested tool call previously emitted by the runner.
    Response {
        request_id: String,
        id: String,
        code_mode_result: JsonValue,
    },
}
/// Messages read from the runner's stdout, tagged with a snake_case `type`.
#[derive(Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub(super) enum NodeToHostMessage {
    /// A nested tool call from a running script.
    ToolCall {
        #[serde(flatten)]
        tool_call: CodeModeToolCall,
    },
    /// The script is still running; here is output produced so far.
    Yielded {
        request_id: String,
        content_items: Vec<JsonValue>,
    },
    /// The session was terminated.
    Terminated {
        request_id: String,
        content_items: Vec<JsonValue>,
    },
    /// The script finished (successfully when `error_text` is `None`).
    Result {
        request_id: String,
        content_items: Vec<JsonValue>,
        // Latest `store(...)` snapshot to persist for later exec calls.
        stored_values: HashMap<String, JsonValue>,
        #[serde(default)]
        error_text: Option<String>,
        // Token budget set by the script, if any.
        #[serde(default)]
        max_output_tokens_per_exec_call: Option<usize>,
    },
}
/// Produce the bridge script for one session by substituting the serialized
/// tool metadata and the user's code into the embedded bridge source.
pub(super) fn build_source(
    user_code: &str,
    enabled_tools: &[EnabledTool],
) -> Result<String, String> {
    let tools_json = match serde_json::to_string(enabled_tools) {
        Ok(json) => json,
        Err(err) => return Err(format!("failed to serialize enabled tools: {err}")),
    };
    let with_tools = CODE_MODE_BRIDGE_SOURCE.replace(
        "__CODE_MODE_ENABLED_TOOLS_PLACEHOLDER__",
        &tools_json,
    );
    Ok(with_tools.replace("__CODE_MODE_USER_CODE_PLACEHOLDER__", user_code))
}
/// The correlation id carried by any node->host message.
pub(super) fn message_request_id(message: &NodeToHostMessage) -> &str {
    match message {
        NodeToHostMessage::ToolCall { tool_call } => &tool_call.request_id,
        NodeToHostMessage::Yielded { request_id, .. }
        | NodeToHostMessage::Terminated { request_id, .. }
        | NodeToHostMessage::Result { request_id, .. } => request_id,
    }
}
/// Error text used when a `ToolCall` shows up where a response was expected.
pub(super) fn unexpected_tool_call_error() -> String {
    format!("{PUBLIC_TOOL_NAME} received an unexpected tool call response")
}

View File

@@ -0,0 +1,104 @@
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use serde_json::Value as JsonValue;
use tokio::sync::Mutex;
use tracing::warn;
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::features::Feature;
use crate::tools::context::SharedTurnDiffTracker;
use crate::tools::js_repl::resolve_compatible_node;
use super::ExecContext;
use super::PUBLIC_TOOL_NAME;
use super::process::CodeModeProcess;
use super::process::spawn_code_mode_process;
use super::worker::CodeModeWorker;
/// Session-scoped owner of the code-mode Node runner: the process slot, the
/// values persisted via `store(...)`, and the session-id counter.
pub(crate) struct CodeModeService {
    // Optional override for the Node binary, shared with `js_repl`.
    js_repl_node_path: Option<PathBuf>,
    stored_values: Mutex<HashMap<String, JsonValue>>,
    process: Arc<Mutex<Option<CodeModeProcess>>>,
    next_session_id: Mutex<i32>,
}
impl CodeModeService {
    /// Create a service with no runner process spawned yet.
    pub(crate) fn new(js_repl_node_path: Option<PathBuf>) -> Self {
        Self {
            js_repl_node_path,
            stored_values: Mutex::new(HashMap::new()),
            process: Arc::new(Mutex::new(None)),
            next_session_id: Mutex::new(1),
        }
    }
    /// Snapshot of the values persisted via `store(...)` in prior exec calls.
    pub(crate) async fn stored_values(&self) -> HashMap<String, JsonValue> {
        self.stored_values.lock().await.clone()
    }
    /// Replace the persisted value map with the runner's latest snapshot.
    pub(crate) async fn replace_stored_values(&self, values: HashMap<String, JsonValue>) {
        *self.stored_values.lock().await = values;
    }
    /// Ensure a live runner process exists, respawning if the previous one
    /// exited (or its liveness check errored), and hand back an owned guard
    /// on the process slot.
    pub(super) async fn ensure_started(
        &self,
    ) -> Result<tokio::sync::OwnedMutexGuard<Option<CodeModeProcess>>, std::io::Error> {
        let mut process_slot = self.process.lock().await;
        let needs_spawn = match process_slot.as_mut() {
            Some(process) => !matches!(process.has_exited(), Ok(false)),
            None => true,
        };
        if needs_spawn {
            let node_path = resolve_compatible_node(self.js_repl_node_path.as_deref())
                .await
                .map_err(std::io::Error::other)?;
            *process_slot = Some(spawn_code_mode_process(&node_path).await?);
        }
        // Trade the borrowed guard for an owned one so callers can hold it
        // across awaits. NOTE(review): another task could touch the slot
        // between the drop and the re-lock; callers re-check the slot's
        // contents after acquiring — confirm that covers all interleavings.
        drop(process_slot);
        Ok(self.process.clone().lock_owned().await)
    }
    /// Spawn the background worker that services nested tool calls for the
    /// duration of a turn. Returns `None` when code mode is disabled or the
    /// runner cannot be started.
    pub(crate) async fn start_turn_worker(
        &self,
        session: &Arc<Session>,
        turn: &Arc<TurnContext>,
        tracker: &SharedTurnDiffTracker,
    ) -> Option<CodeModeWorker> {
        if !turn.features.enabled(Feature::CodeMode) {
            return None;
        }
        let exec = ExecContext {
            session: Arc::clone(session),
            turn: Arc::clone(turn),
            tracker: Arc::clone(tracker),
        };
        let mut process_slot = match self.ensure_started().await {
            Ok(process_slot) => process_slot,
            Err(err) => {
                warn!("failed to start {PUBLIC_TOOL_NAME} worker for turn: {err}");
                return None;
            }
        };
        let Some(process) = process_slot.as_mut() else {
            warn!(
                "failed to start {PUBLIC_TOOL_NAME} worker for turn: {PUBLIC_TOOL_NAME} runner failed to start"
            );
            return None;
        };
        Some(process.worker(exec))
    }
    /// Hand out monotonically increasing session ids (saturating at i32::MAX).
    pub(crate) async fn allocate_session_id(&self) -> i32 {
        let mut next_session_id = self.next_session_id.lock().await;
        let session_id = *next_session_id;
        *next_session_id = next_session_id.saturating_add(1);
        session_id
    }
    /// Fresh correlation id for one host<->node request.
    pub(crate) async fn allocate_request_id(&self) -> String {
        uuid::Uuid::new_v4().to_string()
    }
}

View File

@@ -0,0 +1,137 @@
use async_trait::async_trait;
use serde::Deserialize;
use crate::function_tool::FunctionCallError;
use crate::tools::context::FunctionToolOutput;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolPayload;
use crate::tools::registry::ToolHandler;
use crate::tools::registry::ToolKind;
use super::CodeModeSessionProgress;
use super::DEFAULT_WAIT_YIELD_TIME_MS;
use super::ExecContext;
use super::PUBLIC_TOOL_NAME;
use super::WAIT_TOOL_NAME;
use super::handle_node_message;
use super::protocol::HostToNodeMessage;
/// Handler for `exec_wait`: resumes, polls, or terminates a yielded session.
pub struct CodeModeWaitHandler;
/// JSON arguments accepted by `exec_wait`.
#[derive(Debug, Deserialize)]
struct ExecWaitArgs {
    session_id: i32,
    // How long to wait before yielding again; defaults to 10 seconds.
    #[serde(default = "default_wait_yield_time_ms")]
    yield_time_ms: u64,
    // Optional truncation budget for this wait's output.
    #[serde(default)]
    max_tokens: Option<usize>,
    // When true, terminate the session instead of polling it.
    #[serde(default)]
    terminate: bool,
}
/// serde default for `yield_time_ms`.
fn default_wait_yield_time_ms() -> u64 {
    DEFAULT_WAIT_YIELD_TIME_MS
}
/// Deserialize the JSON argument payload of a function tool call, mapping
/// parse failures onto a model-visible error.
fn parse_arguments<T>(arguments: &str) -> Result<T, FunctionCallError>
where
    T: for<'de> Deserialize<'de>,
{
    match serde_json::from_str(arguments) {
        Ok(parsed) => Ok(parsed),
        Err(err) => Err(FunctionCallError::RespondToModel(format!(
            "failed to parse function arguments: {err}"
        ))),
    }
}
#[async_trait]
impl ToolHandler for CodeModeWaitHandler {
    type Output = FunctionToolOutput;
    fn kind(&self) -> ToolKind {
        ToolKind::Function
    }
    /// Poll (or terminate) an existing code-mode session and return its next
    /// batch of output.
    async fn handle(&self, invocation: ToolInvocation) -> Result<Self::Output, FunctionCallError> {
        let ToolInvocation {
            session,
            turn,
            tracker,
            tool_name,
            payload,
            ..
        } = invocation;
        match payload {
            ToolPayload::Function { arguments } if tool_name == WAIT_TOOL_NAME => {
                let args: ExecWaitArgs = parse_arguments(&arguments)?;
                let exec = ExecContext {
                    session,
                    turn,
                    tracker,
                };
                let request_id = exec
                    .session
                    .services
                    .code_mode_service
                    .allocate_request_id()
                    .await;
                let started_at = std::time::Instant::now();
                // `terminate: true` kills the session; otherwise poll with
                // the requested yield window.
                let message = if args.terminate {
                    HostToNodeMessage::Terminate {
                        request_id: request_id.clone(),
                        session_id: args.session_id,
                    }
                } else {
                    HostToNodeMessage::Poll {
                        request_id: request_id.clone(),
                        session_id: args.session_id,
                        yield_time_ms: args.yield_time_ms,
                    }
                };
                let process_slot = exec
                    .session
                    .services
                    .code_mode_service
                    .ensure_started()
                    .await
                    .map_err(|err| FunctionCallError::RespondToModel(err.to_string()))?;
                let result = {
                    let mut process_slot = process_slot;
                    let Some(process) = process_slot.as_mut() else {
                        return Err(FunctionCallError::RespondToModel(format!(
                            "{PUBLIC_TOOL_NAME} runner failed to start"
                        )));
                    };
                    // A dead runner cannot hold the session we are polling.
                    if !matches!(process.has_exited(), Ok(false)) {
                        return Err(FunctionCallError::RespondToModel(format!(
                            "{PUBLIC_TOOL_NAME} runner failed to start"
                        )));
                    }
                    let message = process
                        .send(&request_id, &message)
                        .await
                        .map_err(|err| err.to_string());
                    let message = match message {
                        Ok(message) => message,
                        Err(error) => return Err(FunctionCallError::RespondToModel(error)),
                    };
                    // Pass the wait-specific `max_tokens` override through.
                    handle_node_message(
                        &exec,
                        args.session_id,
                        message,
                        Some(args.max_tokens),
                        started_at,
                    )
                    .await
                };
                match result {
                    Ok(CodeModeSessionProgress::Finished(output))
                    | Ok(CodeModeSessionProgress::Yielded { output }) => Ok(output),
                    Err(error) => Err(FunctionCallError::RespondToModel(error)),
                }
            }
            _ => Err(FunctionCallError::RespondToModel(format!(
                "{WAIT_TOOL_NAME} expects JSON arguments"
            ))),
        }
    }
}

View File

@@ -0,0 +1,59 @@
use tokio::sync::oneshot;
use tracing::warn;
use super::ExecContext;
use super::PUBLIC_TOOL_NAME;
use super::call_nested_tool;
use super::process::CodeModeProcess;
use super::process::write_message;
use super::protocol::HostToNodeMessage;
/// RAII handle for the per-turn nested-tool-call task; dropping it requests
/// shutdown via the oneshot channel.
pub(crate) struct CodeModeWorker {
    shutdown_tx: Option<oneshot::Sender<()>>,
}
impl Drop for CodeModeWorker {
    fn drop(&mut self) {
        // Best effort: the worker task may already have exited, so a failed
        // send is ignored.
        if let Some(shutdown_tx) = self.shutdown_tx.take() {
            let _ = shutdown_tx.send(());
        }
    }
}
impl CodeModeProcess {
    /// Spawn a background task that services nested tool calls from running
    /// scripts for the duration of one turn. Dropping the returned
    /// `CodeModeWorker` signals the task to stop.
    pub(super) fn worker(&self, exec: ExecContext) -> CodeModeWorker {
        let (shutdown_tx, mut shutdown_rx) = oneshot::channel();
        let stdin = self.stdin.clone();
        let tool_call_rx = self.tool_call_rx.clone();
        tokio::spawn(async move {
            loop {
                // Wake on either a shutdown signal or the next tool call.
                let tool_call = tokio::select! {
                    _ = &mut shutdown_rx => break,
                    tool_call = async {
                        let mut tool_call_rx = tool_call_rx.lock().await;
                        tool_call_rx.recv().await
                    } => tool_call,
                };
                // `None` means the sender side (stdout reader) is gone.
                let Some(tool_call) = tool_call else {
                    break;
                };
                // Dispatch each call on its own task so a slow tool does not
                // block subsequent calls.
                let exec = exec.clone();
                let stdin = stdin.clone();
                tokio::spawn(async move {
                    let response = HostToNodeMessage::Response {
                        request_id: tool_call.request_id,
                        id: tool_call.id,
                        code_mode_result: call_nested_tool(exec, tool_call.name, tool_call.input)
                            .await,
                    };
                    if let Err(err) = write_message(&stdin, &response).await {
                        warn!("failed to write {PUBLIC_TOOL_NAME} tool response: {err}");
                    }
                });
            }
        });
        CodeModeWorker {
            shutdown_tx: Some(shutdown_tx),
        }
    }
}

View File

@@ -1,104 +0,0 @@
use async_trait::async_trait;
use serde::Deserialize;
use crate::function_tool::FunctionCallError;
use crate::tools::code_mode;
use crate::tools::code_mode::DEFAULT_WAIT_YIELD_TIME_MS;
use crate::tools::code_mode::PUBLIC_TOOL_NAME;
use crate::tools::code_mode::WAIT_TOOL_NAME;
use crate::tools::context::FunctionToolOutput;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolPayload;
use crate::tools::handlers::parse_arguments;
use crate::tools::registry::ToolHandler;
use crate::tools::registry::ToolKind;
/// Handler for the freeform `exec` tool.
pub struct CodeModeHandler;
/// Handler for the `exec_wait` polling tool.
pub struct CodeModeWaitHandler;
/// JSON arguments accepted by `exec_wait`.
#[derive(Debug, Deserialize)]
struct ExecWaitArgs {
    session_id: i32,
    // How long to wait before yielding again; defaults to 10 seconds.
    #[serde(default = "default_wait_yield_time_ms")]
    yield_time_ms: u64,
    // Optional truncation budget for this wait's output.
    #[serde(default)]
    max_tokens: Option<usize>,
    // When true, terminate the session instead of polling it.
    #[serde(default)]
    terminate: bool,
}
/// serde default for `yield_time_ms`.
fn default_wait_yield_time_ms() -> u64 {
    DEFAULT_WAIT_YIELD_TIME_MS
}
#[async_trait]
impl ToolHandler for CodeModeHandler {
    type Output = FunctionToolOutput;
    fn kind(&self) -> ToolKind {
        ToolKind::Function
    }
    // `exec` is a freeform tool, so its payload arrives as `ToolPayload::Custom`.
    fn matches_kind(&self, payload: &ToolPayload) -> bool {
        matches!(payload, ToolPayload::Custom { .. })
    }
    async fn handle(&self, invocation: ToolInvocation) -> Result<Self::Output, FunctionCallError> {
        let ToolInvocation {
            session,
            turn,
            tracker,
            tool_name,
            payload,
            ..
        } = invocation;
        match payload {
            // The raw input string is the JavaScript source to execute.
            ToolPayload::Custom { input } if tool_name == PUBLIC_TOOL_NAME => {
                code_mode::execute(session, turn, tracker, input).await
            }
            _ => Err(FunctionCallError::RespondToModel(format!(
                "{PUBLIC_TOOL_NAME} expects raw JavaScript source text"
            ))),
        }
    }
}
#[async_trait]
impl ToolHandler for CodeModeWaitHandler {
    type Output = FunctionToolOutput;
    fn kind(&self) -> ToolKind {
        ToolKind::Function
    }
    async fn handle(&self, invocation: ToolInvocation) -> Result<Self::Output, FunctionCallError> {
        let ToolInvocation {
            session,
            turn,
            tracker,
            tool_name,
            payload,
            ..
        } = invocation;
        match payload {
            ToolPayload::Function { arguments } if tool_name == WAIT_TOOL_NAME => {
                let args: ExecWaitArgs = parse_arguments(&arguments)?;
                // Delegate polling/termination to the code_mode module.
                code_mode::wait(
                    session,
                    turn,
                    tracker,
                    args.session_id,
                    args.yield_time_ms,
                    args.max_tokens,
                    args.terminate,
                )
                .await
            }
            _ => Err(FunctionCallError::RespondToModel(format!(
                "{WAIT_TOOL_NAME} expects JSON arguments"
            ))),
        }
    }
}

View File

@@ -1,7 +1,6 @@
pub(crate) mod agent_jobs;
pub mod apply_patch;
mod artifacts;
mod code_mode;
mod dynamic;
mod grep_files;
mod js_repl;
@@ -32,10 +31,10 @@ use crate::function_tool::FunctionCallError;
use crate::sandboxing::SandboxPermissions;
use crate::sandboxing::merge_permission_profiles;
use crate::sandboxing::normalize_additional_permissions;
pub(crate) use crate::tools::code_mode::CodeModeExecuteHandler;
pub(crate) use crate::tools::code_mode::CodeModeWaitHandler;
pub use apply_patch::ApplyPatchHandler;
pub use artifacts::ArtifactsHandler;
pub use code_mode::CodeModeHandler;
pub use code_mode::CodeModeWaitHandler;
use codex_protocol::models::PermissionProfile;
use codex_protocol::protocol::AskForApproval;
pub use dynamic::DynamicToolHandler;

View File

@@ -2295,7 +2295,7 @@ pub(crate) fn build_specs_with_discoverable_tools(
) -> ToolRegistryBuilder {
use crate::tools::handlers::ApplyPatchHandler;
use crate::tools::handlers::ArtifactsHandler;
use crate::tools::handlers::CodeModeHandler;
use crate::tools::handlers::CodeModeExecuteHandler;
use crate::tools::handlers::CodeModeWaitHandler;
use crate::tools::handlers::DynamicToolHandler;
use crate::tools::handlers::GrepFilesHandler;
@@ -2334,7 +2334,7 @@ pub(crate) fn build_specs_with_discoverable_tools(
default_mode_request_user_input: config.default_mode_request_user_input,
});
let tool_suggest_handler = Arc::new(ToolSuggestHandler);
let code_mode_handler = Arc::new(CodeModeHandler);
let code_mode_handler = Arc::new(CodeModeExecuteHandler);
let code_mode_wait_handler = Arc::new(CodeModeWaitHandler);
let js_repl_handler = Arc::new(JsReplHandler);
let js_repl_reset_handler = Arc::new(JsReplResetHandler);

View File

@@ -22,7 +22,6 @@ anyhow = { workspace = true }
uds_windows = { workspace = true }
[dev-dependencies]
assert_cmd = { workspace = true }
codex-utils-cargo-bin = { workspace = true }
pretty_assertions = { workspace = true }
tempfile = { workspace = true }

View File

@@ -39,9 +39,13 @@ pub fn run(socket_path: &Path) -> anyhow::Result<()> {
io::copy(&mut handle, &mut stream).context("failed to copy data from stdin to socket")?;
}
stream
.shutdown(Shutdown::Write)
.context("failed to shutdown socket writer")?;
// The peer can close immediately after sending its response; in that race,
// half-closing our write side can report NotConnected on some platforms.
if let Err(err) = stream.shutdown(Shutdown::Write)
&& err.kind() != io::ErrorKind::NotConnected
{
return Err(err).context("failed to shutdown socket writer");
}
let stdout_result = stdout_thread
.join()

View File

@@ -1,12 +1,15 @@
use std::io::ErrorKind;
use std::io::Read;
use std::io::Write;
use std::process::Command;
use std::process::Stdio;
use std::sync::mpsc;
use std::thread;
use std::time::Duration;
use std::time::Instant;
use anyhow::Context;
use assert_cmd::Command;
use anyhow::anyhow;
use pretty_assertions::assert_eq;
#[cfg(unix)]
@@ -17,8 +20,18 @@ use uds_windows::UnixListener;
#[test]
fn pipes_stdin_and_stdout_through_socket() -> anyhow::Result<()> {
// This test intentionally avoids `read_to_end()` on the server side because
// waiting for EOF can race with socket half-close behavior on slower runners.
// Reading the exact request length keeps the test deterministic.
//
// We also use `std::process::Command` (instead of `assert_cmd`) so we can
// poll/kill on timeout and include incremental server events + stderr in
// failure output, which makes flaky failures actionable to debug.
let dir = tempfile::TempDir::new().context("failed to create temp dir")?;
let socket_path = dir.path().join("socket");
let request = b"request";
let request_path = dir.path().join("request.txt");
std::fs::write(&request_path, request).context("failed to write child stdin fixture")?;
let listener = match UnixListener::bind(&socket_path) {
Ok(listener) => listener,
Err(err) if err.kind() == ErrorKind::PermissionDenied => {
@@ -31,37 +44,103 @@ fn pipes_stdin_and_stdout_through_socket() -> anyhow::Result<()> {
};
let (tx, rx) = mpsc::channel();
let (event_tx, event_rx) = mpsc::channel();
let server_thread = thread::spawn(move || -> anyhow::Result<()> {
let _ = event_tx.send("waiting for accept".to_string());
let (mut connection, _) = listener
.accept()
.context("failed to accept test connection")?;
let mut received = Vec::new();
let _ = event_tx.send("accepted connection".to_string());
let mut received = vec![0; request.len()];
connection
.read_to_end(&mut received)
.read_exact(&mut received)
.context("failed to read data from client")?;
let _ = event_tx.send(format!("read {} bytes", received.len()));
tx.send(received)
.map_err(|_| anyhow::anyhow!("failed to send received bytes to test thread"))?;
.map_err(|_| anyhow!("failed to send received bytes to test thread"))?;
connection
.write_all(b"response")
.context("failed to write response to client")?;
let _ = event_tx.send("wrote response".to_string());
Ok(())
});
Command::new(codex_utils_cargo_bin::cargo_bin("codex-stdio-to-uds")?)
let stdin = std::fs::File::open(&request_path).context("failed to open child stdin fixture")?;
let mut child = Command::new(codex_utils_cargo_bin::cargo_bin("codex-stdio-to-uds")?)
.arg(&socket_path)
.write_stdin("request")
.assert()
.success()
.stdout("response");
.stdin(Stdio::from(stdin))
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()
.context("failed to spawn codex-stdio-to-uds")?;
let mut child_stdout = child.stdout.take().context("missing child stdout")?;
let mut child_stderr = child.stderr.take().context("missing child stderr")?;
let (stdout_tx, stdout_rx) = mpsc::channel();
let (stderr_tx, stderr_rx) = mpsc::channel();
thread::spawn(move || {
let mut stdout = Vec::new();
let result = child_stdout.read_to_end(&mut stdout).map(|_| stdout);
let _ = stdout_tx.send(result);
});
thread::spawn(move || {
let mut stderr = Vec::new();
let result = child_stderr.read_to_end(&mut stderr).map(|_| stderr);
let _ = stderr_tx.send(result);
});
let mut server_events = Vec::new();
let deadline = Instant::now() + Duration::from_secs(5);
let status = loop {
while let Ok(event) = event_rx.try_recv() {
server_events.push(event);
}
if let Some(status) = child.try_wait().context("failed to poll child status")? {
break status;
}
if Instant::now() >= deadline {
let _ = child.kill();
let _ = child.wait();
let stderr = stderr_rx
.recv_timeout(Duration::from_secs(1))
.context("timed out waiting for child stderr after kill")?
.context("failed to read child stderr")?;
anyhow::bail!(
"codex-stdio-to-uds did not exit in time; server events: {:?}; stderr: {}",
server_events,
String::from_utf8_lossy(&stderr).trim_end()
);
}
thread::sleep(Duration::from_millis(25));
};
let stdout = stdout_rx
.recv_timeout(Duration::from_secs(1))
.context("timed out waiting for child stdout")?
.context("failed to read child stdout")?;
let stderr = stderr_rx
.recv_timeout(Duration::from_secs(1))
.context("timed out waiting for child stderr")?
.context("failed to read child stderr")?;
assert!(
status.success(),
"codex-stdio-to-uds exited with {status}; server events: {:?}; stderr: {}",
server_events,
String::from_utf8_lossy(&stderr).trim_end()
);
assert_eq!(stdout, b"response");
let received = rx
.recv_timeout(Duration::from_secs(1))
.context("server did not receive data in time")?;
assert_eq!(received, b"request");
assert_eq!(received, request);
let server_result = server_thread
.join()
.map_err(|_| anyhow::anyhow!("server thread panicked"))?;
.map_err(|_| anyhow!("server thread panicked"))?;
server_result.context("server failed")?;
Ok(())

View File

@@ -0,0 +1,380 @@
from __future__ import annotations
import importlib
import importlib.util
import json
import os
import platform
import shutil
import subprocess
import sys
import tarfile
import tempfile
import urllib.error
import urllib.request
import zipfile
from pathlib import Path
# Distribution name of the runtime package that bundles the codex binary.
PACKAGE_NAME = "codex-cli-bin"
# Environment variable pinning which release version of the runtime to install.
RUNTIME_VERSION_ENV_VAR = "CODEX_PYTHON_RUNTIME_VERSION"
# GitHub repository the release artifacts are downloaded from.
REPO_SLUG = "openai/codex"
class RuntimeSetupError(RuntimeError):
    """Raised when the pinned codex runtime cannot be resolved, downloaded, or installed."""

    pass
def configured_runtime_version() -> str | None:
    """Read the runtime version pin from the environment.

    Returns:
        None when the variable is unset, otherwise the stripped value.

    Raises:
        RuntimeSetupError: when the variable is set but blank.
    """
    raw = os.environ.get(RUNTIME_VERSION_ENV_VAR)
    if raw is None:
        return None
    version = raw.strip()
    if version:
        return version
    raise RuntimeSetupError(
        f"{RUNTIME_VERSION_ENV_VAR} is set but empty. "
        "Set it to a release version like 0.115.0-alpha.11."
    )
def required_runtime_version() -> str:
    """Return the pinned runtime version, failing fast when it is not set.

    Raises:
        RuntimeSetupError: when CODEX_PYTHON_RUNTIME_VERSION is not configured.
    """
    version = configured_runtime_version()
    if version is None:
        raise RuntimeSetupError(
            f"Set {RUNTIME_VERSION_ENV_VAR}=<version> so repo-local examples and real "
            "integration coverage can install the pinned codex-cli-bin runtime package."
        )
    return version
def ensure_runtime_package_installed(
    python_executable: str | Path,
    sdk_python_dir: Path,
    runtime_version: str | None = None,
    install_target: Path | None = None,
) -> str:
    """Ensure the pinned codex-cli-bin runtime package is installed; return its version.

    Version resolution order: the explicit ``runtime_version`` argument, then
    the CODEX_PYTHON_RUNTIME_VERSION environment variable. With no version at
    all, an existing install in ``python_executable`` is accepted as-is.

    Args:
        python_executable: Interpreter whose environment receives the package.
        sdk_python_dir: Repo SDK dir containing scripts/update_sdk_artifacts.py.
        runtime_version: Release version override; falls back to the env var.
        install_target: Optional ``pip install --target`` directory; when given,
            a version is mandatory because the target directory is not probed.

    Returns:
        The version string now satisfied in the destination environment.

    Raises:
        RuntimeSetupError: when no version can be resolved, download/install
            fails, or post-install verification sees a different version.
    """
    requested_version = runtime_version or configured_runtime_version()
    installed_version = None
    if install_target is None:
        # Only the interpreter's own environment is probed for an existing install.
        installed_version = _installed_runtime_version(python_executable)
    normalized_requested = (
        _normalized_package_version(requested_version) if requested_version is not None else None
    )
    if requested_version is None:
        if install_target is not None:
            raise RuntimeSetupError(
                f"{RUNTIME_VERSION_ENV_VAR} is required when installing {PACKAGE_NAME} "
                "into an isolated target directory."
            )
        if installed_version is None:
            raise RuntimeSetupError(
                f"Unable to locate {PACKAGE_NAME} in {python_executable}.\n"
                f"Install {PACKAGE_NAME} first, or set {RUNTIME_VERSION_ENV_VAR}=<version> "
                "to download a matching release codex artifact and install a local runtime wheel."
            )
        return installed_version
    if installed_version is not None and _normalized_package_version(installed_version) == normalized_requested:
        # Already at the requested version; skip the download/install round-trip.
        return requested_version
    with tempfile.TemporaryDirectory(prefix="codex-python-runtime-") as temp_root_str:
        temp_root = Path(temp_root_str)
        # Download the release archive, pull out the codex binary, stage a
        # local codex-cli-bin package around it, then pip-install the stage.
        archive_path = _download_release_archive(requested_version, temp_root)
        runtime_binary = _extract_runtime_binary(archive_path, temp_root)
        staged_runtime_dir = _stage_runtime_package(
            sdk_python_dir,
            requested_version,
            runtime_binary,
            temp_root / "runtime-stage",
        )
        _install_runtime_package(python_executable, staged_runtime_dir, install_target)
    if install_target is not None:
        # --target installs are not re-probed; trust the install step above.
        return requested_version
    if Path(python_executable).resolve() == Path(sys.executable).resolve():
        # The current process may hold stale import caches after an in-place install.
        importlib.invalidate_caches()
    installed_version = _installed_runtime_version(python_executable)
    if installed_version is None or _normalized_package_version(installed_version) != normalized_requested:
        raise RuntimeSetupError(
            f"Expected {PACKAGE_NAME} {requested_version} in {python_executable}, "
            f"but found {installed_version!r} after installation."
        )
    return requested_version
def platform_asset_name() -> str:
    """Map the host OS/architecture to the published codex release asset name.

    Raises:
        RuntimeSetupError: for any OS/arch combination with no published asset.
    """
    system = platform.system().lower()
    machine = platform.machine().lower()
    is_arm = machine in {"arm64", "aarch64"}
    is_x86 = machine in {"x86_64", "amd64"}
    if system == "darwin":
        if is_arm:
            return "codex-aarch64-apple-darwin.tar.gz"
        if is_x86:
            return "codex-x86_64-apple-darwin.tar.gz"
    elif system == "linux":
        if is_arm:
            return "codex-aarch64-unknown-linux-musl.tar.gz"
        if is_x86:
            return "codex-x86_64-unknown-linux-musl.tar.gz"
    elif system == "windows":
        if is_arm:
            return "codex-aarch64-pc-windows-msvc.exe.zip"
        if is_x86:
            return "codex-x86_64-pc-windows-msvc.exe.zip"
    raise RuntimeSetupError(
        f"Unsupported runtime artifact platform: system={platform.system()!r}, "
        f"machine={platform.machine()!r}"
    )
def runtime_binary_name() -> str:
    """Name of the codex executable inside the release archive for this OS."""
    if platform.system().lower() == "windows":
        return "codex.exe"
    return "codex"
def _installed_runtime_version(python_executable: str | Path) -> str | None:
    """Return the codex-cli-bin version visible to ``python_executable``, or None.

    Runs a short probe inside the target interpreter that both imports the
    package and resolves its bundled binary path, so a broken install (package
    metadata present but binary missing) also reports None.
    """
    snippet = (
        "import importlib.metadata, json, sys\n"
        "try:\n"
        "    from codex_cli_bin import bundled_codex_path\n"
        "    bundled_codex_path()\n"
        "    print(json.dumps({'version': importlib.metadata.version('codex-cli-bin')}))\n"
        "except Exception:\n"
        "    sys.exit(1)\n"
    )
    result = subprocess.run(
        [str(python_executable), "-c", snippet],
        text=True,
        capture_output=True,
        check=False,
    )
    if result.returncode != 0:
        # Probe exits non-zero when the package or its bundled binary is absent.
        return None
    return json.loads(result.stdout)["version"]
def _release_metadata(version: str) -> dict[str, object]:
    """Fetch GitHub release metadata for tag ``rust-v{version}``.

    Raises:
        RuntimeSetupError: when the GitHub API responds with an HTTP error.
    """
    url = f"https://api.github.com/repos/{REPO_SLUG}/releases/tags/rust-v{version}"
    request = urllib.request.Request(
        url,
        headers=_github_api_headers("application/vnd.github+json"),
    )
    try:
        with urllib.request.urlopen(request) as response:
            return json.load(response)
    except urllib.error.HTTPError as exc:
        raise RuntimeSetupError(
            f"Failed to resolve release metadata for rust-v{version} from {REPO_SLUG}: "
            f"{exc.code} {exc.reason}"
        ) from exc
def _download_release_archive(version: str, temp_root: Path) -> Path:
    """Download this platform's codex release archive into ``temp_root``.

    Tries three routes in order, falling through on HTTP errors:
    1. the GitHub API asset URL (only attempted when a token is available),
    2. the public ``browser_download_url``,
    3. an authenticated ``gh release download``.

    Returns:
        Path to the downloaded archive inside ``temp_root``.

    Raises:
        RuntimeSetupError: when the release/asset is missing or every
            download route fails.
    """
    asset_name = platform_asset_name()
    metadata = _release_metadata(version)
    assets = metadata.get("assets")
    if not isinstance(assets, list):
        raise RuntimeSetupError(f"Release rust-v{version} returned malformed assets metadata.")
    asset = next(
        (
            item
            for item in assets
            if isinstance(item, dict) and item.get("name") == asset_name
        ),
        None,
    )
    if asset is None:
        raise RuntimeSetupError(
            f"Release rust-v{version} does not contain asset {asset_name} for this platform."
        )
    archive_path = temp_root / asset_name
    # Normalize the two candidate URLs; non-string values are treated as absent.
    api_url = asset.get("url")
    browser_download_url = asset.get("browser_download_url")
    if not isinstance(api_url, str):
        api_url = None
    if not isinstance(browser_download_url, str):
        browser_download_url = None
    if api_url is not None:
        # Route 1: authenticated API download (needs a token for the
        # octet-stream asset endpoint).
        token = _github_token()
        if token is not None:
            request = urllib.request.Request(
                api_url,
                headers=_github_api_headers("application/octet-stream"),
            )
            try:
                with urllib.request.urlopen(request) as response, archive_path.open("wb") as fh:
                    shutil.copyfileobj(response, fh)
                return archive_path
            except urllib.error.HTTPError:
                # Fall through to the unauthenticated / gh CLI routes.
                pass
    if browser_download_url is not None:
        # Route 2: plain public download URL.
        request = urllib.request.Request(
            browser_download_url,
            headers={"User-Agent": "codex-python-runtime-setup"},
        )
        try:
            with urllib.request.urlopen(request) as response, archive_path.open("wb") as fh:
                shutil.copyfileobj(response, fh)
            return archive_path
        except urllib.error.HTTPError:
            # Fall through to the gh CLI route.
            pass
    # Route 3: GitHub CLI, which carries its own authentication.
    if shutil.which("gh") is None:
        raise RuntimeSetupError(
            f"Unable to download {asset_name} for rust-v{version}. "
            "Provide GH_TOKEN/GITHUB_TOKEN or install/authenticate GitHub CLI."
        )
    try:
        subprocess.run(
            [
                "gh",
                "release",
                "download",
                f"rust-v{version}",
                "--repo",
                REPO_SLUG,
                "--pattern",
                asset_name,
                "--dir",
                str(temp_root),
            ],
            check=True,
            text=True,
            capture_output=True,
        )
    except subprocess.CalledProcessError as exc:
        raise RuntimeSetupError(
            f"gh release download failed for rust-v{version} asset {asset_name}.\n"
            f"STDOUT:\n{exc.stdout}\nSTDERR:\n{exc.stderr}"
        ) from exc
    return archive_path
def _extract_runtime_binary(archive_path: Path, temp_root: Path) -> Path:
    """Extract the release archive and return the path of the codex binary.

    Args:
        archive_path: Downloaded ``.tar.gz`` or ``.zip`` release archive.
        temp_root: Scratch directory; extraction happens under ``extracted/``.

    Returns:
        Path to the extracted codex binary (or closest-named candidate).

    Raises:
        RuntimeSetupError: for unknown archive formats or when no plausible
            binary is found after extraction.
    """
    extract_dir = temp_root / "extracted"
    extract_dir.mkdir(parents=True, exist_ok=True)
    if archive_path.name.endswith(".tar.gz"):
        with tarfile.open(archive_path, "r:gz") as tar:
            try:
                # The "data" filter (Python 3.12+) rejects path-traversal and
                # other unsafe members.
                tar.extractall(extract_dir, filter="data")
            except TypeError:
                # Older Pythons do not accept the `filter` keyword.
                tar.extractall(extract_dir)
    elif archive_path.suffix == ".zip":
        with zipfile.ZipFile(archive_path) as zip_file:
            zip_file.extractall(extract_dir)
    else:
        raise RuntimeSetupError(f"Unsupported release archive format: {archive_path.name}")
    binary_name = runtime_binary_name()
    archive_stem = archive_path.name.removesuffix(".tar.gz").removesuffix(".zip")
    # Sort the candidates: rglob order is filesystem-dependent, and picking an
    # arbitrary "codex-*" file could return the wrong artifact.
    candidates = sorted(
        path
        for path in extract_dir.rglob("*")
        if path.is_file()
        and (
            path.name == binary_name
            or path.name == archive_stem
            or path.name.startswith("codex-")
        )
    )
    if not candidates:
        raise RuntimeSetupError(
            f"Failed to find {binary_name} in extracted runtime archive {archive_path.name}."
        )
    # Prefer an exact binary-name match, then the archive-stem spelling, and
    # only then fall back to the first (deterministically sorted) prefix match.
    for preferred in (binary_name, archive_stem):
        for candidate in candidates:
            if candidate.name == preferred:
                return candidate
    return candidates[0]
def _stage_runtime_package(
    sdk_python_dir: Path,
    runtime_version: str,
    runtime_binary: Path,
    staging_dir: Path,
) -> Path:
    """Stage a local codex-cli-bin package directory embedding ``runtime_binary``.

    Delegates to ``stage_python_runtime_package`` from the repo's
    scripts/update_sdk_artifacts.py so packaging logic lives in one place.

    Returns:
        Path to the staged package directory, ready for pip install.
    """
    script_module = _load_update_script_module(sdk_python_dir)
    return script_module.stage_python_runtime_package(  # type: ignore[no-any-return]
        staging_dir,
        runtime_version,
        runtime_binary.resolve(),
    )
def _install_runtime_package(
    python_executable: str | Path,
    staged_runtime_dir: Path,
    install_target: Path | None,
) -> None:
    """pip-install the staged runtime package into an environment or target dir.

    Uses ``--force-reinstall`` so any stale version is replaced and
    ``--no-deps`` because the staged package is self-contained.

    Raises:
        RuntimeSetupError: when pip exits non-zero (captured output included).
    """
    args = [
        str(python_executable),
        "-m",
        "pip",
        "install",
        "--force-reinstall",
        "--no-deps",
    ]
    if install_target is not None:
        install_target.mkdir(parents=True, exist_ok=True)
        args.extend(["--target", str(install_target)])
    args.append(str(staged_runtime_dir))
    try:
        subprocess.run(
            args,
            check=True,
            text=True,
            capture_output=True,
        )
    except subprocess.CalledProcessError as exc:
        raise RuntimeSetupError(
            f"Failed to install {PACKAGE_NAME} into {python_executable} from {staged_runtime_dir}.\n"
            f"STDOUT:\n{exc.stdout}\nSTDERR:\n{exc.stderr}"
        ) from exc
def _load_update_script_module(sdk_python_dir: Path):
    """Load scripts/update_sdk_artifacts.py as a module from its file path.

    The script lives outside any importable package, so it is loaded via an
    explicit spec and registered in ``sys.modules`` under the spec name.

    Raises:
        RuntimeSetupError: when the script cannot be located/loaded.
    """
    script_path = sdk_python_dir / "scripts" / "update_sdk_artifacts.py"
    spec = importlib.util.spec_from_file_location("update_sdk_artifacts", script_path)
    if spec is None or spec.loader is None:
        raise RuntimeSetupError(f"Failed to load {script_path}")
    module = importlib.util.module_from_spec(spec)
    sys.modules[spec.name] = module
    spec.loader.exec_module(module)
    return module
def _github_api_headers(accept: str) -> dict[str, str]:
    """Build GitHub API request headers with optional bearer authentication."""
    headers = {"Accept": accept, "User-Agent": "codex-python-runtime-setup"}
    token = _github_token()
    if token is not None:
        headers["Authorization"] = f"Bearer {token}"
    return headers
def _github_token() -> str | None:
for env_name in ("GH_TOKEN", "GITHUB_TOKEN"):
token = os.environ.get(env_name)
if token:
return token
return None
def _normalized_package_version(version: str) -> str:
return version.strip().replace("-alpha.", "a").replace("-beta.", "b")
# Public surface of this helper module; underscore helpers stay internal.
__all__ = [
    "PACKAGE_NAME",
    "RUNTIME_VERSION_ENV_VAR",
    "RuntimeSetupError",
    "configured_runtime_version",
    "ensure_runtime_package_installed",
    "platform_asset_name",
    "required_runtime_version",
]

View File

@@ -0,0 +1,180 @@
# Codex App Server SDK — API Reference
Public surface of `codex_app_server` for app-server v2.
This SDK surface is experimental. The current implementation intentionally allows only one active `Turn.stream()` or `Turn.run()` consumer per client instance at a time.
## Package Entry
```python
from codex_app_server import (
Codex,
AsyncCodex,
Thread,
AsyncThread,
Turn,
AsyncTurn,
TurnResult,
InitializeResult,
Input,
InputItem,
TextInput,
ImageInput,
LocalImageInput,
SkillInput,
MentionInput,
ThreadItem,
TurnStatus,
)
```
- Version: `codex_app_server.__version__`
- Requires Python >= 3.10
## Codex (sync)
```python
Codex(config: AppServerConfig | None = None)
```
Properties/methods:
- `metadata -> InitializeResult`
- `close() -> None`
- `thread_start(*, approval_policy=None, base_instructions=None, config=None, cwd=None, developer_instructions=None, ephemeral=None, model=None, model_provider=None, personality=None, sandbox=None) -> Thread`
- `thread_list(*, archived=None, cursor=None, cwd=None, limit=None, model_providers=None, sort_key=None, source_kinds=None) -> ThreadListResponse`
- `thread_resume(thread_id: str, *, approval_policy=None, base_instructions=None, config=None, cwd=None, developer_instructions=None, model=None, model_provider=None, personality=None, sandbox=None) -> Thread`
- `thread_fork(thread_id: str, *, approval_policy=None, base_instructions=None, config=None, cwd=None, developer_instructions=None, model=None, model_provider=None, sandbox=None) -> Thread`
- `thread_archive(thread_id: str) -> ThreadArchiveResponse`
- `thread_unarchive(thread_id: str) -> Thread`
- `models(*, include_hidden: bool = False) -> ModelListResponse`
Context manager:
```python
with Codex() as codex:
...
```
## AsyncCodex (async parity)
```python
AsyncCodex(config: AppServerConfig | None = None)
```
Properties/methods:
- `metadata -> InitializeResult`
- `close() -> Awaitable[None]`
- `thread_start(*, approval_policy=None, base_instructions=None, config=None, cwd=None, developer_instructions=None, ephemeral=None, model=None, model_provider=None, personality=None, sandbox=None) -> Awaitable[AsyncThread]`
- `thread_list(*, archived=None, cursor=None, cwd=None, limit=None, model_providers=None, sort_key=None, source_kinds=None) -> Awaitable[ThreadListResponse]`
- `thread_resume(thread_id: str, *, approval_policy=None, base_instructions=None, config=None, cwd=None, developer_instructions=None, model=None, model_provider=None, personality=None, sandbox=None) -> Awaitable[AsyncThread]`
- `thread_fork(thread_id: str, *, approval_policy=None, base_instructions=None, config=None, cwd=None, developer_instructions=None, model=None, model_provider=None, sandbox=None) -> Awaitable[AsyncThread]`
- `thread_archive(thread_id: str) -> Awaitable[ThreadArchiveResponse]`
- `thread_unarchive(thread_id: str) -> Awaitable[AsyncThread]`
- `models(*, include_hidden: bool = False) -> Awaitable[ModelListResponse]`
Async context manager:
```python
async with AsyncCodex() as codex:
...
```
## Thread / AsyncThread
`Thread` and `AsyncThread` share the same shape and intent.
### Thread
- `turn(input: Input, *, approval_policy=None, cwd=None, effort=None, model=None, output_schema=None, personality=None, sandbox_policy=None, summary=None) -> Turn`
- `read(*, include_turns: bool = False) -> ThreadReadResponse`
- `set_name(name: str) -> ThreadSetNameResponse`
- `compact() -> ThreadCompactStartResponse`
### AsyncThread
- `turn(input: Input, *, approval_policy=None, cwd=None, effort=None, model=None, output_schema=None, personality=None, sandbox_policy=None, summary=None) -> Awaitable[AsyncTurn]`
- `read(*, include_turns: bool = False) -> Awaitable[ThreadReadResponse]`
- `set_name(name: str) -> Awaitable[ThreadSetNameResponse]`
- `compact() -> Awaitable[ThreadCompactStartResponse]`
## Turn / AsyncTurn
### Turn
- `steer(input: Input) -> TurnSteerResponse`
- `interrupt() -> TurnInterruptResponse`
- `stream() -> Iterator[Notification]`
- `run() -> TurnResult`
Behavior notes:
- `stream()` and `run()` are exclusive per client instance in the current experimental build
- starting a second turn consumer on the same `Codex` instance raises `RuntimeError`
### AsyncTurn
- `steer(input: Input) -> Awaitable[TurnSteerResponse]`
- `interrupt() -> Awaitable[TurnInterruptResponse]`
- `stream() -> AsyncIterator[Notification]`
- `run() -> Awaitable[TurnResult]`
Behavior notes:
- `stream()` and `run()` are exclusive per client instance in the current experimental build
- starting a second turn consumer on the same `AsyncCodex` instance raises `RuntimeError`
## TurnResult
```python
@dataclass
class TurnResult:
thread_id: str
turn_id: str
status: TurnStatus
error: TurnError | None
text: str
items: list[ThreadItem]
usage: ThreadTokenUsageUpdatedNotification | None
```
## Inputs
```python
from dataclasses import dataclass

@dataclass
class TextInput:
    text: str

@dataclass
class ImageInput:
    url: str

@dataclass
class LocalImageInput:
    path: str

@dataclass
class SkillInput:
    name: str
    path: str

@dataclass
class MentionInput:
    name: str
    path: str

InputItem = TextInput | ImageInput | LocalImageInput | SkillInput | MentionInput
Input = list[InputItem] | InputItem
```
## Retry + errors
```python
from codex_app_server import (
retry_on_overload,
JsonRpcError,
MethodNotFoundError,
InvalidParamsError,
ServerBusyError,
is_retryable_error,
)
```
- `retry_on_overload(...)` retries transient overload errors with exponential backoff + jitter.
- `is_retryable_error(exc)` checks if an exception is transient/overload-like.
## Example
```python
from codex_app_server import Codex, TextInput
with Codex() as codex:
thread = codex.thread_start(model="gpt-5", config={"model_reasoning_effort": "high"})
result = thread.turn(TextInput("Say hello in one sentence.")).run()
print(result.text)
```

View File

@@ -8,24 +8,42 @@
## `run()` vs `stream()`
- `Turn.run()` is the easiest path. It consumes events until completion and returns `TurnResult`.
- `Turn.stream()` yields raw notifications (`Notification`) so you can react event-by-event.
- `Turn.run()` / `AsyncTurn.run()` is the easiest path. It consumes events until completion and returns `TurnResult`.
- `Turn.stream()` / `AsyncTurn.stream()` yields raw notifications (`Notification`) so you can react event-by-event.
Choose `run()` for most apps. Choose `stream()` for progress UIs, custom timeout logic, or custom parsing.
## Sync vs async clients
- `Codex` is the minimal sync SDK and best default.
- `AsyncAppServerClient` wraps the sync transport with `asyncio.to_thread(...)` for async-friendly call sites.
- `Codex` is the sync public API.
- `AsyncCodex` is an async replica of the same public API shape.
If your app is not already async, stay with `Codex`.
## `thread(...)` vs `thread_resume(...)`
## Public kwargs are snake_case
- `codex.thread(thread_id)` only binds a local helper to an existing thread ID.
- `codex.thread_resume(thread_id, ...)` performs a `thread/resume` RPC and can apply overrides (model, instructions, sandbox, etc.).
Public API keyword names are snake_case. The SDK still maps them to wire camelCase under the hood.
Use `thread(...)` for simple continuation. Use `thread_resume(...)` when you need explicit resume semantics or override fields.
If you are migrating older code, update these names:
- `approvalPolicy` -> `approval_policy`
- `baseInstructions` -> `base_instructions`
- `developerInstructions` -> `developer_instructions`
- `modelProvider` -> `model_provider`
- `modelProviders` -> `model_providers`
- `sortKey` -> `sort_key`
- `sourceKinds` -> `source_kinds`
- `outputSchema` -> `output_schema`
- `sandboxPolicy` -> `sandbox_policy`
## Why only `thread_start(...)` and `thread_resume(...)`?
The public API keeps only explicit lifecycle calls:
- `thread_start(...)` to create new threads
- `thread_resume(thread_id, ...)` to continue existing threads
This avoids duplicate ways to do the same operation and keeps behavior explicit.
## Why does constructor fail?
@@ -61,7 +79,7 @@ python scripts/update_sdk_artifacts.py \
A turn is complete only when `turn/completed` arrives for that turn ID.
- `run()` waits for this automatically.
- With `stream()`, make sure you keep consuming notifications until completion.
- With `stream()`, keep consuming notifications until completion.
## How do I retry safely?
@@ -72,6 +90,6 @@ Do not blindly retry all errors. For `InvalidParamsError` or `MethodNotFoundErro
## Common pitfalls
- Starting a new thread for every prompt when you wanted continuity.
- Forgetting to `close()` (or not using `with Codex() as codex:`).
- Forgetting to `close()` (or not using context managers).
- Ignoring `TurnResult.status` and `TurnResult.error`.
- Mixing SDK input classes with raw dicts incorrectly in minimal API paths.
- Mixing SDK input classes with raw dicts incorrectly.

View File

@@ -1,6 +1,8 @@
# Getting Started
This is the fastest path from install to a multi-turn thread using the minimal SDK surface.
This is the fastest path from install to a multi-turn thread using the public SDK surface.
The SDK is experimental. Treat the API, bundled runtime strategy, and packaging details as unstable until the first public release.
## 1) Install
@@ -15,9 +17,9 @@ Requirements:
- Python `>=3.10`
- installed `codex-cli-bin` runtime package, or an explicit `codex_bin` override
- Local Codex auth/session configured
- local Codex auth/session configured
## 2) Run your first turn
## 2) Run your first turn (sync)
```python
from codex_app_server import Codex, TextInput
@@ -25,7 +27,7 @@ from codex_app_server import Codex, TextInput
with Codex() as codex:
print("Server:", codex.metadata.server_name, codex.metadata.server_version)
thread = codex.thread_start(model="gpt-5")
thread = codex.thread_start(model="gpt-5", config={"model_reasoning_effort": "high"})
result = thread.turn(TextInput("Say hello in one sentence.")).run()
print("Thread:", result.thread_id)
@@ -39,6 +41,7 @@ What happened:
- `Codex()` started and initialized `codex app-server`.
- `thread_start(...)` created a thread.
- `turn(...).run()` consumed events until `turn/completed` and returned a `TurnResult`.
- one client can have only one active `Turn.stream()` / `Turn.run()` consumer at a time in the current experimental build
## 3) Continue the same thread (multi-turn)
@@ -46,7 +49,7 @@ What happened:
from codex_app_server import Codex, TextInput
with Codex() as codex:
thread = codex.thread_start(model="gpt-5")
thread = codex.thread_start(model="gpt-5", config={"model_reasoning_effort": "high"})
first = thread.turn(TextInput("Summarize Rust ownership in 2 bullets.")).run()
second = thread.turn(TextInput("Now explain it to a Python developer.")).run()
@@ -55,7 +58,25 @@ with Codex() as codex:
print("second:", second.text)
```
## 4) Resume an existing thread
## 4) Async parity
```python
import asyncio
from codex_app_server import AsyncCodex, TextInput
async def main() -> None:
async with AsyncCodex() as codex:
thread = await codex.thread_start(model="gpt-5", config={"model_reasoning_effort": "high"})
turn = await thread.turn(TextInput("Continue where we left off."))
result = await turn.run()
print(result.text)
asyncio.run(main())
```
## 5) Resume an existing thread
```python
from codex_app_server import Codex, TextInput
@@ -63,12 +84,12 @@ from codex_app_server import Codex, TextInput
THREAD_ID = "thr_123" # replace with a real id
with Codex() as codex:
thread = codex.thread(THREAD_ID)
thread = codex.thread_resume(THREAD_ID)
result = thread.turn(TextInput("Continue where we left off.")).run()
print(result.text)
```
## 5) Next stops
## 6) Next stops
- API surface and signatures: `docs/api-reference.md`
- Common decisions/pitfalls: `docs/faq.md`

View File

@@ -0,0 +1,30 @@
import sys
from pathlib import Path

# Make the examples root importable so the shared `_bootstrap` helper resolves
# when this script is run directly from its subdirectory.
_EXAMPLES_ROOT = Path(__file__).resolve().parents[1]
if str(_EXAMPLES_ROOT) not in sys.path:
    sys.path.insert(0, str(_EXAMPLES_ROOT))

from _bootstrap import ensure_local_sdk_src, runtime_config

# Put the repo-local SDK sources on sys.path before importing codex_app_server.
ensure_local_sdk_src()

import asyncio

from codex_app_server import AsyncCodex, TextInput


async def main() -> None:
    # runtime_config() supplies the client configuration (presumably pointing
    # at the pinned codex runtime — see `_bootstrap`).
    async with AsyncCodex(config=runtime_config()) as codex:
        print("Server:", codex.metadata.server_name, codex.metadata.server_version)
        thread = await codex.thread_start(model="gpt-5", config={"model_reasoning_effort": "high"})
        turn = await thread.turn(TextInput("Say hello in one sentence."))
        result = await turn.run()
        print("Status:", result.status)
        print("Text:", result.text)


if __name__ == "__main__":
    asyncio.run(main())

View File

@@ -0,0 +1,20 @@
import sys
from pathlib import Path

# Make the examples root importable so the shared `_bootstrap` helper resolves
# when this script is run directly from its subdirectory.
_EXAMPLES_ROOT = Path(__file__).resolve().parents[1]
if str(_EXAMPLES_ROOT) not in sys.path:
    sys.path.insert(0, str(_EXAMPLES_ROOT))

from _bootstrap import ensure_local_sdk_src, runtime_config

# Put the repo-local SDK sources on sys.path before importing codex_app_server.
ensure_local_sdk_src()

from codex_app_server import Codex, TextInput

# runtime_config() supplies the client configuration (presumably pointing at
# the pinned codex runtime — see `_bootstrap`).
with Codex(config=runtime_config()) as codex:
    print("Server:", codex.metadata.server_name, codex.metadata.server_version)
    thread = codex.thread_start(model="gpt-5", config={"model_reasoning_effort": "high"})
    result = thread.turn(TextInput("Say hello in one sentence.")).run()
    print("Status:", result.status)
    print("Text:", result.text)

View File

@@ -0,0 +1,37 @@
import sys
from pathlib import Path

# Make the examples root importable so the shared `_bootstrap` helper resolves
# when this script is run directly from its subdirectory.
_EXAMPLES_ROOT = Path(__file__).resolve().parents[1]
if str(_EXAMPLES_ROOT) not in sys.path:
    sys.path.insert(0, str(_EXAMPLES_ROOT))

from _bootstrap import ensure_local_sdk_src, runtime_config

# Put the repo-local SDK sources on sys.path before importing codex_app_server.
ensure_local_sdk_src()

import asyncio

from codex_app_server import AsyncCodex, TextInput


async def main() -> None:
    # Run one turn and print every field of the resulting TurnResult.
    async with AsyncCodex(config=runtime_config()) as codex:
        thread = await codex.thread_start(model="gpt-5.4", config={"model_reasoning_effort": "high"})
        turn = await thread.turn(TextInput("Give 3 bullets about SIMD."))
        result = await turn.run()
        print("thread_id:", result.thread_id)
        print("turn_id:", result.turn_id)
        print("status:", result.status)
        if result.error is not None:
            print("error:", result.error)
        print("text:", result.text)
        print("items.count:", len(result.items))
        if result.usage is None:
            # A completed turn is expected to carry token-usage data.
            raise RuntimeError("missing usage for completed turn")
        print("usage.thread_id:", result.usage.thread_id)
        print("usage.turn_id:", result.usage.turn_id)


if __name__ == "__main__":
    asyncio.run(main())

View File

@@ -0,0 +1,28 @@
import sys
from pathlib import Path
_EXAMPLES_ROOT = Path(__file__).resolve().parents[1]
if str(_EXAMPLES_ROOT) not in sys.path:
sys.path.insert(0, str(_EXAMPLES_ROOT))
from _bootstrap import ensure_local_sdk_src, runtime_config
ensure_local_sdk_src()
from codex_app_server import Codex, TextInput
# Run a single sync turn and dump every field of the result.
with Codex(config=runtime_config()) as codex:
    thread = codex.thread_start(
        model="gpt-5.4",
        config={"model_reasoning_effort": "high"},
    )
    result = thread.turn(TextInput("Give 3 bullets about SIMD.")).run()
    print("thread_id:", result.thread_id)
    print("turn_id:", result.turn_id)
    print("status:", result.status)
    if result.error is not None:
        print("error:", result.error)
    print("text:", result.text)
    print("items.count:", len(result.items))
    usage = result.usage
    if usage is None:
        raise RuntimeError("missing usage for completed turn")
    print("usage.thread_id:", usage.thread_id)
    print("usage.turn_id:", usage.turn_id)

View File

@@ -0,0 +1,44 @@
import sys
from pathlib import Path
_EXAMPLES_ROOT = Path(__file__).resolve().parents[1]
if str(_EXAMPLES_ROOT) not in sys.path:
sys.path.insert(0, str(_EXAMPLES_ROOT))
from _bootstrap import ensure_local_sdk_src, runtime_config
ensure_local_sdk_src()
import asyncio
from codex_app_server import AsyncCodex, TextInput
async def main() -> None:
    """Start a long turn, attempt steer/interrupt, then stream raw events."""
    async with AsyncCodex(config=runtime_config()) as codex:
        thread = await codex.thread_start(
            model="gpt-5",
            config={"model_reasoning_effort": "high"},
        )
        turn = await thread.turn(
            TextInput("Count from 1 to 200 with commas, then one summary sentence.")
        )
        # Best effort controls: models can finish quickly, so races are expected.
        try:
            _ = await turn.steer(TextInput("Keep it brief and stop after 20 numbers."))
            print("steer: sent")
        except Exception as exc:
            print("steer: skipped", type(exc).__name__)
        try:
            _ = await turn.interrupt()
            print("interrupt: sent")
        except Exception as exc:
            print("interrupt: skipped", type(exc).__name__)
        seen = 0
        async for event in turn.stream():
            seen += 1
            print(event.method, event.payload)
        print("events.count:", seen)


if __name__ == "__main__":
    asyncio.run(main())

View File

@@ -0,0 +1,36 @@
import sys
from pathlib import Path
_EXAMPLES_ROOT = Path(__file__).resolve().parents[1]
if str(_EXAMPLES_ROOT) not in sys.path:
sys.path.insert(0, str(_EXAMPLES_ROOT))
from _bootstrap import ensure_local_sdk_src, runtime_config
ensure_local_sdk_src()
from codex_app_server import Codex, TextInput
# Start a long turn, attempt steer/interrupt, then stream raw events (sync).
with Codex(config=runtime_config()) as codex:
    thread = codex.thread_start(
        model="gpt-5",
        config={"model_reasoning_effort": "high"},
    )
    turn = thread.turn(
        TextInput("Count from 1 to 200 with commas, then one summary sentence.")
    )
    # Best effort controls: models can finish quickly, so races are expected.
    try:
        _ = turn.steer(TextInput("Keep it brief and stop after 20 numbers."))
        print("steer: sent")
    except Exception as exc:
        print("steer: skipped", type(exc).__name__)
    try:
        _ = turn.interrupt()
        print("interrupt: sent")
    except Exception as exc:
        print("interrupt: skipped", type(exc).__name__)
    seen = 0
    for event in turn.stream():
        seen += 1
        print(event.method, event.payload)
    print("events.count:", seen)

View File

@@ -0,0 +1,28 @@
import sys
from pathlib import Path
_EXAMPLES_ROOT = Path(__file__).resolve().parents[1]
if str(_EXAMPLES_ROOT) not in sys.path:
sys.path.insert(0, str(_EXAMPLES_ROOT))
from _bootstrap import ensure_local_sdk_src, runtime_config
ensure_local_sdk_src()
import asyncio
from codex_app_server import AsyncCodex
async def main() -> None:
    """Print server metadata and the model list (hidden models included)."""
    async with AsyncCodex(config=runtime_config()) as codex:
        print("metadata:", codex.metadata)
        models = await codex.models(include_hidden=True)
        print("models.count:", len(models.data))
        if models.data:
            print("first model id:", models.data[0].id)


if __name__ == "__main__":
    asyncio.run(main())

View File

@@ -0,0 +1,20 @@
import sys
from pathlib import Path
_EXAMPLES_ROOT = Path(__file__).resolve().parents[1]
if str(_EXAMPLES_ROOT) not in sys.path:
sys.path.insert(0, str(_EXAMPLES_ROOT))
from _bootstrap import ensure_local_sdk_src, runtime_config
ensure_local_sdk_src()
from codex_app_server import Codex
# Print server metadata and the default (visible) model list.
with Codex(config=runtime_config()) as codex:
    print("metadata:", codex.metadata)
    models = codex.models()
    print("models.count:", len(models.data))
    if models.data:
        print("first model id:", models.data[0].id)

View File

@@ -0,0 +1,32 @@
import sys
from pathlib import Path
_EXAMPLES_ROOT = Path(__file__).resolve().parents[1]
if str(_EXAMPLES_ROOT) not in sys.path:
sys.path.insert(0, str(_EXAMPLES_ROOT))
from _bootstrap import ensure_local_sdk_src, runtime_config
ensure_local_sdk_src()
import asyncio
from codex_app_server import AsyncCodex, TextInput
async def main() -> None:
    """Create a thread, then resume it by ID and continue the conversation."""
    async with AsyncCodex(config=runtime_config()) as codex:
        # Create an initial thread and turn so we have a real thread to resume.
        original = await codex.thread_start(
            model="gpt-5",
            config={"model_reasoning_effort": "high"},
        )
        first_turn = await original.turn(TextInput("Tell me one fact about Saturn."))
        first = await first_turn.run()
        print("Created thread:", first.thread_id)
        # Resume the existing thread by ID.
        resumed = await codex.thread_resume(first.thread_id)
        second_turn = await resumed.turn(TextInput("Continue with one more fact."))
        second = await second_turn.run()
        print(second.text)


if __name__ == "__main__":
    asyncio.run(main())

View File

@@ -0,0 +1,23 @@
import sys
from pathlib import Path
_EXAMPLES_ROOT = Path(__file__).resolve().parents[1]
if str(_EXAMPLES_ROOT) not in sys.path:
sys.path.insert(0, str(_EXAMPLES_ROOT))
from _bootstrap import ensure_local_sdk_src, runtime_config
ensure_local_sdk_src()
from codex_app_server import Codex, TextInput
with Codex(config=runtime_config()) as codex:
    # Create an initial thread and turn so we have a real thread to resume.
    original = codex.thread_start(
        model="gpt-5",
        config={"model_reasoning_effort": "high"},
    )
    first = original.turn(TextInput("Tell me one fact about Saturn.")).run()
    print("Created thread:", first.thread_id)
    # Resume the existing thread by ID.
    resumed = codex.thread_resume(first.thread_id)
    second = resumed.turn(TextInput("Continue with one more fact.")).run()
    print(second.text)

View File

@@ -0,0 +1,70 @@
import sys
from pathlib import Path
_EXAMPLES_ROOT = Path(__file__).resolve().parents[1]
if str(_EXAMPLES_ROOT) not in sys.path:
sys.path.insert(0, str(_EXAMPLES_ROOT))
from _bootstrap import ensure_local_sdk_src, runtime_config
ensure_local_sdk_src()
import asyncio
from codex_app_server import AsyncCodex, TextInput
async def _run_prompt(thread, text: str):
    """Submit one text turn on `thread` and await its result."""
    turn = await thread.turn(TextInput(text))
    return await turn.run()


async def main() -> None:
    """Exercise the full thread lifecycle: turns, resume, list, read, rename,
    archive/unarchive, fork, and compact — printing a summary at the end."""
    async with AsyncCodex(config=runtime_config()) as codex:
        thread = await codex.thread_start(
            model="gpt-5",
            config={"model_reasoning_effort": "high"},
        )
        first = await _run_prompt(thread, "One sentence about structured planning.")
        second = await _run_prompt(thread, "Now restate it for a junior engineer.")
        reopened = await codex.thread_resume(thread.id)
        listing_active = await codex.thread_list(limit=20, archived=False)
        reading = await reopened.read(include_turns=True)
        _ = await reopened.set_name("sdk-lifecycle-demo")
        _ = await codex.thread_archive(reopened.id)
        listing_archived = await codex.thread_list(limit=20, archived=True)
        unarchived = await codex.thread_unarchive(reopened.id)
        # Resume/fork/compact are best-effort: record the outcome either way.
        resumed_info = "n/a"
        try:
            resumed = await codex.thread_resume(
                unarchived.id,
                model="gpt-5",
                config={"model_reasoning_effort": "high"},
            )
            resumed_result = await _run_prompt(resumed, "Continue in one short sentence.")
            resumed_info = f"{resumed_result.turn_id} {resumed_result.status}"
        except Exception as exc:
            resumed_info = f"skipped({type(exc).__name__})"
        forked_info = "n/a"
        try:
            forked = await codex.thread_fork(unarchived.id, model="gpt-5")
            forked_result = await _run_prompt(
                forked, "Take a different angle in one short sentence."
            )
            forked_info = f"{forked_result.turn_id} {forked_result.status}"
        except Exception as exc:
            forked_info = f"skipped({type(exc).__name__})"
        compact_info = "sent"
        try:
            _ = await unarchived.compact()
        except Exception as exc:
            compact_info = f"skipped({type(exc).__name__})"
        print("Lifecycle OK:", thread.id)
        print("first:", first.turn_id, first.status)
        print("second:", second.turn_id, second.status)
        print("read.turns:", len(reading.thread.turns or []))
        print("list.active:", len(listing_active.data))
        print("list.archived:", len(listing_archived.data))
        print("resumed:", resumed_info)
        print("forked:", forked_info)
        print("compact:", compact_info)


if __name__ == "__main__":
    asyncio.run(main())

View File

@@ -0,0 +1,63 @@
import sys
from pathlib import Path
_EXAMPLES_ROOT = Path(__file__).resolve().parents[1]
if str(_EXAMPLES_ROOT) not in sys.path:
sys.path.insert(0, str(_EXAMPLES_ROOT))
from _bootstrap import ensure_local_sdk_src, runtime_config
ensure_local_sdk_src()
from codex_app_server import Codex, TextInput
# Exercise the full thread lifecycle (sync): turns, resume, list, read,
# rename, archive/unarchive, fork, and compact — printing a summary at the end.
with Codex(config=runtime_config()) as codex:
    thread = codex.thread_start(
        model="gpt-5",
        config={"model_reasoning_effort": "high"},
    )
    first = thread.turn(TextInput("One sentence about structured planning.")).run()
    second = thread.turn(TextInput("Now restate it for a junior engineer.")).run()
    reopened = codex.thread_resume(thread.id)
    listing_active = codex.thread_list(limit=20, archived=False)
    reading = reopened.read(include_turns=True)
    _ = reopened.set_name("sdk-lifecycle-demo")
    _ = codex.thread_archive(reopened.id)
    listing_archived = codex.thread_list(limit=20, archived=True)
    unarchived = codex.thread_unarchive(reopened.id)
    # Resume/fork/compact are best-effort: record the outcome either way.
    resumed_info = "n/a"
    try:
        resumed = codex.thread_resume(
            unarchived.id,
            model="gpt-5",
            config={"model_reasoning_effort": "high"},
        )
        resumed_result = resumed.turn(TextInput("Continue in one short sentence.")).run()
        resumed_info = f"{resumed_result.turn_id} {resumed_result.status}"
    except Exception as exc:
        resumed_info = f"skipped({type(exc).__name__})"
    forked_info = "n/a"
    try:
        forked = codex.thread_fork(unarchived.id, model="gpt-5")
        forked_result = forked.turn(
            TextInput("Take a different angle in one short sentence.")
        ).run()
        forked_info = f"{forked_result.turn_id} {forked_result.status}"
    except Exception as exc:
        forked_info = f"skipped({type(exc).__name__})"
    compact_info = "sent"
    try:
        _ = unarchived.compact()
    except Exception as exc:
        compact_info = f"skipped({type(exc).__name__})"
    print("Lifecycle OK:", thread.id)
    print("first:", first.turn_id, first.status)
    print("second:", second.turn_id, second.status)
    print("read.turns:", len(reading.thread.turns or []))
    print("list.active:", len(listing_active.data))
    print("list.archived:", len(listing_archived.data))
    print("resumed:", resumed_info)
    print("forked:", forked_info)
    print("compact:", compact_info)

View File

@@ -0,0 +1,35 @@
import sys
from pathlib import Path
_EXAMPLES_ROOT = Path(__file__).resolve().parents[1]
if str(_EXAMPLES_ROOT) not in sys.path:
sys.path.insert(0, str(_EXAMPLES_ROOT))
from _bootstrap import ensure_local_sdk_src, runtime_config
ensure_local_sdk_src()
import asyncio
from codex_app_server import AsyncCodex, ImageInput, TextInput
# Publicly reachable sample image used for the multimodal turn.
REMOTE_IMAGE_URL = "https://raw.githubusercontent.com/github/explore/main/topics/python/python.png"


async def main() -> None:
    """Send a text prompt plus a remote image URL in a single turn."""
    async with AsyncCodex(config=runtime_config()) as codex:
        thread = await codex.thread_start(
            model="gpt-5",
            config={"model_reasoning_effort": "high"},
        )
        inputs = [
            TextInput("What is in this image? Give 3 bullets."),
            ImageInput(REMOTE_IMAGE_URL),
        ]
        turn = await thread.turn(inputs)
        result = await turn.run()
        print("Status:", result.status)
        print(result.text)


if __name__ == "__main__":
    asyncio.run(main())

View File

@@ -0,0 +1,26 @@
import sys
from pathlib import Path
_EXAMPLES_ROOT = Path(__file__).resolve().parents[1]
if str(_EXAMPLES_ROOT) not in sys.path:
sys.path.insert(0, str(_EXAMPLES_ROOT))
from _bootstrap import ensure_local_sdk_src, runtime_config
ensure_local_sdk_src()
from codex_app_server import Codex, ImageInput, TextInput
# Publicly reachable sample image used for the multimodal turn.
REMOTE_IMAGE_URL = "https://raw.githubusercontent.com/github/explore/main/topics/python/python.png"

with Codex(config=runtime_config()) as codex:
    thread = codex.thread_start(
        model="gpt-5",
        config={"model_reasoning_effort": "high"},
    )
    inputs = [
        TextInput("What is in this image? Give 3 bullets."),
        ImageInput(REMOTE_IMAGE_URL),
    ]
    result = thread.turn(inputs).run()
    print("Status:", result.status)
    print(result.text)

View File

@@ -0,0 +1,38 @@
import sys
from pathlib import Path
_EXAMPLES_ROOT = Path(__file__).resolve().parents[1]
if str(_EXAMPLES_ROOT) not in sys.path:
sys.path.insert(0, str(_EXAMPLES_ROOT))
from _bootstrap import ensure_local_sdk_src, runtime_config
ensure_local_sdk_src()
import asyncio
from codex_app_server import AsyncCodex, LocalImageInput, TextInput
# Bundled sample image shipped alongside the examples; fail early if missing.
IMAGE_PATH = Path(__file__).resolve().parents[1] / "assets" / "sample_scene.png"
if not IMAGE_PATH.exists():
    raise FileNotFoundError(f"Missing bundled image: {IMAGE_PATH}")


async def main() -> None:
    """Send a text prompt plus a local image file in a single turn."""
    async with AsyncCodex(config=runtime_config()) as codex:
        thread = await codex.thread_start(
            model="gpt-5",
            config={"model_reasoning_effort": "high"},
        )
        inputs = [
            TextInput("Read this local image and summarize what you see in 2 bullets."),
            LocalImageInput(str(IMAGE_PATH.resolve())),
        ]
        turn = await thread.turn(inputs)
        result = await turn.run()
        print("Status:", result.status)
        print(result.text)


if __name__ == "__main__":
    asyncio.run(main())

View File

@@ -0,0 +1,29 @@
import sys
from pathlib import Path
_EXAMPLES_ROOT = Path(__file__).resolve().parents[1]
if str(_EXAMPLES_ROOT) not in sys.path:
sys.path.insert(0, str(_EXAMPLES_ROOT))
from _bootstrap import ensure_local_sdk_src, runtime_config
ensure_local_sdk_src()
from codex_app_server import Codex, LocalImageInput, TextInput
# Bundled sample image shipped alongside the examples; fail early if missing.
IMAGE_PATH = Path(__file__).resolve().parents[1] / "assets" / "sample_scene.png"
if not IMAGE_PATH.exists():
    raise FileNotFoundError(f"Missing bundled image: {IMAGE_PATH}")

with Codex(config=runtime_config()) as codex:
    thread = codex.thread_start(
        model="gpt-5",
        config={"model_reasoning_effort": "high"},
    )
    inputs = [
        TextInput("Read this local image and summarize what you see in 2 bullets."),
        LocalImageInput(str(IMAGE_PATH.resolve())),
    ]
    result = thread.turn(inputs).run()
    print("Status:", result.status)
    print(result.text)

View File

@@ -0,0 +1,23 @@
import sys
from pathlib import Path
_EXAMPLES_ROOT = Path(__file__).resolve().parents[1]
if str(_EXAMPLES_ROOT) not in sys.path:
sys.path.insert(0, str(_EXAMPLES_ROOT))
from _bootstrap import ensure_local_sdk_src, runtime_config
ensure_local_sdk_src()
from codex_app_server import Codex, TextInput
# Quickstart (sync): sanity-check the server and run one hello-world turn.
with Codex(config=runtime_config()) as codex:
    meta = codex.metadata
    print("Server:", meta.server_name, meta.server_version)
    thread = codex.thread_start(
        model="gpt-5",
        config={"model_reasoning_effort": "high"},
    )
    result = thread.turn(TextInput("Say hello in one sentence.")).run()
    print("Thread:", result.thread_id)
    print("Turn:", result.turn_id)
    print("Text:", result.text.strip())

View File

@@ -0,0 +1,91 @@
import sys
from pathlib import Path
_EXAMPLES_ROOT = Path(__file__).resolve().parents[1]
if str(_EXAMPLES_ROOT) not in sys.path:
sys.path.insert(0, str(_EXAMPLES_ROOT))
from _bootstrap import ensure_local_sdk_src, runtime_config
ensure_local_sdk_src()
import asyncio
import random
from collections.abc import Awaitable, Callable
from typing import TypeVar
from codex_app_server import (
AsyncCodex,
JsonRpcError,
ServerBusyError,
TextInput,
TurnStatus,
is_retryable_error,
)
ResultT = TypeVar("ResultT")


async def retry_on_overload_async(
    op: Callable[[], Awaitable[ResultT]],
    *,
    max_attempts: int = 3,
    initial_delay_s: float = 0.25,
    max_delay_s: float = 2.0,
    jitter_ratio: float = 0.2,
) -> ResultT:
    """Await `op()` until it succeeds, retrying retryable failures.

    Uses capped exponential backoff with symmetric jitter. Non-retryable
    errors (per `is_retryable_error`) and the final failed attempt are
    re-raised unchanged. Raises ValueError if `max_attempts` < 1.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be >= 1")
    delay = initial_delay_s
    for attempt in range(1, max_attempts + 1):
        try:
            return await op()
        except Exception as exc:  # noqa: BLE001
            # Give up on the last attempt or on a non-retryable failure.
            if attempt >= max_attempts or not is_retryable_error(exc):
                raise
            spread = delay * jitter_ratio
            pause = min(max_delay_s, delay) + random.uniform(-spread, spread)
            if pause > 0:
                await asyncio.sleep(pause)
            delay = min(max_delay_s, delay * 2)
    raise AssertionError("unreachable")  # loop always returns or raises


def _run_turn(thread, prompt: str):
    """Build a zero-arg coroutine factory suitable for the retry helper."""

    async def _inner():
        turn = await thread.turn(TextInput(prompt))
        return await turn.run()

    return _inner


async def main() -> None:
    """Run one turn behind the retry helper, with typed error handling."""
    async with AsyncCodex(config=runtime_config()) as codex:
        thread = await codex.thread_start(
            model="gpt-5",
            config={"model_reasoning_effort": "high"},
        )
        try:
            result = await retry_on_overload_async(
                _run_turn(thread, "Summarize retry best practices in 3 bullets."),
                max_attempts=3,
                initial_delay_s=0.25,
                max_delay_s=2.0,
            )
        except ServerBusyError as exc:
            print("Server overloaded after retries:", exc.message)
            print("Text:")
            return
        except JsonRpcError as exc:
            print(f"JSON-RPC error {exc.code}: {exc.message}")
            print("Text:")
            return
        if result.status == TurnStatus.failed:
            print("Turn failed:", result.error)
        print("Text:", result.text)


if __name__ == "__main__":
    asyncio.run(main())

View File

@@ -0,0 +1,40 @@
import sys
from pathlib import Path
_EXAMPLES_ROOT = Path(__file__).resolve().parents[1]
if str(_EXAMPLES_ROOT) not in sys.path:
sys.path.insert(0, str(_EXAMPLES_ROOT))
from _bootstrap import ensure_local_sdk_src, runtime_config
ensure_local_sdk_src()
from codex_app_server import (
Codex,
JsonRpcError,
ServerBusyError,
TextInput,
TurnStatus,
retry_on_overload,
)
# Run one turn behind the SDK's retry helper, with typed error handling (sync).
with Codex(config=runtime_config()) as codex:
    thread = codex.thread_start(
        model="gpt-5",
        config={"model_reasoning_effort": "high"},
    )

    def _attempt():
        # One full turn; re-invoked by retry_on_overload on retryable failures.
        turn = thread.turn(TextInput("Summarize retry best practices in 3 bullets."))
        return turn.run()

    try:
        result = retry_on_overload(
            _attempt,
            max_attempts=3,
            initial_delay_s=0.25,
            max_delay_s=2.0,
        )
    except ServerBusyError as exc:
        print("Server overloaded after retries:", exc.message)
        print("Text:")
    except JsonRpcError as exc:
        print(f"JSON-RPC error {exc.code}: {exc.message}")
        print("Text:")
    else:
        if result.status == TurnStatus.failed:
            print("Turn failed:", result.error)
        print("Text:", result.text)

View File

@@ -0,0 +1,96 @@
import sys
from pathlib import Path
_EXAMPLES_ROOT = Path(__file__).resolve().parents[1]
if str(_EXAMPLES_ROOT) not in sys.path:
sys.path.insert(0, str(_EXAMPLES_ROOT))
from _bootstrap import ensure_local_sdk_src, runtime_config
ensure_local_sdk_src()
import asyncio
from codex_app_server import (
AsyncCodex,
TextInput,
ThreadTokenUsageUpdatedNotification,
TurnCompletedNotificationPayload,
)
def _status_value(status: object | None) -> str:
return str(getattr(status, "value", status))
def _format_usage(usage: object | None) -> str:
if usage is None:
return "usage> (none)"
last = getattr(usage, "last", None)
total = getattr(usage, "total", None)
if last is None or total is None:
return f"usage> {usage}"
return (
"usage>\n"
f" last: input={last.inputTokens} output={last.outputTokens} reasoning={last.reasoningOutputTokens} total={last.totalTokens} cached={last.cachedInputTokens}\n"
f" total: input={total.inputTokens} output={total.outputTokens} reasoning={total.reasoningOutputTokens} total={total.totalTokens} cached={total.cachedInputTokens}"
)
async def main() -> None:
    """Interactive async chat loop: stream deltas, then print status and usage."""
    print("Codex async mini CLI. Type /exit to quit.")
    async with AsyncCodex(config=runtime_config()) as codex:
        thread = await codex.thread_start(
            model="gpt-5",
            config={"model_reasoning_effort": "high"},
        )
        print("Thread:", thread.id)
        while True:
            # input() is blocking, so run it off the event loop.
            try:
                user_input = (await asyncio.to_thread(input, "you> ")).strip()
            except EOFError:
                break
            if not user_input:
                continue
            if user_input in {"/exit", "/quit"}:
                break
            turn = await thread.turn(TextInput(user_input))
            usage = None
            status = None
            error = None
            streamed_text = False
            print("assistant> ", end="", flush=True)
            async for event in turn.stream():
                payload = event.payload
                if event.method == "item/agentMessage/delta":
                    delta = getattr(payload, "delta", "")
                    if delta:
                        print(delta, end="", flush=True)
                        streamed_text = True
                elif isinstance(payload, ThreadTokenUsageUpdatedNotification):
                    usage = payload.token_usage
                elif isinstance(payload, TurnCompletedNotificationPayload):
                    status = payload.turn.status
                    error = payload.turn.error
            print() if streamed_text else print("[no text]")
            status_text = _status_value(status)
            print(f"assistant.status> {status_text}")
            if status_text == "failed":
                print("assistant.error>", error)
            print(_format_usage(usage))


if __name__ == "__main__":
    asyncio.run(main())

View File

@@ -0,0 +1,89 @@
import sys
from pathlib import Path
_EXAMPLES_ROOT = Path(__file__).resolve().parents[1]
if str(_EXAMPLES_ROOT) not in sys.path:
sys.path.insert(0, str(_EXAMPLES_ROOT))
from _bootstrap import ensure_local_sdk_src, runtime_config
ensure_local_sdk_src()
from codex_app_server import (
Codex,
TextInput,
ThreadTokenUsageUpdatedNotification,
TurnCompletedNotificationPayload,
)
print("Codex mini CLI. Type /exit to quit.")
def _status_value(status: object | None) -> str:
return str(getattr(status, "value", status))
def _format_usage(usage: object | None) -> str:
if usage is None:
return "usage> (none)"
last = getattr(usage, "last", None)
total = getattr(usage, "total", None)
if last is None or total is None:
return f"usage> {usage}"
return (
"usage>\n"
f" last: input={last.inputTokens} output={last.outputTokens} reasoning={last.reasoningOutputTokens} total={last.totalTokens} cached={last.cachedInputTokens}\n"
f" total: input={total.inputTokens} output={total.outputTokens} reasoning={total.reasoningOutputTokens} total={total.totalTokens} cached={total.cachedInputTokens}"
)
# Interactive sync chat loop: stream deltas, then print status and usage.
with Codex(config=runtime_config()) as codex:
    thread = codex.thread_start(
        model="gpt-5",
        config={"model_reasoning_effort": "high"},
    )
    print("Thread:", thread.id)
    while True:
        try:
            user_input = input("you> ").strip()
        except EOFError:
            break
        if not user_input:
            continue
        if user_input in {"/exit", "/quit"}:
            break
        turn = thread.turn(TextInput(user_input))
        usage = None
        status = None
        error = None
        streamed_text = False
        print("assistant> ", end="", flush=True)
        for event in turn.stream():
            payload = event.payload
            if event.method == "item/agentMessage/delta":
                delta = getattr(payload, "delta", "")
                if delta:
                    print(delta, end="", flush=True)
                    streamed_text = True
            elif isinstance(payload, ThreadTokenUsageUpdatedNotification):
                usage = payload.token_usage
            elif isinstance(payload, TurnCompletedNotificationPayload):
                status = payload.turn.status
                error = payload.turn.error
        print() if streamed_text else print("[no text]")
        status_text = _status_value(status)
        print(f"assistant.status> {status_text}")
        if status_text == "failed":
            print("assistant.error>", error)
        print(_format_usage(usage))

View File

@@ -0,0 +1,75 @@
import sys
from pathlib import Path
_EXAMPLES_ROOT = Path(__file__).resolve().parents[1]
if str(_EXAMPLES_ROOT) not in sys.path:
sys.path.insert(0, str(_EXAMPLES_ROOT))
from _bootstrap import ensure_local_sdk_src, runtime_config
ensure_local_sdk_src()
import asyncio
from codex_app_server import (
AskForApproval,
AsyncCodex,
Personality,
ReasoningEffort,
ReasoningSummary,
SandboxPolicy,
TextInput,
)
# Structured-output contract requested from the model.
OUTPUT_SCHEMA = {
    "type": "object",
    "properties": {
        "summary": {"type": "string"},
        "actions": {
            "type": "array",
            "items": {"type": "string"},
        },
    },
    "required": ["summary", "actions"],
    "additionalProperties": False,
}
SANDBOX_POLICY = SandboxPolicy.model_validate(
    {
        "type": "readOnly",
        "access": {"type": "fullAccess"},
    }
)
SUMMARY = ReasoningSummary.model_validate("concise")
PROMPT = (
    "Analyze a safe rollout plan for enabling a feature flag in production. "
    "Return JSON matching the requested schema."
)
APPROVAL_POLICY = AskForApproval.model_validate("never")


async def main() -> None:
    """Run one turn exercising most optional turn(...) parameters."""
    async with AsyncCodex(config=runtime_config()) as codex:
        thread = await codex.thread_start(
            model="gpt-5",
            config={"model_reasoning_effort": "high"},
        )
        turn = await thread.turn(
            TextInput(PROMPT),
            approval_policy=APPROVAL_POLICY,
            cwd=str(Path.cwd()),
            effort=ReasoningEffort.medium,
            model="gpt-5",
            output_schema=OUTPUT_SCHEMA,
            personality=Personality.pragmatic,
            sandbox_policy=SANDBOX_POLICY,
            summary=SUMMARY,
        )
        result = await turn.run()
        print("Status:", result.status)
        print("Text:", result.text)
        print("Usage:", result.usage)


if __name__ == "__main__":
    asyncio.run(main())

View File

@@ -0,0 +1,67 @@
import sys
from pathlib import Path
_EXAMPLES_ROOT = Path(__file__).resolve().parents[1]
if str(_EXAMPLES_ROOT) not in sys.path:
sys.path.insert(0, str(_EXAMPLES_ROOT))
from _bootstrap import ensure_local_sdk_src, runtime_config
ensure_local_sdk_src()
from codex_app_server import (
AskForApproval,
Codex,
Personality,
ReasoningEffort,
ReasoningSummary,
SandboxPolicy,
TextInput,
)
# Structured-output contract requested from the model.
OUTPUT_SCHEMA = {
    "type": "object",
    "properties": {
        "summary": {"type": "string"},
        "actions": {
            "type": "array",
            "items": {"type": "string"},
        },
    },
    "required": ["summary", "actions"],
    "additionalProperties": False,
}
SANDBOX_POLICY = SandboxPolicy.model_validate(
    {
        "type": "readOnly",
        "access": {"type": "fullAccess"},
    }
)
SUMMARY = ReasoningSummary.model_validate("concise")
PROMPT = (
    "Analyze a safe rollout plan for enabling a feature flag in production. "
    "Return JSON matching the requested schema."
)
APPROVAL_POLICY = AskForApproval.model_validate("never")

# Run one sync turn exercising most optional turn(...) parameters.
with Codex(config=runtime_config()) as codex:
    thread = codex.thread_start(
        model="gpt-5",
        config={"model_reasoning_effort": "high"},
    )
    turn = thread.turn(
        TextInput(PROMPT),
        approval_policy=APPROVAL_POLICY,
        cwd=str(Path.cwd()),
        effort=ReasoningEffort.medium,
        model="gpt-5",
        output_schema=OUTPUT_SCHEMA,
        personality=Personality.pragmatic,
        sandbox_policy=SANDBOX_POLICY,
        summary=SUMMARY,
    )
    result = turn.run()
    print("Status:", result.status)
    print("Text:", result.text)
    print("Usage:", result.usage)

View File

@@ -0,0 +1,121 @@
import sys
from pathlib import Path
_EXAMPLES_ROOT = Path(__file__).resolve().parents[1]
if str(_EXAMPLES_ROOT) not in sys.path:
sys.path.insert(0, str(_EXAMPLES_ROOT))
from _bootstrap import ensure_local_sdk_src, runtime_config
ensure_local_sdk_src()
import asyncio
from codex_app_server import (
AskForApproval,
AsyncCodex,
Personality,
ReasoningEffort,
ReasoningSummary,
SandboxPolicy,
TextInput,
)
REASONING_RANK = {
"none": 0,
"minimal": 1,
"low": 2,
"medium": 3,
"high": 4,
"xhigh": 5,
}
PREFERRED_MODEL = "gpt-5.4"
def _pick_highest_model(models):
visible = [m for m in models if not m.hidden] or models
preferred = next((m for m in visible if m.model == PREFERRED_MODEL or m.id == PREFERRED_MODEL), None)
if preferred is not None:
return preferred
known_names = {m.id for m in visible} | {m.model for m in visible}
top_candidates = [m for m in visible if not (m.upgrade and m.upgrade in known_names)]
pool = top_candidates or visible
return max(pool, key=lambda m: (m.model, m.id))
def _pick_highest_turn_effort(model) -> ReasoningEffort:
if not model.supported_reasoning_efforts:
return ReasoningEffort.medium
best = max(
model.supported_reasoning_efforts,
key=lambda option: REASONING_RANK.get(option.reasoning_effort.value, -1),
)
return ReasoningEffort(best.reasoning_effort.value)
# Structured-output contract requested from the model.
OUTPUT_SCHEMA = {
    "type": "object",
    "properties": {
        "summary": {"type": "string"},
        "actions": {
            "type": "array",
            "items": {"type": "string"},
        },
    },
    "required": ["summary", "actions"],
    "additionalProperties": False,
}
SANDBOX_POLICY = SandboxPolicy.model_validate(
    {
        "type": "readOnly",
        "access": {"type": "fullAccess"},
    }
)
APPROVAL_POLICY = AskForApproval.model_validate("never")


async def main() -> None:
    """Pick the best model/effort from the listing, then run two turns."""
    async with AsyncCodex(config=runtime_config()) as codex:
        models = await codex.models(include_hidden=True)
        selected_model = _pick_highest_model(models.data)
        selected_effort = _pick_highest_turn_effort(selected_model)
        print("selected.model:", selected_model.model)
        print("selected.effort:", selected_effort.value)
        thread = await codex.thread_start(
            model=selected_model.model,
            config={"model_reasoning_effort": selected_effort.value},
        )
        # Simple turn with explicit model/effort overrides.
        first_turn = await thread.turn(
            TextInput("Give one short sentence about reliable production releases."),
            model=selected_model.model,
            effort=selected_effort,
        )
        first = await first_turn.run()
        print("agent.message:", first.text)
        print("usage:", first.usage)
        # Second turn with the full optional-parameter set.
        second_turn = await thread.turn(
            TextInput("Return JSON for a safe feature-flag rollout plan."),
            approval_policy=APPROVAL_POLICY,
            cwd=str(Path.cwd()),
            effort=selected_effort,
            model=selected_model.model,
            output_schema=OUTPUT_SCHEMA,
            personality=Personality.pragmatic,
            sandbox_policy=SANDBOX_POLICY,
            summary=ReasoningSummary.model_validate("concise"),
        )
        second = await second_turn.run()
        print("agent.message.params:", second.text)
        print("usage.params:", second.usage)


if __name__ == "__main__":
    asyncio.run(main())

View File

@@ -0,0 +1,112 @@
import sys
from pathlib import Path
_EXAMPLES_ROOT = Path(__file__).resolve().parents[1]
if str(_EXAMPLES_ROOT) not in sys.path:
sys.path.insert(0, str(_EXAMPLES_ROOT))
from _bootstrap import ensure_local_sdk_src, runtime_config
ensure_local_sdk_src()
from codex_app_server import (
AskForApproval,
Codex,
Personality,
ReasoningEffort,
ReasoningSummary,
SandboxPolicy,
TextInput,
)
REASONING_RANK = {
"none": 0,
"minimal": 1,
"low": 2,
"medium": 3,
"high": 4,
"xhigh": 5,
}
PREFERRED_MODEL = "gpt-5.4"
def _pick_highest_model(models):
visible = [m for m in models if not m.hidden] or models
preferred = next((m for m in visible if m.model == PREFERRED_MODEL or m.id == PREFERRED_MODEL), None)
if preferred is not None:
return preferred
known_names = {m.id for m in visible} | {m.model for m in visible}
top_candidates = [m for m in visible if not (m.upgrade and m.upgrade in known_names)]
pool = top_candidates or visible
return max(pool, key=lambda m: (m.model, m.id))
def _pick_highest_turn_effort(model) -> ReasoningEffort:
if not model.supported_reasoning_efforts:
return ReasoningEffort.medium
best = max(
model.supported_reasoning_efforts,
key=lambda option: REASONING_RANK.get(option.reasoning_effort.value, -1),
)
return ReasoningEffort(best.reasoning_effort.value)
# Structured-output contract requested from the model.
OUTPUT_SCHEMA = {
    "type": "object",
    "properties": {
        "summary": {"type": "string"},
        "actions": {
            "type": "array",
            "items": {"type": "string"},
        },
    },
    "required": ["summary", "actions"],
    "additionalProperties": False,
}
SANDBOX_POLICY = SandboxPolicy.model_validate(
    {
        "type": "readOnly",
        "access": {"type": "fullAccess"},
    }
)
APPROVAL_POLICY = AskForApproval.model_validate("never")

# Pick the best model/effort from the listing, then run two turns (sync).
with Codex(config=runtime_config()) as codex:
    models = codex.models(include_hidden=True)
    selected_model = _pick_highest_model(models.data)
    selected_effort = _pick_highest_turn_effort(selected_model)
    print("selected.model:", selected_model.model)
    print("selected.effort:", selected_effort.value)
    thread = codex.thread_start(
        model=selected_model.model,
        config={"model_reasoning_effort": selected_effort.value},
    )
    # Simple turn with explicit model/effort overrides.
    first = thread.turn(
        TextInput("Give one short sentence about reliable production releases."),
        model=selected_model.model,
        effort=selected_effort,
    ).run()
    print("agent.message:", first.text)
    print("usage:", first.usage)
    # Second turn with the full optional-parameter set.
    second = thread.turn(
        TextInput("Return JSON for a safe feature-flag rollout plan."),
        approval_policy=APPROVAL_POLICY,
        cwd=str(Path.cwd()),
        effort=selected_effort,
        model=selected_model.model,
        output_schema=OUTPUT_SCHEMA,
        personality=Personality.pragmatic,
        sandbox_policy=SANDBOX_POLICY,
        summary=ReasoningSummary.model_validate("concise"),
    ).run()
    print("agent.message.params:", second.text)
    print("usage.params:", second.usage)

View File

@@ -0,0 +1,83 @@
# Python SDK Examples
Each example folder contains runnable versions:
- `sync.py` (public sync surface: `Codex`)
- `async.py` (public async surface: `AsyncCodex`)
All examples intentionally use only public SDK exports from `codex_app_server`.
## Prerequisites
- Python `>=3.10`
- Install SDK dependencies for the same Python interpreter you will use to run examples
Recommended setup (from `sdk/python`):
```bash
python -m venv .venv
source .venv/bin/activate
python -m pip install -U pip
python -m pip install -e .
```
When running examples from this repo checkout, the SDK source uses the local
tree and does not bundle a runtime binary. The helper in `examples/_bootstrap.py`
uses the installed `codex-cli-bin` runtime package.
If `codex-cli-bin` is not already installed, set `CODEX_PYTHON_RUNTIME_VERSION`
to a release version like `0.115.0-alpha.11`; the bootstrap will download the
matching GitHub release artifact, stage a temporary local `codex-cli-bin`
package, install it into your active interpreter, and clean up the temporary
files afterward.
## Run examples
From `sdk/python`:
```bash
python examples/<example-folder>/sync.py
python examples/<example-folder>/async.py
```
The examples bootstrap local imports from `sdk/python/src` automatically, so no
SDK wheel install is required. You only need the Python dependencies for your
active interpreter and an installed `codex-cli-bin` runtime package (either
already present or provisioned through `CODEX_PYTHON_RUNTIME_VERSION`).
## Recommended first run
```bash
export CODEX_PYTHON_RUNTIME_VERSION=0.115.0-alpha.11
python examples/01_quickstart_constructor/sync.py
python examples/01_quickstart_constructor/async.py
```
## Index
- `01_quickstart_constructor/`
- first run / sanity check
- `02_turn_run/`
- inspect full turn output fields
- `03_turn_stream_events/`
- stream and print raw notifications
- `04_models_and_metadata/`
- read server metadata and model list
- `05_existing_thread/`
- resume a real existing thread (created in-script)
- `06_thread_lifecycle_and_controls/`
- thread lifecycle + control calls
- `07_image_and_text/`
- remote image URL + text multimodal turn
- `08_local_image_and_text/`
- local image + text multimodal turn using bundled sample image
- `09_async_parity/`
- demonstrates sync/async API parity (compare `sync.py` with the async variants in the other examples)
- `10_error_handling_and_retry/`
- overload retry pattern + typed error handling structure
- `11_cli_mini_app/`
- interactive chat loop
- `12_turn_params_kitchen_sink/`
- one turn using most optional `turn(...)` params (sync + async)
- `13_model_select_and_turn_params/`
- list models, pick highest model + highest supported reasoning effort, run turns, print message and usage

View File

@@ -0,0 +1,51 @@
from __future__ import annotations
import importlib.util
import os
import sys
from pathlib import Path
# Resolve the sdk/python directory (the parent of examples/) and make it
# importable so `_runtime_setup` can be loaded straight from the repo checkout.
_SDK_PYTHON_DIR = Path(__file__).resolve().parents[1]
_SDK_PYTHON_STR = str(_SDK_PYTHON_DIR)
if _SDK_PYTHON_STR not in sys.path:
    sys.path.insert(0, _SDK_PYTHON_STR)
from _runtime_setup import ensure_runtime_package_installed
def _ensure_runtime_dependencies(sdk_python_dir: Path) -> None:
if importlib.util.find_spec("pydantic") is not None:
return
python = sys.executable
raise RuntimeError(
"Missing required dependency: pydantic.\n"
f"Interpreter: {python}\n"
"Install dependencies with the same interpreter used to run this example:\n"
f" {python} -m pip install -e {sdk_python_dir}\n"
"If you installed with `pip` from another Python, reinstall using the command above."
)
def ensure_local_sdk_src() -> Path:
    """Add sdk/python/src to sys.path so examples run without installing the package.

    Validates that the local checkout actually contains the package, checks
    runtime dependencies, then prepends the src directory to sys.path exactly
    once. Returns the src directory.
    """
    src_dir = _SDK_PYTHON_DIR / "src"
    package_dir = src_dir / "codex_app_server"
    if not package_dir.exists():
        raise RuntimeError(f"Could not locate local SDK package at {package_dir}")
    _ensure_runtime_dependencies(_SDK_PYTHON_DIR)
    entry = str(src_dir)
    if entry not in sys.path:
        sys.path.insert(0, entry)
    return src_dir
def runtime_config():
    """Return an example-friendly AppServerConfig for repo-source SDK usage."""
    # Imported lazily so the repo src/ tree can be put on sys.path first
    # (via ensure_local_sdk_src) before codex_app_server is resolved.
    from codex_app_server import AppServerConfig
    # Provision the codex-cli-bin runtime into the current interpreter.
    # NOTE(review): presumably a no-op when the package is already installed —
    # confirm against _runtime_setup.
    ensure_runtime_package_installed(sys.executable, _SDK_PYTHON_DIR)
    # Default config: no explicit binary override; runtime resolution is left
    # to the SDK (installed codex-cli-bin package).
    return AppServerConfig()

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.6 KiB

View File

@@ -0,0 +1,549 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Codex Python SDK Walkthrough\n",
"\n",
"Public SDK surface only (`codex_app_server` root exports)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cell 1: bootstrap local SDK imports + pinned runtime package\n",
"import os\n",
"import sys\n",
"from pathlib import Path\n",
"\n",
"if sys.version_info < (3, 10):\n",
" raise RuntimeError(\n",
" f'Notebook requires Python 3.10+; current interpreter is {sys.version.split()[0]}.'\n",
" )\n",
"\n",
"try:\n",
" _ = os.getcwd()\n",
"except FileNotFoundError:\n",
" os.chdir(str(Path.home()))\n",
"\n",
"\n",
"def _is_sdk_python_dir(path: Path) -> bool:\n",
" return (path / 'pyproject.toml').exists() and (path / 'src' / 'codex_app_server').exists()\n",
"\n",
"\n",
"def _iter_home_fallback_candidates(home: Path):\n",
" # bounded depth scan under home to support launching notebooks from unrelated cwd values\n",
" patterns = ('sdk/python', '*/sdk/python', '*/*/sdk/python', '*/*/*/sdk/python')\n",
" for pattern in patterns:\n",
" yield from home.glob(pattern)\n",
"\n",
"\n",
"def _find_sdk_python_dir(start: Path) -> Path | None:\n",
" checked = set()\n",
"\n",
" def _consider(candidate: Path) -> Path | None:\n",
" resolved = candidate.resolve()\n",
" if resolved in checked:\n",
" return None\n",
" checked.add(resolved)\n",
" if _is_sdk_python_dir(resolved):\n",
" return resolved\n",
" return None\n",
"\n",
" for candidate in [start, *start.parents]:\n",
" found = _consider(candidate)\n",
" if found is not None:\n",
" return found\n",
"\n",
" for candidate in [start / 'sdk' / 'python', *(parent / 'sdk' / 'python' for parent in start.parents)]:\n",
" found = _consider(candidate)\n",
" if found is not None:\n",
" return found\n",
"\n",
" env_dir = os.environ.get('CODEX_PYTHON_SDK_DIR')\n",
" if env_dir:\n",
" found = _consider(Path(env_dir).expanduser())\n",
" if found is not None:\n",
" return found\n",
"\n",
" for entry in sys.path:\n",
" if not entry:\n",
" continue\n",
" entry_path = Path(entry).expanduser()\n",
" for candidate in (entry_path, entry_path / 'sdk' / 'python'):\n",
" found = _consider(candidate)\n",
" if found is not None:\n",
" return found\n",
"\n",
" home = Path.home()\n",
" for candidate in _iter_home_fallback_candidates(home):\n",
" found = _consider(candidate)\n",
" if found is not None:\n",
" return found\n",
"\n",
" return None\n",
"\n",
"\n",
"repo_python_dir = _find_sdk_python_dir(Path.cwd())\n",
"if repo_python_dir is None:\n",
" raise RuntimeError('Could not locate sdk/python. Set CODEX_PYTHON_SDK_DIR to your sdk/python path.')\n",
"\n",
"repo_python_str = str(repo_python_dir)\n",
"if repo_python_str not in sys.path:\n",
" sys.path.insert(0, repo_python_str)\n",
"\n",
"from _runtime_setup import configured_runtime_version, ensure_runtime_package_installed\n",
"\n",
"runtime_version = ensure_runtime_package_installed(\n",
" sys.executable,\n",
" repo_python_dir,\n",
" configured_runtime_version(),\n",
")\n",
"\n",
"src_dir = repo_python_dir / 'src'\n",
"src_str = str(src_dir)\n",
"if src_str not in sys.path:\n",
" sys.path.insert(0, src_str)\n",
"\n",
"# Force fresh imports after SDK upgrades in the same notebook kernel.\n",
"for module_name in list(sys.modules):\n",
" if module_name == 'codex_app_server' or module_name.startswith('codex_app_server.'):\n",
" sys.modules.pop(module_name, None)\n",
"\n",
"print('Kernel:', sys.executable)\n",
"print('SDK source:', src_dir)\n",
"print('Runtime package:', runtime_version)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cell 2: imports (public only)\n",
"from codex_app_server import (\n",
" AsyncCodex,\n",
" Codex,\n",
" ImageInput,\n",
" LocalImageInput,\n",
" TextInput,\n",
" retry_on_overload,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cell 3: simple sync conversation\n",
"with Codex() as codex:\n",
" thread = codex.thread_start(model='gpt-5', config={'model_reasoning_effort': 'high'})\n",
" turn = thread.turn(TextInput('Explain gradient descent in 3 bullets.'))\n",
" result = turn.run()\n",
"\n",
" print('server:', codex.metadata)\n",
" print('status:', result.status)\n",
" print(result.text)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cell 4: multi-turn continuity in same thread\n",
"with Codex() as codex:\n",
" thread = codex.thread_start(model='gpt-5', config={'model_reasoning_effort': 'high'})\n",
"\n",
" first = thread.turn(TextInput('Give a short summary of transformers.')).run()\n",
" second = thread.turn(TextInput('Now explain that to a high-school student.')).run()\n",
"\n",
" print('first status:', first.status)\n",
" print('second status:', second.status)\n",
" print('second text:', second.text)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cell 5: full thread lifecycle and branching (sync)\n",
"with Codex() as codex:\n",
" thread = codex.thread_start(model='gpt-5', config={'model_reasoning_effort': 'high'})\n",
" first = thread.turn(TextInput('One sentence about structured planning.')).run()\n",
" second = thread.turn(TextInput('Now restate it for a junior engineer.')).run()\n",
"\n",
" reopened = codex.thread_resume(thread.id)\n",
" listing_active = codex.thread_list(limit=20, archived=False)\n",
" reading = reopened.read(include_turns=True)\n",
"\n",
" _ = reopened.set_name('sdk-lifecycle-demo')\n",
" _ = codex.thread_archive(reopened.id)\n",
" listing_archived = codex.thread_list(limit=20, archived=True)\n",
" unarchived = codex.thread_unarchive(reopened.id)\n",
"\n",
" resumed_info = 'n/a'\n",
" try:\n",
" resumed = codex.thread_resume(\n",
" unarchived.id,\n",
" model='gpt-5',\n",
" config={'model_reasoning_effort': 'high'},\n",
" )\n",
" resumed_result = resumed.turn(TextInput('Continue in one short sentence.')).run()\n",
" resumed_info = f'{resumed_result.turn_id} {resumed_result.status}'\n",
" except Exception as e:\n",
" resumed_info = f'skipped({type(e).__name__})'\n",
"\n",
" forked_info = 'n/a'\n",
" try:\n",
" forked = codex.thread_fork(unarchived.id, model='gpt-5')\n",
" forked_result = forked.turn(TextInput('Take a different angle in one short sentence.')).run()\n",
" forked_info = f'{forked_result.turn_id} {forked_result.status}'\n",
" except Exception as e:\n",
" forked_info = f'skipped({type(e).__name__})'\n",
"\n",
" compact_info = 'sent'\n",
" try:\n",
" _ = unarchived.compact()\n",
" except Exception as e:\n",
" compact_info = f'skipped({type(e).__name__})'\n",
"\n",
" print('Lifecycle OK:', thread.id)\n",
" print('first:', first.turn_id, first.status)\n",
" print('second:', second.turn_id, second.status)\n",
" print('read.turns:', len(reading.thread.turns or []))\n",
" print('list.active:', len(listing_active.data))\n",
" print('list.archived:', len(listing_archived.data))\n",
" print('resumed:', resumed_info)\n",
" print('forked:', forked_info)\n",
" print('compact:', compact_info)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cell 5b: one turn with most optional turn params\n",
"from pathlib import Path\n",
"from codex_app_server import (\n",
" AskForApproval,\n",
" Personality,\n",
" ReasoningEffort,\n",
" ReasoningSummary,\n",
" SandboxPolicy,\n",
")\n",
"\n",
"output_schema = {\n",
" 'type': 'object',\n",
" 'properties': {\n",
" 'summary': {'type': 'string'},\n",
" 'actions': {'type': 'array', 'items': {'type': 'string'}},\n",
" },\n",
" 'required': ['summary', 'actions'],\n",
" 'additionalProperties': False,\n",
"}\n",
"\n",
"sandbox_policy = SandboxPolicy.model_validate({'type': 'readOnly', 'access': {'type': 'fullAccess'}})\n",
"summary = ReasoningSummary.model_validate('concise')\n",
"\n",
"with Codex() as codex:\n",
" thread = codex.thread_start(model='gpt-5', config={'model_reasoning_effort': 'high'})\n",
" turn = thread.turn(\n",
" TextInput('Propose a safe production feature-flag rollout. Return JSON matching the schema.'),\n",
" approval_policy=AskForApproval.never,\n",
" cwd=str(Path.cwd()),\n",
" effort=ReasoningEffort.medium,\n",
" model='gpt-5',\n",
" output_schema=output_schema,\n",
" personality=Personality.pragmatic,\n",
" sandbox_policy=sandbox_policy,\n",
" summary=summary,\n",
" )\n",
" result = turn.run()\n",
"\n",
" print('status:', result.status)\n",
" print(result.text)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cell 5c: choose highest model + highest supported reasoning, then run turns\n",
"from pathlib import Path\n",
"from codex_app_server import (\n",
" AskForApproval,\n",
" Personality,\n",
" ReasoningEffort,\n",
" ReasoningSummary,\n",
" SandboxPolicy,\n",
")\n",
"\n",
"reasoning_rank = {\n",
" 'none': 0,\n",
" 'minimal': 1,\n",
" 'low': 2,\n",
" 'medium': 3,\n",
" 'high': 4,\n",
" 'xhigh': 5,\n",
"}\n",
"\n",
"\n",
"def pick_highest_model(models):\n",
" visible = [m for m in models if not m.hidden] or models\n",
" known_names = {m.id for m in visible} | {m.model for m in visible}\n",
" top_candidates = [m for m in visible if not (m.upgrade and m.upgrade in known_names)]\n",
" pool = top_candidates or visible\n",
" return max(pool, key=lambda m: (m.model, m.id))\n",
"\n",
"\n",
"def pick_highest_turn_effort(model) -> ReasoningEffort:\n",
" if not model.supported_reasoning_efforts:\n",
" return ReasoningEffort.medium\n",
" best = max(model.supported_reasoning_efforts, key=lambda opt: reasoning_rank.get(opt.reasoning_effort.value, -1))\n",
" return ReasoningEffort(best.reasoning_effort.value)\n",
"\n",
"\n",
"output_schema = {\n",
" 'type': 'object',\n",
" 'properties': {\n",
" 'summary': {'type': 'string'},\n",
" 'actions': {'type': 'array', 'items': {'type': 'string'}},\n",
" },\n",
" 'required': ['summary', 'actions'],\n",
" 'additionalProperties': False,\n",
"}\n",
"sandbox_policy = SandboxPolicy.model_validate({'type': 'readOnly', 'access': {'type': 'fullAccess'}})\n",
"\n",
"with Codex() as codex:\n",
" models = codex.models(include_hidden=True)\n",
" selected_model = pick_highest_model(models.data)\n",
" selected_effort = pick_highest_turn_effort(selected_model)\n",
"\n",
" print('selected.model:', selected_model.model)\n",
" print('selected.effort:', selected_effort.value)\n",
"\n",
" thread = codex.thread_start(model=selected_model.model, config={'model_reasoning_effort': selected_effort.value})\n",
"\n",
" first = thread.turn(\n",
" TextInput('Give one short sentence about reliable production releases.'),\n",
" model=selected_model.model,\n",
" effort=selected_effort,\n",
" ).run()\n",
" print('agent.message:', first.text)\n",
" print('usage:', first.usage)\n",
"\n",
" second = thread.turn(\n",
" TextInput('Return JSON for a safe feature-flag rollout plan.'),\n",
" approval_policy=AskForApproval.never,\n",
" cwd=str(Path.cwd()),\n",
" effort=selected_effort,\n",
" model=selected_model.model,\n",
" output_schema=output_schema,\n",
" personality=Personality.pragmatic,\n",
" sandbox_policy=sandbox_policy,\n",
" summary=ReasoningSummary.model_validate('concise'),\n",
" ).run()\n",
" print('agent.message.params:', second.text)\n",
" print('usage.params:', second.usage)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cell 6: multimodal with remote image\n",
"remote_image_url = 'https://raw.githubusercontent.com/github/explore/main/topics/python/python.png'\n",
"\n",
"with Codex() as codex:\n",
" thread = codex.thread_start(model='gpt-5', config={'model_reasoning_effort': 'high'})\n",
" result = thread.turn([\n",
" TextInput('What do you see in this image? 3 bullets.'),\n",
" ImageInput(remote_image_url),\n",
" ]).run()\n",
"\n",
" print('status:', result.status)\n",
" print(result.text)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cell 7: multimodal with local image (bundled asset)\n",
"local_image_path = repo_python_dir / 'examples' / 'assets' / 'sample_scene.png'\n",
"if not local_image_path.exists():\n",
" raise FileNotFoundError(f'Missing bundled image: {local_image_path}')\n",
"\n",
"with Codex() as codex:\n",
" thread = codex.thread_start(model='gpt-5', config={'model_reasoning_effort': 'high'})\n",
" result = thread.turn([\n",
" TextInput('Describe this local image in 2 bullets.'),\n",
" LocalImageInput(str(local_image_path.resolve())),\n",
" ]).run()\n",
"\n",
" print('status:', result.status)\n",
" print(result.text)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cell 8: retry-on-overload pattern\n",
"with Codex() as codex:\n",
" thread = codex.thread_start(model='gpt-5', config={'model_reasoning_effort': 'high'})\n",
"\n",
" result = retry_on_overload(\n",
" lambda: thread.turn(TextInput('List 5 failure modes in distributed systems.')).run(),\n",
" max_attempts=3,\n",
" initial_delay_s=0.25,\n",
" max_delay_s=2.0,\n",
" )\n",
"\n",
" print('status:', result.status)\n",
" print(result.text)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cell 9: full thread lifecycle and branching (async)\n",
"import asyncio\n",
"\n",
"\n",
"async def async_lifecycle_demo():\n",
" async with AsyncCodex() as codex:\n",
" thread = await codex.thread_start(model='gpt-5', config={'model_reasoning_effort': 'high'})\n",
" first = await (await thread.turn(TextInput('One sentence about structured planning.'))).run()\n",
" second = await (await thread.turn(TextInput('Now restate it for a junior engineer.'))).run()\n",
"\n",
" reopened = await codex.thread_resume(thread.id)\n",
" listing_active = await codex.thread_list(limit=20, archived=False)\n",
" reading = await reopened.read(include_turns=True)\n",
"\n",
" _ = await reopened.set_name('sdk-lifecycle-demo')\n",
" _ = await codex.thread_archive(reopened.id)\n",
" listing_archived = await codex.thread_list(limit=20, archived=True)\n",
" unarchived = await codex.thread_unarchive(reopened.id)\n",
"\n",
" resumed_info = 'n/a'\n",
" try:\n",
" resumed = await codex.thread_resume(\n",
" unarchived.id,\n",
" model='gpt-5',\n",
" config={'model_reasoning_effort': 'high'},\n",
" )\n",
" resumed_result = await (await resumed.turn(TextInput('Continue in one short sentence.'))).run()\n",
" resumed_info = f'{resumed_result.turn_id} {resumed_result.status}'\n",
" except Exception as e:\n",
" resumed_info = f'skipped({type(e).__name__})'\n",
"\n",
" forked_info = 'n/a'\n",
" try:\n",
" forked = await codex.thread_fork(unarchived.id, model='gpt-5')\n",
" forked_result = await (await forked.turn(TextInput('Take a different angle in one short sentence.'))).run()\n",
" forked_info = f'{forked_result.turn_id} {forked_result.status}'\n",
" except Exception as e:\n",
" forked_info = f'skipped({type(e).__name__})'\n",
"\n",
" compact_info = 'sent'\n",
" try:\n",
" _ = await unarchived.compact()\n",
" except Exception as e:\n",
" compact_info = f'skipped({type(e).__name__})'\n",
"\n",
" print('Lifecycle OK:', thread.id)\n",
" print('first:', first.turn_id, first.status)\n",
" print('second:', second.turn_id, second.status)\n",
" print('read.turns:', len(reading.thread.turns or []))\n",
" print('list.active:', len(listing_active.data))\n",
" print('list.archived:', len(listing_archived.data))\n",
" print('resumed:', resumed_info)\n",
" print('forked:', forked_info)\n",
" print('compact:', compact_info)\n",
"\n",
"\n",
"await async_lifecycle_demo()\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cell 10: async stream + steer + interrupt (best effort)\n",
"import asyncio\n",
"\n",
"\n",
"async def async_stream_demo():\n",
" async with AsyncCodex() as codex:\n",
" thread = await codex.thread_start(model='gpt-5', config={'model_reasoning_effort': 'high'})\n",
" turn = await thread.turn(TextInput('Count from 1 to 200 with commas, then one summary sentence.'))\n",
"\n",
" try:\n",
" _ = await turn.steer(TextInput('Keep it brief and stop after 20 numbers.'))\n",
" print('steer: sent')\n",
" except Exception as e:\n",
" print('steer: skipped', type(e).__name__)\n",
"\n",
" try:\n",
" _ = await turn.interrupt()\n",
" print('interrupt: sent')\n",
" except Exception as e:\n",
" print('interrupt: skipped', type(e).__name__)\n",
"\n",
" event_count = 0\n",
" async for event in turn.stream():\n",
" event_count += 1\n",
" print(event.method, event.payload)\n",
"\n",
" print('events.count:', event_count)\n",
"\n",
"\n",
"await async_stream_demo()\n",
"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.10+"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -1,10 +1,115 @@
from .async_client import AsyncAppServerClient
from .client import AppServerClient, AppServerConfig
from .errors import AppServerError, JsonRpcError, TransportClosedError
from .errors import (
AppServerError,
AppServerRpcError,
InternalRpcError,
InvalidParamsError,
InvalidRequestError,
JsonRpcError,
MethodNotFoundError,
ParseError,
RetryLimitExceededError,
ServerBusyError,
TransportClosedError,
is_retryable_error,
)
from .generated.v2_types import (
ThreadItem,
ThreadTokenUsageUpdatedNotification,
TurnCompletedNotificationPayload,
)
from .public_api import (
AsyncCodex,
AsyncThread,
AsyncTurn,
Codex,
ImageInput,
InitializeResult,
Input,
InputItem,
LocalImageInput,
MentionInput,
SkillInput,
TextInput,
Thread,
Turn,
TurnResult,
)
from .public_types import (
AskForApproval,
Personality,
PlanType,
ReasoningEffort,
ReasoningSummary,
SandboxMode,
SandboxPolicy,
ServiceTier,
ThreadForkParams,
ThreadListParams,
ThreadResumeParams,
ThreadSortKey,
ThreadSourceKind,
ThreadStartParams,
TurnStartParams,
TurnStatus,
TurnSteerParams,
)
from .retry import retry_on_overload
__version__ = "0.2.0"
__all__ = [
"__version__",
"AppServerClient",
"AsyncAppServerClient",
"AppServerConfig",
"Codex",
"AsyncCodex",
"Thread",
"AsyncThread",
"Turn",
"AsyncTurn",
"TurnResult",
"InitializeResult",
"Input",
"InputItem",
"TextInput",
"ImageInput",
"LocalImageInput",
"SkillInput",
"MentionInput",
"ThreadItem",
"ThreadTokenUsageUpdatedNotification",
"TurnCompletedNotificationPayload",
"AskForApproval",
"Personality",
"PlanType",
"ReasoningEffort",
"ReasoningSummary",
"SandboxMode",
"SandboxPolicy",
"ServiceTier",
"ThreadStartParams",
"ThreadResumeParams",
"ThreadListParams",
"ThreadSortKey",
"ThreadSourceKind",
"ThreadForkParams",
"TurnStatus",
"TurnStartParams",
"TurnSteerParams",
"retry_on_overload",
"AppServerError",
"JsonRpcError",
"TransportClosedError",
"JsonRpcError",
"AppServerRpcError",
"ParseError",
"InvalidRequestError",
"MethodNotFoundError",
"InvalidParamsError",
"InternalRpcError",
"ServerBusyError",
"RetryLimitExceededError",
"is_retryable_error",
]

View File

@@ -0,0 +1,208 @@
from __future__ import annotations
import asyncio
from collections.abc import Iterator
from typing import AsyncIterator, Callable, Iterable, ParamSpec, TypeVar
from pydantic import BaseModel
from .client import AppServerClient, AppServerConfig
from .generated.v2_all import (
AgentMessageDeltaNotification,
ModelListResponse,
ThreadArchiveResponse,
ThreadCompactStartResponse,
ThreadForkParams as V2ThreadForkParams,
ThreadForkResponse,
ThreadListParams as V2ThreadListParams,
ThreadListResponse,
ThreadReadResponse,
ThreadResumeParams as V2ThreadResumeParams,
ThreadResumeResponse,
ThreadSetNameResponse,
ThreadStartParams as V2ThreadStartParams,
ThreadStartResponse,
ThreadUnarchiveResponse,
TurnCompletedNotification,
TurnInterruptResponse,
TurnStartParams as V2TurnStartParams,
TurnStartResponse,
TurnSteerResponse,
)
from .models import InitializeResponse, JsonObject, Notification
ModelT = TypeVar("ModelT", bound=BaseModel)
ParamsT = ParamSpec("ParamsT")
ReturnT = TypeVar("ReturnT")
class AsyncAppServerClient:
    """Async wrapper around AppServerClient using thread offloading.

    Every RPC delegates to the synchronous client via ``asyncio.to_thread``
    while holding a single ``asyncio.Lock``, so at most one worker thread
    touches the stdio transport at any time.
    """

    def __init__(self, config: AppServerConfig | None = None) -> None:
        self._sync = AppServerClient(config=config)
        # Single stdio transport cannot be read safely from multiple threads.
        self._transport_lock = asyncio.Lock()

    async def __aenter__(self) -> "AsyncAppServerClient":
        await self.start()
        return self

    async def __aexit__(self, _exc_type, _exc, _tb) -> None:
        # Close unconditionally, even when an exception escaped the body.
        await self.close()

    async def _call_sync(
        self,
        fn: Callable[ParamsT, ReturnT],
        /,
        *args: ParamsT.args,
        **kwargs: ParamsT.kwargs,
    ) -> ReturnT:
        # Serialize every sync-client call behind the transport lock and run it
        # off the event loop so blocking stdio I/O cannot stall other tasks.
        # ParamSpec preserves the wrapped callable's signature for type checkers.
        async with self._transport_lock:
            return await asyncio.to_thread(fn, *args, **kwargs)

    @staticmethod
    def _next_from_iterator(
        iterator: Iterator[AgentMessageDeltaNotification],
    ) -> tuple[bool, AgentMessageDeltaNotification | None]:
        # Advance a blocking iterator one step, reporting exhaustion as a flag
        # so StopIteration never has to cross the thread/async boundary.
        try:
            return True, next(iterator)
        except StopIteration:
            return False, None

    async def start(self) -> None:
        """Start the underlying synchronous client."""
        await self._call_sync(self._sync.start)

    async def close(self) -> None:
        """Close the underlying synchronous client."""
        await self._call_sync(self._sync.close)

    async def initialize(self) -> InitializeResponse:
        """Run the initialize handshake and return its typed response."""
        return await self._call_sync(self._sync.initialize)

    def acquire_turn_consumer(self, turn_id: str) -> None:
        # NOTE(review): deliberately called without the transport lock or
        # thread offload — presumably in-memory bookkeeping on the sync client;
        # confirm it performs no transport I/O.
        self._sync.acquire_turn_consumer(turn_id)

    def release_turn_consumer(self, turn_id: str) -> None:
        # Lock-free delegation, mirroring acquire_turn_consumer.
        self._sync.release_turn_consumer(turn_id)

    async def request(
        self,
        method: str,
        params: JsonObject | None,
        *,
        response_model: type[ModelT],
    ) -> ModelT:
        """Send a raw request and validate the result against ``response_model``."""
        return await self._call_sync(
            self._sync.request,
            method,
            params,
            response_model=response_model,
        )

    async def thread_start(self, params: V2ThreadStartParams | JsonObject | None = None) -> ThreadStartResponse:
        """Start a new thread."""
        return await self._call_sync(self._sync.thread_start, params)

    async def thread_resume(
        self,
        thread_id: str,
        params: V2ThreadResumeParams | JsonObject | None = None,
    ) -> ThreadResumeResponse:
        """Resume an existing thread by id."""
        return await self._call_sync(self._sync.thread_resume, thread_id, params)

    async def thread_list(self, params: V2ThreadListParams | JsonObject | None = None) -> ThreadListResponse:
        """List threads."""
        return await self._call_sync(self._sync.thread_list, params)

    async def thread_read(self, thread_id: str, include_turns: bool = False) -> ThreadReadResponse:
        """Read a thread, optionally including its turns."""
        return await self._call_sync(self._sync.thread_read, thread_id, include_turns)

    async def thread_fork(
        self,
        thread_id: str,
        params: V2ThreadForkParams | JsonObject | None = None,
    ) -> ThreadForkResponse:
        """Fork a thread into a new one."""
        return await self._call_sync(self._sync.thread_fork, thread_id, params)

    async def thread_archive(self, thread_id: str) -> ThreadArchiveResponse:
        """Archive a thread."""
        return await self._call_sync(self._sync.thread_archive, thread_id)

    async def thread_unarchive(self, thread_id: str) -> ThreadUnarchiveResponse:
        """Unarchive a thread."""
        return await self._call_sync(self._sync.thread_unarchive, thread_id)

    async def thread_set_name(self, thread_id: str, name: str) -> ThreadSetNameResponse:
        """Set a thread's display name."""
        return await self._call_sync(self._sync.thread_set_name, thread_id, name)

    async def thread_compact(self, thread_id: str) -> ThreadCompactStartResponse:
        """Start compaction for a thread."""
        return await self._call_sync(self._sync.thread_compact, thread_id)

    async def turn_start(
        self,
        thread_id: str,
        input_items: list[JsonObject] | JsonObject | str,
        params: V2TurnStartParams | JsonObject | None = None,
    ) -> TurnStartResponse:
        """Start a turn on a thread with the given input items."""
        return await self._call_sync(self._sync.turn_start, thread_id, input_items, params)

    async def turn_interrupt(self, thread_id: str, turn_id: str) -> TurnInterruptResponse:
        """Interrupt an in-flight turn."""
        return await self._call_sync(self._sync.turn_interrupt, thread_id, turn_id)

    async def turn_steer(
        self,
        thread_id: str,
        expected_turn_id: str,
        input_items: list[JsonObject] | JsonObject | str,
    ) -> TurnSteerResponse:
        """Steer the turn identified by ``expected_turn_id`` with extra input."""
        return await self._call_sync(
            self._sync.turn_steer,
            thread_id,
            expected_turn_id,
            input_items,
        )

    async def model_list(self, include_hidden: bool = False) -> ModelListResponse:
        """List available models, optionally including hidden ones."""
        return await self._call_sync(self._sync.model_list, include_hidden)

    async def request_with_retry_on_overload(
        self,
        method: str,
        params: JsonObject | None,
        *,
        response_model: type[ModelT],
        max_attempts: int = 3,
        initial_delay_s: float = 0.25,
        max_delay_s: float = 2.0,
    ) -> ModelT:
        """Send a request using the sync client's overload retry policy."""
        return await self._call_sync(
            self._sync.request_with_retry_on_overload,
            method,
            params,
            response_model=response_model,
            max_attempts=max_attempts,
            initial_delay_s=initial_delay_s,
            max_delay_s=max_delay_s,
        )

    async def next_notification(self) -> Notification:
        """Block (off the event loop) until the next notification arrives."""
        return await self._call_sync(self._sync.next_notification)

    async def wait_for_turn_completed(self, turn_id: str) -> TurnCompletedNotification:
        """Wait until the turn with ``turn_id`` reports completion."""
        return await self._call_sync(self._sync.wait_for_turn_completed, turn_id)

    async def stream_until_methods(self, methods: Iterable[str] | str) -> list[Notification]:
        """Consume notifications per the sync client until one of ``methods`` is seen."""
        return await self._call_sync(self._sync.stream_until_methods, methods)

    async def stream_text(
        self,
        thread_id: str,
        text: str,
        params: V2TurnStartParams | JsonObject | None = None,
    ) -> AsyncIterator[AgentMessageDeltaNotification]:
        """Stream agent-message deltas for a text turn as an async iterator.

        The transport lock is held for the entire stream; each blocking
        ``next()`` is offloaded to a worker thread.

        NOTE(review): because the lock is held across ``yield``, any other call
        on this client will wait until the stream is fully consumed (and an
        abandoned generator keeps the lock until it is finalized) — confirm
        this is the intended contract.
        """
        async with self._transport_lock:
            iterator = self._sync.stream_text(thread_id, text, params)
            while True:
                has_value, chunk = await asyncio.to_thread(
                    self._next_from_iterator,
                    iterator,
                )
                if not has_value:
                    break
                yield chunk

View File

@@ -1,25 +1,23 @@
"""Stable aliases over full v2 autogenerated models (datamodel-code-generator)."""
"""Stable aliases over the canonical generated v2 models."""
from .v2_all.ModelListResponse import ModelListResponse
from .v2_all.ThreadCompactStartResponse import ThreadCompactStartResponse
from .v2_all.ThreadListResponse import ThreadListResponse
from .v2_all.ThreadReadResponse import ThreadReadResponse
from .v2_all.ThreadTokenUsageUpdatedNotification import (
from .v2_all import (
ModelListResponse,
ThreadCompactStartResponse,
ThreadItem,
ThreadListResponse,
ThreadReadResponse,
ThreadTokenUsageUpdatedNotification,
)
from .v2_all.TurnCompletedNotification import ThreadItem153 as ThreadItem
from .v2_all.TurnCompletedNotification import (
TurnCompletedNotification as TurnCompletedNotificationPayload,
TurnSteerResponse,
)
from .v2_all.TurnSteerResponse import TurnSteerResponse
__all__ = [
"ModelListResponse",
"ThreadCompactStartResponse",
"ThreadItem",
"ThreadListResponse",
"ThreadReadResponse",
"ThreadTokenUsageUpdatedNotification",
"TurnCompletedNotificationPayload",
"TurnSteerResponse",
"ThreadItem",
]

View File

@@ -0,0 +1,795 @@
from __future__ import annotations
import asyncio
from dataclasses import dataclass
from typing import AsyncIterator, Iterator
from .async_client import AsyncAppServerClient
from .client import AppServerClient, AppServerConfig
from .generated.v2_all import (
AgentMessageDeltaNotification,
RawResponseItemCompletedNotification,
ThreadArchiveResponse,
ThreadSetNameResponse,
TurnError,
TurnInterruptResponse,
)
from .generated.v2_types import (
ModelListResponse,
ThreadCompactStartResponse,
ThreadItem,
ThreadListResponse,
ThreadReadResponse,
ThreadTokenUsageUpdatedNotification,
TurnCompletedNotificationPayload,
TurnSteerResponse,
)
from .models import InitializeResponse, JsonObject, Notification
from .public_types import (
AskForApproval,
Personality,
ReasoningEffort,
ReasoningSummary,
SandboxMode,
SandboxPolicy,
ServiceTier,
ThreadForkParams,
ThreadListParams,
ThreadResumeParams,
ThreadSortKey,
ThreadSourceKind,
ThreadStartParams,
TurnStartParams,
TurnStatus,
)
@dataclass(slots=True)
class TurnResult:
    """Final outcome of one turn, assembled from streamed notifications."""

    thread_id: str
    turn_id: str
    status: TurnStatus
    error: TurnError | None  # server-reported turn error, if any
    text: str  # assistant text: joined streamed deltas, else raw output_text chunks
    items: list[ThreadItem]
    usage: ThreadTokenUsageUpdatedNotification | None = None  # None when no usage notification was seen
@dataclass(slots=True)
class TextInput:
    """Plain text input item (wire type "text")."""

    text: str


@dataclass(slots=True)
class ImageInput:
    """Remote image referenced by URL (wire type "image")."""

    url: str


@dataclass(slots=True)
class LocalImageInput:
    """Image on the local filesystem (wire type "localImage")."""

    path: str


@dataclass(slots=True)
class SkillInput:
    """Named skill reference (wire type "skill")."""

    name: str
    path: str


@dataclass(slots=True)
class MentionInput:
    """Named mention of a path (wire type "mention")."""

    name: str
    path: str


# A turn's input is either a single item or a list of items.
InputItem = TextInput | ImageInput | LocalImageInput | SkillInput | MentionInput
Input = list[InputItem] | InputItem


@dataclass(slots=True)
class InitializeResult:
    """Server identity derived from the initialize handshake."""

    server_name: str
    server_version: str
    user_agent: str
def _to_wire_item(item: InputItem) -> JsonObject:
if isinstance(item, TextInput):
return {"type": "text", "text": item.text}
if isinstance(item, ImageInput):
return {"type": "image", "url": item.url}
if isinstance(item, LocalImageInput):
return {"type": "localImage", "path": item.path}
if isinstance(item, SkillInput):
return {"type": "skill", "name": item.name, "path": item.path}
if isinstance(item, MentionInput):
return {"type": "mention", "name": item.name, "path": item.path}
raise TypeError(f"unsupported input item: {type(item)!r}")
def _to_wire_input(input: Input) -> list[JsonObject]:
    """Normalize a single item or a list of items into a list of wire dicts."""
    items = input if isinstance(input, list) else [input]
    return [_to_wire_item(item) for item in items]
def _split_user_agent(user_agent: str) -> tuple[str | None, str | None]:
raw = user_agent.strip()
if not raw:
return None, None
if "/" in raw:
name, version = raw.split("/", 1)
return (name or None), (version or None)
parts = raw.split(maxsplit=1)
if len(parts) == 2:
return parts[0], parts[1]
return raw, None
def _enum_value(value: object) -> object:
return getattr(value, "value", value)
def _assistant_output_text_chunks(
    notification: RawResponseItemCompletedNotification,
) -> list[str]:
    """Collect non-empty ``output_text`` strings from an assistant message item.

    Returns ``[]`` when the raw response item is not an assistant ``message``
    or carries no non-empty ``output_text`` content entries.
    """
    item = notification.item.root
    is_assistant_message = (
        _enum_value(getattr(item, "type", None)) == "message"
        and getattr(item, "role", None) == "assistant"
    )
    if not is_assistant_message:
        return []
    chunks: list[str] = []
    for entry in getattr(item, "content", []) or []:
        # Content entries may be RootModel wrappers; unwrap when present.
        inner = getattr(entry, "root", entry)
        if _enum_value(getattr(inner, "type", None)) != "output_text":
            continue
        text = getattr(inner, "text", None)
        if isinstance(text, str) and text:
            chunks.append(text)
    return chunks
def _build_turn_result(
    completed: TurnCompletedNotificationPayload | None,
    usage: ThreadTokenUsageUpdatedNotification | None,
    delta_chunks: list[str],
    raw_text_chunks: list[str],
) -> TurnResult:
    """Assemble a :class:`TurnResult` from notifications gathered while streaming.

    Raises:
        RuntimeError: if no ``turn/completed`` event was seen, or a completed
            turn is missing its ``thread/tokenUsage/updated`` notification.
    """
    if completed is None:
        raise RuntimeError("turn completed event not received")
    if usage is None and completed.turn.status == TurnStatus.completed:
        raise RuntimeError(
            "thread/tokenUsage/updated notification not received for completed turn"
        )
    # Prefer incremental delta text; fall back to raw response item text.
    chunks = delta_chunks or raw_text_chunks
    return TurnResult(
        thread_id=completed.thread_id,
        turn_id=completed.turn.id,
        status=completed.turn.status,
        error=completed.turn.error,
        text="".join(chunks),
        items=list(completed.turn.items or []),
        usage=usage,
    )
class Codex:
    """Minimal typed SDK surface for app-server v2."""

    def __init__(self, config: AppServerConfig | None = None) -> None:
        # Eagerly start the transport and run the initialize handshake; close
        # the client on any failure so the underlying resources do not leak.
        self._client = AppServerClient(config=config)
        try:
            self._client.start()
            self._init = self._parse_initialize(self._client.initialize())
        except Exception:
            self._client.close()
            raise

    def __enter__(self) -> "Codex":
        return self

    def __exit__(self, _exc_type, _exc, _tb) -> None:
        self.close()

    @staticmethod
    def _parse_initialize(payload: InitializeResponse) -> InitializeResult:
        """Extract server name/version and user agent from the initialize response.

        Prefers explicit ``serverInfo`` fields and falls back to parsing the
        user-agent string for any field that is missing.

        Raises:
            RuntimeError: if the user agent, server name, or server version
                cannot be determined (all three are required).
        """
        user_agent = (payload.userAgent or "").strip()
        server = payload.serverInfo
        server_name: str | None = None
        server_version: str | None = None
        if server is not None:
            server_name = (server.name or "").strip() or None
            server_version = (server.version or "").strip() or None
        if (server_name is None or server_version is None) and user_agent:
            parsed_name, parsed_version = _split_user_agent(user_agent)
            if server_name is None:
                server_name = parsed_name
            if server_version is None:
                server_version = parsed_version
        normalized_server_name = (server_name or "").strip()
        normalized_server_version = (server_version or "").strip()
        if not user_agent or not normalized_server_name or not normalized_server_version:
            raise RuntimeError(
                "initialize response missing required metadata "
                f"(user_agent={user_agent!r}, server_name={normalized_server_name!r}, server_version={normalized_server_version!r})"
            )
        return InitializeResult(
            server_name=normalized_server_name,
            server_version=normalized_server_version,
            user_agent=user_agent,
        )

    @property
    def metadata(self) -> InitializeResult:
        """Server metadata captured during the constructor's initialize handshake."""
        return self._init

    def close(self) -> None:
        """Shut down the underlying app-server client."""
        self._client.close()

    # BEGIN GENERATED: Codex.flat_methods
    def thread_start(
        self,
        *,
        approval_policy: AskForApproval | None = None,
        base_instructions: str | None = None,
        config: JsonObject | None = None,
        cwd: str | None = None,
        developer_instructions: str | None = None,
        ephemeral: bool | None = None,
        model: str | None = None,
        model_provider: str | None = None,
        personality: Personality | None = None,
        sandbox: SandboxMode | None = None,
        service_name: str | None = None,
        service_tier: ServiceTier | None = None,
    ) -> Thread:
        params = ThreadStartParams(
            approval_policy=approval_policy,
            base_instructions=base_instructions,
            config=config,
            cwd=cwd,
            developer_instructions=developer_instructions,
            ephemeral=ephemeral,
            model=model,
            model_provider=model_provider,
            personality=personality,
            sandbox=sandbox,
            service_name=service_name,
            service_tier=service_tier,
        )
        started = self._client.thread_start(params)
        return Thread(self._client, started.thread.id)

    def thread_list(
        self,
        *,
        archived: bool | None = None,
        cursor: str | None = None,
        cwd: str | None = None,
        limit: int | None = None,
        model_providers: list[str] | None = None,
        search_term: str | None = None,
        sort_key: ThreadSortKey | None = None,
        source_kinds: list[ThreadSourceKind] | None = None,
    ) -> ThreadListResponse:
        params = ThreadListParams(
            archived=archived,
            cursor=cursor,
            cwd=cwd,
            limit=limit,
            model_providers=model_providers,
            search_term=search_term,
            sort_key=sort_key,
            source_kinds=source_kinds,
        )
        return self._client.thread_list(params)

    def thread_resume(
        self,
        thread_id: str,
        *,
        approval_policy: AskForApproval | None = None,
        base_instructions: str | None = None,
        config: JsonObject | None = None,
        cwd: str | None = None,
        developer_instructions: str | None = None,
        model: str | None = None,
        model_provider: str | None = None,
        personality: Personality | None = None,
        sandbox: SandboxMode | None = None,
        service_tier: ServiceTier | None = None,
    ) -> Thread:
        params = ThreadResumeParams(
            thread_id=thread_id,
            approval_policy=approval_policy,
            base_instructions=base_instructions,
            config=config,
            cwd=cwd,
            developer_instructions=developer_instructions,
            model=model,
            model_provider=model_provider,
            personality=personality,
            sandbox=sandbox,
            service_tier=service_tier,
        )
        resumed = self._client.thread_resume(thread_id, params)
        return Thread(self._client, resumed.thread.id)

    def thread_fork(
        self,
        thread_id: str,
        *,
        approval_policy: AskForApproval | None = None,
        base_instructions: str | None = None,
        config: JsonObject | None = None,
        cwd: str | None = None,
        developer_instructions: str | None = None,
        ephemeral: bool | None = None,
        model: str | None = None,
        model_provider: str | None = None,
        sandbox: SandboxMode | None = None,
        service_tier: ServiceTier | None = None,
    ) -> Thread:
        params = ThreadForkParams(
            thread_id=thread_id,
            approval_policy=approval_policy,
            base_instructions=base_instructions,
            config=config,
            cwd=cwd,
            developer_instructions=developer_instructions,
            ephemeral=ephemeral,
            model=model,
            model_provider=model_provider,
            sandbox=sandbox,
            service_tier=service_tier,
        )
        forked = self._client.thread_fork(thread_id, params)
        return Thread(self._client, forked.thread.id)

    def thread_archive(self, thread_id: str) -> ThreadArchiveResponse:
        return self._client.thread_archive(thread_id)

    def thread_unarchive(self, thread_id: str) -> Thread:
        unarchived = self._client.thread_unarchive(thread_id)
        return Thread(self._client, unarchived.thread.id)
    # END GENERATED: Codex.flat_methods

    def models(self, *, include_hidden: bool = False) -> ModelListResponse:
        """List available models, optionally including hidden ones."""
        return self._client.model_list(include_hidden=include_hidden)
class AsyncCodex:
    """Async mirror of :class:`Codex` with matching method shapes."""

    def __init__(self, config: AppServerConfig | None = None) -> None:
        # Unlike Codex, initialization is deferred: the constructor is cheap
        # and the handshake happens lazily on first API call (or __aenter__).
        self._client = AsyncAppServerClient(config=config)
        self._init: InitializeResult | None = None
        self._initialized = False
        self._init_lock = asyncio.Lock()

    async def __aenter__(self) -> "AsyncCodex":
        await self._ensure_initialized()
        return self

    async def __aexit__(self, _exc_type, _exc, _tb) -> None:
        await self.close()

    async def _ensure_initialized(self) -> None:
        """Run the start/initialize handshake exactly once, guarded by a lock.

        On failure the client is closed and state reset so a later call can
        retry from scratch.
        """
        if self._initialized:
            return
        async with self._init_lock:
            # Re-check under the lock: a concurrent caller may have won.
            if self._initialized:
                return
            try:
                await self._client.start()
                payload = await self._client.initialize()
                self._init = Codex._parse_initialize(payload)
                self._initialized = True
            except Exception:
                await self._client.close()
                self._init = None
                self._initialized = False
                raise

    @property
    def metadata(self) -> InitializeResult:
        """Server metadata from the initialize handshake.

        Raises:
            RuntimeError: if initialization has not happened yet.
        """
        if self._init is None:
            raise RuntimeError(
                "AsyncCodex is not initialized yet. Use `async with AsyncCodex()` or call an async API first."
            )
        return self._init

    async def close(self) -> None:
        """Shut down the client and reset initialization state."""
        await self._client.close()
        self._init = None
        self._initialized = False

    # BEGIN GENERATED: AsyncCodex.flat_methods
    async def thread_start(
        self,
        *,
        approval_policy: AskForApproval | None = None,
        base_instructions: str | None = None,
        config: JsonObject | None = None,
        cwd: str | None = None,
        developer_instructions: str | None = None,
        ephemeral: bool | None = None,
        model: str | None = None,
        model_provider: str | None = None,
        personality: Personality | None = None,
        sandbox: SandboxMode | None = None,
        service_name: str | None = None,
        service_tier: ServiceTier | None = None,
    ) -> AsyncThread:
        await self._ensure_initialized()
        params = ThreadStartParams(
            approval_policy=approval_policy,
            base_instructions=base_instructions,
            config=config,
            cwd=cwd,
            developer_instructions=developer_instructions,
            ephemeral=ephemeral,
            model=model,
            model_provider=model_provider,
            personality=personality,
            sandbox=sandbox,
            service_name=service_name,
            service_tier=service_tier,
        )
        started = await self._client.thread_start(params)
        return AsyncThread(self, started.thread.id)

    async def thread_list(
        self,
        *,
        archived: bool | None = None,
        cursor: str | None = None,
        cwd: str | None = None,
        limit: int | None = None,
        model_providers: list[str] | None = None,
        search_term: str | None = None,
        sort_key: ThreadSortKey | None = None,
        source_kinds: list[ThreadSourceKind] | None = None,
    ) -> ThreadListResponse:
        await self._ensure_initialized()
        params = ThreadListParams(
            archived=archived,
            cursor=cursor,
            cwd=cwd,
            limit=limit,
            model_providers=model_providers,
            search_term=search_term,
            sort_key=sort_key,
            source_kinds=source_kinds,
        )
        return await self._client.thread_list(params)

    async def thread_resume(
        self,
        thread_id: str,
        *,
        approval_policy: AskForApproval | None = None,
        base_instructions: str | None = None,
        config: JsonObject | None = None,
        cwd: str | None = None,
        developer_instructions: str | None = None,
        model: str | None = None,
        model_provider: str | None = None,
        personality: Personality | None = None,
        sandbox: SandboxMode | None = None,
        service_tier: ServiceTier | None = None,
    ) -> AsyncThread:
        await self._ensure_initialized()
        params = ThreadResumeParams(
            thread_id=thread_id,
            approval_policy=approval_policy,
            base_instructions=base_instructions,
            config=config,
            cwd=cwd,
            developer_instructions=developer_instructions,
            model=model,
            model_provider=model_provider,
            personality=personality,
            sandbox=sandbox,
            service_tier=service_tier,
        )
        resumed = await self._client.thread_resume(thread_id, params)
        return AsyncThread(self, resumed.thread.id)

    async def thread_fork(
        self,
        thread_id: str,
        *,
        approval_policy: AskForApproval | None = None,
        base_instructions: str | None = None,
        config: JsonObject | None = None,
        cwd: str | None = None,
        developer_instructions: str | None = None,
        ephemeral: bool | None = None,
        model: str | None = None,
        model_provider: str | None = None,
        sandbox: SandboxMode | None = None,
        service_tier: ServiceTier | None = None,
    ) -> AsyncThread:
        await self._ensure_initialized()
        params = ThreadForkParams(
            thread_id=thread_id,
            approval_policy=approval_policy,
            base_instructions=base_instructions,
            config=config,
            cwd=cwd,
            developer_instructions=developer_instructions,
            ephemeral=ephemeral,
            model=model,
            model_provider=model_provider,
            sandbox=sandbox,
            service_tier=service_tier,
        )
        forked = await self._client.thread_fork(thread_id, params)
        return AsyncThread(self, forked.thread.id)

    async def thread_archive(self, thread_id: str) -> ThreadArchiveResponse:
        await self._ensure_initialized()
        return await self._client.thread_archive(thread_id)

    async def thread_unarchive(self, thread_id: str) -> AsyncThread:
        await self._ensure_initialized()
        unarchived = await self._client.thread_unarchive(thread_id)
        return AsyncThread(self, unarchived.thread.id)
    # END GENERATED: AsyncCodex.flat_methods

    async def models(self, *, include_hidden: bool = False) -> ModelListResponse:
        """List available models, optionally including hidden ones."""
        await self._ensure_initialized()
        return await self._client.model_list(include_hidden=include_hidden)
@dataclass(slots=True)
class Thread:
    """Synchronous handle to a single app-server thread."""

    # Underlying client shared with the owning Codex instance.
    _client: AppServerClient
    # Server-assigned thread identifier.
    id: str

    # BEGIN GENERATED: Thread.flat_methods
    def turn(
        self,
        input: Input,
        *,
        approval_policy: AskForApproval | None = None,
        cwd: str | None = None,
        effort: ReasoningEffort | None = None,
        model: str | None = None,
        output_schema: JsonObject | None = None,
        personality: Personality | None = None,
        sandbox_policy: SandboxPolicy | None = None,
        service_tier: ServiceTier | None = None,
        summary: ReasoningSummary | None = None,
    ) -> Turn:
        wire_input = _to_wire_input(input)
        params = TurnStartParams(
            thread_id=self.id,
            input=wire_input,
            approval_policy=approval_policy,
            cwd=cwd,
            effort=effort,
            model=model,
            output_schema=output_schema,
            personality=personality,
            sandbox_policy=sandbox_policy,
            service_tier=service_tier,
            summary=summary,
        )
        turn = self._client.turn_start(self.id, wire_input, params=params)
        return Turn(self._client, self.id, turn.turn.id)
    # END GENERATED: Thread.flat_methods

    def read(self, *, include_turns: bool = False) -> ThreadReadResponse:
        """Fetch this thread, optionally including its turns."""
        return self._client.thread_read(self.id, include_turns=include_turns)

    def set_name(self, name: str) -> ThreadSetNameResponse:
        """Rename this thread."""
        return self._client.thread_set_name(self.id, name)

    def compact(self) -> ThreadCompactStartResponse:
        """Start compaction for this thread."""
        return self._client.thread_compact(self.id)
@dataclass(slots=True)
class AsyncThread:
    """Async handle to a single app-server thread (mirrors :class:`Thread`)."""

    # Owning AsyncCodex; used for lazy initialization and client access.
    _codex: AsyncCodex
    # Server-assigned thread identifier.
    id: str

    # BEGIN GENERATED: AsyncThread.flat_methods
    async def turn(
        self,
        input: Input,
        *,
        approval_policy: AskForApproval | None = None,
        cwd: str | None = None,
        effort: ReasoningEffort | None = None,
        model: str | None = None,
        output_schema: JsonObject | None = None,
        personality: Personality | None = None,
        sandbox_policy: SandboxPolicy | None = None,
        service_tier: ServiceTier | None = None,
        summary: ReasoningSummary | None = None,
    ) -> AsyncTurn:
        await self._codex._ensure_initialized()
        wire_input = _to_wire_input(input)
        params = TurnStartParams(
            thread_id=self.id,
            input=wire_input,
            approval_policy=approval_policy,
            cwd=cwd,
            effort=effort,
            model=model,
            output_schema=output_schema,
            personality=personality,
            sandbox_policy=sandbox_policy,
            service_tier=service_tier,
            summary=summary,
        )
        turn = await self._codex._client.turn_start(
            self.id,
            wire_input,
            params=params,
        )
        return AsyncTurn(self._codex, self.id, turn.turn.id)
    # END GENERATED: AsyncThread.flat_methods

    async def read(self, *, include_turns: bool = False) -> ThreadReadResponse:
        """Fetch this thread, optionally including its turns."""
        await self._codex._ensure_initialized()
        return await self._codex._client.thread_read(self.id, include_turns=include_turns)

    async def set_name(self, name: str) -> ThreadSetNameResponse:
        """Rename this thread."""
        await self._codex._ensure_initialized()
        return await self._codex._client.thread_set_name(self.id, name)

    async def compact(self) -> ThreadCompactStartResponse:
        """Start compaction for this thread."""
        await self._codex._ensure_initialized()
        return await self._codex._client.thread_compact(self.id)
@dataclass(slots=True)
class Turn:
    """Synchronous handle to a single in-flight or completed turn."""

    _client: AppServerClient
    thread_id: str
    id: str

    def steer(self, input: Input) -> TurnSteerResponse:
        """Send additional input into this running turn."""
        return self._client.turn_steer(self.thread_id, self.id, _to_wire_input(input))

    def interrupt(self) -> TurnInterruptResponse:
        """Request interruption of this turn."""
        return self._client.turn_interrupt(self.thread_id, self.id)

    def stream(self) -> Iterator[Notification]:
        """Yield client notifications until this turn's turn/completed arrives.

        Note: all notifications seen by the client are yielded, not only those
        belonging to this turn; callers filter by payload turn id.
        """
        # TODO: replace this client-wide experimental guard with per-turn event demux.
        self._client.acquire_turn_consumer(self.id)
        try:
            while True:
                event = self._client.next_notification()
                yield event
                # Stop after yielding the completion event for *this* turn.
                if (
                    event.method == "turn/completed"
                    and isinstance(event.payload, TurnCompletedNotificationPayload)
                    and event.payload.turn.id == self.id
                ):
                    break
        finally:
            self._client.release_turn_consumer(self.id)

    def run(self) -> TurnResult:
        """Consume the stream to completion and aggregate it into a TurnResult."""
        completed: TurnCompletedNotificationPayload | None = None
        usage: ThreadTokenUsageUpdatedNotification | None = None
        delta_chunks: list[str] = []
        raw_text_chunks: list[str] = []
        stream = self.stream()
        try:
            for event in stream:
                payload = event.payload
                # Incremental assistant message text for this turn.
                if (
                    isinstance(payload, AgentMessageDeltaNotification)
                    and payload.turn_id == self.id
                ):
                    delta_chunks.append(payload.delta)
                    continue
                # Fallback text source when no deltas were streamed.
                if (
                    isinstance(payload, RawResponseItemCompletedNotification)
                    and payload.turn_id == self.id
                ):
                    raw_text_chunks.extend(_assistant_output_text_chunks(payload))
                    continue
                if (
                    isinstance(payload, ThreadTokenUsageUpdatedNotification)
                    and payload.turn_id == self.id
                ):
                    usage = payload
                    continue
                if (
                    isinstance(payload, TurnCompletedNotificationPayload)
                    and payload.turn.id == self.id
                ):
                    completed = payload
        finally:
            # Ensure the consumer guard is released even on early exit.
            stream.close()
        return _build_turn_result(completed, usage, delta_chunks, raw_text_chunks)
@dataclass(slots=True)
class AsyncTurn:
    """Async handle to a single in-flight or completed turn (mirrors :class:`Turn`)."""

    _codex: AsyncCodex
    thread_id: str
    id: str

    async def steer(self, input: Input) -> TurnSteerResponse:
        """Send additional input into this running turn."""
        await self._codex._ensure_initialized()
        return await self._codex._client.turn_steer(
            self.thread_id,
            self.id,
            _to_wire_input(input),
        )

    async def interrupt(self) -> TurnInterruptResponse:
        """Request interruption of this turn."""
        await self._codex._ensure_initialized()
        return await self._codex._client.turn_interrupt(self.thread_id, self.id)

    async def stream(self) -> AsyncIterator[Notification]:
        """Yield client notifications until this turn's turn/completed arrives."""
        await self._codex._ensure_initialized()
        # TODO: replace this client-wide experimental guard with per-turn event demux.
        self._codex._client.acquire_turn_consumer(self.id)
        try:
            while True:
                event = await self._codex._client.next_notification()
                yield event
                # Stop after yielding the completion event for *this* turn.
                if (
                    event.method == "turn/completed"
                    and isinstance(event.payload, TurnCompletedNotificationPayload)
                    and event.payload.turn.id == self.id
                ):
                    break
        finally:
            self._codex._client.release_turn_consumer(self.id)

    async def run(self) -> TurnResult:
        """Consume the stream to completion and aggregate it into a TurnResult."""
        completed: TurnCompletedNotificationPayload | None = None
        usage: ThreadTokenUsageUpdatedNotification | None = None
        delta_chunks: list[str] = []
        raw_text_chunks: list[str] = []
        stream = self.stream()
        try:
            async for event in stream:
                payload = event.payload
                # Incremental assistant message text for this turn.
                if (
                    isinstance(payload, AgentMessageDeltaNotification)
                    and payload.turn_id == self.id
                ):
                    delta_chunks.append(payload.delta)
                    continue
                # Fallback text source when no deltas were streamed.
                if (
                    isinstance(payload, RawResponseItemCompletedNotification)
                    and payload.turn_id == self.id
                ):
                    raw_text_chunks.extend(_assistant_output_text_chunks(payload))
                    continue
                if (
                    isinstance(payload, ThreadTokenUsageUpdatedNotification)
                    and payload.turn_id == self.id
                ):
                    usage = payload
                    continue
                if (
                    isinstance(payload, TurnCompletedNotificationPayload)
                    and payload.turn.id == self.id
                ):
                    completed = payload
        finally:
            # Ensure the consumer guard is released even on early exit.
            await stream.aclose()
        return _build_turn_result(completed, usage, delta_chunks, raw_text_chunks)

View File

@@ -0,0 +1,41 @@
"""Shallow public aliases over the generated v2 wire models."""
from .generated.v2_all import (
AskForApproval,
Personality,
PlanType,
ReasoningEffort,
ReasoningSummary,
SandboxMode,
SandboxPolicy,
ServiceTier,
ThreadForkParams,
ThreadListParams,
ThreadResumeParams,
ThreadSortKey,
ThreadSourceKind,
ThreadStartParams,
TurnStartParams,
TurnStatus,
TurnSteerParams,
)
# Explicit re-export list; keep in sync with the names imported from the
# generated v2 module above.
__all__ = [
    "AskForApproval",
    "Personality",
    "PlanType",
    "ReasoningEffort",
    "ReasoningSummary",
    "SandboxMode",
    "SandboxPolicy",
    "ServiceTier",
    "ThreadForkParams",
    "ThreadListParams",
    "ThreadResumeParams",
    "ThreadSortKey",
    "ThreadSourceKind",
    "ThreadStartParams",
    "TurnStartParams",
    "TurnStatus",
    "TurnSteerParams",
]

View File

@@ -0,0 +1,64 @@
from __future__ import annotations
import asyncio
import time
from codex_app_server.async_client import AsyncAppServerClient
def test_async_client_serializes_transport_calls() -> None:
    """Concurrent async calls must be serialized onto the sync transport.

    Patches the sync client's model_list with a slow fake that tracks how many
    invocations overlap; two gathered calls must never run simultaneously.
    """

    async def scenario() -> int:
        client = AsyncAppServerClient()
        active = 0
        max_active = 0

        def fake_model_list(include_hidden: bool = False) -> bool:
            nonlocal active, max_active
            active += 1
            max_active = max(max_active, active)
            # Blocking sleep simulates transport work; overlap would show up
            # as max_active > 1.
            time.sleep(0.05)
            active -= 1
            return include_hidden

        client._sync.model_list = fake_model_list  # type: ignore[method-assign]
        await asyncio.gather(client.model_list(), client.model_list())
        return max_active

    assert asyncio.run(scenario()) == 1
def test_async_stream_text_is_incremental_and_blocks_parallel_calls() -> None:
    """Streaming yields items incrementally while blocking other client calls.

    A competing model_list call started mid-stream must not complete until the
    stream is fully drained.
    """

    async def scenario() -> tuple[str, list[str], bool]:
        client = AsyncAppServerClient()

        def fake_stream_text(thread_id: str, text: str, params=None):  # type: ignore[no-untyped-def]
            yield "first"
            time.sleep(0.03)
            yield "second"
            yield "third"

        def fake_model_list(include_hidden: bool = False) -> str:
            return "done"

        client._sync.stream_text = fake_stream_text  # type: ignore[method-assign]
        client._sync.model_list = fake_model_list  # type: ignore[method-assign]
        stream = client.stream_text("thread-1", "hello")
        first = await anext(stream)
        blocked_before_stream_done = False
        # Start a competing call while the stream is only partially consumed.
        competing_call = asyncio.create_task(client.model_list())
        await asyncio.sleep(0.01)
        blocked_before_stream_done = not competing_call.done()
        remaining: list[str] = []
        async for item in stream:
            remaining.append(item)
        await competing_call
        return first, remaining, blocked_before_stream_done

    first, remaining, blocked = asyncio.run(scenario())
    assert first == "first"
    assert remaining == ["second", "third"]
    assert blocked

View File

@@ -0,0 +1,286 @@
from __future__ import annotations
import asyncio
from collections import deque
from pathlib import Path
import pytest
import codex_app_server.public_api as public_api_module
from codex_app_server.client import AppServerClient
from codex_app_server.generated.v2_all import (
AgentMessageDeltaNotification,
RawResponseItemCompletedNotification,
ThreadTokenUsageUpdatedNotification,
)
from codex_app_server.models import InitializeResponse, Notification
from codex_app_server.public_api import AsyncCodex, AsyncTurn, Codex, Turn
from codex_app_server.public_types import TurnStatus
# SDK package root: the directory containing this tests directory.
ROOT = Path(__file__).resolve().parents[1]
def _delta_notification(
    *,
    thread_id: str = "thread-1",
    turn_id: str = "turn-1",
    text: str = "delta-text",
) -> Notification:
    """Build an item/agentMessage/delta notification fixture."""
    return Notification(
        method="item/agentMessage/delta",
        payload=AgentMessageDeltaNotification.model_validate(
            {
                "delta": text,
                "itemId": "item-1",
                "threadId": thread_id,
                "turnId": turn_id,
            }
        ),
    )
def _raw_response_notification(
    *,
    thread_id: str = "thread-1",
    turn_id: str = "turn-1",
    text: str = "raw-text",
) -> Notification:
    """Build a rawResponseItem/completed fixture carrying assistant output_text."""
    return Notification(
        method="rawResponseItem/completed",
        payload=RawResponseItemCompletedNotification.model_validate(
            {
                "item": {
                    "type": "message",
                    "role": "assistant",
                    "content": [{"type": "output_text", "text": text}],
                },
                "threadId": thread_id,
                "turnId": turn_id,
            }
        ),
    )
def _usage_notification(
    *,
    thread_id: str = "thread-1",
    turn_id: str = "turn-1",
) -> Notification:
    """Build a thread/tokenUsage/updated fixture with small fixed token counts."""
    return Notification(
        method="thread/tokenUsage/updated",
        payload=ThreadTokenUsageUpdatedNotification.model_validate(
            {
                "threadId": thread_id,
                "turnId": turn_id,
                "tokenUsage": {
                    "last": {
                        "cachedInputTokens": 0,
                        "inputTokens": 1,
                        "outputTokens": 2,
                        "reasoningOutputTokens": 0,
                        "totalTokens": 3,
                    },
                    "total": {
                        "cachedInputTokens": 0,
                        "inputTokens": 1,
                        "outputTokens": 2,
                        "reasoningOutputTokens": 0,
                        "totalTokens": 3,
                    },
                },
            }
        ),
    )
def _completed_notification(
    *,
    thread_id: str = "thread-1",
    turn_id: str = "turn-1",
    status: str = "completed",
) -> Notification:
    """Build a turn/completed fixture with an empty item list."""
    return Notification(
        method="turn/completed",
        payload=public_api_module.TurnCompletedNotificationPayload.model_validate(
            {
                "threadId": thread_id,
                "turn": {
                    "id": turn_id,
                    "items": [],
                    "status": status,
                },
            }
        ),
    )
def test_codex_init_failure_closes_client(monkeypatch: pytest.MonkeyPatch) -> None:
    """If initialize parsing fails, the Codex constructor must close the client."""
    closed: list[bool] = []

    class FakeClient:
        def __init__(self, config=None) -> None:  # noqa: ANN001,ARG002
            self._closed = False

        def start(self) -> None:
            return None

        def initialize(self) -> InitializeResponse:
            # Empty payload triggers the "missing required metadata" error.
            return InitializeResponse.model_validate({})

        def close(self) -> None:
            self._closed = True
            closed.append(True)

    monkeypatch.setattr(public_api_module, "AppServerClient", FakeClient)
    with pytest.raises(RuntimeError, match="missing required metadata"):
        Codex()
    assert closed == [True]
def test_async_codex_init_failure_closes_client() -> None:
    """A failed lazy initialization must close the client and reset state."""

    async def scenario() -> None:
        codex = AsyncCodex()
        close_calls = 0

        async def fake_start() -> None:
            return None

        async def fake_initialize() -> InitializeResponse:
            # Empty payload triggers the "missing required metadata" error.
            return InitializeResponse.model_validate({})

        async def fake_close() -> None:
            nonlocal close_calls
            close_calls += 1

        codex._client.start = fake_start  # type: ignore[method-assign]
        codex._client.initialize = fake_initialize  # type: ignore[method-assign]
        codex._client.close = fake_close  # type: ignore[method-assign]
        with pytest.raises(RuntimeError, match="missing required metadata"):
            await codex.models()
        assert close_calls == 1
        assert codex._initialized is False
        assert codex._init is None

    asyncio.run(scenario())
def test_async_codex_initializes_only_once_under_concurrency() -> None:
    """Two concurrent first calls must run start/initialize exactly once."""

    async def scenario() -> None:
        codex = AsyncCodex()
        start_calls = 0
        initialize_calls = 0
        ready = asyncio.Event()

        async def fake_start() -> None:
            nonlocal start_calls
            start_calls += 1

        async def fake_initialize() -> InitializeResponse:
            nonlocal initialize_calls
            initialize_calls += 1
            # Signal that initialization began, then yield so the second
            # caller gets a chance to race for the init lock.
            ready.set()
            await asyncio.sleep(0.02)
            return InitializeResponse.model_validate(
                {
                    "userAgent": "codex-cli/1.2.3",
                    "serverInfo": {"name": "codex-cli", "version": "1.2.3"},
                }
            )

        async def fake_model_list(include_hidden: bool = False):  # noqa: ANN202,ARG001
            await ready.wait()
            return object()

        codex._client.start = fake_start  # type: ignore[method-assign]
        codex._client.initialize = fake_initialize  # type: ignore[method-assign]
        codex._client.model_list = fake_model_list  # type: ignore[method-assign]
        await asyncio.gather(codex.models(), codex.models())
        assert start_calls == 1
        assert initialize_calls == 1

    asyncio.run(scenario())
def test_turn_stream_rejects_second_active_consumer() -> None:
    """Opening a second turn stream while one is active must raise."""
    client = AppServerClient()
    notifications: deque[Notification] = deque(
        [
            _delta_notification(turn_id="turn-1"),
            _completed_notification(turn_id="turn-1"),
        ]
    )
    client.next_notification = notifications.popleft  # type: ignore[method-assign]
    first_stream = Turn(client, "thread-1", "turn-1").stream()
    # Pull one event so the first stream acquires the consumer guard.
    assert next(first_stream).method == "item/agentMessage/delta"
    second_stream = Turn(client, "thread-1", "turn-2").stream()
    with pytest.raises(RuntimeError, match="Concurrent turn consumers are not yet supported"):
        next(second_stream)
    first_stream.close()
def test_async_turn_stream_rejects_second_active_consumer() -> None:
    """Async variant: a second active turn stream must be rejected."""

    async def scenario() -> None:
        codex = AsyncCodex()

        async def fake_ensure_initialized() -> None:
            return None

        notifications: deque[Notification] = deque(
            [
                _delta_notification(turn_id="turn-1"),
                _completed_notification(turn_id="turn-1"),
            ]
        )

        async def fake_next_notification() -> Notification:
            return notifications.popleft()

        codex._ensure_initialized = fake_ensure_initialized  # type: ignore[method-assign]
        codex._client.next_notification = fake_next_notification  # type: ignore[method-assign]
        first_stream = AsyncTurn(codex, "thread-1", "turn-1").stream()
        # Pull one event so the first stream acquires the consumer guard.
        assert (await anext(first_stream)).method == "item/agentMessage/delta"
        second_stream = AsyncTurn(codex, "thread-1", "turn-2").stream()
        with pytest.raises(RuntimeError, match="Concurrent turn consumers are not yet supported"):
            await anext(second_stream)
        await first_stream.aclose()

    asyncio.run(scenario())
def test_turn_run_falls_back_to_completed_raw_response_text() -> None:
    """Without deltas, run() must use raw response item text for the result."""
    client = AppServerClient()
    notifications: deque[Notification] = deque(
        [
            _raw_response_notification(text="hello from raw response"),
            _usage_notification(),
            _completed_notification(),
        ]
    )
    client.next_notification = notifications.popleft  # type: ignore[method-assign]
    result = Turn(client, "thread-1", "turn-1").run()
    assert result.status == TurnStatus.completed
    assert result.text == "hello from raw response"
def test_retry_examples_compare_status_with_enum() -> None:
    """Retry examples must compare against TurnStatus.failed, not the string "failed"."""
    for path in (
        ROOT / "examples" / "10_error_handling_and_retry" / "sync.py",
        ROOT / "examples" / "10_error_handling_and_retry" / "async.py",
    ):
        source = path.read_text()
        assert '== "failed"' not in source
        assert "TurnStatus.failed" in source

View File

@@ -0,0 +1,211 @@
from __future__ import annotations
import importlib.resources as resources
import inspect
from typing import Any
from codex_app_server import AppServerConfig
from codex_app_server.models import InitializeResponse
from codex_app_server.public_api import AsyncCodex, AsyncThread, Codex, Thread
def _keyword_only_names(fn: object) -> list[str]:
signature = inspect.signature(fn)
return [
param.name
for param in signature.parameters.values()
if param.kind == inspect.Parameter.KEYWORD_ONLY
]
def _assert_no_any_annotations(fn: object) -> None:
signature = inspect.signature(fn)
for param in signature.parameters.values():
if param.annotation is Any:
raise AssertionError(f"{fn} has public parameter typed as Any: {param.name}")
if signature.return_annotation is Any:
raise AssertionError(f"{fn} has public return annotation typed as Any")
def test_root_exports_app_server_config() -> None:
    """AppServerConfig must be importable from the package root."""
    assert AppServerConfig.__name__ == "AppServerConfig"
def test_package_includes_py_typed_marker() -> None:
    """The package must ship a py.typed marker so type checkers use its hints."""
    marker = resources.files("codex_app_server").joinpath("py.typed")
    assert marker.is_file()
def test_generated_public_signatures_are_snake_case_and_typed() -> None:
    """Generated flat methods must expose exactly these snake_case keyword-only
    parameters, with no ``Any`` in their public annotations."""
    # Map of public method -> expected keyword-only parameter names, in order.
    expected = {
        Codex.thread_start: [
            "approval_policy",
            "base_instructions",
            "config",
            "cwd",
            "developer_instructions",
            "ephemeral",
            "model",
            "model_provider",
            "personality",
            "sandbox",
            "service_name",
            "service_tier",
        ],
        Codex.thread_list: [
            "archived",
            "cursor",
            "cwd",
            "limit",
            "model_providers",
            "search_term",
            "sort_key",
            "source_kinds",
        ],
        Codex.thread_resume: [
            "approval_policy",
            "base_instructions",
            "config",
            "cwd",
            "developer_instructions",
            "model",
            "model_provider",
            "personality",
            "sandbox",
            "service_tier",
        ],
        Codex.thread_fork: [
            "approval_policy",
            "base_instructions",
            "config",
            "cwd",
            "developer_instructions",
            "ephemeral",
            "model",
            "model_provider",
            "sandbox",
            "service_tier",
        ],
        Thread.turn: [
            "approval_policy",
            "cwd",
            "effort",
            "model",
            "output_schema",
            "personality",
            "sandbox_policy",
            "service_tier",
            "summary",
        ],
        AsyncCodex.thread_start: [
            "approval_policy",
            "base_instructions",
            "config",
            "cwd",
            "developer_instructions",
            "ephemeral",
            "model",
            "model_provider",
            "personality",
            "sandbox",
            "service_name",
            "service_tier",
        ],
        AsyncCodex.thread_list: [
            "archived",
            "cursor",
            "cwd",
            "limit",
            "model_providers",
            "search_term",
            "sort_key",
            "source_kinds",
        ],
        AsyncCodex.thread_resume: [
            "approval_policy",
            "base_instructions",
            "config",
            "cwd",
            "developer_instructions",
            "model",
            "model_provider",
            "personality",
            "sandbox",
            "service_tier",
        ],
        AsyncCodex.thread_fork: [
            "approval_policy",
            "base_instructions",
            "config",
            "cwd",
            "developer_instructions",
            "ephemeral",
            "model",
            "model_provider",
            "sandbox",
            "service_tier",
        ],
        AsyncThread.turn: [
            "approval_policy",
            "cwd",
            "effort",
            "model",
            "output_schema",
            "personality",
            "sandbox_policy",
            "service_tier",
            "summary",
        ],
    }
    for fn, expected_kwargs in expected.items():
        actual = _keyword_only_names(fn)
        assert actual == expected_kwargs, f"unexpected kwargs for {fn}: {actual}"
        assert all(name == name.lower() for name in actual), f"non snake_case kwargs in {fn}: {actual}"
        _assert_no_any_annotations(fn)
def test_lifecycle_methods_are_codex_scoped() -> None:
    """Lifecycle operations live on Codex/AsyncCodex, never on thread handles."""
    assert hasattr(Codex, "thread_resume")
    assert hasattr(Codex, "thread_fork")
    assert hasattr(Codex, "thread_archive")
    assert hasattr(Codex, "thread_unarchive")
    assert hasattr(AsyncCodex, "thread_resume")
    assert hasattr(AsyncCodex, "thread_fork")
    assert hasattr(AsyncCodex, "thread_archive")
    assert hasattr(AsyncCodex, "thread_unarchive")
    # Negative checks: no stray `thread` factory or thread-scoped lifecycle.
    assert not hasattr(Codex, "thread")
    assert not hasattr(AsyncCodex, "thread")
    assert not hasattr(Thread, "resume")
    assert not hasattr(Thread, "fork")
    assert not hasattr(Thread, "archive")
    assert not hasattr(Thread, "unarchive")
    assert not hasattr(AsyncThread, "resume")
    assert not hasattr(AsyncThread, "fork")
    assert not hasattr(AsyncThread, "archive")
    assert not hasattr(AsyncThread, "unarchive")
    for fn in (
        Codex.thread_archive,
        Codex.thread_unarchive,
        AsyncCodex.thread_archive,
        AsyncCodex.thread_unarchive,
    ):
        _assert_no_any_annotations(fn)
def test_initialize_metadata_parses_user_agent_shape() -> None:
    """A lone `name/version` user agent must yield both server name and version."""
    parsed = Codex._parse_initialize(InitializeResponse.model_validate({"userAgent": "codex-cli/1.2.3"}))
    assert parsed.user_agent == "codex-cli/1.2.3"
    assert parsed.server_name == "codex-cli"
    assert parsed.server_version == "1.2.3"
def test_initialize_metadata_requires_non_empty_information() -> None:
    """An empty initialize payload must be rejected with a metadata error.

    Hand-rolled raise check because this module deliberately avoids a pytest
    import.
    """
    raised = False
    try:
        Codex._parse_initialize(InitializeResponse.model_validate({}))
    except RuntimeError as exc:
        raised = True
        assert "missing required metadata" in str(exc)
    if not raised:
        raise AssertionError("expected RuntimeError when initialize metadata is missing")

View File

@@ -0,0 +1,417 @@
from __future__ import annotations
import json
import os
import subprocess
import sys
import tempfile
import textwrap
from dataclasses import dataclass
from pathlib import Path
import pytest
# SDK root: two directory levels above this test file.
ROOT = Path(__file__).resolve().parents[1]
EXAMPLES_DIR = ROOT / "examples"
NOTEBOOK_PATH = ROOT / "notebooks" / "sdk_walkthrough.ipynb"
# Make the SDK root importable regardless of how pytest was invoked;
# this must run before the `_runtime_setup` import below.
root_str = str(ROOT)
if root_str not in sys.path:
    sys.path.insert(0, root_str)
from _runtime_setup import ensure_runtime_package_installed, required_runtime_version
# Real coverage is opt-in: it runs against an actual pinned Codex runtime.
RUN_REAL_CODEX_TESTS = os.environ.get("RUN_REAL_CODEX_TESTS") == "1"
# Skip the entire module unless the opt-in flag is set.
pytestmark = pytest.mark.skipif(
    not RUN_REAL_CODEX_TESTS,
    reason="set RUN_REAL_CODEX_TESTS=1 to run real Codex integration coverage",
)
# (folder, script) pairs under examples/ executed end-to-end by
# test_real_examples_run_and_assert.
# 11_cli_mini_app is interactive; we still run it by feeding '/exit'.
EXAMPLE_CASES: list[tuple[str, str]] = [
    ("01_quickstart_constructor", "sync.py"),
    ("01_quickstart_constructor", "async.py"),
    ("02_turn_run", "sync.py"),
    ("02_turn_run", "async.py"),
    ("03_turn_stream_events", "sync.py"),
    ("03_turn_stream_events", "async.py"),
    ("04_models_and_metadata", "sync.py"),
    ("04_models_and_metadata", "async.py"),
    ("05_existing_thread", "sync.py"),
    ("05_existing_thread", "async.py"),
    ("06_thread_lifecycle_and_controls", "sync.py"),
    ("06_thread_lifecycle_and_controls", "async.py"),
    ("07_image_and_text", "sync.py"),
    ("07_image_and_text", "async.py"),
    ("08_local_image_and_text", "sync.py"),
    ("08_local_image_and_text", "async.py"),
    ("09_async_parity", "sync.py"),
    # 09_async_parity async path is represented by 01 async + dedicated async-based cases above.
    ("10_error_handling_and_retry", "sync.py"),
    ("10_error_handling_and_retry", "async.py"),
    ("11_cli_mini_app", "sync.py"),
    ("11_cli_mini_app", "async.py"),
    ("12_turn_params_kitchen_sink", "sync.py"),
    ("12_turn_params_kitchen_sink", "async.py"),
    ("13_model_select_and_turn_params", "sync.py"),
    ("13_model_select_and_turn_params", "async.py"),
]
@dataclass(frozen=True)
class PreparedRuntimeEnv:
    """Interpreter and environment used to launch real-integration subprocesses."""

    # Path to the Python interpreter used for every subprocess run.
    python: str
    # Environment mapping (PYTHONPATH, CODEX_* vars) passed to subprocesses.
    env: dict[str, str]
    # Pinned codex runtime version the environment was prepared for.
    runtime_version: str
@pytest.fixture(scope="session")
def runtime_env(tmp_path_factory: pytest.TempPathFactory) -> PreparedRuntimeEnv:
    """Build a session-scoped isolated environment with the pinned runtime installed.

    Installs pydantic and the staged codex runtime package into a temporary
    ``site-packages`` target, then exposes it (plus the repo SDK sources) via
    PYTHONPATH so subprocess tests never depend on the ambient environment.
    """
    runtime_version = required_runtime_version()
    temp_root = tmp_path_factory.mktemp("python-runtime-env")
    isolated_site = temp_root / "site-packages"
    python = sys.executable
    # Install the SDK's runtime dependency into the isolated target.
    install_result = _run_command(
        [
            python,
            "-m",
            "pip",
            "install",
            "--target",
            str(isolated_site),
            "pydantic>=2.12",
        ],
        cwd=ROOT,
        env=os.environ.copy(),
        timeout_s=240,
    )
    # Fail fast here instead of letting every downstream test fail on a
    # missing dependency with a confusing import error.
    assert install_result.returncode == 0, (
        "pip install of pydantic into the isolated site-packages failed.\n"
        f"STDOUT:\n{install_result.stdout}\nSTDERR:\n{install_result.stderr}"
    )
    # Stage and install the pinned codex-cli-bin runtime package.
    ensure_runtime_package_installed(
        python,
        ROOT,
        runtime_version,
        install_target=isolated_site,
    )
    env = os.environ.copy()
    # Isolated site-packages first so its packages win over ambient ones.
    env["PYTHONPATH"] = os.pathsep.join([str(isolated_site), str(ROOT / "src")])
    env["CODEX_PYTHON_RUNTIME_VERSION"] = runtime_version
    env["CODEX_PYTHON_SDK_DIR"] = str(ROOT)
    return PreparedRuntimeEnv(python=python, env=env, runtime_version=runtime_version)
def _run_command(
args: list[str],
*,
cwd: Path,
env: dict[str, str],
timeout_s: int,
stdin: str | None = None,
) -> subprocess.CompletedProcess[str]:
return subprocess.run(
args,
cwd=str(cwd),
env=env,
input=stdin,
text=True,
capture_output=True,
timeout=timeout_s,
check=False,
)
def _run_python(
    runtime_env: PreparedRuntimeEnv,
    source: str,
    *,
    cwd: Path | None = None,
    timeout_s: int = 180,
) -> subprocess.CompletedProcess[str]:
    """Execute *source* via ``python -c`` inside the prepared runtime environment."""
    working_dir = ROOT if cwd is None else cwd
    command = [str(runtime_env.python), "-c", source]
    return _run_command(
        command,
        cwd=working_dir,
        env=runtime_env.env,
        timeout_s=timeout_s,
    )
def _run_json_python(
    runtime_env: PreparedRuntimeEnv,
    source: str,
    *,
    cwd: Path | None = None,
    timeout_s: int = 180,
) -> dict[str, object]:
    """Run a snippet that prints one JSON object on stdout; return the parsed payload."""
    completed = _run_python(runtime_env, source, cwd=cwd, timeout_s=timeout_s)
    failure_detail = (
        f"Python snippet failed.\nSTDOUT:\n{completed.stdout}\nSTDERR:\n{completed.stderr}"
    )
    assert completed.returncode == 0, failure_detail
    return json.loads(completed.stdout)
def _run_example(
    runtime_env: PreparedRuntimeEnv,
    folder: str,
    script: str,
    *,
    timeout_s: int = 180,
) -> subprocess.CompletedProcess[str]:
    """Run ``examples/<folder>/<script>`` in the prepared runtime environment."""
    script_path = EXAMPLES_DIR / folder / script
    assert script_path.exists(), f"Missing example script: {script_path}"
    # The interactive mini app reads commands from stdin; feed it an exit
    # command so the run terminates. Every other example gets no stdin.
    piped_input = None
    if folder == "11_cli_mini_app":
        piped_input = "/exit\n"
    return _run_command(
        [str(runtime_env.python), str(script_path)],
        cwd=ROOT,
        env=runtime_env.env,
        timeout_s=timeout_s,
        stdin=piped_input,
    )
def _notebook_cell_source(cell_index: int) -> str:
    """Return the concatenated source text of notebook cell *cell_index*."""
    cells = json.loads(NOTEBOOK_PATH.read_text())["cells"]
    return "".join(cells[cell_index]["source"])
def test_real_initialize_and_model_list(runtime_env: PreparedRuntimeEnv) -> None:
    """Initialize metadata and the model list are populated against a real server."""
    payload = _run_json_python(
        runtime_env,
        textwrap.dedent(
            """
            import json
            from codex_app_server import Codex
            with Codex() as codex:
                models = codex.models(include_hidden=True)
                print(json.dumps({
                    "user_agent": codex.metadata.user_agent,
                    "server_name": codex.metadata.server_name,
                    "server_version": codex.metadata.server_version,
                    "model_count": len(models.data),
                }))
            """
        ),
    )
    # Every metadata field must be a non-blank string.
    for key in ("user_agent", "server_name", "server_version"):
        value = payload[key]
        assert isinstance(value, str) and value.strip()
    assert isinstance(payload["model_count"], int)
def test_real_thread_and_turn_start_smoke(runtime_env: PreparedRuntimeEnv) -> None:
    """A thread starts, runs one turn, and reports usage tied to that thread/turn."""
    payload = _run_json_python(
        runtime_env,
        textwrap.dedent(
            """
            import json
            from codex_app_server import Codex, TextInput
            with Codex() as codex:
                thread = codex.thread_start(
                    model="gpt-5.4",
                    config={"model_reasoning_effort": "high"},
                )
                result = thread.turn(TextInput("hello")).run()
                print(json.dumps({
                    "thread_id": result.thread_id,
                    "turn_id": result.turn_id,
                    "items_count": len(result.items),
                    "has_usage": result.usage is not None,
                    "usage_thread_id": None if result.usage is None else result.usage.thread_id,
                    "usage_turn_id": None if result.usage is None else result.usage.turn_id,
                }))
            """
        ),
    )
    # Identifiers must be non-blank strings.
    for key in ("thread_id", "turn_id"):
        identifier = payload[key]
        assert isinstance(identifier, str) and identifier.strip()
    assert isinstance(payload["items_count"], int)
    assert payload["has_usage"] is True
    # Usage must be attributed to the same thread and turn that produced it.
    assert payload["usage_thread_id"] == payload["thread_id"]
    assert payload["usage_turn_id"] == payload["turn_id"]
def test_real_async_thread_turn_usage_and_ids_smoke(
    runtime_env: PreparedRuntimeEnv,
) -> None:
    """Async parity: one turn via AsyncCodex yields matching usage identifiers."""
    payload = _run_json_python(
        runtime_env,
        textwrap.dedent(
            """
            import asyncio
            import json
            from codex_app_server import AsyncCodex, TextInput
            async def main():
                async with AsyncCodex() as codex:
                    thread = await codex.thread_start(
                        model="gpt-5.4",
                        config={"model_reasoning_effort": "high"},
                    )
                    result = await (await thread.turn(TextInput("say ok"))).run()
                    print(json.dumps({
                        "thread_id": result.thread_id,
                        "turn_id": result.turn_id,
                        "items_count": len(result.items),
                        "has_usage": result.usage is not None,
                        "usage_thread_id": None if result.usage is None else result.usage.thread_id,
                        "usage_turn_id": None if result.usage is None else result.usage.turn_id,
                    }))
            asyncio.run(main())
            """
        ),
    )
    # Identifiers must be non-blank strings.
    for key in ("thread_id", "turn_id"):
        identifier = payload[key]
        assert isinstance(identifier, str) and identifier.strip()
    assert isinstance(payload["items_count"], int)
    assert payload["has_usage"] is True
    # Usage must be attributed to the same thread and turn that produced it.
    assert payload["usage_thread_id"] == payload["thread_id"]
    assert payload["usage_turn_id"] == payload["turn_id"]
def test_notebook_bootstrap_resolves_sdk_and_runtime_from_unrelated_cwd(
    runtime_env: PreparedRuntimeEnv,
) -> None:
    """Notebook bootstrap (cell 1) finds the SDK and runtime from a foreign cwd."""
    bootstrap_source = _notebook_cell_source(1)
    subprocess_env = runtime_env.env.copy()
    # Run from a throwaway directory unrelated to the repo checkout.
    with tempfile.TemporaryDirectory() as scratch_dir:
        result = _run_command(
            [str(runtime_env.python), "-c", bootstrap_source],
            cwd=Path(scratch_dir),
            env=subprocess_env,
            timeout_s=180,
        )
    failure_detail = (
        f"Notebook bootstrap failed from unrelated cwd.\n"
        f"STDOUT:\n{result.stdout}\n"
        f"STDERR:\n{result.stderr}"
    )
    assert result.returncode == 0, failure_detail
    assert "SDK source:" in result.stdout
    assert f"Runtime package: {runtime_env.runtime_version}" in result.stdout
def test_notebook_sync_cell_smoke(runtime_env: PreparedRuntimeEnv) -> None:
    """Notebook cells 1-3 run end-to-end and print status/server lines."""
    combined_source = "\n\n".join(
        _notebook_cell_source(index) for index in (1, 2, 3)
    )
    result = _run_python(runtime_env, combined_source, timeout_s=240)
    failure_detail = (
        f"Notebook sync smoke failed.\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
    )
    assert result.returncode == 0, failure_detail
    assert "status:" in result.stdout
    assert "server:" in result.stdout
def test_real_streaming_smoke_turn_completed(runtime_env: PreparedRuntimeEnv) -> None:
    """Streaming a turn always surfaces a turn/completed event."""
    payload = _run_json_python(
        runtime_env,
        textwrap.dedent(
            """
            import json
            from codex_app_server import Codex, TextInput
            with Codex() as codex:
                thread = codex.thread_start(
                    model="gpt-5.4",
                    config={"model_reasoning_effort": "high"},
                )
                turn = thread.turn(TextInput("Reply with one short sentence."))
                saw_delta = False
                saw_completed = False
                for event in turn.stream():
                    if event.method == "item/agentMessage/delta":
                        saw_delta = True
                    if event.method == "turn/completed":
                        saw_completed = True
                print(json.dumps({
                    "saw_delta": saw_delta,
                    "saw_completed": saw_completed,
                }))
            """
        ),
    )
    # Completion is mandatory; deltas are model-dependent, so only type-check them.
    assert payload["saw_completed"] is True
    assert isinstance(payload["saw_delta"], bool)
def test_real_turn_interrupt_smoke(runtime_env: PreparedRuntimeEnv) -> None:
    """Interrupting a turn leaves the thread usable for a follow-up turn."""
    payload = _run_json_python(
        runtime_env,
        textwrap.dedent(
            """
            import json
            from codex_app_server import Codex, TextInput
            with Codex() as codex:
                thread = codex.thread_start(
                    model="gpt-5.4",
                    config={"model_reasoning_effort": "high"},
                )
                turn = thread.turn(TextInput("Count from 1 to 200 with commas."))
                turn.interrupt()
                follow_up = thread.turn(TextInput("Say 'ok' only.")).run()
                print(json.dumps({"status": follow_up.status.value}))
            """
        ),
    )
    # The follow-up turn must terminate; either terminal status is acceptable.
    assert payload["status"] in {"completed", "failed"}
@pytest.mark.parametrize(("folder", "script"), EXAMPLE_CASES)
def test_real_examples_run_and_assert(
runtime_env: PreparedRuntimeEnv,
folder: str,
script: str,
) -> None:
result = _run_example(runtime_env, folder, script)
assert result.returncode == 0, (
f"Example failed: {folder}/{script}\n"
f"STDOUT:\n{result.stdout}\n"
f"STDERR:\n{result.stderr}"
)
out = result.stdout
if folder == "01_quickstart_constructor":
assert "Status:" in out and "Text:" in out
assert "Server: None None" not in out
elif folder == "02_turn_run":
assert "thread_id:" in out and "turn_id:" in out and "status:" in out
assert "usage: None" not in out
elif folder == "03_turn_stream_events":
assert "turn/completed" in out
elif folder == "04_models_and_metadata":
assert "models.count:" in out
assert "server_name=None" not in out
assert "server_version=None" not in out
elif folder == "05_existing_thread":
assert "Created thread:" in out
elif folder == "06_thread_lifecycle_and_controls":
assert "Lifecycle OK:" in out
elif folder in {"07_image_and_text", "08_local_image_and_text"}:
assert "completed" in out.lower() or "Status:" in out
elif folder == "09_async_parity":
assert "Thread:" in out and "Turn:" in out
elif folder == "10_error_handling_and_retry":
assert "Text:" in out
elif folder == "11_cli_mini_app":
assert "Thread:" in out
elif folder == "12_turn_params_kitchen_sink":
assert "Status:" in out and "Usage:" in out
elif folder == "13_model_select_and_turn_params":
assert "selected.model:" in out and "agent.message.params:" in out and "usage.params:" in out
assert "usage.params: None" not in out