Compare commits

...

1 Commits

Author SHA1 Message Date
pakrym-oai
86262e8a51 Add code_mode experimental feature 2026-03-03 20:07:10 -07:00
14 changed files with 1018 additions and 0 deletions

View File

@@ -355,6 +355,16 @@ impl Codex {
config.features.disable(Feature::JsReplToolsOnly);
config.startup_warnings.push(message);
}
if config.features.enabled(Feature::CodeMode)
&& let Err(err) = resolve_compatible_node(config.js_repl_node_path.as_deref()).await
{
let message = format!(
"Disabled `code_mode` for this session because the configured Node runtime is unavailable or incompatible. {err}"
);
warn!("{message}");
config.features.disable(Feature::CodeMode);
config.startup_warnings.push(message);
}
let allowed_skills_for_implicit_invocation =
loaded_skills.allowed_skills_for_implicit_invocation();

View File

@@ -80,6 +80,8 @@ pub enum Feature {
// Experimental
/// Enable JavaScript REPL tools backed by a persistent Node kernel.
JsRepl,
/// Enable a minimal JavaScript mode backed by Node's built-in vm runtime.
CodeMode,
/// Only expose js_repl tools directly to the model.
JsReplToolsOnly,
/// Use the single unified PTY-backed exec tool.
@@ -468,6 +470,12 @@ pub const FEATURES: &[FeatureSpec] = &[
},
default_enabled: false,
},
FeatureSpec {
id: Feature::CodeMode,
key: "code_mode",
stage: Stage::UnderDevelopment,
default_enabled: false,
},
FeatureSpec {
id: Feature::JsReplToolsOnly,
key: "js_repl_tools_only",

View File

@@ -23,6 +23,7 @@ use crate::config_loader::project_root_markers_from_config;
use crate::features::Feature;
use crate::skills::SkillMetadata;
use crate::skills::render_skills_section;
use crate::tools::code_mode;
use codex_app_server_protocol::ConfigLayerSource;
use dunce::canonicalize as normalize_path;
use std::path::PathBuf;
@@ -103,6 +104,13 @@ pub(crate) async fn get_user_instructions(
output.push_str(&js_repl_section);
}
if let Some(code_mode_section) = code_mode::instructions(config) {
if !output.is_empty() {
output.push_str("\n\n");
}
output.push_str(&code_mode_section);
}
let skills_section = skills.and_then(render_skills_section);
if let Some(skills_section) = skills_section {
if !output.is_empty() {

View File

@@ -0,0 +1,444 @@
use std::process::ExitStatus;
use std::sync::Arc;
use crate::client_common::tools::ToolSpec;
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::config::Config;
use crate::exec_env::create_env;
use crate::features::Feature;
use crate::function_tool::FunctionCallError;
use crate::tools::ToolRouter;
use crate::tools::context::SharedTurnDiffTracker;
use crate::tools::context::ToolPayload;
use crate::tools::js_repl::resolve_compatible_node;
use crate::tools::router::ToolCall;
use crate::tools::router::ToolCallSource;
use codex_protocol::models::ContentItem;
use codex_protocol::models::FunctionCallOutputBody;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::ResponseInputItem;
use serde::Deserialize;
use serde::Serialize;
use serde_json::Value as JsonValue;
use serde_json::json;
use tokio::io::AsyncBufReadExt;
use tokio::io::AsyncReadExt;
use tokio::io::AsyncWriteExt;
use tokio::io::BufReader;
/// JavaScript source of the Node-side runner, embedded at compile time from
/// `code_mode_runner.cjs`. `execute_node` hands it to Node through `--eval`.
const CODE_MODE_RUNNER_SOURCE: &str = include_str!("code_mode_runner.cjs");
/// JavaScript bridge template, embedded at compile time from `code_mode_bridge.js`.
/// `build_source` substitutes its placeholders with the enabled-tool list and the
/// user's code before the result is sent to the runner.
const CODE_MODE_BRIDGE_SOURCE: &str = include_str!("code_mode_bridge.js");
/// Shared handles needed to dispatch nested tool calls for one `code_mode` run.
///
/// The struct is cloned for every nested tool call; each field is a shared handle
/// that `call_nested_tool` forwards to the router via `Arc::clone`.
#[derive(Clone)]
struct ExecContext {
/// Session the nested tool calls are dispatched against.
session: Arc<Session>,
/// Turn context; supplies the tools config, dynamic tools, cwd and env policy.
turn: Arc<TurnContext>,
/// Diff tracker forwarded unchanged to every nested tool dispatch.
tracker: SharedTurnDiffTracker,
}
/// How a nested tool expects its input, as seen by the JavaScript bridge.
///
/// Serialized in snake_case (`"function"` / `"freeform"`); the bridge compares
/// against those exact strings.
#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)]
#[serde(rename_all = "snake_case")]
enum CodeModeToolKind {
/// Tool takes a JSON object encoded as a string.
Function,
/// Tool takes a raw string.
Freeform,
}
/// One nested tool advertised to the JavaScript bridge.
///
/// A list of these is serialized to JSON by `build_source` and embedded in the
/// bridge script.
#[derive(Clone, Debug, Serialize)]
struct EnabledTool {
/// Tool name as reported by the tool spec.
name: String,
/// Input convention the bridge must enforce for this tool.
kind: CodeModeToolKind,
}
/// Messages written by the host to the runner's stdin, one JSON object per line.
///
/// Internally tagged with a `type` field in snake_case (`"init"`, `"response"`).
#[derive(Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
enum HostToNodeMessage {
/// First message: the full script to evaluate plus an optional time limit.
Init {
source: String,
timeout_ms: Option<u64>,
},
/// Reply to a runner `tool_call`; `id` echoes the request id.
Response {
id: String,
content_items: Vec<JsonValue>,
},
}
/// Messages read from the runner's stdout, one JSON object per line.
///
/// Internally tagged with a `type` field in snake_case (`"tool_call"`, `"result"`).
#[derive(Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
enum NodeToHostMessage {
/// The script asked the host to run a nested tool; answered with `Response`.
ToolCall {
id: String,
name: String,
/// Raw input string; validated per tool kind by `build_nested_tool_payload`.
input: String,
},
/// Final message: the content items the script accumulated.
Result {
content_items: Vec<JsonValue>,
},
}
/// Returns the "Code Mode" section for the user instructions, or `None` when the
/// `CodeMode` feature is not enabled in `config`.
pub(crate) fn instructions(config: &Config) -> Option<String> {
    // Fragments are concatenated verbatim; every bullet except the last carries
    // its own trailing newline.
    const SECTION_PARTS: [&str; 9] = [
        "## Code Mode\n",
        "- Use `code_mode` for JavaScript execution in a Node-backed `node:vm` context.\n",
        "- `code_mode` is a freeform/custom tool. Direct `code_mode` calls must send raw JavaScript tool input. Do not wrap code in JSON, quotes, or markdown code fences.\n",
        "- Direct tool calls remain available while `code_mode` is enabled.\n",
        "- `code_mode` uses the same Node runtime resolution as `js_repl`. If needed, point `js_repl_node_path` at the Node binary you want Codex to use.\n",
        "- Call nested tools with `await tools[name](args)` or identifier wrappers like `await exec_command(args)` when the tool name is a valid JavaScript identifier. Nested tool calls resolve to arrays of content items.\n",
        "- Function tools require JSON object arguments. Freeform tools require raw strings.\n",
        "- `add_content(value)` is synchronous. It accepts a content item or an array of content items, so `add_content(await exec_command(...))` returns the same content items a direct tool call would expose to the model.\n",
        "- Only content passed to `add_content(value)` is surfaced back to the model.",
    ];

    config
        .features
        .enabled(Feature::CodeMode)
        .then(|| SECTION_PARTS.concat())
}
/// Runs `code` through the `code_mode` Node runner and returns the content items
/// the script produced.
///
/// # Errors
/// Any failure while building the script or running Node is returned as
/// `FunctionCallError::RespondToModel` carrying the error text.
pub(crate) async fn execute(
    session: Arc<Session>,
    turn: Arc<TurnContext>,
    tracker: SharedTurnDiffTracker,
    code: String,
    timeout_ms: Option<u64>,
) -> Result<Vec<FunctionCallOutputContentItem>, FunctionCallError> {
    let exec = ExecContext {
        session,
        turn,
        tracker,
    };
    let tools = build_enabled_tools(&exec);
    let source = match build_source(&code, &tools) {
        Ok(source) => source,
        Err(message) => return Err(FunctionCallError::RespondToModel(message)),
    };
    match execute_node(exec, source, timeout_ms).await {
        Ok(content_items) => Ok(content_items),
        Err(message) => Err(FunctionCallError::RespondToModel(message)),
    }
}
/// Spawns Node with the embedded runner, sends it `source`, services nested tool
/// calls until the runner reports a result, then waits for the process to exit.
///
/// Returns the runner's content items only when a `Result` message was received
/// AND the process exited successfully. Every other outcome is an `Err` string;
/// the two terminal failure cases include the exit status and captured stderr.
async fn execute_node(
exec: ExecContext,
source: String,
timeout_ms: Option<u64>,
) -> Result<Vec<FunctionCallOutputContentItem>, String> {
let node_path = resolve_compatible_node(exec.turn.config.js_repl_node_path.as_deref()).await?;
let env = create_env(&exec.turn.shell_environment_policy, None);
let mut cmd = tokio::process::Command::new(&node_path);
// The runner script is passed inline rather than as a file on disk.
cmd.arg("--eval");
cmd.arg(CODE_MODE_RUNNER_SOURCE);
cmd.current_dir(&exec.turn.cwd);
// Start from an empty environment so the child only sees what `create_env` built.
cmd.env_clear();
cmd.envs(env);
cmd.stdin(std::process::Stdio::piped())
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
// Early `?` returns below drop `child`; this makes that drop kill the process.
.kill_on_drop(true);
let mut child = cmd
.spawn()
.map_err(|err| format!("failed to start code_mode Node runtime: {err}"))?;
let stdout = child
.stdout
.take()
.ok_or_else(|| "code_mode runner missing stdout".to_string())?;
let stderr = child
.stderr
.take()
.ok_or_else(|| "code_mode runner missing stderr".to_string())?;
let mut stdin = child
.stdin
.take()
.ok_or_else(|| "code_mode runner missing stdin".to_string())?;
// Collect stderr on its own task while the loop below reads stdout; the trimmed
// text is only used to enrich failure messages.
let stderr_task = tokio::spawn(async move {
let mut reader = BufReader::new(stderr);
let mut buf = Vec::new();
let _ = reader.read_to_end(&mut buf).await;
String::from_utf8_lossy(&buf).trim().to_string()
});
write_message(&mut stdin, &HostToNodeMessage::Init { source, timeout_ms }).await?;
let mut stdout_lines = BufReader::new(stdout).lines();
let mut final_content_items = None;
// Protocol loop: one JSON message per stdout line until `Result` or EOF.
while let Some(line) = stdout_lines
.next_line()
.await
.map_err(|err| format!("failed to read code_mode runner stdout: {err}"))?
{
if line.trim().is_empty() {
continue;
}
let message: NodeToHostMessage = serde_json::from_str(&line)
.map_err(|err| format!("invalid code_mode runner message: {err}; line={line}"))?;
match message {
NodeToHostMessage::ToolCall { id, name, input } => {
// Nested tool failures come back as content items, not as an `Err`,
// so the script receives them as the call's result.
let response = HostToNodeMessage::Response {
id,
content_items: call_nested_tool(exec.clone(), name, input).await,
};
write_message(&mut stdin, &response).await?;
}
NodeToHostMessage::Result { content_items } => {
final_content_items = Some(output_content_items_from_json_values(content_items)?);
break;
}
}
}
// Close our end of the child's stdin before waiting for it to exit.
drop(stdin);
let status = child
.wait()
.await
.map_err(|err| format!("failed to wait for code_mode runner: {err}"))?;
let stderr = stderr_task
.await
.map_err(|err| format!("failed to collect code_mode stderr: {err}"))?;
match final_content_items {
Some(content_items) if status.success() => Ok(content_items),
// A result arrived but the process exited unsuccessfully: the items are
// discarded and the failure (with stderr) is reported instead.
Some(_) => Err(format_runner_failure(
"code_mode execution failed",
status,
&stderr,
)),
None => Err(format_runner_failure(
"code_mode runner exited without returning a result",
status,
&stderr,
)),
}
}
/// Serializes `message` as a single newline-terminated JSON line and writes it to
/// the runner's stdin.
///
/// The payload and its `\n` delimiter go out in one `write_all`, so the pipe
/// never holds a frame without its terminator between two awaited writes, and
/// each message needs one write instead of two.
///
/// # Errors
/// Returns a descriptive string if serialization, the write, or the flush fails.
async fn write_message(
    stdin: &mut tokio::process::ChildStdin,
    message: &HostToNodeMessage,
) -> Result<(), String> {
    let mut line = serde_json::to_string(message)
        .map_err(|err| format!("failed to serialize code_mode message: {err}"))?;
    // The runner reads stdin line by line, so the newline is the frame delimiter.
    line.push('\n');
    stdin
        .write_all(line.as_bytes())
        .await
        .map_err(|err| format!("failed to write code_mode message: {err}"))?;
    stdin
        .flush()
        .await
        .map_err(|err| format!("failed to flush code_mode message: {err}"))
}
/// Appends a `node stderr:` section to `message` unless `stderr` is blank.
///
/// Blankness is judged on the trimmed text, but the section carries `stderr`
/// exactly as given.
fn append_stderr(message: String, stderr: &str) -> String {
    match stderr.trim() {
        "" => message,
        _ => format!("{message}\n\nnode stderr:\n{stderr}"),
    }
}
/// Builds a runner failure message of the form `"{message} (status {status})"`,
/// followed by the captured stderr section when there is any.
fn format_runner_failure(message: &str, status: ExitStatus, stderr: &str) -> String {
    let headline = format!("{message} (status {status})");
    append_stderr(headline, stderr)
}
/// Builds the script sent to the runner by splicing the enabled-tool list (as
/// JSON) and the user's code into the bridge template.
///
/// The template is split once at each placeholder and reassembled, so inserted
/// text is never re-scanned for placeholders. With chained `str::replace` calls a
/// tool name containing the user-code placeholder would get the user's code
/// substituted inside the JSON tool list.
///
/// # Errors
/// Returns a message if the tool list cannot be serialized, or if the template
/// does not contain both placeholders in the expected order (tool list first).
fn build_source(user_code: &str, enabled_tools: &[EnabledTool]) -> Result<String, String> {
    const ENABLED_TOOLS_PLACEHOLDER: &str = "__CODE_MODE_ENABLED_TOOLS_PLACEHOLDER__";
    const USER_CODE_PLACEHOLDER: &str = "__CODE_MODE_USER_CODE_PLACEHOLDER__";

    let enabled_tools_json = serde_json::to_string(enabled_tools)
        .map_err(|err| format!("failed to serialize enabled tools: {err}"))?;

    let (prelude, rest) = CODE_MODE_BRIDGE_SOURCE
        .split_once(ENABLED_TOOLS_PLACEHOLDER)
        .ok_or_else(|| {
            "code_mode bridge template is missing the enabled tools placeholder".to_string()
        })?;
    let (setup, epilogue) = rest.split_once(USER_CODE_PLACEHOLDER).ok_or_else(|| {
        "code_mode bridge template is missing the user code placeholder".to_string()
    })?;

    Ok([
        prelude,
        enabled_tools_json.as_str(),
        setup,
        user_code,
        epilogue,
    ]
    .concat())
}
/// Lists the tools a `code_mode` script may call: every spec produced by a router
/// built from the nested-tools config, minus `code_mode` itself, sorted by name
/// with duplicate names removed.
fn build_enabled_tools(exec: &ExecContext) -> Vec<EnabledTool> {
    let nested_tools_config = exec.turn.tools_config.for_code_mode_nested_tools();
    let router = ToolRouter::from_config(
        &nested_tools_config,
        None,
        None,
        exec.turn.dynamic_tools.as_slice(),
    );

    let mut tools: Vec<EnabledTool> = Vec::new();
    for spec in router.specs() {
        let name = spec.name().to_string();
        if name == "code_mode" {
            continue;
        }
        tools.push(EnabledTool {
            kind: tool_kind_for_spec(&spec),
            name,
        });
    }

    // Sort first so entries with equal names are adjacent for `dedup_by`.
    tools.sort_by(|a, b| a.name.cmp(&b.name));
    tools.dedup_by(|a, b| a.name == b.name);
    tools
}
async fn call_nested_tool(exec: ExecContext, tool_name: String, input: String) -> Vec<JsonValue> {
if tool_name == "code_mode" {
return error_content_items_json("code_mode cannot invoke itself".to_string());
}
let nested_config = exec.turn.tools_config.for_code_mode_nested_tools();
let router = ToolRouter::from_config(
&nested_config,
None,
None,
exec.turn.dynamic_tools.as_slice(),
);
let specs = router.specs();
let payload = match build_nested_tool_payload(&specs, &tool_name, input) {
Ok(payload) => payload,
Err(error) => return error_content_items_json(error),
};
let call = ToolCall {
tool_name: tool_name.clone(),
call_id: format!("code_mode-{}", uuid::Uuid::new_v4()),
payload,
};
let response = router
.dispatch_tool_call(
Arc::clone(&exec.session),
Arc::clone(&exec.turn),
Arc::clone(&exec.tracker),
call,
ToolCallSource::CodeMode,
)
.await;
match response {
Ok(response) => {
json_values_from_output_content_items(content_items_from_response_input(response))
}
Err(error) => error_content_items_json(error.to_string()),
}
}
/// Classifies a tool spec: `Freeform` specs map to `Freeform`, every other spec
/// variant is treated as a `Function` tool.
fn tool_kind_for_spec(spec: &ToolSpec) -> CodeModeToolKind {
    match spec {
        ToolSpec::Freeform(_) => CodeModeToolKind::Freeform,
        _ => CodeModeToolKind::Function,
    }
}
fn tool_kind_for_name(specs: &[ToolSpec], tool_name: &str) -> Result<CodeModeToolKind, String> {
specs
.iter()
.find(|spec| spec.name() == tool_name)
.map(tool_kind_for_spec)
.ok_or_else(|| format!("tool `{tool_name}` is not enabled in code_mode"))
}
/// Wraps the script-supplied `input` in the payload variant that matches the
/// tool's kind.
///
/// # Errors
/// Fails when the tool is not enabled, or when a function tool's input is not a
/// JSON object.
fn build_nested_tool_payload(
    specs: &[ToolSpec],
    tool_name: &str,
    input: String,
) -> Result<ToolPayload, String> {
    let kind = tool_kind_for_name(specs, tool_name)?;
    // Only function tools have structured arguments to validate up front.
    if let CodeModeToolKind::Function = kind {
        validate_function_arguments(tool_name, &input)?;
    }
    Ok(match kind {
        CodeModeToolKind::Function => ToolPayload::Function { arguments: input },
        CodeModeToolKind::Freeform => ToolPayload::Custom { input },
    })
}
fn validate_function_arguments(tool_name: &str, input: &str) -> Result<(), String> {
let value: JsonValue = serde_json::from_str(input)
.map_err(|err| format!("tool `{tool_name}` expects a JSON object for arguments: {err}"))?;
if value.is_object() {
Ok(())
} else {
Err(format!(
"tool `{tool_name}` expects a JSON object for arguments"
))
}
}
fn content_items_from_response_input(
response: ResponseInputItem,
) -> Vec<FunctionCallOutputContentItem> {
match response {
ResponseInputItem::Message { content, .. } => content
.into_iter()
.map(function_output_content_item_from_content_item)
.collect(),
ResponseInputItem::FunctionCallOutput { output, .. } => {
content_items_from_function_output(output)
}
ResponseInputItem::CustomToolCallOutput { output, .. } => {
vec![FunctionCallOutputContentItem::InputText { text: output }]
}
ResponseInputItem::McpToolCallOutput { result, .. } => match result {
Ok(result) => {
content_items_from_function_output(FunctionCallOutputPayload::from(&result))
}
Err(error) => vec![FunctionCallOutputContentItem::InputText { text: error }],
},
}
}
/// Extracts content items from a function-call output payload; a plain-text body
/// becomes a single `InputText` item.
fn content_items_from_function_output(
    output: FunctionCallOutputPayload,
) -> Vec<FunctionCallOutputContentItem> {
    let body = output.body;
    match body {
        FunctionCallOutputBody::ContentItems(content_items) => content_items,
        FunctionCallOutputBody::Text(text) => {
            vec![FunctionCallOutputContentItem::InputText { text }]
        }
    }
}
/// Converts a message content item into its function-output counterpart. Both
/// input and output text map to `InputText`; images keep their URL.
fn function_output_content_item_from_content_item(
    item: ContentItem,
) -> FunctionCallOutputContentItem {
    match item {
        ContentItem::InputImage { image_url } => {
            FunctionCallOutputContentItem::InputImage { image_url }
        }
        ContentItem::InputText { text } => FunctionCallOutputContentItem::InputText { text },
        ContentItem::OutputText { text } => FunctionCallOutputContentItem::InputText { text },
    }
}
/// Encodes content items as the JSON objects exchanged with the runner
/// (`{"type": "input_text", ...}` / `{"type": "input_image", ...}`).
fn json_values_from_output_content_items(
    content_items: Vec<FunctionCallOutputContentItem>,
) -> Vec<JsonValue> {
    let mut values = Vec::with_capacity(content_items.len());
    for item in content_items {
        let value = match item {
            FunctionCallOutputContentItem::InputText { text } => {
                json!({ "type": "input_text", "text": text })
            }
            FunctionCallOutputContentItem::InputImage { image_url } => {
                json!({ "type": "input_image", "image_url": image_url })
            }
        };
        values.push(value);
    }
    values
}
/// Decodes the runner's JSON content items into typed output items.
///
/// # Errors
/// Stops at the first item that fails to deserialize and reports its index.
fn output_content_items_from_json_values(
    content_items: Vec<JsonValue>,
) -> Result<Vec<FunctionCallOutputContentItem>, String> {
    let mut decoded = Vec::with_capacity(content_items.len());
    for (index, item) in content_items.into_iter().enumerate() {
        let item = serde_json::from_value(item)
            .map_err(|err| format!("invalid code_mode content item at index {index}: {err}"))?;
        decoded.push(item);
    }
    Ok(decoded)
}
/// Wraps an error message as a one-element list holding an `input_text` item.
fn error_content_items_json(message: String) -> Vec<JsonValue> {
    let item = json!({ "type": "input_text", "text": message });
    vec![item]
}

View File

@@ -0,0 +1,98 @@
// Bridge prelude evaluated inside the vm context ahead of the user's code.
// The host substitutes text for the two placeholder identifiers in this file,
// so the script must remain valid JavaScript around them. The whole script is
// one async function, which lets the user's code use top-level `await`.
(async () => {
// List of { name, kind } records for the tools the script may call.
const __codexEnabledTools = __CODE_MODE_ENABLED_TOOLS_PLACEHOLDER__;
const __codexEnabledToolNames = __codexEnabledTools.map((tool) => tool.name);
const __codexToolKinds = new Map(__codexEnabledTools.map((tool) => [tool.name, tool.kind]));
// Accumulates everything passed to add_content(); exposed on globalThis below.
const __codexContentItems = [];
// Validates one content item and returns a fresh object holding only the known
// fields for its type. Throws TypeError for non-objects, wrong field types, or
// unsupported item types.
function __codexCloneContentItem(item) {
if (!item || typeof item !== 'object') {
throw new TypeError('content item must be an object');
}
switch (item.type) {
case 'input_text':
if (typeof item.text !== 'string') {
throw new TypeError('content item "input_text" requires a string text field');
}
return { type: 'input_text', text: item.text };
case 'input_image':
if (typeof item.image_url !== 'string') {
throw new TypeError('content item "input_image" requires a string image_url field');
}
return { type: 'input_image', image_url: item.image_url };
default:
throw new TypeError(`unsupported content item type "${item.type}"`);
}
}
// Accepts a single item or an arbitrarily nested array of items and returns a
// flat array of validated copies.
function __codexNormalizeContentItems(value) {
if (Array.isArray(value)) {
return value.flatMap((entry) => __codexNormalizeContentItems(entry));
}
return [__codexCloneContentItem(value)];
}
// Checks the arguments against the tool's kind, then forwards the call through
// the `__codex_tool_call` function that the runner places in the context.
// Freeform tools take a string; function tools take a plain object (or nothing,
// which is sent as '{}') that is JSON-encoded before sending.
async function __codexCallTool(name, args) {
const toolKind = __codexToolKinds.get(name);
if (toolKind === undefined) {
throw new Error(`Tool "${name}" is not enabled in code_mode`);
}
if (toolKind === 'freeform') {
if (typeof args !== 'string') {
throw new TypeError(`Tool "${name}" expects a string input`);
}
return await __codex_tool_call(name, args);
}
if (args === undefined) {
return await __codex_tool_call(name, '{}');
}
if (!args || typeof args !== 'object' || Array.isArray(args)) {
throw new TypeError(`Tool "${name}" expects a JSON object for arguments`);
}
return await __codex_tool_call(name, JSON.stringify(args));
}
// Published as a global so the runner can read the accumulated items from
// outside this function once evaluation finishes.
Object.defineProperty(globalThis, '__codexContentItems', {
value: __codexContentItems,
configurable: true,
enumerable: false,
writable: false,
});
globalThis.codex = {
enabledTools: Object.freeze(__codexEnabledToolNames.slice()),
};
// Synchronous: validates, appends to the accumulator, and returns the
// normalized items that were added.
globalThis.add_content = (value) => {
const contentItems = __codexNormalizeContentItems(value);
__codexContentItems.push(...contentItems);
return contentItems;
};
// Every property access yields an async caller; names that are not enabled are
// only rejected when the returned function is invoked.
globalThis.tools = new Proxy(Object.create(null), {
get(_target, prop) {
const name = String(prop);
return async (args) => __codexCallTool(name, args);
},
});
// console methods are no-ops, so console output from user code is discarded.
globalThis.console = Object.freeze({
log() {},
info() {},
warn() {},
error() {},
debug() {},
});
// Define a global wrapper for each tool whose name is identifier-shaped and does
// not collide with something already present on globalThis (including the
// globals defined above).
for (const name of __codexEnabledToolNames) {
if (/^[A-Za-z_$][0-9A-Za-z_$]*$/.test(name) && !(name in globalThis)) {
Object.defineProperty(globalThis, name, {
value: async (args) => __codexCallTool(name, args),
configurable: true,
enumerable: false,
writable: false,
});
}
}
__CODE_MODE_USER_CODE_PLACEHOLDER__
})();

View File

@@ -0,0 +1,162 @@
'use strict';
const readline = require('node:readline');
const vm = require('node:vm');
/**
 * Awaits `value`, rejecting if it has not settled within `timeoutMs`.
 *
 * @param {*} value Promise (or plain value) to await.
 * @param {number|null|undefined} timeoutMs Deadline in milliseconds; `null` or
 *   `undefined` disables the deadline entirely.
 * @returns {Promise<*>} The settled value of `value`.
 * @throws {Error} "JavaScript execution timed out after <n>ms" when the deadline
 *   fires first.
 */
async function awaitWithDeadline(value, timeoutMs) {
  const hasDeadline = timeoutMs !== null && timeoutMs !== undefined;
  if (!hasDeadline) {
    return await value;
  }

  let timer;
  const deadline = new Promise((_resolve, reject) => {
    timer = setTimeout(() => {
      reject(new Error(`JavaScript execution timed out after ${timeoutMs}ms`));
    }, timeoutMs);
    // Do not let the deadline timer alone keep the process alive.
    timer.unref?.();
  });

  try {
    return await Promise.race([Promise.resolve(value), deadline]);
  } finally {
    if (timer) {
      clearTimeout(timer);
    }
  }
}
/**
 * Sets up the line-delimited JSON protocol with the host over stdin/stdout.
 *
 * @returns {{init: Promise<object>, request: Function, send: Function}}
 *   `init` resolves with the host's `init` message; `request(type, payload)`
 *   sends a message with a fresh id and resolves with the matching response's
 *   content items; `send(message)` writes one message and resolves once the
 *   write callback fires.
 */
function createProtocol() {
  // Outstanding requests keyed by message id.
  const pending = new Map();
  let nextId = 0;

  let resolveInit;
  let rejectInit;
  const init = new Promise((resolve, reject) => {
    resolveInit = resolve;
    rejectInit = reject;
  });

  const lines = readline.createInterface({
    input: process.stdin,
    crlfDelay: Infinity,
  });

  function handleLine(line) {
    if (!line.trim()) {
      return;
    }
    let message;
    try {
      message = JSON.parse(line);
    } catch (error) {
      rejectInit(error);
      return;
    }
    switch (message.type) {
      case 'init':
        resolveInit(message);
        break;
      case 'response': {
        const entry = pending.get(message.id);
        // Responses for unknown ids are ignored.
        if (entry) {
          pending.delete(message.id);
          entry.resolve(Array.isArray(message.content_items) ? message.content_items : []);
        }
        break;
      }
      default:
        rejectInit(new Error(`Unknown protocol message type: ${message.type}`));
    }
  }

  function handleClose() {
    // Once stdin is gone no response can arrive: fail init and every waiter.
    const error = new Error('stdin closed');
    rejectInit(error);
    for (const entry of pending.values()) {
      entry.reject(error);
    }
    pending.clear();
  }

  lines.on('line', handleLine);
  lines.on('close', handleClose);

  function send(message) {
    return new Promise((resolve, reject) => {
      process.stdout.write(`${JSON.stringify(message)}\n`, (error) => {
        if (error) {
          reject(error);
        } else {
          resolve();
        }
      });
    });
  }

  function request(type, payload) {
    const id = `msg-${++nextId}`;
    return new Promise((resolve, reject) => {
      // Register before sending so the response handler can find the entry.
      pending.set(id, { resolve, reject });
      void send({ type, id, ...payload }).catch((error) => {
        pending.delete(id);
        reject(error);
      });
    });
  }

  return { init, request, send };
}
/**
 * Reads the content items accumulated inside the vm context.
 *
 * The array is serialized inside the context and parsed here, so the caller
 * gets a plain copy. Any failure, or a non-array value, yields `[]`.
 *
 * @param {object} context Contextified object the script ran in.
 * @returns {Array} The accumulated content items, or an empty array.
 */
function readContentItems(context) {
  let parsed;
  try {
    parsed = JSON.parse(
      vm.runInContext('JSON.stringify(globalThis.__codexContentItems ?? [])', context)
    );
  } catch {
    return [];
  }
  return Array.isArray(parsed) ? parsed : [];
}
/**
 * Runner entry point: waits for the host's `init` message, evaluates its
 * `source` in a fresh vm context, then reports the accumulated content items
 * and exits with 0 on success or 1 on failure.
 *
 * On failure the error (stack when available) is written to stderr before the
 * result message is sent.
 */
async function main() {
  const protocol = createProtocol();
  const request = await protocol.init;

  // The only host capability placed in the context is the tool-call bridge.
  const sandbox = {
    __codex_tool_call: async (name, input) =>
      protocol.request('tool_call', {
        name: String(name),
        input,
      }),
  };
  const context = vm.createContext(sandbox);

  const timeoutMs = request.timeout_ms;
  const hasTimeout = timeoutMs !== null && timeoutMs !== undefined;

  // Sends whatever content was collected so far, then terminates the process.
  const reportAndExit = async (exitCode) => {
    await protocol.send({
      type: 'result',
      content_items: readContentItems(context),
    });
    process.exit(exitCode);
  };

  try {
    const options = hasTimeout
      ? { displayErrors: true, microtaskMode: 'afterEvaluate', timeout: timeoutMs }
      : { displayErrors: true, microtaskMode: 'afterEvaluate' };
    // The vm `timeout` option applies to the evaluation call itself; the
    // deadline wrapper applies to the promise the script returns.
    await awaitWithDeadline(vm.runInContext(request.source, context, options), timeoutMs);
    await reportAndExit(0);
  } catch (error) {
    process.stderr.write(`${String(error && error.stack ? error.stack : error)}\n`);
    await reportAndExit(1);
  }
}
// Start the runner. A rejection that escapes main() is written to stderr and
// marks the process as failed through its exit code.
main().catch((error) => {
  try {
    const detail = error && error.stack ? error.stack : error;
    process.stderr.write(`${String(detail)}\n`);
  } finally {
    process.exitCode = 1;
  }
});

View File

@@ -20,6 +20,7 @@ pub type SharedTurnDiffTracker = Arc<Mutex<TurnDiffTracker>>;
pub enum ToolCallSource {
Direct,
JsRepl,
CodeMode,
}
#[derive(Clone)]

View File

@@ -0,0 +1,72 @@
use async_trait::async_trait;
use crate::features::Feature;
use crate::function_tool::FunctionCallError;
use crate::tools::code_mode;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolOutput;
use crate::tools::context::ToolPayload;
use crate::tools::handlers::parse_arguments;
use crate::tools::registry::ToolHandler;
use crate::tools::registry::ToolKind;
use codex_protocol::models::FunctionCallOutputBody;
use serde::Deserialize;
/// Stateless tool handler for the `code_mode` tool; see its `ToolHandler` impl.
pub struct CodeModeHandler;
/// Arguments accepted when `code_mode` is invoked as a function tool (JSON
/// arguments). Unknown fields are rejected.
#[derive(Deserialize)]
#[serde(deny_unknown_fields)]
struct CodeModeArgs {
/// JavaScript source to run.
code: String,
/// Optional execution time limit in milliseconds; also accepted under the key
/// `_timeout_ms`. Absent means no limit is passed on.
#[serde(default, rename = "timeout_ms", alias = "_timeout_ms")]
timeout_ms: Option<u64>,
}
/// `code_mode` tool handler: accepts either a custom (raw source) payload or a
/// function payload with JSON arguments, and runs the code through
/// `code_mode::execute`.
#[async_trait]
impl ToolHandler for CodeModeHandler {
    fn kind(&self) -> ToolKind {
        ToolKind::Function
    }

    /// Both function-style and custom (freeform) payloads are handled.
    fn matches_kind(&self, payload: &ToolPayload) -> bool {
        matches!(
            payload,
            ToolPayload::Custom { .. } | ToolPayload::Function { .. }
        )
    }

    /// Extracts the code (and optional timeout) from the payload and executes it.
    ///
    /// # Errors
    /// Responds to the model with an error when the feature flag is off, the
    /// payload kind is unsupported, the arguments fail to parse, or execution
    /// fails.
    async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> {
        let ToolInvocation {
            session,
            turn,
            tracker,
            payload,
            ..
        } = invocation;

        let feature_enabled = session.features().enabled(Feature::CodeMode);
        if !feature_enabled {
            return Err(FunctionCallError::RespondToModel(
                "code_mode is disabled by feature flag".to_string(),
            ));
        }

        let (code, timeout_ms) = match payload {
            // Raw source text carries no timeout.
            ToolPayload::Custom { input } => (input, None),
            ToolPayload::Function { arguments } => {
                let parsed: CodeModeArgs = parse_arguments(&arguments)?;
                (parsed.code, parsed.timeout_ms)
            }
            _ => {
                return Err(FunctionCallError::RespondToModel(
                    "code_mode expects function or custom payload".to_string(),
                ));
            }
        };

        let body = code_mode::execute(session, turn, tracker, code, timeout_ms)
            .await
            .map(FunctionCallOutputBody::ContentItems)?;
        Ok(ToolOutput::Function {
            body,
            success: Some(true),
        })
    }
}

View File

@@ -1,5 +1,6 @@
pub(crate) mod agent_jobs;
pub mod apply_patch;
mod code_mode;
mod dynamic;
mod grep_files;
mod js_repl;
@@ -24,6 +25,7 @@ use crate::function_tool::FunctionCallError;
use crate::sandboxing::SandboxPermissions;
use crate::sandboxing::normalize_additional_permissions;
pub use apply_patch::ApplyPatchHandler;
pub use code_mode::CodeModeHandler;
use codex_protocol::models::PermissionProfile;
use codex_protocol::protocol::AskForApproval;
pub use dynamic::DynamicToolHandler;

View File

@@ -1,3 +1,4 @@
pub mod code_mode;
pub mod context;
pub mod events;
pub(crate) mod handlers;

View File

@@ -52,6 +52,7 @@ pub(crate) struct ToolsConfig {
pub agent_roles: BTreeMap<String, AgentRoleConfig>,
pub search_tool: bool,
pub request_permission_enabled: bool,
pub code_mode_enabled: bool,
pub js_repl_enabled: bool,
pub js_repl_tools_only: bool,
pub collab_tools: bool,
@@ -77,6 +78,7 @@ impl ToolsConfig {
session_source,
} = params;
let include_apply_patch_tool = features.enabled(Feature::ApplyPatchFreeform);
let include_code_mode = features.enabled(Feature::CodeMode);
let include_js_repl = features.enabled(Feature::JsRepl);
let include_js_repl_tools_only =
include_js_repl && features.enabled(Feature::JsReplToolsOnly);
@@ -136,6 +138,7 @@ impl ToolsConfig {
agent_roles: BTreeMap::new(),
search_tool: include_search_tool,
request_permission_enabled,
code_mode_enabled: include_code_mode,
js_repl_enabled: include_js_repl,
js_repl_tools_only: include_js_repl_tools_only,
collab_tools: include_collab_tools,
@@ -155,6 +158,12 @@ impl ToolsConfig {
self.allow_login_shell = allow_login_shell;
self
}
pub fn for_code_mode_nested_tools(&self) -> Self {
let mut nested = self.clone();
nested.code_mode_enabled = false;
nested
}
}
/// Generic JSONSchema subset needed for our tool definitions
@@ -1344,6 +1353,32 @@ fn create_js_repl_reset_tool() -> ToolSpec {
})
}
/// Builds the freeform `code_mode` tool spec.
///
/// The description ends with the comma-separated `enabled_tool_names`, or
/// `none` when the slice is empty. The lark grammar accepts any non-empty text.
fn create_code_mode_tool(enabled_tool_names: &[String]) -> ToolSpec {
    const CODE_MODE_FREEFORM_GRAMMAR: &str = r#"
start: source
source: /[\s\S]+/
"#;

    let enabled_list = match enabled_tool_names {
        [] => "none".to_string(),
        names => names.join(", "),
    };
    let description = format!(
        "Runs JavaScript in a Node-backed `node:vm` context. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `code_mode` is enabled. Inside JavaScript, call nested tools with `await tools[name](args)` or identifier wrappers like `await shell(args)` when the tool name is a valid JS identifier. Nested tool calls resolve to arrays of content items. Function tools require JSON object arguments. Freeform tools require raw strings. Use synchronous `add_content(value)` with a content item or content-item array, including `add_content(await exec_command(...))`, to return the same content items a direct tool call would expose to the model. Only content passed to `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}."
    );
    let format = FreeformToolFormat {
        r#type: "grammar".to_string(),
        syntax: "lark".to_string(),
        definition: CODE_MODE_FREEFORM_GRAMMAR.to_string(),
    };

    ToolSpec::Freeform(FreeformTool {
        name: "code_mode".to_string(),
        description,
        format,
    })
}
fn create_list_mcp_resources_tool() -> ToolSpec {
let properties = BTreeMap::from([
(
@@ -1649,6 +1684,7 @@ pub(crate) fn build_specs(
dynamic_tools: &[DynamicToolSpec],
) -> ToolRegistryBuilder {
use crate::tools::handlers::ApplyPatchHandler;
use crate::tools::handlers::CodeModeHandler;
use crate::tools::handlers::DynamicToolHandler;
use crate::tools::handlers::GrepFilesHandler;
use crate::tools::handlers::JsReplHandler;
@@ -1683,10 +1719,31 @@ pub(crate) fn build_specs(
default_mode_request_user_input: config.default_mode_request_user_input,
});
let search_tool_handler = Arc::new(SearchToolBm25Handler);
let code_mode_handler = Arc::new(CodeModeHandler);
let js_repl_handler = Arc::new(JsReplHandler);
let js_repl_reset_handler = Arc::new(JsReplResetHandler);
let request_permission_enabled = config.request_permission_enabled;
if config.code_mode_enabled {
let nested_config = config.for_code_mode_nested_tools();
let (nested_specs, _) = build_specs(
&nested_config,
mcp_tools.clone(),
app_tools.clone(),
dynamic_tools,
)
.build();
let mut enabled_tool_names = nested_specs
.into_iter()
.map(|spec| spec.spec.name().to_string())
.filter(|name| name != "code_mode")
.collect::<Vec<_>>();
enabled_tool_names.sort();
enabled_tool_names.dedup();
builder.push_spec(create_code_mode_tool(&enabled_tool_names));
builder.register_handler("code_mode", code_mode_handler);
}
match &config.shell_type {
ConfigShellToolType::Default => {
builder.push_spec_with_parallel_support(

View File

@@ -0,0 +1,118 @@
#![allow(clippy::expect_used, clippy::unwrap_used)]
use anyhow::Result;
use codex_core::features::Feature;
use core_test_support::responses;
use core_test_support::responses::ResponseMock;
use core_test_support::responses::ResponsesRequest;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_custom_tool_call;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::sse;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex::TestCodex;
use core_test_support::test_codex::test_codex;
use pretty_assertions::assert_eq;
use std::fs;
use wiremock::MockServer;
/// Returns the text and success flag of the custom tool output recorded for
/// `call_id`; missing text becomes an empty string.
///
/// Panics if the request carries no custom tool output for that call.
fn custom_tool_output_text_and_success(
    req: &ResponsesRequest,
    call_id: &str,
) -> (String, Option<bool>) {
    let recorded = req.custom_tool_call_output_content_and_success(call_id);
    let (text, success) = recorded.expect("custom tool output should be present");
    (text.unwrap_or_default(), success)
}
/// Runs one turn in which the mocked model issues a `code_mode` custom tool call
/// containing `code`, then finishes with an assistant message.
///
/// Returns the test harness and the mock for the second request, i.e. the one
/// that carries the tool output back to the model.
async fn run_code_mode_turn(
    server: &MockServer,
    prompt: &str,
    code: &str,
    include_apply_patch: bool,
) -> Result<(TestCodex, ResponseMock)> {
    let mut builder = test_codex().with_config(move |config| {
        config.features.enable(Feature::CodeMode);
        config.include_apply_patch_tool = include_apply_patch;
    });
    let test = builder.build(server).await?;

    // First response: the model calls `code_mode` with the supplied source.
    let tool_call_events = vec![
        ev_response_created("resp-1"),
        ev_custom_tool_call("call-1", "code_mode", code),
        ev_completed("resp-1"),
    ];
    responses::mount_sse_once(server, sse(tool_call_events)).await;

    // Second response: the model wraps up after receiving the tool output.
    let final_events = vec![
        ev_assistant_message("msg-1", "done"),
        ev_completed("resp-2"),
    ];
    let second_mock = responses::mount_sse_once(server, sse(final_events)).await;

    test.submit_turn(prompt).await?;
    Ok((test, second_mock))
}
/// A script can call `exec_command` and surface its output via `add_content`.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_return_exec_command_output() -> Result<()> {
    skip_if_no_network!(Ok(()));

    let server = responses::start_mock_server().await;
    let code = r#"
add_content(await exec_command({ cmd: "printf code_mode_exec_marker" }));
"#;
    let (_test, second_mock) =
        run_code_mode_turn(&server, "use code_mode to run exec_command", code, false).await?;

    let request = second_mock.single_request();
    let (output, success) = custom_tool_output_text_and_success(&request, "call-1");
    assert_ne!(
        success,
        Some(false),
        "code_mode call failed unexpectedly: {output}"
    );
    assert_eq!(output, "code_mode_exec_marker");

    Ok(())
}
/// A script can call the freeform `apply_patch` tool and the patch lands on disk.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_apply_patch_via_nested_tool() -> Result<()> {
    skip_if_no_network!(Ok(()));

    let server = responses::start_mock_server().await;
    let file_name = "code_mode_apply_patch.txt";
    let patch = format!(
        "*** Begin Patch\n*** Add File: {file_name}\n+hello from code_mode\n*** End Patch\n"
    );
    // `{patch:?}` renders the patch as a quoted, escaped string literal that the
    // script passes to `apply_patch` as its raw input.
    let code = format!("const items = await apply_patch({patch:?});\nadd_content(items);\n");

    let (test, second_mock) =
        run_code_mode_turn(&server, "use code_mode to run apply_patch", &code, true).await?;

    let request = second_mock.single_request();
    let (output, success) = custom_tool_output_text_and_success(&request, "call-1");
    assert_ne!(
        success,
        Some(false),
        "code_mode apply_patch call failed unexpectedly: {output}"
    );

    let written = fs::read_to_string(test.cwd_path().join(file_name))?;
    assert_eq!(written, "hello from code_mode\n");

    Ok(())
}

View File

@@ -65,6 +65,7 @@ mod auth_refresh;
mod cli_stream;
mod client;
mod client_websockets;
mod code_mode;
mod codex_delegate;
mod collaboration_instructions;
mod compact;

36
docs/code_mode.md Normal file
View File

@@ -0,0 +1,36 @@
# Code Mode (`code_mode`)
`code_mode` runs JavaScript in a Node-backed `node:vm` context.
## Feature gate
`code_mode` is disabled by default. The tool is only exposed to the model when the feature is enabled in your config:
```toml
[features]
code_mode = true
```
Unlike `js_repl`, enabling `code_mode` does **not** disable direct model tool calls.
## Node runtime
`code_mode` uses the same Node runtime resolution as `js_repl`:
1. `CODEX_JS_REPL_NODE_PATH` environment variable
2. `js_repl_node_path` in config/profile
3. `node` discovered on `PATH`
## Usage
- `code_mode` is a freeform tool: send raw JavaScript source text.
- It exposes async wrappers for other tools through `await tools[name](args)` and identifier globals for valid tool names. Nested tool calls resolve to arrays of content items.
- Function tools require JSON object arguments. Freeform tools require raw strings.
- `add_content(value)` is synchronous. It accepts a content item or an array of content items, so `add_content(await exec_command(...))` returns the same content items a direct tool call would expose.
- Only content passed to `add_content(value)` is surfaced back to the model.
- The tool description lists which nested tools are available in the current session.
- `code_mode` cannot invoke itself recursively.
## Notes
- Because `code_mode` uses `node:vm`, it is lighter than the persistent `js_repl` kernel but does not keep top-level bindings between calls.