Add store/load support for code mode (#14259)

Adds support for transferring state across code-mode invocations.
This commit is contained in:
pakrym-oai
2026-03-10 16:53:53 -07:00
committed by GitHub
parent f8ef154a6b
commit 18199d4e0e
7 changed files with 163 additions and 8 deletions

View File

@@ -1,3 +1,4 @@
use std::collections::HashMap;
use std::process::ExitStatus;
use std::sync::Arc;
@@ -57,6 +58,7 @@ struct EnabledTool {
enum HostToNodeMessage {
Init {
enabled_tools: Vec<EnabledTool>,
stored_values: HashMap<String, JsonValue>,
source: String,
},
Response {
@@ -76,6 +78,7 @@ enum NodeToHostMessage {
},
Result {
content_items: Vec<JsonValue>,
stored_values: HashMap<String, JsonValue>,
#[serde(default)]
max_output_tokens_per_exec_call: Option<usize>,
},
@@ -94,7 +97,7 @@ pub(crate) fn instructions(config: &Config) -> Option<String> {
section.push_str("- Direct tool calls remain available while `code_mode` is enabled.\n");
section.push_str("- `code_mode` uses the same Node runtime resolution as `js_repl`. If needed, point `js_repl_node_path` at the Node binary you want Codex to use.\n");
section.push_str("- Import nested tools from `tools.js`, for example `import { exec_command } from \"tools.js\"` or `import { tools } from \"tools.js\"`. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import { append_notebook_logs_chart } from \"tools/mcp/ologs.js\"`. `tools[name]` and identifier wrappers like `await exec_command(args)` remain available for compatibility. Nested tool calls resolve to their code-mode result values.\n");
section.push_str("- Import `{ output_text, output_image, set_max_output_tokens_per_exec_call }` from `@openai/code_mode`. `output_text(value)` surfaces text back to the model and stringifies non-string objects with `JSON.stringify(...)` when possible. `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs. `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `code_mode` execution; the default is `10000`. This guards the overall `code_mode` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker.\n");
section.push_str("- Import `{ output_text, output_image, set_max_output_tokens_per_exec_call, store, load }` from `@openai/code_mode` (or `\"openai/code_mode\"`). `output_text(value)` surfaces text back to the model and stringifies non-string objects with `JSON.stringify(...)` when possible. `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs. `store(key, value)` persists JSON-serializable values across `code_mode` calls in the current session, and `load(key)` returns a cloned stored value or `undefined`. `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `code_mode` execution; the default is `10000`. This guards the overall `code_mode` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker.\n");
section.push_str(
"- Function tools require JSON object arguments. Freeform tools require raw strings.\n",
);
@@ -116,8 +119,9 @@ pub(crate) async fn execute(
tracker,
};
let enabled_tools = build_enabled_tools(&exec).await;
let stored_values = exec.session.services.code_mode_store.stored_values().await;
let source = build_source(&code, &enabled_tools).map_err(FunctionCallError::RespondToModel)?;
execute_node(exec, source, enabled_tools)
execute_node(exec, source, enabled_tools, stored_values)
.await
.map_err(FunctionCallError::RespondToModel)
}
@@ -126,6 +130,7 @@ async fn execute_node(
exec: ExecContext,
source: String,
enabled_tools: Vec<EnabledTool>,
stored_values: HashMap<String, JsonValue>,
) -> Result<Vec<FunctionCallOutputContentItem>, String> {
let node_path = resolve_compatible_node(exec.turn.config.js_repl_node_path.as_deref()).await?;
@@ -169,6 +174,7 @@ async fn execute_node(
&mut stdin,
&HostToNodeMessage::Init {
enabled_tools: enabled_tools.clone(),
stored_values,
source,
},
)
@@ -196,8 +202,14 @@ async fn execute_node(
}
NodeToHostMessage::Result {
content_items,
stored_values,
max_output_tokens_per_exec_call,
} => {
exec.session
.services
.code_mode_store
.replace_stored_values(stored_values)
.await;
final_content_items = Some(truncate_code_mode_result(
output_content_items_from_json_values(content_items)?,
max_output_tokens_per_exec_call,

View File

@@ -108,6 +108,10 @@ function isValidIdentifier(name) {
return /^[A-Za-z_$][0-9A-Za-z_$]*$/.test(name);
}
// Deep-clones a JSON-serializable value via a stringify/parse round trip.
// Note: non-JSON data (undefined, functions, symbols) is dropped by design,
// matching what can legally cross the store/load host boundary.
function cloneJsonValue(value) {
  const serialized = JSON.stringify(value);
  return JSON.parse(serialized);
}
function createToolCaller(protocol) {
return (name, input) =>
protocol.request('tool_call', {
@@ -197,6 +201,21 @@ function normalizeOutputImageUrl(value) {
}
function createCodeModeModule(context, state) {
// Returns a deep clone of a previously stored value, or `undefined` when the
// key has never been stored. Cloning prevents callers from mutating the
// session's persisted state in place.
const load = (key) => {
  if (typeof key !== 'string') {
    throw new TypeError('load key must be a string');
  }
  const hasKey = Object.prototype.hasOwnProperty.call(state.storedValues, key);
  return hasKey ? cloneJsonValue(state.storedValues[key]) : undefined;
};
// Persists a JSON-serializable value under `key` for the current session.
// The value is deep-cloned on write so later mutations by the caller cannot
// leak into the stored copy that is sent back to the host on completion.
const store = (key, value) => {
if (typeof key !== 'string') {
throw new TypeError('store key must be a string');
}
// Clone via JSON round trip; non-JSON data (undefined, functions) is dropped.
state.storedValues[key] = cloneJsonValue(value);
};
const outputText = (value) => {
const item = {
type: 'input_text',
@@ -215,8 +234,9 @@ function createCodeModeModule(context, state) {
};
return new SyntheticModule(
['output_text', 'output_image', 'set_max_output_tokens_per_exec_call'],
['load', 'output_text', 'output_image', 'set_max_output_tokens_per_exec_call', 'store'],
function initCodeModeModule() {
this.setExport('load', load);
this.setExport('output_text', outputText);
this.setExport('output_image', outputImage);
this.setExport('set_max_output_tokens_per_exec_call', (value) => {
@@ -224,6 +244,7 @@ function createCodeModeModule(context, state) {
state.maxOutputTokensPerExecCall = normalized;
return normalized;
});
this.setExport('store', store);
},
{ context }
);
@@ -291,10 +312,9 @@ function createModuleResolver(context, callTool, enabledTools, state) {
if (specifier === 'tools.js') {
return toolsModule;
}
if (specifier === '@openai/code_mode') {
if (specifier === '@openai/code_mode' || specifier === 'openai/code_mode') {
return codeModeModule;
}
const namespacedMatch = /^tools\/(.+)\.js$/.exec(specifier);
if (!namespacedMatch) {
throw new Error(`Unsupported import in code_mode: ${specifier}`);
@@ -318,7 +338,7 @@ function createModuleResolver(context, callTool, enabledTools, state) {
};
}
async function runModule(context, protocol, request, state, callTool) {
async function runModule(context, request, state, callTool) {
const resolveModule = createModuleResolver(
context,
callTool,
@@ -340,6 +360,7 @@ async function main() {
const request = await protocol.init;
const state = {
maxOutputTokensPerExecCall: DEFAULT_MAX_OUTPUT_TOKENS_PER_EXEC_CALL,
storedValues: cloneJsonValue(request.stored_values ?? {}),
};
const callTool = createToolCaller(protocol);
const context = vm.createContext({
@@ -348,10 +369,11 @@ async function main() {
});
try {
await runModule(context, protocol, request, state, callTool);
await runModule(context, request, state, callTool);
await protocol.send({
type: 'result',
content_items: readContentItems(context),
stored_values: state.storedValues,
max_output_tokens_per_exec_call: state.maxOutputTokensPerExecCall,
});
process.exit(0);
@@ -360,6 +382,7 @@ async function main() {
await protocol.send({
type: 'result',
content_items: readContentItems(context),
stored_values: state.storedValues,
max_output_tokens_per_exec_call: state.maxOutputTokensPerExecCall,
});
process.exit(1);

View File

@@ -1620,7 +1620,7 @@ source: /[\s\S]+/
enabled_tool_names.join(", ")
};
let description = format!(
"Runs JavaScript in a Node-backed `node:vm` context. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `code_mode` is enabled. Inside JavaScript, import nested tools from `tools.js`, for example `import {{ exec_command }} from \"tools.js\"` or `import {{ tools }} from \"tools.js\"`. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import {{ append_notebook_logs_chart }} from \"tools/mcp/ologs.js\"`. `tools[name]` and identifier wrappers like `await shell(args)` remain available for compatibility when the tool name is a valid JS identifier. Nested tool calls resolve to their code-mode result values. Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call }}` from `\"@openai/code_mode\"`; `output_text(value)` surfaces text back to the model and stringifies non-string objects when possible, `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs, and `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `code_mode` execution. The default is `10000`. This guards the overall `code_mode` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker. Function tools require JSON object arguments. Freeform tools require raw strings. `add_content(value)` remains available for compatibility with a content item, content-item array, or string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`. Only content passed to `output_text(...)`, `output_image(...)`, or `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}."
"Runs JavaScript in a Node-backed `node:vm` context. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `code_mode` is enabled. Inside JavaScript, import nested tools from `tools.js`, for example `import {{ exec_command }} from \"tools.js\"` or `import {{ tools }} from \"tools.js\"`. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import {{ append_notebook_logs_chart }} from \"tools/mcp/ologs.js\"`. `tools[name]` and identifier wrappers like `await shell(args)` remain available for compatibility when the tool name is a valid JS identifier. Nested tool calls resolve to their code-mode result values. Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call, store, load }}` from `\"@openai/code_mode\"` (or `\"openai/code_mode\"`); `output_text(value)` surfaces text back to the model and stringifies non-string objects when possible, `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs, `store(key, value)` persists JSON-serializable values across `code_mode` calls in the current session, `load(key)` returns a cloned stored value or `undefined`, and `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `code_mode` execution. The default is `10000`. This guards the overall `code_mode` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker. Function tools require JSON object arguments. Freeform tools require raw strings. `add_content(value)` remains available for compatibility with a content item, content-item array, or string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`. 
Only content passed to `output_text(...)`, `output_image(...)`, or `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}."
);
ToolSpec::Freeform(FreeformTool {