Add store/load support for code mode (#14259)

Adds support for transferring state across code-mode invocations.
This commit is contained in:
pakrym-oai
2026-03-10 16:53:53 -07:00
committed by GitHub
parent f8ef154a6b
commit 18199d4e0e
7 changed files with 163 additions and 8 deletions

View File

@@ -1,3 +1,4 @@
use std::collections::HashMap;
use std::process::ExitStatus;
use std::sync::Arc;
@@ -57,6 +58,7 @@ struct EnabledTool {
enum HostToNodeMessage {
Init {
enabled_tools: Vec<EnabledTool>,
stored_values: HashMap<String, JsonValue>,
source: String,
},
Response {
@@ -76,6 +78,7 @@ enum NodeToHostMessage {
},
Result {
content_items: Vec<JsonValue>,
stored_values: HashMap<String, JsonValue>,
#[serde(default)]
max_output_tokens_per_exec_call: Option<usize>,
},
@@ -94,7 +97,7 @@ pub(crate) fn instructions(config: &Config) -> Option<String> {
section.push_str("- Direct tool calls remain available while `code_mode` is enabled.\n");
section.push_str("- `code_mode` uses the same Node runtime resolution as `js_repl`. If needed, point `js_repl_node_path` at the Node binary you want Codex to use.\n");
section.push_str("- Import nested tools from `tools.js`, for example `import { exec_command } from \"tools.js\"` or `import { tools } from \"tools.js\"`. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import { append_notebook_logs_chart } from \"tools/mcp/ologs.js\"`. `tools[name]` and identifier wrappers like `await exec_command(args)` remain available for compatibility. Nested tool calls resolve to their code-mode result values.\n");
section.push_str("- Import `{ output_text, output_image, set_max_output_tokens_per_exec_call }` from `@openai/code_mode`. `output_text(value)` surfaces text back to the model and stringifies non-string objects with `JSON.stringify(...)` when possible. `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs. `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `code_mode` execution; the default is `10000`. This guards the overall `code_mode` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker.\n");
section.push_str("- Import `{ output_text, output_image, set_max_output_tokens_per_exec_call, store, load }` from `@openai/code_mode` (or `\"openai/code_mode\"`). `output_text(value)` surfaces text back to the model and stringifies non-string objects with `JSON.stringify(...)` when possible. `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs. `store(key, value)` persists JSON-serializable values across `code_mode` calls in the current session, and `load(key)` returns a cloned stored value or `undefined`. `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `code_mode` execution; the default is `10000`. This guards the overall `code_mode` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker.\n");
section.push_str(
"- Function tools require JSON object arguments. Freeform tools require raw strings.\n",
);
@@ -116,8 +119,9 @@ pub(crate) async fn execute(
tracker,
};
let enabled_tools = build_enabled_tools(&exec).await;
let stored_values = exec.session.services.code_mode_store.stored_values().await;
let source = build_source(&code, &enabled_tools).map_err(FunctionCallError::RespondToModel)?;
execute_node(exec, source, enabled_tools)
execute_node(exec, source, enabled_tools, stored_values)
.await
.map_err(FunctionCallError::RespondToModel)
}
@@ -126,6 +130,7 @@ async fn execute_node(
exec: ExecContext,
source: String,
enabled_tools: Vec<EnabledTool>,
stored_values: HashMap<String, JsonValue>,
) -> Result<Vec<FunctionCallOutputContentItem>, String> {
let node_path = resolve_compatible_node(exec.turn.config.js_repl_node_path.as_deref()).await?;
@@ -169,6 +174,7 @@ async fn execute_node(
&mut stdin,
&HostToNodeMessage::Init {
enabled_tools: enabled_tools.clone(),
stored_values,
source,
},
)
@@ -196,8 +202,14 @@ async fn execute_node(
}
NodeToHostMessage::Result {
content_items,
stored_values,
max_output_tokens_per_exec_call,
} => {
exec.session
.services
.code_mode_store
.replace_stored_values(stored_values)
.await;
final_content_items = Some(truncate_code_mode_result(
output_content_items_from_json_values(content_items)?,
max_output_tokens_per_exec_call,

View File

@@ -108,6 +108,10 @@ function isValidIdentifier(name) {
return /^[A-Za-z_$][0-9A-Za-z_$]*$/.test(name);
}
// Deep-clones a JSON-serializable value via a stringify/parse round trip.
// Note: non-JSON data (undefined, functions, symbols) is dropped by design,
// matching what can legally cross the store/load host boundary.
function cloneJsonValue(value) {
  const serialized = JSON.stringify(value);
  return JSON.parse(serialized);
}
function createToolCaller(protocol) {
return (name, input) =>
protocol.request('tool_call', {
@@ -197,6 +201,21 @@ function normalizeOutputImageUrl(value) {
}
function createCodeModeModule(context, state) {
// Returns a deep clone of a previously stored value, or `undefined` when the
// key has never been stored. Cloning prevents callers from mutating the
// session's persisted state in place.
const load = (key) => {
  if (typeof key !== 'string') {
    throw new TypeError('load key must be a string');
  }
  const hasKey = Object.prototype.hasOwnProperty.call(state.storedValues, key);
  return hasKey ? cloneJsonValue(state.storedValues[key]) : undefined;
};
// Persists a JSON-serializable value under `key` for the current session.
// The value is deep-cloned on write so later mutations by the caller cannot
// leak into the stored copy that is sent back to the host on completion.
const store = (key, value) => {
if (typeof key !== 'string') {
throw new TypeError('store key must be a string');
}
// Clone via JSON round trip; non-JSON data (undefined, functions) is dropped.
state.storedValues[key] = cloneJsonValue(value);
};
const outputText = (value) => {
const item = {
type: 'input_text',
@@ -215,8 +234,9 @@ function createCodeModeModule(context, state) {
};
return new SyntheticModule(
['output_text', 'output_image', 'set_max_output_tokens_per_exec_call'],
['load', 'output_text', 'output_image', 'set_max_output_tokens_per_exec_call', 'store'],
function initCodeModeModule() {
this.setExport('load', load);
this.setExport('output_text', outputText);
this.setExport('output_image', outputImage);
this.setExport('set_max_output_tokens_per_exec_call', (value) => {
@@ -224,6 +244,7 @@ function createCodeModeModule(context, state) {
state.maxOutputTokensPerExecCall = normalized;
return normalized;
});
this.setExport('store', store);
},
{ context }
);
@@ -291,10 +312,9 @@ function createModuleResolver(context, callTool, enabledTools, state) {
if (specifier === 'tools.js') {
return toolsModule;
}
if (specifier === '@openai/code_mode') {
if (specifier === '@openai/code_mode' || specifier === 'openai/code_mode') {
return codeModeModule;
}
const namespacedMatch = /^tools\/(.+)\.js$/.exec(specifier);
if (!namespacedMatch) {
throw new Error(`Unsupported import in code_mode: ${specifier}`);
@@ -318,7 +338,7 @@ function createModuleResolver(context, callTool, enabledTools, state) {
};
}
async function runModule(context, protocol, request, state, callTool) {
async function runModule(context, request, state, callTool) {
const resolveModule = createModuleResolver(
context,
callTool,
@@ -340,6 +360,7 @@ async function main() {
const request = await protocol.init;
const state = {
maxOutputTokensPerExecCall: DEFAULT_MAX_OUTPUT_TOKENS_PER_EXEC_CALL,
storedValues: cloneJsonValue(request.stored_values ?? {}),
};
const callTool = createToolCaller(protocol);
const context = vm.createContext({
@@ -348,10 +369,11 @@ async function main() {
});
try {
await runModule(context, protocol, request, state, callTool);
await runModule(context, request, state, callTool);
await protocol.send({
type: 'result',
content_items: readContentItems(context),
stored_values: state.storedValues,
max_output_tokens_per_exec_call: state.maxOutputTokensPerExecCall,
});
process.exit(0);
@@ -360,6 +382,7 @@ async function main() {
await protocol.send({
type: 'result',
content_items: readContentItems(context),
stored_values: state.storedValues,
max_output_tokens_per_exec_call: state.maxOutputTokensPerExecCall,
});
process.exit(1);

View File

@@ -1620,7 +1620,7 @@ source: /[\s\S]+/
enabled_tool_names.join(", ")
};
let description = format!(
"Runs JavaScript in a Node-backed `node:vm` context. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `code_mode` is enabled. Inside JavaScript, import nested tools from `tools.js`, for example `import {{ exec_command }} from \"tools.js\"` or `import {{ tools }} from \"tools.js\"`. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import {{ append_notebook_logs_chart }} from \"tools/mcp/ologs.js\"`. `tools[name]` and identifier wrappers like `await shell(args)` remain available for compatibility when the tool name is a valid JS identifier. Nested tool calls resolve to their code-mode result values. Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call }}` from `\"@openai/code_mode\"`; `output_text(value)` surfaces text back to the model and stringifies non-string objects when possible, `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs, and `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `code_mode` execution. The default is `10000`. This guards the overall `code_mode` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker. Function tools require JSON object arguments. Freeform tools require raw strings. `add_content(value)` remains available for compatibility with a content item, content-item array, or string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`. Only content passed to `output_text(...)`, `output_image(...)`, or `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}."
"Runs JavaScript in a Node-backed `node:vm` context. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `code_mode` is enabled. Inside JavaScript, import nested tools from `tools.js`, for example `import {{ exec_command }} from \"tools.js\"` or `import {{ tools }} from \"tools.js\"`. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import {{ append_notebook_logs_chart }} from \"tools/mcp/ologs.js\"`. `tools[name]` and identifier wrappers like `await shell(args)` remain available for compatibility when the tool name is a valid JS identifier. Nested tool calls resolve to their code-mode result values. Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call, store, load }}` from `\"@openai/code_mode\"` (or `\"openai/code_mode\"`); `output_text(value)` surfaces text back to the model and stringifies non-string objects when possible, `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs, `store(key, value)` persists JSON-serializable values across `code_mode` calls in the current session, `load(key)` returns a cloned stored value or `undefined`, and `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `code_mode` execution. The default is `10000`. This guards the overall `code_mode` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker. Function tools require JSON object arguments. Freeform tools require raw strings. `add_content(value)` remains available for compatibility with a content item, content-item array, or string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`. 
Only content passed to `output_text(...)`, `output_image(...)`, or `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}."
);
ToolSpec::Freeform(FreeformTool {