Refine code mode background yielding

This commit is contained in:
pakrym-oai
2026-03-11 23:58:58 -07:00
parent 4dc635694b
commit 32bf320bb0
3 changed files with 110 additions and 17 deletions

View File

@@ -62,7 +62,6 @@ pub(crate) struct CodeModeProcess {
pub(crate) struct CodeModeWorker {
shutdown_tx: Option<oneshot::Sender<()>>,
task: JoinHandle<()>,
}
#[derive(Debug, Deserialize)]
@@ -88,7 +87,7 @@ impl CodeModeProcess {
let (shutdown_tx, mut shutdown_rx) = oneshot::channel();
let stdin = Arc::clone(&self.stdin);
let tool_call_rx = Arc::clone(&self.tool_call_rx);
let task = tokio::spawn(async move {
tokio::spawn(async move {
loop {
let tool_call = tokio::select! {
_ = &mut shutdown_rx => break,
@@ -119,7 +118,6 @@ impl CodeModeProcess {
CodeModeWorker {
shutdown_tx: Some(shutdown_tx),
task,
}
}
@@ -347,7 +345,7 @@ pub(crate) fn instructions(config: &Config) -> Option<String> {
));
section.push_str("- Import nested tools from `tools.js`, for example `import { exec_command } from \"tools.js\"` or `import { ALL_TOOLS } from \"tools.js\"` to inspect the available `{ module, name, description }` entries. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import { append_notebook_logs_chart } from \"tools/mcp/ologs.js\"`. Nested tool calls resolve to their code-mode result values.\n");
section.push_str(&format!(
"- Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call, set_yield_time, store, load }}` from `@openai/code_mode` (or `\"openai/code_mode\"`). `output_text(value)` surfaces text back to the model and stringifies non-string objects with `JSON.stringify(...)` when possible. `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs. `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, and `load(key)` returns a cloned stored value or `undefined`. `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate direct `{PUBLIC_TOOL_NAME}` returns; `{WAIT_TOOL_NAME}` uses its own `max_tokens` argument instead and defaults to `10000`. `set_yield_time(value)` asks `{PUBLIC_TOOL_NAME}` to return early if the script is still running after that many milliseconds so `{WAIT_TOOL_NAME}` can resume it later. The returned content starts with a separate `Script completed`, `Script failed`, or `Script running with session ID …` text item that includes wall time. When truncation happens, the final text may include `Total output lines:` and the usual `…N tokens truncated…` marker.\n",
"- Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call, set_yield_time, store, load, yield_control }}` from `@openai/code_mode` (or `\"openai/code_mode\"`). `output_text(value)` surfaces text back to the model and stringifies non-string objects with `JSON.stringify(...)` when possible. `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs. `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, and `load(key)` returns a cloned stored value or `undefined`. `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate direct `{PUBLIC_TOOL_NAME}` returns; `{WAIT_TOOL_NAME}` uses its own `max_tokens` argument instead and defaults to `10000`. `set_yield_time(value)` asks `{PUBLIC_TOOL_NAME}` to return early if the script is still running after that many milliseconds so `{WAIT_TOOL_NAME}` can resume it later. `yield_control()` returns a yielded `{PUBLIC_TOOL_NAME}` response immediately while the script keeps running in the background. The returned content starts with a separate `Script completed`, `Script failed`, or `Script running with session ID …` text item that includes wall time. When truncation happens, the final text may include `Total output lines:` and the usual `…N tokens truncated…` marker.\n",
));
section.push_str(&format!(
"- If `{PUBLIC_TOOL_NAME}` returns `Script running with session ID …`, call `{WAIT_TOOL_NAME}` with that `session_id` to keep waiting for more output, completion, or termination.\n",

View File

@@ -265,6 +265,7 @@ function codeModeWorkerMain() {
'set_max_output_tokens_per_exec_call',
'set_yield_time',
'store',
'yield_control',
],
function initCodeModeModule() {
this.setExport('load', load);
@@ -288,6 +289,9 @@ function codeModeWorkerMain() {
return normalized;
});
this.setExport('store', store);
this.setExport('yield_control', () => {
parentPort.postMessage({ type: 'yield' });
});
},
{ context }
);
@@ -469,8 +473,6 @@ function createProtocol() {
session.request_id = String(message.request_id);
if (session.pending_result) {
void completeSession(protocol, sessions, session, session.pending_result);
} else if (session.pending_tool_call) {
void forwardToolCall(protocol, session, session.pending_tool_call);
} else {
schedulePollYield(protocol, session, normalizeYieldTime(message.yield_time_ms ?? 0));
}
@@ -545,7 +547,8 @@ function createProtocol() {
});
}
function request(requestId, type, payload) {
function request(type, payload) {
const requestId = 'req-' + ++nextId;
const id = 'msg-' + ++nextId;
const pendingKey = requestId + ':' + id;
return new Promise((resolve, reject) => {
@@ -574,7 +577,6 @@ function startSession(protocol, sessions, start) {
initial_yield_triggered: false,
max_output_tokens_per_exec_call: DEFAULT_MAX_OUTPUT_TOKENS_PER_EXEC_CALL,
pending_result: null,
pending_tool_call: null,
poll_yield_timer: null,
request_id: String(start.request_id),
worker: new Worker(sessionWorkerSource(), {
@@ -631,11 +633,12 @@ async function handleWorkerMessage(protocol, sessions, session, message) {
return;
}
if (message.type === 'yield') {
void sendYielded(protocol, session);
return;
}
if (message.type === 'tool_call') {
if (session.request_id === null) {
session.pending_tool_call = message;
return;
}
void forwardToolCall(protocol, session, message);
return;
}
@@ -662,11 +665,10 @@ async function handleWorkerMessage(protocol, sessions, session, message) {
async function forwardToolCall(protocol, session, message) {
try {
const result = await protocol.request(session.request_id, 'tool_call', {
const result = await protocol.request('tool_call', {
name: String(message.name),
input: message.input,
});
session.pending_tool_call = null;
if (session.completed) {
return;
}
@@ -678,7 +680,6 @@ async function forwardToolCall(protocol, session, message) {
});
} catch {}
} catch (error) {
session.pending_tool_call = null;
if (session.completed) {
return;
}
@@ -693,7 +694,7 @@ async function forwardToolCall(protocol, session, message) {
}
async function sendYielded(protocol, session) {
if (session.completed) {
if (session.completed || session.request_id === null) {
return;
}
const contentItems = takeContentItems(session);
@@ -750,7 +751,6 @@ async function completeSession(protocol, sessions, session, message) {
sessions.delete(session.id);
const contentItems = takeContentItems(session);
session.pending_result = null;
session.pending_tool_call = null;
try {
session.worker.postMessage({ type: 'clear_content' });
} catch {}

View File

@@ -1007,6 +1007,101 @@ output_text("session b done");
Ok(())
}
// Verifies that a code-mode script calling `yield_control()` keeps executing in
// the background across turns: turn 1 returns a "Script running" yield plus the
// pre-yield output, and turn 2 observes the script's side effect (a marker file)
// without ever invoking the wait tool.
// NOTE(review): this intentionally never calls the wait tool on the yielded
// session — confirm that is the intended resume contract being pinned here.
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_yield_control_keeps_running_on_later_turn_without_exec_wait() -> Result<()> {
// Bail out early in sandboxes without network; the mock server needs sockets.
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
// Enable the CodeMode feature flag for this test's configuration.
let mut builder = test_codex().with_config(move |config| {
let _ = config.features.enable(Feature::CodeMode);
});
let test = builder.build(&server).await?;
// Marker file the backgrounded script writes after yielding; its existence
// proves the script resumed on its own.
let resumed_file = test.workspace_path("code-mode-yield-resumed.txt");
// Shell-quote the path so it can be embedded safely in the commands below.
let resumed_file_quoted = shlex::try_join([resumed_file.to_string_lossy().as_ref()])?;
let write_file_command = format!("printf resumed > {resumed_file_quoted}");
// Polls until the marker file appears, then prints "ready".
let wait_for_file_command =
format!("while [ ! -f {resumed_file_quoted} ]; do sleep 0.01; done; printf ready");
// Script under test: emit text, yield control back to the model, then keep
// running in the background (writing the marker file).
// Do not insert anything inside the raw string — its bytes are the script.
let code = format!(
r#"
import {{ output_text, yield_control }} from "@openai/code_mode";
import {{ exec_command }} from "tools.js";
output_text("before yield");
yield_control();
await exec_command({{ cmd: {write_file_command:?} }});
output_text("after yield");
"#
);
// Turn 1, model response: issue the custom "exec" tool call carrying the script.
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-1"),
ev_custom_tool_call("call-1", "exec", &code),
ev_completed("resp-1"),
]),
)
.await;
// Follow-up model response after the yielded tool output is posted back;
// captured so we can inspect the request that carried the tool output.
let first_completion = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-1", "exec yielded"),
ev_completed("resp-2"),
]),
)
.await;
test.submit_turn("start yielded exec").await?;
let first_request = first_completion.single_request();
// The yielded exec output must contain exactly two items: the
// "Script running" status header and the pre-yield `output_text` payload.
let first_items = custom_tool_output_items(&first_request, "call-1");
assert_eq!(first_items.len(), 2);
assert_regex_match(
concat!(
r"(?s)\A",
r"Script running with session ID \d+\nWall time \d+\.\d seconds\nOutput:\n\z"
),
text_item(&first_items, 0),
);
assert_eq!(text_item(&first_items, 1), "before yield");
// Turn 2, model response: instead of waiting on the yielded session, run a
// separate exec_command that blocks until the background script writes the file.
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-3"),
responses::ev_function_call(
"call-2",
"exec_command",
&serde_json::to_string(&serde_json::json!({
"cmd": wait_for_file_command,
}))?,
),
ev_completed("resp-3"),
]),
)
.await;
let second_completion = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-2", "file appeared"),
ev_completed("resp-4"),
]),
)
.await;
test.submit_turn("wait for resumed file").await?;
let second_request = second_completion.single_request();
// "ready" is only printed once the marker file exists, so this proves the
// yielded script kept running in the background between turns.
assert_eq!(
second_request.function_call_output_text("call-2"),
Some("ready".to_string())
);
// The file content confirms the post-yield portion of the script executed.
assert_eq!(fs::read_to_string(&resumed_file)?, "resumed");
Ok(())
}
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_exec_wait_uses_its_own_max_tokens_budget() -> Result<()> {