Refine code mode background yielding

This commit is contained in:
pakrym-oai
2026-03-11 23:58:58 -07:00
parent 4dc635694b
commit 32bf320bb0
3 changed files with 110 additions and 17 deletions

View File

@@ -62,7 +62,6 @@ pub(crate) struct CodeModeProcess {
pub(crate) struct CodeModeWorker {
shutdown_tx: Option<oneshot::Sender<()>>,
task: JoinHandle<()>,
}
#[derive(Debug, Deserialize)]
@@ -88,7 +87,7 @@ impl CodeModeProcess {
let (shutdown_tx, mut shutdown_rx) = oneshot::channel();
let stdin = Arc::clone(&self.stdin);
let tool_call_rx = Arc::clone(&self.tool_call_rx);
let task = tokio::spawn(async move {
tokio::spawn(async move {
loop {
let tool_call = tokio::select! {
_ = &mut shutdown_rx => break,
@@ -119,7 +118,6 @@ impl CodeModeProcess {
CodeModeWorker {
shutdown_tx: Some(shutdown_tx),
task,
}
}
@@ -347,7 +345,7 @@ pub(crate) fn instructions(config: &Config) -> Option<String> {
));
section.push_str("- Import nested tools from `tools.js`, for example `import { exec_command } from \"tools.js\"` or `import { ALL_TOOLS } from \"tools.js\"` to inspect the available `{ module, name, description }` entries. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import { append_notebook_logs_chart } from \"tools/mcp/ologs.js\"`. Nested tool calls resolve to their code-mode result values.\n");
section.push_str(&format!(
"- Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call, set_yield_time, store, load }}` from `@openai/code_mode` (or `\"openai/code_mode\"`). `output_text(value)` surfaces text back to the model and stringifies non-string objects with `JSON.stringify(...)` when possible. `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs. `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, and `load(key)` returns a cloned stored value or `undefined`. `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate direct `{PUBLIC_TOOL_NAME}` returns; `{WAIT_TOOL_NAME}` uses its own `max_tokens` argument instead and defaults to `10000`. `set_yield_time(value)` asks `{PUBLIC_TOOL_NAME}` to return early if the script is still running after that many milliseconds so `{WAIT_TOOL_NAME}` can resume it later. The returned content starts with a separate `Script completed`, `Script failed`, or `Script running with session ID …` text item that includes wall time. When truncation happens, the final text may include `Total output lines:` and the usual `…N tokens truncated…` marker.\n",
"- Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call, set_yield_time, store, load, yield_control }}` from `@openai/code_mode` (or `\"openai/code_mode\"`). `output_text(value)` surfaces text back to the model and stringifies non-string objects with `JSON.stringify(...)` when possible. `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs. `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, and `load(key)` returns a cloned stored value or `undefined`. `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate direct `{PUBLIC_TOOL_NAME}` returns; `{WAIT_TOOL_NAME}` uses its own `max_tokens` argument instead and defaults to `10000`. `set_yield_time(value)` asks `{PUBLIC_TOOL_NAME}` to return early if the script is still running after that many milliseconds so `{WAIT_TOOL_NAME}` can resume it later. `yield_control()` returns a yielded `{PUBLIC_TOOL_NAME}` response immediately while the script keeps running in the background. The returned content starts with a separate `Script completed`, `Script failed`, or `Script running with session ID …` text item that includes wall time. When truncation happens, the final text may include `Total output lines:` and the usual `…N tokens truncated…` marker.\n",
));
section.push_str(&format!(
"- If `{PUBLIC_TOOL_NAME}` returns `Script running with session ID …`, call `{WAIT_TOOL_NAME}` with that `session_id` to keep waiting for more output, completion, or termination.\n",

View File

@@ -265,6 +265,7 @@ function codeModeWorkerMain() {
'set_max_output_tokens_per_exec_call',
'set_yield_time',
'store',
'yield_control',
],
function initCodeModeModule() {
this.setExport('load', load);
@@ -288,6 +289,9 @@ function codeModeWorkerMain() {
return normalized;
});
this.setExport('store', store);
this.setExport('yield_control', () => {
parentPort.postMessage({ type: 'yield' });
});
},
{ context }
);
@@ -469,8 +473,6 @@ function createProtocol() {
session.request_id = String(message.request_id);
if (session.pending_result) {
void completeSession(protocol, sessions, session, session.pending_result);
} else if (session.pending_tool_call) {
void forwardToolCall(protocol, session, session.pending_tool_call);
} else {
schedulePollYield(protocol, session, normalizeYieldTime(message.yield_time_ms ?? 0));
}
@@ -545,7 +547,8 @@ function createProtocol() {
});
}
function request(requestId, type, payload) {
function request(type, payload) {
const requestId = 'req-' + ++nextId;
const id = 'msg-' + ++nextId;
const pendingKey = requestId + ':' + id;
return new Promise((resolve, reject) => {
@@ -574,7 +577,6 @@ function startSession(protocol, sessions, start) {
initial_yield_triggered: false,
max_output_tokens_per_exec_call: DEFAULT_MAX_OUTPUT_TOKENS_PER_EXEC_CALL,
pending_result: null,
pending_tool_call: null,
poll_yield_timer: null,
request_id: String(start.request_id),
worker: new Worker(sessionWorkerSource(), {
@@ -631,11 +633,12 @@ async function handleWorkerMessage(protocol, sessions, session, message) {
return;
}
if (message.type === 'yield') {
void sendYielded(protocol, session);
return;
}
if (message.type === 'tool_call') {
if (session.request_id === null) {
session.pending_tool_call = message;
return;
}
void forwardToolCall(protocol, session, message);
return;
}
@@ -662,11 +665,10 @@ async function handleWorkerMessage(protocol, sessions, session, message) {
async function forwardToolCall(protocol, session, message) {
try {
const result = await protocol.request(session.request_id, 'tool_call', {
const result = await protocol.request('tool_call', {
name: String(message.name),
input: message.input,
});
session.pending_tool_call = null;
if (session.completed) {
return;
}
@@ -678,7 +680,6 @@ async function forwardToolCall(protocol, session, message) {
});
} catch {}
} catch (error) {
session.pending_tool_call = null;
if (session.completed) {
return;
}
@@ -693,7 +694,7 @@ async function forwardToolCall(protocol, session, message) {
}
async function sendYielded(protocol, session) {
if (session.completed) {
if (session.completed || session.request_id === null) {
return;
}
const contentItems = takeContentItems(session);
@@ -750,7 +751,6 @@ async function completeSession(protocol, sessions, session, message) {
sessions.delete(session.id);
const contentItems = takeContentItems(session);
session.pending_result = null;
session.pending_tool_call = null;
try {
session.worker.postMessage({ type: 'clear_content' });
} catch {}

View File

@@ -1007,6 +1007,101 @@ output_text("session b done");
Ok(())
}
// Verifies that a code-mode script calling `yield_control()` keeps executing in
// the background across turns: turn 1 returns a "Script running" yield plus the
// pre-yield output, and turn 2 observes the script's side effect (a marker file)
// without ever invoking the wait tool.
// NOTE(review): this intentionally never calls the wait tool on the yielded
// session — confirm that is the intended resume contract being pinned here.
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_yield_control_keeps_running_on_later_turn_without_exec_wait() -> Result<()> {
// Bail out early in sandboxes without network; the mock server needs sockets.
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
// Enable the CodeMode feature flag for this test's configuration.
let mut builder = test_codex().with_config(move |config| {
let _ = config.features.enable(Feature::CodeMode);
});
let test = builder.build(&server).await?;
// Marker file the backgrounded script writes after yielding; its existence
// proves the script resumed on its own.
let resumed_file = test.workspace_path("code-mode-yield-resumed.txt");
// Shell-quote the path so it can be embedded safely in the commands below.
let resumed_file_quoted = shlex::try_join([resumed_file.to_string_lossy().as_ref()])?;
let write_file_command = format!("printf resumed > {resumed_file_quoted}");
// Polls until the marker file appears, then prints "ready".
let wait_for_file_command =
format!("while [ ! -f {resumed_file_quoted} ]; do sleep 0.01; done; printf ready");
// Script under test: emit text, yield control back to the model, then keep
// running in the background (writing the marker file).
// Do not insert anything inside the raw string — its bytes are the script.
let code = format!(
r#"
import {{ output_text, yield_control }} from "@openai/code_mode";
import {{ exec_command }} from "tools.js";
output_text("before yield");
yield_control();
await exec_command({{ cmd: {write_file_command:?} }});
output_text("after yield");
"#
);
// Turn 1, model response: issue the custom "exec" tool call carrying the script.
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-1"),
ev_custom_tool_call("call-1", "exec", &code),
ev_completed("resp-1"),
]),
)
.await;
// Follow-up model response after the yielded tool output is posted back;
// captured so we can inspect the request that carried the tool output.
let first_completion = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-1", "exec yielded"),
ev_completed("resp-2"),
]),
)
.await;
test.submit_turn("start yielded exec").await?;
let first_request = first_completion.single_request();
// The yielded exec output must contain exactly two items: the
// "Script running" status header and the pre-yield `output_text` payload.
let first_items = custom_tool_output_items(&first_request, "call-1");
assert_eq!(first_items.len(), 2);
assert_regex_match(
concat!(
r"(?s)\A",
r"Script running with session ID \d+\nWall time \d+\.\d seconds\nOutput:\n\z"
),
text_item(&first_items, 0),
);
assert_eq!(text_item(&first_items, 1), "before yield");
// Turn 2, model response: instead of waiting on the yielded session, run a
// separate exec_command that blocks until the background script writes the file.
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-3"),
responses::ev_function_call(
"call-2",
"exec_command",
&serde_json::to_string(&serde_json::json!({
"cmd": wait_for_file_command,
}))?,
),
ev_completed("resp-3"),
]),
)
.await;
let second_completion = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-2", "file appeared"),
ev_completed("resp-4"),
]),
)
.await;
test.submit_turn("wait for resumed file").await?;
let second_request = second_completion.single_request();
// "ready" is only printed once the marker file exists, so this proves the
// yielded script kept running in the background between turns.
assert_eq!(
second_request.function_call_output_text("call-2"),
Some("ready".to_string())
);
// The file content confirms the post-yield portion of the script executed.
assert_eq!(fs::read_to_string(&resumed_file)?, "resumed");
Ok(())
}
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_exec_wait_uses_its_own_max_tokens_budget() -> Result<()> {