Files
codex/codex-rs/core/tests/suite/code_mode.rs
Matthew Zeng d8ddeb6869 Support explicit MCP OAuth client IDs (#22575)
## Why
Some MCP OAuth providers require a pre-registered public client ID and
cannot rely on dynamic client registration. Codex already supports MCP
OAuth, but it had no way to supply that client ID from config into the
PKCE flow.

## What changed
- add `oauth.client_id` under `[mcp_servers.<server>]` config, including
config editing and schema generation
- thread the configured client ID through CLI, app-server, plugin login,
and MCP skill dependency OAuth entrypoints
- configure RMCP authorization with the explicit client when present,
while preserving the existing dynamic-registration path when it is
absent
- add focused coverage for config parsing/serialization and OAuth URL
generation

## Verification
- `cargo test -p codex-config -p codex-rmcp-client -p codex-mcp -p
codex-core-plugins`
- `cargo test -p codex-core blocking_replace_mcp_servers_round_trips
--lib`
- `cargo test -p codex-core
replace_mcp_servers_streamable_http_serializes_oauth_resource --lib`
- `cargo test -p codex-core config_schema_matches_fixture --lib`

## Notes
Broader local package runs still hit unrelated pre-existing stack
overflows in:
- `codex-app-server::in_process_start_clamps_zero_channel_capacity`
-
`codex-core::resume_agent_from_rollout_uses_edge_data_when_descendant_metadata_source_is_stale`
2026-05-14 11:52:43 -07:00

2921 lines
87 KiB
Rust

#![allow(clippy::expect_used, clippy::unwrap_used)]
use anyhow::Result;
use base64::Engine;
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
use codex_config::types::McpServerConfig;
use codex_config::types::McpServerTransportConfig;
use codex_features::Feature;
use codex_login::CodexAuth;
use codex_models_manager::bundled_models_response;
use codex_protocol::dynamic_tools::DynamicToolCallOutputContentItem;
use codex_protocol::dynamic_tools::DynamicToolResponse;
use codex_protocol::dynamic_tools::DynamicToolSpec;
use codex_protocol::models::PermissionProfile;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::Op;
use codex_protocol::user_input::UserInput;
use core_test_support::apps_test_server::AppsTestServer;
use core_test_support::assert_regex_match;
use core_test_support::responses;
use core_test_support::responses::ResponseMock;
use core_test_support::responses::ResponsesRequest;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_custom_tool_call;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::sse;
use core_test_support::skip_if_no_network;
use core_test_support::stdio_server_bin;
use core_test_support::test_codex::TestCodex;
use core_test_support::test_codex::test_codex;
use core_test_support::test_codex::turn_permission_fields;
use core_test_support::wait_for_event;
use core_test_support::wait_for_event_match;
use pretty_assertions::assert_eq;
use serde_json::Value;
use std::collections::HashMap;
use std::collections::HashSet;
use std::fs;
use std::path::Path;
use std::time::Duration;
use std::time::Instant;
use wiremock::MockServer;
fn custom_tool_output_items(req: &ResponsesRequest, call_id: &str) -> Vec<Value> {
match req.custom_tool_call_output(call_id).get("output") {
Some(Value::Array(items)) => items.clone(),
Some(Value::String(text)) => {
vec![serde_json::json!({ "type": "input_text", "text": text })]
}
_ => panic!("custom tool output should be serialized as text or content items"),
}
}
fn tool_names(body: &Value) -> Vec<String> {
body.get("tools")
.and_then(Value::as_array)
.map(|tools| {
tools
.iter()
.filter_map(|tool| {
tool.get("name")
.or_else(|| tool.get("type"))
.and_then(Value::as_str)
.map(str::to_string)
})
.collect()
})
.unwrap_or_default()
}
fn function_tool_output_items(req: &ResponsesRequest, call_id: &str) -> Vec<Value> {
match req.function_call_output(call_id).get("output") {
Some(Value::Array(items)) => items.clone(),
Some(Value::String(text)) => {
vec![serde_json::json!({ "type": "input_text", "text": text })]
}
_ => panic!("function tool output should be serialized as text or content items"),
}
}
fn text_item(items: &[Value], index: usize) -> &str {
items[index]
.get("text")
.and_then(Value::as_str)
.expect("content item should be input_text")
}
fn extract_running_cell_id(text: &str) -> String {
text.strip_prefix("Script running with cell ID ")
.and_then(|rest| rest.split('\n').next())
.expect("running header should contain a cell ID")
.to_string()
}
fn wait_for_file_source(path: &Path) -> Result<String> {
let quoted_path = shlex::try_join([path.to_string_lossy().as_ref()])?;
let command = format!("if [ -f {quoted_path} ]; then printf ready; fi");
Ok(format!(
r#"while ((await tools.exec_command({{ cmd: {command:?} }})).output !== "ready") {{
}}"#
))
}
fn custom_tool_output_body_and_success(
req: &ResponsesRequest,
call_id: &str,
) -> (String, Option<bool>) {
let (content, success) = req
.custom_tool_call_output_content_and_success(call_id)
.expect("custom tool output should be present");
let items = custom_tool_output_items(req, call_id);
let text_items = items
.iter()
.filter_map(|item| item.get("text").and_then(Value::as_str))
.collect::<Vec<_>>();
let output = match text_items.as_slice() {
[] => content.unwrap_or_default(),
[only] => (*only).to_string(),
[_, rest @ ..] => rest.concat(),
};
(output, success)
}
fn custom_tool_output_last_non_empty_text(req: &ResponsesRequest, call_id: &str) -> Option<String> {
match req.custom_tool_call_output(call_id).get("output") {
Some(Value::String(text)) if !text.trim().is_empty() => Some(text.clone()),
Some(Value::Array(items)) => items
.iter()
.filter_map(|item| item.get("text").and_then(Value::as_str))
.rfind(|text| !text.trim().is_empty())
.map(str::to_string),
Some(Value::String(_))
| Some(Value::Object(_))
| Some(Value::Number(_))
| Some(Value::Bool(_))
| Some(Value::Null)
| None => None,
}
}
async fn run_code_mode_turn(
server: &MockServer,
prompt: &str,
code: &str,
include_apply_patch: bool,
) -> Result<(TestCodex, ResponseMock)> {
let mut builder = test_codex()
.with_model("test-gpt-5.1-codex")
.with_config(move |config| {
let _ = config.features.enable(Feature::CodeMode);
config.include_apply_patch_tool = include_apply_patch;
});
let test = builder.build(server).await?;
responses::mount_sse_once(
server,
sse(vec![
ev_response_created("resp-1"),
ev_custom_tool_call("call-1", "exec", code),
ev_completed("resp-1"),
]),
)
.await;
let second_mock = responses::mount_sse_once(
server,
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
)
.await;
test.submit_turn(prompt).await?;
Ok((test, second_mock))
}
async fn run_code_mode_turn_with_rmcp(
server: &MockServer,
prompt: &str,
code: &str,
) -> Result<(TestCodex, ResponseMock)> {
run_code_mode_turn_with_rmcp_model(server, prompt, code, "test-gpt-5.1-codex").await
}
async fn run_code_mode_turn_with_rmcp_model(
server: &MockServer,
prompt: &str,
code: &str,
model: &'static str,
) -> Result<(TestCodex, ResponseMock)> {
run_code_mode_turn_with_rmcp_config(server, prompt, code, model, /*code_mode_only*/ false).await
}
async fn run_code_mode_turn_with_rmcp_mode(
server: &MockServer,
prompt: &str,
code: &str,
code_mode_only: bool,
) -> Result<(TestCodex, ResponseMock)> {
run_code_mode_turn_with_rmcp_config(server, prompt, code, "test-gpt-5.1-codex", code_mode_only)
.await
}
async fn run_code_mode_turn_with_rmcp_config(
server: &MockServer,
prompt: &str,
code: &str,
model: &'static str,
code_mode_only: bool,
) -> Result<(TestCodex, ResponseMock)> {
let rmcp_test_server_bin = stdio_server_bin()?;
let mut builder = test_codex().with_model(model).with_config(move |config| {
let _ = if code_mode_only {
config.features.enable(Feature::CodeModeOnly)
} else {
config.features.enable(Feature::CodeMode)
};
let mut servers = config.mcp_servers.get().clone();
servers.insert(
"rmcp".to_string(),
McpServerConfig {
transport: McpServerTransportConfig::Stdio {
command: rmcp_test_server_bin,
args: Vec::new(),
env: Some(HashMap::from([(
"MCP_TEST_VALUE".to_string(),
"propagated-env".to_string(),
)])),
env_vars: Vec::new(),
cwd: None,
},
experimental_environment: None,
enabled: true,
required: false,
supports_parallel_tool_calls: false,
disabled_reason: None,
startup_timeout_sec: Some(Duration::from_secs(10)),
tool_timeout_sec: None,
default_tools_approval_mode: None,
enabled_tools: None,
disabled_tools: None,
scopes: None,
oauth: None,
oauth_resource: None,
tools: HashMap::new(),
},
);
config
.mcp_servers
.set(servers)
.expect("test mcp servers should accept any configuration");
});
let test = builder.build(server).await?;
responses::mount_sse_once(
server,
sse(vec![
ev_response_created("resp-1"),
ev_custom_tool_call("call-1", "exec", code),
ev_completed("resp-1"),
]),
)
.await;
let second_mock = responses::mount_sse_once(
server,
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
)
.await;
test.submit_turn(prompt).await?;
Ok((test, second_mock))
}
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_return_exec_command_output() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let (_test, second_mock) = run_code_mode_turn(
&server,
"use exec to run exec_command",
r#"
text(JSON.stringify(await tools.exec_command({ cmd: "printf code_mode_exec_marker" })));
"#,
/*include_apply_patch*/ false,
)
.await?;
let req = second_mock.single_request();
let items = custom_tool_output_items(&req, "call-1");
assert_eq!(items.len(), 2);
assert_regex_match(
concat!(
r"(?s)\A",
r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
),
text_item(&items, /*index*/ 0),
);
let parsed: Value = serde_json::from_str(text_item(&items, /*index*/ 1))?;
assert!(
parsed
.get("chunk_id")
.and_then(Value::as_str)
.is_some_and(|chunk_id| !chunk_id.is_empty())
);
assert_eq!(
parsed.get("output").and_then(Value::as_str),
Some("code_mode_exec_marker"),
);
assert_eq!(parsed.get("exit_code").and_then(Value::as_i64), Some(0));
assert!(parsed.get("wall_time_seconds").is_some());
assert!(parsed.get("session_id").is_none());
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_only_restricts_prompt_tools() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let resp_mock = responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-1"),
ev_assistant_message("msg-1", "done"),
ev_completed("resp-1"),
]),
)
.await;
let mut builder = test_codex().with_config(|config| {
let _ = config.features.enable(Feature::CodeModeOnly);
});
let test = builder.build(&server).await?;
test.submit_turn("list tools in code mode only").await?;
let first_body = resp_mock.single_request().body_json();
assert_eq!(
tool_names(&first_body),
vec!["exec".to_string(), "wait".to_string()]
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_only_guides_all_tools_search_and_calls_deferred_app_tools() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let apps_server = AppsTestServer::mount_searchable(&server).await?;
let resp_mock = responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-1"),
ev_custom_tool_call(
"call-1",
"exec",
r#"
const tool = ALL_TOOLS.find(
({ name }) => name === "mcp__codex_apps__calendar_timezone_option_99"
);
if (!tool) {
text(JSON.stringify({ found: false }));
} else {
const result = await tools[tool.name]({ timezone: "UTC" });
text(JSON.stringify({
found: true,
isError: Boolean(result.isError),
text: result.content?.[0]?.text ?? "",
}));
}
"#,
),
ev_completed("resp-1"),
]),
)
.await;
let follow_up_mock = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
)
.await;
let apps_base_url = apps_server.chatgpt_base_url.clone();
let mut builder = test_codex()
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
.with_config(move |config| {
config
.features
.enable(Feature::Apps)
.expect("test config should allow feature update");
config
.features
.enable(Feature::ToolSearch)
.expect("test config should allow feature update");
config
.features
.enable(Feature::CodeMode)
.expect("test config should allow feature update");
config
.features
.enable(Feature::CodeModeOnly)
.expect("test config should allow feature update");
let mut model_catalog = bundled_models_response()
.unwrap_or_else(|err| panic!("bundled models.json should parse: {err}"));
let model = model_catalog
.models
.iter_mut()
.find(|model| model.slug == "gpt-5.4")
.expect("gpt-5.4 exists in bundled models.json");
config.chatgpt_base_url = apps_base_url;
config.model = Some("gpt-5.4".to_string());
model.supports_search_tool = true;
config.model_catalog = Some(model_catalog);
});
let test = builder.build(&server).await?;
test.submit_turn("inspect tools in code mode only").await?;
let first_body = resp_mock.single_request().body_json();
assert_eq!(
tool_names(&first_body),
vec!["exec".to_string(), "wait".to_string()]
);
let exec_description = first_body
.get("tools")
.and_then(Value::as_array)
.and_then(|tools| {
tools.iter().find_map(|tool| {
if tool
.get("name")
.or_else(|| tool.get("type"))
.and_then(Value::as_str)
== Some("exec")
{
tool.get("description").and_then(Value::as_str)
} else {
None
}
})
})
.expect("exec description should be present");
assert!(exec_description.contains("filter `ALL_TOOLS` by `name` and `description`"));
assert!(!exec_description.contains("calendar_timezone_option_99"));
let request = follow_up_mock.single_request();
let (output, success) = custom_tool_output_body_and_success(&request, "call-1");
assert_ne!(
success,
Some(false),
"code_mode_only deferred app tool call failed unexpectedly: {output}"
);
let parsed: Value = serde_json::from_str(&output)?;
assert_eq!(
parsed,
serde_json::json!({
"found": true,
"isError": false,
"text": "called calendar_timezone_option_99 for at with ",
})
);
Ok(())
}
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_only_can_call_nested_tools() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-1"),
ev_custom_tool_call(
"call-1",
"exec",
r#"
const output = await tools.exec_command({ cmd: "printf code_mode_only_nested_tool_marker" });
text(output.output);
"#,
),
ev_completed("resp-1"),
]),
)
.await;
let follow_up_mock = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
)
.await;
let mut builder = test_codex().with_config(|config| {
let _ = config.features.enable(Feature::CodeModeOnly);
});
let test = builder.build(&server).await?;
test.submit_turn("use exec to run nested tool in code mode only")
.await?;
let request = follow_up_mock.single_request();
let (output, success) = custom_tool_output_body_and_success(&request, "call-1");
assert_ne!(
success,
Some(false),
"code_mode_only nested tool call failed unexpectedly: {output}"
);
assert_eq!(output, "code_mode_only_nested_tool_marker");
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_update_plan_nested_tool_result_is_empty_object() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let (_test, second_mock) = run_code_mode_turn(
&server,
"use exec to run update_plan",
r#"
const result = await tools.update_plan({
plan: [{ step: "Run update_plan from code mode", status: "in_progress" }],
});
text(JSON.stringify(result));
"#,
/*include_apply_patch*/ false,
)
.await?;
let req = second_mock.single_request();
let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
assert_ne!(
success,
Some(false),
"exec update_plan call failed unexpectedly: {output}"
);
let parsed: Value = serde_json::from_str(&output)?;
assert_eq!(parsed, serde_json::json!({}));
Ok(())
}
#[cfg_attr(windows, ignore = "flaky on windows")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_nested_tool_calls_can_run_in_parallel() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let mut builder = test_codex()
.with_model("test-gpt-5.1-codex")
.with_config(move |config| {
let _ = config.features.enable(Feature::CodeMode);
});
let test = builder.build(&server).await?;
let warmup_code = r#"
const args = {
sleep_after_ms: 10,
barrier: {
id: "code-mode-parallel-tools-warmup",
participants: 2,
timeout_ms: 1_000,
},
};
await Promise.all([
tools.test_sync_tool(args),
tools.test_sync_tool(args),
]);
"#;
let code = r#"
const args = {
sleep_after_ms: 300,
barrier: {
id: "code-mode-parallel-tools",
participants: 2,
timeout_ms: 1_000,
},
};
const results = await Promise.all([
tools.test_sync_tool(args),
tools.test_sync_tool(args),
]);
text(JSON.stringify(results));
"#;
let response_mock = responses::mount_sse_sequence(
&server,
vec![
sse(vec![
ev_response_created("resp-warm-1"),
ev_custom_tool_call("call-warm-1", "exec", warmup_code),
ev_completed("resp-warm-1"),
]),
sse(vec![
ev_assistant_message("msg-warm-1", "warmup done"),
ev_completed("resp-warm-2"),
]),
sse(vec![
ev_response_created("resp-1"),
ev_custom_tool_call("call-1", "exec", code),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
],
)
.await;
test.submit_turn("warm up nested tools in parallel").await?;
let start = Instant::now();
test.submit_turn("run nested tools in parallel").await?;
let duration = start.elapsed();
assert!(
duration < Duration::from_millis(1_600),
"expected nested tools to finish in parallel, got {duration:?}",
);
let req = response_mock
.last_request()
.expect("parallel code mode run should send a completion request");
let items = custom_tool_output_items(&req, "call-1");
assert_eq!(items.len(), 2);
assert_eq!(text_item(&items, /*index*/ 1), "[\"ok\",\"ok\"]");
Ok(())
}
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_truncate_final_result_with_configured_budget() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let (_test, second_mock) = run_code_mode_turn(
&server,
"use exec to truncate the final result",
r#"// @exec: {"max_output_tokens": 6}
text(JSON.stringify(await tools.exec_command({
cmd: "printf 'token one token two token three token four token five token six token seven'",
max_output_tokens: 100
})));
"#,
/*include_apply_patch*/ false,
)
.await?;
let req = second_mock.single_request();
let items = custom_tool_output_items(&req, "call-1");
assert_eq!(items.len(), 2);
assert_regex_match(
concat!(
r"(?s)\A",
r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
),
text_item(&items, /*index*/ 0),
);
let expected_pattern = r#"(?sx)
\A
Total\ output\ lines:\ 1\n
\n
.*…\d+\ tokens\ truncated….*
\z
"#;
assert_regex_match(expected_pattern, text_item(&items, /*index*/ 1));
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_returns_accumulated_output_when_script_fails() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let (_test, second_mock) = run_code_mode_turn(
&server,
"use code_mode to surface script failures",
r#"
text("before crash");
text("still before crash");
throw new Error("boom");
"#,
/*include_apply_patch*/ false,
)
.await?;
let req = second_mock.single_request();
let items = custom_tool_output_items(&req, "call-1");
assert_eq!(items.len(), 4);
assert_regex_match(
concat!(
r"(?s)\A",
r"Script failed\nWall time \d+\.\d seconds\nOutput:\n\z"
),
text_item(&items, /*index*/ 0),
);
assert_eq!(text_item(&items, /*index*/ 1), "before crash");
assert_eq!(text_item(&items, /*index*/ 2), "still before crash");
assert_regex_match(
r#"(?sx)
\A
Script\ error:\n
Error:\ boom\n
(?:\s+at\ .+\n?)+
\z
"#,
text_item(&items, /*index*/ 3),
);
Ok(())
}
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_exec_surfaces_handler_errors_as_exceptions() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let (_test, second_mock) = run_code_mode_turn(
&server,
"surface nested tool handler failures as script exceptions",
r#"
try {
await tools.exec_command({});
text("no-exception");
} catch (error) {
text(`caught:${error?.message ?? String(error)}`);
}
"#,
/*include_apply_patch*/ false,
)
.await?;
let request = second_mock.single_request();
let (output, success) = custom_tool_output_body_and_success(&request, "call-1");
assert_ne!(
success,
Some(false),
"script should catch the nested tool error: {output}"
);
assert!(
output.contains("caught:"),
"expected caught exception text in output: {output}"
);
assert!(
!output.contains("no-exception"),
"nested tool error should not allow success path: {output}"
);
Ok(())
}
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_yield_and_resume_with_wait() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let mut builder = test_codex().with_config(move |config| {
let _ = config.features.enable(Feature::CodeMode);
});
let test = builder.build(&server).await?;
let phase_2_gate = test.workspace_path("code-mode-phase-2.ready");
let phase_3_gate = test.workspace_path("code-mode-phase-3.ready");
let phase_2_wait = wait_for_file_source(&phase_2_gate)?;
let phase_3_wait = wait_for_file_source(&phase_3_gate)?;
let code = format!(
r#"
text("phase 1");
yield_control();
{phase_2_wait}
text("phase 2");
{phase_3_wait}
text("phase 3");
"#
);
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-1"),
ev_custom_tool_call("call-1", "exec", &code),
ev_completed("resp-1"),
]),
)
.await;
let first_completion = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-1", "waiting"),
ev_completed("resp-2"),
]),
)
.await;
test.submit_turn("start the long exec").await?;
let first_request = first_completion.single_request();
let first_items = custom_tool_output_items(&first_request, "call-1");
assert_eq!(first_items.len(), 2);
assert_regex_match(
concat!(
r"(?s)\A",
r"Script running with cell ID \d+\nWall time \d+\.\d seconds\nOutput:\n\z"
),
text_item(&first_items, /*index*/ 0),
);
assert_eq!(text_item(&first_items, /*index*/ 1), "phase 1");
let cell_id = extract_running_cell_id(text_item(&first_items, /*index*/ 0));
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-3"),
responses::ev_function_call(
"call-2",
"wait",
&serde_json::to_string(&serde_json::json!({
"cell_id": cell_id.clone(),
"yield_time_ms": 1_000,
}))?,
),
ev_completed("resp-3"),
]),
)
.await;
let second_completion = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-2", "still waiting"),
ev_completed("resp-4"),
]),
)
.await;
fs::write(&phase_2_gate, "ready")?;
test.submit_turn("wait again").await?;
let second_request = second_completion.single_request();
let second_items = function_tool_output_items(&second_request, "call-2");
assert_eq!(second_items.len(), 2);
assert_regex_match(
concat!(
r"(?s)\A",
r"Script running with cell ID \d+\nWall time \d+\.\d seconds\nOutput:\n\z"
),
text_item(&second_items, /*index*/ 0),
);
assert_eq!(
extract_running_cell_id(text_item(&second_items, /*index*/ 0)),
cell_id
);
assert_eq!(text_item(&second_items, /*index*/ 1), "phase 2");
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-5"),
responses::ev_function_call(
"call-3",
"wait",
&serde_json::to_string(&serde_json::json!({
"cell_id": cell_id.clone(),
"yield_time_ms": 1_000,
}))?,
),
ev_completed("resp-5"),
]),
)
.await;
let third_completion = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-3", "done"),
ev_completed("resp-6"),
]),
)
.await;
fs::write(&phase_3_gate, "ready")?;
test.submit_turn("wait for completion").await?;
let third_request = third_completion.single_request();
let third_items = function_tool_output_items(&third_request, "call-3");
assert_eq!(third_items.len(), 2);
assert_regex_match(
concat!(
r"(?s)\A",
r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
),
text_item(&third_items, /*index*/ 0),
);
assert_eq!(text_item(&third_items, /*index*/ 1), "phase 3");
Ok(())
}
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_yield_timeout_works_for_busy_loop() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let mut builder = test_codex().with_config(move |config| {
let _ = config.features.enable(Feature::CodeMode);
});
let test = builder.build(&server).await?;
let code = r#"// @exec: {"yield_time_ms": 100}
text("phase 1");
while (true) {}
"#;
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-1"),
ev_custom_tool_call("call-1", "exec", code),
ev_completed("resp-1"),
]),
)
.await;
let first_completion = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-1", "waiting"),
ev_completed("resp-2"),
]),
)
.await;
tokio::time::timeout(
Duration::from_secs(5),
test.submit_turn("start the busy loop"),
)
.await??;
let first_request = first_completion.single_request();
let first_items = custom_tool_output_items(&first_request, "call-1");
assert_eq!(first_items.len(), 2);
assert_regex_match(
concat!(
r"(?s)\A",
r"Script running with cell ID \d+\nWall time \d+\.\d seconds\nOutput:\n\z"
),
text_item(&first_items, /*index*/ 0),
);
assert_eq!(text_item(&first_items, /*index*/ 1), "phase 1");
let cell_id = extract_running_cell_id(text_item(&first_items, /*index*/ 0));
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-3"),
responses::ev_function_call(
"call-2",
"wait",
&serde_json::to_string(&serde_json::json!({
"cell_id": cell_id.clone(),
"terminate": true,
}))?,
),
ev_completed("resp-3"),
]),
)
.await;
let second_completion = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-2", "terminated"),
ev_completed("resp-4"),
]),
)
.await;
test.submit_turn("terminate it").await?;
let second_request = second_completion.single_request();
let second_items = function_tool_output_items(&second_request, "call-2");
assert_eq!(second_items.len(), 1);
assert_regex_match(
concat!(
r"(?s)\A",
r"Script terminated\nWall time \d+\.\d seconds\nOutput:\n\z"
),
text_item(&second_items, /*index*/ 0),
);
Ok(())
}
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_run_multiple_yielded_sessions() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let mut builder = test_codex().with_config(move |config| {
let _ = config.features.enable(Feature::CodeMode);
});
let test = builder.build(&server).await?;
let session_a_gate = test.workspace_path("code-mode-session-a.ready");
let session_b_gate = test.workspace_path("code-mode-session-b.ready");
let session_a_wait = wait_for_file_source(&session_a_gate)?;
let session_b_wait = wait_for_file_source(&session_b_gate)?;
let session_a_code = format!(
r#"
text("session a start");
yield_control();
{session_a_wait}
text("session a done");
"#
);
let session_b_code = format!(
r#"
text("session b start");
yield_control();
{session_b_wait}
text("session b done");
"#
);
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-1"),
ev_custom_tool_call("call-1", "exec", &session_a_code),
ev_completed("resp-1"),
]),
)
.await;
let first_completion = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-1", "session a waiting"),
ev_completed("resp-2"),
]),
)
.await;
test.submit_turn("start session a").await?;
let first_request = first_completion.single_request();
let first_items = custom_tool_output_items(&first_request, "call-1");
assert_eq!(first_items.len(), 2);
let session_a_id = extract_running_cell_id(text_item(&first_items, /*index*/ 0));
assert_eq!(text_item(&first_items, /*index*/ 1), "session a start");
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-3"),
ev_custom_tool_call("call-2", "exec", &session_b_code),
ev_completed("resp-3"),
]),
)
.await;
let second_completion = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-2", "session b waiting"),
ev_completed("resp-4"),
]),
)
.await;
test.submit_turn("start session b").await?;
let second_request = second_completion.single_request();
let second_items = custom_tool_output_items(&second_request, "call-2");
assert_eq!(second_items.len(), 2);
let session_b_id = extract_running_cell_id(text_item(&second_items, /*index*/ 0));
assert_eq!(text_item(&second_items, /*index*/ 1), "session b start");
assert_ne!(session_a_id, session_b_id);
fs::write(&session_a_gate, "ready")?;
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-5"),
responses::ev_function_call(
"call-3",
"wait",
&serde_json::to_string(&serde_json::json!({
"cell_id": session_a_id.clone(),
"yield_time_ms": 1_000,
}))?,
),
ev_completed("resp-5"),
]),
)
.await;
let third_completion = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-3", "session a done"),
ev_completed("resp-6"),
]),
)
.await;
test.submit_turn("wait session a").await?;
let third_request = third_completion.single_request();
let third_items = function_tool_output_items(&third_request, "call-3");
assert_eq!(third_items.len(), 2);
assert_regex_match(
concat!(
r"(?s)\A",
r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
),
text_item(&third_items, /*index*/ 0),
);
assert_eq!(text_item(&third_items, /*index*/ 1), "session a done");
fs::write(&session_b_gate, "ready")?;
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-7"),
responses::ev_function_call(
"call-4",
"wait",
&serde_json::to_string(&serde_json::json!({
"cell_id": session_b_id.clone(),
"yield_time_ms": 1_000,
}))?,
),
ev_completed("resp-7"),
]),
)
.await;
let fourth_completion = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-4", "session b done"),
ev_completed("resp-8"),
]),
)
.await;
test.submit_turn("wait session b").await?;
let fourth_request = fourth_completion.single_request();
let fourth_items = function_tool_output_items(&fourth_request, "call-4");
assert_eq!(fourth_items.len(), 2);
assert_regex_match(
concat!(
r"(?s)\A",
r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
),
text_item(&fourth_items, /*index*/ 0),
);
assert_eq!(text_item(&fourth_items, /*index*/ 1), "session b done");
Ok(())
}
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_wait_can_terminate_and_continue() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let mut builder = test_codex().with_config(move |config| {
let _ = config.features.enable(Feature::CodeMode);
});
let test = builder.build(&server).await?;
let termination_gate = test.workspace_path("code-mode-terminate.ready");
let termination_wait = wait_for_file_source(&termination_gate)?;
let code = format!(
r#"
text("phase 1");
yield_control();
{termination_wait}
text("phase 2");
"#
);
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-1"),
ev_custom_tool_call("call-1", "exec", &code),
ev_completed("resp-1"),
]),
)
.await;
let first_completion = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-1", "waiting"),
ev_completed("resp-2"),
]),
)
.await;
test.submit_turn("start the long exec").await?;
let first_request = first_completion.single_request();
let first_items = custom_tool_output_items(&first_request, "call-1");
assert_eq!(first_items.len(), 2);
let cell_id = extract_running_cell_id(text_item(&first_items, /*index*/ 0));
assert_eq!(text_item(&first_items, /*index*/ 1), "phase 1");
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-3"),
responses::ev_function_call(
"call-2",
"wait",
&serde_json::to_string(&serde_json::json!({
"cell_id": cell_id.clone(),
"terminate": true,
}))?,
),
ev_completed("resp-3"),
]),
)
.await;
let second_completion = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-2", "terminated"),
ev_completed("resp-4"),
]),
)
.await;
test.submit_turn("terminate it").await?;
let second_request = second_completion.single_request();
let second_items = function_tool_output_items(&second_request, "call-2");
assert_eq!(second_items.len(), 1);
assert_regex_match(
concat!(
r"(?s)\A",
r"Script terminated\nWall time \d+\.\d seconds\nOutput:\n\z"
),
text_item(&second_items, /*index*/ 0),
);
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-5"),
ev_custom_tool_call(
"call-3",
"exec",
r#"
text("after terminate");
"#,
),
ev_completed("resp-5"),
]),
)
.await;
let third_completion = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-3", "done"),
ev_completed("resp-6"),
]),
)
.await;
test.submit_turn("run another exec").await?;
let third_request = third_completion.single_request();
let third_items = custom_tool_output_items(&third_request, "call-3");
assert_eq!(third_items.len(), 2);
assert_regex_match(
concat!(
r"(?s)\A",
r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
),
text_item(&third_items, /*index*/ 0),
);
assert_eq!(text_item(&third_items, /*index*/ 1), "after terminate");
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_wait_returns_error_for_unknown_session() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let mut builder = test_codex().with_config(move |config| {
let _ = config.features.enable(Feature::CodeMode);
});
let test = builder.build(&server).await?;
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-1"),
responses::ev_function_call(
"call-1",
"wait",
&serde_json::to_string(&serde_json::json!({
"cell_id": "999999",
"yield_time_ms": 1_000,
}))?,
),
ev_completed("resp-1"),
]),
)
.await;
let completion = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
)
.await;
test.submit_turn("wait on an unknown exec cell").await?;
let request = completion.single_request();
let (_, success) = request
.function_call_output_content_and_success("call-1")
.expect("function tool output should be present");
assert_ne!(success, Some(true));
let items = function_tool_output_items(&request, "call-1");
assert_eq!(items.len(), 2);
assert_regex_match(
concat!(
r"(?s)\A",
r"Script failed\nWall time \d+\.\d seconds\nOutput:\n\z"
),
text_item(&items, /*index*/ 0),
);
assert_eq!(
text_item(&items, /*index*/ 1),
"Script error:\nexec cell 999999 not found"
);
Ok(())
}
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_wait_terminate_returns_completed_session_if_it_finished_after_yield_control()
-> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let mut builder = test_codex().with_config(move |config| {
let _ = config.features.enable(Feature::CodeMode);
});
let test = builder.build(&server).await?;
let session_a_gate = test.workspace_path("code-mode-session-a-finished.ready");
let session_b_gate = test.workspace_path("code-mode-session-b-blocked.ready");
let session_a_done_marker = test.workspace_path("code-mode-session-a-done.txt");
let session_a_wait = wait_for_file_source(&session_a_gate)?;
let session_b_wait = wait_for_file_source(&session_b_gate)?;
let session_a_done_marker_quoted =
shlex::try_join([session_a_done_marker.to_string_lossy().as_ref()])?;
let session_a_done_command = format!("printf done > {session_a_done_marker_quoted}");
let session_a_code = format!(
r#"
text("session a start");
yield_control();
{session_a_wait}
text("session a done");
await tools.exec_command({{ cmd: {session_a_done_command:?} }});
"#
);
let session_b_code = format!(
r#"
text("session b start");
yield_control();
{session_b_wait}
text("session b done");
"#
);
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-1"),
ev_custom_tool_call("call-1", "exec", &session_a_code),
ev_completed("resp-1"),
]),
)
.await;
let first_completion = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-1", "session a waiting"),
ev_completed("resp-2"),
]),
)
.await;
test.submit_turn("start session a").await?;
let first_request = first_completion.single_request();
let first_items = custom_tool_output_items(&first_request, "call-1");
assert_eq!(first_items.len(), 2);
let session_a_id = extract_running_cell_id(text_item(&first_items, /*index*/ 0));
assert_eq!(text_item(&first_items, /*index*/ 1), "session a start");
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-3"),
ev_custom_tool_call("call-2", "exec", &session_b_code),
ev_completed("resp-3"),
]),
)
.await;
let second_completion = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-2", "session b waiting"),
ev_completed("resp-4"),
]),
)
.await;
test.submit_turn("start session b").await?;
let second_request = second_completion.single_request();
let second_items = custom_tool_output_items(&second_request, "call-2");
assert_eq!(second_items.len(), 2);
let session_b_id = extract_running_cell_id(text_item(&second_items, /*index*/ 0));
assert_eq!(text_item(&second_items, /*index*/ 1), "session b start");
fs::write(&session_a_gate, "ready")?;
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-5"),
responses::ev_function_call(
"call-3",
"wait",
&serde_json::to_string(&serde_json::json!({
"cell_id": session_b_id.clone(),
"yield_time_ms": 1_000,
}))?,
),
ev_completed("resp-5"),
]),
)
.await;
let third_completion = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-3", "session b still waiting"),
ev_completed("resp-6"),
]),
)
.await;
test.submit_turn("wait session b").await?;
let third_request = third_completion.single_request();
let third_items = function_tool_output_items(&third_request, "call-3");
assert_eq!(third_items.len(), 1);
assert_regex_match(
concat!(
r"(?s)\A",
r"Script running with cell ID \d+\nWall time \d+\.\d seconds\nOutput:\n\z"
),
text_item(&third_items, /*index*/ 0),
);
assert_eq!(
extract_running_cell_id(text_item(&third_items, /*index*/ 0)),
session_b_id
);
for _ in 0..100 {
if session_a_done_marker.exists() {
break;
}
tokio::time::sleep(Duration::from_millis(50)).await;
}
assert!(session_a_done_marker.exists());
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-7"),
responses::ev_function_call(
"call-4",
"wait",
&serde_json::to_string(&serde_json::json!({
"cell_id": session_a_id.clone(),
"terminate": true,
}))?,
),
ev_completed("resp-7"),
]),
)
.await;
let fourth_completion = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-4", "session a already done"),
ev_completed("resp-8"),
]),
)
.await;
test.submit_turn("terminate session a").await?;
let fourth_request = fourth_completion.single_request();
let fourth_items = function_tool_output_items(&fourth_request, "call-4");
match fourth_items.len() {
1 => {
assert_regex_match(
concat!(
r"(?s)\A",
r"Script terminated\nWall time \d+\.\d seconds\nOutput:\n\z"
),
text_item(&fourth_items, /*index*/ 0),
);
}
2 => {
assert_regex_match(
concat!(
r"(?s)\A",
r"Script (?:completed|terminated)\nWall time \d+\.\d seconds\nOutput:\n\z"
),
text_item(&fourth_items, /*index*/ 0),
);
assert_eq!(text_item(&fourth_items, /*index*/ 1), "session a done");
}
other => panic!("unexpected number of content items: {other}"),
}
Ok(())
}
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_background_keeps_running_on_later_turn_without_wait() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let mut builder = test_codex().with_config(move |config| {
let _ = config.features.enable(Feature::CodeMode);
});
let test = builder.build(&server).await?;
let resumed_file = test.workspace_path("code-mode-yield-resumed.txt");
let resumed_file_quoted = shlex::try_join([resumed_file.to_string_lossy().as_ref()])?;
let write_file_command = format!("printf resumed > {resumed_file_quoted}");
let wait_for_file_command =
format!("while [ ! -f {resumed_file_quoted} ]; do sleep 0.01; done; printf ready");
let code = format!(
r#"
text("before yield");
yield_control();
await tools.exec_command({{ cmd: {write_file_command:?} }});
text("after yield");
"#
);
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-1"),
ev_custom_tool_call("call-1", "exec", &code),
ev_completed("resp-1"),
]),
)
.await;
let first_completion = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-1", "exec yielded"),
ev_completed("resp-2"),
]),
)
.await;
test.submit_turn("start yielded exec").await?;
let first_request = first_completion.single_request();
let first_items = custom_tool_output_items(&first_request, "call-1");
assert_eq!(first_items.len(), 2);
assert_regex_match(
concat!(
r"(?s)\A",
r"Script running with cell ID \d+\nWall time \d+\.\d seconds\nOutput:\n\z"
),
text_item(&first_items, /*index*/ 0),
);
assert_eq!(text_item(&first_items, /*index*/ 1), "before yield");
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-3"),
responses::ev_function_call(
"call-2",
"exec_command",
&serde_json::to_string(&serde_json::json!({
"cmd": wait_for_file_command,
}))?,
),
ev_completed("resp-3"),
]),
)
.await;
let second_completion = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-2", "file appeared"),
ev_completed("resp-4"),
]),
)
.await;
test.submit_turn("wait for resumed file").await?;
let second_request = second_completion.single_request();
assert!(
second_request
.function_call_output_text("call-2")
.is_some_and(|output| output.ends_with("ready"))
);
assert_eq!(fs::read_to_string(&resumed_file)?, "resumed");
Ok(())
}
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_wait_uses_its_own_max_tokens_budget() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let mut builder = test_codex().with_config(move |config| {
let _ = config.features.enable(Feature::CodeMode);
});
let test = builder.build(&server).await?;
let completion_gate = test.workspace_path("code-mode-max-tokens.ready");
let completion_wait = wait_for_file_source(&completion_gate)?;
let code = format!(
r#"// @exec: {{"max_output_tokens": 100}}
text("phase 1");
yield_control();
{completion_wait}
text("token one token two token three token four token five token six token seven");
"#
);
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-1"),
ev_custom_tool_call("call-1", "exec", &code),
ev_completed("resp-1"),
]),
)
.await;
let first_completion = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-1", "waiting"),
ev_completed("resp-2"),
]),
)
.await;
test.submit_turn("start the long exec").await?;
let first_request = first_completion.single_request();
let first_items = custom_tool_output_items(&first_request, "call-1");
assert_eq!(first_items.len(), 2);
assert_eq!(text_item(&first_items, /*index*/ 1), "phase 1");
let cell_id = extract_running_cell_id(text_item(&first_items, /*index*/ 0));
fs::write(&completion_gate, "ready")?;
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-3"),
responses::ev_function_call(
"call-2",
"wait",
&serde_json::to_string(&serde_json::json!({
"cell_id": cell_id.clone(),
"yield_time_ms": 1_000,
"max_tokens": 6,
}))?,
),
ev_completed("resp-3"),
]),
)
.await;
let second_completion = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-2", "done"),
ev_completed("resp-4"),
]),
)
.await;
test.submit_turn("wait for completion").await?;
let second_request = second_completion.single_request();
let second_items = function_tool_output_items(&second_request, "call-2");
assert_eq!(second_items.len(), 2);
assert_regex_match(
concat!(
r"(?s)\A",
r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
),
text_item(&second_items, /*index*/ 0),
);
let expected_pattern = r#"(?sx)
\A
Total\ output\ lines:\ 1\n
\n
.*…\d+\ tokens\ truncated….*
\z
"#;
assert_regex_match(expected_pattern, text_item(&second_items, /*index*/ 1));
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_output_serialized_text_via_global_helper() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let (_test, second_mock) = run_code_mode_turn(
&server,
"use exec to return structured text",
r#"
text({ json: true });
"#,
/*include_apply_patch*/ false,
)
.await?;
let req = second_mock.single_request();
let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
eprintln!(
"hidden dynamic tool raw output: {}",
req.custom_tool_call_output("call-1")
);
assert_ne!(
success,
Some(false),
"exec call failed unexpectedly: {output}"
);
assert_eq!(output, r#"{"json":true}"#);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_resume_after_set_timeout() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let (_test, second_mock) = run_code_mode_turn(
&server,
"use exec to wait for a timeout",
r#"
await new Promise((resolve) => setTimeout(resolve, 10));
text("timer done");
"#,
/*include_apply_patch*/ false,
)
.await?;
let req = second_mock.single_request();
let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
assert_ne!(
success,
Some(false),
"exec setTimeout call failed unexpectedly: {output}"
);
assert_eq!(output, "timer done");
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_notify_injects_additional_exec_tool_output_into_active_context() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let (_test, second_mock) = run_code_mode_turn(
&server,
"use exec notify helper",
r#"
notify("code_mode_notify_marker");
await tools.test_sync_tool({});
text("done");
"#,
/*include_apply_patch*/ false,
)
.await?;
let req = second_mock.single_request();
let has_notify_output = req
.inputs_of_type("custom_tool_call_output")
.iter()
.any(|item| {
item.get("call_id").and_then(serde_json::Value::as_str) == Some("call-1")
&& item
.get("output")
.and_then(serde_json::Value::as_str)
.is_some_and(|text| text.contains("code_mode_notify_marker"))
&& item.get("name").and_then(serde_json::Value::as_str) == Some("exec")
});
assert!(
has_notify_output,
"expected notify marker in custom_tool_call_output item: {:?}",
req.input()
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_exit_stops_script_immediately() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let (_test, second_mock) = run_code_mode_turn(
&server,
"use exec to stop script early with exit helper",
r#"
text("before");
exit();
text("after");
"#,
/*include_apply_patch*/ false,
)
.await?;
let req = second_mock.single_request();
let items = custom_tool_output_items(&req, "call-1");
let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
assert_ne!(
success,
Some(false),
"exec exit helper call failed unexpectedly: {output}"
);
assert_eq!(items.len(), 2);
assert_regex_match(
concat!(
r"(?s)\A",
r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
),
text_item(&items, /*index*/ 0),
);
assert_eq!(text_item(&items, /*index*/ 1), "before");
assert_eq!(output, "before");
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_surfaces_text_stringify_errors() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let (_test, second_mock) = run_code_mode_turn(
&server,
"use exec to return circular text",
r#"
const circular = {};
circular.self = circular;
text(circular);
"#,
/*include_apply_patch*/ false,
)
.await?;
let req = second_mock.single_request();
let items = custom_tool_output_items(&req, "call-1");
let (_, success) = req
.custom_tool_call_output_content_and_success("call-1")
.expect("custom tool output should be present");
assert_ne!(
success,
Some(true),
"circular stringify unexpectedly succeeded"
);
assert_eq!(items.len(), 2);
assert_regex_match(
concat!(
r"(?s)\A",
r"Script failed\nWall time \d+\.\d seconds\nOutput:\n\z"
),
text_item(&items, /*index*/ 0),
);
assert!(text_item(&items, /*index*/ 1).contains("Script error:"));
assert!(text_item(&items, /*index*/ 1).contains("Converting circular structure to JSON"));
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_output_images_via_global_helper() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let (_test, second_mock) = run_code_mode_turn(
&server,
"use exec to return images",
r#"
image("https://example.com/image.jpg");
image("data:image/png;base64,AAA");
"#,
/*include_apply_patch*/ false,
)
.await?;
let req = second_mock.single_request();
let items = custom_tool_output_items(&req, "call-1");
let (_, success) = custom_tool_output_body_and_success(&req, "call-1");
assert_ne!(
success,
Some(false),
"code_mode image output failed unexpectedly"
);
assert_eq!(items.len(), 3);
assert_regex_match(
concat!(
r"(?s)\A",
r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
),
text_item(&items, /*index*/ 0),
);
assert_eq!(
items[1],
serde_json::json!({
"type": "input_image",
"image_url": "https://example.com/image.jpg",
"detail": "high"
}),
);
assert_eq!(
items[2],
serde_json::json!({
"type": "input_image",
"image_url": "data:image/png;base64,AAA",
"detail": "high"
}),
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_use_view_image_result_with_image_helper() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let mut builder = test_codex()
.with_model("gpt-5.3-codex")
.with_config(move |config| {
let _ = config.features.enable(Feature::CodeMode);
});
let test = builder.build(&server).await?;
let image_bytes = BASE64_STANDARD.decode(
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==",
)?;
let image_path = test.cwd_path().join("code_mode_view_image.png");
fs::write(&image_path, image_bytes)?;
let image_path_json = serde_json::to_string(&image_path.to_string_lossy().to_string())?;
let code = format!(
r#"
const out = await tools.view_image({{ path: {image_path_json}, detail: "original" }});
image(out);
"#
);
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-1"),
ev_custom_tool_call("call-1", "exec", &code),
ev_completed("resp-1"),
]),
)
.await;
let second_mock = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
)
.await;
test.submit_turn("use exec to call view_image and emit its image output")
.await?;
let req = second_mock.single_request();
let items = custom_tool_output_items(&req, "call-1");
let (_, success) = custom_tool_output_body_and_success(&req, "call-1");
assert_ne!(
success,
Some(false),
"code_mode view_image call failed unexpectedly"
);
assert_eq!(items.len(), 2);
assert_regex_match(
concat!(
r"(?s)\A",
r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
),
text_item(&items, /*index*/ 0),
);
assert_eq!(
items[1].get("type").and_then(Value::as_str),
Some("input_image")
);
let emitted_image_url = items[1]
.get("image_url")
.and_then(Value::as_str)
.expect("image helper should emit an input_image item with image_url");
assert!(emitted_image_url.starts_with("data:image/png;base64,"));
assert_eq!(
items[1].get("detail").and_then(Value::as_str),
Some("original")
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_use_mcp_image_result_with_image_helper() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let code = r#"
const out = await tools.mcp__rmcp__image_scenario({
scenario: "image_only_original_detail",
});
const imageItem = out.content.find((item) => item.type === "image");
image(imageItem);
"#;
let (_test, second_mock) = run_code_mode_turn_with_rmcp_model(
&server,
"use exec to call the rmcp image scenario tool and emit its image output",
code,
"gpt-5.3-codex",
)
.await?;
let req = second_mock.single_request();
let items = custom_tool_output_items(&req, "call-1");
let (_, success) = custom_tool_output_body_and_success(&req, "call-1");
assert_ne!(
success,
Some(false),
"code_mode mcp image scenario call failed unexpectedly"
);
assert_eq!(items.len(), 2);
assert_regex_match(
concat!(
r"(?s)\A",
r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
),
text_item(&items, /*index*/ 0),
);
assert_eq!(
items[1].get("type").and_then(Value::as_str),
Some("input_image")
);
let emitted_image_url = items[1]
.get("image_url")
.and_then(Value::as_str)
.expect("image helper should emit an input_image item with image_url");
assert!(emitted_image_url.starts_with("data:image/png;base64,"));
assert_eq!(
items[1].get("detail").and_then(Value::as_str),
Some("original")
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_apply_patch_via_nested_tool() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let file_name = "code_mode_apply_patch.txt";
let patch = format!(
"*** Begin Patch\n*** Add File: {file_name}\n+hello from code_mode\n*** End Patch\n"
);
let code = format!("text(await tools.apply_patch({patch:?}));\n");
let (test, second_mock) = run_code_mode_turn(
&server,
"use exec to run apply_patch",
&code,
/*include_apply_patch*/ true,
)
.await?;
let req = second_mock.single_request();
let items = custom_tool_output_items(&req, "call-1");
let (_, success) = req
.custom_tool_call_output_content_and_success("call-1")
.expect("custom tool output should be present");
assert_ne!(
success,
Some(false),
"exec apply_patch call failed unexpectedly: {items:?}"
);
assert_eq!(items.len(), 2);
assert_regex_match(
concat!(
r"(?s)\A",
r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
),
text_item(&items, /*index*/ 0),
);
assert_eq!(text_item(&items, /*index*/ 1), "{}");
let file_path = test.cwd_path().join(file_name);
assert_eq!(fs::read_to_string(&file_path)?, "hello from code_mode\n");
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_print_structured_mcp_tool_result_fields() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let code = r#"
const { content, structuredContent, isError } = await tools.mcp__rmcp__echo({
message: "ping",
});
text(
`echo=${structuredContent?.echo ?? "missing"}\n` +
`env=${structuredContent?.env ?? "missing"}\n` +
`isError=${String(isError)}\n` +
`contentLength=${content.length}`
);
"#;
let (_test, second_mock) =
run_code_mode_turn_with_rmcp(&server, "use exec to run the rmcp echo tool", code).await?;
let req = second_mock.single_request();
let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
assert_ne!(
success,
Some(false),
"exec rmcp echo call failed unexpectedly: {output}"
);
assert_eq!(
output,
"echo=ECHOING: ping
env=propagated-env
isError=false
contentLength=0"
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_only_can_call_mcp_tool() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let code = r#"
const result = await tools.mcp__rmcp__echo({ message: "ping" });
text(`echo=${result.structuredContent?.echo ?? "missing"}`);
"#;
let (_test, second_mock) = run_code_mode_turn_with_rmcp_mode(
&server,
"use exec to run the rmcp echo tool in code mode only",
code,
/*code_mode_only*/ true,
)
.await?;
let req = second_mock.single_request();
let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
assert_ne!(
success,
Some(false),
"code_mode_only rmcp tool call failed unexpectedly: {output}"
);
assert_eq!(output, "echo=ECHOING: ping");
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_exposes_mcp_tools_on_global_tools_object() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let code = r#"
const { content, structuredContent, isError } = await tools.mcp__rmcp__echo({
message: "ping",
});
text(
`hasEcho=${String(Object.keys(tools).includes("mcp__rmcp__echo"))}\n` +
`echoType=${typeof tools.mcp__rmcp__echo}\n` +
`echo=${structuredContent?.echo ?? "missing"}\n` +
`isError=${String(isError)}\n` +
`contentLength=${content.length}`
);
"#;
let (_test, second_mock) =
run_code_mode_turn_with_rmcp(&server, "use exec to inspect the global tools object", code)
.await?;
let req = second_mock.single_request();
let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
assert_ne!(
success,
Some(false),
"exec global rmcp access failed unexpectedly: {output}"
);
assert_eq!(
output,
"hasEcho=true
echoType=function
echo=ECHOING: ping
isError=false
contentLength=0"
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_exposes_namespaced_mcp_tools_on_global_tools_object() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let code = r#"
text(JSON.stringify({
hasExecCommand: typeof tools.exec_command === "function",
hasNamespacedEcho: typeof tools.mcp__rmcp__echo === "function",
}));
"#;
let (_test, second_mock) =
run_code_mode_turn_with_rmcp(&server, "use exec to inspect the global tools object", code)
.await?;
let req = second_mock.single_request();
let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
assert_ne!(
success,
Some(false),
"exec global tools inspection failed unexpectedly: {output}"
);
let parsed: Value = serde_json::from_str(&output)?;
assert_eq!(
parsed,
serde_json::json!({
"hasExecCommand": !cfg!(windows),
"hasNamespacedEcho": true,
})
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_exposes_normalized_illegal_mcp_tool_names() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let code = r#"
const result = await tools.mcp__rmcp__echo_tool({ message: "ping" });
text(`echo=${result.structuredContent.echo}`);
"#;
let (_test, second_mock) = run_code_mode_turn_with_rmcp(
&server,
"use exec to call a normalized rmcp tool name",
code,
)
.await?;
let req = second_mock.single_request();
let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
assert_ne!(
success,
Some(false),
"exec normalized rmcp tool call failed unexpectedly: {output}"
);
assert_eq!(output, "echo=ECHOING: ping");
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_lists_global_scope_items() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let code = r#"
text(JSON.stringify(Object.getOwnPropertyNames(globalThis).sort()));
"#;
let (_test, second_mock) =
run_code_mode_turn_with_rmcp(&server, "use exec to inspect global scope", code).await?;
let req = second_mock.single_request();
let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
assert_ne!(
success,
Some(false),
"exec global scope inspection failed unexpectedly: {output}"
);
let globals = serde_json::from_str::<Vec<String>>(&output)?;
let globals = globals.into_iter().collect::<HashSet<_>>();
let expected = [
"AggregateError",
"ALL_TOOLS",
"Array",
"ArrayBuffer",
"AsyncDisposableStack",
"BigInt",
"BigInt64Array",
"BigUint64Array",
"Boolean",
"clearTimeout",
"DataView",
"Date",
"DisposableStack",
"Error",
"EvalError",
"FinalizationRegistry",
"Float16Array",
"Float32Array",
"Float64Array",
"Function",
"Infinity",
"Int16Array",
"Int32Array",
"Int8Array",
"Intl",
"Iterator",
"JSON",
"Map",
"Math",
"NaN",
"Number",
"Object",
"Promise",
"Proxy",
"RangeError",
"ReferenceError",
"Reflect",
"RegExp",
"Set",
"String",
"SuppressedError",
"Symbol",
"SyntaxError",
"Temporal",
"TypeError",
"URIError",
"Uint16Array",
"Uint32Array",
"Uint8Array",
"Uint8ClampedArray",
"WeakMap",
"WeakRef",
"WeakSet",
"__codexContentItems",
"add_content",
"decodeURI",
"decodeURIComponent",
"encodeURI",
"encodeURIComponent",
"escape",
"exit",
"eval",
"globalThis",
"image",
"isFinite",
"isNaN",
"load",
"notify",
"parseFloat",
"parseInt",
"setTimeout",
"store",
"text",
"tools",
"undefined",
"unescape",
"yield_control",
];
for g in &globals {
assert!(
expected.contains(&g.as_str()),
"unexpected global {g} in {globals:?}"
);
}
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_exports_all_tools_metadata_for_builtin_tools() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let code = r#"
const tool = ALL_TOOLS.find(({ name }) => name === "view_image");
text(JSON.stringify(tool));
"#;
let (_test, second_mock) = run_code_mode_turn(
&server,
"use exec to inspect ALL_TOOLS",
code,
/*include_apply_patch*/ false,
)
.await?;
let req = second_mock.single_request();
let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
assert_ne!(
success,
Some(false),
"exec ALL_TOOLS lookup failed unexpectedly: {output}"
);
let parsed: Value = serde_json::from_str(
&custom_tool_output_last_non_empty_text(&req, "call-1")
.expect("exec ALL_TOOLS lookup should emit JSON"),
)?;
assert_eq!(
parsed,
serde_json::json!({
"name": "view_image",
"description": "View a local image from the filesystem (only use if given a full filepath by the user, and the image isn't already attached to the thread context within <image ...> tags).\n\nexec tool declaration:\n```ts\ndeclare const tools: { view_image(args: {\n // Local filesystem path to an image file\n path: string;\n}): Promise<{\n // Image detail hint returned by view_image. Returns `original` when original resolution is preserved, otherwise `null`.\n detail: string | null;\n // Data URL for the loaded image.\n image_url: string;\n}>; };\n```",
})
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_exports_all_tools_metadata_for_namespaced_mcp_tools() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let code = r#"
const tool = ALL_TOOLS.find(
({ name }) => name === "mcp__rmcp__echo"
);
text(JSON.stringify(tool));
"#;
let (_test, second_mock) =
run_code_mode_turn_with_rmcp(&server, "use exec to inspect ALL_TOOLS", code).await?;
let req = second_mock.single_request();
let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
assert_ne!(
success,
Some(false),
"exec ALL_TOOLS MCP lookup failed unexpectedly: {output}"
);
let parsed: Value = serde_json::from_str(
&custom_tool_output_last_non_empty_text(&req, "call-1")
.expect("exec ALL_TOOLS MCP lookup should emit JSON"),
)?;
assert_eq!(
parsed,
serde_json::json!({
"name": "mcp__rmcp__echo",
"description": concat!(
"Echo back the provided message and include environment data.\n\n",
"exec tool declaration:\n",
"```ts\n",
"declare const tools: { mcp__rmcp__echo(args: { env_var?: string; message: string; }): ",
"Promise<CallToolResult<{ echo: string; env: string | null; }>>; };\n",
"```",
),
})
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_call_hidden_dynamic_tools() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let mut builder = test_codex().with_config(move |config| {
let _ = config.features.enable(Feature::CodeMode);
});
let base_test = builder.build(&server).await?;
let new_thread = base_test
.thread_manager
.start_thread_with_tools(
base_test.config.clone(),
vec![DynamicToolSpec {
namespace: Some("codex_app".to_string()),
name: "hidden_dynamic_tool".to_string(),
description: "A hidden dynamic tool.".to_string(),
input_schema: serde_json::json!({
"type": "object",
"properties": {
"city": { "type": "string" }
},
"required": ["city"],
"additionalProperties": false,
}),
defer_loading: true,
}],
/*persist_extended_history*/ false,
)
.await?;
let mut test = base_test;
test.codex = new_thread.thread;
test.session_configured = new_thread.session_configured;
let code = r#"
const tool = ALL_TOOLS.find(({ name }) => name === "codex_app_hidden_dynamic_tool");
const out = await tools.codex_app_hidden_dynamic_tool({ city: "Paris" });
text(
JSON.stringify({
name: tool?.name ?? null,
description: tool?.description ?? null,
out,
})
);
"#;
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-1"),
ev_custom_tool_call("call-1", "exec", code),
ev_completed("resp-1"),
]),
)
.await;
let second_mock = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
)
.await;
let cwd = test.cwd.path().to_path_buf();
let (sandbox_policy, permission_profile) =
turn_permission_fields(PermissionProfile::Disabled, cwd.as_path());
test.codex
.submit(Op::UserTurn {
environments: None,
items: vec![UserInput::Text {
text: "use exec to inspect and call hidden tools".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
cwd,
approval_policy: AskForApproval::Never,
approvals_reviewer: None,
sandbox_policy,
permission_profile,
model: test.session_configured.model.clone(),
effort: None,
summary: None,
service_tier: None,
collaboration_mode: None,
personality: None,
})
.await?;
let turn_id = wait_for_event_match(&test.codex, |event| match event {
EventMsg::TurnStarted(event) => Some(event.turn_id.clone()),
_ => None,
})
.await;
let request = wait_for_event_match(&test.codex, |event| match event {
EventMsg::DynamicToolCallRequest(request) => Some(request.clone()),
_ => None,
})
.await;
assert_eq!(request.namespace.as_deref(), Some("codex_app"));
assert_eq!(request.tool, "hidden_dynamic_tool");
assert_eq!(request.arguments, serde_json::json!({ "city": "Paris" }));
test.codex
.submit(Op::DynamicToolResponse {
id: request.call_id,
response: DynamicToolResponse {
content_items: vec![DynamicToolCallOutputContentItem::InputText {
text: "hidden-ok".to_string(),
}],
success: true,
},
})
.await?;
wait_for_event(&test.codex, |event| match event {
EventMsg::TurnComplete(event) => event.turn_id == turn_id,
_ => false,
})
.await;
let req = second_mock.single_request();
let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
assert_ne!(
success,
Some(false),
"exec hidden dynamic tool call failed unexpectedly: {output}"
);
let parsed: Value = serde_json::from_str(
&custom_tool_output_last_non_empty_text(&req, "call-1")
.expect("exec hidden dynamic tool lookup should emit JSON"),
)?;
assert_eq!(
parsed.get("name"),
Some(&Value::String("codex_app_hidden_dynamic_tool".to_string()))
);
assert_eq!(
parsed.get("out"),
Some(&Value::String("hidden-ok".to_string()))
);
assert!(
parsed
.get("description")
.and_then(Value::as_str)
.is_some_and(|description| {
description.contains("A hidden dynamic tool.")
&& description.contains("declare const tools:")
&& description.contains("codex_app_hidden_dynamic_tool(args:")
})
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_print_content_only_mcp_tool_result_fields() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let code = r#"
const { content, structuredContent, isError } = await tools.mcp__rmcp__image_scenario({
scenario: "text_only",
caption: "caption from mcp",
});
text(
`firstType=${content[0]?.type ?? "missing"}\n` +
`firstText=${content[0]?.text ?? "missing"}\n` +
`structuredContent=${String(structuredContent ?? null)}\n` +
`isError=${String(isError)}`
);
"#;
let (_test, second_mock) = run_code_mode_turn_with_rmcp(
&server,
"use exec to run the rmcp image scenario tool",
code,
)
.await?;
let req = second_mock.single_request();
let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
assert_ne!(
success,
Some(false),
"exec rmcp image scenario call failed unexpectedly: {output}"
);
assert_eq!(
output,
"firstType=text
firstText=caption from mcp
structuredContent=null
isError=false"
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_print_error_mcp_tool_result_fields() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let code = r#"
const { content, structuredContent, isError } = await tools.mcp__rmcp__echo({});
const firstText = content[0]?.text ?? "";
const mentionsMissingMessage =
firstText.includes("missing field") && firstText.includes("message");
text(
`isError=${String(isError)}\n` +
`contentLength=${content.length}\n` +
`mentionsMissingMessage=${String(mentionsMissingMessage)}\n` +
`structuredContent=${String(structuredContent ?? null)}`
);
"#;
let (_test, second_mock) =
run_code_mode_turn_with_rmcp(&server, "use exec to call rmcp echo badly", code).await?;
let req = second_mock.single_request();
let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
assert_ne!(
success,
Some(false),
"exec rmcp error call failed unexpectedly: {output}"
);
assert_eq!(
output,
"isError=true
contentLength=1
mentionsMissingMessage=true
structuredContent=null"
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_store_and_load_values_across_turns() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let mut builder = test_codex().with_config(move |config| {
let _ = config.features.enable(Feature::CodeMode);
});
let test = builder.build(&server).await?;
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-1"),
ev_custom_tool_call(
"call-1",
"exec",
r#"
store("nb", { title: "Notebook", items: [1, true, null] });
text("stored");
"#,
),
ev_completed("resp-1"),
]),
)
.await;
let first_follow_up = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-1", "stored"),
ev_completed("resp-2"),
]),
)
.await;
test.submit_turn("store value for later").await?;
let first_request = first_follow_up.single_request();
let (first_output, first_success) =
custom_tool_output_body_and_success(&first_request, "call-1");
assert_ne!(
first_success,
Some(false),
"exec store call failed unexpectedly: {first_output}"
);
assert_eq!(first_output, "stored");
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-3"),
ev_custom_tool_call(
"call-2",
"exec",
r#"
text(JSON.stringify(load("nb")));
"#,
),
ev_completed("resp-3"),
]),
)
.await;
let second_follow_up = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-2", "loaded"),
ev_completed("resp-4"),
]),
)
.await;
test.submit_turn("load the stored value").await?;
let second_request = second_follow_up.single_request();
let (second_output, second_success) =
custom_tool_output_body_and_success(&second_request, "call-2");
assert_ne!(
second_success,
Some(false),
"exec load call failed unexpectedly: {second_output}"
);
let loaded: Value = serde_json::from_str(
&custom_tool_output_last_non_empty_text(&second_request, "call-2")
.expect("exec load call should emit JSON"),
)?;
assert_eq!(
loaded,
serde_json::json!({ "title": "Notebook", "items": [1, true, null] })
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_compare_elapsed_time_around_set_timeout() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let (_test, second_mock) = run_code_mode_turn(
&server,
"measure elapsed time around setTimeout",
r#"
const start_ms = Date.now();
await new Promise((resolve) => setTimeout(resolve, 100));
const end_ms = Date.now();
text(JSON.stringify({
start_ms,
end_ms,
elapsed_ms: end_ms - start_ms,
waited_long_enough: end_ms - start_ms >= 100,
}));
"#,
/*include_apply_patch*/ false,
)
.await?;
let second_request = second_mock.single_request();
let (second_output, second_success) =
custom_tool_output_body_and_success(&second_request, "call-1");
assert_ne!(
second_success,
Some(false),
"exec compare time call failed unexpectedly: {second_output}"
);
let compared: Value = serde_json::from_str(
&custom_tool_output_last_non_empty_text(&second_request, "call-1")
.expect("exec compare time call should emit JSON"),
)?;
let elapsed_ms = compared
.get("elapsed_ms")
.and_then(Value::as_i64)
.expect("elapsed_ms should be an integer");
assert!(
elapsed_ms >= 100,
"expected elapsed_ms >= 100, got {elapsed_ms}"
);
assert_eq!(compared.get("waited_long_enough"), Some(&Value::Bool(true)));
Ok(())
}