mirror of
https://github.com/openai/codex.git
synced 2026-04-30 17:36:40 +00:00
Merge remote-tracking branch 'origin/rhan/surface-updates' into rhan/emittance
This commit is contained in:
@@ -1321,7 +1321,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
expectation: Expectation::FileNotCreated {
|
||||
target: TargetPath::Workspace("ro_never.txt"),
|
||||
message_contains: if cfg!(target_os = "linux") {
|
||||
&["Permission denied"]
|
||||
&["Permission denied|Read-only file system"]
|
||||
} else {
|
||||
&[
|
||||
"Permission denied|Operation not permitted|operation not permitted|\
|
||||
@@ -1468,7 +1468,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
expectation: Expectation::FileNotCreated {
|
||||
target: TargetPath::OutsideWorkspace("ww_never.txt"),
|
||||
message_contains: if cfg!(target_os = "linux") {
|
||||
&["Permission denied"]
|
||||
&["Permission denied|Read-only file system"]
|
||||
} else {
|
||||
&[
|
||||
"Permission denied|Operation not permitted|operation not permitted|\
|
||||
@@ -2290,20 +2290,16 @@ allow_local_binding = true
|
||||
test.config.permissions.network.is_some(),
|
||||
"expected managed network proxy config to be present"
|
||||
);
|
||||
let runtime_proxy = test
|
||||
.session_configured
|
||||
test.session_configured
|
||||
.network_proxy
|
||||
.as_ref()
|
||||
.expect("expected runtime managed network proxy addresses");
|
||||
let proxy_addr = runtime_proxy.http_addr.as_str();
|
||||
|
||||
let call_id_first = "allow-network-first";
|
||||
// Use the same urllib-based pattern as the other network integration tests,
|
||||
// but point it at the runtime proxy directly so the blocked host reliably
|
||||
// produces a network approval request without relying on curl.
|
||||
let fetch_command = format!(
|
||||
"python3 -c \"import urllib.request; proxy = urllib.request.ProxyHandler({{'http': 'http://{proxy_addr}'}}); opener = urllib.request.build_opener(proxy); print('OK:' + opener.open('http://codex-network-test.invalid', timeout=30).read().decode(errors='replace'))\""
|
||||
);
|
||||
// Use urllib without overriding proxy settings so managed-network sessions
|
||||
// continue to exercise the env-based proxy routing path under bubblewrap.
|
||||
let fetch_command = r#"python3 -c "import urllib.request; opener = urllib.request.build_opener(urllib.request.ProxyHandler()); print('OK:' + opener.open('http://codex-network-test.invalid', timeout=30).read().decode(errors='replace'))""#
|
||||
.to_string();
|
||||
let first_event = shell_event(
|
||||
call_id_first,
|
||||
&fetch_command,
|
||||
|
||||
@@ -519,6 +519,7 @@ async fn resume_replays_image_tool_outputs_with_detail() {
|
||||
item: RolloutItem::ResponseItem(ResponseItem::FunctionCall {
|
||||
id: None,
|
||||
name: "view_image".to_string(),
|
||||
namespace: None,
|
||||
arguments: "{\"path\":\"/tmp/example.webp\"}".to_string(),
|
||||
call_id: function_call_id.to_string(),
|
||||
}),
|
||||
@@ -1883,6 +1884,7 @@ async fn azure_responses_request_includes_store_and_reasoning_ids() {
|
||||
prompt.input.push(ResponseItem::FunctionCall {
|
||||
id: Some("function-id".into()),
|
||||
name: "do_thing".into(),
|
||||
namespace: None,
|
||||
arguments: "{}".into(),
|
||||
call_id: "function-call-id".into(),
|
||||
});
|
||||
|
||||
@@ -21,6 +21,7 @@ use pretty_assertions::assert_eq;
|
||||
use serde_json::Value;
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use std::time::Duration;
|
||||
use wiremock::MockServer;
|
||||
|
||||
@@ -32,6 +33,16 @@ fn custom_tool_output_items(req: &ResponsesRequest, call_id: &str) -> Vec<Value>
|
||||
.clone()
|
||||
}
|
||||
|
||||
fn function_tool_output_items(req: &ResponsesRequest, call_id: &str) -> Vec<Value> {
|
||||
match req.function_call_output(call_id).get("output") {
|
||||
Some(Value::Array(items)) => items.clone(),
|
||||
Some(Value::String(text)) => {
|
||||
vec![serde_json::json!({ "type": "input_text", "text": text })]
|
||||
}
|
||||
_ => panic!("function tool output should be serialized as text or content items"),
|
||||
}
|
||||
}
|
||||
|
||||
fn text_item(items: &[Value], index: usize) -> &str {
|
||||
items[index]
|
||||
.get("text")
|
||||
@@ -39,6 +50,23 @@ fn text_item(items: &[Value], index: usize) -> &str {
|
||||
.expect("content item should be input_text")
|
||||
}
|
||||
|
||||
/// Parses the session ID out of a "Script running with session ID N" header.
///
/// Only the text between the fixed prefix and the first newline (or the end
/// of the input when there is no newline) is parsed.
///
/// # Panics
///
/// Panics when `text` does not start with the expected prefix, or when the
/// extracted text is not a valid `i32`.
fn extract_running_session_id(text: &str) -> i32 {
    const HEADER_PREFIX: &str = "Script running with session ID ";

    let after_prefix = text
        .strip_prefix(HEADER_PREFIX)
        .expect("running header should contain a session ID");
    // Keep just the remainder of the first line.
    let id_text = match after_prefix.find('\n') {
        Some(newline_at) => &after_prefix[..newline_at],
        None => after_prefix,
    };
    id_text
        .parse::<i32>()
        .expect("session ID should parse as i32")
}
|
||||
|
||||
fn wait_for_file_source(path: &Path) -> Result<String> {
|
||||
let quoted_path = shlex::try_join([path.to_string_lossy().as_ref()])?;
|
||||
let command = format!("if [ -f {quoted_path} ]; then printf ready; fi");
|
||||
Ok(format!(
|
||||
r#"while ((await exec_command({{ cmd: {command:?} }})).output !== "ready") {{
|
||||
}}"#
|
||||
))
|
||||
}
|
||||
|
||||
fn custom_tool_output_body_and_success(
|
||||
req: &ResponsesRequest,
|
||||
call_id: &str,
|
||||
@@ -289,6 +317,922 @@ Error:\ boom\n
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_can_yield_and_resume_with_exec_wait() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
let _ = config.features.enable(Feature::CodeMode);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
let phase_2_gate = test.workspace_path("code-mode-phase-2.ready");
|
||||
let phase_3_gate = test.workspace_path("code-mode-phase-3.ready");
|
||||
let phase_2_wait = wait_for_file_source(&phase_2_gate)?;
|
||||
let phase_3_wait = wait_for_file_source(&phase_3_gate)?;
|
||||
|
||||
let code = format!(
|
||||
r#"
|
||||
import {{ output_text, set_yield_time }} from "@openai/code_mode";
|
||||
import {{ exec_command }} from "tools.js";
|
||||
|
||||
output_text("phase 1");
|
||||
set_yield_time(10);
|
||||
{phase_2_wait}
|
||||
output_text("phase 2");
|
||||
{phase_3_wait}
|
||||
output_text("phase 3");
|
||||
"#
|
||||
);
|
||||
|
||||
responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_custom_tool_call("call-1", "exec", &code),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let first_completion = responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "waiting"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
test.submit_turn("start the long exec").await?;
|
||||
|
||||
let first_request = first_completion.single_request();
|
||||
let first_items = custom_tool_output_items(&first_request, "call-1");
|
||||
assert_eq!(first_items.len(), 2);
|
||||
assert_regex_match(
|
||||
concat!(
|
||||
r"(?s)\A",
|
||||
r"Script running with session ID \d+\nWall time \d+\.\d seconds\nOutput:\n\z"
|
||||
),
|
||||
text_item(&first_items, 0),
|
||||
);
|
||||
assert_eq!(text_item(&first_items, 1), "phase 1");
|
||||
let session_id = extract_running_session_id(text_item(&first_items, 0));
|
||||
|
||||
responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-3"),
|
||||
responses::ev_function_call(
|
||||
"call-2",
|
||||
"exec_wait",
|
||||
&serde_json::to_string(&serde_json::json!({
|
||||
"session_id": session_id,
|
||||
"yield_time_ms": 1_000,
|
||||
}))?,
|
||||
),
|
||||
ev_completed("resp-3"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let second_completion = responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-2", "still waiting"),
|
||||
ev_completed("resp-4"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
fs::write(&phase_2_gate, "ready")?;
|
||||
test.submit_turn("wait again").await?;
|
||||
|
||||
let second_request = second_completion.single_request();
|
||||
let second_items = function_tool_output_items(&second_request, "call-2");
|
||||
assert_eq!(second_items.len(), 2);
|
||||
assert_regex_match(
|
||||
concat!(
|
||||
r"(?s)\A",
|
||||
r"Script running with session ID \d+\nWall time \d+\.\d seconds\nOutput:\n\z"
|
||||
),
|
||||
text_item(&second_items, 0),
|
||||
);
|
||||
assert_eq!(
|
||||
extract_running_session_id(text_item(&second_items, 0)),
|
||||
session_id
|
||||
);
|
||||
assert_eq!(text_item(&second_items, 1), "phase 2");
|
||||
|
||||
responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-5"),
|
||||
responses::ev_function_call(
|
||||
"call-3",
|
||||
"exec_wait",
|
||||
&serde_json::to_string(&serde_json::json!({
|
||||
"session_id": session_id,
|
||||
"yield_time_ms": 1_000,
|
||||
}))?,
|
||||
),
|
||||
ev_completed("resp-5"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let third_completion = responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-3", "done"),
|
||||
ev_completed("resp-6"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
fs::write(&phase_3_gate, "ready")?;
|
||||
test.submit_turn("wait for completion").await?;
|
||||
|
||||
let third_request = third_completion.single_request();
|
||||
let third_items = function_tool_output_items(&third_request, "call-3");
|
||||
assert_eq!(third_items.len(), 2);
|
||||
assert_regex_match(
|
||||
concat!(
|
||||
r"(?s)\A",
|
||||
r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
|
||||
),
|
||||
text_item(&third_items, 0),
|
||||
);
|
||||
assert_eq!(text_item(&third_items, 1), "phase 3");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_can_run_multiple_yielded_sessions() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
let _ = config.features.enable(Feature::CodeMode);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
let session_a_gate = test.workspace_path("code-mode-session-a.ready");
|
||||
let session_b_gate = test.workspace_path("code-mode-session-b.ready");
|
||||
let session_a_wait = wait_for_file_source(&session_a_gate)?;
|
||||
let session_b_wait = wait_for_file_source(&session_b_gate)?;
|
||||
|
||||
let session_a_code = format!(
|
||||
r#"
|
||||
import {{ output_text, set_yield_time }} from "@openai/code_mode";
|
||||
import {{ exec_command }} from "tools.js";
|
||||
|
||||
output_text("session a start");
|
||||
set_yield_time(10);
|
||||
{session_a_wait}
|
||||
output_text("session a done");
|
||||
"#
|
||||
);
|
||||
let session_b_code = format!(
|
||||
r#"
|
||||
import {{ output_text, set_yield_time }} from "@openai/code_mode";
|
||||
import {{ exec_command }} from "tools.js";
|
||||
|
||||
output_text("session b start");
|
||||
set_yield_time(10);
|
||||
{session_b_wait}
|
||||
output_text("session b done");
|
||||
"#
|
||||
);
|
||||
|
||||
responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_custom_tool_call("call-1", "exec", &session_a_code),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let first_completion = responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "session a waiting"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
test.submit_turn("start session a").await?;
|
||||
|
||||
let first_request = first_completion.single_request();
|
||||
let first_items = custom_tool_output_items(&first_request, "call-1");
|
||||
assert_eq!(first_items.len(), 2);
|
||||
let session_a_id = extract_running_session_id(text_item(&first_items, 0));
|
||||
assert_eq!(text_item(&first_items, 1), "session a start");
|
||||
|
||||
responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-3"),
|
||||
ev_custom_tool_call("call-2", "exec", &session_b_code),
|
||||
ev_completed("resp-3"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let second_completion = responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-2", "session b waiting"),
|
||||
ev_completed("resp-4"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
test.submit_turn("start session b").await?;
|
||||
|
||||
let second_request = second_completion.single_request();
|
||||
let second_items = custom_tool_output_items(&second_request, "call-2");
|
||||
assert_eq!(second_items.len(), 2);
|
||||
let session_b_id = extract_running_session_id(text_item(&second_items, 0));
|
||||
assert_eq!(text_item(&second_items, 1), "session b start");
|
||||
assert_ne!(session_a_id, session_b_id);
|
||||
|
||||
fs::write(&session_a_gate, "ready")?;
|
||||
responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-5"),
|
||||
responses::ev_function_call(
|
||||
"call-3",
|
||||
"exec_wait",
|
||||
&serde_json::to_string(&serde_json::json!({
|
||||
"session_id": session_a_id,
|
||||
"yield_time_ms": 1_000,
|
||||
}))?,
|
||||
),
|
||||
ev_completed("resp-5"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let third_completion = responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-3", "session a done"),
|
||||
ev_completed("resp-6"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
test.submit_turn("wait session a").await?;
|
||||
|
||||
let third_request = third_completion.single_request();
|
||||
let third_items = function_tool_output_items(&third_request, "call-3");
|
||||
assert_eq!(third_items.len(), 2);
|
||||
assert_regex_match(
|
||||
concat!(
|
||||
r"(?s)\A",
|
||||
r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
|
||||
),
|
||||
text_item(&third_items, 0),
|
||||
);
|
||||
assert_eq!(text_item(&third_items, 1), "session a done");
|
||||
|
||||
fs::write(&session_b_gate, "ready")?;
|
||||
responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-7"),
|
||||
responses::ev_function_call(
|
||||
"call-4",
|
||||
"exec_wait",
|
||||
&serde_json::to_string(&serde_json::json!({
|
||||
"session_id": session_b_id,
|
||||
"yield_time_ms": 1_000,
|
||||
}))?,
|
||||
),
|
||||
ev_completed("resp-7"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let fourth_completion = responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-4", "session b done"),
|
||||
ev_completed("resp-8"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
test.submit_turn("wait session b").await?;
|
||||
|
||||
let fourth_request = fourth_completion.single_request();
|
||||
let fourth_items = function_tool_output_items(&fourth_request, "call-4");
|
||||
assert_eq!(fourth_items.len(), 2);
|
||||
assert_regex_match(
|
||||
concat!(
|
||||
r"(?s)\A",
|
||||
r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
|
||||
),
|
||||
text_item(&fourth_items, 0),
|
||||
);
|
||||
assert_eq!(text_item(&fourth_items, 1), "session b done");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_exec_wait_can_terminate_and_continue() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
let _ = config.features.enable(Feature::CodeMode);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
let termination_gate = test.workspace_path("code-mode-terminate.ready");
|
||||
let termination_wait = wait_for_file_source(&termination_gate)?;
|
||||
|
||||
let code = format!(
|
||||
r#"
|
||||
import {{ output_text, set_yield_time }} from "@openai/code_mode";
|
||||
import {{ exec_command }} from "tools.js";
|
||||
|
||||
output_text("phase 1");
|
||||
set_yield_time(10);
|
||||
{termination_wait}
|
||||
output_text("phase 2");
|
||||
"#
|
||||
);
|
||||
|
||||
responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_custom_tool_call("call-1", "exec", &code),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let first_completion = responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "waiting"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
test.submit_turn("start the long exec").await?;
|
||||
|
||||
let first_request = first_completion.single_request();
|
||||
let first_items = custom_tool_output_items(&first_request, "call-1");
|
||||
assert_eq!(first_items.len(), 2);
|
||||
let session_id = extract_running_session_id(text_item(&first_items, 0));
|
||||
assert_eq!(text_item(&first_items, 1), "phase 1");
|
||||
|
||||
responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-3"),
|
||||
responses::ev_function_call(
|
||||
"call-2",
|
||||
"exec_wait",
|
||||
&serde_json::to_string(&serde_json::json!({
|
||||
"session_id": session_id,
|
||||
"terminate": true,
|
||||
}))?,
|
||||
),
|
||||
ev_completed("resp-3"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let second_completion = responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-2", "terminated"),
|
||||
ev_completed("resp-4"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
test.submit_turn("terminate it").await?;
|
||||
|
||||
let second_request = second_completion.single_request();
|
||||
let second_items = function_tool_output_items(&second_request, "call-2");
|
||||
assert_eq!(second_items.len(), 1);
|
||||
assert_regex_match(
|
||||
concat!(
|
||||
r"(?s)\A",
|
||||
r"Script terminated\nWall time \d+\.\d seconds\nOutput:\n\z"
|
||||
),
|
||||
text_item(&second_items, 0),
|
||||
);
|
||||
|
||||
responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-5"),
|
||||
ev_custom_tool_call(
|
||||
"call-3",
|
||||
"exec",
|
||||
r#"
|
||||
import { output_text } from "@openai/code_mode";
|
||||
|
||||
output_text("after terminate");
|
||||
"#,
|
||||
),
|
||||
ev_completed("resp-5"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let third_completion = responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-3", "done"),
|
||||
ev_completed("resp-6"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
test.submit_turn("run another exec").await?;
|
||||
|
||||
let third_request = third_completion.single_request();
|
||||
let third_items = custom_tool_output_items(&third_request, "call-3");
|
||||
assert_eq!(third_items.len(), 2);
|
||||
assert_regex_match(
|
||||
concat!(
|
||||
r"(?s)\A",
|
||||
r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
|
||||
),
|
||||
text_item(&third_items, 0),
|
||||
);
|
||||
assert_eq!(text_item(&third_items, 1), "after terminate");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_exec_wait_returns_error_for_unknown_session() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
let _ = config.features.enable(Feature::CodeMode);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
responses::ev_function_call(
|
||||
"call-1",
|
||||
"exec_wait",
|
||||
&serde_json::to_string(&serde_json::json!({
|
||||
"session_id": 999_999,
|
||||
"yield_time_ms": 1_000,
|
||||
}))?,
|
||||
),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let completion = responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
test.submit_turn("wait on an unknown exec session").await?;
|
||||
|
||||
let request = completion.single_request();
|
||||
let (_, success) = request
|
||||
.function_call_output_content_and_success("call-1")
|
||||
.expect("function tool output should be present");
|
||||
assert_ne!(success, Some(true));
|
||||
|
||||
let items = function_tool_output_items(&request, "call-1");
|
||||
assert_eq!(items.len(), 2);
|
||||
assert_regex_match(
|
||||
concat!(
|
||||
r"(?s)\A",
|
||||
r"Script failed\nWall time \d+\.\d seconds\nOutput:\n\z"
|
||||
),
|
||||
text_item(&items, 0),
|
||||
);
|
||||
assert_eq!(
|
||||
text_item(&items, 1),
|
||||
"Script error:\nexec session 999999 not found"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_exec_wait_terminate_returns_completed_session_if_it_finished_in_background()
|
||||
-> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
let _ = config.features.enable(Feature::CodeMode);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
let session_a_gate = test.workspace_path("code-mode-session-a-finished.ready");
|
||||
let session_b_gate = test.workspace_path("code-mode-session-b-blocked.ready");
|
||||
let session_a_done_marker = test.workspace_path("code-mode-session-a-done.txt");
|
||||
let session_a_wait = wait_for_file_source(&session_a_gate)?;
|
||||
let session_b_wait = wait_for_file_source(&session_b_gate)?;
|
||||
let session_a_done_marker_quoted =
|
||||
shlex::try_join([session_a_done_marker.to_string_lossy().as_ref()])?;
|
||||
let session_a_done_command = format!("printf done > {session_a_done_marker_quoted}");
|
||||
|
||||
let session_a_code = format!(
|
||||
r#"
|
||||
import {{ output_text, set_yield_time }} from "@openai/code_mode";
|
||||
import {{ exec_command }} from "tools.js";
|
||||
|
||||
output_text("session a start");
|
||||
set_yield_time(10);
|
||||
{session_a_wait}
|
||||
output_text("session a done");
|
||||
await exec_command({{ cmd: {session_a_done_command:?} }});
|
||||
"#
|
||||
);
|
||||
let session_b_code = format!(
|
||||
r#"
|
||||
import {{ output_text, set_yield_time }} from "@openai/code_mode";
|
||||
import {{ exec_command }} from "tools.js";
|
||||
|
||||
output_text("session b start");
|
||||
set_yield_time(10);
|
||||
{session_b_wait}
|
||||
output_text("session b done");
|
||||
"#
|
||||
);
|
||||
|
||||
responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_custom_tool_call("call-1", "exec", &session_a_code),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let first_completion = responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "session a waiting"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
test.submit_turn("start session a").await?;
|
||||
|
||||
let first_request = first_completion.single_request();
|
||||
let first_items = custom_tool_output_items(&first_request, "call-1");
|
||||
assert_eq!(first_items.len(), 2);
|
||||
let session_a_id = extract_running_session_id(text_item(&first_items, 0));
|
||||
assert_eq!(text_item(&first_items, 1), "session a start");
|
||||
|
||||
responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-3"),
|
||||
ev_custom_tool_call("call-2", "exec", &session_b_code),
|
||||
ev_completed("resp-3"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let second_completion = responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-2", "session b waiting"),
|
||||
ev_completed("resp-4"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
test.submit_turn("start session b").await?;
|
||||
|
||||
let second_request = second_completion.single_request();
|
||||
let second_items = custom_tool_output_items(&second_request, "call-2");
|
||||
assert_eq!(second_items.len(), 2);
|
||||
let session_b_id = extract_running_session_id(text_item(&second_items, 0));
|
||||
assert_eq!(text_item(&second_items, 1), "session b start");
|
||||
|
||||
fs::write(&session_a_gate, "ready")?;
|
||||
responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-5"),
|
||||
responses::ev_function_call(
|
||||
"call-3",
|
||||
"exec_wait",
|
||||
&serde_json::to_string(&serde_json::json!({
|
||||
"session_id": session_b_id,
|
||||
"yield_time_ms": 1_000,
|
||||
}))?,
|
||||
),
|
||||
ev_completed("resp-5"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let third_completion = responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-3", "session b still waiting"),
|
||||
ev_completed("resp-6"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
test.submit_turn("wait session b").await?;
|
||||
|
||||
let third_request = third_completion.single_request();
|
||||
let third_items = function_tool_output_items(&third_request, "call-3");
|
||||
assert_eq!(third_items.len(), 1);
|
||||
assert_regex_match(
|
||||
concat!(
|
||||
r"(?s)\A",
|
||||
r"Script running with session ID \d+\nWall time \d+\.\d seconds\nOutput:\n\z"
|
||||
),
|
||||
text_item(&third_items, 0),
|
||||
);
|
||||
assert_eq!(
|
||||
extract_running_session_id(text_item(&third_items, 0)),
|
||||
session_b_id
|
||||
);
|
||||
|
||||
for _ in 0..100 {
|
||||
if session_a_done_marker.exists() {
|
||||
break;
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(50)).await;
|
||||
}
|
||||
assert!(session_a_done_marker.exists());
|
||||
|
||||
responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-7"),
|
||||
responses::ev_function_call(
|
||||
"call-4",
|
||||
"exec_wait",
|
||||
&serde_json::to_string(&serde_json::json!({
|
||||
"session_id": session_a_id,
|
||||
"terminate": true,
|
||||
}))?,
|
||||
),
|
||||
ev_completed("resp-7"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let fourth_completion = responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-4", "session a already done"),
|
||||
ev_completed("resp-8"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
test.submit_turn("terminate session a").await?;
|
||||
|
||||
let fourth_request = fourth_completion.single_request();
|
||||
let fourth_items = function_tool_output_items(&fourth_request, "call-4");
|
||||
match fourth_items.len() {
|
||||
1 => {
|
||||
assert_regex_match(
|
||||
concat!(
|
||||
r"(?s)\A",
|
||||
r"Script terminated\nWall time \d+\.\d seconds\nOutput:\n\z"
|
||||
),
|
||||
text_item(&fourth_items, 0),
|
||||
);
|
||||
}
|
||||
2 => {
|
||||
assert_regex_match(
|
||||
concat!(
|
||||
r"(?s)\A",
|
||||
r"Script (?:completed|terminated)\nWall time \d+\.\d seconds\nOutput:\n\z"
|
||||
),
|
||||
text_item(&fourth_items, 0),
|
||||
);
|
||||
assert_eq!(text_item(&fourth_items, 1), "session a done");
|
||||
}
|
||||
other => panic!("unexpected number of content items: {other}"),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_background_keeps_running_on_later_turn_without_exec_wait() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
let _ = config.features.enable(Feature::CodeMode);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
let resumed_file = test.workspace_path("code-mode-yield-resumed.txt");
|
||||
let resumed_file_quoted = shlex::try_join([resumed_file.to_string_lossy().as_ref()])?;
|
||||
let write_file_command = format!("printf resumed > {resumed_file_quoted}");
|
||||
let wait_for_file_command =
|
||||
format!("while [ ! -f {resumed_file_quoted} ]; do sleep 0.01; done; printf ready");
|
||||
let code = format!(
|
||||
r#"
|
||||
import {{ background, output_text }} from "@openai/code_mode";
|
||||
import {{ exec_command }} from "tools.js";
|
||||
|
||||
output_text("before yield");
|
||||
background();
|
||||
await exec_command({{ cmd: {write_file_command:?} }});
|
||||
output_text("after yield");
|
||||
"#
|
||||
);
|
||||
|
||||
responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_custom_tool_call("call-1", "exec", &code),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let first_completion = responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "exec yielded"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
test.submit_turn("start yielded exec").await?;
|
||||
|
||||
let first_request = first_completion.single_request();
|
||||
let first_items = custom_tool_output_items(&first_request, "call-1");
|
||||
assert_eq!(first_items.len(), 2);
|
||||
assert_regex_match(
|
||||
concat!(
|
||||
r"(?s)\A",
|
||||
r"Script running with session ID \d+\nWall time \d+\.\d seconds\nOutput:\n\z"
|
||||
),
|
||||
text_item(&first_items, 0),
|
||||
);
|
||||
assert_eq!(text_item(&first_items, 1), "before yield");
|
||||
|
||||
responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-3"),
|
||||
responses::ev_function_call(
|
||||
"call-2",
|
||||
"exec_command",
|
||||
&serde_json::to_string(&serde_json::json!({
|
||||
"cmd": wait_for_file_command,
|
||||
}))?,
|
||||
),
|
||||
ev_completed("resp-3"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let second_completion = responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-2", "file appeared"),
|
||||
ev_completed("resp-4"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
test.submit_turn("wait for resumed file").await?;
|
||||
|
||||
let second_request = second_completion.single_request();
|
||||
assert!(
|
||||
second_request
|
||||
.function_call_output_text("call-2")
|
||||
.is_some_and(|output| output.ends_with("ready"))
|
||||
);
|
||||
assert_eq!(fs::read_to_string(&resumed_file)?, "resumed");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Exercises `exec_wait` with its own `max_tokens` argument.
///
/// The script started in the first turn calls `set_max_output_tokens_per_exec_call(100)`,
/// while the `exec_wait` call in the second turn passes `max_tokens: 6`. The final
/// assertions expect the second output item of that call to contain a
/// "tokens truncated" marker.
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_exec_wait_uses_its_own_max_tokens_budget() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
let _ = config.features.enable(Feature::CodeMode);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
// Gate file: the script source below is generated from this path, and the test
|
||||
// creates the file between the two turns.
|
||||
// NOTE(review): presumably the generated snippet blocks until the file exists —
|
||||
// confirm in `wait_for_file_source`.
|
||||
let completion_gate = test.workspace_path("code-mode-max-tokens.ready");
|
||||
let completion_wait = wait_for_file_source(&completion_gate)?;
|
||||
|
||||
let code = format!(
|
||||
r#"
|
||||
import {{ output_text, set_max_output_tokens_per_exec_call, set_yield_time }} from "@openai/code_mode";
|
||||
import {{ exec_command }} from "tools.js";
|
||||
|
||||
output_text("phase 1");
|
||||
set_max_output_tokens_per_exec_call(100);
|
||||
set_yield_time(10);
|
||||
{completion_wait}
|
||||
output_text("token one token two token three token four token five token six token seven");
|
||||
"#
|
||||
);
|
||||
|
||||
// First turn: the mocked model issues the custom `exec` tool call carrying the script.
|
||||
responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_custom_tool_call("call-1", "exec", &code),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
// Follow-up response of the first turn; the request it receives is inspected below.
|
||||
let first_completion = responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "waiting"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
test.submit_turn("start the long exec").await?;
|
||||
|
||||
let first_request = first_completion.single_request();
|
||||
let first_items = custom_tool_output_items(&first_request, "call-1");
|
||||
// Two items are expected: item 0 is parsed for the running session id,
|
||||
// item 1 is the text emitted before the script yielded.
|
||||
assert_eq!(first_items.len(), 2);
|
||||
assert_eq!(text_item(&first_items, 1), "phase 1");
|
||||
let session_id = extract_running_session_id(text_item(&first_items, 0));
|
||||
|
||||
// Create the gate file before the second turn is submitted.
|
||||
fs::write(&completion_gate, "ready")?;
|
||||
// Second turn: the mocked model calls `exec_wait` for that session with `max_tokens: 6`.
|
||||
responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-3"),
|
||||
responses::ev_function_call(
|
||||
"call-2",
|
||||
"exec_wait",
|
||||
&serde_json::to_string(&serde_json::json!({
|
||||
"session_id": session_id,
|
||||
"yield_time_ms": 1_000,
|
||||
"max_tokens": 6,
|
||||
}))?,
|
||||
),
|
||||
ev_completed("resp-3"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let second_completion = responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-2", "done"),
|
||||
ev_completed("resp-4"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
test.submit_turn("wait for completion").await?;
|
||||
|
||||
let second_request = second_completion.single_request();
|
||||
let second_items = function_tool_output_items(&second_request, "call-2");
|
||||
assert_eq!(second_items.len(), 2);
|
||||
// Item 0 must be exactly the completion header ending in an empty `Output:` section.
|
||||
assert_regex_match(
|
||||
concat!(
|
||||
r"(?s)\A",
|
||||
r"Script completed\nWall time \d+\.\d seconds\nOutput:\n\z"
|
||||
),
|
||||
text_item(&second_items, 0),
|
||||
);
|
||||
// Item 1 must report a single output line and contain the truncation marker.
|
||||
let expected_pattern = r#"(?sx)
|
||||
\A
|
||||
Total\ output\ lines:\ 1\n
|
||||
\n
|
||||
.*…\d+\ tokens\ truncated….*
|
||||
\z
|
||||
"#;
|
||||
assert_regex_match(expected_pattern, text_item(&second_items, 1));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_can_output_serialized_text_via_openai_code_mode_module() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
@@ -687,7 +687,7 @@ async fn resume_conversation(
|
||||
let auth_manager = codex_core::test_support::auth_manager_from_auth(
|
||||
codex_core::CodexAuth::from_api_key("dummy"),
|
||||
);
|
||||
Box::pin(manager.resume_thread_from_rollout(config.clone(), path, auth_manager))
|
||||
Box::pin(manager.resume_thread_from_rollout(config.clone(), path, auth_manager, None))
|
||||
.await
|
||||
.expect("resume conversation")
|
||||
.thread
|
||||
@@ -700,7 +700,7 @@ async fn fork_thread(
|
||||
path: std::path::PathBuf,
|
||||
nth_user_message: usize,
|
||||
) -> Arc<CodexThread> {
|
||||
Box::pin(manager.fork_thread(nth_user_message, config.clone(), path, false))
|
||||
Box::pin(manager.fork_thread(nth_user_message, config.clone(), path, false, None))
|
||||
.await
|
||||
.expect("fork conversation")
|
||||
.thread
|
||||
|
||||
@@ -110,7 +110,7 @@ async fn fork_thread_twice_drops_to_first_message() {
|
||||
thread: codex_fork1,
|
||||
..
|
||||
} = thread_manager
|
||||
.fork_thread(1, config_for_fork.clone(), base_path.clone(), false)
|
||||
.fork_thread(1, config_for_fork.clone(), base_path.clone(), false, None)
|
||||
.await
|
||||
.expect("fork 1");
|
||||
|
||||
@@ -129,7 +129,7 @@ async fn fork_thread_twice_drops_to_first_message() {
|
||||
thread: codex_fork2,
|
||||
..
|
||||
} = thread_manager
|
||||
.fork_thread(0, config_for_fork.clone(), fork1_path.clone(), false)
|
||||
.fork_thread(0, config_for_fork.clone(), fork1_path.clone(), false, None)
|
||||
.await
|
||||
.expect("fork 2");
|
||||
|
||||
|
||||
@@ -659,6 +659,34 @@ async fn js_repl_does_not_expose_process_global() -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Runs a single js_repl turn that logs facts about `codex.cwd` and `codex.homeDir`.
///
/// The script prints the `typeof` of `codex.cwd` together with whether it is non-empty,
/// and whether `codex.homeDir` is either `null` or a string. The test then checks the
/// tool output for `cwd:string:true` and `home:true`.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn js_repl_exposes_codex_path_helpers() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let mock = run_js_repl_turn(
|
||||
&server,
|
||||
"check codex path helpers",
|
||||
&[(
|
||||
"call-1",
|
||||
"console.log(`cwd:${typeof codex.cwd}:${codex.cwd.length > 0}`); console.log(`home:${codex.homeDir === null || typeof codex.homeDir === \"string\"}`);",
|
||||
)],
|
||||
)
|
||||
.await?;
|
||||
|
||||
let req = mock.single_request();
|
||||
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
||||
// Only an explicit `Some(false)` fails here; any other `success` value passes.
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"js_repl call failed unexpectedly: {output}"
|
||||
);
|
||||
assert!(output.contains("cwd:string:true"));
|
||||
assert!(output.contains("home:true"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn js_repl_blocks_sensitive_builtin_imports() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
@@ -416,7 +416,7 @@ async fn resume_and_fork_append_permissions_messages() -> Result<()> {
|
||||
fork_config.permissions.approval_policy = Constrained::allow_any(AskForApproval::UnlessTrusted);
|
||||
let forked = initial
|
||||
.thread_manager
|
||||
.fork_thread(usize::MAX, fork_config, rollout_path, false)
|
||||
.fork_thread(usize::MAX, fork_config, rollout_path, false, None)
|
||||
.await?;
|
||||
forked
|
||||
.thread
|
||||
|
||||
@@ -277,7 +277,7 @@ async fn explicit_plugin_mentions_inject_plugin_guidance() -> Result<()> {
|
||||
assert!(
|
||||
request_tools
|
||||
.iter()
|
||||
.any(|name| name == "mcp__codex_apps__calendar_create_event"),
|
||||
.any(|name| name == "mcp__codex_apps__google-calendar-create-event"),
|
||||
"expected plugin app tools to become visible for this turn: {request_tools:?}"
|
||||
);
|
||||
let echo_description = tool_description(&request_body, "mcp__sample__echo")
|
||||
@@ -286,9 +286,11 @@ async fn explicit_plugin_mentions_inject_plugin_guidance() -> Result<()> {
|
||||
echo_description.contains("This tool is part of plugin `sample`."),
|
||||
"expected plugin MCP provenance in tool description: {echo_description:?}"
|
||||
);
|
||||
let calendar_description =
|
||||
tool_description(&request_body, "mcp__codex_apps__calendar_create_event")
|
||||
.expect("plugin app tool description should be present");
|
||||
let calendar_description = tool_description(
|
||||
&request_body,
|
||||
"mcp__codex_apps__google-calendar-create-event",
|
||||
)
|
||||
.expect("plugin app tool description should be present");
|
||||
assert!(
|
||||
calendar_description.contains("This tool is part of plugin `sample`."),
|
||||
"expected plugin app provenance in tool description: {calendar_description:?}"
|
||||
|
||||
@@ -98,7 +98,7 @@ async fn emits_warning_when_resumed_model_differs() {
|
||||
thread: conversation,
|
||||
..
|
||||
} = thread_manager
|
||||
.resume_thread_with_history(config, initial_history, auth_manager, false)
|
||||
.resume_thread_with_history(config, initial_history, auth_manager, false, None)
|
||||
.await
|
||||
.expect("resume conversation");
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -180,6 +180,24 @@ async fn spawn_agent_description_lists_visible_models_and_reasoning_efforts() ->
|
||||
!description.contains("Hidden Model"),
|
||||
"hidden picker model should be omitted from spawn_agent description: {description:?}"
|
||||
);
|
||||
assert!(
|
||||
description.contains(
|
||||
"Only use `spawn_agent` if and only if the user explicitly asks for sub-agents, delegation, or parallel agent work."
|
||||
),
|
||||
"expected explicit authorization rule in spawn_agent description: {description:?}"
|
||||
);
|
||||
assert!(
|
||||
description.contains(
|
||||
"Requests for depth, thoroughness, research, investigation, or detailed codebase analysis do not count as permission to spawn."
|
||||
),
|
||||
"expected non-authorization clarification in spawn_agent description: {description:?}"
|
||||
);
|
||||
assert!(
|
||||
description.contains(
|
||||
"Agent-role guidance below only helps choose which agent to use after spawning is already authorized; it never authorizes spawning by itself."
|
||||
),
|
||||
"expected agent-role clarification in spawn_agent description: {description:?}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -42,7 +42,7 @@ async fn emits_warning_when_unstable_features_enabled_via_config() {
|
||||
thread: conversation,
|
||||
..
|
||||
} = thread_manager
|
||||
.resume_thread_with_history(config, InitialHistory::New, auth_manager, false)
|
||||
.resume_thread_with_history(config, InitialHistory::New, auth_manager, false, None)
|
||||
.await
|
||||
.expect("spawn conversation");
|
||||
|
||||
@@ -83,7 +83,7 @@ async fn suppresses_warning_when_configured() {
|
||||
thread: conversation,
|
||||
..
|
||||
} = thread_manager
|
||||
.resume_thread_with_history(config, InitialHistory::New, auth_manager, false)
|
||||
.resume_thread_with_history(config, InitialHistory::New, auth_manager, false, None)
|
||||
.await
|
||||
.expect("spawn conversation");
|
||||
|
||||
|
||||
@@ -296,7 +296,8 @@ async fn view_image_tool_attaches_local_image() -> anyhow::Result<()> {
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn view_image_tool_can_preserve_original_resolution_on_gpt5_3_codex() -> anyhow::Result<()> {
|
||||
async fn view_image_tool_can_preserve_original_resolution_when_requested_on_gpt5_3_codex()
|
||||
-> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
@@ -326,7 +327,7 @@ async fn view_image_tool_can_preserve_original_resolution_on_gpt5_3_codex() -> a
|
||||
image.save(&abs_path)?;
|
||||
|
||||
let call_id = "view-image-original";
|
||||
let arguments = serde_json::json!({ "path": rel_path }).to_string();
|
||||
let arguments = serde_json::json!({ "path": rel_path, "detail": "original" }).to_string();
|
||||
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
@@ -400,7 +401,191 @@ async fn view_image_tool_can_preserve_original_resolution_on_gpt5_3_codex() -> a
|
||||
}
|
||||
|
||||
/// Calls `view_image` with `"detail": "low"` and expects a textual error instead of an image.
///
/// The session uses model `gpt-5.3-codex` with `Feature::ImageDetailOriginal` enabled.
/// The test asserts the exact tool-output message and that the follow-up request body
/// contains no image message.
// NOTE(review): two `async fn` signature rows follow the attribute. Presumably the first
// is the removed side of the diff hunk and the second is its replacement — confirm.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn view_image_tool_keeps_legacy_behavior_below_gpt5_3_codex() -> anyhow::Result<()> {
|
||||
async fn view_image_tool_errors_clearly_for_unsupported_detail_values() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex()
|
||||
.with_model("gpt-5.3-codex")
|
||||
.with_config(|config| {
|
||||
config
|
||||
.features
|
||||
.enable(Feature::ImageDetailOriginal)
|
||||
.expect("test config should allow feature update");
|
||||
});
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = builder.build(&server).await?;
|
||||
|
||||
// Write a 256x128 solid-colour PNG under the session's working directory.
|
||||
let rel_path = "assets/unsupported-detail.png";
|
||||
let abs_path = cwd.path().join(rel_path);
|
||||
if let Some(parent) = abs_path.parent() {
|
||||
std::fs::create_dir_all(parent)?;
|
||||
}
|
||||
let image = ImageBuffer::from_pixel(256, 128, Rgba([0u8, 80, 255, 255]));
|
||||
image.save(&abs_path)?;
|
||||
|
||||
let call_id = "view-image-unsupported-detail";
|
||||
// `detail` is set to "low", which the expected error message below rejects.
|
||||
let arguments = serde_json::json!({ "path": rel_path, "detail": "low" }).to_string();
|
||||
|
||||
// First mocked response: the model requests the `view_image` call.
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "view_image", &arguments),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
responses::mount_sse_once(&server, first_response).await;
|
||||
|
||||
// Second mocked response: its recorded request carries the tool output checked below.
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
let mock = responses::mount_sse_once(&server, second_response).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "please attach the image at low detail".into(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
service_tier: None,
|
||||
summary: None,
|
||||
collaboration_mode: None,
|
||||
personality: None,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
let req = mock.single_request();
|
||||
let body_with_tool_output = req.body_json();
|
||||
let output_text = req
|
||||
.function_call_output_content_and_success(call_id)
|
||||
.and_then(|(content, _)| content)
|
||||
.expect("output text present");
|
||||
// The tool output must be exactly this message, including the rejected value.
|
||||
assert_eq!(
|
||||
output_text,
|
||||
"view_image.detail only supports `original`; omit `detail` for default resized behavior, got `low`"
|
||||
);
|
||||
|
||||
assert!(
|
||||
find_image_message(&body_with_tool_output).is_none(),
|
||||
"unsupported detail values should not produce an input_image message"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Calls `view_image` with `"detail": null` and expects a downsized image in the tool output.
///
/// The session uses model `gpt-5.3-codex` with `Feature::ImageDetailOriginal` enabled and a
/// 2304x864 source image. The assertions require a single output item without a `detail`
/// key whose decoded image is at most 2048x768 and strictly smaller than the source in
/// both dimensions.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn view_image_tool_treats_null_detail_as_omitted() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex()
|
||||
.with_model("gpt-5.3-codex")
|
||||
.with_config(|config| {
|
||||
config
|
||||
.features
|
||||
.enable(Feature::ImageDetailOriginal)
|
||||
.expect("test config should allow feature update");
|
||||
});
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = builder.build(&server).await?;
|
||||
|
||||
// Write the source PNG under the session's working directory.
|
||||
let rel_path = "assets/null-detail.png";
|
||||
let abs_path = cwd.path().join(rel_path);
|
||||
if let Some(parent) = abs_path.parent() {
|
||||
std::fs::create_dir_all(parent)?;
|
||||
}
|
||||
// Both source dimensions exceed the 2048x768 bounds asserted at the end of the test.
|
||||
let original_width = 2304;
|
||||
let original_height = 864;
|
||||
let image = ImageBuffer::from_pixel(original_width, original_height, Rgba([0u8, 80, 255, 255]));
|
||||
image.save(&abs_path)?;
|
||||
|
||||
let call_id = "view-image-null-detail";
|
||||
// `detail` is present in the arguments but explicitly JSON `null`.
|
||||
let arguments = serde_json::json!({ "path": rel_path, "detail": null }).to_string();
|
||||
|
||||
// First mocked response: the model requests the `view_image` call.
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "view_image", &arguments),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
responses::mount_sse_once(&server, first_response).await;
|
||||
|
||||
// Second mocked response: its recorded request carries the tool output checked below.
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
let mock = responses::mount_sse_once(&server, second_response).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "please attach the image with a null detail".into(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
service_tier: None,
|
||||
summary: None,
|
||||
collaboration_mode: None,
|
||||
personality: None,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
let req = mock.single_request();
|
||||
let function_output = req.function_call_output(call_id);
|
||||
let output_items = function_output
|
||||
.get("output")
|
||||
.and_then(Value::as_array)
|
||||
.expect("function_call_output should be a content item array");
|
||||
// Exactly one item, and it must not carry a `detail` key.
|
||||
assert_eq!(output_items.len(), 1);
|
||||
assert_eq!(output_items[0].get("detail"), None);
|
||||
let image_url = output_items[0]
|
||||
.get("image_url")
|
||||
.and_then(Value::as_str)
|
||||
.expect("image_url present");
|
||||
|
||||
// Drop everything up to the first comma and base64-decode the remainder.
|
||||
let (_, encoded) = image_url
|
||||
.split_once(',')
|
||||
.expect("image url contains data prefix");
|
||||
let decoded = BASE64_STANDARD
|
||||
.decode(encoded)
|
||||
.expect("image data decodes from base64 for request");
|
||||
let resized = load_from_memory(&decoded).expect("load resized image");
|
||||
let (width, height) = resized.dimensions();
|
||||
// The decoded image must fit within 2048x768 and be smaller than the source.
|
||||
assert!(width <= 2048);
|
||||
assert!(height <= 768);
|
||||
assert!(width < original_width);
|
||||
assert!(height < original_height);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn view_image_tool_resizes_when_model_lacks_original_detail_support() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
@@ -503,6 +688,110 @@ async fn view_image_tool_keeps_legacy_behavior_below_gpt5_3_codex() -> anyhow::R
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Calls `view_image` without any `detail` argument while `Feature::ImageDetailOriginal`
/// is enabled, and expects a downsized image in the tool output.
///
/// The session uses model `gpt-5.3-codex` and a 2304x864 source image. The assertions
/// require a single output item without a `detail` key whose decoded image is at most
/// 2048x768 and strictly smaller than the source in both dimensions.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn view_image_tool_does_not_force_original_resolution_with_capability_feature_only()
|
||||
-> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex()
|
||||
.with_model("gpt-5.3-codex")
|
||||
.with_config(|config| {
|
||||
config
|
||||
.features
|
||||
.enable(Feature::ImageDetailOriginal)
|
||||
.expect("test config should allow feature update");
|
||||
});
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = builder.build(&server).await?;
|
||||
|
||||
// Write the source PNG under the session's working directory.
|
||||
let rel_path = "assets/original-example-capability-only.png";
|
||||
let abs_path = cwd.path().join(rel_path);
|
||||
if let Some(parent) = abs_path.parent() {
|
||||
std::fs::create_dir_all(parent)?;
|
||||
}
|
||||
// Both source dimensions exceed the 2048x768 bounds asserted at the end of the test.
|
||||
let original_width = 2304;
|
||||
let original_height = 864;
|
||||
let image = ImageBuffer::from_pixel(original_width, original_height, Rgba([0u8, 80, 255, 255]));
|
||||
image.save(&abs_path)?;
|
||||
|
||||
let call_id = "view-image-capability-only";
|
||||
// Only `path` is supplied; no `detail` key is sent.
|
||||
let arguments = serde_json::json!({ "path": rel_path }).to_string();
|
||||
|
||||
// First mocked response: the model requests the `view_image` call.
|
||||
let first_response = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "view_image", &arguments),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
responses::mount_sse_once(&server, first_response).await;
|
||||
|
||||
// Second mocked response: its recorded request carries the tool output checked below.
|
||||
let second_response = sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
let mock = responses::mount_sse_once(&server, second_response).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "please add the screenshot".into(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
service_tier: None,
|
||||
summary: None,
|
||||
collaboration_mode: None,
|
||||
personality: None,
|
||||
})
|
||||
.await?;
|
||||
|
||||
// Wait for `TurnComplete`, passing a 10-second duration to the timeout-aware helper.
|
||||
wait_for_event_with_timeout(
|
||||
&codex,
|
||||
|event| matches!(event, EventMsg::TurnComplete(_)),
|
||||
Duration::from_secs(10),
|
||||
)
|
||||
.await;
|
||||
|
||||
let req = mock.single_request();
|
||||
let function_output = req.function_call_output(call_id);
|
||||
let output_items = function_output
|
||||
.get("output")
|
||||
.and_then(Value::as_array)
|
||||
.expect("function_call_output should be a content item array");
|
||||
// Exactly one item, and it must not carry a `detail` key.
|
||||
assert_eq!(output_items.len(), 1);
|
||||
assert_eq!(output_items[0].get("detail"), None);
|
||||
let image_url = output_items[0]
|
||||
.get("image_url")
|
||||
.and_then(Value::as_str)
|
||||
.expect("image_url present");
|
||||
|
||||
// Drop everything up to the first comma and base64-decode the remainder.
|
||||
let (_, encoded) = image_url
|
||||
.split_once(',')
|
||||
.expect("image url contains data prefix");
|
||||
let decoded = BASE64_STANDARD
|
||||
.decode(encoded)
|
||||
.expect("image data decodes from base64 for request");
|
||||
let resized = load_from_memory(&decoded).expect("load resized image");
|
||||
let (resized_width, resized_height) = resized.dimensions();
|
||||
// The decoded image must fit within 2048x768 and be smaller than the source.
|
||||
assert!(resized_width <= 2048);
|
||||
assert!(resized_height <= 768);
|
||||
assert!(resized_width < original_width);
|
||||
assert!(resized_height < original_height);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn js_repl_emit_image_attaches_local_image() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
Reference in New Issue
Block a user