mirror of
https://github.com/openai/codex.git
synced 2026-05-04 03:16:31 +00:00
Summary is a required parameter on UserTurn. Ideally we'd like the core to decide the appropriate summary level. Make the summary optional and don't send it when not needed.
373 lines
11 KiB
Rust
373 lines
11 KiB
Rust
#![allow(clippy::unwrap_used, clippy::expect_used)]
|
|
|
|
use anyhow::Result;
|
|
use codex_core::features::Feature;
|
|
use codex_protocol::config_types::CollaborationMode;
|
|
use codex_protocol::config_types::ModeKind;
|
|
use codex_protocol::config_types::Settings;
|
|
use codex_protocol::protocol::AskForApproval;
|
|
use codex_protocol::protocol::EventMsg;
|
|
use codex_protocol::protocol::Op;
|
|
use codex_protocol::protocol::SandboxPolicy;
|
|
use codex_protocol::user_input::UserInput;
|
|
use core_test_support::responses::ev_assistant_message;
|
|
use core_test_support::responses::ev_completed;
|
|
use core_test_support::responses::ev_function_call;
|
|
use core_test_support::responses::ev_response_created;
|
|
use core_test_support::responses::mount_sse_once;
|
|
use core_test_support::responses::sse;
|
|
use core_test_support::responses::start_mock_server;
|
|
use core_test_support::test_codex::test_codex;
|
|
use core_test_support::wait_for_event;
|
|
use serde_json::Value;
|
|
use serde_json::json;
|
|
use std::fs;
|
|
|
|
fn collaboration_mode_for_model(model: String) -> CollaborationMode {
|
|
CollaborationMode {
|
|
mode: ModeKind::Default,
|
|
settings: Settings {
|
|
model,
|
|
reasoning_effort: None,
|
|
developer_instructions: Some("exercise approvals in collaboration mode".to_string()),
|
|
},
|
|
}
|
|
}
|
|
|
|
async fn submit_user_turn(
|
|
test: &core_test_support::test_codex::TestCodex,
|
|
prompt: &str,
|
|
approval_policy: AskForApproval,
|
|
sandbox_policy: SandboxPolicy,
|
|
collaboration_mode: Option<CollaborationMode>,
|
|
) -> Result<()> {
|
|
let session_model = test.session_configured.model.clone();
|
|
test.codex
|
|
.submit(Op::UserTurn {
|
|
items: vec![UserInput::Text {
|
|
text: prompt.into(),
|
|
text_elements: Vec::new(),
|
|
}],
|
|
final_output_json_schema: None,
|
|
cwd: test.cwd_path().to_path_buf(),
|
|
approval_policy,
|
|
sandbox_policy,
|
|
model: session_model,
|
|
effort: None,
|
|
summary: None,
|
|
collaboration_mode,
|
|
personality: None,
|
|
})
|
|
.await?;
|
|
Ok(())
|
|
}
|
|
|
|
fn assert_no_matched_rules_invariant(output_item: &Value) {
|
|
let Some(output) = output_item.get("output").and_then(Value::as_str) else {
|
|
panic!("function_call_output should include string output payload: {output_item:?}");
|
|
};
|
|
assert!(
|
|
!output.contains("invariant failed: matched_rules must be non-empty"),
|
|
"unexpected invariant panic surfaced in output: {output}"
|
|
);
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn execpolicy_blocks_shell_invocation() -> Result<()> {
|
|
// TODO execpolicy doesn't parse powershell commands yet
|
|
if cfg!(windows) {
|
|
return Ok(());
|
|
}
|
|
|
|
let mut builder = test_codex().with_config(|config| {
|
|
let policy_path = config.codex_home.join("rules").join("policy.rules");
|
|
fs::create_dir_all(
|
|
policy_path
|
|
.parent()
|
|
.expect("policy directory must have a parent"),
|
|
)
|
|
.expect("create policy directory");
|
|
fs::write(
|
|
&policy_path,
|
|
r#"prefix_rule(pattern=["echo"], decision="forbidden")"#,
|
|
)
|
|
.expect("write policy file");
|
|
});
|
|
let server = start_mock_server().await;
|
|
let test = builder.build(&server).await?;
|
|
|
|
let call_id = "shell-forbidden";
|
|
let args = json!({
|
|
"command": "echo blocked",
|
|
"timeout_ms": 1_000,
|
|
});
|
|
|
|
mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-1"),
|
|
ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
|
|
ev_completed("resp-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-1", "done"),
|
|
ev_completed("resp-2"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
let session_model = test.session_configured.model.clone();
|
|
test.codex
|
|
.submit(Op::UserTurn {
|
|
items: vec![UserInput::Text {
|
|
text: "run shell command".into(),
|
|
text_elements: Vec::new(),
|
|
}],
|
|
final_output_json_schema: None,
|
|
cwd: test.cwd_path().to_path_buf(),
|
|
approval_policy: AskForApproval::Never,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
model: session_model,
|
|
effort: None,
|
|
summary: None,
|
|
collaboration_mode: None,
|
|
personality: None,
|
|
})
|
|
.await?;
|
|
|
|
let EventMsg::ExecCommandEnd(end) = wait_for_event(&test.codex, |event| {
|
|
matches!(event, EventMsg::ExecCommandEnd(_))
|
|
})
|
|
.await
|
|
else {
|
|
unreachable!()
|
|
};
|
|
wait_for_event(&test.codex, |event| {
|
|
matches!(event, EventMsg::TurnComplete(_))
|
|
})
|
|
.await;
|
|
|
|
assert!(
|
|
end.aggregated_output
|
|
.contains("policy forbids commands starting with `echo`"),
|
|
"unexpected output: {}",
|
|
end.aggregated_output
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn shell_command_empty_script_with_collaboration_mode_does_not_panic() -> Result<()> {
|
|
let server = start_mock_server().await;
|
|
let mut builder = test_codex().with_model("gpt-5").with_config(|config| {
|
|
config.features.enable(Feature::CollaborationModes);
|
|
});
|
|
let test = builder.build(&server).await?;
|
|
let call_id = "shell-empty-script-collab";
|
|
let args = json!({
|
|
"command": "",
|
|
"timeout_ms": 1_000,
|
|
});
|
|
|
|
mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-empty-shell-1"),
|
|
ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
|
|
ev_completed("resp-empty-shell-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
let results_mock = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-empty-shell-1", "done"),
|
|
ev_completed("resp-empty-shell-2"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
let collaboration_mode = collaboration_mode_for_model(test.session_configured.model.clone());
|
|
submit_user_turn(
|
|
&test,
|
|
"run an empty shell command",
|
|
AskForApproval::OnRequest,
|
|
SandboxPolicy::DangerFullAccess,
|
|
Some(collaboration_mode),
|
|
)
|
|
.await?;
|
|
|
|
wait_for_event(&test.codex, |event| {
|
|
matches!(event, EventMsg::TurnComplete(_))
|
|
})
|
|
.await;
|
|
|
|
let output_item = results_mock.single_request().function_call_output(call_id);
|
|
assert_no_matched_rules_invariant(&output_item);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn unified_exec_empty_script_with_collaboration_mode_does_not_panic() -> Result<()> {
|
|
let server = start_mock_server().await;
|
|
let mut builder = test_codex().with_model("gpt-5").with_config(|config| {
|
|
config.features.enable(Feature::UnifiedExec);
|
|
config.features.enable(Feature::CollaborationModes);
|
|
});
|
|
let test = builder.build(&server).await?;
|
|
let call_id = "unified-exec-empty-script-collab";
|
|
let args = json!({
|
|
"cmd": "",
|
|
"yield_time_ms": 1_000,
|
|
});
|
|
|
|
mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-empty-unified-1"),
|
|
ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
|
|
ev_completed("resp-empty-unified-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
let results_mock = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-empty-unified-1", "done"),
|
|
ev_completed("resp-empty-unified-2"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
let collaboration_mode = collaboration_mode_for_model(test.session_configured.model.clone());
|
|
submit_user_turn(
|
|
&test,
|
|
"run empty unified exec command",
|
|
AskForApproval::OnRequest,
|
|
SandboxPolicy::DangerFullAccess,
|
|
Some(collaboration_mode),
|
|
)
|
|
.await?;
|
|
|
|
wait_for_event(&test.codex, |event| {
|
|
matches!(event, EventMsg::TurnComplete(_))
|
|
})
|
|
.await;
|
|
|
|
let output_item = results_mock.single_request().function_call_output(call_id);
|
|
assert_no_matched_rules_invariant(&output_item);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn shell_command_whitespace_script_with_collaboration_mode_does_not_panic() -> Result<()> {
|
|
let server = start_mock_server().await;
|
|
let mut builder = test_codex().with_model("gpt-5").with_config(|config| {
|
|
config.features.enable(Feature::CollaborationModes);
|
|
});
|
|
let test = builder.build(&server).await?;
|
|
let call_id = "shell-whitespace-script-collab";
|
|
let args = json!({
|
|
"command": " \n\t ",
|
|
"timeout_ms": 1_000,
|
|
});
|
|
|
|
mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-whitespace-shell-1"),
|
|
ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
|
|
ev_completed("resp-whitespace-shell-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
let results_mock = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-whitespace-shell-1", "done"),
|
|
ev_completed("resp-whitespace-shell-2"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
let collaboration_mode = collaboration_mode_for_model(test.session_configured.model.clone());
|
|
submit_user_turn(
|
|
&test,
|
|
"run whitespace shell command",
|
|
AskForApproval::OnRequest,
|
|
SandboxPolicy::DangerFullAccess,
|
|
Some(collaboration_mode),
|
|
)
|
|
.await?;
|
|
|
|
wait_for_event(&test.codex, |event| {
|
|
matches!(event, EventMsg::TurnComplete(_))
|
|
})
|
|
.await;
|
|
|
|
let output_item = results_mock.single_request().function_call_output(call_id);
|
|
assert_no_matched_rules_invariant(&output_item);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn unified_exec_whitespace_script_with_collaboration_mode_does_not_panic() -> Result<()> {
|
|
let server = start_mock_server().await;
|
|
let mut builder = test_codex().with_model("gpt-5").with_config(|config| {
|
|
config.features.enable(Feature::UnifiedExec);
|
|
config.features.enable(Feature::CollaborationModes);
|
|
});
|
|
let test = builder.build(&server).await?;
|
|
let call_id = "unified-exec-whitespace-script-collab";
|
|
let args = json!({
|
|
"cmd": " \n \t",
|
|
"yield_time_ms": 1_000,
|
|
});
|
|
|
|
mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-whitespace-unified-1"),
|
|
ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
|
|
ev_completed("resp-whitespace-unified-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
let results_mock = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-whitespace-unified-1", "done"),
|
|
ev_completed("resp-whitespace-unified-2"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
let collaboration_mode = collaboration_mode_for_model(test.session_configured.model.clone());
|
|
submit_user_turn(
|
|
&test,
|
|
"run whitespace unified exec command",
|
|
AskForApproval::OnRequest,
|
|
SandboxPolicy::DangerFullAccess,
|
|
Some(collaboration_mode),
|
|
)
|
|
.await?;
|
|
|
|
wait_for_event(&test.codex, |event| {
|
|
matches!(event, EventMsg::TurnComplete(_))
|
|
})
|
|
.await;
|
|
|
|
let output_item = results_mock.single_request().function_call_output(call_id);
|
|
assert_no_matched_rules_invariant(&output_item);
|
|
|
|
Ok(())
|
|
}
|