#![allow(clippy::unwrap_used, clippy::expect_used)] use anyhow::Result; use codex_core::features::Feature; use codex_core::protocol::AskForApproval; use codex_core::protocol::EventMsg; use codex_core::protocol::Op; use codex_core::protocol::SandboxPolicy; use codex_protocol::config_types::CollaborationMode; use codex_protocol::config_types::ModeKind; use codex_protocol::config_types::ReasoningSummary; use codex_protocol::config_types::Settings; use codex_protocol::user_input::UserInput; use core_test_support::responses::ev_assistant_message; use core_test_support::responses::ev_completed; use core_test_support::responses::ev_function_call; use core_test_support::responses::ev_response_created; use core_test_support::responses::mount_sse_once; use core_test_support::responses::sse; use core_test_support::responses::start_mock_server; use core_test_support::test_codex::test_codex; use core_test_support::wait_for_event; use serde_json::Value; use serde_json::json; use std::fs; fn collaboration_mode_for_model(model: String) -> CollaborationMode { CollaborationMode { mode: ModeKind::Default, settings: Settings { model, reasoning_effort: None, developer_instructions: Some("exercise approvals in collaboration mode".to_string()), }, } } async fn submit_user_turn( test: &core_test_support::test_codex::TestCodex, prompt: &str, approval_policy: AskForApproval, sandbox_policy: SandboxPolicy, collaboration_mode: Option, ) -> Result<()> { let session_model = test.session_configured.model.clone(); test.codex .submit(Op::UserTurn { items: vec![UserInput::Text { text: prompt.into(), text_elements: Vec::new(), }], final_output_json_schema: None, cwd: test.cwd_path().to_path_buf(), approval_policy, sandbox_policy, model: session_model, effort: None, summary: ReasoningSummary::Auto, collaboration_mode, personality: None, }) .await?; Ok(()) } fn assert_no_matched_rules_invariant(output_item: &Value) { let Some(output) = output_item.get("output").and_then(Value::as_str) else { panic!("function_call_output should include string output payload: {output_item:?}"); }; assert!( !output.contains("invariant failed: matched_rules must be non-empty"), "unexpected invariant panic surfaced in output: {output}" ); } #[tokio::test] async fn execpolicy_blocks_shell_invocation() -> Result<()> { // TODO execpolicy doesn't parse powershell commands yet if cfg!(windows) { return Ok(()); } let mut builder = test_codex().with_config(|config| { let policy_path = config.codex_home.join("rules").join("policy.rules"); fs::create_dir_all( policy_path .parent() .expect("policy directory must have a parent"), ) .expect("create policy directory"); fs::write( &policy_path, r#"prefix_rule(pattern=["echo"], decision="forbidden")"#, ) .expect("write policy file"); }); let server = start_mock_server().await; let test = builder.build(&server).await?; let call_id = "shell-forbidden"; let args = json!({ "command": "echo blocked", "timeout_ms": 1_000, }); mount_sse_once( &server, sse(vec![ ev_response_created("resp-1"), ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?), ev_completed("resp-1"), ]), ) .await; mount_sse_once( &server, sse(vec![ ev_assistant_message("msg-1", "done"), ev_completed("resp-2"), ]), ) .await; let session_model = test.session_configured.model.clone(); test.codex .submit(Op::UserTurn { items: vec![UserInput::Text { text: "run shell command".into(), text_elements: Vec::new(), }], final_output_json_schema: None, cwd: test.cwd_path().to_path_buf(), approval_policy: AskForApproval::Never, sandbox_policy: SandboxPolicy::DangerFullAccess, model: session_model, effort: None, summary: ReasoningSummary::Auto, collaboration_mode: None, personality: None, }) .await?; let EventMsg::ExecCommandEnd(end) = wait_for_event(&test.codex, |event| { matches!(event, EventMsg::ExecCommandEnd(_)) }) .await else { unreachable!() }; wait_for_event(&test.codex, |event| { matches!(event, EventMsg::TurnComplete(_)) }) .await; assert!( end.aggregated_output .contains("policy forbids commands starting with `echo`"), "unexpected output: {}", end.aggregated_output ); Ok(()) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn shell_command_empty_script_with_collaboration_mode_does_not_panic() -> Result<()> { let server = start_mock_server().await; let mut builder = test_codex().with_model("gpt-5").with_config(|config| { config.features.enable(Feature::CollaborationModes); }); let test = builder.build(&server).await?; let call_id = "shell-empty-script-collab"; let args = json!({ "command": "", "timeout_ms": 1_000, }); mount_sse_once( &server, sse(vec![ ev_response_created("resp-empty-shell-1"), ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?), ev_completed("resp-empty-shell-1"), ]), ) .await; let results_mock = mount_sse_once( &server, sse(vec![ ev_assistant_message("msg-empty-shell-1", "done"), ev_completed("resp-empty-shell-2"), ]), ) .await; let collaboration_mode = collaboration_mode_for_model(test.session_configured.model.clone()); submit_user_turn( &test, "run an empty shell command", AskForApproval::OnRequest, SandboxPolicy::DangerFullAccess, Some(collaboration_mode), ) .await?; wait_for_event(&test.codex, |event| { matches!(event, EventMsg::TurnComplete(_)) }) .await; let output_item = results_mock.single_request().function_call_output(call_id); assert_no_matched_rules_invariant(&output_item); Ok(()) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn unified_exec_empty_script_with_collaboration_mode_does_not_panic() -> Result<()> { let server = start_mock_server().await; let mut builder = test_codex().with_model("gpt-5").with_config(|config| { config.features.enable(Feature::UnifiedExec); config.features.enable(Feature::CollaborationModes); }); let test = builder.build(&server).await?; let call_id = "unified-exec-empty-script-collab"; let args = json!({ "cmd": "", "yield_time_ms": 1_000, }); mount_sse_once( &server, sse(vec![ ev_response_created("resp-empty-unified-1"), ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?), ev_completed("resp-empty-unified-1"), ]), ) .await; let results_mock = mount_sse_once( &server, sse(vec![ ev_assistant_message("msg-empty-unified-1", "done"), ev_completed("resp-empty-unified-2"), ]), ) .await; let collaboration_mode = collaboration_mode_for_model(test.session_configured.model.clone()); submit_user_turn( &test, "run empty unified exec command", AskForApproval::OnRequest, SandboxPolicy::DangerFullAccess, Some(collaboration_mode), ) .await?; wait_for_event(&test.codex, |event| { matches!(event, EventMsg::TurnComplete(_)) }) .await; let output_item = results_mock.single_request().function_call_output(call_id); assert_no_matched_rules_invariant(&output_item); Ok(()) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn shell_command_whitespace_script_with_collaboration_mode_does_not_panic() -> Result<()> { let server = start_mock_server().await; let mut builder = test_codex().with_model("gpt-5").with_config(|config| { config.features.enable(Feature::CollaborationModes); }); let test = builder.build(&server).await?; let call_id = "shell-whitespace-script-collab"; let args = json!({ "command": " \n\t ", "timeout_ms": 1_000, }); mount_sse_once( &server, sse(vec![ ev_response_created("resp-whitespace-shell-1"), ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?), ev_completed("resp-whitespace-shell-1"), ]), ) .await; let results_mock = mount_sse_once( &server, sse(vec![ ev_assistant_message("msg-whitespace-shell-1", "done"), ev_completed("resp-whitespace-shell-2"), ]), ) .await; let collaboration_mode = collaboration_mode_for_model(test.session_configured.model.clone()); submit_user_turn( &test, "run whitespace shell command", AskForApproval::OnRequest, SandboxPolicy::DangerFullAccess, Some(collaboration_mode), ) .await?; wait_for_event(&test.codex, |event| { matches!(event, EventMsg::TurnComplete(_)) }) .await; let output_item = results_mock.single_request().function_call_output(call_id); assert_no_matched_rules_invariant(&output_item); Ok(()) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn unified_exec_whitespace_script_with_collaboration_mode_does_not_panic() -> Result<()> { let server = start_mock_server().await; let mut builder = test_codex().with_model("gpt-5").with_config(|config| { config.features.enable(Feature::UnifiedExec); config.features.enable(Feature::CollaborationModes); }); let test = builder.build(&server).await?; let call_id = "unified-exec-whitespace-script-collab"; let args = json!({ "cmd": " \n \t", "yield_time_ms": 1_000, }); mount_sse_once( &server, sse(vec![ ev_response_created("resp-whitespace-unified-1"), ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?), ev_completed("resp-whitespace-unified-1"), ]), ) .await; let results_mock = mount_sse_once( &server, sse(vec![ ev_assistant_message("msg-whitespace-unified-1", "done"), ev_completed("resp-whitespace-unified-2"), ]), ) .await; let collaboration_mode = collaboration_mode_for_model(test.session_configured.model.clone()); submit_user_turn( &test, "run whitespace unified exec command", AskForApproval::OnRequest, SandboxPolicy::DangerFullAccess, Some(collaboration_mode), ) .await?; wait_for_event(&test.codex, |event| { matches!(event, EventMsg::TurnComplete(_)) }) .await; let output_item = results_mock.single_request().function_call_output(call_id); assert_no_matched_rules_invariant(&output_item); Ok(()) }