mirror of
https://github.com/openai/codex.git
synced 2026-04-24 22:54:54 +00:00
2750 lines
99 KiB
Rust
2750 lines
99 KiB
Rust
#![allow(clippy::unwrap_used, clippy::expect_used)]
|
|
|
|
use anyhow::Result;
|
|
use codex_core::config::Constrained;
|
|
use codex_core::config_loader::ConfigLayerStack;
|
|
use codex_core::config_loader::ConfigLayerStackOrdering;
|
|
use codex_core::config_loader::NetworkConstraints;
|
|
use codex_core::config_loader::NetworkRequirementsToml;
|
|
use codex_core::config_loader::RequirementSource;
|
|
use codex_core::config_loader::Sourced;
|
|
use codex_core::features::Feature;
|
|
use codex_core::sandboxing::SandboxPermissions;
|
|
use codex_protocol::approvals::NetworkApprovalProtocol;
|
|
use codex_protocol::approvals::NetworkPolicyAmendment;
|
|
use codex_protocol::approvals::NetworkPolicyRuleAction;
|
|
use codex_protocol::protocol::ApplyPatchApprovalRequestEvent;
|
|
use codex_protocol::protocol::AskForApproval;
|
|
use codex_protocol::protocol::EventMsg;
|
|
use codex_protocol::protocol::ExecApprovalRequestEvent;
|
|
use codex_protocol::protocol::ExecPolicyAmendment;
|
|
use codex_protocol::protocol::Op;
|
|
use codex_protocol::protocol::ReviewDecision;
|
|
use codex_protocol::protocol::SandboxPolicy;
|
|
use codex_protocol::user_input::UserInput;
|
|
use core_test_support::responses::ev_apply_patch_function_call;
|
|
use core_test_support::responses::ev_assistant_message;
|
|
use core_test_support::responses::ev_completed;
|
|
use core_test_support::responses::ev_function_call;
|
|
use core_test_support::responses::ev_response_created;
|
|
use core_test_support::responses::mount_sse_once;
|
|
use core_test_support::responses::sse;
|
|
use core_test_support::responses::start_mock_server;
|
|
use core_test_support::skip_if_no_network;
|
|
use core_test_support::test_codex::TestCodex;
|
|
use core_test_support::test_codex::test_codex;
|
|
use core_test_support::wait_for_event;
|
|
use core_test_support::wait_for_event_with_timeout;
|
|
use core_test_support::zsh_fork::build_unified_exec_zsh_fork_test;
|
|
use core_test_support::zsh_fork::build_zsh_fork_test;
|
|
use core_test_support::zsh_fork::restrictive_workspace_write_policy;
|
|
use core_test_support::zsh_fork::zsh_fork_runtime;
|
|
use pretty_assertions::assert_eq;
|
|
use regex_lite::Regex;
|
|
use serde_json::Value;
|
|
use serde_json::json;
|
|
use std::env;
|
|
use std::fs;
|
|
use std::path::PathBuf;
|
|
use std::sync::Arc;
|
|
use tempfile::TempDir;
|
|
use wiremock::Mock;
|
|
use wiremock::MockServer;
|
|
use wiremock::ResponseTemplate;
|
|
use wiremock::matchers::method;
|
|
use wiremock::matchers::path;
|
|
|
|
#[derive(Clone, Copy)]
|
|
enum TargetPath {
|
|
Workspace(&'static str),
|
|
OutsideWorkspace(&'static str),
|
|
}
|
|
|
|
impl TargetPath {
|
|
fn resolve_for_patch(self, test: &TestCodex) -> (PathBuf, String) {
|
|
match self {
|
|
TargetPath::Workspace(name) => {
|
|
let path = test.cwd.path().join(name);
|
|
(path, name.to_string())
|
|
}
|
|
TargetPath::OutsideWorkspace(name) => {
|
|
let path = env::current_dir()
|
|
.expect("current dir should be available")
|
|
.join(name);
|
|
(path.clone(), path.display().to_string())
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Clone)]
|
|
enum ActionKind {
|
|
WriteFile {
|
|
target: TargetPath,
|
|
content: &'static str,
|
|
},
|
|
FetchUrlNoProxy {
|
|
endpoint: &'static str,
|
|
response_body: &'static str,
|
|
},
|
|
FetchUrl {
|
|
endpoint: &'static str,
|
|
response_body: &'static str,
|
|
},
|
|
RunCommand {
|
|
command: &'static str,
|
|
},
|
|
RunUnifiedExecCommand {
|
|
command: &'static str,
|
|
justification: Option<&'static str>,
|
|
},
|
|
ApplyPatchFunction {
|
|
target: TargetPath,
|
|
content: &'static str,
|
|
},
|
|
ApplyPatchShell {
|
|
target: TargetPath,
|
|
content: &'static str,
|
|
},
|
|
}
|
|
|
|
const DEFAULT_UNIFIED_EXEC_JUSTIFICATION: &str =
|
|
"Requires escalated permissions to bypass the sandbox in tests.";
|
|
|
|
impl ActionKind {
|
|
async fn prepare(
|
|
&self,
|
|
test: &TestCodex,
|
|
server: &MockServer,
|
|
call_id: &str,
|
|
sandbox_permissions: SandboxPermissions,
|
|
) -> Result<(Value, Option<String>)> {
|
|
match self {
|
|
ActionKind::WriteFile { target, content } => {
|
|
let (path, _) = target.resolve_for_patch(test);
|
|
let _ = fs::remove_file(&path);
|
|
let command = format!("printf {content:?} > {path:?} && cat {path:?}");
|
|
let event = shell_event(call_id, &command, 5_000, sandbox_permissions)?;
|
|
Ok((event, Some(command)))
|
|
}
|
|
ActionKind::FetchUrl {
|
|
endpoint,
|
|
response_body,
|
|
} => {
|
|
Mock::given(method("GET"))
|
|
.and(path(*endpoint))
|
|
.respond_with(
|
|
ResponseTemplate::new(200).set_body_string(response_body.to_string()),
|
|
)
|
|
.mount(server)
|
|
.await;
|
|
|
|
let url = format!("{}{}", server.uri(), endpoint);
|
|
let escaped_url = url.replace('\'', "\\'");
|
|
let script = format!(
|
|
"import sys\nimport urllib.request\nurl = '{escaped_url}'\ntry:\n data = urllib.request.urlopen(url, timeout=2).read().decode()\n print('OK:' + data.strip())\nexcept Exception as exc:\n print('ERR:' + exc.__class__.__name__)\n sys.exit(1)",
|
|
);
|
|
|
|
let command = format!("python3 -c \"{script}\"");
|
|
let event = shell_event(call_id, &command, 5_000, sandbox_permissions)?;
|
|
Ok((event, Some(command)))
|
|
}
|
|
ActionKind::FetchUrlNoProxy {
|
|
endpoint,
|
|
response_body,
|
|
} => {
|
|
Mock::given(method("GET"))
|
|
.and(path(*endpoint))
|
|
.respond_with(
|
|
ResponseTemplate::new(200).set_body_string(response_body.to_string()),
|
|
)
|
|
.mount(server)
|
|
.await;
|
|
|
|
let url = format!("{}{}", server.uri(), endpoint);
|
|
let escaped_url = url.replace('\'', "\\'");
|
|
let script = format!(
|
|
"import sys\nimport urllib.request\nurl = '{escaped_url}'\nopener = urllib.request.build_opener(urllib.request.ProxyHandler({{}}))\ntry:\n data = opener.open(url, timeout=2).read().decode()\n print('OK:' + data.strip())\nexcept Exception as exc:\n print('ERR:' + exc.__class__.__name__)\n sys.exit(1)",
|
|
);
|
|
|
|
let command = format!("python3 -c \"{script}\"");
|
|
let event = shell_event(call_id, &command, 5_000, sandbox_permissions)?;
|
|
Ok((event, Some(command)))
|
|
}
|
|
ActionKind::RunCommand { command } => {
|
|
let event = shell_event(call_id, command, 1_000, sandbox_permissions)?;
|
|
Ok((event, Some(command.to_string())))
|
|
}
|
|
ActionKind::RunUnifiedExecCommand {
|
|
command,
|
|
justification,
|
|
} => {
|
|
let event = exec_command_event(
|
|
call_id,
|
|
command,
|
|
Some(1000),
|
|
sandbox_permissions,
|
|
*justification,
|
|
)?;
|
|
Ok((event, Some(command.to_string())))
|
|
}
|
|
ActionKind::ApplyPatchFunction { target, content } => {
|
|
let (path, patch_path) = target.resolve_for_patch(test);
|
|
let _ = fs::remove_file(&path);
|
|
let patch = build_add_file_patch(&patch_path, content);
|
|
Ok((ev_apply_patch_function_call(call_id, &patch), None))
|
|
}
|
|
ActionKind::ApplyPatchShell { target, content } => {
|
|
let (path, patch_path) = target.resolve_for_patch(test);
|
|
let _ = fs::remove_file(&path);
|
|
let patch = build_add_file_patch(&patch_path, content);
|
|
let command = shell_apply_patch_command(&patch);
|
|
let event = shell_event(call_id, &command, 5_000, sandbox_permissions)?;
|
|
Ok((event, Some(command)))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
fn build_add_file_patch(patch_path: &str, content: &str) -> String {
|
|
format!("*** Begin Patch\n*** Add File: {patch_path}\n+{content}\n*** End Patch\n")
|
|
}
|
|
|
|
fn shell_apply_patch_command(patch: &str) -> String {
|
|
let mut script = String::from("apply_patch <<'PATCH'\n");
|
|
script.push_str(patch);
|
|
if !patch.ends_with('\n') {
|
|
script.push('\n');
|
|
}
|
|
script.push_str("PATCH\n");
|
|
script
|
|
}
|
|
|
|
fn shell_event(
|
|
call_id: &str,
|
|
command: &str,
|
|
timeout_ms: u64,
|
|
sandbox_permissions: SandboxPermissions,
|
|
) -> Result<Value> {
|
|
shell_event_with_prefix_rule(call_id, command, timeout_ms, sandbox_permissions, None)
|
|
}
|
|
|
|
fn shell_event_with_prefix_rule(
|
|
call_id: &str,
|
|
command: &str,
|
|
timeout_ms: u64,
|
|
sandbox_permissions: SandboxPermissions,
|
|
prefix_rule: Option<Vec<String>>,
|
|
) -> Result<Value> {
|
|
let mut args = json!({
|
|
"command": command,
|
|
"timeout_ms": timeout_ms,
|
|
});
|
|
if sandbox_permissions.requests_sandbox_override() {
|
|
args["sandbox_permissions"] = json!(sandbox_permissions);
|
|
}
|
|
if let Some(prefix_rule) = prefix_rule {
|
|
args["prefix_rule"] = json!(prefix_rule);
|
|
}
|
|
let args_str = serde_json::to_string(&args)?;
|
|
Ok(ev_function_call(call_id, "shell_command", &args_str))
|
|
}
|
|
|
|
fn exec_command_event(
|
|
call_id: &str,
|
|
cmd: &str,
|
|
yield_time_ms: Option<u64>,
|
|
sandbox_permissions: SandboxPermissions,
|
|
justification: Option<&str>,
|
|
) -> Result<Value> {
|
|
let mut args = json!({
|
|
"cmd": cmd.to_string(),
|
|
});
|
|
if let Some(yield_time_ms) = yield_time_ms {
|
|
args["yield_time_ms"] = json!(yield_time_ms);
|
|
}
|
|
if sandbox_permissions.requests_sandbox_override() {
|
|
args["sandbox_permissions"] = json!(sandbox_permissions);
|
|
let reason = justification.unwrap_or(DEFAULT_UNIFIED_EXEC_JUSTIFICATION);
|
|
args["justification"] = json!(reason);
|
|
}
|
|
let args_str = serde_json::to_string(&args)?;
|
|
Ok(ev_function_call(call_id, "exec_command", &args_str))
|
|
}
|
|
|
|
#[derive(Clone)]
|
|
enum Expectation {
|
|
FileCreated {
|
|
target: TargetPath,
|
|
content: &'static str,
|
|
},
|
|
FileCreatedNoExitCode {
|
|
target: TargetPath,
|
|
content: &'static str,
|
|
},
|
|
PatchApplied {
|
|
target: TargetPath,
|
|
content: &'static str,
|
|
},
|
|
FileNotCreated {
|
|
target: TargetPath,
|
|
message_contains: &'static [&'static str],
|
|
},
|
|
NetworkSuccess {
|
|
body_contains: &'static str,
|
|
},
|
|
NetworkSuccessNoExitCode {
|
|
body_contains: &'static str,
|
|
},
|
|
NetworkFailure {
|
|
expect_tag: &'static str,
|
|
},
|
|
CommandSuccess {
|
|
stdout_contains: &'static str,
|
|
},
|
|
CommandSuccessNoExitCode {
|
|
stdout_contains: &'static str,
|
|
},
|
|
CommandFailure {
|
|
output_contains: &'static str,
|
|
},
|
|
}
|
|
|
|
impl Expectation {
|
|
fn verify(&self, test: &TestCodex, result: &CommandResult) -> Result<()> {
|
|
match self {
|
|
Expectation::FileCreated { target, content } => {
|
|
let (path, _) = target.resolve_for_patch(test);
|
|
assert_eq!(
|
|
result.exit_code,
|
|
Some(0),
|
|
"expected successful exit for {path:?}"
|
|
);
|
|
assert!(
|
|
result.stdout.contains(content),
|
|
"stdout missing {content:?}: {}",
|
|
result.stdout
|
|
);
|
|
let file_contents = fs::read_to_string(&path)?;
|
|
assert!(
|
|
file_contents.contains(content),
|
|
"file contents missing {content:?}: {file_contents}"
|
|
);
|
|
let _ = fs::remove_file(path);
|
|
}
|
|
Expectation::FileCreatedNoExitCode { target, content } => {
|
|
let (path, _) = target.resolve_for_patch(test);
|
|
assert!(
|
|
result.exit_code.is_none() || result.exit_code == Some(0),
|
|
"expected no exit code for {path:?}",
|
|
);
|
|
assert!(
|
|
result.stdout.contains(content),
|
|
"stdout missing {content:?}: {}",
|
|
result.stdout
|
|
);
|
|
let file_contents = fs::read_to_string(&path)?;
|
|
assert!(
|
|
file_contents.contains(content),
|
|
"file contents missing {content:?}: {file_contents}"
|
|
);
|
|
let _ = fs::remove_file(path);
|
|
}
|
|
Expectation::PatchApplied { target, content } => {
|
|
let (path, _) = target.resolve_for_patch(test);
|
|
match result.exit_code {
|
|
Some(0) | None => {
|
|
if result.exit_code.is_none() {
|
|
assert!(
|
|
result.stdout.contains("Success."),
|
|
"patch output missing success indicator: {}",
|
|
result.stdout
|
|
);
|
|
}
|
|
}
|
|
Some(code) => panic!(
|
|
"expected successful patch exit for {:?}, got {code} with stdout {}",
|
|
path, result.stdout
|
|
),
|
|
}
|
|
let file_contents = fs::read_to_string(&path)?;
|
|
assert!(
|
|
file_contents.contains(content),
|
|
"patched file missing {content:?}: {file_contents}"
|
|
);
|
|
let _ = fs::remove_file(path);
|
|
}
|
|
Expectation::FileNotCreated {
|
|
target,
|
|
message_contains,
|
|
} => {
|
|
let (path, _) = target.resolve_for_patch(test);
|
|
assert_ne!(
|
|
result.exit_code,
|
|
Some(0),
|
|
"expected non-zero exit for {path:?}"
|
|
);
|
|
for needle in *message_contains {
|
|
if needle.contains('|') {
|
|
let options: Vec<&str> = needle.split('|').collect();
|
|
let matches_any =
|
|
options.iter().any(|option| result.stdout.contains(option));
|
|
assert!(
|
|
matches_any,
|
|
"stdout missing one of {options:?}: {}",
|
|
result.stdout
|
|
);
|
|
} else {
|
|
assert!(
|
|
result.stdout.contains(needle),
|
|
"stdout missing {needle:?}: {}",
|
|
result.stdout
|
|
);
|
|
}
|
|
}
|
|
assert!(
|
|
!path.exists(),
|
|
"command should not create {path:?}, but file exists"
|
|
);
|
|
}
|
|
Expectation::NetworkSuccess { body_contains } => {
|
|
assert_eq!(
|
|
result.exit_code,
|
|
Some(0),
|
|
"expected successful network exit: {}",
|
|
result.stdout
|
|
);
|
|
assert!(
|
|
result.stdout.contains("OK:"),
|
|
"stdout missing OK prefix: {}",
|
|
result.stdout
|
|
);
|
|
assert!(
|
|
result.stdout.contains(body_contains),
|
|
"stdout missing body text {body_contains:?}: {}",
|
|
result.stdout
|
|
);
|
|
}
|
|
Expectation::NetworkSuccessNoExitCode { body_contains } => {
|
|
assert!(
|
|
result.exit_code.is_none() || result.exit_code == Some(0),
|
|
"expected no exit code for successful network call: {}",
|
|
result.stdout
|
|
);
|
|
assert!(
|
|
result.stdout.contains("OK:"),
|
|
"stdout missing OK prefix: {}",
|
|
result.stdout
|
|
);
|
|
assert!(
|
|
result.stdout.contains(body_contains),
|
|
"stdout missing body text {body_contains:?}: {}",
|
|
result.stdout
|
|
);
|
|
}
|
|
Expectation::NetworkFailure { expect_tag } => {
|
|
assert_ne!(
|
|
result.exit_code,
|
|
Some(0),
|
|
"expected non-zero exit for network failure: {}",
|
|
result.stdout
|
|
);
|
|
assert!(
|
|
result.stdout.contains("ERR:"),
|
|
"stdout missing ERR prefix: {}",
|
|
result.stdout
|
|
);
|
|
assert!(
|
|
result.stdout.contains(expect_tag),
|
|
"stdout missing expected tag {expect_tag:?}: {}",
|
|
result.stdout
|
|
);
|
|
}
|
|
Expectation::CommandSuccess { stdout_contains } => {
|
|
assert_eq!(
|
|
result.exit_code,
|
|
Some(0),
|
|
"expected successful trusted command exit: {}",
|
|
result.stdout
|
|
);
|
|
assert!(
|
|
result.stdout.contains(stdout_contains),
|
|
"trusted command stdout missing {stdout_contains:?}: {}",
|
|
result.stdout
|
|
);
|
|
}
|
|
Expectation::CommandSuccessNoExitCode { stdout_contains } => {
|
|
assert!(
|
|
result.exit_code.is_none() || result.exit_code == Some(0),
|
|
"expected no exit code for trusted command: {}",
|
|
result.stdout
|
|
);
|
|
assert!(
|
|
result.stdout.contains(stdout_contains),
|
|
"trusted command stdout missing {stdout_contains:?}: {}",
|
|
result.stdout
|
|
);
|
|
}
|
|
Expectation::CommandFailure { output_contains } => {
|
|
assert_ne!(
|
|
result.exit_code,
|
|
Some(0),
|
|
"expected non-zero exit for command failure: {}",
|
|
result.stdout
|
|
);
|
|
assert!(
|
|
result.stdout.contains(output_contains),
|
|
"command failure stderr missing {output_contains:?}: {}",
|
|
result.stdout
|
|
);
|
|
}
|
|
}
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
#[derive(Clone)]
|
|
enum Outcome {
|
|
Auto,
|
|
ExecApproval {
|
|
decision: ReviewDecision,
|
|
expected_reason: Option<&'static str>,
|
|
},
|
|
PatchApproval {
|
|
decision: ReviewDecision,
|
|
expected_reason: Option<&'static str>,
|
|
},
|
|
}
|
|
|
|
#[derive(Clone)]
|
|
struct ScenarioSpec {
|
|
name: &'static str,
|
|
approval_policy: AskForApproval,
|
|
sandbox_policy: SandboxPolicy,
|
|
action: ActionKind,
|
|
sandbox_permissions: SandboxPermissions,
|
|
features: Vec<Feature>,
|
|
model_override: Option<&'static str>,
|
|
outcome: Outcome,
|
|
expectation: Expectation,
|
|
}
|
|
|
|
struct CommandResult {
|
|
exit_code: Option<i64>,
|
|
stdout: String,
|
|
}
|
|
|
|
async fn submit_turn(
|
|
test: &TestCodex,
|
|
prompt: &str,
|
|
approval_policy: AskForApproval,
|
|
sandbox_policy: SandboxPolicy,
|
|
) -> Result<()> {
|
|
let session_model = test.session_configured.model.clone();
|
|
|
|
test.codex
|
|
.submit(Op::UserTurn {
|
|
items: vec![UserInput::Text {
|
|
text: prompt.into(),
|
|
text_elements: Vec::new(),
|
|
}],
|
|
final_output_json_schema: None,
|
|
cwd: test.cwd.path().to_path_buf(),
|
|
approval_policy,
|
|
sandbox_policy,
|
|
model: session_model,
|
|
effort: None,
|
|
summary: None,
|
|
service_tier: None,
|
|
collaboration_mode: None,
|
|
personality: None,
|
|
})
|
|
.await?;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn parse_result(item: &Value) -> CommandResult {
|
|
let output_str = item
|
|
.get("output")
|
|
.and_then(Value::as_str)
|
|
.expect("shell output payload");
|
|
match serde_json::from_str::<Value>(output_str) {
|
|
Ok(parsed) => {
|
|
let exit_code = parsed["metadata"]["exit_code"].as_i64();
|
|
let stdout = parsed["output"].as_str().unwrap_or_default().to_string();
|
|
CommandResult { exit_code, stdout }
|
|
}
|
|
Err(_) => {
|
|
let structured = Regex::new(r"(?s)^Exit code:\s*(-?\d+).*?Output:\n(.*)$").unwrap();
|
|
let regex =
|
|
Regex::new(r"(?s)^.*?Process exited with code (\d+)\n.*?Output:\n(.*)$").unwrap();
|
|
// parse freeform output
|
|
if let Some(captures) = structured.captures(output_str) {
|
|
let exit_code = captures.get(1).unwrap().as_str().parse::<i64>().unwrap();
|
|
let output = captures.get(2).unwrap().as_str();
|
|
CommandResult {
|
|
exit_code: Some(exit_code),
|
|
stdout: output.to_string(),
|
|
}
|
|
} else if let Some(captures) = regex.captures(output_str) {
|
|
let exit_code = captures.get(1).unwrap().as_str().parse::<i64>().unwrap();
|
|
let output = captures.get(2).unwrap().as_str();
|
|
CommandResult {
|
|
exit_code: Some(exit_code),
|
|
stdout: output.to_string(),
|
|
}
|
|
} else {
|
|
CommandResult {
|
|
exit_code: None,
|
|
stdout: output_str.to_string(),
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
async fn expect_exec_approval(
|
|
test: &TestCodex,
|
|
expected_command: &str,
|
|
) -> ExecApprovalRequestEvent {
|
|
let event = wait_for_event(&test.codex, |event| {
|
|
matches!(
|
|
event,
|
|
EventMsg::ExecApprovalRequest(_) | EventMsg::TurnComplete(_)
|
|
)
|
|
})
|
|
.await;
|
|
|
|
match event {
|
|
EventMsg::ExecApprovalRequest(approval) => {
|
|
let last_arg = approval
|
|
.command
|
|
.last()
|
|
.map(std::string::String::as_str)
|
|
.unwrap_or_default();
|
|
assert_eq!(last_arg, expected_command);
|
|
approval
|
|
}
|
|
EventMsg::TurnComplete(_) => panic!("expected approval request before completion"),
|
|
other => panic!("unexpected event: {other:?}"),
|
|
}
|
|
}
|
|
|
|
async fn expect_patch_approval(
|
|
test: &TestCodex,
|
|
expected_call_id: &str,
|
|
) -> ApplyPatchApprovalRequestEvent {
|
|
let event = wait_for_event(&test.codex, |event| {
|
|
matches!(
|
|
event,
|
|
EventMsg::ApplyPatchApprovalRequest(_) | EventMsg::TurnComplete(_)
|
|
)
|
|
})
|
|
.await;
|
|
|
|
match event {
|
|
EventMsg::ApplyPatchApprovalRequest(approval) => {
|
|
assert_eq!(approval.call_id, expected_call_id);
|
|
approval
|
|
}
|
|
EventMsg::TurnComplete(_) => panic!("expected patch approval request before completion"),
|
|
other => panic!("unexpected event: {other:?}"),
|
|
}
|
|
}
|
|
|
|
async fn wait_for_completion_without_approval(test: &TestCodex) {
|
|
let event = wait_for_event(&test.codex, |event| {
|
|
matches!(
|
|
event,
|
|
EventMsg::ExecApprovalRequest(_) | EventMsg::TurnComplete(_)
|
|
)
|
|
})
|
|
.await;
|
|
|
|
match event {
|
|
EventMsg::TurnComplete(_) => {}
|
|
EventMsg::ExecApprovalRequest(event) => {
|
|
panic!("unexpected approval request: {:?}", event.command)
|
|
}
|
|
other => panic!("unexpected event: {other:?}"),
|
|
}
|
|
}
|
|
|
|
async fn wait_for_completion(test: &TestCodex) {
|
|
wait_for_event(&test.codex, |event| {
|
|
matches!(event, EventMsg::TurnComplete(_))
|
|
})
|
|
.await;
|
|
}
|
|
|
|
fn scenarios() -> Vec<ScenarioSpec> {
|
|
use AskForApproval::*;
|
|
|
|
let workspace_write = |network_access| SandboxPolicy::WorkspaceWrite {
|
|
writable_roots: vec![],
|
|
read_only_access: Default::default(),
|
|
network_access,
|
|
exclude_tmpdir_env_var: false,
|
|
exclude_slash_tmp: false,
|
|
};
|
|
|
|
vec![
|
|
ScenarioSpec {
|
|
name: "danger_full_access_on_request_allows_outside_write",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::OutsideWorkspace("dfa_on_request.txt"),
|
|
content: "danger-on-request",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::FileCreated {
|
|
target: TargetPath::OutsideWorkspace("dfa_on_request.txt"),
|
|
content: "danger-on-request",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "danger_full_access_on_request_allows_outside_write_gpt_5_1_no_exit",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::OutsideWorkspace("dfa_on_request_5_1.txt"),
|
|
content: "danger-on-request",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::FileCreated {
|
|
target: TargetPath::OutsideWorkspace("dfa_on_request_5_1.txt"),
|
|
content: "danger-on-request",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "danger_full_access_on_request_allows_network",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::FetchUrlNoProxy {
|
|
endpoint: "/dfa/network",
|
|
response_body: "danger-network-ok",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::NetworkSuccess {
|
|
body_contains: "danger-network-ok",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "danger_full_access_on_request_allows_network_gpt_5_1_no_exit",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::FetchUrlNoProxy {
|
|
endpoint: "/dfa/network",
|
|
response_body: "danger-network-ok",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::NetworkSuccessNoExitCode {
|
|
body_contains: "danger-network-ok",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "trusted_command_unless_trusted_runs_without_prompt",
|
|
approval_policy: UnlessTrusted,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::RunCommand {
|
|
command: "echo trusted-unless",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::CommandSuccess {
|
|
stdout_contains: "trusted-unless",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "trusted_command_unless_trusted_runs_without_prompt_gpt_5_1_no_exit",
|
|
approval_policy: UnlessTrusted,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::RunCommand {
|
|
command: "echo trusted-unless",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::CommandSuccessNoExitCode {
|
|
stdout_contains: "trusted-unless",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "cat_redirect_unless_trusted_requires_approval",
|
|
approval_policy: UnlessTrusted,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::RunCommand {
|
|
command: r#"cat < "hello" > /var/test.txt"#,
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Denied,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::CommandFailure {
|
|
output_contains: "rejected by user",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "cat_redirect_on_request_requires_approval",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::RunCommand {
|
|
command: r#"cat < "hello" > /var/test.txt"#,
|
|
},
|
|
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Denied,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::CommandFailure {
|
|
output_contains: "rejected by user",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "danger_full_access_on_failure_allows_outside_write",
|
|
approval_policy: OnFailure,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::OutsideWorkspace("dfa_on_failure.txt"),
|
|
content: "danger-on-failure",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::FileCreated {
|
|
target: TargetPath::OutsideWorkspace("dfa_on_failure.txt"),
|
|
content: "danger-on-failure",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "danger_full_access_on_failure_allows_outside_write_gpt_5_1_no_exit",
|
|
approval_policy: OnFailure,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::OutsideWorkspace("dfa_on_failure_5_1.txt"),
|
|
content: "danger-on-failure",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::FileCreatedNoExitCode {
|
|
target: TargetPath::OutsideWorkspace("dfa_on_failure_5_1.txt"),
|
|
content: "danger-on-failure",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "danger_full_access_unless_trusted_requests_approval",
|
|
approval_policy: UnlessTrusted,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::OutsideWorkspace("dfa_unless_trusted.txt"),
|
|
content: "danger-unless-trusted",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::FileCreated {
|
|
target: TargetPath::OutsideWorkspace("dfa_unless_trusted.txt"),
|
|
content: "danger-unless-trusted",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "danger_full_access_unless_trusted_requests_approval_gpt_5_1_no_exit",
|
|
approval_policy: UnlessTrusted,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::OutsideWorkspace("dfa_unless_trusted_5_1.txt"),
|
|
content: "danger-unless-trusted",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::FileCreatedNoExitCode {
|
|
target: TargetPath::OutsideWorkspace("dfa_unless_trusted_5_1.txt"),
|
|
content: "danger-unless-trusted",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "danger_full_access_never_allows_outside_write",
|
|
approval_policy: Never,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::OutsideWorkspace("dfa_never.txt"),
|
|
content: "danger-never",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::FileCreated {
|
|
target: TargetPath::OutsideWorkspace("dfa_never.txt"),
|
|
content: "danger-never",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "danger_full_access_never_allows_outside_write_gpt_5_1_no_exit",
|
|
approval_policy: Never,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::OutsideWorkspace("dfa_never_5_1.txt"),
|
|
content: "danger-never",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::FileCreatedNoExitCode {
|
|
target: TargetPath::OutsideWorkspace("dfa_never_5_1.txt"),
|
|
content: "danger-never",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "read_only_on_request_requires_approval",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::Workspace("ro_on_request.txt"),
|
|
content: "read-only-approval",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::FileCreated {
|
|
target: TargetPath::Workspace("ro_on_request.txt"),
|
|
content: "read-only-approval",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "read_only_on_request_requires_approval_gpt_5_1_no_exit",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::Workspace("ro_on_request_5_1.txt"),
|
|
content: "read-only-approval",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::FileCreatedNoExitCode {
|
|
target: TargetPath::Workspace("ro_on_request_5_1.txt"),
|
|
content: "read-only-approval",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "trusted_command_on_request_read_only_runs_without_prompt",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
|
action: ActionKind::RunCommand {
|
|
command: "echo trusted-read-only",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::CommandSuccess {
|
|
stdout_contains: "trusted-read-only",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "trusted_command_on_request_read_only_runs_without_prompt_gpt_5_1_no_exit",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
|
action: ActionKind::RunCommand {
|
|
command: "echo trusted-read-only",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::CommandSuccessNoExitCode {
|
|
stdout_contains: "trusted-read-only",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "read_only_on_request_blocks_network",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
|
action: ActionKind::FetchUrl {
|
|
endpoint: "/ro/network-blocked",
|
|
response_body: "should-not-see",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: None,
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::NetworkFailure { expect_tag: "ERR:" },
|
|
},
|
|
ScenarioSpec {
|
|
name: "read_only_on_request_denied_blocks_execution",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::Workspace("ro_on_request_denied.txt"),
|
|
content: "should-not-write",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
|
features: vec![],
|
|
model_override: None,
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Denied,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::FileNotCreated {
|
|
target: TargetPath::Workspace("ro_on_request_denied.txt"),
|
|
message_contains: &["exec command rejected by user"],
|
|
},
|
|
},
|
|
#[cfg(not(target_os = "linux"))] // TODO (pakrym): figure out why linux behaves differently
|
|
ScenarioSpec {
|
|
name: "read_only_on_failure_escalates_after_sandbox_error",
|
|
approval_policy: OnFailure,
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::Workspace("ro_on_failure.txt"),
|
|
content: "read-only-on-failure",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: Some("command failed; retry without sandbox?"),
|
|
},
|
|
expectation: Expectation::FileCreated {
|
|
target: TargetPath::Workspace("ro_on_failure.txt"),
|
|
content: "read-only-on-failure",
|
|
},
|
|
},
|
|
#[cfg(not(target_os = "linux"))]
|
|
ScenarioSpec {
|
|
name: "read_only_on_failure_escalates_after_sandbox_error_gpt_5_1_no_exit",
|
|
approval_policy: OnFailure,
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::Workspace("ro_on_failure_5_1.txt"),
|
|
content: "read-only-on-failure",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: Some("command failed; retry without sandbox?"),
|
|
},
|
|
expectation: Expectation::FileCreatedNoExitCode {
|
|
target: TargetPath::Workspace("ro_on_failure_5_1.txt"),
|
|
content: "read-only-on-failure",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "read_only_on_request_network_escalates_when_approved",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
|
action: ActionKind::FetchUrl {
|
|
endpoint: "/ro/network-approved",
|
|
response_body: "read-only-network-ok",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::NetworkSuccess {
|
|
body_contains: "read-only-network-ok",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "read_only_on_request_network_escalates_when_approved_gpt_5_1_no_exit",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
|
action: ActionKind::FetchUrl {
|
|
endpoint: "/ro/network-approved",
|
|
response_body: "read-only-network-ok",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::NetworkSuccessNoExitCode {
|
|
body_contains: "read-only-network-ok",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "apply_patch_shell_command_requires_patch_approval",
|
|
approval_policy: UnlessTrusted,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::ApplyPatchShell {
|
|
target: TargetPath::Workspace("apply_patch_shell.txt"),
|
|
content: "shell-apply-patch",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: None,
|
|
outcome: Outcome::PatchApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::PatchApplied {
|
|
target: TargetPath::Workspace("apply_patch_shell.txt"),
|
|
content: "shell-apply-patch",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "apply_patch_function_auto_inside_workspace",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::ApplyPatchFunction {
|
|
target: TargetPath::Workspace("apply_patch_function.txt"),
|
|
content: "function-apply-patch",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1-codex"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::PatchApplied {
|
|
target: TargetPath::Workspace("apply_patch_function.txt"),
|
|
content: "function-apply-patch",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "apply_patch_function_danger_allows_outside_workspace",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::ApplyPatchFunction {
|
|
target: TargetPath::OutsideWorkspace("apply_patch_function_danger.txt"),
|
|
content: "function-patch-danger",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![Feature::ApplyPatchFreeform],
|
|
model_override: Some("gpt-5.1-codex"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::PatchApplied {
|
|
target: TargetPath::OutsideWorkspace("apply_patch_function_danger.txt"),
|
|
content: "function-patch-danger",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "apply_patch_function_outside_requires_patch_approval",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::ApplyPatchFunction {
|
|
target: TargetPath::OutsideWorkspace("apply_patch_function_outside.txt"),
|
|
content: "function-patch-outside",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1-codex"),
|
|
outcome: Outcome::PatchApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::PatchApplied {
|
|
target: TargetPath::OutsideWorkspace("apply_patch_function_outside.txt"),
|
|
content: "function-patch-outside",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "apply_patch_function_outside_denied_blocks_patch",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::ApplyPatchFunction {
|
|
target: TargetPath::OutsideWorkspace("apply_patch_function_outside_denied.txt"),
|
|
content: "function-patch-outside-denied",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1-codex"),
|
|
outcome: Outcome::PatchApproval {
|
|
decision: ReviewDecision::Denied,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::FileNotCreated {
|
|
target: TargetPath::OutsideWorkspace("apply_patch_function_outside_denied.txt"),
|
|
message_contains: &["patch rejected by user"],
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "apply_patch_shell_command_outside_requires_patch_approval",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::ApplyPatchShell {
|
|
target: TargetPath::OutsideWorkspace("apply_patch_shell_outside.txt"),
|
|
content: "shell-patch-outside",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: None,
|
|
outcome: Outcome::PatchApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::PatchApplied {
|
|
target: TargetPath::OutsideWorkspace("apply_patch_shell_outside.txt"),
|
|
content: "shell-patch-outside",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "apply_patch_function_unless_trusted_requires_patch_approval",
|
|
approval_policy: UnlessTrusted,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::ApplyPatchFunction {
|
|
target: TargetPath::Workspace("apply_patch_function_unless_trusted.txt"),
|
|
content: "function-patch-unless-trusted",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1-codex"),
|
|
outcome: Outcome::PatchApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::PatchApplied {
|
|
target: TargetPath::Workspace("apply_patch_function_unless_trusted.txt"),
|
|
content: "function-patch-unless-trusted",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "apply_patch_function_never_rejects_outside_workspace",
|
|
approval_policy: Never,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::ApplyPatchFunction {
|
|
target: TargetPath::OutsideWorkspace("apply_patch_function_never.txt"),
|
|
content: "function-patch-never",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1-codex"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::FileNotCreated {
|
|
target: TargetPath::OutsideWorkspace("apply_patch_function_never.txt"),
|
|
message_contains: &[
|
|
"patch rejected: writing outside of the project; rejected by user approval settings",
|
|
],
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "read_only_unless_trusted_requires_approval",
|
|
approval_policy: UnlessTrusted,
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::Workspace("ro_unless_trusted.txt"),
|
|
content: "read-only-unless-trusted",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::FileCreated {
|
|
target: TargetPath::Workspace("ro_unless_trusted.txt"),
|
|
content: "read-only-unless-trusted",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "read_only_unless_trusted_requires_approval_gpt_5_1_no_exit",
|
|
approval_policy: UnlessTrusted,
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::Workspace("ro_unless_trusted_5_1.txt"),
|
|
content: "read-only-unless-trusted",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::FileCreatedNoExitCode {
|
|
target: TargetPath::Workspace("ro_unless_trusted_5_1.txt"),
|
|
content: "read-only-unless-trusted",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "read_only_never_reports_sandbox_failure",
|
|
approval_policy: Never,
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::Workspace("ro_never.txt"),
|
|
content: "read-only-never",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: None,
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::FileNotCreated {
|
|
target: TargetPath::Workspace("ro_never.txt"),
|
|
message_contains: if cfg!(target_os = "linux") {
|
|
&["Permission denied|Read-only file system"]
|
|
} else {
|
|
&[
|
|
"Permission denied|Operation not permitted|operation not permitted|\
|
|
Read-only file system",
|
|
]
|
|
},
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "trusted_command_never_runs_without_prompt",
|
|
approval_policy: Never,
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
|
action: ActionKind::RunCommand {
|
|
command: "echo trusted-never",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::CommandSuccess {
|
|
stdout_contains: "trusted-never",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "workspace_write_on_request_allows_workspace_write",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::Workspace("ww_on_request.txt"),
|
|
content: "workspace-on-request",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::FileCreated {
|
|
target: TargetPath::Workspace("ww_on_request.txt"),
|
|
content: "workspace-on-request",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "workspace_write_network_disabled_blocks_network",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::FetchUrl {
|
|
endpoint: "/ww/network-blocked",
|
|
response_body: "workspace-network-blocked",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: None,
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::NetworkFailure { expect_tag: "ERR:" },
|
|
},
|
|
ScenarioSpec {
|
|
name: "workspace_write_on_request_requires_approval_outside_workspace",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::OutsideWorkspace("ww_on_request_outside.txt"),
|
|
content: "workspace-on-request-outside",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::FileCreated {
|
|
target: TargetPath::OutsideWorkspace("ww_on_request_outside.txt"),
|
|
content: "workspace-on-request-outside",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "workspace_write_network_enabled_allows_network",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: workspace_write(true),
|
|
action: ActionKind::FetchUrl {
|
|
endpoint: "/ww/network-ok",
|
|
response_body: "workspace-network-ok",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::NetworkSuccess {
|
|
body_contains: "workspace-network-ok",
|
|
},
|
|
},
|
|
#[cfg(not(target_os = "linux"))] // TODO (pakrym): figure out why linux behaves differently
|
|
ScenarioSpec {
|
|
name: "workspace_write_on_failure_escalates_outside_workspace",
|
|
approval_policy: OnFailure,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::OutsideWorkspace("ww_on_failure.txt"),
|
|
content: "workspace-on-failure",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: Some("command failed; retry without sandbox?"),
|
|
},
|
|
expectation: Expectation::FileCreated {
|
|
target: TargetPath::OutsideWorkspace("ww_on_failure.txt"),
|
|
content: "workspace-on-failure",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "workspace_write_unless_trusted_requires_approval_outside_workspace",
|
|
approval_policy: UnlessTrusted,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::OutsideWorkspace("ww_unless_trusted.txt"),
|
|
content: "workspace-unless-trusted",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::FileCreated {
|
|
target: TargetPath::OutsideWorkspace("ww_unless_trusted.txt"),
|
|
content: "workspace-unless-trusted",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "workspace_write_never_blocks_outside_workspace",
|
|
approval_policy: Never,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::OutsideWorkspace("ww_never.txt"),
|
|
content: "workspace-never",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: None,
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::FileNotCreated {
|
|
target: TargetPath::OutsideWorkspace("ww_never.txt"),
|
|
message_contains: if cfg!(target_os = "linux") {
|
|
&["Permission denied|Read-only file system"]
|
|
} else {
|
|
&[
|
|
"Permission denied|Operation not permitted|operation not permitted|\
|
|
Read-only file system",
|
|
]
|
|
},
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "unified exec on request no approval for safe command",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::RunUnifiedExecCommand {
|
|
command: "echo \"hello unified exec\"",
|
|
justification: None,
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![Feature::UnifiedExec],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::CommandSuccess {
|
|
stdout_contains: "hello unified exec",
|
|
},
|
|
},
|
|
#[cfg(not(all(target_os = "linux", target_arch = "aarch64")))]
|
|
// Linux sandbox arg0 test workaround doesn't work on ARM
|
|
ScenarioSpec {
|
|
name: "unified exec on request escalated requires approval",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
|
action: ActionKind::RunUnifiedExecCommand {
|
|
command: "python3 -c 'print('\"'\"'escalated unified exec'\"'\"')'",
|
|
justification: Some(DEFAULT_UNIFIED_EXEC_JUSTIFICATION),
|
|
},
|
|
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
|
features: vec![Feature::UnifiedExec],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: Some(DEFAULT_UNIFIED_EXEC_JUSTIFICATION),
|
|
},
|
|
expectation: Expectation::CommandSuccess {
|
|
stdout_contains: "escalated unified exec",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "unified exec on request requires approval unless trusted",
|
|
approval_policy: AskForApproval::UnlessTrusted,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::RunUnifiedExecCommand {
|
|
command: "git reset --hard",
|
|
justification: None,
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![Feature::UnifiedExec],
|
|
model_override: None,
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Denied,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::CommandFailure {
|
|
output_contains: "rejected by user",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "safe command with heredoc and redirect still requires approval",
|
|
approval_policy: AskForApproval::OnRequest,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::RunUnifiedExecCommand {
|
|
command: "cat <<'EOF' > /tmp/out.txt \nhello\nEOF",
|
|
justification: None,
|
|
},
|
|
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
|
features: vec![Feature::UnifiedExec],
|
|
model_override: None,
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Denied,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::CommandFailure {
|
|
output_contains: "rejected by user",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "compound command with one safe command still requires approval",
|
|
approval_policy: AskForApproval::OnRequest,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::RunUnifiedExecCommand {
|
|
command: "cat ./one.txt && touch ./two.txt",
|
|
justification: None,
|
|
},
|
|
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
|
features: vec![Feature::UnifiedExec],
|
|
model_override: None,
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Denied,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::CommandFailure {
|
|
output_contains: "rejected by user",
|
|
},
|
|
},
|
|
]
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn approval_matrix_covers_all_modes() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
for scenario in scenarios() {
|
|
run_scenario(&scenario).await?;
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
async fn run_scenario(scenario: &ScenarioSpec) -> Result<()> {
|
|
eprintln!("running approval scenario: {}", scenario.name);
|
|
let server = start_mock_server().await;
|
|
let approval_policy = scenario.approval_policy;
|
|
let sandbox_policy = scenario.sandbox_policy.clone();
|
|
let features = scenario.features.clone();
|
|
let model_override = scenario.model_override;
|
|
let model = model_override.unwrap_or("gpt-5.1");
|
|
|
|
let mut builder = test_codex().with_model(model).with_config(move |config| {
|
|
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
|
|
config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy.clone());
|
|
for feature in features {
|
|
config
|
|
.features
|
|
.enable(feature)
|
|
.expect("test config should allow feature update");
|
|
}
|
|
});
|
|
let test = builder.build(&server).await?;
|
|
|
|
let call_id = scenario.name;
|
|
let (event, expected_command) = scenario
|
|
.action
|
|
.prepare(&test, &server, call_id, scenario.sandbox_permissions)
|
|
.await?;
|
|
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-1"),
|
|
event,
|
|
ev_completed("resp-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
let results_mock = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-1", "done"),
|
|
ev_completed("resp-2"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
submit_turn(
|
|
&test,
|
|
scenario.name,
|
|
scenario.approval_policy,
|
|
scenario.sandbox_policy.clone(),
|
|
)
|
|
.await?;
|
|
|
|
match &scenario.outcome {
|
|
Outcome::Auto => {
|
|
wait_for_completion_without_approval(&test).await;
|
|
}
|
|
Outcome::ExecApproval {
|
|
decision,
|
|
expected_reason,
|
|
} => {
|
|
let command = expected_command
|
|
.as_deref()
|
|
.expect("exec approval requires shell command");
|
|
let approval = expect_exec_approval(&test, command).await;
|
|
if let Some(expected_reason) = expected_reason {
|
|
assert_eq!(
|
|
approval.reason.as_deref(),
|
|
Some(*expected_reason),
|
|
"unexpected approval reason for {}",
|
|
scenario.name
|
|
);
|
|
}
|
|
test.codex
|
|
.submit(Op::ExecApproval {
|
|
id: approval.effective_approval_id(),
|
|
turn_id: None,
|
|
decision: decision.clone(),
|
|
})
|
|
.await?;
|
|
wait_for_completion(&test).await;
|
|
}
|
|
Outcome::PatchApproval {
|
|
decision,
|
|
expected_reason,
|
|
} => {
|
|
let approval = expect_patch_approval(&test, call_id).await;
|
|
if let Some(expected_reason) = expected_reason {
|
|
assert_eq!(
|
|
approval.reason.as_deref(),
|
|
Some(*expected_reason),
|
|
"unexpected patch approval reason for {}",
|
|
scenario.name
|
|
);
|
|
}
|
|
test.codex
|
|
.submit(Op::PatchApproval {
|
|
id: approval.call_id,
|
|
decision: decision.clone(),
|
|
})
|
|
.await?;
|
|
wait_for_completion(&test).await;
|
|
}
|
|
}
|
|
|
|
let output_item = results_mock.single_request().function_call_output(call_id);
|
|
let result = parse_result(&output_item);
|
|
scenario.expectation.verify(&test, &result)?;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "current_thread")]
|
|
#[cfg(unix)]
|
|
async fn approving_apply_patch_for_session_skips_future_prompts_for_same_file() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = start_mock_server().await;
|
|
let approval_policy = AskForApproval::OnRequest;
|
|
let sandbox_policy = SandboxPolicy::WorkspaceWrite {
|
|
writable_roots: vec![],
|
|
read_only_access: Default::default(),
|
|
network_access: false,
|
|
exclude_tmpdir_env_var: false,
|
|
exclude_slash_tmp: false,
|
|
};
|
|
let sandbox_policy_for_config = sandbox_policy.clone();
|
|
|
|
let mut builder = test_codex()
|
|
.with_model("gpt-5.1-codex")
|
|
.with_config(move |config| {
|
|
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
|
|
config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config);
|
|
});
|
|
let test = builder.build(&server).await?;
|
|
|
|
let target = TargetPath::OutsideWorkspace("apply_patch_allow_session.txt");
|
|
let (path, patch_path) = target.resolve_for_patch(&test);
|
|
let _ = fs::remove_file(&path);
|
|
|
|
let patch_add = build_add_file_patch(&patch_path, "before");
|
|
let patch_update = format!(
|
|
"*** Begin Patch\n*** Update File: {patch_path}\n@@\n-before\n+after\n*** End Patch\n"
|
|
);
|
|
|
|
let call_id_1 = "apply_patch_allow_session_1";
|
|
let call_id_2 = "apply_patch_allow_session_2";
|
|
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-1"),
|
|
ev_apply_patch_function_call(call_id_1, &patch_add),
|
|
ev_completed("resp-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-1", "done"),
|
|
ev_completed("resp-2"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
submit_turn(
|
|
&test,
|
|
"apply_patch allow session",
|
|
approval_policy,
|
|
sandbox_policy.clone(),
|
|
)
|
|
.await?;
|
|
let approval = expect_patch_approval(&test, call_id_1).await;
|
|
test.codex
|
|
.submit(Op::PatchApproval {
|
|
id: approval.call_id,
|
|
decision: ReviewDecision::ApprovedForSession,
|
|
})
|
|
.await?;
|
|
wait_for_completion(&test).await;
|
|
assert!(fs::read_to_string(&path)?.contains("before"));
|
|
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-3"),
|
|
ev_apply_patch_function_call(call_id_2, &patch_update),
|
|
ev_completed("resp-3"),
|
|
]),
|
|
)
|
|
.await;
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-2", "done"),
|
|
ev_completed("resp-4"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
submit_turn(
|
|
&test,
|
|
"apply_patch allow session followup",
|
|
approval_policy,
|
|
sandbox_policy.clone(),
|
|
)
|
|
.await?;
|
|
|
|
let event = wait_for_event(&test.codex, |event| {
|
|
matches!(
|
|
event,
|
|
EventMsg::ApplyPatchApprovalRequest(_) | EventMsg::TurnComplete(_)
|
|
)
|
|
})
|
|
.await;
|
|
match event {
|
|
EventMsg::TurnComplete(_) => {}
|
|
EventMsg::ApplyPatchApprovalRequest(event) => {
|
|
panic!("unexpected patch approval request: {:?}", event.call_id)
|
|
}
|
|
other => panic!("unexpected event: {other:?}"),
|
|
}
|
|
|
|
assert!(fs::read_to_string(&path)?.contains("after"));
|
|
let _ = fs::remove_file(path);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "current_thread")]
|
|
#[cfg(unix)]
|
|
async fn approving_execpolicy_amendment_persists_policy_and_skips_future_prompts() -> Result<()> {
|
|
let server = start_mock_server().await;
|
|
let approval_policy = AskForApproval::UnlessTrusted;
|
|
let sandbox_policy = SandboxPolicy::new_read_only_policy();
|
|
let sandbox_policy_for_config = sandbox_policy.clone();
|
|
let mut builder = test_codex().with_config(move |config| {
|
|
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
|
|
config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config);
|
|
});
|
|
let test = builder.build(&server).await?;
|
|
let allow_prefix_path = test.cwd.path().join("allow-prefix.txt");
|
|
let _ = fs::remove_file(&allow_prefix_path);
|
|
|
|
let call_id_first = "allow-prefix-first";
|
|
let (first_event, expected_command) = ActionKind::RunCommand {
|
|
command: "touch allow-prefix.txt",
|
|
}
|
|
.prepare(
|
|
&test,
|
|
&server,
|
|
call_id_first,
|
|
SandboxPermissions::UseDefault,
|
|
)
|
|
.await?;
|
|
let expected_command =
|
|
expected_command.expect("execpolicy amendment scenario should produce a shell command");
|
|
let expected_execpolicy_amendment =
|
|
ExecPolicyAmendment::new(vec!["touch".to_string(), "allow-prefix.txt".to_string()]);
|
|
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-allow-prefix-1"),
|
|
first_event,
|
|
ev_completed("resp-allow-prefix-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
let first_results = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-allow-prefix-1", "done"),
|
|
ev_completed("resp-allow-prefix-2"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
submit_turn(
|
|
&test,
|
|
"allow-prefix-first",
|
|
approval_policy,
|
|
sandbox_policy.clone(),
|
|
)
|
|
.await?;
|
|
|
|
let approval = expect_exec_approval(&test, expected_command.as_str()).await;
|
|
assert_eq!(
|
|
approval.proposed_execpolicy_amendment,
|
|
Some(expected_execpolicy_amendment.clone())
|
|
);
|
|
|
|
test.codex
|
|
.submit(Op::ExecApproval {
|
|
id: approval.effective_approval_id(),
|
|
turn_id: None,
|
|
decision: ReviewDecision::ApprovedExecpolicyAmendment {
|
|
proposed_execpolicy_amendment: expected_execpolicy_amendment.clone(),
|
|
},
|
|
})
|
|
.await?;
|
|
wait_for_completion(&test).await;
|
|
|
|
let developer_messages = first_results
|
|
.single_request()
|
|
.message_input_texts("developer");
|
|
assert!(
|
|
developer_messages
|
|
.iter()
|
|
.any(|message| message.contains(r#"["touch", "allow-prefix.txt"]"#)),
|
|
"expected developer message documenting saved rule, got: {developer_messages:?}"
|
|
);
|
|
|
|
let policy_path = test.home.path().join("rules").join("default.rules");
|
|
let policy_contents = fs::read_to_string(&policy_path)?;
|
|
assert!(
|
|
policy_contents
|
|
.contains(r#"prefix_rule(pattern=["touch", "allow-prefix.txt"], decision="allow")"#),
|
|
"unexpected policy contents: {policy_contents}"
|
|
);
|
|
|
|
let first_output = parse_result(
|
|
&first_results
|
|
.single_request()
|
|
.function_call_output(call_id_first),
|
|
);
|
|
assert_eq!(first_output.exit_code.unwrap_or(0), 0);
|
|
assert!(
|
|
first_output.stdout.is_empty(),
|
|
"unexpected stdout: {}",
|
|
first_output.stdout
|
|
);
|
|
assert_eq!(
|
|
fs::read_to_string(&allow_prefix_path)?,
|
|
"",
|
|
"unexpected file contents after first run"
|
|
);
|
|
|
|
let call_id_second = "allow-prefix-second";
|
|
let (second_event, second_command) = ActionKind::RunCommand {
|
|
command: "touch allow-prefix.txt",
|
|
}
|
|
.prepare(
|
|
&test,
|
|
&server,
|
|
call_id_second,
|
|
SandboxPermissions::UseDefault,
|
|
)
|
|
.await?;
|
|
assert_eq!(second_command.as_deref(), Some(expected_command.as_str()));
|
|
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-allow-prefix-3"),
|
|
second_event,
|
|
ev_completed("resp-allow-prefix-3"),
|
|
]),
|
|
)
|
|
.await;
|
|
let second_results = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-allow-prefix-2", "done"),
|
|
ev_completed("resp-allow-prefix-4"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
submit_turn(
|
|
&test,
|
|
"allow-prefix-second",
|
|
approval_policy,
|
|
sandbox_policy.clone(),
|
|
)
|
|
.await?;
|
|
|
|
wait_for_completion_without_approval(&test).await;
|
|
|
|
let second_output = parse_result(
|
|
&second_results
|
|
.single_request()
|
|
.function_call_output(call_id_second),
|
|
);
|
|
assert_eq!(second_output.exit_code.unwrap_or(0), 0);
|
|
assert!(
|
|
second_output.stdout.is_empty(),
|
|
"unexpected stdout: {}",
|
|
second_output.stdout
|
|
);
|
|
assert_eq!(
|
|
fs::read_to_string(&allow_prefix_path)?,
|
|
"",
|
|
"unexpected file contents after second run"
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
#[cfg(unix)]
|
|
async fn unified_exec_zsh_fork_execpolicy_amendment_skips_later_subcommands() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let Some(runtime) = zsh_fork_runtime("unified exec zsh-fork execpolicy amendment test")? else {
|
|
return Ok(());
|
|
};
|
|
|
|
let approval_policy = AskForApproval::UnlessTrusted;
|
|
let sandbox_policy = SandboxPolicy::new_read_only_policy();
|
|
let server = start_mock_server().await;
|
|
let test = build_unified_exec_zsh_fork_test(
|
|
&server,
|
|
runtime,
|
|
approval_policy,
|
|
sandbox_policy.clone(),
|
|
|_| {},
|
|
)
|
|
.await?;
|
|
let allow_prefix_path = test.cwd.path().join("allow-prefix-zsh-fork.txt");
|
|
let _ = fs::remove_file(&allow_prefix_path);
|
|
|
|
let call_id = "allow-prefix-zsh-fork";
|
|
let command = "touch allow-prefix-zsh-fork.txt && touch allow-prefix-zsh-fork.txt";
|
|
let event = exec_command_event(
|
|
call_id,
|
|
command,
|
|
Some(1_000),
|
|
SandboxPermissions::UseDefault,
|
|
None,
|
|
)?;
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-zsh-fork-allow-prefix-1"),
|
|
event,
|
|
ev_completed("resp-zsh-fork-allow-prefix-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
let results = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-zsh-fork-allow-prefix-1", "done"),
|
|
ev_completed("resp-zsh-fork-allow-prefix-2"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
submit_turn(
|
|
&test,
|
|
"allow-prefix-zsh-fork",
|
|
approval_policy,
|
|
sandbox_policy,
|
|
)
|
|
.await?;
|
|
|
|
let expected_execpolicy_amendment = ExecPolicyAmendment::new(vec![
|
|
"touch".to_string(),
|
|
"allow-prefix-zsh-fork.txt".to_string(),
|
|
]);
|
|
let mut saw_parent_approval = false;
|
|
let mut saw_subcommand_approval = false;
|
|
loop {
|
|
let event = wait_for_event(&test.codex, |event| {
|
|
matches!(
|
|
event,
|
|
EventMsg::ExecApprovalRequest(_) | EventMsg::TurnComplete(_)
|
|
)
|
|
})
|
|
.await;
|
|
|
|
match event {
|
|
EventMsg::TurnComplete(_) => break,
|
|
EventMsg::ExecApprovalRequest(approval) => {
|
|
let command_parts = approval.command.clone();
|
|
let last_arg = command_parts.last().map(String::as_str).unwrap_or_default();
|
|
if last_arg == command {
|
|
assert!(
|
|
!saw_parent_approval,
|
|
"unexpected duplicate parent approval: {command_parts:?}"
|
|
);
|
|
saw_parent_approval = true;
|
|
test.codex
|
|
.submit(Op::ExecApproval {
|
|
id: approval.effective_approval_id(),
|
|
turn_id: None,
|
|
decision: ReviewDecision::Approved,
|
|
})
|
|
.await?;
|
|
continue;
|
|
}
|
|
|
|
let is_touch_subcommand = command_parts
|
|
.iter()
|
|
.any(|part| part == "allow-prefix-zsh-fork.txt")
|
|
&& command_parts
|
|
.first()
|
|
.is_some_and(|part| part.ends_with("/touch") || part == "touch");
|
|
if is_touch_subcommand {
|
|
assert!(
|
|
!saw_subcommand_approval,
|
|
"execpolicy amendment should suppress later matching subcommand approvals: {command_parts:?}"
|
|
);
|
|
saw_subcommand_approval = true;
|
|
assert_eq!(
|
|
approval.proposed_execpolicy_amendment,
|
|
Some(expected_execpolicy_amendment.clone())
|
|
);
|
|
test.codex
|
|
.submit(Op::ExecApproval {
|
|
id: approval.effective_approval_id(),
|
|
turn_id: None,
|
|
decision: ReviewDecision::ApprovedExecpolicyAmendment {
|
|
proposed_execpolicy_amendment: expected_execpolicy_amendment
|
|
.clone(),
|
|
},
|
|
})
|
|
.await?;
|
|
continue;
|
|
}
|
|
|
|
test.codex
|
|
.submit(Op::ExecApproval {
|
|
id: approval.effective_approval_id(),
|
|
turn_id: None,
|
|
decision: ReviewDecision::Approved,
|
|
})
|
|
.await?;
|
|
}
|
|
other => panic!("unexpected event: {other:?}"),
|
|
}
|
|
}
|
|
|
|
assert!(saw_parent_approval, "expected parent unified-exec approval");
|
|
assert!(
|
|
saw_subcommand_approval,
|
|
"expected at least one intercepted touch approval"
|
|
);
|
|
|
|
let result = parse_result(&results.single_request().function_call_output(call_id));
|
|
assert_eq!(result.exit_code.unwrap_or(0), 0);
|
|
assert!(
|
|
allow_prefix_path.exists(),
|
|
"expected touch command to complete after approving the first intercepted subcommand; output: {}",
|
|
result.stdout
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
#[cfg(unix)]
|
|
async fn matched_prefix_rule_runs_unsandboxed_under_zsh_fork() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let Some(runtime) = zsh_fork_runtime("zsh-fork prefix rule unsandboxed test")? else {
|
|
return Ok(());
|
|
};
|
|
|
|
let approval_policy = AskForApproval::Never;
|
|
let sandbox_policy = restrictive_workspace_write_policy();
|
|
let outside_dir = tempfile::tempdir_in(std::env::current_dir()?)?;
|
|
let outside_path = outside_dir
|
|
.path()
|
|
.join("zsh-fork-prefix-rule-unsandboxed.txt");
|
|
let command = format!("touch {outside_path:?}");
|
|
let rules = r#"prefix_rule(pattern=["touch"], decision="allow")"#.to_string();
|
|
|
|
let server = start_mock_server().await;
|
|
let outside_path_for_hook = outside_path.clone();
|
|
let test = build_zsh_fork_test(
|
|
&server,
|
|
runtime,
|
|
approval_policy,
|
|
sandbox_policy.clone(),
|
|
move |home| {
|
|
let _ = fs::remove_file(&outside_path_for_hook);
|
|
let rules_dir = home.join("rules");
|
|
fs::create_dir_all(&rules_dir).unwrap();
|
|
fs::write(rules_dir.join("default.rules"), &rules).unwrap();
|
|
},
|
|
)
|
|
.await?;
|
|
|
|
let call_id = "zsh-fork-prefix-rule-unsandboxed";
|
|
let event = shell_event(call_id, &command, 1_000, SandboxPermissions::UseDefault)?;
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-zsh-fork-prefix-1"),
|
|
event,
|
|
ev_completed("resp-zsh-fork-prefix-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
let results = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-zsh-fork-prefix-1", "done"),
|
|
ev_completed("resp-zsh-fork-prefix-2"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
submit_turn(
|
|
&test,
|
|
"run allowed touch under zsh fork",
|
|
approval_policy,
|
|
sandbox_policy,
|
|
)
|
|
.await?;
|
|
|
|
wait_for_completion_without_approval(&test).await;
|
|
|
|
let result = parse_result(&results.single_request().function_call_output(call_id));
|
|
assert_eq!(result.exit_code.unwrap_or(0), 0);
|
|
assert!(
|
|
outside_path.exists(),
|
|
"expected matched prefix_rule to rerun touch unsandboxed; output: {}",
|
|
result.stdout
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "current_thread")]
|
|
#[cfg(unix)]
|
|
async fn invalid_requested_prefix_rule_falls_back_for_compound_command() -> Result<()> {
|
|
let server = start_mock_server().await;
|
|
let approval_policy = AskForApproval::OnRequest;
|
|
let sandbox_policy = SandboxPolicy::new_read_only_policy();
|
|
let sandbox_policy_for_config = sandbox_policy.clone();
|
|
let mut builder = test_codex().with_config(move |config| {
|
|
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
|
|
config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config);
|
|
});
|
|
let test = builder.build(&server).await?;
|
|
|
|
let call_id = "invalid-prefix-rule";
|
|
let command =
|
|
"touch /tmp/codex-fallback-rule-test.txt && echo hello > /tmp/codex-fallback-rule-test.txt";
|
|
let event = shell_event_with_prefix_rule(
|
|
call_id,
|
|
command,
|
|
1_000,
|
|
SandboxPermissions::RequireEscalated,
|
|
Some(vec!["touch".to_string()]),
|
|
)?;
|
|
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-invalid-prefix-1"),
|
|
event,
|
|
ev_completed("resp-invalid-prefix-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
submit_turn(
|
|
&test,
|
|
"invalid-prefix-rule",
|
|
approval_policy,
|
|
sandbox_policy.clone(),
|
|
)
|
|
.await?;
|
|
|
|
let approval = expect_exec_approval(&test, command).await;
|
|
let amendment = approval
|
|
.proposed_execpolicy_amendment
|
|
.expect("should have a proposed execpolicy amendment");
|
|
assert!(amendment.command.contains(&command.to_string()));
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "current_thread")]
|
|
#[cfg(unix)]
|
|
async fn approving_fallback_rule_for_compound_command_works() -> Result<()> {
|
|
let server = start_mock_server().await;
|
|
let approval_policy = AskForApproval::OnRequest;
|
|
let sandbox_policy = SandboxPolicy::new_read_only_policy();
|
|
let sandbox_policy_for_config = sandbox_policy.clone();
|
|
let mut builder = test_codex().with_config(move |config| {
|
|
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
|
|
config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config);
|
|
});
|
|
let test = builder.build(&server).await?;
|
|
|
|
let call_id = "invalid-prefix-rule";
|
|
let command =
|
|
"touch /tmp/codex-fallback-rule-test.txt && echo hello > /tmp/codex-fallback-rule-test.txt";
|
|
let event = shell_event_with_prefix_rule(
|
|
call_id,
|
|
command,
|
|
1_000,
|
|
SandboxPermissions::RequireEscalated,
|
|
Some(vec!["touch".to_string()]),
|
|
)?;
|
|
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-invalid-prefix-1"),
|
|
event,
|
|
ev_completed("resp-invalid-prefix-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
submit_turn(
|
|
&test,
|
|
"invalid-prefix-rule",
|
|
approval_policy,
|
|
sandbox_policy.clone(),
|
|
)
|
|
.await?;
|
|
|
|
let approval = expect_exec_approval(&test, command).await;
|
|
let approval_id = approval.effective_approval_id();
|
|
let amendment = approval
|
|
.proposed_execpolicy_amendment
|
|
.expect("should have a proposed execpolicy amendment");
|
|
assert!(amendment.command.contains(&command.to_string()));
|
|
|
|
test.codex
|
|
.submit(Op::ExecApproval {
|
|
id: approval_id,
|
|
turn_id: None,
|
|
decision: ReviewDecision::ApprovedExecpolicyAmendment {
|
|
proposed_execpolicy_amendment: amendment.clone(),
|
|
},
|
|
})
|
|
.await?;
|
|
wait_for_completion(&test).await;
|
|
|
|
let call_id = "invalid-prefix-rule-again";
|
|
let command =
|
|
"touch /tmp/codex-fallback-rule-test.txt && echo hello > /tmp/codex-fallback-rule-test.txt";
|
|
let event = shell_event_with_prefix_rule(
|
|
call_id,
|
|
command,
|
|
1_000,
|
|
SandboxPermissions::RequireEscalated,
|
|
Some(vec!["touch".to_string()]),
|
|
)?;
|
|
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-invalid-prefix-1"),
|
|
event,
|
|
ev_completed("resp-invalid-prefix-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
let second_results = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-invalid-prefix-1", "done"),
|
|
ev_completed("resp-invalid-prefix-2"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
submit_turn(
|
|
&test,
|
|
"invalid-prefix-rule",
|
|
approval_policy,
|
|
sandbox_policy.clone(),
|
|
)
|
|
.await?;
|
|
|
|
wait_for_completion_without_approval(&test).await;
|
|
|
|
let second_output = parse_result(
|
|
&second_results
|
|
.single_request()
|
|
.function_call_output(call_id),
|
|
);
|
|
assert_eq!(second_output.exit_code.unwrap_or(0), 0);
|
|
assert!(
|
|
second_output.stdout.is_empty(),
|
|
"unexpected stdout: {}",
|
|
second_output.stdout
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "current_thread")]
|
|
async fn denying_network_policy_amendment_persists_policy_and_skips_future_network_prompt()
|
|
-> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = start_mock_server().await;
|
|
let home = Arc::new(TempDir::new()?);
|
|
fs::write(
|
|
home.path().join("config.toml"),
|
|
r#"default_permissions = "workspace"
|
|
|
|
[permissions.workspace.filesystem]
|
|
":minimal" = "read"
|
|
|
|
[permissions.workspace.network]
|
|
enabled = true
|
|
mode = "limited"
|
|
allow_local_binding = true
|
|
"#,
|
|
)?;
|
|
let approval_policy = AskForApproval::OnFailure;
|
|
let sandbox_policy = SandboxPolicy::WorkspaceWrite {
|
|
writable_roots: vec![],
|
|
read_only_access: Default::default(),
|
|
network_access: true,
|
|
exclude_tmpdir_env_var: false,
|
|
exclude_slash_tmp: false,
|
|
};
|
|
let sandbox_policy_for_config = sandbox_policy.clone();
|
|
let mut builder = test_codex().with_home(home).with_config(move |config| {
|
|
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
|
|
config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config);
|
|
let layers = config
|
|
.config_layer_stack
|
|
.get_layers(ConfigLayerStackOrdering::LowestPrecedenceFirst, true)
|
|
.into_iter()
|
|
.cloned()
|
|
.collect();
|
|
let mut requirements = config.config_layer_stack.requirements().clone();
|
|
requirements.network = Some(Sourced::new(
|
|
NetworkConstraints {
|
|
enabled: Some(true),
|
|
allow_local_binding: Some(true),
|
|
..Default::default()
|
|
},
|
|
RequirementSource::CloudRequirements,
|
|
));
|
|
let mut requirements_toml = config.config_layer_stack.requirements_toml().clone();
|
|
requirements_toml.network = Some(NetworkRequirementsToml {
|
|
enabled: Some(true),
|
|
allow_local_binding: Some(true),
|
|
..Default::default()
|
|
});
|
|
config.config_layer_stack = ConfigLayerStack::new(layers, requirements, requirements_toml)
|
|
.expect("rebuild config layer stack with network requirements");
|
|
});
|
|
let test = builder.build(&server).await?;
|
|
assert!(
|
|
test.config.managed_network_requirements_enabled(),
|
|
"expected managed network requirements to be enabled"
|
|
);
|
|
assert!(
|
|
test.config.permissions.network.is_some(),
|
|
"expected managed network proxy config to be present"
|
|
);
|
|
test.session_configured
|
|
.network_proxy
|
|
.as_ref()
|
|
.expect("expected runtime managed network proxy addresses");
|
|
|
|
let call_id_first = "allow-network-first";
|
|
// Use urllib without overriding proxy settings so managed-network sessions
|
|
// continue to exercise the env-based proxy routing path under bubblewrap.
|
|
let fetch_command = r#"python3 -c "import urllib.request; opener = urllib.request.build_opener(urllib.request.ProxyHandler()); print('OK:' + opener.open('http://codex-network-test.invalid', timeout=30).read().decode(errors='replace'))""#
|
|
.to_string();
|
|
let first_event = shell_event(
|
|
call_id_first,
|
|
&fetch_command,
|
|
30_000,
|
|
SandboxPermissions::UseDefault,
|
|
)?;
|
|
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-allow-network-1"),
|
|
first_event,
|
|
ev_completed("resp-allow-network-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
let first_results = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-allow-network-1", "done"),
|
|
ev_completed("resp-allow-network-2"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
submit_turn(
|
|
&test,
|
|
"allow-network-first",
|
|
approval_policy,
|
|
sandbox_policy.clone(),
|
|
)
|
|
.await?;
|
|
|
|
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(30);
|
|
let approval = loop {
|
|
let remaining = deadline
|
|
.checked_duration_since(std::time::Instant::now())
|
|
.expect("timed out waiting for network approval request");
|
|
let event = wait_for_event_with_timeout(
|
|
&test.codex,
|
|
|event| {
|
|
matches!(
|
|
event,
|
|
EventMsg::ExecApprovalRequest(_) | EventMsg::TurnComplete(_)
|
|
)
|
|
},
|
|
remaining,
|
|
)
|
|
.await;
|
|
match event {
|
|
EventMsg::ExecApprovalRequest(approval) => {
|
|
if approval.command.first().map(std::string::String::as_str)
|
|
== Some("network-access")
|
|
{
|
|
break approval;
|
|
}
|
|
test.codex
|
|
.submit(Op::ExecApproval {
|
|
id: approval.effective_approval_id(),
|
|
turn_id: None,
|
|
decision: ReviewDecision::Approved,
|
|
})
|
|
.await?;
|
|
}
|
|
EventMsg::TurnComplete(_) => {
|
|
panic!("expected network approval request before completion");
|
|
}
|
|
other => panic!("unexpected event: {other:?}"),
|
|
}
|
|
};
|
|
let network_context = approval
|
|
.network_approval_context
|
|
.clone()
|
|
.expect("expected network approval context");
|
|
assert_eq!(network_context.protocol, NetworkApprovalProtocol::Http);
|
|
let expected_network_amendments = vec![
|
|
NetworkPolicyAmendment {
|
|
host: network_context.host.clone(),
|
|
action: NetworkPolicyRuleAction::Allow,
|
|
},
|
|
NetworkPolicyAmendment {
|
|
host: network_context.host.clone(),
|
|
action: NetworkPolicyRuleAction::Deny,
|
|
},
|
|
];
|
|
assert_eq!(
|
|
approval.proposed_network_policy_amendments,
|
|
Some(expected_network_amendments.clone())
|
|
);
|
|
let deny_network_amendment = expected_network_amendments
|
|
.into_iter()
|
|
.find(|amendment| amendment.action == NetworkPolicyRuleAction::Deny)
|
|
.expect("expected deny network policy amendment");
|
|
|
|
test.codex
|
|
.submit(Op::ExecApproval {
|
|
id: approval.effective_approval_id(),
|
|
turn_id: None,
|
|
decision: ReviewDecision::NetworkPolicyAmendment {
|
|
network_policy_amendment: deny_network_amendment.clone(),
|
|
},
|
|
})
|
|
.await?;
|
|
wait_for_completion(&test).await;
|
|
|
|
let policy_path = test.home.path().join("rules").join("default.rules");
|
|
let policy_contents = fs::read_to_string(&policy_path)?;
|
|
let expected_rule = format!(
|
|
r#"network_rule(host="{}", protocol="{}", decision="deny", justification="Deny {} access to {}")"#,
|
|
deny_network_amendment.host,
|
|
match network_context.protocol {
|
|
NetworkApprovalProtocol::Http => "http",
|
|
NetworkApprovalProtocol::Https => "https_connect",
|
|
NetworkApprovalProtocol::Socks5Tcp => "socks5_tcp",
|
|
NetworkApprovalProtocol::Socks5Udp => "socks5_udp",
|
|
},
|
|
match network_context.protocol {
|
|
NetworkApprovalProtocol::Http => "http",
|
|
NetworkApprovalProtocol::Https => "https_connect",
|
|
NetworkApprovalProtocol::Socks5Tcp => "socks5_tcp",
|
|
NetworkApprovalProtocol::Socks5Udp => "socks5_udp",
|
|
},
|
|
deny_network_amendment.host
|
|
);
|
|
assert!(
|
|
policy_contents.contains(&expected_rule),
|
|
"unexpected policy contents: {policy_contents}"
|
|
);
|
|
|
|
let first_output = parse_result(
|
|
&first_results
|
|
.single_request()
|
|
.function_call_output(call_id_first),
|
|
);
|
|
Expectation::CommandFailure {
|
|
output_contains: "",
|
|
}
|
|
.verify(&test, &first_output)?;
|
|
|
|
let call_id_second = "allow-network-second";
|
|
let second_event = shell_event(
|
|
call_id_second,
|
|
&fetch_command,
|
|
30_000,
|
|
SandboxPermissions::UseDefault,
|
|
)?;
|
|
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-allow-network-3"),
|
|
second_event,
|
|
ev_completed("resp-allow-network-3"),
|
|
]),
|
|
)
|
|
.await;
|
|
let second_results = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-allow-network-2", "done"),
|
|
ev_completed("resp-allow-network-4"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
submit_turn(
|
|
&test,
|
|
"allow-network-second",
|
|
approval_policy,
|
|
sandbox_policy.clone(),
|
|
)
|
|
.await?;
|
|
|
|
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(30);
|
|
loop {
|
|
let remaining = deadline
|
|
.checked_duration_since(std::time::Instant::now())
|
|
.expect("timed out waiting for second turn completion");
|
|
let event = wait_for_event_with_timeout(
|
|
&test.codex,
|
|
|event| {
|
|
matches!(
|
|
event,
|
|
EventMsg::ExecApprovalRequest(_) | EventMsg::TurnComplete(_)
|
|
)
|
|
},
|
|
remaining,
|
|
)
|
|
.await;
|
|
match event {
|
|
EventMsg::ExecApprovalRequest(approval) => {
|
|
if approval.command.first().map(std::string::String::as_str)
|
|
== Some("network-access")
|
|
{
|
|
panic!(
|
|
"unexpected network approval request: {:?}",
|
|
approval.command
|
|
);
|
|
}
|
|
test.codex
|
|
.submit(Op::ExecApproval {
|
|
id: approval.effective_approval_id(),
|
|
turn_id: None,
|
|
decision: ReviewDecision::Approved,
|
|
})
|
|
.await?;
|
|
}
|
|
EventMsg::TurnComplete(_) => break,
|
|
other => panic!("unexpected event: {other:?}"),
|
|
}
|
|
}
|
|
|
|
let second_output = parse_result(
|
|
&second_results
|
|
.single_request()
|
|
.function_call_output(call_id_second),
|
|
);
|
|
Expectation::CommandFailure {
|
|
output_contains: "",
|
|
}
|
|
.verify(&test, &second_output)?;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
// todo(dylan) add ScenarioSpec support for rules
|
|
#[tokio::test(flavor = "current_thread")]
|
|
#[cfg(unix)]
|
|
async fn compound_command_with_one_safe_command_still_requires_approval() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = start_mock_server().await;
|
|
let approval_policy = AskForApproval::UnlessTrusted;
|
|
let sandbox_policy = SandboxPolicy::new_workspace_write_policy();
|
|
let sandbox_policy_for_config = sandbox_policy.clone();
|
|
let mut builder = test_codex().with_config(move |config| {
|
|
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
|
|
config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config);
|
|
});
|
|
let test = builder.build(&server).await?;
|
|
|
|
let rules_dir = test.home.path().join("rules");
|
|
fs::create_dir_all(&rules_dir)?;
|
|
fs::write(
|
|
rules_dir.join("default.rules"),
|
|
r#"prefix_rule(pattern=["touch", "allow-prefix.txt"], decision="allow")"#,
|
|
)?;
|
|
|
|
let call_id = "heredoc-with-chained-prefix";
|
|
let command = "touch ./test.txt && rm ./test.txt";
|
|
let (event, expected_command) = ActionKind::RunCommand { command }
|
|
.prepare(&test, &server, call_id, SandboxPermissions::UseDefault)
|
|
.await?;
|
|
let expected_command =
|
|
expected_command.expect("compound command should produce a shell command");
|
|
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-heredoc-prefix-1"),
|
|
event,
|
|
ev_completed("resp-heredoc-prefix-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-heredoc-prefix-1", "done"),
|
|
ev_completed("resp-heredoc-prefix-2"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
submit_turn(
|
|
&test,
|
|
"compound command",
|
|
approval_policy,
|
|
sandbox_policy.clone(),
|
|
)
|
|
.await?;
|
|
|
|
let approval = expect_exec_approval(&test, expected_command.as_str()).await;
|
|
test.codex
|
|
.submit(Op::ExecApproval {
|
|
id: approval.effective_approval_id(),
|
|
turn_id: None,
|
|
decision: ReviewDecision::Denied,
|
|
})
|
|
.await?;
|
|
wait_for_completion(&test).await;
|
|
|
|
Ok(())
|
|
}
|