mirror of
https://github.com/openai/codex.git
synced 2026-04-27 08:05:51 +00:00
## Why `argument-comment-lint` was green in CI even though the repo still had many uncommented literal arguments. The main gap was target coverage: the repo wrapper did not force Cargo to inspect test-only call sites, so examples like the `latest_session_lookup_params(true, ...)` tests in `codex-rs/tui_app_server/src/lib.rs` never entered the blocking CI path. This change cleans up the existing backlog, makes the default repo lint path cover all Cargo targets, and starts rolling that stricter CI enforcement out on the platform where it is currently validated. ## What changed - mechanically fixed existing `argument-comment-lint` violations across the `codex-rs` workspace, including tests, examples, and benches - updated `tools/argument-comment-lint/run-prebuilt-linter.sh` and `tools/argument-comment-lint/run.sh` so non-`--fix` runs default to `--all-targets` unless the caller explicitly narrows the target set - fixed both wrappers so forwarded cargo arguments after `--` are preserved with a single separator - documented the new default behavior in `tools/argument-comment-lint/README.md` - updated `rust-ci` so the macOS lint lane keeps the plain wrapper invocation and therefore enforces `--all-targets`, while Linux and Windows temporarily pass `-- --lib --bins` That temporary CI split keeps the stricter all-targets check where it is already cleaned up, while leaving room to finish the remaining Linux- and Windows-specific target-gated cleanup before enabling `--all-targets` on those runners. The Linux and Windows failures on the intermediate revision were caused by the wrapper forwarding bug, not by additional lint findings in those lanes. ## Validation - `bash -n tools/argument-comment-lint/run.sh` - `bash -n tools/argument-comment-lint/run-prebuilt-linter.sh` - shell-level wrapper forwarding check for `-- --lib --bins` - shell-level wrapper forwarding check for `-- --tests` - `just argument-comment-lint` - `cargo test` in `tools/argument-comment-lint` - `cargo test -p codex-terminal-detection` ## Follow-up - Clean up remaining Linux-only target-gated callsites, then switch the Linux lint lane back to the plain wrapper invocation. - Clean up remaining Windows-only target-gated callsites, then switch the Windows lint lane back to the plain wrapper invocation.
2875 lines
102 KiB
Rust
2875 lines
102 KiB
Rust
#![allow(clippy::unwrap_used, clippy::expect_used)]
|
|
|
|
use anyhow::Result;
|
|
use codex_core::CodexThread;
|
|
use codex_core::config::Constrained;
|
|
use codex_core::config_loader::ConfigLayerStack;
|
|
use codex_core::config_loader::ConfigLayerStackOrdering;
|
|
use codex_core::config_loader::NetworkConstraints;
|
|
use codex_core::config_loader::NetworkRequirementsToml;
|
|
use codex_core::config_loader::RequirementSource;
|
|
use codex_core::config_loader::Sourced;
|
|
use codex_core::sandboxing::SandboxPermissions;
|
|
use codex_features::Feature;
|
|
use codex_protocol::approvals::NetworkApprovalProtocol;
|
|
use codex_protocol::approvals::NetworkPolicyAmendment;
|
|
use codex_protocol::approvals::NetworkPolicyRuleAction;
|
|
use codex_protocol::protocol::ApplyPatchApprovalRequestEvent;
|
|
use codex_protocol::protocol::AskForApproval;
|
|
use codex_protocol::protocol::EventMsg;
|
|
use codex_protocol::protocol::ExecApprovalRequestEvent;
|
|
use codex_protocol::protocol::ExecPolicyAmendment;
|
|
use codex_protocol::protocol::Op;
|
|
use codex_protocol::protocol::ReviewDecision;
|
|
use codex_protocol::protocol::SandboxPolicy;
|
|
use codex_protocol::user_input::UserInput;
|
|
use core_test_support::responses::ev_apply_patch_function_call;
|
|
use core_test_support::responses::ev_assistant_message;
|
|
use core_test_support::responses::ev_completed;
|
|
use core_test_support::responses::ev_function_call;
|
|
use core_test_support::responses::ev_response_created;
|
|
use core_test_support::responses::mount_sse_once;
|
|
use core_test_support::responses::mount_sse_once_match;
|
|
use core_test_support::responses::sse;
|
|
use core_test_support::responses::start_mock_server;
|
|
use core_test_support::skip_if_no_network;
|
|
use core_test_support::test_codex::TestCodex;
|
|
use core_test_support::test_codex::test_codex;
|
|
use core_test_support::wait_for_event;
|
|
use core_test_support::wait_for_event_with_timeout;
|
|
use core_test_support::zsh_fork::build_zsh_fork_test;
|
|
use core_test_support::zsh_fork::restrictive_workspace_write_policy;
|
|
use core_test_support::zsh_fork::zsh_fork_runtime;
|
|
use pretty_assertions::assert_eq;
|
|
use regex_lite::Regex;
|
|
use serde_json::Value;
|
|
use serde_json::json;
|
|
use std::env;
|
|
use std::fs;
|
|
use std::path::PathBuf;
|
|
use std::sync::Arc;
|
|
use std::time::Duration;
|
|
use tempfile::TempDir;
|
|
use wiremock::Mock;
|
|
use wiremock::MockServer;
|
|
use wiremock::Request;
|
|
use wiremock::ResponseTemplate;
|
|
use wiremock::matchers::method;
|
|
use wiremock::matchers::path;
|
|
|
|
#[derive(Clone, Copy)]
|
|
enum TargetPath {
|
|
Workspace(&'static str),
|
|
OutsideWorkspace(&'static str),
|
|
}
|
|
|
|
impl TargetPath {
|
|
fn resolve_for_patch(self, test: &TestCodex) -> (PathBuf, String) {
|
|
match self {
|
|
TargetPath::Workspace(name) => {
|
|
let path = test.cwd.path().join(name);
|
|
(path, name.to_string())
|
|
}
|
|
TargetPath::OutsideWorkspace(name) => {
|
|
let path = env::current_dir()
|
|
.expect("current dir should be available")
|
|
.join(name);
|
|
(path.clone(), path.display().to_string())
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Clone)]
|
|
enum ActionKind {
|
|
WriteFile {
|
|
target: TargetPath,
|
|
content: &'static str,
|
|
},
|
|
FetchUrlNoProxy {
|
|
endpoint: &'static str,
|
|
response_body: &'static str,
|
|
},
|
|
FetchUrl {
|
|
endpoint: &'static str,
|
|
response_body: &'static str,
|
|
},
|
|
RunCommand {
|
|
command: &'static str,
|
|
},
|
|
RunUnifiedExecCommand {
|
|
command: &'static str,
|
|
justification: Option<&'static str>,
|
|
},
|
|
ApplyPatchFunction {
|
|
target: TargetPath,
|
|
content: &'static str,
|
|
},
|
|
ApplyPatchShell {
|
|
target: TargetPath,
|
|
content: &'static str,
|
|
},
|
|
}
|
|
|
|
const DEFAULT_UNIFIED_EXEC_JUSTIFICATION: &str =
|
|
"Requires escalated permissions to bypass the sandbox in tests.";
|
|
|
|
impl ActionKind {
|
|
async fn prepare(
|
|
&self,
|
|
test: &TestCodex,
|
|
server: &MockServer,
|
|
call_id: &str,
|
|
sandbox_permissions: SandboxPermissions,
|
|
) -> Result<(Value, Option<String>)> {
|
|
match self {
|
|
ActionKind::WriteFile { target, content } => {
|
|
let (path, _) = target.resolve_for_patch(test);
|
|
let _ = fs::remove_file(&path);
|
|
let path_str = path.display().to_string();
|
|
let script = format!(
|
|
"from pathlib import Path; path = Path({path_str:?}); content = {content:?}; path.write_text(content, encoding='utf-8'); print(path.read_text(encoding='utf-8'), end='')",
|
|
);
|
|
let command = format!("python3 -c {script:?}");
|
|
let event = shell_event(
|
|
call_id,
|
|
&command,
|
|
/*timeout_ms*/ 5_000,
|
|
sandbox_permissions,
|
|
)?;
|
|
Ok((event, Some(command)))
|
|
}
|
|
ActionKind::FetchUrl {
|
|
endpoint,
|
|
response_body,
|
|
} => {
|
|
Mock::given(method("GET"))
|
|
.and(path(*endpoint))
|
|
.respond_with(
|
|
ResponseTemplate::new(200).set_body_string(response_body.to_string()),
|
|
)
|
|
.mount(server)
|
|
.await;
|
|
|
|
let url = format!("{}{}", server.uri(), endpoint);
|
|
let escaped_url = url.replace('\'', "\\'");
|
|
let script = format!(
|
|
"import sys\nimport urllib.request\nurl = '{escaped_url}'\ntry:\n data = urllib.request.urlopen(url, timeout=2).read().decode()\n print('OK:' + data.strip())\nexcept Exception as exc:\n print('ERR:' + exc.__class__.__name__)\n sys.exit(1)",
|
|
);
|
|
|
|
let command = format!("python3 -c \"{script}\"");
|
|
let event = shell_event(
|
|
call_id,
|
|
&command,
|
|
/*timeout_ms*/ 5_000,
|
|
sandbox_permissions,
|
|
)?;
|
|
Ok((event, Some(command)))
|
|
}
|
|
ActionKind::FetchUrlNoProxy {
|
|
endpoint,
|
|
response_body,
|
|
} => {
|
|
Mock::given(method("GET"))
|
|
.and(path(*endpoint))
|
|
.respond_with(
|
|
ResponseTemplate::new(200).set_body_string(response_body.to_string()),
|
|
)
|
|
.mount(server)
|
|
.await;
|
|
|
|
let url = format!("{}{}", server.uri(), endpoint);
|
|
let escaped_url = url.replace('\'', "\\'");
|
|
let script = format!(
|
|
"import sys\nimport urllib.request\nurl = '{escaped_url}'\nopener = urllib.request.build_opener(urllib.request.ProxyHandler({{}}))\ntry:\n data = opener.open(url, timeout=2).read().decode()\n print('OK:' + data.strip())\nexcept Exception as exc:\n print('ERR:' + exc.__class__.__name__)\n sys.exit(1)",
|
|
);
|
|
|
|
let command = format!("python3 -c \"{script}\"");
|
|
let event = shell_event(
|
|
call_id,
|
|
&command,
|
|
/*timeout_ms*/ 5_000,
|
|
sandbox_permissions,
|
|
)?;
|
|
Ok((event, Some(command)))
|
|
}
|
|
ActionKind::RunCommand { command } => {
|
|
let event = shell_event(
|
|
call_id,
|
|
command,
|
|
/*timeout_ms*/ 2_000,
|
|
sandbox_permissions,
|
|
)?;
|
|
Ok((event, Some(command.to_string())))
|
|
}
|
|
ActionKind::RunUnifiedExecCommand {
|
|
command,
|
|
justification,
|
|
} => {
|
|
let event = exec_command_event(
|
|
call_id,
|
|
command,
|
|
Some(1000),
|
|
sandbox_permissions,
|
|
*justification,
|
|
)?;
|
|
Ok((event, Some(command.to_string())))
|
|
}
|
|
ActionKind::ApplyPatchFunction { target, content } => {
|
|
let (path, patch_path) = target.resolve_for_patch(test);
|
|
let _ = fs::remove_file(&path);
|
|
let patch = build_add_file_patch(&patch_path, content);
|
|
Ok((ev_apply_patch_function_call(call_id, &patch), None))
|
|
}
|
|
ActionKind::ApplyPatchShell { target, content } => {
|
|
let (path, patch_path) = target.resolve_for_patch(test);
|
|
let _ = fs::remove_file(&path);
|
|
let patch = build_add_file_patch(&patch_path, content);
|
|
let command = shell_apply_patch_command(&patch);
|
|
let event = shell_event(
|
|
call_id,
|
|
&command,
|
|
/*timeout_ms*/ 5_000,
|
|
sandbox_permissions,
|
|
)?;
|
|
Ok((event, Some(command)))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
fn build_add_file_patch(patch_path: &str, content: &str) -> String {
|
|
format!("*** Begin Patch\n*** Add File: {patch_path}\n+{content}\n*** End Patch\n")
|
|
}
|
|
|
|
fn shell_apply_patch_command(patch: &str) -> String {
|
|
let mut script = String::from("apply_patch <<'PATCH'\n");
|
|
script.push_str(patch);
|
|
if !patch.ends_with('\n') {
|
|
script.push('\n');
|
|
}
|
|
script.push_str("PATCH\n");
|
|
script
|
|
}
|
|
|
|
fn shell_event(
|
|
call_id: &str,
|
|
command: &str,
|
|
timeout_ms: u64,
|
|
sandbox_permissions: SandboxPermissions,
|
|
) -> Result<Value> {
|
|
shell_event_with_prefix_rule(
|
|
call_id,
|
|
command,
|
|
timeout_ms,
|
|
sandbox_permissions,
|
|
/*prefix_rule*/ None,
|
|
)
|
|
}
|
|
|
|
fn shell_event_with_prefix_rule(
|
|
call_id: &str,
|
|
command: &str,
|
|
timeout_ms: u64,
|
|
sandbox_permissions: SandboxPermissions,
|
|
prefix_rule: Option<Vec<String>>,
|
|
) -> Result<Value> {
|
|
let mut args = json!({
|
|
"command": command,
|
|
"timeout_ms": timeout_ms,
|
|
});
|
|
if sandbox_permissions.requests_sandbox_override() {
|
|
args["sandbox_permissions"] = json!(sandbox_permissions);
|
|
}
|
|
if let Some(prefix_rule) = prefix_rule {
|
|
args["prefix_rule"] = json!(prefix_rule);
|
|
}
|
|
let args_str = serde_json::to_string(&args)?;
|
|
Ok(ev_function_call(call_id, "shell_command", &args_str))
|
|
}
|
|
|
|
fn exec_command_event(
|
|
call_id: &str,
|
|
cmd: &str,
|
|
yield_time_ms: Option<u64>,
|
|
sandbox_permissions: SandboxPermissions,
|
|
justification: Option<&str>,
|
|
) -> Result<Value> {
|
|
let mut args = json!({
|
|
"cmd": cmd.to_string(),
|
|
});
|
|
if let Some(yield_time_ms) = yield_time_ms {
|
|
args["yield_time_ms"] = json!(yield_time_ms);
|
|
}
|
|
if sandbox_permissions.requests_sandbox_override() {
|
|
args["sandbox_permissions"] = json!(sandbox_permissions);
|
|
let reason = justification.unwrap_or(DEFAULT_UNIFIED_EXEC_JUSTIFICATION);
|
|
args["justification"] = json!(reason);
|
|
}
|
|
let args_str = serde_json::to_string(&args)?;
|
|
Ok(ev_function_call(call_id, "exec_command", &args_str))
|
|
}
|
|
|
|
#[derive(Clone)]
|
|
enum Expectation {
|
|
FileCreated {
|
|
target: TargetPath,
|
|
content: &'static str,
|
|
},
|
|
FileCreatedNoExitCode {
|
|
target: TargetPath,
|
|
content: &'static str,
|
|
},
|
|
PatchApplied {
|
|
target: TargetPath,
|
|
content: &'static str,
|
|
},
|
|
FileNotCreated {
|
|
target: TargetPath,
|
|
message_contains: &'static [&'static str],
|
|
},
|
|
NetworkSuccess {
|
|
body_contains: &'static str,
|
|
},
|
|
NetworkSuccessNoExitCode {
|
|
body_contains: &'static str,
|
|
},
|
|
NetworkFailure {
|
|
expect_tag: &'static str,
|
|
},
|
|
CommandSuccess {
|
|
stdout_contains: &'static str,
|
|
},
|
|
CommandSuccessNoExitCode {
|
|
stdout_contains: &'static str,
|
|
},
|
|
CommandFailure {
|
|
output_contains: &'static str,
|
|
},
|
|
}
|
|
|
|
impl Expectation {
|
|
fn verify(&self, test: &TestCodex, result: &CommandResult) -> Result<()> {
|
|
match self {
|
|
Expectation::FileCreated { target, content } => {
|
|
let (path, _) = target.resolve_for_patch(test);
|
|
assert_eq!(
|
|
result.exit_code,
|
|
Some(0),
|
|
"expected successful exit for {path:?}"
|
|
);
|
|
assert!(
|
|
result.stdout.contains(content),
|
|
"stdout missing {content:?}: {}",
|
|
result.stdout
|
|
);
|
|
let file_contents = fs::read_to_string(&path)?;
|
|
assert!(
|
|
file_contents.contains(content),
|
|
"file contents missing {content:?}: {file_contents}"
|
|
);
|
|
let _ = fs::remove_file(path);
|
|
}
|
|
Expectation::FileCreatedNoExitCode { target, content } => {
|
|
let (path, _) = target.resolve_for_patch(test);
|
|
assert!(
|
|
result.exit_code.is_none() || result.exit_code == Some(0),
|
|
"expected no exit code for {path:?}",
|
|
);
|
|
assert!(
|
|
result.stdout.contains(content),
|
|
"stdout missing {content:?}: {}",
|
|
result.stdout
|
|
);
|
|
let file_contents = fs::read_to_string(&path)?;
|
|
assert!(
|
|
file_contents.contains(content),
|
|
"file contents missing {content:?}: {file_contents}"
|
|
);
|
|
let _ = fs::remove_file(path);
|
|
}
|
|
Expectation::PatchApplied { target, content } => {
|
|
let (path, _) = target.resolve_for_patch(test);
|
|
match result.exit_code {
|
|
Some(0) | None => {
|
|
if result.exit_code.is_none() {
|
|
assert!(
|
|
result.stdout.contains("Success."),
|
|
"patch output missing success indicator: {}",
|
|
result.stdout
|
|
);
|
|
}
|
|
}
|
|
Some(code) => panic!(
|
|
"expected successful patch exit for {:?}, got {code} with stdout {}",
|
|
path, result.stdout
|
|
),
|
|
}
|
|
let file_contents = fs::read_to_string(&path)?;
|
|
assert!(
|
|
file_contents.contains(content),
|
|
"patched file missing {content:?}: {file_contents}"
|
|
);
|
|
let _ = fs::remove_file(path);
|
|
}
|
|
Expectation::FileNotCreated {
|
|
target,
|
|
message_contains,
|
|
} => {
|
|
let (path, _) = target.resolve_for_patch(test);
|
|
assert_ne!(
|
|
result.exit_code,
|
|
Some(0),
|
|
"expected non-zero exit for {path:?}"
|
|
);
|
|
for needle in *message_contains {
|
|
if needle.contains('|') {
|
|
let options: Vec<&str> = needle.split('|').collect();
|
|
let matches_any =
|
|
options.iter().any(|option| result.stdout.contains(option));
|
|
assert!(
|
|
matches_any,
|
|
"stdout missing one of {options:?}: {}",
|
|
result.stdout
|
|
);
|
|
} else {
|
|
assert!(
|
|
result.stdout.contains(needle),
|
|
"stdout missing {needle:?}: {}",
|
|
result.stdout
|
|
);
|
|
}
|
|
}
|
|
assert!(
|
|
!path.exists(),
|
|
"command should not create {path:?}, but file exists"
|
|
);
|
|
}
|
|
Expectation::NetworkSuccess { body_contains } => {
|
|
assert_eq!(
|
|
result.exit_code,
|
|
Some(0),
|
|
"expected successful network exit: {}",
|
|
result.stdout
|
|
);
|
|
assert!(
|
|
result.stdout.contains("OK:"),
|
|
"stdout missing OK prefix: {}",
|
|
result.stdout
|
|
);
|
|
assert!(
|
|
result.stdout.contains(body_contains),
|
|
"stdout missing body text {body_contains:?}: {}",
|
|
result.stdout
|
|
);
|
|
}
|
|
Expectation::NetworkSuccessNoExitCode { body_contains } => {
|
|
assert!(
|
|
result.exit_code.is_none() || result.exit_code == Some(0),
|
|
"expected no exit code for successful network call: {}",
|
|
result.stdout
|
|
);
|
|
assert!(
|
|
result.stdout.contains("OK:"),
|
|
"stdout missing OK prefix: {}",
|
|
result.stdout
|
|
);
|
|
assert!(
|
|
result.stdout.contains(body_contains),
|
|
"stdout missing body text {body_contains:?}: {}",
|
|
result.stdout
|
|
);
|
|
}
|
|
Expectation::NetworkFailure { expect_tag } => {
|
|
assert_ne!(
|
|
result.exit_code,
|
|
Some(0),
|
|
"expected non-zero exit for network failure: {}",
|
|
result.stdout
|
|
);
|
|
assert!(
|
|
result.stdout.contains("ERR:"),
|
|
"stdout missing ERR prefix: {}",
|
|
result.stdout
|
|
);
|
|
assert!(
|
|
result.stdout.contains(expect_tag),
|
|
"stdout missing expected tag {expect_tag:?}: {}",
|
|
result.stdout
|
|
);
|
|
}
|
|
Expectation::CommandSuccess { stdout_contains } => {
|
|
assert_eq!(
|
|
result.exit_code,
|
|
Some(0),
|
|
"expected successful trusted command exit: {}",
|
|
result.stdout
|
|
);
|
|
assert!(
|
|
result.stdout.contains(stdout_contains),
|
|
"trusted command stdout missing {stdout_contains:?}: {}",
|
|
result.stdout
|
|
);
|
|
}
|
|
Expectation::CommandSuccessNoExitCode { stdout_contains } => {
|
|
assert!(
|
|
result.exit_code.is_none() || result.exit_code == Some(0),
|
|
"expected no exit code for trusted command: {}",
|
|
result.stdout
|
|
);
|
|
assert!(
|
|
result.stdout.contains(stdout_contains),
|
|
"trusted command stdout missing {stdout_contains:?}: {}",
|
|
result.stdout
|
|
);
|
|
}
|
|
Expectation::CommandFailure { output_contains } => {
|
|
assert_ne!(
|
|
result.exit_code,
|
|
Some(0),
|
|
"expected non-zero exit for command failure: {}",
|
|
result.stdout
|
|
);
|
|
assert!(
|
|
result.stdout.contains(output_contains),
|
|
"command failure stderr missing {output_contains:?}: {}",
|
|
result.stdout
|
|
);
|
|
}
|
|
}
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
#[derive(Clone)]
|
|
enum Outcome {
|
|
Auto,
|
|
ExecApproval {
|
|
decision: ReviewDecision,
|
|
expected_reason: Option<&'static str>,
|
|
},
|
|
PatchApproval {
|
|
decision: ReviewDecision,
|
|
expected_reason: Option<&'static str>,
|
|
},
|
|
}
|
|
|
|
#[derive(Clone)]
|
|
struct ScenarioSpec {
|
|
name: &'static str,
|
|
approval_policy: AskForApproval,
|
|
sandbox_policy: SandboxPolicy,
|
|
action: ActionKind,
|
|
sandbox_permissions: SandboxPermissions,
|
|
features: Vec<Feature>,
|
|
model_override: Option<&'static str>,
|
|
outcome: Outcome,
|
|
expectation: Expectation,
|
|
}
|
|
|
|
struct CommandResult {
|
|
exit_code: Option<i64>,
|
|
stdout: String,
|
|
}
|
|
|
|
async fn submit_turn(
|
|
test: &TestCodex,
|
|
prompt: &str,
|
|
approval_policy: AskForApproval,
|
|
sandbox_policy: SandboxPolicy,
|
|
) -> Result<()> {
|
|
let session_model = test.session_configured.model.clone();
|
|
|
|
test.codex
|
|
.submit(Op::UserTurn {
|
|
items: vec![UserInput::Text {
|
|
text: prompt.into(),
|
|
text_elements: Vec::new(),
|
|
}],
|
|
final_output_json_schema: None,
|
|
cwd: test.cwd.path().to_path_buf(),
|
|
approval_policy,
|
|
approvals_reviewer: None,
|
|
sandbox_policy,
|
|
model: session_model,
|
|
effort: None,
|
|
summary: None,
|
|
service_tier: None,
|
|
collaboration_mode: None,
|
|
personality: None,
|
|
})
|
|
.await?;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
fn parse_result(item: &Value) -> CommandResult {
|
|
let output_str = item
|
|
.get("output")
|
|
.and_then(Value::as_str)
|
|
.expect("shell output payload");
|
|
match serde_json::from_str::<Value>(output_str) {
|
|
Ok(parsed) => {
|
|
let exit_code = parsed["metadata"]["exit_code"].as_i64();
|
|
let stdout = parsed["output"].as_str().unwrap_or_default().to_string();
|
|
CommandResult { exit_code, stdout }
|
|
}
|
|
Err(_) => {
|
|
let structured = Regex::new(r"(?s)^Exit code:\s*(-?\d+).*?Output:\n(.*)$").unwrap();
|
|
let regex =
|
|
Regex::new(r"(?s)^.*?Process exited with code (\d+)\n.*?Output:\n(.*)$").unwrap();
|
|
// parse freeform output
|
|
if let Some(captures) = structured.captures(output_str) {
|
|
let exit_code = captures.get(1).unwrap().as_str().parse::<i64>().unwrap();
|
|
let output = captures.get(2).unwrap().as_str();
|
|
CommandResult {
|
|
exit_code: Some(exit_code),
|
|
stdout: output.to_string(),
|
|
}
|
|
} else if let Some(captures) = regex.captures(output_str) {
|
|
let exit_code = captures.get(1).unwrap().as_str().parse::<i64>().unwrap();
|
|
let output = captures.get(2).unwrap().as_str();
|
|
CommandResult {
|
|
exit_code: Some(exit_code),
|
|
stdout: output.to_string(),
|
|
}
|
|
} else {
|
|
CommandResult {
|
|
exit_code: None,
|
|
stdout: output_str.to_string(),
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
async fn expect_exec_approval(
|
|
test: &TestCodex,
|
|
expected_command: &str,
|
|
) -> ExecApprovalRequestEvent {
|
|
let event = wait_for_event(&test.codex, |event| {
|
|
matches!(
|
|
event,
|
|
EventMsg::ExecApprovalRequest(_) | EventMsg::TurnComplete(_)
|
|
)
|
|
})
|
|
.await;
|
|
|
|
match event {
|
|
EventMsg::ExecApprovalRequest(approval) => {
|
|
let last_arg = approval
|
|
.command
|
|
.last()
|
|
.map(std::string::String::as_str)
|
|
.unwrap_or_default();
|
|
assert_eq!(last_arg, expected_command);
|
|
approval
|
|
}
|
|
EventMsg::TurnComplete(_) => panic!("expected approval request before completion"),
|
|
other => panic!("unexpected event: {other:?}"),
|
|
}
|
|
}
|
|
|
|
async fn expect_patch_approval(
|
|
test: &TestCodex,
|
|
expected_call_id: &str,
|
|
) -> ApplyPatchApprovalRequestEvent {
|
|
let event = wait_for_event(&test.codex, |event| {
|
|
matches!(
|
|
event,
|
|
EventMsg::ApplyPatchApprovalRequest(_) | EventMsg::TurnComplete(_)
|
|
)
|
|
})
|
|
.await;
|
|
|
|
match event {
|
|
EventMsg::ApplyPatchApprovalRequest(approval) => {
|
|
assert_eq!(approval.call_id, expected_call_id);
|
|
approval
|
|
}
|
|
EventMsg::TurnComplete(_) => panic!("expected patch approval request before completion"),
|
|
other => panic!("unexpected event: {other:?}"),
|
|
}
|
|
}
|
|
|
|
async fn wait_for_completion_without_approval(test: &TestCodex) {
|
|
let event = wait_for_event(&test.codex, |event| {
|
|
matches!(
|
|
event,
|
|
EventMsg::ExecApprovalRequest(_) | EventMsg::TurnComplete(_)
|
|
)
|
|
})
|
|
.await;
|
|
|
|
match event {
|
|
EventMsg::TurnComplete(_) => {}
|
|
EventMsg::ExecApprovalRequest(event) => {
|
|
panic!("unexpected approval request: {:?}", event.command)
|
|
}
|
|
other => panic!("unexpected event: {other:?}"),
|
|
}
|
|
}
|
|
|
|
async fn wait_for_completion(test: &TestCodex) {
|
|
wait_for_event(&test.codex, |event| {
|
|
matches!(event, EventMsg::TurnComplete(_))
|
|
})
|
|
.await;
|
|
}
|
|
|
|
fn body_contains(req: &Request, text: &str) -> bool {
|
|
let is_zstd = req
|
|
.headers
|
|
.get("content-encoding")
|
|
.and_then(|value| value.to_str().ok())
|
|
.is_some_and(|value| {
|
|
value
|
|
.split(',')
|
|
.any(|entry| entry.trim().eq_ignore_ascii_case("zstd"))
|
|
});
|
|
let bytes = if is_zstd {
|
|
zstd::stream::decode_all(std::io::Cursor::new(&req.body)).ok()
|
|
} else {
|
|
Some(req.body.clone())
|
|
};
|
|
bytes
|
|
.and_then(|body| String::from_utf8(body).ok())
|
|
.is_some_and(|body| body.contains(text))
|
|
}
|
|
|
|
async fn wait_for_spawned_thread(test: &TestCodex) -> Result<Arc<CodexThread>> {
|
|
let deadline = tokio::time::Instant::now() + Duration::from_secs(2);
|
|
loop {
|
|
let ids = test.thread_manager.list_thread_ids().await;
|
|
if let Some(thread_id) = ids
|
|
.iter()
|
|
.find(|id| **id != test.session_configured.session_id)
|
|
{
|
|
return test
|
|
.thread_manager
|
|
.get_thread(*thread_id)
|
|
.await
|
|
.map_err(anyhow::Error::from);
|
|
}
|
|
if tokio::time::Instant::now() >= deadline {
|
|
anyhow::bail!("timed out waiting for spawned thread");
|
|
}
|
|
tokio::time::sleep(Duration::from_millis(10)).await;
|
|
}
|
|
}
|
|
|
|
fn scenarios() -> Vec<ScenarioSpec> {
|
|
use AskForApproval::*;
|
|
|
|
let workspace_write = |network_access| SandboxPolicy::WorkspaceWrite {
|
|
writable_roots: vec![],
|
|
read_only_access: Default::default(),
|
|
network_access,
|
|
exclude_tmpdir_env_var: false,
|
|
exclude_slash_tmp: false,
|
|
};
|
|
|
|
vec![
|
|
ScenarioSpec {
|
|
name: "danger_full_access_on_request_allows_outside_write",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::OutsideWorkspace("dfa_on_request.txt"),
|
|
content: "danger-on-request",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::FileCreated {
|
|
target: TargetPath::OutsideWorkspace("dfa_on_request.txt"),
|
|
content: "danger-on-request",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "danger_full_access_on_request_allows_outside_write_gpt_5_1_no_exit",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::OutsideWorkspace("dfa_on_request_5_1.txt"),
|
|
content: "danger-on-request",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::FileCreated {
|
|
target: TargetPath::OutsideWorkspace("dfa_on_request_5_1.txt"),
|
|
content: "danger-on-request",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "danger_full_access_on_request_allows_network",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::FetchUrlNoProxy {
|
|
endpoint: "/dfa/network",
|
|
response_body: "danger-network-ok",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::NetworkSuccess {
|
|
body_contains: "danger-network-ok",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "danger_full_access_on_request_allows_network_gpt_5_1_no_exit",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::FetchUrlNoProxy {
|
|
endpoint: "/dfa/network",
|
|
response_body: "danger-network-ok",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::NetworkSuccessNoExitCode {
|
|
body_contains: "danger-network-ok",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "trusted_command_unless_trusted_runs_without_prompt",
|
|
approval_policy: UnlessTrusted,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::RunCommand {
|
|
command: "echo trusted-unless",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::CommandSuccess {
|
|
stdout_contains: "trusted-unless",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "trusted_command_unless_trusted_runs_without_prompt_gpt_5_1_no_exit",
|
|
approval_policy: UnlessTrusted,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::RunCommand {
|
|
command: "echo trusted-unless",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::CommandSuccessNoExitCode {
|
|
stdout_contains: "trusted-unless",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "cat_redirect_unless_trusted_requires_approval",
|
|
approval_policy: UnlessTrusted,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::RunCommand {
|
|
command: r#"cat < "hello" > /var/test.txt"#,
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Denied,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::CommandFailure {
|
|
output_contains: "rejected by user",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "cat_redirect_on_request_requires_approval",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::RunCommand {
|
|
command: r#"cat < "hello" > /var/test.txt"#,
|
|
},
|
|
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Denied,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::CommandFailure {
|
|
output_contains: "rejected by user",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "danger_full_access_on_failure_allows_outside_write",
|
|
approval_policy: OnFailure,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::OutsideWorkspace("dfa_on_failure.txt"),
|
|
content: "danger-on-failure",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::FileCreated {
|
|
target: TargetPath::OutsideWorkspace("dfa_on_failure.txt"),
|
|
content: "danger-on-failure",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "danger_full_access_on_failure_allows_outside_write_gpt_5_1_no_exit",
|
|
approval_policy: OnFailure,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::OutsideWorkspace("dfa_on_failure_5_1.txt"),
|
|
content: "danger-on-failure",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::FileCreatedNoExitCode {
|
|
target: TargetPath::OutsideWorkspace("dfa_on_failure_5_1.txt"),
|
|
content: "danger-on-failure",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "danger_full_access_unless_trusted_requests_approval",
|
|
approval_policy: UnlessTrusted,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::OutsideWorkspace("dfa_unless_trusted.txt"),
|
|
content: "danger-unless-trusted",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::FileCreated {
|
|
target: TargetPath::OutsideWorkspace("dfa_unless_trusted.txt"),
|
|
content: "danger-unless-trusted",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "danger_full_access_unless_trusted_requests_approval_gpt_5_1_no_exit",
|
|
approval_policy: UnlessTrusted,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::OutsideWorkspace("dfa_unless_trusted_5_1.txt"),
|
|
content: "danger-unless-trusted",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::FileCreatedNoExitCode {
|
|
target: TargetPath::OutsideWorkspace("dfa_unless_trusted_5_1.txt"),
|
|
content: "danger-unless-trusted",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "danger_full_access_never_allows_outside_write",
|
|
approval_policy: Never,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::OutsideWorkspace("dfa_never.txt"),
|
|
content: "danger-never",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::FileCreated {
|
|
target: TargetPath::OutsideWorkspace("dfa_never.txt"),
|
|
content: "danger-never",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "danger_full_access_never_allows_outside_write_gpt_5_1_no_exit",
|
|
approval_policy: Never,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::OutsideWorkspace("dfa_never_5_1.txt"),
|
|
content: "danger-never",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::FileCreatedNoExitCode {
|
|
target: TargetPath::OutsideWorkspace("dfa_never_5_1.txt"),
|
|
content: "danger-never",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "read_only_on_request_requires_approval",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::Workspace("ro_on_request.txt"),
|
|
content: "read-only-approval",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::FileCreated {
|
|
target: TargetPath::Workspace("ro_on_request.txt"),
|
|
content: "read-only-approval",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "read_only_on_request_requires_approval_gpt_5_1_no_exit",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::Workspace("ro_on_request_5_1.txt"),
|
|
content: "read-only-approval",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::FileCreatedNoExitCode {
|
|
target: TargetPath::Workspace("ro_on_request_5_1.txt"),
|
|
content: "read-only-approval",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "trusted_command_on_request_read_only_runs_without_prompt",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
|
action: ActionKind::RunCommand {
|
|
command: "echo trusted-read-only",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::CommandSuccess {
|
|
stdout_contains: "trusted-read-only",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "trusted_command_on_request_read_only_runs_without_prompt_gpt_5_1_no_exit",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
|
action: ActionKind::RunCommand {
|
|
command: "echo trusted-read-only",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::CommandSuccessNoExitCode {
|
|
stdout_contains: "trusted-read-only",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "read_only_on_request_blocks_network",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
|
action: ActionKind::FetchUrl {
|
|
endpoint: "/ro/network-blocked",
|
|
response_body: "should-not-see",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: None,
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::NetworkFailure { expect_tag: "ERR:" },
|
|
},
|
|
ScenarioSpec {
|
|
name: "read_only_on_request_denied_blocks_execution",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::Workspace("ro_on_request_denied.txt"),
|
|
content: "should-not-write",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
|
features: vec![],
|
|
model_override: None,
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Denied,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::FileNotCreated {
|
|
target: TargetPath::Workspace("ro_on_request_denied.txt"),
|
|
message_contains: &["exec command rejected by user"],
|
|
},
|
|
},
|
|
#[cfg(not(target_os = "linux"))] // TODO (pakrym): figure out why linux behaves differently
|
|
ScenarioSpec {
|
|
name: "read_only_on_failure_escalates_after_sandbox_error",
|
|
approval_policy: OnFailure,
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::Workspace("ro_on_failure.txt"),
|
|
content: "read-only-on-failure",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: Some("command failed; retry without sandbox?"),
|
|
},
|
|
expectation: Expectation::FileCreated {
|
|
target: TargetPath::Workspace("ro_on_failure.txt"),
|
|
content: "read-only-on-failure",
|
|
},
|
|
},
|
|
#[cfg(not(target_os = "linux"))]
|
|
ScenarioSpec {
|
|
name: "read_only_on_failure_escalates_after_sandbox_error_gpt_5_1_no_exit",
|
|
approval_policy: OnFailure,
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::Workspace("ro_on_failure_5_1.txt"),
|
|
content: "read-only-on-failure",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: Some("command failed; retry without sandbox?"),
|
|
},
|
|
expectation: Expectation::FileCreatedNoExitCode {
|
|
target: TargetPath::Workspace("ro_on_failure_5_1.txt"),
|
|
content: "read-only-on-failure",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "read_only_on_request_network_escalates_when_approved",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
|
action: ActionKind::FetchUrl {
|
|
endpoint: "/ro/network-approved",
|
|
response_body: "read-only-network-ok",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::NetworkSuccess {
|
|
body_contains: "read-only-network-ok",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "read_only_on_request_network_escalates_when_approved_gpt_5_1_no_exit",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
|
action: ActionKind::FetchUrl {
|
|
endpoint: "/ro/network-approved",
|
|
response_body: "read-only-network-ok",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::NetworkSuccessNoExitCode {
|
|
body_contains: "read-only-network-ok",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "apply_patch_shell_command_requires_patch_approval",
|
|
approval_policy: UnlessTrusted,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::ApplyPatchShell {
|
|
target: TargetPath::Workspace("apply_patch_shell.txt"),
|
|
content: "shell-apply-patch",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: None,
|
|
outcome: Outcome::PatchApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::PatchApplied {
|
|
target: TargetPath::Workspace("apply_patch_shell.txt"),
|
|
content: "shell-apply-patch",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "apply_patch_function_auto_inside_workspace",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::ApplyPatchFunction {
|
|
target: TargetPath::Workspace("apply_patch_function.txt"),
|
|
content: "function-apply-patch",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1-codex"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::PatchApplied {
|
|
target: TargetPath::Workspace("apply_patch_function.txt"),
|
|
content: "function-apply-patch",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "apply_patch_function_danger_allows_outside_workspace",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::ApplyPatchFunction {
|
|
target: TargetPath::OutsideWorkspace("apply_patch_function_danger.txt"),
|
|
content: "function-patch-danger",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![Feature::ApplyPatchFreeform],
|
|
model_override: Some("gpt-5.1-codex"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::PatchApplied {
|
|
target: TargetPath::OutsideWorkspace("apply_patch_function_danger.txt"),
|
|
content: "function-patch-danger",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "apply_patch_function_outside_requires_patch_approval",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::ApplyPatchFunction {
|
|
target: TargetPath::OutsideWorkspace("apply_patch_function_outside.txt"),
|
|
content: "function-patch-outside",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1-codex"),
|
|
outcome: Outcome::PatchApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::PatchApplied {
|
|
target: TargetPath::OutsideWorkspace("apply_patch_function_outside.txt"),
|
|
content: "function-patch-outside",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "apply_patch_function_outside_denied_blocks_patch",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::ApplyPatchFunction {
|
|
target: TargetPath::OutsideWorkspace("apply_patch_function_outside_denied.txt"),
|
|
content: "function-patch-outside-denied",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1-codex"),
|
|
outcome: Outcome::PatchApproval {
|
|
decision: ReviewDecision::Denied,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::FileNotCreated {
|
|
target: TargetPath::OutsideWorkspace("apply_patch_function_outside_denied.txt"),
|
|
message_contains: &["patch rejected by user"],
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "apply_patch_shell_command_outside_requires_patch_approval",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::ApplyPatchShell {
|
|
target: TargetPath::OutsideWorkspace("apply_patch_shell_outside.txt"),
|
|
content: "shell-patch-outside",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: None,
|
|
outcome: Outcome::PatchApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::PatchApplied {
|
|
target: TargetPath::OutsideWorkspace("apply_patch_shell_outside.txt"),
|
|
content: "shell-patch-outside",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "apply_patch_function_unless_trusted_requires_patch_approval",
|
|
approval_policy: UnlessTrusted,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::ApplyPatchFunction {
|
|
target: TargetPath::Workspace("apply_patch_function_unless_trusted.txt"),
|
|
content: "function-patch-unless-trusted",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1-codex"),
|
|
outcome: Outcome::PatchApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::PatchApplied {
|
|
target: TargetPath::Workspace("apply_patch_function_unless_trusted.txt"),
|
|
content: "function-patch-unless-trusted",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "apply_patch_function_never_rejects_outside_workspace",
|
|
approval_policy: Never,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::ApplyPatchFunction {
|
|
target: TargetPath::OutsideWorkspace("apply_patch_function_never.txt"),
|
|
content: "function-patch-never",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1-codex"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::FileNotCreated {
|
|
target: TargetPath::OutsideWorkspace("apply_patch_function_never.txt"),
|
|
message_contains: &[
|
|
"patch rejected: writing outside of the project; rejected by user approval settings",
|
|
],
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "read_only_unless_trusted_requires_approval",
|
|
approval_policy: UnlessTrusted,
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::Workspace("ro_unless_trusted.txt"),
|
|
content: "read-only-unless-trusted",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::FileCreated {
|
|
target: TargetPath::Workspace("ro_unless_trusted.txt"),
|
|
content: "read-only-unless-trusted",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "read_only_unless_trusted_requires_approval_gpt_5_1_no_exit",
|
|
approval_policy: UnlessTrusted,
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::Workspace("ro_unless_trusted_5_1.txt"),
|
|
content: "read-only-unless-trusted",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5.1"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::FileCreatedNoExitCode {
|
|
target: TargetPath::Workspace("ro_unless_trusted_5_1.txt"),
|
|
content: "read-only-unless-trusted",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "read_only_never_reports_sandbox_failure",
|
|
approval_policy: Never,
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::Workspace("ro_never.txt"),
|
|
content: "read-only-never",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: None,
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::FileNotCreated {
|
|
target: TargetPath::Workspace("ro_never.txt"),
|
|
message_contains: if cfg!(target_os = "linux") {
|
|
&["Permission denied|Read-only file system"]
|
|
} else {
|
|
&[
|
|
"Permission denied|Operation not permitted|operation not permitted|\
|
|
Read-only file system",
|
|
]
|
|
},
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "trusted_command_never_runs_without_prompt",
|
|
approval_policy: Never,
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
|
action: ActionKind::RunCommand {
|
|
command: "echo trusted-never",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::CommandSuccess {
|
|
stdout_contains: "trusted-never",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "workspace_write_on_request_allows_workspace_write",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::Workspace("ww_on_request.txt"),
|
|
content: "workspace-on-request",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::FileCreated {
|
|
target: TargetPath::Workspace("ww_on_request.txt"),
|
|
content: "workspace-on-request",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "workspace_write_network_disabled_blocks_network",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::FetchUrl {
|
|
endpoint: "/ww/network-blocked",
|
|
response_body: "workspace-network-blocked",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: None,
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::NetworkFailure { expect_tag: "ERR:" },
|
|
},
|
|
ScenarioSpec {
|
|
name: "workspace_write_on_request_requires_approval_outside_workspace",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::OutsideWorkspace("ww_on_request_outside.txt"),
|
|
content: "workspace-on-request-outside",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::FileCreated {
|
|
target: TargetPath::OutsideWorkspace("ww_on_request_outside.txt"),
|
|
content: "workspace-on-request-outside",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "workspace_write_network_enabled_allows_network",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: workspace_write(true),
|
|
action: ActionKind::FetchUrl {
|
|
endpoint: "/ww/network-ok",
|
|
response_body: "workspace-network-ok",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::NetworkSuccess {
|
|
body_contains: "workspace-network-ok",
|
|
},
|
|
},
|
|
#[cfg(not(target_os = "linux"))] // TODO (pakrym): figure out why linux behaves differently
|
|
ScenarioSpec {
|
|
name: "workspace_write_on_failure_escalates_outside_workspace",
|
|
approval_policy: OnFailure,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::OutsideWorkspace("ww_on_failure.txt"),
|
|
content: "workspace-on-failure",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: Some("command failed; retry without sandbox?"),
|
|
},
|
|
expectation: Expectation::FileCreated {
|
|
target: TargetPath::OutsideWorkspace("ww_on_failure.txt"),
|
|
content: "workspace-on-failure",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "workspace_write_unless_trusted_requires_approval_outside_workspace",
|
|
approval_policy: UnlessTrusted,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::OutsideWorkspace("ww_unless_trusted.txt"),
|
|
content: "workspace-unless-trusted",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::FileCreated {
|
|
target: TargetPath::OutsideWorkspace("ww_unless_trusted.txt"),
|
|
content: "workspace-unless-trusted",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "workspace_write_never_blocks_outside_workspace",
|
|
approval_policy: Never,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::WriteFile {
|
|
target: TargetPath::OutsideWorkspace("ww_never.txt"),
|
|
content: "workspace-never",
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![],
|
|
model_override: None,
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::FileNotCreated {
|
|
target: TargetPath::OutsideWorkspace("ww_never.txt"),
|
|
message_contains: if cfg!(target_os = "linux") {
|
|
&["Permission denied|Read-only file system"]
|
|
} else {
|
|
&[
|
|
"Permission denied|Operation not permitted|operation not permitted|\
|
|
Read-only file system",
|
|
]
|
|
},
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "unified exec on request no approval for safe command",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::RunUnifiedExecCommand {
|
|
command: "echo \"hello unified exec\"",
|
|
justification: None,
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![Feature::UnifiedExec],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::Auto,
|
|
expectation: Expectation::CommandSuccess {
|
|
stdout_contains: "hello unified exec",
|
|
},
|
|
},
|
|
#[cfg(not(all(target_os = "linux", target_arch = "aarch64")))]
|
|
// Linux sandbox arg0 test workaround doesn't work on ARM
|
|
ScenarioSpec {
|
|
name: "unified exec on request escalated requires approval",
|
|
approval_policy: OnRequest,
|
|
sandbox_policy: SandboxPolicy::new_read_only_policy(),
|
|
action: ActionKind::RunUnifiedExecCommand {
|
|
command: "python3 -c 'print('\"'\"'escalated unified exec'\"'\"')'",
|
|
justification: Some(DEFAULT_UNIFIED_EXEC_JUSTIFICATION),
|
|
},
|
|
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
|
features: vec![Feature::UnifiedExec],
|
|
model_override: Some("gpt-5"),
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Approved,
|
|
expected_reason: Some(DEFAULT_UNIFIED_EXEC_JUSTIFICATION),
|
|
},
|
|
expectation: Expectation::CommandSuccess {
|
|
stdout_contains: "escalated unified exec",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "unified exec on request requires approval unless trusted",
|
|
approval_policy: AskForApproval::UnlessTrusted,
|
|
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
|
action: ActionKind::RunUnifiedExecCommand {
|
|
command: "git reset --hard",
|
|
justification: None,
|
|
},
|
|
sandbox_permissions: SandboxPermissions::UseDefault,
|
|
features: vec![Feature::UnifiedExec],
|
|
model_override: None,
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Denied,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::CommandFailure {
|
|
output_contains: "rejected by user",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "safe command with heredoc and redirect still requires approval",
|
|
approval_policy: AskForApproval::OnRequest,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::RunUnifiedExecCommand {
|
|
command: "cat <<'EOF' > /tmp/out.txt \nhello\nEOF",
|
|
justification: None,
|
|
},
|
|
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
|
features: vec![Feature::UnifiedExec],
|
|
model_override: None,
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Denied,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::CommandFailure {
|
|
output_contains: "rejected by user",
|
|
},
|
|
},
|
|
ScenarioSpec {
|
|
name: "compound command with one safe command still requires approval",
|
|
approval_policy: AskForApproval::OnRequest,
|
|
sandbox_policy: workspace_write(false),
|
|
action: ActionKind::RunUnifiedExecCommand {
|
|
command: "cat ./one.txt && touch ./two.txt",
|
|
justification: None,
|
|
},
|
|
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
|
features: vec![Feature::UnifiedExec],
|
|
model_override: None,
|
|
outcome: Outcome::ExecApproval {
|
|
decision: ReviewDecision::Denied,
|
|
expected_reason: None,
|
|
},
|
|
expectation: Expectation::CommandFailure {
|
|
output_contains: "rejected by user",
|
|
},
|
|
},
|
|
]
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn approval_matrix_covers_all_modes() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
for scenario in scenarios() {
|
|
run_scenario(&scenario).await?;
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
async fn run_scenario(scenario: &ScenarioSpec) -> Result<()> {
|
|
eprintln!("running approval scenario: {}", scenario.name);
|
|
let server = start_mock_server().await;
|
|
let approval_policy = scenario.approval_policy;
|
|
let sandbox_policy = scenario.sandbox_policy.clone();
|
|
let features = scenario.features.clone();
|
|
let model_override = scenario.model_override;
|
|
let model = model_override.unwrap_or("gpt-5.1");
|
|
|
|
let mut builder = test_codex().with_model(model).with_config(move |config| {
|
|
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
|
|
config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy.clone());
|
|
for feature in features {
|
|
config
|
|
.features
|
|
.enable(feature)
|
|
.expect("test config should allow feature update");
|
|
}
|
|
});
|
|
let test = builder.build(&server).await?;
|
|
|
|
let call_id = scenario.name;
|
|
let (event, expected_command) = scenario
|
|
.action
|
|
.prepare(&test, &server, call_id, scenario.sandbox_permissions)
|
|
.await?;
|
|
if let Some(command) = expected_command.as_deref() {
|
|
eprintln!("approval scenario {} command: {command}", scenario.name);
|
|
}
|
|
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-1"),
|
|
event,
|
|
ev_completed("resp-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
let results_mock = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-1", "done"),
|
|
ev_completed("resp-2"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
submit_turn(
|
|
&test,
|
|
scenario.name,
|
|
scenario.approval_policy,
|
|
scenario.sandbox_policy.clone(),
|
|
)
|
|
.await?;
|
|
|
|
match &scenario.outcome {
|
|
Outcome::Auto => {
|
|
wait_for_completion_without_approval(&test).await;
|
|
}
|
|
Outcome::ExecApproval {
|
|
decision,
|
|
expected_reason,
|
|
} => {
|
|
let command = expected_command
|
|
.as_deref()
|
|
.expect("exec approval requires shell command");
|
|
let approval = expect_exec_approval(&test, command).await;
|
|
if let Some(expected_reason) = expected_reason {
|
|
assert_eq!(
|
|
approval.reason.as_deref(),
|
|
Some(*expected_reason),
|
|
"unexpected approval reason for {}",
|
|
scenario.name
|
|
);
|
|
}
|
|
test.codex
|
|
.submit(Op::ExecApproval {
|
|
id: approval.effective_approval_id(),
|
|
turn_id: None,
|
|
decision: decision.clone(),
|
|
})
|
|
.await?;
|
|
wait_for_completion(&test).await;
|
|
}
|
|
Outcome::PatchApproval {
|
|
decision,
|
|
expected_reason,
|
|
} => {
|
|
let approval = expect_patch_approval(&test, call_id).await;
|
|
if let Some(expected_reason) = expected_reason {
|
|
assert_eq!(
|
|
approval.reason.as_deref(),
|
|
Some(*expected_reason),
|
|
"unexpected patch approval reason for {}",
|
|
scenario.name
|
|
);
|
|
}
|
|
test.codex
|
|
.submit(Op::PatchApproval {
|
|
id: approval.call_id,
|
|
decision: decision.clone(),
|
|
})
|
|
.await?;
|
|
wait_for_completion(&test).await;
|
|
}
|
|
}
|
|
|
|
let output_item = results_mock.single_request().function_call_output(call_id);
|
|
let result = parse_result(&output_item);
|
|
eprintln!(
|
|
"approval scenario {} result: exit_code={:?} stdout={:?}",
|
|
scenario.name, result.exit_code, result.stdout
|
|
);
|
|
scenario.expectation.verify(&test, &result)?;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "current_thread")]
|
|
#[cfg(unix)]
|
|
async fn approving_apply_patch_for_session_skips_future_prompts_for_same_file() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = start_mock_server().await;
|
|
let approval_policy = AskForApproval::OnRequest;
|
|
let sandbox_policy = SandboxPolicy::WorkspaceWrite {
|
|
writable_roots: vec![],
|
|
read_only_access: Default::default(),
|
|
network_access: false,
|
|
exclude_tmpdir_env_var: false,
|
|
exclude_slash_tmp: false,
|
|
};
|
|
let sandbox_policy_for_config = sandbox_policy.clone();
|
|
|
|
let mut builder = test_codex()
|
|
.with_model("gpt-5.1-codex")
|
|
.with_config(move |config| {
|
|
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
|
|
config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config);
|
|
});
|
|
let test = builder.build(&server).await?;
|
|
|
|
let target = TargetPath::OutsideWorkspace("apply_patch_allow_session.txt");
|
|
let (path, patch_path) = target.resolve_for_patch(&test);
|
|
let _ = fs::remove_file(&path);
|
|
|
|
let patch_add = build_add_file_patch(&patch_path, "before");
|
|
let patch_update = format!(
|
|
"*** Begin Patch\n*** Update File: {patch_path}\n@@\n-before\n+after\n*** End Patch\n"
|
|
);
|
|
|
|
let call_id_1 = "apply_patch_allow_session_1";
|
|
let call_id_2 = "apply_patch_allow_session_2";
|
|
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-1"),
|
|
ev_apply_patch_function_call(call_id_1, &patch_add),
|
|
ev_completed("resp-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-1", "done"),
|
|
ev_completed("resp-2"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
submit_turn(
|
|
&test,
|
|
"apply_patch allow session",
|
|
approval_policy,
|
|
sandbox_policy.clone(),
|
|
)
|
|
.await?;
|
|
let approval = expect_patch_approval(&test, call_id_1).await;
|
|
test.codex
|
|
.submit(Op::PatchApproval {
|
|
id: approval.call_id,
|
|
decision: ReviewDecision::ApprovedForSession,
|
|
})
|
|
.await?;
|
|
wait_for_completion(&test).await;
|
|
assert!(fs::read_to_string(&path)?.contains("before"));
|
|
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-3"),
|
|
ev_apply_patch_function_call(call_id_2, &patch_update),
|
|
ev_completed("resp-3"),
|
|
]),
|
|
)
|
|
.await;
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-2", "done"),
|
|
ev_completed("resp-4"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
submit_turn(
|
|
&test,
|
|
"apply_patch allow session followup",
|
|
approval_policy,
|
|
sandbox_policy.clone(),
|
|
)
|
|
.await?;
|
|
|
|
let event = wait_for_event(&test.codex, |event| {
|
|
matches!(
|
|
event,
|
|
EventMsg::ApplyPatchApprovalRequest(_) | EventMsg::TurnComplete(_)
|
|
)
|
|
})
|
|
.await;
|
|
match event {
|
|
EventMsg::TurnComplete(_) => {}
|
|
EventMsg::ApplyPatchApprovalRequest(event) => {
|
|
panic!("unexpected patch approval request: {:?}", event.call_id)
|
|
}
|
|
other => panic!("unexpected event: {other:?}"),
|
|
}
|
|
|
|
assert!(fs::read_to_string(&path)?.contains("after"));
|
|
let _ = fs::remove_file(path);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "current_thread")]
|
|
#[cfg(unix)]
|
|
async fn approving_execpolicy_amendment_persists_policy_and_skips_future_prompts() -> Result<()> {
|
|
let server = start_mock_server().await;
|
|
let approval_policy = AskForApproval::UnlessTrusted;
|
|
let sandbox_policy = SandboxPolicy::new_read_only_policy();
|
|
let sandbox_policy_for_config = sandbox_policy.clone();
|
|
let mut builder = test_codex().with_config(move |config| {
|
|
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
|
|
config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config);
|
|
});
|
|
let test = builder.build(&server).await?;
|
|
let allow_prefix_path = test.cwd.path().join("allow-prefix.txt");
|
|
let _ = fs::remove_file(&allow_prefix_path);
|
|
|
|
let call_id_first = "allow-prefix-first";
|
|
let (first_event, expected_command) = ActionKind::RunCommand {
|
|
command: "touch allow-prefix.txt",
|
|
}
|
|
.prepare(
|
|
&test,
|
|
&server,
|
|
call_id_first,
|
|
SandboxPermissions::UseDefault,
|
|
)
|
|
.await?;
|
|
let expected_command =
|
|
expected_command.expect("execpolicy amendment scenario should produce a shell command");
|
|
let expected_execpolicy_amendment =
|
|
ExecPolicyAmendment::new(vec!["touch".to_string(), "allow-prefix.txt".to_string()]);
|
|
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-allow-prefix-1"),
|
|
first_event,
|
|
ev_completed("resp-allow-prefix-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
let first_results = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-allow-prefix-1", "done"),
|
|
ev_completed("resp-allow-prefix-2"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
submit_turn(
|
|
&test,
|
|
"allow-prefix-first",
|
|
approval_policy,
|
|
sandbox_policy.clone(),
|
|
)
|
|
.await?;
|
|
|
|
let approval = expect_exec_approval(&test, expected_command.as_str()).await;
|
|
assert_eq!(
|
|
approval.proposed_execpolicy_amendment,
|
|
Some(expected_execpolicy_amendment.clone())
|
|
);
|
|
|
|
test.codex
|
|
.submit(Op::ExecApproval {
|
|
id: approval.effective_approval_id(),
|
|
turn_id: None,
|
|
decision: ReviewDecision::ApprovedExecpolicyAmendment {
|
|
proposed_execpolicy_amendment: expected_execpolicy_amendment.clone(),
|
|
},
|
|
})
|
|
.await?;
|
|
wait_for_completion(&test).await;
|
|
|
|
let developer_messages = first_results
|
|
.single_request()
|
|
.message_input_texts("developer");
|
|
assert!(
|
|
developer_messages
|
|
.iter()
|
|
.any(|message| message.contains(r#"["touch", "allow-prefix.txt"]"#)),
|
|
"expected developer message documenting saved rule, got: {developer_messages:?}"
|
|
);
|
|
|
|
let policy_path = test.home.path().join("rules").join("default.rules");
|
|
let policy_contents = fs::read_to_string(&policy_path)?;
|
|
assert!(
|
|
policy_contents
|
|
.contains(r#"prefix_rule(pattern=["touch", "allow-prefix.txt"], decision="allow")"#),
|
|
"unexpected policy contents: {policy_contents}"
|
|
);
|
|
|
|
let first_output = parse_result(
|
|
&first_results
|
|
.single_request()
|
|
.function_call_output(call_id_first),
|
|
);
|
|
assert_eq!(first_output.exit_code.unwrap_or(0), 0);
|
|
assert!(
|
|
first_output.stdout.is_empty(),
|
|
"unexpected stdout: {}",
|
|
first_output.stdout
|
|
);
|
|
assert_eq!(
|
|
fs::read_to_string(&allow_prefix_path)?,
|
|
"",
|
|
"unexpected file contents after first run"
|
|
);
|
|
|
|
let call_id_second = "allow-prefix-second";
|
|
let (second_event, second_command) = ActionKind::RunCommand {
|
|
command: "touch allow-prefix.txt",
|
|
}
|
|
.prepare(
|
|
&test,
|
|
&server,
|
|
call_id_second,
|
|
SandboxPermissions::UseDefault,
|
|
)
|
|
.await?;
|
|
assert_eq!(second_command.as_deref(), Some(expected_command.as_str()));
|
|
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-allow-prefix-3"),
|
|
second_event,
|
|
ev_completed("resp-allow-prefix-3"),
|
|
]),
|
|
)
|
|
.await;
|
|
let second_results = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-allow-prefix-2", "done"),
|
|
ev_completed("resp-allow-prefix-4"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
submit_turn(
|
|
&test,
|
|
"allow-prefix-second",
|
|
approval_policy,
|
|
sandbox_policy.clone(),
|
|
)
|
|
.await?;
|
|
|
|
wait_for_completion_without_approval(&test).await;
|
|
|
|
let second_output = parse_result(
|
|
&second_results
|
|
.single_request()
|
|
.function_call_output(call_id_second),
|
|
);
|
|
assert_eq!(second_output.exit_code.unwrap_or(0), 0);
|
|
assert!(
|
|
second_output.stdout.is_empty(),
|
|
"unexpected stdout: {}",
|
|
second_output.stdout
|
|
);
|
|
assert_eq!(
|
|
fs::read_to_string(&allow_prefix_path)?,
|
|
"",
|
|
"unexpected file contents after second run"
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn spawned_subagent_execpolicy_amendment_propagates_to_parent_session() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = start_mock_server().await;
|
|
let approval_policy = AskForApproval::UnlessTrusted;
|
|
let sandbox_policy = SandboxPolicy::new_read_only_policy();
|
|
let sandbox_policy_for_config = sandbox_policy.clone();
|
|
let mut builder = test_codex().with_config(move |config| {
|
|
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
|
|
config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config);
|
|
config
|
|
.features
|
|
.enable(Feature::Collab)
|
|
.expect("test config should allow feature update");
|
|
});
|
|
let test = builder.build(&server).await?;
|
|
|
|
const PARENT_PROMPT: &str = "spawn a child that repeats a command";
|
|
const CHILD_PROMPT: &str = "run the same command twice";
|
|
const SPAWN_CALL_ID: &str = "spawn-child-1";
|
|
const CHILD_CALL_ID_1: &str = "child-touch-1";
|
|
const PARENT_CALL_ID_2: &str = "parent-touch-2";
|
|
|
|
let child_file = test.cwd.path().join("subagent-allow-prefix.txt");
|
|
let _ = fs::remove_file(&child_file);
|
|
|
|
let spawn_args = serde_json::to_string(&json!({
|
|
"message": CHILD_PROMPT,
|
|
}))?;
|
|
mount_sse_once_match(
|
|
&server,
|
|
|req: &Request| body_contains(req, PARENT_PROMPT),
|
|
sse(vec![
|
|
ev_response_created("resp-parent-1"),
|
|
ev_function_call(SPAWN_CALL_ID, "spawn_agent", &spawn_args),
|
|
ev_completed("resp-parent-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
let child_cmd_args = serde_json::to_string(&json!({
|
|
"command": "touch subagent-allow-prefix.txt",
|
|
"timeout_ms": 1_000,
|
|
"prefix_rule": ["touch", "subagent-allow-prefix.txt"],
|
|
}))?;
|
|
mount_sse_once_match(
|
|
&server,
|
|
|req: &Request| body_contains(req, CHILD_PROMPT) && !body_contains(req, SPAWN_CALL_ID),
|
|
sse(vec![
|
|
ev_response_created("resp-child-1"),
|
|
ev_function_call(CHILD_CALL_ID_1, "shell_command", &child_cmd_args),
|
|
ev_completed("resp-child-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
mount_sse_once_match(
|
|
&server,
|
|
|req: &Request| body_contains(req, CHILD_CALL_ID_1),
|
|
sse(vec![
|
|
ev_response_created("resp-child-2"),
|
|
ev_assistant_message("msg-child-2", "child done"),
|
|
ev_completed("resp-child-2"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
mount_sse_once_match(
|
|
&server,
|
|
|req: &Request| body_contains(req, SPAWN_CALL_ID),
|
|
sse(vec![
|
|
ev_response_created("resp-parent-2"),
|
|
ev_assistant_message("msg-parent-2", "parent done"),
|
|
ev_completed("resp-parent-2"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-parent-3"),
|
|
ev_function_call(PARENT_CALL_ID_2, "shell_command", &child_cmd_args),
|
|
ev_completed("resp-parent-3"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-parent-4"),
|
|
ev_assistant_message("msg-parent-4", "parent rerun done"),
|
|
ev_completed("resp-parent-4"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
submit_turn(
|
|
&test,
|
|
PARENT_PROMPT,
|
|
approval_policy,
|
|
sandbox_policy.clone(),
|
|
)
|
|
.await?;
|
|
|
|
let child = wait_for_spawned_thread(&test).await?;
|
|
let approval_event = wait_for_event_with_timeout(
|
|
&child,
|
|
|event| {
|
|
matches!(
|
|
event,
|
|
EventMsg::ExecApprovalRequest(_) | EventMsg::TurnComplete(_)
|
|
)
|
|
},
|
|
Duration::from_secs(2),
|
|
)
|
|
.await;
|
|
|
|
let EventMsg::ExecApprovalRequest(approval) = approval_event else {
|
|
panic!("expected child approval before completion");
|
|
};
|
|
let expected_execpolicy_amendment = ExecPolicyAmendment::new(vec![
|
|
"touch".to_string(),
|
|
"subagent-allow-prefix.txt".to_string(),
|
|
]);
|
|
assert_eq!(
|
|
approval.proposed_execpolicy_amendment,
|
|
Some(expected_execpolicy_amendment.clone())
|
|
);
|
|
|
|
child
|
|
.submit(Op::ExecApproval {
|
|
id: approval.effective_approval_id(),
|
|
turn_id: None,
|
|
decision: ReviewDecision::ApprovedExecpolicyAmendment {
|
|
proposed_execpolicy_amendment: expected_execpolicy_amendment,
|
|
},
|
|
})
|
|
.await?;
|
|
|
|
let child_event = wait_for_event_with_timeout(
|
|
&child,
|
|
|event| {
|
|
matches!(
|
|
event,
|
|
EventMsg::ExecApprovalRequest(_) | EventMsg::TurnComplete(_)
|
|
)
|
|
},
|
|
Duration::from_secs(2),
|
|
)
|
|
.await;
|
|
match child_event {
|
|
EventMsg::TurnComplete(_) => {}
|
|
EventMsg::ExecApprovalRequest(ev) => {
|
|
panic!("unexpected second child approval request: {:?}", ev.command)
|
|
}
|
|
other => panic!("unexpected event: {other:?}"),
|
|
}
|
|
assert!(
|
|
child_file.exists(),
|
|
"expected subagent command to create file"
|
|
);
|
|
fs::remove_file(&child_file)?;
|
|
assert!(
|
|
!child_file.exists(),
|
|
"expected child file to be removed before parent rerun"
|
|
);
|
|
|
|
submit_turn(
|
|
&test,
|
|
"parent reruns child command",
|
|
approval_policy,
|
|
sandbox_policy,
|
|
)
|
|
.await?;
|
|
wait_for_completion_without_approval(&test).await;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
#[cfg(unix)]
|
|
async fn matched_prefix_rule_runs_unsandboxed_under_zsh_fork() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let Some(runtime) = zsh_fork_runtime("zsh-fork prefix rule unsandboxed test")? else {
|
|
return Ok(());
|
|
};
|
|
|
|
let approval_policy = AskForApproval::Never;
|
|
let sandbox_policy = restrictive_workspace_write_policy();
|
|
let outside_dir = tempfile::tempdir_in(std::env::current_dir()?)?;
|
|
let outside_path = outside_dir
|
|
.path()
|
|
.join("zsh-fork-prefix-rule-unsandboxed.txt");
|
|
let command = format!("touch {outside_path:?}");
|
|
let rules = r#"prefix_rule(pattern=["touch"], decision="allow")"#.to_string();
|
|
|
|
let server = start_mock_server().await;
|
|
let outside_path_for_hook = outside_path.clone();
|
|
let test = build_zsh_fork_test(
|
|
&server,
|
|
runtime,
|
|
approval_policy,
|
|
sandbox_policy.clone(),
|
|
move |home| {
|
|
let _ = fs::remove_file(&outside_path_for_hook);
|
|
let rules_dir = home.join("rules");
|
|
fs::create_dir_all(&rules_dir).unwrap();
|
|
fs::write(rules_dir.join("default.rules"), &rules).unwrap();
|
|
},
|
|
)
|
|
.await?;
|
|
|
|
let call_id = "zsh-fork-prefix-rule-unsandboxed";
|
|
let event = shell_event(
|
|
call_id,
|
|
&command,
|
|
/*timeout_ms*/ 1_000,
|
|
SandboxPermissions::UseDefault,
|
|
)?;
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-zsh-fork-prefix-1"),
|
|
event,
|
|
ev_completed("resp-zsh-fork-prefix-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
let results = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-zsh-fork-prefix-1", "done"),
|
|
ev_completed("resp-zsh-fork-prefix-2"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
submit_turn(
|
|
&test,
|
|
"run allowed touch under zsh fork",
|
|
approval_policy,
|
|
sandbox_policy,
|
|
)
|
|
.await?;
|
|
|
|
wait_for_completion_without_approval(&test).await;
|
|
|
|
let result = parse_result(&results.single_request().function_call_output(call_id));
|
|
assert_eq!(result.exit_code.unwrap_or(0), 0);
|
|
assert!(
|
|
outside_path.exists(),
|
|
"expected matched prefix_rule to rerun touch unsandboxed; output: {}",
|
|
result.stdout
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "current_thread")]
|
|
#[cfg(unix)]
|
|
async fn invalid_requested_prefix_rule_falls_back_for_compound_command() -> Result<()> {
|
|
let server = start_mock_server().await;
|
|
let approval_policy = AskForApproval::OnRequest;
|
|
let sandbox_policy = SandboxPolicy::new_read_only_policy();
|
|
let sandbox_policy_for_config = sandbox_policy.clone();
|
|
let mut builder = test_codex().with_config(move |config| {
|
|
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
|
|
config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config);
|
|
});
|
|
let test = builder.build(&server).await?;
|
|
|
|
let call_id = "invalid-prefix-rule";
|
|
let command =
|
|
"touch /tmp/codex-fallback-rule-test.txt && echo hello > /tmp/codex-fallback-rule-test.txt";
|
|
let event = shell_event_with_prefix_rule(
|
|
call_id,
|
|
command,
|
|
/*timeout_ms*/ 1_000,
|
|
SandboxPermissions::RequireEscalated,
|
|
Some(vec!["touch".to_string()]),
|
|
)?;
|
|
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-invalid-prefix-1"),
|
|
event,
|
|
ev_completed("resp-invalid-prefix-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
submit_turn(
|
|
&test,
|
|
"invalid-prefix-rule",
|
|
approval_policy,
|
|
sandbox_policy.clone(),
|
|
)
|
|
.await?;
|
|
|
|
let approval = expect_exec_approval(&test, command).await;
|
|
let amendment = approval
|
|
.proposed_execpolicy_amendment
|
|
.expect("should have a proposed execpolicy amendment");
|
|
assert!(amendment.command.contains(&command.to_string()));
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "current_thread")]
|
|
#[cfg(unix)]
|
|
async fn approving_fallback_rule_for_compound_command_works() -> Result<()> {
|
|
let server = start_mock_server().await;
|
|
let approval_policy = AskForApproval::OnRequest;
|
|
let sandbox_policy = SandboxPolicy::new_read_only_policy();
|
|
let sandbox_policy_for_config = sandbox_policy.clone();
|
|
let mut builder = test_codex().with_config(move |config| {
|
|
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
|
|
config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config);
|
|
});
|
|
let test = builder.build(&server).await?;
|
|
|
|
let call_id = "invalid-prefix-rule";
|
|
let command =
|
|
"touch /tmp/codex-fallback-rule-test.txt && echo hello > /tmp/codex-fallback-rule-test.txt";
|
|
let event = shell_event_with_prefix_rule(
|
|
call_id,
|
|
command,
|
|
/*timeout_ms*/ 1_000,
|
|
SandboxPermissions::RequireEscalated,
|
|
Some(vec!["touch".to_string()]),
|
|
)?;
|
|
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-invalid-prefix-1"),
|
|
event,
|
|
ev_completed("resp-invalid-prefix-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
submit_turn(
|
|
&test,
|
|
"invalid-prefix-rule",
|
|
approval_policy,
|
|
sandbox_policy.clone(),
|
|
)
|
|
.await?;
|
|
|
|
let approval = expect_exec_approval(&test, command).await;
|
|
let approval_id = approval.effective_approval_id();
|
|
let amendment = approval
|
|
.proposed_execpolicy_amendment
|
|
.expect("should have a proposed execpolicy amendment");
|
|
assert!(amendment.command.contains(&command.to_string()));
|
|
|
|
test.codex
|
|
.submit(Op::ExecApproval {
|
|
id: approval_id,
|
|
turn_id: None,
|
|
decision: ReviewDecision::ApprovedExecpolicyAmendment {
|
|
proposed_execpolicy_amendment: amendment.clone(),
|
|
},
|
|
})
|
|
.await?;
|
|
wait_for_completion(&test).await;
|
|
|
|
let call_id = "invalid-prefix-rule-again";
|
|
let command =
|
|
"touch /tmp/codex-fallback-rule-test.txt && echo hello > /tmp/codex-fallback-rule-test.txt";
|
|
let event = shell_event_with_prefix_rule(
|
|
call_id,
|
|
command,
|
|
/*timeout_ms*/ 1_000,
|
|
SandboxPermissions::RequireEscalated,
|
|
Some(vec!["touch".to_string()]),
|
|
)?;
|
|
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-invalid-prefix-1"),
|
|
event,
|
|
ev_completed("resp-invalid-prefix-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
let second_results = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-invalid-prefix-1", "done"),
|
|
ev_completed("resp-invalid-prefix-2"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
submit_turn(
|
|
&test,
|
|
"invalid-prefix-rule",
|
|
approval_policy,
|
|
sandbox_policy.clone(),
|
|
)
|
|
.await?;
|
|
|
|
wait_for_completion_without_approval(&test).await;
|
|
|
|
let second_output = parse_result(
|
|
&second_results
|
|
.single_request()
|
|
.function_call_output(call_id),
|
|
);
|
|
assert_eq!(second_output.exit_code.unwrap_or(0), 0);
|
|
assert!(
|
|
second_output.stdout.is_empty(),
|
|
"unexpected stdout: {}",
|
|
second_output.stdout
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "current_thread")]
|
|
async fn denying_network_policy_amendment_persists_policy_and_skips_future_network_prompt()
|
|
-> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = start_mock_server().await;
|
|
let home = Arc::new(TempDir::new()?);
|
|
fs::write(
|
|
home.path().join("config.toml"),
|
|
r#"default_permissions = "workspace"
|
|
|
|
[permissions.workspace.filesystem]
|
|
":minimal" = "read"
|
|
|
|
[permissions.workspace.network]
|
|
enabled = true
|
|
mode = "limited"
|
|
allow_local_binding = true
|
|
"#,
|
|
)?;
|
|
let approval_policy = AskForApproval::OnFailure;
|
|
let sandbox_policy = SandboxPolicy::WorkspaceWrite {
|
|
writable_roots: vec![],
|
|
read_only_access: Default::default(),
|
|
network_access: true,
|
|
exclude_tmpdir_env_var: false,
|
|
exclude_slash_tmp: false,
|
|
};
|
|
let sandbox_policy_for_config = sandbox_policy.clone();
|
|
let mut builder = test_codex().with_home(home).with_config(move |config| {
|
|
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
|
|
config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config);
|
|
let layers = config
|
|
.config_layer_stack
|
|
.get_layers(
|
|
ConfigLayerStackOrdering::LowestPrecedenceFirst,
|
|
/*include_disabled*/ true,
|
|
)
|
|
.into_iter()
|
|
.cloned()
|
|
.collect();
|
|
let mut requirements = config.config_layer_stack.requirements().clone();
|
|
requirements.network = Some(Sourced::new(
|
|
NetworkConstraints {
|
|
enabled: Some(true),
|
|
allow_local_binding: Some(true),
|
|
..Default::default()
|
|
},
|
|
RequirementSource::CloudRequirements,
|
|
));
|
|
let mut requirements_toml = config.config_layer_stack.requirements_toml().clone();
|
|
requirements_toml.network = Some(NetworkRequirementsToml {
|
|
enabled: Some(true),
|
|
allow_local_binding: Some(true),
|
|
..Default::default()
|
|
});
|
|
config.config_layer_stack = ConfigLayerStack::new(layers, requirements, requirements_toml)
|
|
.expect("rebuild config layer stack with network requirements");
|
|
});
|
|
let test = builder.build(&server).await?;
|
|
assert!(
|
|
test.config.managed_network_requirements_enabled(),
|
|
"expected managed network requirements to be enabled"
|
|
);
|
|
assert!(
|
|
test.config.permissions.network.is_some(),
|
|
"expected managed network proxy config to be present"
|
|
);
|
|
test.session_configured
|
|
.network_proxy
|
|
.as_ref()
|
|
.expect("expected runtime managed network proxy addresses");
|
|
|
|
let call_id_first = "allow-network-first";
|
|
// Use urllib without overriding proxy settings so managed-network sessions
|
|
// continue to exercise the env-based proxy routing path under bubblewrap.
|
|
let fetch_command = r#"python3 -c "import urllib.request; opener = urllib.request.build_opener(urllib.request.ProxyHandler()); print('OK:' + opener.open('http://codex-network-test.invalid', timeout=30).read().decode(errors='replace'))""#
|
|
.to_string();
|
|
let first_event = shell_event(
|
|
call_id_first,
|
|
&fetch_command,
|
|
/*timeout_ms*/ 30_000,
|
|
SandboxPermissions::UseDefault,
|
|
)?;
|
|
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-allow-network-1"),
|
|
first_event,
|
|
ev_completed("resp-allow-network-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
let first_results = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-allow-network-1", "done"),
|
|
ev_completed("resp-allow-network-2"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
submit_turn(
|
|
&test,
|
|
"allow-network-first",
|
|
approval_policy,
|
|
sandbox_policy.clone(),
|
|
)
|
|
.await?;
|
|
|
|
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(30);
|
|
let approval = loop {
|
|
let remaining = deadline
|
|
.checked_duration_since(std::time::Instant::now())
|
|
.expect("timed out waiting for network approval request");
|
|
let event = wait_for_event_with_timeout(
|
|
&test.codex,
|
|
|event| {
|
|
matches!(
|
|
event,
|
|
EventMsg::ExecApprovalRequest(_) | EventMsg::TurnComplete(_)
|
|
)
|
|
},
|
|
remaining,
|
|
)
|
|
.await;
|
|
match event {
|
|
EventMsg::ExecApprovalRequest(approval) => {
|
|
if approval.command.first().map(std::string::String::as_str)
|
|
== Some("network-access")
|
|
{
|
|
break approval;
|
|
}
|
|
test.codex
|
|
.submit(Op::ExecApproval {
|
|
id: approval.effective_approval_id(),
|
|
turn_id: None,
|
|
decision: ReviewDecision::Approved,
|
|
})
|
|
.await?;
|
|
}
|
|
EventMsg::TurnComplete(_) => {
|
|
panic!("expected network approval request before completion");
|
|
}
|
|
other => panic!("unexpected event: {other:?}"),
|
|
}
|
|
};
|
|
let network_context = approval
|
|
.network_approval_context
|
|
.clone()
|
|
.expect("expected network approval context");
|
|
assert_eq!(network_context.protocol, NetworkApprovalProtocol::Http);
|
|
let expected_network_amendments = vec![
|
|
NetworkPolicyAmendment {
|
|
host: network_context.host.clone(),
|
|
action: NetworkPolicyRuleAction::Allow,
|
|
},
|
|
NetworkPolicyAmendment {
|
|
host: network_context.host.clone(),
|
|
action: NetworkPolicyRuleAction::Deny,
|
|
},
|
|
];
|
|
assert_eq!(
|
|
approval.proposed_network_policy_amendments,
|
|
Some(expected_network_amendments.clone())
|
|
);
|
|
let deny_network_amendment = expected_network_amendments
|
|
.into_iter()
|
|
.find(|amendment| amendment.action == NetworkPolicyRuleAction::Deny)
|
|
.expect("expected deny network policy amendment");
|
|
|
|
test.codex
|
|
.submit(Op::ExecApproval {
|
|
id: approval.effective_approval_id(),
|
|
turn_id: None,
|
|
decision: ReviewDecision::NetworkPolicyAmendment {
|
|
network_policy_amendment: deny_network_amendment.clone(),
|
|
},
|
|
})
|
|
.await?;
|
|
wait_for_completion(&test).await;
|
|
|
|
let policy_path = test.home.path().join("rules").join("default.rules");
|
|
let policy_contents = fs::read_to_string(&policy_path)?;
|
|
let expected_rule = format!(
|
|
r#"network_rule(host="{}", protocol="{}", decision="deny", justification="Deny {} access to {}")"#,
|
|
deny_network_amendment.host,
|
|
match network_context.protocol {
|
|
NetworkApprovalProtocol::Http => "http",
|
|
NetworkApprovalProtocol::Https => "https_connect",
|
|
NetworkApprovalProtocol::Socks5Tcp => "socks5_tcp",
|
|
NetworkApprovalProtocol::Socks5Udp => "socks5_udp",
|
|
},
|
|
match network_context.protocol {
|
|
NetworkApprovalProtocol::Http => "http",
|
|
NetworkApprovalProtocol::Https => "https_connect",
|
|
NetworkApprovalProtocol::Socks5Tcp => "socks5_tcp",
|
|
NetworkApprovalProtocol::Socks5Udp => "socks5_udp",
|
|
},
|
|
deny_network_amendment.host
|
|
);
|
|
assert!(
|
|
policy_contents.contains(&expected_rule),
|
|
"unexpected policy contents: {policy_contents}"
|
|
);
|
|
|
|
let first_output = parse_result(
|
|
&first_results
|
|
.single_request()
|
|
.function_call_output(call_id_first),
|
|
);
|
|
Expectation::CommandFailure {
|
|
output_contains: "",
|
|
}
|
|
.verify(&test, &first_output)?;
|
|
|
|
let call_id_second = "allow-network-second";
|
|
let second_event = shell_event(
|
|
call_id_second,
|
|
&fetch_command,
|
|
/*timeout_ms*/ 30_000,
|
|
SandboxPermissions::UseDefault,
|
|
)?;
|
|
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-allow-network-3"),
|
|
second_event,
|
|
ev_completed("resp-allow-network-3"),
|
|
]),
|
|
)
|
|
.await;
|
|
let second_results = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-allow-network-2", "done"),
|
|
ev_completed("resp-allow-network-4"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
submit_turn(
|
|
&test,
|
|
"allow-network-second",
|
|
approval_policy,
|
|
sandbox_policy.clone(),
|
|
)
|
|
.await?;
|
|
|
|
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(30);
|
|
loop {
|
|
let remaining = deadline
|
|
.checked_duration_since(std::time::Instant::now())
|
|
.expect("timed out waiting for second turn completion");
|
|
let event = wait_for_event_with_timeout(
|
|
&test.codex,
|
|
|event| {
|
|
matches!(
|
|
event,
|
|
EventMsg::ExecApprovalRequest(_) | EventMsg::TurnComplete(_)
|
|
)
|
|
},
|
|
remaining,
|
|
)
|
|
.await;
|
|
match event {
|
|
EventMsg::ExecApprovalRequest(approval) => {
|
|
if approval.command.first().map(std::string::String::as_str)
|
|
== Some("network-access")
|
|
{
|
|
panic!(
|
|
"unexpected network approval request: {:?}",
|
|
approval.command
|
|
);
|
|
}
|
|
test.codex
|
|
.submit(Op::ExecApproval {
|
|
id: approval.effective_approval_id(),
|
|
turn_id: None,
|
|
decision: ReviewDecision::Approved,
|
|
})
|
|
.await?;
|
|
}
|
|
EventMsg::TurnComplete(_) => break,
|
|
other => panic!("unexpected event: {other:?}"),
|
|
}
|
|
}
|
|
|
|
let second_output = parse_result(
|
|
&second_results
|
|
.single_request()
|
|
.function_call_output(call_id_second),
|
|
);
|
|
Expectation::CommandFailure {
|
|
output_contains: "",
|
|
}
|
|
.verify(&test, &second_output)?;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
// todo(dylan) add ScenarioSpec support for rules
|
|
#[tokio::test(flavor = "current_thread")]
|
|
#[cfg(unix)]
|
|
async fn compound_command_with_one_safe_command_still_requires_approval() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = start_mock_server().await;
|
|
let approval_policy = AskForApproval::UnlessTrusted;
|
|
let sandbox_policy = SandboxPolicy::new_workspace_write_policy();
|
|
let sandbox_policy_for_config = sandbox_policy.clone();
|
|
let mut builder = test_codex().with_config(move |config| {
|
|
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
|
|
config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config);
|
|
});
|
|
let test = builder.build(&server).await?;
|
|
|
|
let rules_dir = test.home.path().join("rules");
|
|
fs::create_dir_all(&rules_dir)?;
|
|
fs::write(
|
|
rules_dir.join("default.rules"),
|
|
r#"prefix_rule(pattern=["touch", "allow-prefix.txt"], decision="allow")"#,
|
|
)?;
|
|
|
|
let call_id = "heredoc-with-chained-prefix";
|
|
let command = "touch ./test.txt && rm ./test.txt";
|
|
let (event, expected_command) = ActionKind::RunCommand { command }
|
|
.prepare(&test, &server, call_id, SandboxPermissions::UseDefault)
|
|
.await?;
|
|
let expected_command =
|
|
expected_command.expect("compound command should produce a shell command");
|
|
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-heredoc-prefix-1"),
|
|
event,
|
|
ev_completed("resp-heredoc-prefix-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
let _ = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-heredoc-prefix-1", "done"),
|
|
ev_completed("resp-heredoc-prefix-2"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
submit_turn(
|
|
&test,
|
|
"compound command",
|
|
approval_policy,
|
|
sandbox_policy.clone(),
|
|
)
|
|
.await?;
|
|
|
|
let approval = expect_exec_approval(&test, expected_command.as_str()).await;
|
|
test.codex
|
|
.submit(Op::ExecApproval {
|
|
id: approval.effective_approval_id(),
|
|
turn_id: None,
|
|
decision: ReviewDecision::Denied,
|
|
})
|
|
.await?;
|
|
wait_for_completion(&test).await;
|
|
|
|
Ok(())
|
|
}
|