Refactor execpolicy fallback evaluation (#7544)

## Refactor of the `execpolicy` crate

To illustrate why we need this refactor, consider an agent attempting to
run `apple | rm -rf ./`. Suppose `apple` is allowed by `execpolicy`.
Before this PR, `execpolicy` would consider `apple` and `rm -rf ./` and only
render one rule match: `Allow` (for `apple`). We would skip any heuristic checks
on `rm -rf ./` and immediately approve `apple | rm -rf ./` to run.

To fix this, we now thread a `fallback` evaluation function into
`execpolicy` that runs when no `execpolicy` rules match a given command.
In our example, we would run `fallback` on `rm -rf ./` and prevent
`apple | rm -rf ./` from being run without approval.
This commit is contained in:
zhao-oai
2025-12-04 02:39:48 -05:00
committed by GitHub
parent e925a380dc
commit 3d35cb4619
27 changed files with 538 additions and 257 deletions

View File

@@ -6,6 +6,7 @@ use codex_core::protocol::ApplyPatchApprovalRequestEvent;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
use codex_core::protocol::ExecApprovalRequestEvent;
use codex_core::protocol::ExecPolicyAmendment;
use codex_core::protocol::Op;
use codex_core::protocol::SandboxPolicy;
use codex_protocol::config_types::ReasoningSummary;
@@ -1560,7 +1561,7 @@ async fn run_scenario(scenario: &ScenarioSpec) -> Result<()> {
#[tokio::test(flavor = "current_thread")]
#[cfg(unix)]
async fn approving_allow_prefix_persists_policy_and_skips_future_prompts() -> Result<()> {
async fn approving_execpolicy_amendment_persists_policy_and_skips_future_prompts() -> Result<()> {
let server = start_mock_server().await;
let approval_policy = AskForApproval::UnlessTrusted;
let sandbox_policy = SandboxPolicy::ReadOnly;
@@ -1580,8 +1581,9 @@ async fn approving_allow_prefix_persists_policy_and_skips_future_prompts() -> Re
.prepare(&test, &server, call_id_first, false)
.await?;
let expected_command =
expected_command.expect("allow prefix scenario should produce a shell command");
let expected_allow_prefix = vec!["touch".to_string(), "allow-prefix.txt".to_string()];
expected_command.expect("execpolicy amendment scenario should produce a shell command");
let expected_execpolicy_amendment =
ExecPolicyAmendment::new(vec!["touch".to_string(), "allow-prefix.txt".to_string()]);
let _ = mount_sse_once(
&server,
@@ -1610,13 +1612,16 @@ async fn approving_allow_prefix_persists_policy_and_skips_future_prompts() -> Re
.await?;
let approval = expect_exec_approval(&test, expected_command.as_str()).await;
assert_eq!(approval.allow_prefix, Some(expected_allow_prefix.clone()));
assert_eq!(
approval.proposed_execpolicy_amendment,
Some(expected_execpolicy_amendment.clone())
);
test.codex
.submit(Op::ExecApproval {
id: "0".into(),
decision: ReviewDecision::ApprovedAllowPrefix {
allow_prefix: expected_allow_prefix.clone(),
decision: ReviewDecision::ApprovedExecpolicyAmendment {
proposed_execpolicy_amendment: expected_execpolicy_amendment.clone(),
},
})
.await?;