Refactor execpolicy fallback evaluation (#7544)

## Refactor of the `execpolicy` crate

To illustrate why we need this refactor, consider an agent attempting to run `apple | rm -rf ./`. Suppose `apple` is allowed by `execpolicy`. Before this PR, `execpolicy` would consider both `apple` and `rm -rf ./` but render only one rule match: `Allow`. We would skip any heuristics checks on `rm -rf ./` and immediately approve `apple | rm -rf ./` to run. To fix this, we now thread a `fallback` evaluation function into `execpolicy` that runs when no `execpolicy` rules match a given command. In our example, we would run `fallback` on `rm -rf ./` and prevent `apple | rm -rf ./` from being run without approval.
2026-04-27 16:15:09 +00:00 · 2025-12-04 02:39:48 -05:00
parent e925a380dc
commit 3d35cb4619
27 changed files with 538 additions and 257 deletions
--- a/codex-rs/core/tests/suite/approvals.rs
+++ b/codex-rs/core/tests/suite/approvals.rs
@@ -6,6 +6,7 @@ use codex_core::protocol::ApplyPatchApprovalRequestEvent;
 use codex_core::protocol::AskForApproval;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::ExecApprovalRequestEvent;
+use codex_core::protocol::ExecPolicyAmendment;
 use codex_core::protocol::Op;
 use codex_core::protocol::SandboxPolicy;
 use codex_protocol::config_types::ReasoningSummary;
@@ -1560,7 +1561,7 @@ async fn run_scenario(scenario: &ScenarioSpec) -> Result<()> {

 #[tokio::test(flavor = "current_thread")]
 #[cfg(unix)]
-async fn approving_allow_prefix_persists_policy_and_skips_future_prompts() -> Result<()> {
+async fn approving_execpolicy_amendment_persists_policy_and_skips_future_prompts() -> Result<()> {
    let server = start_mock_server().await;
    let approval_policy = AskForApproval::UnlessTrusted;
    let sandbox_policy = SandboxPolicy::ReadOnly;
@@ -1580,8 +1581,9 @@ async fn approving_allow_prefix_persists_policy_and_skips_future_prompts() -> Re
    .prepare(&test, &server, call_id_first, false)
    .await?;
    let expected_command =
-        expected_command.expect("allow prefix scenario should produce a shell command");
-    let expected_allow_prefix = vec!["touch".to_string(), "allow-prefix.txt".to_string()];
+        expected_command.expect("execpolicy amendment scenario should produce a shell command");
+    let expected_execpolicy_amendment =
+        ExecPolicyAmendment::new(vec!["touch".to_string(), "allow-prefix.txt".to_string()]);

    let _ = mount_sse_once(
        &server,
@@ -1610,13 +1612,16 @@ async fn approving_allow_prefix_persists_policy_and_skips_future_prompts() -> Re
    .await?;

    let approval = expect_exec_approval(&test, expected_command.as_str()).await;
-    assert_eq!(approval.allow_prefix, Some(expected_allow_prefix.clone()));
+    assert_eq!(
+        approval.proposed_execpolicy_amendment,
+        Some(expected_execpolicy_amendment.clone())
+    );

    test.codex
        .submit(Op::ExecApproval {
            id: "0".into(),
-            decision: ReviewDecision::ApprovedAllowPrefix {
-                allow_prefix: expected_allow_prefix.clone(),
+            decision: ReviewDecision::ApprovedExecpolicyAmendment {
+                proposed_execpolicy_amendment: expected_execpolicy_amendment.clone(),
            },
        })
        .await?;