feat: integrating heuristics-based fallback in execpolicy

This commit is contained in:
kevin zhao
2025-12-01 18:36:05 -05:00
parent a69bd729db
commit a91a00e8a7
5 changed files with 375 additions and 147 deletions

View File

@@ -8,6 +8,7 @@ use codex_execpolicy::AmendError;
use codex_execpolicy::Decision;
use codex_execpolicy::Error as ExecPolicyRuleError;
use codex_execpolicy::Evaluation;
use codex_execpolicy::RuleMatch;
use codex_execpolicy::Policy;
use codex_execpolicy::PolicyParser;
use codex_execpolicy::blocking_append_allow_prefix_rule;
@@ -129,52 +130,35 @@ pub(crate) async fn append_allow_prefix_rule_and_update(
Ok(())
}
/// Translate an execpolicy decision into an approval requirement.
///
/// * `Allow` skips approval and bypasses the sandbox.
/// * `Forbidden` is rejected with `FORBIDDEN_REASON`.
/// * `Prompt` asks for approval, unless the approval policy is `Never`, in
///   which case the command is rejected with `PROMPT_REASON` instead.
fn requirement_from_decision(
    decision: Decision,
    approval_policy: AskForApproval,
) -> ApprovalRequirement {
    match (decision, approval_policy) {
        (Decision::Allow, _) => ApprovalRequirement::Skip {
            bypass_sandbox: true,
        },
        (Decision::Forbidden, _) => ApprovalRequirement::Forbidden {
            reason: FORBIDDEN_REASON.to_string(),
        },
        // Nobody can be asked when approvals are disabled, so a prompt becomes a refusal.
        (Decision::Prompt, AskForApproval::Never) => ApprovalRequirement::Forbidden {
            reason: PROMPT_REASON.to_string(),
        },
        (Decision::Prompt, _) => ApprovalRequirement::NeedsApproval {
            reason: Some(PROMPT_REASON.to_string()),
            allow_prefix: None,
        },
    }
}
/// Return an allow-prefix option when a single plain command needs approval without
/// any matching policy rule. We only surface the prefix opt-in when execpolicy did
/// not already drive the decision (NoMatch) and when the command is a single
/// unrolled command (multi-part scripts shouldn't be whitelisted via prefix) and
/// when execpolicy feature is enabled.
fn allow_prefix_if_applicable(
policy: &Policy,
commands: &[Vec<String>],
features: &Features,
) -> Option<Vec<String>> {
/// Return an allow-prefix option when we are prompting solely due to heuristics;
/// any rule-driven prompt suppresses the opt-in. Only surfaced when the execpolicy
/// feature is enabled.
fn allow_prefix_if_applicable(evaluation: &Evaluation, features: &Features) -> Option<Vec<String>> {
if !features.enabled(Feature::ExecPolicy) {
return None;
}
// Only offer a prefix when the prompt is driven by heuristics (policy has no matches).
// For multi-command scripts, choose the first command segment that also has no policy
// match (i.e. the first segment that could plausibly be the prompt trigger).
commands
.iter()
.find(|cmd| matches!(policy.check(cmd), Evaluation::NoMatch))
.cloned()
if evaluation.decision != Decision::Prompt {
return None;
}
let mut first_prompt_from_heuristics: Option<Vec<String>> = None;
for rule_match in &evaluation.matched_rules {
match rule_match {
RuleMatch::HeuristicsRuleMatch { command, decision } => {
if *decision == Decision::Prompt && first_prompt_from_heuristics.is_none() {
first_prompt_from_heuristics = Some(command.clone());
}
}
_ if rule_match.decision() == Decision::Prompt => {
// Any rule-based prompt suppresses allow-prefix.
return None;
}
_ => {}
}
}
first_prompt_from_heuristics
}
pub(crate) fn create_approval_requirement_for_command(
@@ -186,30 +170,57 @@ pub(crate) fn create_approval_requirement_for_command(
sandbox_permissions: SandboxPermissions,
) -> ApprovalRequirement {
let commands = parse_shell_lc_plain_commands(command).unwrap_or_else(|| vec![command.to_vec()]);
let evaluation = policy.check_multiple(commands.iter());
let heuristics_fallback = |cmd: &[String]| {
if requires_initial_appoval(approval_policy, sandbox_policy, cmd, sandbox_permissions) {
Decision::Prompt
} else {
Decision::Allow
}
};
let evaluation = policy.check_multiple(commands.iter(), &heuristics_fallback);
let has_policy_allow = evaluation.matched_rules.iter().any(|rule_match| {
!matches!(rule_match, RuleMatch::HeuristicsRuleMatch { .. })
&& rule_match.decision() == Decision::Allow
});
match evaluation {
Evaluation::Match { decision, .. } => requirement_from_decision(decision, approval_policy),
Evaluation::NoMatch { .. } => {
if requires_initial_appoval(
approval_policy,
sandbox_policy,
command,
sandbox_permissions,
) {
ApprovalRequirement::NeedsApproval {
reason: None,
allow_prefix: allow_prefix_if_applicable(policy, &commands, features),
match evaluation.decision {
Decision::Forbidden => ApprovalRequirement::Forbidden {
reason: FORBIDDEN_REASON.to_string(),
},
Decision::Prompt => {
let prompt_reason = prompt_reason_for(&evaluation);
if matches!(approval_policy, AskForApproval::Never) {
ApprovalRequirement::Forbidden {
reason: prompt_reason.unwrap_or_else(|| PROMPT_REASON.to_string()),
}
} else {
ApprovalRequirement::Skip {
bypass_sandbox: false,
ApprovalRequirement::NeedsApproval {
reason: prompt_reason,
allow_prefix: allow_prefix_if_applicable(&evaluation, features),
}
}
}
Decision::Allow => ApprovalRequirement::Skip {
bypass_sandbox: has_policy_allow,
},
}
}
/// Pick the reason attached to a prompt.
///
/// Returns `PROMPT_REASON` when at least one non-heuristics rule match carries a
/// `Prompt` decision. Returns `None` when only heuristics matches prompted (or
/// nothing prompted), so callers can supply their own contextual message.
fn prompt_reason_for(evaluation: &Evaluation) -> Option<String> {
    let prompted_by_policy_rule = evaluation.matched_rules.iter().any(|rule_match| {
        let from_heuristics = matches!(rule_match, RuleMatch::HeuristicsRuleMatch { .. });
        !from_heuristics && rule_match.decision() == Decision::Prompt
    });
    prompted_by_policy_rule.then(|| PROMPT_REASON.to_string())
}
async fn collect_policy_files(dir: &Path) -> Result<Vec<PathBuf>, ExecPolicyError> {
let mut read_dir = match fs::read_dir(dir).await {
Ok(read_dir) => read_dir,
@@ -279,10 +290,20 @@ mod tests {
.expect("policy result");
let commands = [vec!["rm".to_string()]];
assert!(matches!(
policy.read().await.check_multiple(commands.iter()),
Evaluation::NoMatch { .. }
));
let evaluation = policy
.read()
.await
.check_multiple(commands.iter(), &|_| Decision::Allow);
assert_eq!(
Evaluation {
decision: Decision::Allow,
matched_rules: vec![RuleMatch::HeuristicsRuleMatch {
command: vec!["rm".to_string()],
decision: Decision::Allow
}],
},
evaluation
);
assert!(!temp_dir.path().join(POLICY_DIR_NAME).exists());
}
@@ -313,10 +334,20 @@ mod tests {
.await
.expect("policy result");
let command = [vec!["rm".to_string()]];
assert!(matches!(
policy.read().await.check_multiple(command.iter()),
Evaluation::Match { .. }
));
let evaluation = policy
.read()
.await
.check_multiple(command.iter(), &|_| Decision::Allow);
assert_eq!(
Evaluation {
decision: Decision::Forbidden,
matched_rules: vec![RuleMatch::PrefixRuleMatch {
matched_prefix: vec!["rm".to_string()],
decision: Decision::Forbidden
}],
},
evaluation
);
}
#[tokio::test]
@@ -332,10 +363,20 @@ mod tests {
.await
.expect("policy result");
let command = [vec!["ls".to_string()]];
assert!(matches!(
policy.read().await.check_multiple(command.iter()),
Evaluation::NoMatch { .. }
));
let evaluation = policy
.read()
.await
.check_multiple(command.iter(), &|_| Decision::Allow);
assert_eq!(
Evaluation {
decision: Decision::Allow,
matched_rules: vec![RuleMatch::HeuristicsRuleMatch {
command: vec!["ls".to_string()],
decision: Decision::Allow
}],
},
evaluation
);
}
#[test]
@@ -450,6 +491,38 @@ prefix_rule(pattern=["rm"], decision="forbidden")
);
}
#[test]
fn heuristics_apply_when_other_commands_match_policy() {
    // `apple` is covered by an explicit allow rule; `orange` has no rule, so it
    // is left to the heuristics fallback.
    let mut parser = PolicyParser::new();
    parser
        .parse(
            "test.codexpolicy",
            r#"prefix_rule(pattern=["apple"], decision="allow")"#,
        )
        .expect("parse policy");
    let policy = parser.build();

    let command: Vec<String> = ["bash", "-lc", "apple | orange"]
        .iter()
        .map(|token| token.to_string())
        .collect();

    let requirement = create_approval_requirement_for_command(
        &policy,
        &Features::with_defaults(),
        &command,
        AskForApproval::UnlessTrusted,
        &SandboxPolicy::DangerFullAccess,
        SandboxPermissions::UseDefault,
    );

    // The prompt comes from heuristics alone, so no reason is attached and the
    // unmatched segment is offered as the allow-prefix.
    let expected = ApprovalRequirement::NeedsApproval {
        reason: None,
        allow_prefix: Some(vec!["orange".to_string()]),
    };
    assert_eq!(requirement, expected);
}
#[tokio::test]
async fn append_allow_prefix_rule_updates_policy_and_file() {
let codex_home = tempdir().expect("create temp dir");
@@ -460,14 +533,13 @@ prefix_rule(pattern=["rm"], decision="forbidden")
.await
.expect("update policy");
let evaluation = current_policy.read().await.check(&[
"echo".to_string(),
"hello".to_string(),
"world".to_string(),
]);
let evaluation = current_policy.read().await.check(
&["echo".to_string(), "hello".to_string(), "world".to_string()],
&|_| Decision::Allow,
);
assert!(matches!(
evaluation,
Evaluation::Match {
Evaluation {
decision: Decision::Allow,
..
}
@@ -624,6 +696,11 @@ prefix_rule(pattern=["rm"], decision="forbidden")
SandboxPermissions::UseDefault,
);
assert_eq!(requirement, ApprovalRequirement::Skip);
assert_eq!(
requirement,
ApprovalRequirement::Skip {
bypass_sandbox: true
}
);
}
}

View File

@@ -1,22 +1,67 @@
use std::fs;
use std::path::PathBuf;
use anyhow::Context;
use anyhow::Result;
use clap::Parser;
use codex_execpolicy::ExecPolicyCheckCommand;
use codex_execpolicy::Decision;
use codex_execpolicy::PolicyParser;
/// CLI for evaluating exec policies
#[derive(Parser)]
#[command(name = "codex-execpolicy")]
enum Cli {
/// Evaluate a command against a policy.
Check(ExecPolicyCheckCommand),
Check {
#[arg(short, long = "policy", value_name = "PATH", required = true)]
policies: Vec<PathBuf>,
/// Pretty-print the JSON output.
#[arg(long)]
pretty: bool,
/// Command tokens to check.
#[arg(
value_name = "COMMAND",
required = true,
trailing_var_arg = true,
allow_hyphen_values = true
)]
command: Vec<String>,
},
}
fn main() -> Result<()> {
let cli = Cli::parse();
match cli {
Cli::Check(cmd) => cmd_check(cmd),
Cli::Check {
policies,
command,
pretty,
} => cmd_check(policies, command, pretty),
}
}
fn cmd_check(cmd: ExecPolicyCheckCommand) -> Result<()> {
cmd.run()
/// Load the given policy files, evaluate `args` against them, and print the
/// evaluation as JSON on stdout (pretty-printed when `pretty` is set).
fn cmd_check(policy_paths: Vec<PathBuf>, args: Vec<String>, pretty: bool) -> Result<()> {
    let policy = load_policies(&policy_paths)?;
    // Commands without a matching rule fall back to `Decision::Allow` here.
    let evaluation = policy.check(&args, &|_| Decision::Allow);
    let serialized = if pretty {
        serde_json::to_string_pretty(&evaluation)
    } else {
        serde_json::to_string(&evaluation)
    };
    let json = serialized?;
    println!("{json}");
    Ok(())
}
/// Read and parse every file in `policy_paths` into one combined policy.
///
/// Each file is registered with the parser under its (lossily converted) path.
/// Fails on the first file that cannot be read or parsed.
fn load_policies(policy_paths: &[PathBuf]) -> Result<codex_execpolicy::Policy> {
    let mut parser = PolicyParser::new();
    for path in policy_paths {
        let contents = fs::read_to_string(path)
            .with_context(|| format!("failed to read policy at {}", path.display()))?;
        let identifier = path.to_string_lossy().into_owned();
        parser.parse(&identifier, &contents)?;
    }
    Ok(parser.build())
}

View File

@@ -50,62 +50,81 @@ impl Policy {
Ok(())
}
pub fn check(&self, cmd: &[String]) -> Evaluation {
let rules = match cmd.first() {
Some(first) => match self.rules_by_program.get_vec(first) {
Some(rules) => rules,
None => return Evaluation::NoMatch {},
},
None => return Evaluation::NoMatch {},
};
let matched_rules: Vec<RuleMatch> =
rules.iter().filter_map(|rule| rule.matches(cmd)).collect();
match matched_rules.iter().map(RuleMatch::decision).max() {
Some(decision) => Evaluation::Match {
decision,
matched_rules,
},
None => Evaluation::NoMatch {},
}
/// Evaluate a single command against the policy.
///
/// `heuristics_fallback` supplies the decision when no policy rule matches
/// `cmd`; the resulting matches are folded into an [`Evaluation`].
pub fn check<F>(&self, cmd: &[String], heuristics_fallback: &F) -> Evaluation
where
    F: Fn(&[String]) -> Decision,
{
    Evaluation::from_matches(self.matches_for_command(cmd, heuristics_fallback))
}
pub fn check_multiple<Commands>(&self, commands: Commands) -> Evaluation
pub fn check_multiple<Commands, F>(
&self,
commands: Commands,
heuristics_fallback: &F,
) -> Evaluation
where
Commands: IntoIterator,
Commands::Item: AsRef<[String]>,
F: Fn(&[String]) -> Decision,
{
let matched_rules: Vec<RuleMatch> = commands
.into_iter()
.flat_map(|command| match self.check(command.as_ref()) {
Evaluation::Match { matched_rules, .. } => matched_rules,
Evaluation::NoMatch { .. } => Vec::new(),
})
.flat_map(|command| self.matches_for_command(command.as_ref(), heuristics_fallback))
.collect();
match matched_rules.iter().map(RuleMatch::decision).max() {
Some(decision) => Evaluation::Match {
decision,
matched_rules,
},
None => Evaluation::NoMatch {},
Evaluation::from_matches(matched_rules)
}
/// Collect the rule matches for one command.
///
/// Rules are looked up by the command's first token. When no rule matches
/// (including when `cmd` is empty), the result is a single
/// `HeuristicsRuleMatch` whose decision comes from `heuristics_fallback`;
/// the fallback is not invoked otherwise.
fn matches_for_command<F>(&self, cmd: &[String], heuristics_fallback: &F) -> Vec<RuleMatch>
where
    F: Fn(&[String]) -> Decision,
{
    let policy_matches: Vec<RuleMatch> = cmd
        .first()
        .and_then(|program| self.rules_by_program.get_vec(program))
        .into_iter()
        .flatten()
        .filter_map(|rule| rule.matches(cmd))
        .collect();

    if !policy_matches.is_empty() {
        return policy_matches;
    }

    vec![RuleMatch::HeuristicsRuleMatch {
        command: cmd.to_vec(),
        decision: heuristics_fallback(cmd),
    }]
}
}
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum Evaluation {
NoMatch {},
Match {
decision: Decision,
#[serde(rename = "matchedRules")]
matched_rules: Vec<RuleMatch>,
},
pub struct Evaluation {
pub decision: Decision,
#[serde(rename = "matchedRules")]
pub matched_rules: Vec<RuleMatch>,
}
impl Evaluation {
pub fn is_match(&self) -> bool {
matches!(self, Self::Match { .. })
self.matched_rules
.iter()
.any(|rule_match| !matches!(rule_match, RuleMatch::HeuristicsRuleMatch { .. }))
}
/// Build an evaluation from a list of rule matches.
///
/// The overall decision is the greatest decision among the matches (per
/// `Decision`'s ordering), or `Decision::Allow` when the list is empty.
fn from_matches(matched_rules: Vec<RuleMatch>) -> Self {
    let decision = match matched_rules.iter().map(RuleMatch::decision).max() {
        Some(highest) => highest,
        None => Decision::Allow,
    };
    Self {
        decision,
        matched_rules,
    }
}
}

View File

@@ -64,12 +64,17 @@ pub enum RuleMatch {
matched_prefix: Vec<String>,
decision: Decision,
},
HeuristicsRuleMatch {
command: Vec<String>,
decision: Decision,
},
}
impl RuleMatch {
    /// Returns the decision carried by this match, whichever variant produced it.
    pub fn decision(&self) -> Decision {
        match self {
            Self::PrefixRuleMatch { decision, .. }
            | Self::HeuristicsRuleMatch { decision, .. } => *decision,
        }
    }
}

View File

@@ -1,5 +1,6 @@
use std::any::Any;
use std::sync::Arc;
use std::sync::Mutex;
use anyhow::Context;
use anyhow::Result;
@@ -19,6 +20,14 @@ fn tokens(cmd: &[&str]) -> Vec<String> {
cmd.iter().map(std::string::ToString::to_string).collect()
}
/// Heuristics fallback for tests: every command is allowed, whatever its tokens.
fn allow_all(_command: &[String]) -> Decision {
    Decision::Allow
}
/// Heuristics fallback for tests: every command prompts, whatever its tokens.
fn prompt_all(_command: &[String]) -> Decision {
    Decision::Prompt
}
#[derive(Clone, Debug, Eq, PartialEq)]
enum RuleSnapshot {
Prefix(PrefixRule),
@@ -49,9 +58,9 @@ prefix_rule(
parser.parse("test.codexpolicy", policy_src)?;
let policy = parser.build();
let cmd = tokens(&["git", "status"]);
let evaluation = policy.check(&cmd);
let evaluation = policy.check(&cmd, &allow_all);
assert_eq!(
Evaluation::Match {
Evaluation {
decision: Decision::Allow,
matched_rules: vec![RuleMatch::PrefixRuleMatch {
matched_prefix: tokens(&["git", "status"]),
@@ -80,9 +89,9 @@ fn add_prefix_rule_extends_policy() -> Result<()> {
rules
);
let evaluation = policy.check(&tokens(&["ls", "-l", "/tmp"]));
let evaluation = policy.check(&tokens(&["ls", "-l", "/tmp"]), &allow_all);
assert_eq!(
Evaluation::Match {
Evaluation {
decision: Decision::Prompt,
matched_rules: vec![RuleMatch::PrefixRuleMatch {
matched_prefix: tokens(&["ls", "-l"]),
@@ -146,9 +155,9 @@ prefix_rule(
git_rules
);
let status_eval = policy.check(&tokens(&["git", "status"]));
let status_eval = policy.check(&tokens(&["git", "status"]), &allow_all);
assert_eq!(
Evaluation::Match {
Evaluation {
decision: Decision::Prompt,
matched_rules: vec![RuleMatch::PrefixRuleMatch {
matched_prefix: tokens(&["git"]),
@@ -158,9 +167,9 @@ prefix_rule(
status_eval
);
let commit_eval = policy.check(&tokens(&["git", "commit", "-m", "hi"]));
let commit_eval = policy.check(&tokens(&["git", "commit", "-m", "hi"]), &allow_all);
assert_eq!(
Evaluation::Match {
Evaluation {
decision: Decision::Forbidden,
matched_rules: vec![
RuleMatch::PrefixRuleMatch {
@@ -217,9 +226,9 @@ prefix_rule(
sh_rules
);
let bash_eval = policy.check(&tokens(&["bash", "-c", "echo", "hi"]));
let bash_eval = policy.check(&tokens(&["bash", "-c", "echo", "hi"]), &allow_all);
assert_eq!(
Evaluation::Match {
Evaluation {
decision: Decision::Allow,
matched_rules: vec![RuleMatch::PrefixRuleMatch {
matched_prefix: tokens(&["bash", "-c"]),
@@ -229,9 +238,9 @@ prefix_rule(
bash_eval
);
let sh_eval = policy.check(&tokens(&["sh", "-l", "echo", "hi"]));
let sh_eval = policy.check(&tokens(&["sh", "-l", "echo", "hi"]), &allow_all);
assert_eq!(
Evaluation::Match {
Evaluation {
decision: Decision::Allow,
matched_rules: vec![RuleMatch::PrefixRuleMatch {
matched_prefix: tokens(&["sh", "-l"]),
@@ -273,9 +282,9 @@ prefix_rule(
rules
);
let npm_i = policy.check(&tokens(&["npm", "i", "--legacy-peer-deps"]));
let npm_i = policy.check(&tokens(&["npm", "i", "--legacy-peer-deps"]), &allow_all);
assert_eq!(
Evaluation::Match {
Evaluation {
decision: Decision::Allow,
matched_rules: vec![RuleMatch::PrefixRuleMatch {
matched_prefix: tokens(&["npm", "i", "--legacy-peer-deps"]),
@@ -285,9 +294,12 @@ prefix_rule(
npm_i
);
let npm_install = policy.check(&tokens(&["npm", "install", "--no-save", "leftpad"]));
let npm_install = policy.check(
&tokens(&["npm", "install", "--no-save", "leftpad"]),
&allow_all,
);
assert_eq!(
Evaluation::Match {
Evaluation {
decision: Decision::Allow,
matched_rules: vec![RuleMatch::PrefixRuleMatch {
matched_prefix: tokens(&["npm", "install", "--no-save"]),
@@ -314,9 +326,9 @@ prefix_rule(
let mut parser = PolicyParser::new();
parser.parse("test.codexpolicy", policy_src)?;
let policy = parser.build();
let match_eval = policy.check(&tokens(&["git", "status"]));
let match_eval = policy.check(&tokens(&["git", "status"]), &allow_all);
assert_eq!(
Evaluation::Match {
Evaluation {
decision: Decision::Allow,
matched_rules: vec![RuleMatch::PrefixRuleMatch {
matched_prefix: tokens(&["git", "status"]),
@@ -326,13 +338,20 @@ prefix_rule(
match_eval
);
let no_match_eval = policy.check(&tokens(&[
"git",
"--config",
"color.status=always",
"status",
]));
assert_eq!(Evaluation::NoMatch {}, no_match_eval);
let no_match_eval = policy.check(
&tokens(&["git", "--config", "color.status=always", "status"]),
&allow_all,
);
assert_eq!(
Evaluation {
decision: Decision::Allow,
matched_rules: vec![RuleMatch::HeuristicsRuleMatch {
command: tokens(&["git", "--config", "color.status=always", "status",]),
decision: Decision::Allow,
}],
},
no_match_eval
);
Ok(())
}
@@ -352,9 +371,9 @@ prefix_rule(
parser.parse("test.codexpolicy", policy_src)?;
let policy = parser.build();
let commit = policy.check(&tokens(&["git", "commit", "-m", "hi"]));
let commit = policy.check(&tokens(&["git", "commit", "-m", "hi"]), &allow_all);
assert_eq!(
Evaluation::Match {
Evaluation {
decision: Decision::Forbidden,
matched_rules: vec![
RuleMatch::PrefixRuleMatch {
@@ -393,9 +412,9 @@ prefix_rule(
tokens(&["git", "commit", "-m", "hi"]),
];
let evaluation = policy.check_multiple(&commands);
let evaluation = policy.check_multiple(&commands, &allow_all);
assert_eq!(
Evaluation::Match {
Evaluation {
decision: Decision::Forbidden,
matched_rules: vec![
RuleMatch::PrefixRuleMatch {
@@ -416,3 +435,66 @@ prefix_rule(
);
Ok(())
}
#[test]
fn heuristics_match_is_returned_when_no_policy_matches() {
    // With an empty policy nothing can match, so the fallback decides.
    let command = tokens(&["python"]);
    let evaluation = Policy::empty().check(&command, &prompt_all);

    let expected = Evaluation {
        decision: Decision::Prompt,
        matched_rules: vec![RuleMatch::HeuristicsRuleMatch {
            command,
            decision: Decision::Prompt,
        }],
    };
    assert_eq!(expected, evaluation);
}
#[test]
fn heuristics_only_runs_for_commands_without_policy_matches() {
    let policy_src = r#"
prefix_rule(
pattern = ["git"],
decision = "allow",
)
"#;
    let mut parser = PolicyParser::new();
    parser
        .parse("policy.codexpolicy", policy_src)
        .expect("parse policy");
    let policy = parser.build();

    // `git status` is covered by the rule above; `python` is not.
    let python = tokens(&["python"]);
    let commands = vec![tokens(&["git", "status"]), python.clone()];

    // Record every command the fallback is asked about.
    let recorded_calls = Arc::new(Mutex::new(Vec::new()));
    let recorder = Arc::clone(&recorded_calls);
    let heuristics = move |cmd: &[String]| {
        recorder
            .lock()
            .expect("lock heuristics call log")
            .push(cmd.to_vec());
        Decision::Prompt
    };

    let evaluation = policy.check_multiple(&commands, &heuristics);
    assert_eq!(Decision::Prompt, evaluation.decision);

    let python_prompted_by_heuristics =
        evaluation
            .matched_rules
            .iter()
            .any(|rule_match| match rule_match {
                RuleMatch::HeuristicsRuleMatch {
                    command,
                    decision: Decision::Prompt,
                } => command == &python,
                _ => false,
            });
    assert!(python_prompted_by_heuristics);

    // The fallback must have been consulted for `python` only.
    let calls = recorded_calls
        .lock()
        .expect("lock heuristics call log after evaluation");
    assert_eq!(vec![python], *calls);
}