Compare commits

...

103 Commits

Author SHA1 Message Date
kevin zhao
ca1428670e better error handling + example 2025-11-14 00:54:47 -05:00
kevin zhao
f64e4aa4f4 fix initial approval bug 2025-11-13 23:23:39 -05:00
kevin zhao
15ccd83d44 more cleanup 2025-11-13 22:14:18 -05:00
kevin zhao
daef241372 fix execpolicy2_blocks_shell_invocation 2025-11-13 22:14:18 -05:00
kevin zhao
3735d677f5 more cleanup in exec_policy.rs 2025-11-13 22:14:18 -05:00
kevin zhao
ebbeff9b32 remove tab 2025-11-13 22:14:18 -05:00
kevin zhao
b7ab622709 add back space 2025-11-13 22:14:18 -05:00
kevin zhao
e266a8da1b add back comment 2025-11-13 22:14:17 -05:00
kevin zhao
9a15ae1286 inlining base_approval_requirequirements 2025-11-13 22:14:17 -05:00
kevin zhao
40b8a6e311 fmt 2025-11-13 22:14:17 -05:00
kevin zhao
f665b1ce08 further simplifying error cases 2025-11-13 22:14:17 -05:00
kevin zhao
84315d8234 simplify load policy 2025-11-13 22:14:17 -05:00
kevin zhao
c15e1b432b remove caching of policy 2025-11-13 22:14:17 -05:00
kevin zhao
24a9b7ba28 first pass at integrating execpolicy2 into codex 2025-11-13 22:14:13 -05:00
kevin zhao
bdb5210721 cleanup 2025-11-13 22:08:06 -05:00
kevin zhao
cd4df53316 parser state 2025-11-13 22:08:06 -05:00
kevin zhao
43db77a97b feat: implementing parse_many 2025-11-13 22:08:06 -05:00
kevin zhao
8cbb5378f3 check_multiple 2025-11-13 22:08:06 -05:00
zhao-oai
ce220f0be2 Use camelCase for policy responses 2025-11-13 22:07:46 -05:00
kevin zhao
7697a2bb81 update readme 2025-11-13 22:07:46 -05:00
kevin zhao
762881b60c remove use of parking lot 2025-11-13 22:07:46 -05:00
kevin zhao
2e14f258c3 now keeping PolicyBuilder in a RefCell, allowing add_rule to take &mut self w/o Mutex 2025-11-13 22:07:46 -05:00
kevin zhao
c4fea9d99f moving default codexpolicy to example 2025-11-13 22:07:45 -05:00
kevin zhao
ae17058704 directly passing starlark error 2025-11-13 22:07:45 -05:00
kevin zhao
820e26f9d2 better naming in Policy.parse() 2025-11-13 22:07:45 -05:00
kevin zhao
175e3530bc move validation logic to rule.rs 2025-11-13 22:07:45 -05:00
kevin zhao
c7993e0471 conslidate validate_match_examples and validate_not_match_examples 2025-11-13 22:07:45 -05:00
kevin zhao
dc76907771 refactor: rule traits 2025-11-13 22:07:45 -05:00
kevin zhao
f7fa9c5c0f using expect to provide more context if unwraps fail 2025-11-13 22:07:45 -05:00
kevin zhao
556510f378 avoid matching on as_slice() when you can use is_empty() 2025-11-13 22:07:45 -05:00
kevin zhao
b038bd71f9 sort deps 2025-11-13 22:07:45 -05:00
kevin zhao
ffdebc2937 again, more descriptive error messages 2025-11-13 22:07:44 -05:00
kevin zhao
9b3041ffe5 print out actual type in error message 2025-11-13 22:07:44 -05:00
kevin zhao
80c67243df cleanup 2025-11-13 22:07:44 -05:00
kevin zhao
83cfac9442 delete more display() 2025-11-13 22:07:44 -05:00
kevin zhao
9bf0c066d9 more cleanup 2025-11-13 22:07:44 -05:00
kevin zhao
615a027a18 further cleanup 2025-11-13 22:07:44 -05:00
kevin zhao
ceea9075ea docstrings and other cleanup 2025-11-13 22:07:44 -05:00
kevin zhao
3162a77e2a remove fragile test 2025-11-13 22:07:44 -05:00
kevin zhao
842d6adb59 refactor to test positive matches 2025-11-13 22:07:44 -05:00
kevin zhao
e51178cb27 consolidate new() and parse() in PolicyParser 2025-11-13 22:07:44 -05:00
kevin zhao
ff970d57fb using PathBufs in main.rs 2025-11-13 22:07:44 -05:00
kevin zhao
a4cd582b49 removing decription() 2025-11-13 22:07:43 -05:00
kevin zhao
ab546c69ed removing expect-test as dependency 2025-11-13 22:07:43 -05:00
kevin zhao
ad55ca629c refactor: using deep assertions instead of expect tests; removed Display impls 2025-11-13 22:07:43 -05:00
kevin zhao
8a98b7afd3 reordering so that impl comes before def 2025-11-13 22:07:43 -05:00
kevin zhao
7b5916cb4b Share prefix pattern data in execpolicy2 instead of cloning 2025-11-13 22:07:43 -05:00
kevin zhao
0f51ed0baa removing len() and is_empty() 2025-11-13 22:07:43 -05:00
kevin zhao
00757b43aa remove default policy 2025-11-13 22:07:43 -05:00
kevin zhao
b751be9156 cleanup and prettify 2025-11-13 22:07:43 -05:00
kevin zhao
4d0fffd387 moving up expect test dependency 2025-11-13 22:07:43 -05:00
kevin zhao
fd29c3f0a1 removing rg and sed from default codexpolicy 2025-11-13 22:07:43 -05:00
kevin zhao
8c909a0084 making things more functional 2025-11-13 22:07:43 -05:00
kevin zhao
2f8d3f6b42 code cleanup 2025-11-13 22:07:42 -05:00
kevin zhao
17f86a6c86 clippy 2025-11-13 22:07:42 -05:00
kevin zhao
d3aafb93f3 feat: tokenizing whole strings in match and not_match 2025-11-13 22:07:42 -05:00
zhao-oai
814c6506d2 execpolicy2 now inherits workspace crate version
Co-authored-by: Michael Bolin <mbolin@openai.com>
2025-11-13 22:07:42 -05:00
kevin zhao
94e8d4e0ae .policy -> .codexpolicy 2025-11-13 22:07:42 -05:00
kevin zhao
66dca34c1c using camelCase 2025-11-13 22:07:42 -05:00
kevin zhao
1743981ee5 evaluation -> check 2025-11-13 22:07:41 -05:00
kevin zhao
0e6d965f65 updating default.policy with better not_match (or, if it is too evident, removing not_match entirely) 2025-11-13 22:07:41 -05:00
kevin zhao
b323a9248d updating basic.rs to have a less obvious not match 2025-11-13 22:07:41 -05:00
kevin zhao
7681b325fe basic.rs: using expect tests 2025-11-13 22:07:41 -05:00
kevin zhao
2217cb05ee fix lint 2025-11-13 22:07:41 -05:00
kevin zhao
c086fe35a5 adding is_match to Evaluation 2025-11-13 22:07:41 -05:00
kevin zhao
755331425e refactor: implementing rule traits, rulematch enum 2025-11-13 22:07:41 -05:00
kevin zhao
4ca1834eac update docstring 2025-11-13 22:07:41 -05:00
kevin zhao
8b4283e4ad improve phrasing 2025-11-13 22:07:41 -05:00
kevin zhao
58acf0b833 adding docstrings for PrefixPattern and PatternToken 2025-11-13 22:07:40 -05:00
kevin zhao
8834e4bbdc helper function to get PolicyBuilder + only getting one builder in prefix_rule 2025-11-13 22:07:40 -05:00
kevin zhao
8775a99e3d fallback instead of panicking 2025-11-13 22:07:40 -05:00
kevin zhao
00ff42d594 using shlex::try_join instead of " ".join() 2025-11-13 22:07:40 -05:00
kevin zhao
ff09321eb1 updated error messaging to indicate type 2025-11-13 22:07:40 -05:00
kevin zhao
a67521926a fmt 2025-11-13 22:07:40 -05:00
kevin zhao
17bb82de77 using .iter() instead of mut 2025-11-13 22:07:40 -05:00
kevin zhao
ecbcc37bfb alphasort members 2025-11-13 22:07:40 -05:00
kevin zhao
b03c3c3d1f more early return lines 2025-11-13 22:07:40 -05:00
kevin zhao
a2890e2ca7 adding a line after early return block 2025-11-13 22:07:39 -05:00
kevin zhao
230cb2e3e2 best -> strictest 2025-11-13 22:07:39 -05:00
kevin zhao
7737444356 clippy 2025-11-13 22:07:39 -05:00
kevin zhao
90411d6e90 [parser.rs] match token vector on length 2025-11-13 22:07:39 -05:00
kevin zhao
5fd2172f9f update lints 2025-11-13 22:07:39 -05:00
zhao-oai
1b09f32cd5 Update codex-rs/execpolicy2/src/parser.rs
Co-authored-by: Michael Bolin <mbolin@openai.com>
2025-11-13 22:07:39 -05:00
kevin zhao
31736f2c20 remove license 2025-11-13 22:07:39 -05:00
kevin zhao
b405a8c812 refactor parse_pattern to use iterator chain 2025-11-13 22:07:39 -05:00
kevin zhao
bc375db0ab using parking_lot mutex + AtomicU64 for auto id 2025-11-13 22:07:38 -05:00
kevin zhao
73cbf5a83e add docstrings to decision struct 2025-11-13 22:07:38 -05:00
kevin zhao
072f5c28fc using clap for execpolicy v2 cli 2025-11-13 22:07:38 -05:00
kevin zhao
2952c14719 implementing PartialOrd for Decision instead of defining custom is_stricter_than func 2025-11-13 22:07:38 -05:00
kevin zhao
270abdc0c4 use camelCase in Decision' 2025-11-13 22:07:38 -05:00
kevin zhao
aa15efd7a3 update readme 2025-11-13 22:07:38 -05:00
kevin zhao
ca9e61497d README 2025-11-13 22:07:38 -05:00
kevin zhao
687a8c38ff introduce variant typing to policy result 2025-11-13 22:07:38 -05:00
kevin zhao
0bac9939af better default policy + lint 2025-11-13 22:07:38 -05:00
kevin zhao
bf716081fc clean up test 2025-11-13 22:07:38 -05:00
kevin zhao
55e49a8016 remove extraneous test 2025-11-13 22:07:37 -05:00
kevin zhao
64e786caad simplify 2025-11-13 22:07:37 -05:00
kevin zhao
9aea52efb1 simplify logic 2025-11-13 22:07:37 -05:00
kevin zhao
7e79c4dc5b refactor rules so no more cartesian product 2025-11-13 22:07:37 -05:00
kevin zhao
eea9bff1fb remove tokenize_command use cases 2025-11-13 22:07:37 -05:00
kevin zhao
71ea7edca4 update shape of RuleMatch and Evaluation 2025-11-13 22:07:37 -05:00
kevin zhao
85edde08c1 fix cargo shear 2025-11-13 22:07:37 -05:00
kevin zhao
773177ec8b first pass at prefix rules 2025-11-13 22:07:37 -05:00
28 changed files with 1698 additions and 42 deletions

View File

@@ -0,0 +1,13 @@
# Execpolicy v2 smoke-test policy
#
# `match` / `not_match` entries are example invocations checked against the
# rule's own pattern when the policy is loaded, so every `match` example has to
# be accepted by that rule's `pattern`.

# Allow `echo a` and `echo b`. The examples previously listed `echo c`, which
# this pattern cannot match (the second token must be "a" or "b").
prefix_rule(
    pattern = ["echo", ["a", "b"]],
    decision = "allow",
    match = ["echo a", "echo b"],
    not_match = ["echo-policy-allowed"],
)

# Forbid `echo c` explicitly.
prefix_rule(
    pattern = ["echo", "c"],
    decision = "forbidden",
    match = ["echo c"],
)

16
codex-rs/Cargo.lock generated
View File

@@ -1070,6 +1070,7 @@ dependencies = [
"codex-apply-patch",
"codex-arg0",
"codex-async-utils",
"codex-execpolicy2",
"codex-file-search",
"codex-git",
"codex-keyring-store",
@@ -1188,6 +1189,21 @@ dependencies = [
"tempfile",
]
[[package]]
name = "codex-execpolicy2"
version = "0.0.0"
dependencies = [
"anyhow",
"clap",
"multimap",
"pretty_assertions",
"serde",
"serde_json",
"shlex",
"starlark",
"thiserror 2.0.17",
]
[[package]]
name = "codex-feedback"
version = "0.0.0"

View File

@@ -16,6 +16,7 @@ members = [
"core",
"exec",
"execpolicy",
"execpolicy2",
"keyring-store",
"file-search",
"linux-sandbox",
@@ -63,6 +64,7 @@ codex-chatgpt = { path = "chatgpt" }
codex-common = { path = "common" }
codex-core = { path = "core" }
codex-exec = { path = "exec" }
codex-execpolicy2 = { path = "execpolicy2" }
codex-feedback = { path = "feedback" }
codex-file-search = { path = "file-search" }
codex-git = { path = "utils/git" }

View File

@@ -25,6 +25,7 @@ codex-async-utils = { workspace = true }
codex-file-search = { workspace = true }
codex-git = { workspace = true }
codex-keyring-store = { workspace = true }
codex-execpolicy2 = { workspace = true }
codex-otel = { workspace = true, features = ["otel"] }
codex-protocol = { workspace = true }
codex-rmcp-client = { workspace = true }

View File

@@ -122,6 +122,7 @@ use crate::user_instructions::UserInstructions;
use crate::user_notification::UserNotification;
use crate::util::backoff;
use codex_async_utils::OrCancelExt;
use codex_execpolicy2::Policy as ExecPolicyV2;
use codex_otel::otel_event_manager::OtelEventManager;
use codex_protocol::config_types::ReasoningEffort as ReasoningEffortConfig;
use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
@@ -166,6 +167,9 @@ impl Codex {
let user_instructions = get_user_instructions(&config).await;
let exec_policy_v2 = crate::exec_policy::exec_policy_for(&config.features, &config.cwd)
.map_err(|err| CodexErr::Fatal(format!("failed to load execpolicy2: {err}")))?;
let config = Arc::new(config);
let session_configuration = SessionConfiguration {
@@ -182,6 +186,7 @@ impl Codex {
cwd: config.cwd.clone(),
original_config_do_not_use: Arc::clone(&config),
features: config.features.clone(),
exec_policy_v2,
session_source,
};
@@ -279,6 +284,7 @@ pub(crate) struct TurnContext {
pub(crate) final_output_json_schema: Option<Value>,
pub(crate) codex_linux_sandbox_exe: Option<PathBuf>,
pub(crate) tool_call_gate: Arc<ReadinessFlag>,
pub(crate) exec_policy_v2: Option<Arc<ExecPolicyV2>>,
}
impl TurnContext {
@@ -335,6 +341,8 @@ pub(crate) struct SessionConfiguration {
/// Set of feature flags for this session
features: Features,
/// Optional execpolicy2 policy, applied only when enabled by feature flag.
exec_policy_v2: Option<Arc<ExecPolicyV2>>,
// TODO(pakrym): Remove config from here
original_config_do_not_use: Arc<Config>,
@@ -435,6 +443,7 @@ impl Session {
final_output_json_schema: None,
codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(),
tool_call_gate: Arc::new(ReadinessFlag::new()),
exec_policy_v2: session_configuration.exec_policy_v2.clone(),
}
}
@@ -1763,6 +1772,7 @@ async fn spawn_review_thread(
final_output_json_schema: None,
codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(),
tool_call_gate: Arc::new(ReadinessFlag::new()),
exec_policy_v2: parent_turn_context.exec_policy_v2.clone(),
};
// Seed the child task with the review prompt as the initial user message.
@@ -2612,6 +2622,7 @@ mod tests {
cwd: config.cwd.clone(),
original_config_do_not_use: Arc::clone(&config),
features: Features::default(),
exec_policy_v2: None,
session_source: SessionSource::Exec,
};
@@ -2688,6 +2699,7 @@ mod tests {
cwd: config.cwd.clone(),
original_config_do_not_use: Arc::clone(&config),
features: Features::default(),
exec_policy_v2: None,
session_source: SessionSource::Exec,
};

View File

@@ -0,0 +1,173 @@
use std::fs;
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;
use codex_execpolicy2::Decision;
use codex_execpolicy2::Evaluation;
use codex_execpolicy2::Policy;
use codex_execpolicy2::PolicyParser;
use codex_protocol::protocol::AskForApproval;
use thiserror::Error;
use crate::bash::parse_shell_lc_plain_commands;
use crate::features::Feature;
use crate::features::Features;
use crate::tools::sandboxing::ApprovalRequirement;
const FORBIDDEN_REASON: &str = "execpolicy forbids this command";
const PROMPT_REASON: &str = "execpolicy requires approval for this command";
/// Failures that can occur while loading execpolicy2 files from disk.
///
/// Each variant records the location that was being processed together with
/// the underlying error, exposed through the `source` field.
#[derive(Debug, Error)]
pub enum ExecPolicyError {
/// The policy directory could not be opened, or one of its entries could
/// not be read while iterating over it.
#[error("failed to read execpolicy files from {dir}: {source}")]
ReadDir {
dir: PathBuf,
source: std::io::Error,
},
/// A policy file was found but its contents could not be read as a string.
#[error("failed to read execpolicy file {path}: {source}")]
ReadFile {
path: PathBuf,
source: std::io::Error,
},
/// A policy file was read but the execpolicy2 parser rejected it.
/// `path` is the (lossily converted) string form of the file path, i.e. the
/// identifier that was handed to the parser.
#[error("failed to parse execpolicy file {path}: {source}")]
ParsePolicy {
path: String,
source: codex_execpolicy2::Error,
},
}
/// Returns the execpolicy2 policy to enforce for a session rooted at `cwd`.
///
/// Yields `Ok(None)` when [`Feature::ExecPolicyV2`] is not enabled in
/// `features`; otherwise the policy is loaded from disk and any load failure
/// is propagated to the caller.
pub(crate) fn exec_policy_for(
    features: &Features,
    cwd: &Path,
) -> Result<Option<Arc<Policy>>, ExecPolicyError> {
    if features.enabled(Feature::ExecPolicyV2) {
        let policy = load_policy(cwd)?;
        Ok(Some(policy))
    } else {
        Ok(None)
    }
}
/// Translates the execpolicy2 verdict for `command` into an
/// [`ApprovalRequirement`].
///
/// `command` is first handed to `parse_shell_lc_plain_commands`; when that
/// yields a list of commands each of them is checked, otherwise the original
/// argv is checked as a single command.
///
/// Returns `None` when no rule matches, leaving the decision to the caller.
/// Otherwise:
/// - `allow` maps to [`ApprovalRequirement::Skip`];
/// - `forbidden` maps to [`ApprovalRequirement::Forbidden`];
/// - `prompt` maps to [`ApprovalRequirement::NeedsApproval`], except under
///   [`AskForApproval::Never`], where nobody can be asked and the command is
///   rejected instead.
pub(crate) fn evaluate_with_policy(
    policy: &Policy,
    command: &[String],
    approval_policy: AskForApproval,
) -> Option<ApprovalRequirement> {
    let commands = match parse_shell_lc_plain_commands(command) {
        Some(inner_commands) => inner_commands,
        None => vec![command.to_vec()],
    };

    let Evaluation::Match { decision, .. } = policy.check_multiple(commands.iter()) else {
        return None;
    };

    let requirement = match decision {
        Decision::Allow => ApprovalRequirement::Skip,
        Decision::Forbidden => ApprovalRequirement::Forbidden {
            reason: FORBIDDEN_REASON.to_string(),
        },
        // Prompting is impossible when approvals are disabled, so refuse outright.
        Decision::Prompt if matches!(approval_policy, AskForApproval::Never) => {
            ApprovalRequirement::Forbidden {
                reason: PROMPT_REASON.to_string(),
            }
        }
        Decision::Prompt => ApprovalRequirement::NeedsApproval {
            reason: Some(PROMPT_REASON.to_string()),
        },
    };

    Some(requirement)
}
/// Builds the execpolicy2 [`Policy`] from the `*.codexpolicy` files located
/// directly inside `<cwd>/.codex`.
///
/// Only regular files whose extension is exactly `codexpolicy` are considered.
/// They are fed, in sorted path order, into a single [`PolicyParser`], so the
/// returned policy contains the rules of every file.
///
/// A missing `.codex` directory is treated as "no policy files": the result is
/// the same empty policy produced for a directory that exists but holds no
/// `*.codexpolicy` file. Previously this case surfaced as a `ReadDir` error,
/// which made enabling the feature fatal in any workspace without `.codex`.
///
/// # Errors
/// - [`ExecPolicyError::ReadDir`] if the directory exists but cannot be listed,
///   or one of its entries cannot be read.
/// - [`ExecPolicyError::ReadFile`] if a policy file cannot be read as a string.
/// - [`ExecPolicyError::ParsePolicy`] if the parser rejects a policy file.
fn load_policy(cwd: &Path) -> Result<Arc<Policy>, ExecPolicyError> {
    let codex_dir = cwd.join(".codex");

    let mut policy_paths: Vec<PathBuf> = Vec::new();
    match fs::read_dir(&codex_dir) {
        Ok(entries) => {
            for entry in entries {
                let entry = entry.map_err(|source| ExecPolicyError::ReadDir {
                    dir: codex_dir.clone(),
                    source,
                })?;
                let path = entry.path();
                let has_policy_extension = path
                    .extension()
                    .and_then(|ext| ext.to_str())
                    .is_some_and(|ext| ext == "codexpolicy");
                if has_policy_extension && path.is_file() {
                    policy_paths.push(path);
                }
            }
        }
        // No `.codex` directory simply means there is nothing to load.
        Err(source) if source.kind() == std::io::ErrorKind::NotFound => {}
        Err(source) => {
            return Err(ExecPolicyError::ReadDir {
                dir: codex_dir,
                source,
            });
        }
    }

    // Directory iteration order is unspecified; sort for a deterministic parse order.
    policy_paths.sort();

    let mut parser = PolicyParser::new();
    for policy_path in &policy_paths {
        let contents =
            fs::read_to_string(policy_path).map_err(|source| ExecPolicyError::ReadFile {
                path: policy_path.clone(),
                source,
            })?;
        let identifier = policy_path.to_string_lossy().to_string();
        parser
            .parse(&identifier, &contents)
            .map_err(|source| ExecPolicyError::ParsePolicy {
                path: identifier,
                source,
            })?;
    }

    let policy = Arc::new(parser.build());
    tracing::debug!(
        file_count = policy_paths.len(),
        "loaded execpolicy2 from {}",
        codex_dir.display()
    );

    Ok(policy)
}
#[cfg(test)]
mod tests {
    use super::*;
    use codex_protocol::protocol::AskForApproval;
    use pretty_assertions::assert_eq;

    /// A command wrapped in `bash -lc` must still be matched by a rule that
    /// targets the inner command.
    #[test]
    fn evaluates_bash_lc_inner_commands() {
        let policy_src = r#"
prefix_rule(pattern=["rm"], decision="forbidden")
"#;
        let policy = {
            let mut parser = PolicyParser::new();
            parser
                .parse("test.codexpolicy", policy_src)
                .expect("parse policy");
            parser.build()
        };

        let wrapped_rm: Vec<String> = ["bash", "-lc", "rm -rf /tmp"]
            .iter()
            .map(|arg| (*arg).to_owned())
            .collect();

        let actual = evaluate_with_policy(&policy, &wrapped_rm, AskForApproval::OnRequest)
            .expect("expected match for forbidden command");

        assert_eq!(
            actual,
            ApprovalRequirement::Forbidden {
                reason: FORBIDDEN_REASON.to_string()
            }
        );
    }
}

View File

@@ -40,6 +40,8 @@ pub enum Feature {
ViewImageTool,
/// Allow the model to request web searches.
WebSearchRequest,
/// Gate the execpolicy2 enforcement for shell/unified exec.
ExecPolicyV2,
/// Enable the model-based risk assessments for sandboxed commands.
SandboxCommandAssessment,
/// Create a ghost commit at each turn.
@@ -283,6 +285,12 @@ pub const FEATURES: &[FeatureSpec] = &[
stage: Stage::Stable,
default_enabled: false,
},
FeatureSpec {
id: Feature::ExecPolicyV2,
key: "exec_policy_v2",
stage: Stage::Experimental,
default_enabled: false,
},
FeatureSpec {
id: Feature::SandboxCommandAssessment,
key: "experimental_sandbox_command_assessment",

View File

@@ -24,6 +24,7 @@ mod environment_context;
pub mod error;
pub mod exec;
pub mod exec_env;
mod exec_policy;
pub mod features;
mod flags;
pub mod git_info;

View File

@@ -300,6 +300,11 @@ impl ShellHandler {
env: exec_params.env.clone(),
with_escalated_permissions: exec_params.with_escalated_permissions,
justification: exec_params.justification.clone(),
exec_policy: if is_user_shell_command {
None
} else {
turn.exec_policy_v2.clone()
},
};
let mut orchestrator = ToolOrchestrator::new();
let mut runtime = ShellRuntime::new();

View File

@@ -11,6 +11,7 @@ use crate::error::get_error_message_ui;
use crate::exec::ExecToolCallOutput;
use crate::sandboxing::SandboxManager;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::ApprovalRequirement;
use crate::tools::sandboxing::ProvidesSandboxRetryData;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::ToolCtx;
@@ -49,40 +50,49 @@ impl ToolOrchestrator {
let otel_cfg = codex_otel::otel_event_manager::ToolDecisionSource::Config;
// 1) Approval
let needs_initial_approval =
tool.wants_initial_approval(req, approval_policy, &turn_ctx.sandbox_policy);
let mut already_approved = false;
if needs_initial_approval {
let mut risk = None;
if let Some(metadata) = req.sandbox_retry_data() {
risk = tool_ctx
.session
.assess_sandbox_command(turn_ctx, &tool_ctx.call_id, &metadata.command, None)
.await;
match tool.approval_requirement(req, approval_policy, &turn_ctx.sandbox_policy) {
ApprovalRequirement::Skip => {
otel.tool_decision(otel_tn, otel_ci, ReviewDecision::Approved, otel_cfg);
}
ApprovalRequirement::Forbidden { reason } => {
return Err(ToolError::Rejected(reason));
}
ApprovalRequirement::NeedsApproval { reason } => {
let mut risk = None;
let approval_ctx = ApprovalCtx {
session: tool_ctx.session,
turn: turn_ctx,
call_id: &tool_ctx.call_id,
retry_reason: None,
risk,
};
let decision = tool.start_approval_async(req, approval_ctx).await;
otel.tool_decision(otel_tn, otel_ci, decision, otel_user.clone());
match decision {
ReviewDecision::Denied | ReviewDecision::Abort => {
return Err(ToolError::Rejected("rejected by user".to_string()));
if let Some(metadata) = req.sandbox_retry_data() {
risk = tool_ctx
.session
.assess_sandbox_command(
turn_ctx,
&tool_ctx.call_id,
&metadata.command,
None,
)
.await;
}
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {}
let approval_ctx = ApprovalCtx {
session: tool_ctx.session,
turn: turn_ctx,
call_id: &tool_ctx.call_id,
retry_reason: reason,
risk,
};
let decision = tool.start_approval_async(req, approval_ctx).await;
otel.tool_decision(otel_tn, otel_ci, decision, otel_user.clone());
match decision {
ReviewDecision::Denied | ReviewDecision::Abort => {
return Err(ToolError::Rejected("rejected by user".to_string()));
}
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {}
}
already_approved = true;
}
already_approved = true;
} else {
otel.tool_decision(otel_tn, otel_ci, ReviewDecision::Approved, otel_cfg);
}
// 2) First attempt under the selected sandbox.

View File

@@ -6,11 +6,13 @@ builds a CommandSpec, and runs it under the current SandboxAttempt.
*/
use crate::command_safety::is_dangerous_command::requires_initial_appoval;
use crate::exec::ExecToolCallOutput;
use crate::exec_policy::evaluate_with_policy;
use crate::protocol::SandboxPolicy;
use crate::sandboxing::execute_env;
use crate::tools::runtimes::build_command_spec;
use crate::tools::sandboxing::Approvable;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::ApprovalRequirement;
use crate::tools::sandboxing::ProvidesSandboxRetryData;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::SandboxRetryData;
@@ -20,10 +22,12 @@ use crate::tools::sandboxing::ToolCtx;
use crate::tools::sandboxing::ToolError;
use crate::tools::sandboxing::ToolRuntime;
use crate::tools::sandboxing::with_cached_approval;
use codex_execpolicy2::Policy as ExecPolicyV2;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::ReviewDecision;
use futures::future::BoxFuture;
use std::path::PathBuf;
use std::sync::Arc;
#[derive(Clone, Debug)]
pub struct ShellRequest {
@@ -33,6 +37,7 @@ pub struct ShellRequest {
pub env: std::collections::HashMap<String, String>,
pub with_escalated_permissions: Option<bool>,
pub justification: Option<String>,
pub exec_policy: Option<Arc<ExecPolicyV2>>,
}
impl ProvidesSandboxRetryData for ShellRequest {
@@ -114,18 +119,26 @@ impl Approvable<ShellRequest> for ShellRuntime {
})
}
fn wants_initial_approval(
fn approval_requirement(
&self,
req: &ShellRequest,
policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
) -> bool {
requires_initial_appoval(
) -> ApprovalRequirement {
if let Some(exec_policy) = &req.exec_policy
&& let Some(requirement) = evaluate_with_policy(exec_policy, &req.command, policy)
{
requirement
} else if requires_initial_appoval(
policy,
sandbox_policy,
&req.command,
req.with_escalated_permissions.unwrap_or(false),
)
) {
ApprovalRequirement::NeedsApproval { reason: None }
} else {
ApprovalRequirement::Skip
}
}
fn wants_escalated_first_attempt(&self, req: &ShellRequest) -> bool {
@@ -157,3 +170,85 @@ impl ToolRuntime<ShellRequest, ExecToolCallOutput> for ShellRuntime {
Ok(out)
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use codex_execpolicy2::PolicyParser;
    use pretty_assertions::assert_eq;
    use std::collections::HashMap;

    /// Parses `src` as a single policy file and returns the resulting policy.
    fn parse_policy(src: &str) -> Arc<ExecPolicyV2> {
        let mut parser = PolicyParser::new();
        parser
            .parse("test.codexpolicy", src)
            .expect("parse execpolicy2 file");
        Arc::new(parser.build())
    }

    /// Builds a minimal `ShellRequest` for `command` with the given policy attached.
    fn shell_request(command: &[&str], exec_policy: Option<Arc<ExecPolicyV2>>) -> ShellRequest {
        let command: Vec<String> = command.iter().map(|arg| (*arg).to_owned()).collect();
        ShellRequest {
            command,
            cwd: PathBuf::from("."),
            timeout_ms: None,
            env: HashMap::new(),
            with_escalated_permissions: None,
            justification: None,
            exec_policy,
        }
    }

    /// Requirement for `echo hi` under full access with a policy that prompts on `echo`.
    fn echo_prompt_requirement(approval: AskForApproval) -> ApprovalRequirement {
        let policy = parse_policy(r#"prefix_rule(pattern=["echo"], decision="prompt")"#);
        let req = shell_request(&["echo", "hi"], Some(policy));
        ShellRuntime::new().approval_requirement(&req, approval, &SandboxPolicy::DangerFullAccess)
    }

    #[test]
    fn prompt_decision_requires_approval() {
        let requirement = echo_prompt_requirement(AskForApproval::OnRequest);

        let expected = ApprovalRequirement::NeedsApproval {
            reason: Some("execpolicy requires approval for this command".to_string()),
        };
        assert_eq!(requirement, expected);
    }

    #[test]
    fn prompt_blocked_when_approval_disabled() {
        let requirement = echo_prompt_requirement(AskForApproval::Never);

        let expected = ApprovalRequirement::Forbidden {
            reason: "execpolicy requires approval for this command".to_string(),
        };
        assert_eq!(requirement, expected);
    }

    #[test]
    fn user_shell_commands_skip_execpolicy() {
        // No policy attached: the request falls through to the default checks.
        let req = shell_request(&["echo", "hi"], None);

        let requirement = ShellRuntime::new().approval_requirement(
            &req,
            AskForApproval::OnRequest,
            &SandboxPolicy::DangerFullAccess,
        );

        assert_eq!(requirement, ApprovalRequirement::Skip);
    }
}

View File

@@ -1,4 +1,5 @@
use crate::command_safety::is_dangerous_command::requires_initial_appoval;
use crate::exec_policy::evaluate_with_policy;
/*
Runtime: unified exec
@@ -10,6 +11,7 @@ use crate::error::SandboxErr;
use crate::tools::runtimes::build_command_spec;
use crate::tools::sandboxing::Approvable;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::ApprovalRequirement;
use crate::tools::sandboxing::ProvidesSandboxRetryData;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::SandboxRetryData;
@@ -22,18 +24,21 @@ use crate::tools::sandboxing::with_cached_approval;
use crate::unified_exec::UnifiedExecError;
use crate::unified_exec::UnifiedExecSession;
use crate::unified_exec::UnifiedExecSessionManager;
use codex_execpolicy2::Policy as ExecPolicyV2;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::ReviewDecision;
use codex_protocol::protocol::SandboxPolicy;
use futures::future::BoxFuture;
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
#[derive(Clone, Debug)]
pub struct UnifiedExecRequest {
pub command: Vec<String>,
pub cwd: PathBuf,
pub env: HashMap<String, String>,
pub exec_policy: Option<Arc<ExecPolicyV2>>,
pub with_escalated_permissions: Option<bool>,
pub justification: Option<String>,
}
@@ -63,6 +68,7 @@ impl UnifiedExecRequest {
command: Vec<String>,
cwd: PathBuf,
env: HashMap<String, String>,
exec_policy: Option<Arc<ExecPolicyV2>>,
with_escalated_permissions: Option<bool>,
justification: Option<String>,
) -> Self {
@@ -70,6 +76,7 @@ impl UnifiedExecRequest {
command,
cwd,
env,
exec_policy,
with_escalated_permissions,
justification,
}
@@ -129,18 +136,26 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
})
}
fn wants_initial_approval(
fn approval_requirement(
&self,
req: &UnifiedExecRequest,
policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
) -> bool {
requires_initial_appoval(
) -> ApprovalRequirement {
if let Some(exec_policy) = &req.exec_policy
&& let Some(requirement) = evaluate_with_policy(exec_policy, &req.command, policy)
{
requirement
} else if requires_initial_appoval(
policy,
sandbox_policy,
&req.command,
req.with_escalated_permissions.unwrap_or(false),
)
) {
ApprovalRequirement::NeedsApproval { reason: None }
} else {
ApprovalRequirement::Skip
}
}
fn wants_escalated_first_attempt(&self, req: &UnifiedExecRequest) -> bool {

View File

@@ -86,6 +86,13 @@ pub(crate) struct ApprovalCtx<'a> {
pub risk: Option<SandboxCommandAssessment>,
}
/// Outcome of the pre-execution approval check for a tool request.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum ApprovalRequirement {
/// No approval is required before the first attempt.
Skip,
/// Approval must be requested before the first attempt; `reason`, when
/// present, explains why approval is being requested.
NeedsApproval { reason: Option<String> },
/// The request must not run; `reason` explains why it was rejected.
Forbidden { reason: String },
}
pub(crate) trait Approvable<Req> {
type ApprovalKey: Hash + Eq + Clone + Debug + Serialize;
@@ -106,21 +113,27 @@ pub(crate) trait Approvable<Req> {
matches!(policy, AskForApproval::Never)
}
/// Decide whether an initial user approval should be requested before the
/// Decide whether an initial user approval should be requested or forbidden before the
/// first attempt. Defaults to the orchestrator's behavior (prerefactor):
/// - Never, OnFailure: do not ask
/// - OnRequest: ask unless sandbox policy is DangerFullAccess
/// - UnlessTrusted: always ask
fn wants_initial_approval(
fn approval_requirement(
&self,
_req: &Req,
policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
) -> bool {
match policy {
) -> ApprovalRequirement {
let needs_approval = match policy {
AskForApproval::Never | AskForApproval::OnFailure => false,
AskForApproval::OnRequest => !matches!(sandbox_policy, SandboxPolicy::DangerFullAccess),
AskForApproval::UnlessTrusted => true,
};
if needs_approval {
ApprovalRequirement::NeedsApproval { reason: None }
} else {
ApprovalRequirement::Skip
}
}

View File

@@ -325,6 +325,7 @@ impl UnifiedExecSessionManager {
command.to_vec(),
cwd,
create_env(&context.turn.shell_environment_policy),
context.turn.exec_policy_v2.clone(),
with_escalated_permissions,
justification,
);

View File

@@ -0,0 +1,100 @@
#![cfg(not(target_os = "windows"))]
#![allow(clippy::unwrap_used, clippy::expect_used)]
use anyhow::Result;
use codex_core::features::Feature;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
use codex_core::protocol::SandboxPolicy;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::user_input::UserInput;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_function_call;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::mount_sse_once;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use serde_json::json;
use std::fs;
/// End-to-end check that a `forbidden` execpolicy2 rule blocks a shell tool
/// call: the policy is written to `<cwd>/.codex`, the mocked model requests
/// `echo blocked`, and the resulting `ExecCommandEnd` output must carry the
/// execpolicy rejection message.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn execpolicy2_blocks_shell_invocation() -> Result<()> {
// Enable the feature and drop a policy file forbidding every `echo` command
// into the `.codex` directory under the configured cwd.
let mut builder = test_codex().with_config(|config| {
config.features.enable(Feature::ExecPolicyV2);
let policy_dir = config.cwd.join(".codex");
fs::create_dir_all(&policy_dir).expect("create .codex directory");
let policy_path = policy_dir.join("policy.codexpolicy");
fs::write(
&policy_path,
r#"prefix_rule(pattern=["echo"], decision="forbidden")"#,
)
.expect("write policy file");
});
let server = start_mock_server().await;
let test = builder.build(&server).await?;
let call_id = "shell-forbidden";
let args = json!({
"command": ["echo", "blocked"],
"timeout_ms": 1_000,
});
// First mocked response: a `shell` function call running `echo blocked`.
mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
)
.await;
// Second mocked response: a plain assistant message followed by completion.
mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
)
.await;
let session_model = test.session_configured.model.clone();
// Submit the user turn with approvals disabled and full sandbox access.
test.codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "run shell command".into(),
}],
final_output_json_schema: None,
cwd: test.cwd_path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: session_model,
effort: None,
summary: ReasoningSummary::Auto,
})
.await?;
// The predicate only accepts `ExecCommandEnd`, so the `else` arm cannot be hit.
let EventMsg::ExecCommandEnd(end) = wait_for_event(&test.codex, |event| {
matches!(event, EventMsg::ExecCommandEnd(_))
})
.await
else {
unreachable!()
};
// Wait for the task to complete before asserting on the captured output.
wait_for_event(&test.codex, |event| {
matches!(event, EventMsg::TaskComplete(_))
})
.await;
// The rejection reason produced by execpolicy must appear in the command output.
assert!(
end.aggregated_output
.contains("execpolicy forbids this command"),
"unexpected output: {}",
end.aggregated_output
);
Ok(())
}

View File

@@ -27,6 +27,8 @@ mod compact;
mod compact_resume_fork;
mod deprecation_notice;
mod exec;
#[cfg(not(target_os = "windows"))]
mod execpolicy2;
mod fork_conversation;
mod grep_files;
mod items;

View File

@@ -0,0 +1,29 @@
[package]
name = "codex-execpolicy2"
version = { workspace = true }
edition = "2024"
description = "Codex exec policy v2: prefix-based Starlark rules for command decisions."
[lib]
name = "codex_execpolicy2"
path = "src/lib.rs"
[[bin]]
name = "codex-execpolicy2"
path = "src/main.rs"
[lints]
workspace = true
[dependencies]
anyhow = { workspace = true }
clap = { workspace = true, features = ["derive"] }
multimap = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
shlex = { workspace = true }
starlark = { workspace = true }
thiserror = { workspace = true }
[dev-dependencies]
pretty_assertions = { workspace = true }

View File

@@ -0,0 +1,58 @@
# codex-execpolicy2
## Overview
- Policy engine and CLI built around `prefix_rule(pattern=[...], decision?, match?, not_match?)`.
- This release covers only the prefix-rule subset of the planned execpolicy v2 language; a richer language will follow.
- Tokens are matched in order; any `pattern` element may be a list to denote alternatives. `decision` defaults to `allow`; valid values: `allow`, `prompt`, `forbidden`.
- `match` / `not_match` supply example invocations that are validated at load time (think of them as unit tests); examples can be token arrays or strings (strings are tokenized with `shlex`).
- The CLI always prints the JSON serialization of the evaluation result (whether a match or not).
## Policy shapes
- Prefix rules use Starlark syntax:
```starlark
prefix_rule(
pattern = ["cmd", ["alt1", "alt2"]], # ordered tokens; list entries denote alternatives
decision = "prompt", # allow | prompt | forbidden; defaults to allow
match = [["cmd", "alt1"], "cmd alt2"], # examples that must match this rule
not_match = [["cmd", "oops"], "cmd alt3"], # examples that must not match this rule
)
```
## Response shapes
- Match:
```json
{
"match": {
"decision": "allow|prompt|forbidden",
"matchedRules": [
{
"prefixRuleMatch": {
"matchedPrefix": ["<token>", "..."],
"decision": "allow|prompt|forbidden"
}
}
]
}
}
```
- No match:
```json
"noMatch"
```
- `matchedRules` lists every rule whose prefix matched the command; `matchedPrefix` is the exact prefix that matched.
- The effective `decision` is the strictest severity across all matches (`forbidden` > `prompt` > `allow`).
## CLI
- Provide one or more policy files (for example `src/default.codexpolicy`) to check a command:
```bash
cargo run -p codex-execpolicy2 -- check --policy path/to/policy.codexpolicy git status
```
- Pass multiple `--policy` flags to merge rules, evaluated in the order provided:
```bash
cargo run -p codex-execpolicy2 -- check --policy base.codexpolicy --policy overrides.codexpolicy git status
```
- Example outcomes:
- Match: `{"match": { ... "decision": "allow" ... }}`
- No match: `"noMatch"`

View File

@@ -0,0 +1,77 @@
# Example policy to illustrate syntax; not comprehensive and not recommended for actual use.
#
# Each `prefix_rule` matches commands whose leading tokens equal `pattern`.
# `match` / `not_match` entries are examples validated when the policy is loaded;
# each example is either a token list or a single string.

# Hard resets are blocked outright.
prefix_rule(
    pattern = ["git", "reset", "--hard"],
    decision = "forbidden",
    match = [
        ["git", "reset", "--hard"],
    ],
    not_match = [
        ["git", "reset", "--keep"],
        "git reset --merge",
    ],
)

# No `decision` is given here, so the default applies.
prefix_rule(
    pattern = ["ls"],
    match = [
        ["ls"],
        ["ls", "-l"],
        ["ls", "-a", "."],
    ],
)

prefix_rule(
    pattern = ["cat"],
    match = [
        ["cat", "file.txt"],
        ["cat", "-n", "README.md"],
    ],
)

# Copying requires explicit approval.
prefix_rule(
    pattern = ["cp"],
    decision = "prompt",
    match = [
        ["cp", "foo", "bar"],
        "cp -r src dest",
    ],
)

# The `not_match` example shows that a different first token ("hea") is not covered.
prefix_rule(
    pattern = ["head"],
    match = [
        ["head", "README.md"],
        ["head", "-n", "5", "CHANGELOG.md"],
    ],
    not_match = [
        ["hea", "-n", "1,5p", "CHANGELOG.md"],
    ],
)

prefix_rule(
    pattern = ["printenv"],
    match = [
        ["printenv"],
        ["printenv", "PATH"],
    ],
    not_match = [
        ["print", "-0"],
    ],
)

prefix_rule(
    pattern = ["pwd"],
    match = [
        ["pwd"],
    ],
)

prefix_rule(
    pattern = ["which"],
    match = [
        ["which", "python3"],
        ["which", "-a", "python3"],
    ],
)

View File

@@ -0,0 +1,27 @@
use serde::Deserialize;
use serde::Serialize;
use crate::error::Error;
use crate::error::Result;
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum Decision {
/// Command may run without further approval.
Allow,
/// Request explicit user approval; rejected outright when running with `approval_policy="never"`.
Prompt,
/// Command is blocked without further consideration.
Forbidden,
}
impl Decision {
pub fn parse(raw: &str) -> Result<Self> {
match raw {
"allow" => Ok(Self::Allow),
"prompt" => Ok(Self::Prompt),
"forbidden" => Ok(Self::Forbidden),
other => Err(Error::InvalidDecision(other.to_string())),
}
}
}

View File

@@ -0,0 +1,26 @@
use starlark::Error as StarlarkError;
use thiserror::Error;
pub type Result<T> = std::result::Result<T, Error>;
/// Errors produced while parsing or validating a policy.
#[derive(Debug, Error)]
pub enum Error {
    /// The `decision` text was not one of the recognised values.
    #[error("invalid decision: {0}")]
    InvalidDecision(String),
    /// A `pattern` was empty or contained an element of the wrong type.
    #[error("invalid pattern element: {0}")]
    InvalidPattern(String),
    /// A `match` / `not_match` example was empty, malformed, or of the wrong type.
    #[error("invalid example: {0}")]
    InvalidExample(String),
    /// One or more positive examples were not matched by any of the listed rules.
    #[error(
        "expected every example to match at least one rule. rules: {rules:?}; unmatched examples: \
        {examples:?}"
    )]
    ExampleDidNotMatch {
        /// Debug rendering of the rules the examples were checked against.
        rules: Vec<String>,
        /// Rendered examples that no rule matched.
        examples: Vec<String>,
    },
    /// A negative example was matched by `rule`.
    #[error("expected example to not match rule `{rule}`: {example}")]
    ExampleDidMatch { rule: String, example: String },
    /// Error reported by the Starlark parser or evaluator.
    #[error("starlark error: {0}")]
    Starlark(StarlarkError),
}

View File

@@ -0,0 +1,15 @@
pub mod decision;
pub mod error;
pub mod parser;
pub mod policy;
pub mod rule;
pub use decision::Decision;
pub use error::Error;
pub use error::Result;
pub use parser::PolicyParser;
pub use policy::Evaluation;
pub use policy::Policy;
pub use rule::Rule;
pub use rule::RuleMatch;
pub use rule::RuleRef;

View File

@@ -0,0 +1,61 @@
use std::fs;
use std::path::PathBuf;
use anyhow::Context;
use anyhow::Result;
use clap::Parser;
use codex_execpolicy2::PolicyParser;
/// CLI for evaluating exec policies
#[derive(Parser)]
#[command(name = "codex-execpolicy2")]
enum Cli {
    /// Evaluate a command against a policy.
    Check {
        /// Policy file to load; repeat the flag to merge several files in the order given.
        // The long flag is spelled `--policy` to match the documented invocation
        // (`check --policy path/to/policy.codexpolicy ...`). Without an explicit name clap
        // derives `--policies` from the field name, so that spelling is kept as an alias.
        #[arg(
            short,
            long = "policy",
            alias = "policies",
            value_name = "PATH",
            required = true
        )]
        policies: Vec<PathBuf>,
        /// Command tokens to check.
        #[arg(
            value_name = "COMMAND",
            required = true,
            trailing_var_arg = true,
            allow_hyphen_values = true
        )]
        command: Vec<String>,
    },
}
/// Parses the command line and runs the requested subcommand.
fn main() -> Result<()> {
    // `Check` is the only subcommand, so the pattern is irrefutable.
    let Cli::Check { policies, command } = Cli::parse();
    cmd_check(policies, command)
}
/// Loads the policy files, evaluates `args` against the merged policy, and prints the
/// evaluation to stdout as pretty-printed JSON.
fn cmd_check(policies: Vec<PathBuf>, args: Vec<String>) -> Result<()> {
    let merged_policy = load_policies(&policies)?;
    let rendered = serde_json::to_string_pretty(&merged_policy.check(&args))?;
    println!("{rendered}");
    Ok(())
}
/// Reads every policy file, then parses them in the order given into one merged policy.
///
/// All files are read before any is parsed, so an unreadable file is reported before a
/// parse error in an earlier file.
fn load_policies(policy_paths: &[PathBuf]) -> Result<codex_execpolicy2::Policy> {
    let mut sources: Vec<(String, String)> = Vec::new();
    for path in policy_paths {
        let contents = fs::read_to_string(path)
            .with_context(|| format!("failed to read policy at {}", path.display()))
            .context("failed to load policy files")?;
        // The (lossily rendered) path doubles as the identifier reported in parse errors.
        sources.push((path.to_string_lossy().to_string(), contents));
    }

    let mut parser = PolicyParser::new();
    for (identifier, contents) in &sources {
        parser.parse(identifier, contents)?;
    }
    Ok(parser.build())
}

View File

@@ -0,0 +1,259 @@
use multimap::MultiMap;
use shlex;
use starlark::any::ProvidesStaticType;
use starlark::environment::GlobalsBuilder;
use starlark::environment::Module;
use starlark::eval::Evaluator;
use starlark::starlark_module;
use starlark::syntax::AstModule;
use starlark::syntax::Dialect;
use starlark::values::Value;
use starlark::values::list::ListRef;
use starlark::values::list::UnpackList;
use starlark::values::none::NoneType;
use std::cell::RefCell;
use std::cell::RefMut;
use std::sync::Arc;
use crate::decision::Decision;
use crate::error::Error;
use crate::error::Result;
use crate::rule::PatternToken;
use crate::rule::PrefixPattern;
use crate::rule::PrefixRule;
use crate::rule::RuleRef;
use crate::rule::validate_match_examples;
use crate::rule::validate_not_match_examples;
/// Accumulates rules from one or more policy sources and turns them into a `Policy`.
pub struct PolicyParser {
    // Kept in a `RefCell` so the `prefix_rule` builtin can mutate the builder through the
    // shared reference stored in `Evaluator.extra`.
    builder: RefCell<PolicyBuilder>,
}
impl Default for PolicyParser {
    fn default() -> Self {
        Self::new()
    }
}
impl PolicyParser {
    /// Creates a parser with no rules.
    pub fn new() -> Self {
        Self {
            builder: RefCell::new(PolicyBuilder::new()),
        }
    }
    /// Parses a policy, tagging parser errors with `policy_identifier` so failures include the
    /// identifier alongside line numbers.
    ///
    /// Rules registered by the evaluated source are added to this parser, so calling `parse`
    /// repeatedly merges several sources.
    ///
    /// # Errors
    ///
    /// Returns `Error::Starlark` when the source fails to parse or evaluate.
    pub fn parse(&mut self, policy_identifier: &str, policy_file_contents: &str) -> Result<()> {
        let mut dialect = Dialect::Extended.clone();
        dialect.enable_f_strings = true;
        let ast = AstModule::parse(
            policy_identifier,
            policy_file_contents.to_string(),
            &dialect,
        )
        .map_err(Error::Starlark)?;
        let globals = GlobalsBuilder::standard().with(policy_builtins).build();
        let module = Module::new();
        {
            let mut eval = Evaluator::new(&module);
            // Expose the builder to the builtins; `policy_builder` reads it back from `extra`.
            eval.extra = Some(&self.builder);
            eval.eval_module(ast, &globals).map_err(Error::Starlark)?;
        }
        Ok(())
    }
    /// Consumes the parser and returns the policy holding every rule parsed so far.
    pub fn build(self) -> crate::policy::Policy {
        self.builder.into_inner().build()
    }
}
/// Mutable collection of rules gathered while policy sources are evaluated.
#[derive(Debug, ProvidesStaticType)]
struct PolicyBuilder {
    /// Rules keyed by the program name reported by `Rule::program`.
    rules_by_program: MultiMap<String, RuleRef>,
}

impl PolicyBuilder {
    /// Creates a builder with no rules.
    fn new() -> Self {
        let rules_by_program = MultiMap::new();
        Self { rules_by_program }
    }

    /// Registers `rule` under its program name.
    fn add_rule(&mut self, rule: RuleRef) {
        let program = rule.program().to_string();
        self.rules_by_program.insert(program, rule);
    }

    /// Consumes the builder and wraps the collected rules in a `Policy`.
    fn build(self) -> crate::policy::Policy {
        let Self { rules_by_program } = self;
        crate::policy::Policy::new(rules_by_program)
    }
}
fn parse_pattern<'v>(pattern: UnpackList<Value<'v>>) -> Result<Vec<PatternToken>> {
let tokens: Vec<PatternToken> = pattern
.items
.into_iter()
.map(parse_pattern_token)
.collect::<Result<_>>()?;
if tokens.is_empty() {
Err(Error::InvalidPattern("pattern cannot be empty".to_string()))
} else {
Ok(tokens)
}
}
fn parse_pattern_token<'v>(value: Value<'v>) -> Result<PatternToken> {
if let Some(s) = value.unpack_str() {
Ok(PatternToken::Single(s.to_string()))
} else if let Some(list) = ListRef::from_value(value) {
let tokens: Vec<String> = list
.content()
.iter()
.map(|value| {
value
.unpack_str()
.ok_or_else(|| {
Error::InvalidPattern(format!(
"pattern alternative must be a string (got {})",
value.get_type()
))
})
.map(str::to_string)
})
.collect::<Result<_>>()?;
match tokens.as_slice() {
[] => Err(Error::InvalidPattern(
"pattern alternatives cannot be empty".to_string(),
)),
[single] => Ok(PatternToken::Single(single.clone())),
_ => Ok(PatternToken::Alts(tokens)),
}
} else {
Err(Error::InvalidPattern(format!(
"pattern element must be a string or list of strings (got {})",
value.get_type()
)))
}
}
fn parse_examples<'v>(examples: UnpackList<Value<'v>>) -> Result<Vec<Vec<String>>> {
examples.items.into_iter().map(parse_example).collect()
}
fn parse_example<'v>(value: Value<'v>) -> Result<Vec<String>> {
if let Some(raw) = value.unpack_str() {
parse_string_example(raw)
} else if let Some(list) = ListRef::from_value(value) {
parse_list_example(list)
} else {
Err(Error::InvalidExample(format!(
"example must be a string or list of strings (got {})",
value.get_type()
)))
}
}
fn parse_string_example(raw: &str) -> Result<Vec<String>> {
let tokens = shlex::split(raw).ok_or_else(|| {
Error::InvalidExample("example string has invalid shell syntax".to_string())
})?;
if tokens.is_empty() {
Err(Error::InvalidExample(
"example cannot be an empty string".to_string(),
))
} else {
Ok(tokens)
}
}
fn parse_list_example(list: &ListRef) -> Result<Vec<String>> {
let tokens: Vec<String> = list
.content()
.iter()
.map(|value| {
value
.unpack_str()
.ok_or_else(|| {
Error::InvalidExample(format!(
"example tokens must be strings (got {})",
value.get_type()
))
})
.map(str::to_string)
})
.collect::<Result<_>>()?;
if tokens.is_empty() {
Err(Error::InvalidExample(
"example cannot be an empty list".to_string(),
))
} else {
Ok(tokens)
}
}
/// Returns a mutable borrow of the `PolicyBuilder` stored in `eval.extra`.
///
/// # Panics
///
/// Panics when `eval.extra` is unset or does not hold a `RefCell<PolicyBuilder>`.
/// `RefCell::borrow_mut` also panics if the builder is already borrowed.
fn policy_builder<'v, 'a>(eval: &Evaluator<'v, 'a, '_>) -> RefMut<'a, PolicyBuilder> {
    // Both `expect`s guard invariants: `extra` is populated by `PolicyParser::parse`
    // before evaluation starts.
    #[expect(clippy::expect_used)]
    eval.extra
        .as_ref()
        .expect("policy_builder requires Evaluator.extra to be populated")
        .downcast_ref::<RefCell<PolicyBuilder>>()
        .expect("Evaluator.extra must contain a PolicyBuilder")
        .borrow_mut()
}
// Starlark builtins made available to policy files.
#[starlark_module]
fn policy_builtins(builder: &mut GlobalsBuilder) {
    // `prefix_rule(pattern, decision?, match?, not_match?)`: validates its arguments and the
    // supplied examples, then registers one rule per alternative of the first pattern token.
    fn prefix_rule<'v>(
        pattern: UnpackList<Value<'v>>,
        decision: Option<&'v str>,
        r#match: Option<UnpackList<Value<'v>>>,
        not_match: Option<UnpackList<Value<'v>>>,
        eval: &mut Evaluator<'v, '_, '_>,
    ) -> anyhow::Result<NoneType> {
        // An omitted `decision` means `allow`.
        let decision = match decision {
            Some(raw) => Decision::parse(raw)?,
            None => Decision::Allow,
        };
        let pattern_tokens = parse_pattern(pattern)?;
        // Omitted example lists are treated as empty.
        let matches: Vec<Vec<String>> =
            r#match.map(parse_examples).transpose()?.unwrap_or_default();
        let not_matches: Vec<Vec<String>> = not_match
            .map(parse_examples)
            .transpose()?
            .unwrap_or_default();
        // The mutable borrow taken here is held until the function returns.
        let mut builder = policy_builder(eval);
        let (first_token, remaining_tokens) = pattern_tokens
            .split_first()
            .ok_or_else(|| Error::InvalidPattern("pattern cannot be empty".to_string()))?;
        // The tail is shared by every rule generated below.
        let rest: Arc<[PatternToken]> = remaining_tokens.to_vec().into();
        // Rules are keyed by a single program name, so alternatives in the first position
        // expand into one rule each; alternatives in later positions stay inside `rest`.
        let rules: Vec<RuleRef> = first_token
            .alternatives()
            .iter()
            .map(|head| {
                Arc::new(PrefixRule {
                    pattern: PrefixPattern {
                        first: Arc::from(head.as_str()),
                        rest: rest.clone(),
                    },
                    decision,
                }) as RuleRef
            })
            .collect();
        // Examples are checked before anything is registered, so a failing rule adds nothing.
        validate_not_match_examples(&rules, &not_matches)?;
        validate_match_examples(&rules, &matches)?;
        rules.into_iter().for_each(|rule| builder.add_rule(rule));
        Ok(NoneType)
    }
}

View File

@@ -0,0 +1,80 @@
use crate::decision::Decision;
use crate::rule::RuleMatch;
use crate::rule::RuleRef;
use multimap::MultiMap;
use serde::Deserialize;
use serde::Serialize;
/// A set of rules, indexed by the program (first command token) each rule applies to.
#[derive(Clone, Debug)]
pub struct Policy {
    rules_by_program: MultiMap<String, RuleRef>,
}

impl Policy {
    /// Wraps an already-built rule index.
    pub fn new(rules_by_program: MultiMap<String, RuleRef>) -> Self {
        Self { rules_by_program }
    }

    /// Returns the underlying rule index.
    pub fn rules(&self) -> &MultiMap<String, RuleRef> {
        &self.rules_by_program
    }

    /// Evaluates a single command.
    ///
    /// Only rules registered under the command's first token are consulted. The result is
    /// `NoMatch` for an empty command, an unknown program, or when no rule matches.
    pub fn check(&self, cmd: &[String]) -> Evaluation {
        let Some(program) = cmd.first() else {
            return Evaluation::NoMatch;
        };
        let Some(candidates) = self.rules_by_program.get_vec(program) else {
            return Evaluation::NoMatch;
        };
        let hits: Vec<RuleMatch> = candidates
            .iter()
            .filter_map(|rule| rule.matches(cmd))
            .collect();
        Self::summarize(hits)
    }

    /// Evaluates several commands and merges their matches, in input order, into one
    /// evaluation whose decision is the strictest across all of them.
    pub fn check_multiple<Commands>(&self, commands: Commands) -> Evaluation
    where
        Commands: IntoIterator,
        Commands::Item: AsRef<[String]>,
    {
        let mut hits: Vec<RuleMatch> = Vec::new();
        for command in commands {
            if let Evaluation::Match { matched_rules, .. } = self.check(command.as_ref()) {
                hits.extend(matched_rules);
            }
        }
        Self::summarize(hits)
    }

    /// Builds the evaluation for a list of matches: `NoMatch` when empty, otherwise a
    /// `Match` carrying the maximum decision.
    fn summarize(matched_rules: Vec<RuleMatch>) -> Evaluation {
        let Some(decision) = matched_rules.iter().map(RuleMatch::decision).max() else {
            return Evaluation::NoMatch;
        };
        Evaluation::Match {
            decision,
            matched_rules,
        }
    }
}
/// Result of checking one or more commands against a policy.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum Evaluation {
    /// No rule matched.
    NoMatch,
    /// At least one rule matched.
    Match {
        /// Maximum decision among `matched_rules`.
        decision: Decision,
        /// Every rule match that contributed to this evaluation.
        #[serde(rename = "matchedRules")]
        matched_rules: Vec<RuleMatch>,
    },
}

impl Evaluation {
    /// Returns `true` for the `Match` variant.
    pub fn is_match(&self) -> bool {
        match self {
            Self::Match { .. } => true,
            Self::NoMatch => false,
        }
    }
}

View File

@@ -0,0 +1,147 @@
use crate::decision::Decision;
use crate::error::Error;
use crate::error::Result;
use serde::Deserialize;
use serde::Serialize;
use shlex::try_join;
use std::any::Any;
use std::fmt::Debug;
use std::sync::Arc;
/// One position of a pattern: either a single literal or a set of accepted literals.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum PatternToken {
    Single(String),
    Alts(Vec<String>),
}

impl PatternToken {
    /// Returns `true` when `token` equals one of the accepted literals.
    fn matches(&self, token: &str) -> bool {
        self.alternatives()
            .iter()
            .any(|candidate| candidate == token)
    }

    /// Returns the accepted literals as a slice; a `Single` yields a one-element slice.
    pub fn alternatives(&self) -> &[String] {
        match self {
            Self::Alts(alternatives) => alternatives,
            Self::Single(expected) => std::slice::from_ref(expected),
        }
    }
}
/// Prefix matcher for commands with support for alternative match tokens.
/// The first token is a fixed literal because the policy indexes rules by it.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct PrefixPattern {
    pub first: Arc<str>,
    pub rest: Arc<[PatternToken]>,
}

impl PrefixPattern {
    /// Returns the leading tokens of `cmd` covered by this pattern, or `None` when `cmd` is
    /// shorter than the pattern or any position differs.
    pub fn matches_prefix(&self, cmd: &[String]) -> Option<Vec<String>> {
        let (program, arguments) = cmd.split_first()?;
        if program.as_str() != &*self.first {
            return None;
        }
        // `get` yields `None` when the command has fewer arguments than the pattern needs.
        let tail_length = self.rest.len();
        let candidate = arguments.get(..tail_length)?;
        let tail_matches = self
            .rest
            .iter()
            .zip(candidate)
            .all(|(pattern_token, cmd_token)| pattern_token.matches(cmd_token));
        tail_matches.then(|| cmd[..=tail_length].to_vec())
    }
}
/// Record of a rule that matched a command.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum RuleMatch {
    /// A prefix rule matched.
    PrefixRuleMatch {
        /// Leading command tokens covered by the rule's pattern.
        #[serde(rename = "matchedPrefix")]
        matched_prefix: Vec<String>,
        /// Decision attached to the matching rule.
        decision: Decision,
    },
}

impl RuleMatch {
    /// Returns the decision carried by this match.
    pub fn decision(&self) -> Decision {
        // Irrefutable while `PrefixRuleMatch` is the only variant.
        let Self::PrefixRuleMatch { decision, .. } = self;
        *decision
    }
}
/// Rule that applies `decision` to every command whose leading tokens match `pattern`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct PrefixRule {
    pub pattern: PrefixPattern,
    pub decision: Decision,
}
/// Behaviour shared by every rule kind stored in a policy.
pub trait Rule: Any + Debug + Send + Sync {
    /// Program name (first command token) this rule is registered under.
    fn program(&self) -> &str;
    /// Returns the match details when the rule applies to `cmd`, otherwise `None`.
    fn matches(&self, cmd: &[String]) -> Option<RuleMatch>;
}
/// Shared, type-erased handle to a rule.
pub type RuleRef = Arc<dyn Rule>;
impl Rule for PrefixRule {
    /// The program is the pattern's fixed first token.
    fn program(&self) -> &str {
        &self.pattern.first
    }

    /// Matches when the pattern covers the start of `cmd`; the match records the covered
    /// prefix together with this rule's decision.
    fn matches(&self, cmd: &[String]) -> Option<RuleMatch> {
        let matched_prefix = self.pattern.matches_prefix(cmd)?;
        Some(RuleMatch::PrefixRuleMatch {
            matched_prefix,
            decision: self.decision,
        })
    }
}
/// Ensure that every positive example is matched by at least one of `rules`.
///
/// All unmatched examples are collected and reported together in
/// `Error::ExampleDidNotMatch`.
pub(crate) fn validate_match_examples(rules: &[RuleRef], matches: &[Vec<String>]) -> Result<()> {
    let unmatched_examples: Vec<String> = matches
        .iter()
        .filter(|example| {
            !rules
                .iter()
                .any(|rule| rule.matches(example.as_slice()).is_some())
        })
        .map(|example| {
            try_join(example.iter().map(String::as_str))
                .unwrap_or_else(|_| "unable to render example".to_string())
        })
        .collect();

    if unmatched_examples.is_empty() {
        return Ok(());
    }
    Err(Error::ExampleDidNotMatch {
        rules: rules.iter().map(|rule| format!("{rule:?}")).collect(),
        examples: unmatched_examples,
    })
}
/// Ensure that no rule matches any provided negative example.
pub(crate) fn validate_not_match_examples(
rules: &[RuleRef],
not_matches: &[Vec<String>],
) -> Result<()> {
for example in not_matches {
if let Some(rule) = rules.iter().find(|rule| rule.matches(example).is_some()) {
return Err(Error::ExampleDidMatch {
rule: format!("{rule:?}"),
example: try_join(example.iter().map(String::as_str))
.unwrap_or_else(|_| "unable to render example".to_string()),
});
}
}
Ok(())
}

View File

@@ -0,0 +1,405 @@
use std::any::Any;
use std::sync::Arc;
use codex_execpolicy2::Decision;
use codex_execpolicy2::Evaluation;
use codex_execpolicy2::PolicyParser;
use codex_execpolicy2::RuleMatch;
use codex_execpolicy2::RuleRef;
use codex_execpolicy2::rule::PatternToken;
use codex_execpolicy2::rule::PrefixPattern;
use codex_execpolicy2::rule::PrefixRule;
use pretty_assertions::assert_eq;
/// Builds an owned command vector from string literals.
fn tokens(cmd: &[&str]) -> Vec<String> {
    let mut owned = Vec::with_capacity(cmd.len());
    for part in cmd {
        owned.push(String::from(*part));
    }
    owned
}
/// Comparable copy of a rule, so tests can use `assert_eq!` on the contents of a `RuleRef`.
#[derive(Clone, Debug, Eq, PartialEq)]
enum RuleSnapshot {
    Prefix(PrefixRule),
}
/// Downcasts each rule to its concrete type and clones it into a `RuleSnapshot`.
///
/// Panics when a rule is not a `PrefixRule`.
fn rule_snapshots(rules: &[RuleRef]) -> Vec<RuleSnapshot> {
    let mut snapshots = Vec::with_capacity(rules.len());
    for rule in rules {
        let erased = rule.as_ref() as &dyn Any;
        let Some(prefix_rule) = erased.downcast_ref::<PrefixRule>() else {
            panic!("unexpected rule type in RuleRef: {rule:?}");
        };
        snapshots.push(RuleSnapshot::Prefix(prefix_rule.clone()));
    }
    snapshots
}
/// A command equal to the rule's pattern matches with the default `allow` decision.
#[test]
fn basic_match() {
    let source = r#"
prefix_rule(
pattern = ["git", "status"],
)
"#;
    let mut policy_parser = PolicyParser::new();
    policy_parser
        .parse("test.codexpolicy", source)
        .expect("parse policy");
    let policy = policy_parser.build();

    let expected = Evaluation::Match {
        decision: Decision::Allow,
        matched_rules: vec![RuleMatch::PrefixRuleMatch {
            matched_prefix: tokens(&["git", "status"]),
            decision: Decision::Allow,
        }],
    };
    let actual = policy.check(&tokens(&["git", "status"]));
    assert_eq!(expected, actual);
}
/// Rules from separately parsed sources are merged in parse order, and evaluation sees
/// all of them.
#[test]
fn parses_multiple_policy_files() {
    fn prefix_match(prefix: &[&str], decision: Decision) -> RuleMatch {
        RuleMatch::PrefixRuleMatch {
            matched_prefix: tokens(prefix),
            decision,
        }
    }

    let mut policy_parser = PolicyParser::new();
    policy_parser
        .parse(
            "first.codexpolicy",
            r#"
prefix_rule(
pattern = ["git"],
decision = "prompt",
)
"#,
        )
        .expect("parse policy");
    policy_parser
        .parse(
            "second.codexpolicy",
            r#"
prefix_rule(
pattern = ["git", "commit"],
decision = "forbidden",
)
"#,
        )
        .expect("parse policy");
    let policy = policy_parser.build();

    // Both rules are registered under "git", in the order their sources were parsed.
    let expected_rules = vec![
        RuleSnapshot::Prefix(PrefixRule {
            pattern: PrefixPattern {
                first: Arc::from("git"),
                rest: Vec::<PatternToken>::new().into(),
            },
            decision: Decision::Prompt,
        }),
        RuleSnapshot::Prefix(PrefixRule {
            pattern: PrefixPattern {
                first: Arc::from("git"),
                rest: vec![PatternToken::Single("commit".to_string())].into(),
            },
            decision: Decision::Forbidden,
        }),
    ];
    let actual_rules = rule_snapshots(policy.rules().get_vec("git").expect("git rules"));
    assert_eq!(expected_rules, actual_rules);

    let expected_status = Evaluation::Match {
        decision: Decision::Prompt,
        matched_rules: vec![prefix_match(&["git"], Decision::Prompt)],
    };
    assert_eq!(expected_status, policy.check(&tokens(&["git", "status"])));

    let expected_commit = Evaluation::Match {
        decision: Decision::Forbidden,
        matched_rules: vec![
            prefix_match(&["git"], Decision::Prompt),
            prefix_match(&["git", "commit"], Decision::Forbidden),
        ],
    };
    assert_eq!(
        expected_commit,
        policy.check(&tokens(&["git", "commit", "-m", "hi"]))
    );
}
/// Alternatives in the first pattern position produce one rule per program, while
/// alternatives in later positions stay inside each rule.
#[test]
fn only_first_token_alias_expands_to_multiple_rules() {
    fn expected_rules(program: &str) -> Vec<RuleSnapshot> {
        vec![RuleSnapshot::Prefix(PrefixRule {
            pattern: PrefixPattern {
                first: Arc::from(program),
                rest: vec![PatternToken::Alts(vec!["-c".to_string(), "-l".to_string()])].into(),
            },
            decision: Decision::Allow,
        })]
    }

    let source = r#"
prefix_rule(
pattern = [["bash", "sh"], ["-c", "-l"]],
)
"#;
    let mut policy_parser = PolicyParser::new();
    policy_parser
        .parse("test.codexpolicy", source)
        .expect("parse policy");
    let policy = policy_parser.build();

    let bash_rules = rule_snapshots(policy.rules().get_vec("bash").expect("bash rules"));
    let sh_rules = rule_snapshots(policy.rules().get_vec("sh").expect("sh rules"));
    assert_eq!(expected_rules("bash"), bash_rules);
    assert_eq!(expected_rules("sh"), sh_rules);

    let expected_bash = Evaluation::Match {
        decision: Decision::Allow,
        matched_rules: vec![RuleMatch::PrefixRuleMatch {
            matched_prefix: tokens(&["bash", "-c"]),
            decision: Decision::Allow,
        }],
    };
    assert_eq!(
        expected_bash,
        policy.check(&tokens(&["bash", "-c", "echo", "hi"]))
    );

    let expected_sh = Evaluation::Match {
        decision: Decision::Allow,
        matched_rules: vec![RuleMatch::PrefixRuleMatch {
            matched_prefix: tokens(&["sh", "-l"]),
            decision: Decision::Allow,
        }],
    };
    assert_eq!(
        expected_sh,
        policy.check(&tokens(&["sh", "-l", "echo", "hi"]))
    );
}
/// Alternatives after the first token are kept as `Alts` inside a single rule rather than
/// being expanded into one rule per combination.
#[test]
fn tail_aliases_are_not_cartesian_expanded() {
    let source = r#"
prefix_rule(
pattern = ["npm", ["i", "install"], ["--legacy-peer-deps", "--no-save"]],
)
"#;
    let mut policy_parser = PolicyParser::new();
    policy_parser
        .parse("test.codexpolicy", source)
        .expect("parse policy");
    let policy = policy_parser.build();

    let expected_rules = vec![RuleSnapshot::Prefix(PrefixRule {
        pattern: PrefixPattern {
            first: Arc::from("npm"),
            rest: vec![
                PatternToken::Alts(vec!["i".to_string(), "install".to_string()]),
                PatternToken::Alts(vec![
                    "--legacy-peer-deps".to_string(),
                    "--no-save".to_string(),
                ]),
            ]
            .into(),
        },
        decision: Decision::Allow,
    })];
    let actual_rules = rule_snapshots(policy.rules().get_vec("npm").expect("npm rules"));
    assert_eq!(expected_rules, actual_rules);

    let expected_short = Evaluation::Match {
        decision: Decision::Allow,
        matched_rules: vec![RuleMatch::PrefixRuleMatch {
            matched_prefix: tokens(&["npm", "i", "--legacy-peer-deps"]),
            decision: Decision::Allow,
        }],
    };
    assert_eq!(
        expected_short,
        policy.check(&tokens(&["npm", "i", "--legacy-peer-deps"]))
    );

    let expected_long = Evaluation::Match {
        decision: Decision::Allow,
        matched_rules: vec![RuleMatch::PrefixRuleMatch {
            matched_prefix: tokens(&["npm", "install", "--no-save"]),
            decision: Decision::Allow,
        }],
    };
    assert_eq!(
        expected_long,
        policy.check(&tokens(&["npm", "install", "--no-save", "leftpad"]))
    );
}
/// A policy whose `match` / `not_match` examples hold parses successfully, and the built
/// policy evaluates those same commands accordingly.
#[test]
fn match_and_not_match_examples_are_enforced() {
    let source = r#"
prefix_rule(
pattern = ["git", "status"],
match = [["git", "status"], "git status"],
not_match = [
["git", "--config", "color.status=always", "status"],
"git --config color.status=always status",
],
)
"#;
    let mut policy_parser = PolicyParser::new();
    policy_parser
        .parse("test.codexpolicy", source)
        .expect("parse policy");
    let policy = policy_parser.build();

    let expected_match = Evaluation::Match {
        decision: Decision::Allow,
        matched_rules: vec![RuleMatch::PrefixRuleMatch {
            matched_prefix: tokens(&["git", "status"]),
            decision: Decision::Allow,
        }],
    };
    assert_eq!(expected_match, policy.check(&tokens(&["git", "status"])));

    let rejected_command = tokens(&["git", "--config", "color.status=always", "status"]);
    assert_eq!(Evaluation::NoMatch, policy.check(&rejected_command));
}
/// When several rules match one command, every match is reported in rule order and the
/// overall decision is the strictest one.
#[test]
fn strictest_decision_wins_across_matches() {
    fn prefix_match(prefix: &[&str], decision: Decision) -> RuleMatch {
        RuleMatch::PrefixRuleMatch {
            matched_prefix: tokens(prefix),
            decision,
        }
    }

    let source = r#"
prefix_rule(
pattern = ["git", "status"],
decision = "allow",
)
prefix_rule(
pattern = ["git"],
decision = "prompt",
)
prefix_rule(
pattern = ["git", "commit"],
decision = "forbidden",
)
"#;
    let mut policy_parser = PolicyParser::new();
    policy_parser
        .parse("test.codexpolicy", source)
        .expect("parse policy");
    let policy = policy_parser.build();

    let expected_status = Evaluation::Match {
        decision: Decision::Prompt,
        matched_rules: vec![
            prefix_match(&["git", "status"], Decision::Allow),
            prefix_match(&["git"], Decision::Prompt),
        ],
    };
    assert_eq!(expected_status, policy.check(&tokens(&["git", "status"])));

    let expected_commit = Evaluation::Match {
        decision: Decision::Forbidden,
        matched_rules: vec![
            prefix_match(&["git"], Decision::Prompt),
            prefix_match(&["git", "commit"], Decision::Forbidden),
        ],
    };
    assert_eq!(
        expected_commit,
        policy.check(&tokens(&["git", "commit", "-m", "hi"]))
    );
}
/// `check_multiple` concatenates the matches of each command in input order and reports
/// the strictest decision across all of them.
#[test]
fn strictest_decision_across_multiple_commands() {
    fn prefix_match(prefix: &[&str], decision: Decision) -> RuleMatch {
        RuleMatch::PrefixRuleMatch {
            matched_prefix: tokens(prefix),
            decision,
        }
    }

    let source = r#"
prefix_rule(
pattern = ["git", "status"],
decision = "allow",
)
prefix_rule(
pattern = ["git"],
decision = "prompt",
)
prefix_rule(
pattern = ["git", "commit"],
decision = "forbidden",
)
"#;
    let mut policy_parser = PolicyParser::new();
    policy_parser
        .parse("test.codexpolicy", source)
        .expect("parse policy");
    let policy = policy_parser.build();

    let commands = vec![
        tokens(&["git", "status"]),
        tokens(&["git", "commit", "-m", "hi"]),
    ];
    let expected = Evaluation::Match {
        decision: Decision::Forbidden,
        matched_rules: vec![
            // Matches for `git status`.
            prefix_match(&["git", "status"], Decision::Allow),
            prefix_match(&["git"], Decision::Prompt),
            // Matches for `git commit -m hi`.
            prefix_match(&["git"], Decision::Prompt),
            prefix_match(&["git", "commit"], Decision::Forbidden),
        ],
    };
    let actual = policy.check_multiple(&commands);
    assert_eq!(expected, actual);
}

View File

@@ -10,6 +10,7 @@ use tokio::sync::mpsc::unbounded_channel;
use crate::app_event::AppEvent;
use crate::app_event_sender::AppEventSender;
use crate::history_cell;
/// Spawn the agent bootstrapper and op forwarding loop, returning the
/// `UnboundedSender<Op>` used by the UI to submit operations.
@@ -29,8 +30,12 @@ pub(crate) fn spawn_agent(
} = match server.new_conversation(config).await {
Ok(v) => v,
Err(e) => {
// TODO: surface this error to the user.
tracing::error!("failed to initialize codex: {e}");
let message =
format!("Failed to initialize Codex: {e}");
tracing::error!("{message}");
app_event_tx_clone.send(AppEvent::InsertHistoryCell(Box::new(
history_cell::new_error_event(message),
)));
return;
}
};