execpolicy2 core integration (#6641)

This PR threads execpolicy2 into codex-core.

activated via feature flag: exec_policy (on by default)

reads and parses all .codexpolicy files in `codex_home/codex`

refactored tool runtime API to integrate execpolicy logic

---------

Co-authored-by: Michael Bolin <mbolin@openai.com>
This commit is contained in:
zhao-oai
2025-11-19 16:50:43 -08:00
committed by GitHub
parent b00a7cf40d
commit 65c13f1ae7
21 changed files with 692 additions and 95 deletions

View File

@@ -69,6 +69,39 @@ Codex can access MCP servers. To configure them, refer to the [config docs](./do
Codex CLI supports a rich set of configuration options, with preferences stored in `~/.codex/config.toml`. For full configuration options, see [Configuration](./docs/config.md).
### Execpolicy quickstart
Codex can enforce your own rules-based execution policy before it runs shell commands.
1. Create a policy directory: `mkdir -p ~/.codex/policy`.
2. Create one or more `.codexpolicy` files into that folder. Codex automatically loads every `.codexpolicy` file in there on startup.
3. Write `prefix_rule` entries to describe the commands you want to allow, prompt, or block:
```starlark
prefix_rule(
pattern = ["git", ["push", "fetch"]],
decision = "prompt", # allow | prompt | forbidden
match = [["git", "push", "origin", "main"]], # examples that must match
not_match = [["git", "status"]], # examples that must not match
)
```
- `pattern` is a list of shell tokens, evaluated from left to right; wrap tokens in a nested list to express alternatives (e.g., match both `push` and `fetch`).
- `decision` sets the severity; Codex picks the strictest decision when multiple rules match.
- `match` and `not_match` act as (optional) unit tests. Codex validates them when it loads your policy, so you get feedback if an example has unexpected behavior.
In this example rule, if Codex wants to run commands with the prefix `git push` or `git fetch`, it will first ask for user approval.
Note: If Codex wants to run a command that matches with multiple rules, it will use the strictest decision among the matched rules (forbidden > prompt > allow).
Use the [`execpolicy2` CLI](./codex-rs/execpolicy2/README.md) to preview decisions before you save a rule:
```shell
cargo run -p codex-execpolicy2 -- check --policy ~/.codex/policy/default.codexpolicy git push origin main
```
Pass multiple `--policy` flags to test how several files combine. See the [`codex-rs/execpolicy2` README](./codex-rs/execpolicy2/README.md) for a more detailed walkthrough of the available syntax.
---
### Docs & FAQ

1
codex-rs/Cargo.lock generated
View File

@@ -1086,6 +1086,7 @@ dependencies = [
"codex-apply-patch",
"codex-arg0",
"codex-async-utils",
"codex-execpolicy2",
"codex-file-search",
"codex-git",
"codex-keyring-store",

View File

@@ -67,6 +67,7 @@ codex-chatgpt = { path = "chatgpt" }
codex-common = { path = "common" }
codex-core = { path = "core" }
codex-exec = { path = "exec" }
codex-execpolicy2 = { path = "execpolicy2" }
codex-feedback = { path = "feedback" }
codex-file-search = { path = "file-search" }
codex-git = { path = "utils/git" }

View File

@@ -22,6 +22,7 @@ chrono = { workspace = true, features = ["serde"] }
codex-app-server-protocol = { workspace = true }
codex-apply-patch = { workspace = true }
codex-async-utils = { workspace = true }
codex-execpolicy2 = { workspace = true }
codex-file-search = { workspace = true }
codex-git = { workspace = true }
codex-keyring-store = { workspace = true }

View File

@@ -121,6 +121,7 @@ use crate::user_instructions::UserInstructions;
use crate::user_notification::UserNotification;
use crate::util::backoff;
use codex_async_utils::OrCancelExt;
use codex_execpolicy2::Policy as ExecPolicy;
use codex_otel::otel_event_manager::OtelEventManager;
use codex_protocol::config_types::ReasoningEffort as ReasoningEffortConfig;
use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
@@ -167,6 +168,10 @@ impl Codex {
let user_instructions = get_user_instructions(&config).await;
let exec_policy = crate::exec_policy::exec_policy_for(&config.features, &config.codex_home)
.await
.map_err(|err| CodexErr::Fatal(format!("failed to load execpolicy: {err}")))?;
let config = Arc::new(config);
let session_configuration = SessionConfiguration {
@@ -183,6 +188,7 @@ impl Codex {
cwd: config.cwd.clone(),
original_config_do_not_use: Arc::clone(&config),
features: config.features.clone(),
exec_policy,
session_source,
};
@@ -280,6 +286,7 @@ pub(crate) struct TurnContext {
pub(crate) final_output_json_schema: Option<Value>,
pub(crate) codex_linux_sandbox_exe: Option<PathBuf>,
pub(crate) tool_call_gate: Arc<ReadinessFlag>,
pub(crate) exec_policy: Arc<ExecPolicy>,
pub(crate) truncation_policy: TruncationPolicy,
}
@@ -336,6 +343,8 @@ pub(crate) struct SessionConfiguration {
/// Set of feature flags for this session
features: Features,
/// Execpolicy policy, applied only when enabled by feature flag.
exec_policy: Arc<ExecPolicy>,
// TODO(pakrym): Remove config from here
original_config_do_not_use: Arc<Config>,
@@ -436,6 +445,7 @@ impl Session {
final_output_json_schema: None,
codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(),
tool_call_gate: Arc::new(ReadinessFlag::new()),
exec_policy: session_configuration.exec_policy.clone(),
truncation_policy: TruncationPolicy::new(&per_turn_config),
}
}
@@ -1789,6 +1799,7 @@ async fn spawn_review_thread(
final_output_json_schema: None,
codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(),
tool_call_gate: Arc::new(ReadinessFlag::new()),
exec_policy: parent_turn_context.exec_policy.clone(),
truncation_policy: TruncationPolicy::new(&per_turn_config),
};
@@ -2608,6 +2619,7 @@ mod tests {
cwd: config.cwd.clone(),
original_config_do_not_use: Arc::clone(&config),
features: Features::default(),
exec_policy: Arc::new(codex_execpolicy2::Policy::empty()),
session_source: SessionSource::Exec,
};
@@ -2685,6 +2697,7 @@ mod tests {
cwd: config.cwd.clone(),
original_config_do_not_use: Arc::clone(&config),
features: Features::default(),
exec_policy: Arc::new(codex_execpolicy2::Policy::empty()),
session_source: SessionSource::Exec,
};

View File

@@ -1,6 +1,8 @@
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::SandboxPolicy;
use crate::sandboxing::SandboxPermissions;
use crate::bash::parse_shell_lc_plain_commands;
use crate::is_safe_command::is_known_safe_command;
@@ -8,7 +10,7 @@ pub fn requires_initial_appoval(
policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
command: &[String],
with_escalated_permissions: bool,
sandbox_permissions: SandboxPermissions,
) -> bool {
if is_known_safe_command(command) {
return false;
@@ -24,8 +26,7 @@ pub fn requires_initial_appoval(
// In restricted sandboxes (ReadOnly/WorkspaceWrite), do not prompt for
// nonescalated, nondangerous commands — let the sandbox enforce
// restrictions (e.g., block network/write) without a user prompt.
let wants_escalation: bool = with_escalated_permissions;
if wants_escalation {
if sandbox_permissions.requires_escalated_permissions() {
return true;
}
command_might_be_dangerous(command)

View File

@@ -0,0 +1,365 @@
use std::io::ErrorKind;
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;
use crate::command_safety::is_dangerous_command::requires_initial_appoval;
use codex_execpolicy2::Decision;
use codex_execpolicy2::Evaluation;
use codex_execpolicy2::Policy;
use codex_execpolicy2::PolicyParser;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::SandboxPolicy;
use thiserror::Error;
use tokio::fs;
use crate::bash::parse_shell_lc_plain_commands;
use crate::features::Feature;
use crate::features::Features;
use crate::sandboxing::SandboxPermissions;
use crate::tools::sandboxing::ApprovalRequirement;
const FORBIDDEN_REASON: &str = "execpolicy forbids this command";
const PROMPT_REASON: &str = "execpolicy requires approval for this command";
const POLICY_DIR_NAME: &str = "policy";
const POLICY_EXTENSION: &str = "codexpolicy";
#[derive(Debug, Error)]
pub enum ExecPolicyError {
#[error("failed to read execpolicy files from {dir}: {source}")]
ReadDir {
dir: PathBuf,
source: std::io::Error,
},
#[error("failed to read execpolicy file {path}: {source}")]
ReadFile {
path: PathBuf,
source: std::io::Error,
},
#[error("failed to parse execpolicy file {path}: {source}")]
ParsePolicy {
path: String,
source: codex_execpolicy2::Error,
},
}
pub(crate) async fn exec_policy_for(
features: &Features,
codex_home: &Path,
) -> Result<Arc<Policy>, ExecPolicyError> {
if !features.enabled(Feature::ExecPolicy) {
return Ok(Arc::new(Policy::empty()));
}
let policy_dir = codex_home.join(POLICY_DIR_NAME);
let policy_paths = collect_policy_files(&policy_dir).await?;
let mut parser = PolicyParser::new();
for policy_path in &policy_paths {
let contents =
fs::read_to_string(policy_path)
.await
.map_err(|source| ExecPolicyError::ReadFile {
path: policy_path.clone(),
source,
})?;
let identifier = policy_path.to_string_lossy().to_string();
parser
.parse(&identifier, &contents)
.map_err(|source| ExecPolicyError::ParsePolicy {
path: identifier,
source,
})?;
}
let policy = Arc::new(parser.build());
tracing::debug!(
"loaded execpolicy from {} files in {}",
policy_paths.len(),
policy_dir.display()
);
Ok(policy)
}
fn evaluate_with_policy(
policy: &Policy,
command: &[String],
approval_policy: AskForApproval,
) -> Option<ApprovalRequirement> {
let commands = parse_shell_lc_plain_commands(command).unwrap_or_else(|| vec![command.to_vec()]);
let evaluation = policy.check_multiple(commands.iter());
match evaluation {
Evaluation::Match { decision, .. } => match decision {
Decision::Forbidden => Some(ApprovalRequirement::Forbidden {
reason: FORBIDDEN_REASON.to_string(),
}),
Decision::Prompt => {
let reason = PROMPT_REASON.to_string();
if matches!(approval_policy, AskForApproval::Never) {
Some(ApprovalRequirement::Forbidden { reason })
} else {
Some(ApprovalRequirement::NeedsApproval {
reason: Some(reason),
})
}
}
Decision::Allow => Some(ApprovalRequirement::Skip),
},
Evaluation::NoMatch => None,
}
}
pub(crate) fn create_approval_requirement_for_command(
policy: &Policy,
command: &[String],
approval_policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
sandbox_permissions: SandboxPermissions,
) -> ApprovalRequirement {
if let Some(requirement) = evaluate_with_policy(policy, command, approval_policy) {
return requirement;
}
if requires_initial_appoval(
approval_policy,
sandbox_policy,
command,
sandbox_permissions,
) {
ApprovalRequirement::NeedsApproval { reason: None }
} else {
ApprovalRequirement::Skip
}
}
async fn collect_policy_files(dir: &Path) -> Result<Vec<PathBuf>, ExecPolicyError> {
let mut read_dir = match fs::read_dir(dir).await {
Ok(read_dir) => read_dir,
Err(err) if err.kind() == ErrorKind::NotFound => return Ok(Vec::new()),
Err(source) => {
return Err(ExecPolicyError::ReadDir {
dir: dir.to_path_buf(),
source,
});
}
};
let mut policy_paths = Vec::new();
while let Some(entry) =
read_dir
.next_entry()
.await
.map_err(|source| ExecPolicyError::ReadDir {
dir: dir.to_path_buf(),
source,
})?
{
let path = entry.path();
let file_type = entry
.file_type()
.await
.map_err(|source| ExecPolicyError::ReadDir {
dir: dir.to_path_buf(),
source,
})?;
if path
.extension()
.and_then(|ext| ext.to_str())
.is_some_and(|ext| ext == POLICY_EXTENSION)
&& file_type.is_file()
{
policy_paths.push(path);
}
}
policy_paths.sort();
Ok(policy_paths)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::features::Feature;
use crate::features::Features;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::SandboxPolicy;
use pretty_assertions::assert_eq;
use std::fs;
use tempfile::tempdir;
#[tokio::test]
async fn returns_empty_policy_when_feature_disabled() {
let mut features = Features::with_defaults();
features.disable(Feature::ExecPolicy);
let temp_dir = tempdir().expect("create temp dir");
let policy = exec_policy_for(&features, temp_dir.path())
.await
.expect("policy result");
let commands = [vec!["rm".to_string()]];
assert!(matches!(
policy.check_multiple(commands.iter()),
Evaluation::NoMatch
));
assert!(!temp_dir.path().join(POLICY_DIR_NAME).exists());
}
#[tokio::test]
async fn collect_policy_files_returns_empty_when_dir_missing() {
let temp_dir = tempdir().expect("create temp dir");
let policy_dir = temp_dir.path().join(POLICY_DIR_NAME);
let files = collect_policy_files(&policy_dir)
.await
.expect("collect policy files");
assert!(files.is_empty());
}
#[tokio::test]
async fn loads_policies_from_policy_subdirectory() {
let temp_dir = tempdir().expect("create temp dir");
let policy_dir = temp_dir.path().join(POLICY_DIR_NAME);
fs::create_dir_all(&policy_dir).expect("create policy dir");
fs::write(
policy_dir.join("deny.codexpolicy"),
r#"prefix_rule(pattern=["rm"], decision="forbidden")"#,
)
.expect("write policy file");
let policy = exec_policy_for(&Features::with_defaults(), temp_dir.path())
.await
.expect("policy result");
let command = [vec!["rm".to_string()]];
assert!(matches!(
policy.check_multiple(command.iter()),
Evaluation::Match { .. }
));
}
#[tokio::test]
async fn ignores_policies_outside_policy_dir() {
let temp_dir = tempdir().expect("create temp dir");
fs::write(
temp_dir.path().join("root.codexpolicy"),
r#"prefix_rule(pattern=["ls"], decision="prompt")"#,
)
.expect("write policy file");
let policy = exec_policy_for(&Features::with_defaults(), temp_dir.path())
.await
.expect("policy result");
let command = [vec!["ls".to_string()]];
assert!(matches!(
policy.check_multiple(command.iter()),
Evaluation::NoMatch
));
}
#[test]
fn evaluates_bash_lc_inner_commands() {
let policy_src = r#"
prefix_rule(pattern=["rm"], decision="forbidden")
"#;
let mut parser = PolicyParser::new();
parser
.parse("test.codexpolicy", policy_src)
.expect("parse policy");
let policy = parser.build();
let forbidden_script = vec![
"bash".to_string(),
"-lc".to_string(),
"rm -rf /tmp".to_string(),
];
let requirement =
evaluate_with_policy(&policy, &forbidden_script, AskForApproval::OnRequest)
.expect("expected match for forbidden command");
assert_eq!(
requirement,
ApprovalRequirement::Forbidden {
reason: FORBIDDEN_REASON.to_string()
}
);
}
#[test]
fn approval_requirement_prefers_execpolicy_match() {
let policy_src = r#"prefix_rule(pattern=["rm"], decision="prompt")"#;
let mut parser = PolicyParser::new();
parser
.parse("test.codexpolicy", policy_src)
.expect("parse policy");
let policy = parser.build();
let command = vec!["rm".to_string()];
let requirement = create_approval_requirement_for_command(
&policy,
&command,
AskForApproval::OnRequest,
&SandboxPolicy::DangerFullAccess,
SandboxPermissions::UseDefault,
);
assert_eq!(
requirement,
ApprovalRequirement::NeedsApproval {
reason: Some(PROMPT_REASON.to_string())
}
);
}
#[test]
fn approval_requirement_respects_approval_policy() {
let policy_src = r#"prefix_rule(pattern=["rm"], decision="prompt")"#;
let mut parser = PolicyParser::new();
parser
.parse("test.codexpolicy", policy_src)
.expect("parse policy");
let policy = parser.build();
let command = vec!["rm".to_string()];
let requirement = create_approval_requirement_for_command(
&policy,
&command,
AskForApproval::Never,
&SandboxPolicy::DangerFullAccess,
SandboxPermissions::UseDefault,
);
assert_eq!(
requirement,
ApprovalRequirement::Forbidden {
reason: PROMPT_REASON.to_string()
}
);
}
#[test]
fn approval_requirement_falls_back_to_heuristics() {
let command = vec!["python".to_string()];
let empty_policy = Policy::empty();
let requirement = create_approval_requirement_for_command(
&empty_policy,
&command,
AskForApproval::UnlessTrusted,
&SandboxPolicy::ReadOnly,
SandboxPermissions::UseDefault,
);
assert_eq!(
requirement,
ApprovalRequirement::NeedsApproval { reason: None }
);
}
}

View File

@@ -42,6 +42,8 @@ pub enum Feature {
ViewImageTool,
/// Allow the model to request web searches.
WebSearchRequest,
/// Gate the execpolicy enforcement for shell/unified exec.
ExecPolicy,
/// Enable the model-based risk assessments for sandboxed commands.
SandboxCommandAssessment,
/// Enable Windows sandbox (restricted token) on Windows.
@@ -297,6 +299,12 @@ pub const FEATURES: &[FeatureSpec] = &[
stage: Stage::Stable,
default_enabled: false,
},
FeatureSpec {
id: Feature::ExecPolicy,
key: "exec_policy",
stage: Stage::Experimental,
default_enabled: true,
},
FeatureSpec {
id: Feature::SandboxCommandAssessment,
key: "experimental_sandbox_command_assessment",

View File

@@ -25,6 +25,7 @@ mod environment_context;
pub mod error;
pub mod exec;
pub mod exec_env;
mod exec_policy;
pub mod features;
mod flags;
pub mod git_info;

View File

@@ -26,6 +26,28 @@ use std::collections::HashMap;
use std::path::Path;
use std::path::PathBuf;
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SandboxPermissions {
UseDefault,
RequireEscalated,
}
impl SandboxPermissions {
pub fn requires_escalated_permissions(self) -> bool {
matches!(self, SandboxPermissions::RequireEscalated)
}
}
impl From<bool> for SandboxPermissions {
fn from(with_escalated_permissions: bool) -> Self {
if with_escalated_permissions {
SandboxPermissions::RequireEscalated
} else {
SandboxPermissions::UseDefault
}
}
}
#[derive(Clone, Debug)]
pub struct CommandSpec {
pub program: String,

View File

@@ -9,9 +9,11 @@ use crate::apply_patch::convert_apply_patch_to_protocol;
use crate::codex::TurnContext;
use crate::exec::ExecParams;
use crate::exec_env::create_env;
use crate::exec_policy::create_approval_requirement_for_command;
use crate::function_tool::FunctionCallError;
use crate::is_safe_command::is_known_safe_command;
use crate::protocol::ExecCommandSource;
use crate::sandboxing::SandboxPermissions;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolOutput;
use crate::tools::context::ToolPayload;
@@ -302,6 +304,13 @@ impl ShellHandler {
env: exec_params.env.clone(),
with_escalated_permissions: exec_params.with_escalated_permissions,
justification: exec_params.justification.clone(),
approval_requirement: create_approval_requirement_for_command(
&turn.exec_policy,
&exec_params.command,
turn.approval_policy,
&turn.sandbox_policy,
SandboxPermissions::from(exec_params.with_escalated_permissions.unwrap_or(false)),
),
};
let mut orchestrator = ToolOrchestrator::new();
let mut runtime = ShellRuntime::new();

View File

@@ -11,11 +11,13 @@ use crate::error::get_error_message_ui;
use crate::exec::ExecToolCallOutput;
use crate::sandboxing::SandboxManager;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::ApprovalRequirement;
use crate::tools::sandboxing::ProvidesSandboxRetryData;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::ToolCtx;
use crate::tools::sandboxing::ToolError;
use crate::tools::sandboxing::ToolRuntime;
use crate::tools::sandboxing::default_approval_requirement;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::ReviewDecision;
@@ -49,40 +51,52 @@ impl ToolOrchestrator {
let otel_cfg = codex_otel::otel_event_manager::ToolDecisionSource::Config;
// 1) Approval
let needs_initial_approval =
tool.wants_initial_approval(req, approval_policy, &turn_ctx.sandbox_policy);
let mut already_approved = false;
if needs_initial_approval {
let mut risk = None;
if let Some(metadata) = req.sandbox_retry_data() {
risk = tool_ctx
.session
.assess_sandbox_command(turn_ctx, &tool_ctx.call_id, &metadata.command, None)
.await;
let requirement = tool.approval_requirement(req).unwrap_or_else(|| {
default_approval_requirement(approval_policy, &turn_ctx.sandbox_policy)
});
match requirement {
ApprovalRequirement::Skip => {
otel.tool_decision(otel_tn, otel_ci, ReviewDecision::Approved, otel_cfg);
}
ApprovalRequirement::Forbidden { reason } => {
return Err(ToolError::Rejected(reason));
}
ApprovalRequirement::NeedsApproval { reason } => {
let mut risk = None;
let approval_ctx = ApprovalCtx {
session: tool_ctx.session,
turn: turn_ctx,
call_id: &tool_ctx.call_id,
retry_reason: None,
risk,
};
let decision = tool.start_approval_async(req, approval_ctx).await;
otel.tool_decision(otel_tn, otel_ci, decision, otel_user.clone());
match decision {
ReviewDecision::Denied | ReviewDecision::Abort => {
return Err(ToolError::Rejected("rejected by user".to_string()));
if let Some(metadata) = req.sandbox_retry_data() {
risk = tool_ctx
.session
.assess_sandbox_command(
turn_ctx,
&tool_ctx.call_id,
&metadata.command,
None,
)
.await;
}
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {}
let approval_ctx = ApprovalCtx {
session: tool_ctx.session,
turn: turn_ctx,
call_id: &tool_ctx.call_id,
retry_reason: reason,
risk,
};
let decision = tool.start_approval_async(req, approval_ctx).await;
otel.tool_decision(otel_tn, otel_ci, decision, otel_user.clone());
match decision {
ReviewDecision::Denied | ReviewDecision::Abort => {
return Err(ToolError::Rejected("rejected by user".to_string()));
}
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {}
}
already_approved = true;
}
already_approved = true;
} else {
otel.tool_decision(otel_tn, otel_ci, ReviewDecision::Approved, otel_cfg);
}
// 2) First attempt under the selected sandbox.

View File

@@ -4,13 +4,12 @@ Runtime: shell
Executes shell requests under the orchestrator: asks for approval when needed,
builds a CommandSpec, and runs it under the current SandboxAttempt.
*/
use crate::command_safety::is_dangerous_command::requires_initial_appoval;
use crate::exec::ExecToolCallOutput;
use crate::protocol::SandboxPolicy;
use crate::sandboxing::execute_env;
use crate::tools::runtimes::build_command_spec;
use crate::tools::sandboxing::Approvable;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::ApprovalRequirement;
use crate::tools::sandboxing::ProvidesSandboxRetryData;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::SandboxRetryData;
@@ -20,7 +19,6 @@ use crate::tools::sandboxing::ToolCtx;
use crate::tools::sandboxing::ToolError;
use crate::tools::sandboxing::ToolRuntime;
use crate::tools::sandboxing::with_cached_approval;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::ReviewDecision;
use futures::future::BoxFuture;
use std::path::PathBuf;
@@ -33,6 +31,7 @@ pub struct ShellRequest {
pub env: std::collections::HashMap<String, String>,
pub with_escalated_permissions: Option<bool>,
pub justification: Option<String>,
pub approval_requirement: ApprovalRequirement,
}
impl ProvidesSandboxRetryData for ShellRequest {
@@ -114,18 +113,8 @@ impl Approvable<ShellRequest> for ShellRuntime {
})
}
fn wants_initial_approval(
&self,
req: &ShellRequest,
policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
) -> bool {
requires_initial_appoval(
policy,
sandbox_policy,
&req.command,
req.with_escalated_permissions.unwrap_or(false),
)
fn approval_requirement(&self, req: &ShellRequest) -> Option<ApprovalRequirement> {
Some(req.approval_requirement.clone())
}
fn wants_escalated_first_attempt(&self, req: &ShellRequest) -> bool {

View File

@@ -1,4 +1,3 @@
use crate::command_safety::is_dangerous_command::requires_initial_appoval;
/*
Runtime: unified exec
@@ -10,6 +9,7 @@ use crate::error::SandboxErr;
use crate::tools::runtimes::build_command_spec;
use crate::tools::sandboxing::Approvable;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::ApprovalRequirement;
use crate::tools::sandboxing::ProvidesSandboxRetryData;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::SandboxRetryData;
@@ -22,9 +22,7 @@ use crate::tools::sandboxing::with_cached_approval;
use crate::unified_exec::UnifiedExecError;
use crate::unified_exec::UnifiedExecSession;
use crate::unified_exec::UnifiedExecSessionManager;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::ReviewDecision;
use codex_protocol::protocol::SandboxPolicy;
use futures::future::BoxFuture;
use std::collections::HashMap;
use std::path::PathBuf;
@@ -36,6 +34,7 @@ pub struct UnifiedExecRequest {
pub env: HashMap<String, String>,
pub with_escalated_permissions: Option<bool>,
pub justification: Option<String>,
pub approval_requirement: ApprovalRequirement,
}
impl ProvidesSandboxRetryData for UnifiedExecRequest {
@@ -65,6 +64,7 @@ impl UnifiedExecRequest {
env: HashMap<String, String>,
with_escalated_permissions: Option<bool>,
justification: Option<String>,
approval_requirement: ApprovalRequirement,
) -> Self {
Self {
command,
@@ -72,6 +72,7 @@ impl UnifiedExecRequest {
env,
with_escalated_permissions,
justification,
approval_requirement,
}
}
}
@@ -129,18 +130,8 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
})
}
fn wants_initial_approval(
&self,
req: &UnifiedExecRequest,
policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
) -> bool {
requires_initial_appoval(
policy,
sandbox_policy,
&req.command,
req.with_escalated_permissions.unwrap_or(false),
)
fn approval_requirement(&self, req: &UnifiedExecRequest) -> Option<ApprovalRequirement> {
Some(req.approval_requirement.clone())
}
fn wants_escalated_first_attempt(&self, req: &UnifiedExecRequest) -> bool {

View File

@@ -86,6 +86,37 @@ pub(crate) struct ApprovalCtx<'a> {
pub risk: Option<SandboxCommandAssessment>,
}
// Specifies what tool orchestrator should do with a given tool call.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum ApprovalRequirement {
/// No approval required for this tool call
Skip,
/// Approval required for this tool call
NeedsApproval { reason: Option<String> },
/// Execution forbidden for this tool call
Forbidden { reason: String },
}
/// - Never, OnFailure: do not ask
/// - OnRequest: ask unless sandbox policy is DangerFullAccess
/// - UnlessTrusted: always ask
pub(crate) fn default_approval_requirement(
policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
) -> ApprovalRequirement {
let needs_approval = match policy {
AskForApproval::Never | AskForApproval::OnFailure => false,
AskForApproval::OnRequest => !matches!(sandbox_policy, SandboxPolicy::DangerFullAccess),
AskForApproval::UnlessTrusted => true,
};
if needs_approval {
ApprovalRequirement::NeedsApproval { reason: None }
} else {
ApprovalRequirement::Skip
}
}
pub(crate) trait Approvable<Req> {
type ApprovalKey: Hash + Eq + Clone + Debug + Serialize;
@@ -106,22 +137,11 @@ pub(crate) trait Approvable<Req> {
matches!(policy, AskForApproval::Never)
}
/// Decide whether an initial user approval should be requested before the
/// first attempt. Defaults to the orchestrator's behavior (prerefactor):
/// - Never, OnFailure: do not ask
/// - OnRequest: ask unless sandbox policy is DangerFullAccess
/// - UnlessTrusted: always ask
fn wants_initial_approval(
&self,
_req: &Req,
policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
) -> bool {
match policy {
AskForApproval::Never | AskForApproval::OnFailure => false,
AskForApproval::OnRequest => !matches!(sandbox_policy, SandboxPolicy::DangerFullAccess),
AskForApproval::UnlessTrusted => true,
}
/// Override the default approval requirement. Return `Some(_)` to specify
/// a custom requirement, or `None` to fall back to
/// policy-based default.
fn approval_requirement(&self, _req: &Req) -> Option<ApprovalRequirement> {
None
}
/// Decide we can request an approval for no-sandbox execution.

View File

@@ -11,10 +11,12 @@ use crate::codex::TurnContext;
use crate::exec::ExecToolCallOutput;
use crate::exec::StreamOutput;
use crate::exec_env::create_env;
use crate::exec_policy::create_approval_requirement_for_command;
use crate::protocol::BackgroundEventEvent;
use crate::protocol::EventMsg;
use crate::protocol::ExecCommandSource;
use crate::sandboxing::ExecEnv;
use crate::sandboxing::SandboxPermissions;
use crate::tools::events::ToolEmitter;
use crate::tools::events::ToolEventCtx;
use crate::tools::events::ToolEventFailure;
@@ -449,6 +451,13 @@ impl UnifiedExecSessionManager {
create_env(&context.turn.shell_environment_policy),
with_escalated_permissions,
justification,
create_approval_requirement_for_command(
&context.turn.exec_policy,
command,
context.turn.approval_policy,
&context.turn.sandbox_policy,
SandboxPermissions::from(with_escalated_permissions.unwrap_or(false)),
),
);
let tool_ctx = ToolCtx {
session: context.session.as_ref(),

View File

@@ -0,0 +1,101 @@
#![allow(clippy::unwrap_used, clippy::expect_used)]
use anyhow::Result;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
use codex_core::protocol::SandboxPolicy;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::user_input::UserInput;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_function_call;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::mount_sse_once;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use serde_json::json;
use std::fs;
#[tokio::test]
async fn execpolicy_blocks_shell_invocation() -> Result<()> {
let mut builder = test_codex().with_config(|config| {
let policy_path = config.codex_home.join("policy").join("policy.codexpolicy");
fs::create_dir_all(
policy_path
.parent()
.expect("policy directory must have a parent"),
)
.expect("create policy directory");
fs::write(
&policy_path,
r#"prefix_rule(pattern=["echo"], decision="forbidden")"#,
)
.expect("write policy file");
});
let server = start_mock_server().await;
let test = builder.build(&server).await?;
let call_id = "shell-forbidden";
let args = json!({
"command": ["echo", "blocked"],
"timeout_ms": 1_000,
});
mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
)
.await;
mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
)
.await;
let session_model = test.session_configured.model.clone();
test.codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "run shell command".into(),
}],
final_output_json_schema: None,
cwd: test.cwd_path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: session_model,
effort: None,
summary: ReasoningSummary::Auto,
})
.await?;
let EventMsg::ExecCommandEnd(end) = wait_for_event(&test.codex, |event| {
matches!(event, EventMsg::ExecCommandEnd(_))
})
.await
else {
unreachable!()
};
wait_for_event(&test.codex, |event| {
matches!(event, EventMsg::TaskComplete(_))
})
.await;
assert!(
end.aggregated_output
.contains("execpolicy forbids this command"),
"unexpected output: {}",
end.aggregated_output
);
Ok(())
}

View File

@@ -28,6 +28,7 @@ mod compact_remote;
mod compact_resume_fork;
mod deprecation_notice;
mod exec;
mod exec_policy;
mod fork_conversation;
mod grep_files;
mod items;

View File

@@ -5,7 +5,7 @@
- This release covers only the prefix-rule subset of the planned execpolicy v2 language; a richer language will follow.
- Tokens are matched in order; any `pattern` element may be a list to denote alternatives. `decision` defaults to `allow`; valid values: `allow`, `prompt`, `forbidden`.
- `match` / `not_match` supply example invocations that are validated at load time (think of them as unit tests); examples can be token arrays or strings (strings are tokenized with `shlex`).
- The CLI always prints the JSON serialization of the evaluation result (whether a match or not).
- The CLI always prints the JSON serialization of the evaluation result.
## Policy shapes
- Prefix rules use Starlark syntax:
@@ -18,6 +18,20 @@ prefix_rule(
)
```
## CLI
- Provide one or more policy files (for example `src/default.codexpolicy`) to check a command:
```bash
cargo run -p codex-execpolicy2 -- check --policy path/to/policy.codexpolicy git status
```
- Pass multiple `--policy` flags to merge rules, evaluated in the order provided:
```bash
cargo run -p codex-execpolicy2 -- check --policy base.codexpolicy --policy overrides.codexpolicy git status
```
- Output is JSON by default; pass `--pretty` for pretty-printed JSON
- Example outcomes:
- Match: `{"match": { ... "decision": "allow" ... }}`
- No match: `"noMatch"`
## Response shapes
- Match:
```json
@@ -43,17 +57,3 @@ prefix_rule(
- `matchedRules` lists every rule whose prefix matched the command; `matchedPrefix` is the exact prefix that matched.
- The effective `decision` is the strictest severity across all matches (`forbidden` > `prompt` > `allow`).
## CLI
- Provide one or more policy files (for example `src/default.codexpolicy`) to check a command:
```bash
cargo run -p codex-execpolicy2 -- check --policy path/to/policy.codexpolicy git status
```
- Pass multiple `--policy` flags to merge rules, evaluated in the order provided:
```bash
cargo run -p codex-execpolicy2 -- check --policy base.codexpolicy --policy overrides.codexpolicy git status
```
- Output is newline-delimited JSON by default; pass `--pretty` for pretty-printed JSON if desired.
- Example outcomes:
- Match: `{"match": { ... "decision": "allow" ... }}`
- No match: `"noMatch"`

View File

@@ -15,6 +15,10 @@ impl Policy {
Self { rules_by_program }
}
pub fn empty() -> Self {
Self::new(MultiMap::new())
}
pub fn rules(&self) -> &MultiMap<String, RuleRef> {
&self.rules_by_program
}

View File

@@ -4,6 +4,9 @@ use codex_core::CodexConversation;
use codex_core::ConversationManager;
use codex_core::NewConversation;
use codex_core::config::Config;
use codex_core::protocol::ErrorEvent;
use codex_core::protocol::Event;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
use tokio::sync::mpsc::UnboundedSender;
use tokio::sync::mpsc::unbounded_channel;
@@ -28,9 +31,19 @@ pub(crate) fn spawn_agent(
session_configured,
} = match server.new_conversation(config).await {
Ok(v) => v,
Err(e) => {
// TODO: surface this error to the user.
tracing::error!("failed to initialize codex: {e}");
#[allow(clippy::print_stderr)]
Err(err) => {
let message = err.to_string();
eprintln!("{message}");
app_event_tx_clone.send(AppEvent::CodexEvent(Event {
id: "".to_string(),
msg: EventMsg::Error(ErrorEvent {
message,
http_status_code: None,
}),
}));
app_event_tx_clone.send(AppEvent::ExitRequest);
tracing::error!("failed to initialize codex: {err}");
return;
}
};