mirror of
https://github.com/openai/codex.git
synced 2026-02-02 15:03:38 +00:00
Compare commits
32 Commits
compaction
...
codex/add-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
876ade0445 | ||
|
|
2b8cdc7be3 | ||
|
|
20a4f95136 | ||
|
|
b6cd0a5f02 | ||
|
|
9ec2873084 | ||
|
|
745e2a6790 | ||
|
|
119b1855f3 | ||
|
|
4f2ee5f94c | ||
|
|
3cb8a0068d | ||
|
|
de9b3fd75d | ||
|
|
408cc0b0f2 | ||
|
|
5a1e6defd9 | ||
|
|
db36ccbe35 | ||
|
|
9bb7589a36 | ||
|
|
25bf30661b | ||
|
|
89271eccc5 | ||
|
|
a34b9fc259 | ||
|
|
345050e1be | ||
|
|
b5241d7f38 | ||
|
|
48a2db1a5a | ||
|
|
9cbe84748e | ||
|
|
cda6857fff | ||
|
|
7ac303b051 | ||
|
|
c9a34cd493 | ||
|
|
f69b225f44 | ||
|
|
f8dc20279b | ||
|
|
a13b81adea | ||
|
|
512a6c3386 | ||
|
|
3cd5c23910 | ||
|
|
c6c03aed22 | ||
|
|
3990d90e10 | ||
|
|
f18fdc97b3 |
33
README.md
33
README.md
@@ -69,6 +69,39 @@ Codex can access MCP servers. To configure them, refer to the [config docs](./do
|
||||
|
||||
Codex CLI supports a rich set of configuration options, with preferences stored in `~/.codex/config.toml`. For full configuration options, see [Configuration](./docs/config.md).
|
||||
|
||||
### Execpolicy quickstart
|
||||
|
||||
Codex can enforce your own rules-based execution policy before it runs shell commands.
|
||||
|
||||
1. Create a policy directory: `mkdir -p ~/.codex/policy`.
|
||||
2. Create one or more `.codexpolicy` files into that folder. Codex automatically loads every `.codexpolicy` file in there on startup.
|
||||
3. Write `prefix_rule` entries to describe the commands you want to allow, prompt, or block:
|
||||
|
||||
```starlark
|
||||
prefix_rule(
|
||||
pattern = ["git", ["push", "fetch"]],
|
||||
decision = "prompt", # allow | prompt | forbidden
|
||||
match = [["git", "push", "origin", "main"]], # examples that must match
|
||||
not_match = [["git", "status"]], # examples that must not match
|
||||
)
|
||||
```
|
||||
|
||||
- `pattern` is a list of shell tokens, evaluated from left to right; wrap tokens in a nested list to express alternatives (e.g., match both `push` and `fetch`).
|
||||
- `decision` sets the severity; Codex picks the strictest decision when multiple rules match.
|
||||
- `match` and `not_match` act as (optional) unit tests. Codex validates them when it loads your policy, so you get feedback if an example has unexpected behavior.
|
||||
|
||||
In this example rule, if Codex wants to run commands with the prefix `git push` or `git fetch`, it will first ask for user approval.
|
||||
|
||||
Note: If Codex wants to run a command that matches with multiple rules, it will use the strictest decision among the matched rules (forbidden > prompt > allow).
|
||||
|
||||
Use the `codex execpolicycheck` subcommand to preview decisions before you save a rule (see the [`execpolicy2` README](./codex-rs/execpolicy2/README.md) for syntax details):
|
||||
|
||||
```shell
|
||||
codex execpolicycheck --policy ~/.codex/policy/default.codexpolicy git push origin main
|
||||
```
|
||||
|
||||
Pass multiple `--policy` flags to test how several files combine, and use `--pretty` for formatted JSON output. See the [`codex-rs/execpolicy2` README](./codex-rs/execpolicy2/README.md) for a more detailed walkthrough of the available syntax.
|
||||
|
||||
---
|
||||
|
||||
### Docs & FAQ
|
||||
|
||||
2
codex-rs/Cargo.lock
generated
2
codex-rs/Cargo.lock
generated
@@ -989,6 +989,7 @@ dependencies = [
|
||||
"codex-common",
|
||||
"codex-core",
|
||||
"codex-exec",
|
||||
"codex-execpolicy2",
|
||||
"codex-login",
|
||||
"codex-mcp-server",
|
||||
"codex-process-hardening",
|
||||
@@ -1085,6 +1086,7 @@ dependencies = [
|
||||
"codex-apply-patch",
|
||||
"codex-arg0",
|
||||
"codex-async-utils",
|
||||
"codex-execpolicy2",
|
||||
"codex-file-search",
|
||||
"codex-git",
|
||||
"codex-keyring-store",
|
||||
|
||||
@@ -66,6 +66,7 @@ codex-chatgpt = { path = "chatgpt" }
|
||||
codex-common = { path = "common" }
|
||||
codex-core = { path = "core" }
|
||||
codex-exec = { path = "exec" }
|
||||
codex-execpolicy2 = { path = "execpolicy2" }
|
||||
codex-feedback = { path = "feedback" }
|
||||
codex-file-search = { path = "file-search" }
|
||||
codex-git = { path = "utils/git" }
|
||||
|
||||
@@ -26,6 +26,7 @@ codex-cloud-tasks = { path = "../cloud-tasks" }
|
||||
codex-common = { workspace = true, features = ["cli"] }
|
||||
codex-core = { workspace = true }
|
||||
codex-exec = { workspace = true }
|
||||
codex-execpolicy2 = { workspace = true }
|
||||
codex-login = { workspace = true }
|
||||
codex-mcp-server = { workspace = true }
|
||||
codex-process-hardening = { workspace = true }
|
||||
|
||||
@@ -18,6 +18,7 @@ use codex_cli::login::run_logout;
|
||||
use codex_cloud_tasks::Cli as CloudTasksCli;
|
||||
use codex_common::CliConfigOverrides;
|
||||
use codex_exec::Cli as ExecCli;
|
||||
use codex_execpolicy2::ExecPolicyCheckCommand;
|
||||
use codex_responses_api_proxy::Args as ResponsesApiProxyArgs;
|
||||
use codex_tui::AppExitInfo;
|
||||
use codex_tui::Cli as TuiCli;
|
||||
@@ -112,6 +113,10 @@ enum Subcommand {
|
||||
#[clap(hide = true, name = "stdio-to-uds")]
|
||||
StdioToUds(StdioToUdsCommand),
|
||||
|
||||
/// Check execpolicy files against a command.
|
||||
#[clap(name = "execpolicycheck")]
|
||||
ExecPolicyCheck(ExecPolicyCheckCommand),
|
||||
|
||||
/// Inspect feature flags.
|
||||
Features(FeaturesCli),
|
||||
}
|
||||
@@ -323,6 +328,12 @@ fn run_update_action(action: UpdateAction) -> anyhow::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn run_execpolicycheck(cmd: ExecPolicyCheckCommand) -> anyhow::Result<()> {
|
||||
let json = cmd.run()?;
|
||||
println!("{json}");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Parser, Clone)]
|
||||
struct FeatureToggles {
|
||||
/// Enable a feature (repeatable). Equivalent to `-c features.<name>=true`.
|
||||
@@ -559,6 +570,7 @@ async fn cli_main(codex_linux_sandbox_exe: Option<PathBuf>) -> anyhow::Result<()
|
||||
tokio::task::spawn_blocking(move || codex_stdio_to_uds::run(socket_path.as_path()))
|
||||
.await??;
|
||||
}
|
||||
Some(Subcommand::ExecPolicyCheck(cmd)) => run_execpolicycheck(cmd)?,
|
||||
Some(Subcommand::Features(FeaturesCli { sub })) => match sub {
|
||||
FeaturesSubcommand::List => {
|
||||
// Respect root-level `-c` overrides plus top-level flags like `--profile`.
|
||||
|
||||
@@ -22,6 +22,7 @@ chrono = { workspace = true, features = ["serde"] }
|
||||
codex-app-server-protocol = { workspace = true }
|
||||
codex-apply-patch = { workspace = true }
|
||||
codex-async-utils = { workspace = true }
|
||||
codex-execpolicy2 = { workspace = true }
|
||||
codex-file-search = { workspace = true }
|
||||
codex-git = { workspace = true }
|
||||
codex-keyring-store = { workspace = true }
|
||||
|
||||
@@ -118,6 +118,7 @@ use crate::user_instructions::UserInstructions;
|
||||
use crate::user_notification::UserNotification;
|
||||
use crate::util::backoff;
|
||||
use codex_async_utils::OrCancelExt;
|
||||
use codex_execpolicy2::Policy as ExecPolicy;
|
||||
use codex_otel::otel_event_manager::OtelEventManager;
|
||||
use codex_protocol::config_types::ReasoningEffort as ReasoningEffortConfig;
|
||||
use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
|
||||
@@ -163,6 +164,10 @@ impl Codex {
|
||||
|
||||
let user_instructions = get_user_instructions(&config).await;
|
||||
|
||||
let exec_policy = crate::exec_policy::exec_policy_for(&config.features, &config.codex_home)
|
||||
.await
|
||||
.map_err(|err| CodexErr::Fatal(format!("failed to load execpolicy: {err}")))?;
|
||||
|
||||
let config = Arc::new(config);
|
||||
|
||||
let session_configuration = SessionConfiguration {
|
||||
@@ -179,6 +184,7 @@ impl Codex {
|
||||
cwd: config.cwd.clone(),
|
||||
original_config_do_not_use: Arc::clone(&config),
|
||||
features: config.features.clone(),
|
||||
exec_policy,
|
||||
session_source,
|
||||
};
|
||||
|
||||
@@ -276,6 +282,7 @@ pub(crate) struct TurnContext {
|
||||
pub(crate) final_output_json_schema: Option<Value>,
|
||||
pub(crate) codex_linux_sandbox_exe: Option<PathBuf>,
|
||||
pub(crate) tool_call_gate: Arc<ReadinessFlag>,
|
||||
pub(crate) exec_policy: Arc<ExecPolicy>,
|
||||
pub(crate) truncation_policy: TruncationPolicy,
|
||||
}
|
||||
|
||||
@@ -333,6 +340,8 @@ pub(crate) struct SessionConfiguration {
|
||||
|
||||
/// Set of feature flags for this session
|
||||
features: Features,
|
||||
/// Execpolicy policy, applied only when enabled by feature flag.
|
||||
exec_policy: Arc<ExecPolicy>,
|
||||
|
||||
// TODO(pakrym): Remove config from here
|
||||
original_config_do_not_use: Arc<Config>,
|
||||
@@ -433,6 +442,7 @@ impl Session {
|
||||
final_output_json_schema: None,
|
||||
codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(),
|
||||
tool_call_gate: Arc::new(ReadinessFlag::new()),
|
||||
exec_policy: session_configuration.exec_policy.clone(),
|
||||
truncation_policy: TruncationPolicy::new(&per_turn_config),
|
||||
}
|
||||
}
|
||||
@@ -1766,6 +1776,7 @@ async fn spawn_review_thread(
|
||||
final_output_json_schema: None,
|
||||
codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(),
|
||||
tool_call_gate: Arc::new(ReadinessFlag::new()),
|
||||
exec_policy: parent_turn_context.exec_policy.clone(),
|
||||
truncation_policy: TruncationPolicy::new(&per_turn_config),
|
||||
};
|
||||
|
||||
@@ -2573,6 +2584,7 @@ mod tests {
|
||||
cwd: config.cwd.clone(),
|
||||
original_config_do_not_use: Arc::clone(&config),
|
||||
features: Features::default(),
|
||||
exec_policy: Arc::new(codex_execpolicy2::PolicyParser::new().build()),
|
||||
session_source: SessionSource::Exec,
|
||||
};
|
||||
|
||||
@@ -2650,6 +2662,7 @@ mod tests {
|
||||
cwd: config.cwd.clone(),
|
||||
original_config_do_not_use: Arc::clone(&config),
|
||||
features: Features::default(),
|
||||
exec_policy: Arc::new(codex_execpolicy2::PolicyParser::new().build()),
|
||||
session_source: SessionSource::Exec,
|
||||
};
|
||||
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
|
||||
use crate::sandboxing::SandboxPermissions;
|
||||
|
||||
use crate::bash::parse_shell_lc_plain_commands;
|
||||
use crate::is_safe_command::is_known_safe_command;
|
||||
|
||||
@@ -8,7 +10,7 @@ pub fn requires_initial_appoval(
|
||||
policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
command: &[String],
|
||||
with_escalated_permissions: bool,
|
||||
sandbox_permissions: SandboxPermissions,
|
||||
) -> bool {
|
||||
if is_known_safe_command(command) {
|
||||
return false;
|
||||
@@ -24,8 +26,7 @@ pub fn requires_initial_appoval(
|
||||
// In restricted sandboxes (ReadOnly/WorkspaceWrite), do not prompt for
|
||||
// non‑escalated, non‑dangerous commands — let the sandbox enforce
|
||||
// restrictions (e.g., block network/write) without a user prompt.
|
||||
let wants_escalation: bool = with_escalated_permissions;
|
||||
if wants_escalation {
|
||||
if sandbox_permissions.requires_escalated_permissions() {
|
||||
return true;
|
||||
}
|
||||
command_might_be_dangerous(command)
|
||||
|
||||
365
codex-rs/core/src/exec_policy.rs
Normal file
365
codex-rs/core/src/exec_policy.rs
Normal file
@@ -0,0 +1,365 @@
|
||||
use std::io::ErrorKind;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::command_safety::is_dangerous_command::requires_initial_appoval;
|
||||
use codex_execpolicy2::Decision;
|
||||
use codex_execpolicy2::Evaluation;
|
||||
use codex_execpolicy2::Policy;
|
||||
use codex_execpolicy2::PolicyParser;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use thiserror::Error;
|
||||
use tokio::fs;
|
||||
|
||||
use crate::bash::parse_shell_lc_plain_commands;
|
||||
use crate::features::Feature;
|
||||
use crate::features::Features;
|
||||
use crate::sandboxing::SandboxPermissions;
|
||||
use crate::tools::sandboxing::ApprovalRequirement;
|
||||
|
||||
const FORBIDDEN_REASON: &str = "execpolicy forbids this command";
|
||||
const PROMPT_REASON: &str = "execpolicy requires approval for this command";
|
||||
const POLICY_DIR_NAME: &str = "policy";
|
||||
const POLICY_EXTENSION: &str = "codexpolicy";
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum ExecPolicyError {
|
||||
#[error("failed to read execpolicy files from {dir}: {source}")]
|
||||
ReadDir {
|
||||
dir: PathBuf,
|
||||
source: std::io::Error,
|
||||
},
|
||||
|
||||
#[error("failed to read execpolicy file {path}: {source}")]
|
||||
ReadFile {
|
||||
path: PathBuf,
|
||||
source: std::io::Error,
|
||||
},
|
||||
|
||||
#[error("failed to parse execpolicy file {path}: {source}")]
|
||||
ParsePolicy {
|
||||
path: String,
|
||||
source: codex_execpolicy2::Error,
|
||||
},
|
||||
}
|
||||
|
||||
pub(crate) async fn exec_policy_for(
|
||||
features: &Features,
|
||||
codex_home: &Path,
|
||||
) -> Result<Arc<Policy>, ExecPolicyError> {
|
||||
if !features.enabled(Feature::ExecPolicy) {
|
||||
return Ok(Arc::new(Policy::empty()));
|
||||
}
|
||||
|
||||
let policy_dir = codex_home.join(POLICY_DIR_NAME);
|
||||
let policy_paths = collect_policy_files(&policy_dir).await?;
|
||||
|
||||
let mut parser = PolicyParser::new();
|
||||
for policy_path in &policy_paths {
|
||||
let contents =
|
||||
fs::read_to_string(policy_path)
|
||||
.await
|
||||
.map_err(|source| ExecPolicyError::ReadFile {
|
||||
path: policy_path.clone(),
|
||||
source,
|
||||
})?;
|
||||
let identifier = policy_path.to_string_lossy().to_string();
|
||||
parser
|
||||
.parse(&identifier, &contents)
|
||||
.map_err(|source| ExecPolicyError::ParsePolicy {
|
||||
path: identifier,
|
||||
source,
|
||||
})?;
|
||||
}
|
||||
|
||||
let policy = Arc::new(parser.build());
|
||||
tracing::debug!(
|
||||
"loaded execpolicy from {} files in {}",
|
||||
policy_paths.len(),
|
||||
policy_dir.display()
|
||||
);
|
||||
|
||||
Ok(policy)
|
||||
}
|
||||
|
||||
fn evaluate_with_policy(
|
||||
policy: &Policy,
|
||||
command: &[String],
|
||||
approval_policy: AskForApproval,
|
||||
) -> Option<ApprovalRequirement> {
|
||||
let commands = parse_shell_lc_plain_commands(command).unwrap_or_else(|| vec![command.to_vec()]);
|
||||
let evaluation = policy.check_multiple(commands.iter());
|
||||
|
||||
match evaluation {
|
||||
Evaluation::Match { decision, .. } => match decision {
|
||||
Decision::Forbidden => Some(ApprovalRequirement::Forbidden {
|
||||
reason: FORBIDDEN_REASON.to_string(),
|
||||
}),
|
||||
Decision::Prompt => {
|
||||
let reason = PROMPT_REASON.to_string();
|
||||
if matches!(approval_policy, AskForApproval::Never) {
|
||||
Some(ApprovalRequirement::Forbidden { reason })
|
||||
} else {
|
||||
Some(ApprovalRequirement::NeedsApproval {
|
||||
reason: Some(reason),
|
||||
})
|
||||
}
|
||||
}
|
||||
Decision::Allow => Some(ApprovalRequirement::Skip),
|
||||
},
|
||||
Evaluation::NoMatch { .. } => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn create_approval_requirement_for_command(
|
||||
policy: &Policy,
|
||||
command: &[String],
|
||||
approval_policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
sandbox_permissions: SandboxPermissions,
|
||||
) -> ApprovalRequirement {
|
||||
if let Some(requirement) = evaluate_with_policy(policy, command, approval_policy) {
|
||||
return requirement;
|
||||
}
|
||||
|
||||
if requires_initial_appoval(
|
||||
approval_policy,
|
||||
sandbox_policy,
|
||||
command,
|
||||
sandbox_permissions,
|
||||
) {
|
||||
ApprovalRequirement::NeedsApproval { reason: None }
|
||||
} else {
|
||||
ApprovalRequirement::Skip
|
||||
}
|
||||
}
|
||||
|
||||
async fn collect_policy_files(dir: &Path) -> Result<Vec<PathBuf>, ExecPolicyError> {
|
||||
let mut read_dir = match fs::read_dir(dir).await {
|
||||
Ok(read_dir) => read_dir,
|
||||
Err(err) if err.kind() == ErrorKind::NotFound => return Ok(Vec::new()),
|
||||
Err(source) => {
|
||||
return Err(ExecPolicyError::ReadDir {
|
||||
dir: dir.to_path_buf(),
|
||||
source,
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
let mut policy_paths = Vec::new();
|
||||
while let Some(entry) =
|
||||
read_dir
|
||||
.next_entry()
|
||||
.await
|
||||
.map_err(|source| ExecPolicyError::ReadDir {
|
||||
dir: dir.to_path_buf(),
|
||||
source,
|
||||
})?
|
||||
{
|
||||
let path = entry.path();
|
||||
let file_type = entry
|
||||
.file_type()
|
||||
.await
|
||||
.map_err(|source| ExecPolicyError::ReadDir {
|
||||
dir: dir.to_path_buf(),
|
||||
source,
|
||||
})?;
|
||||
|
||||
if path
|
||||
.extension()
|
||||
.and_then(|ext| ext.to_str())
|
||||
.is_some_and(|ext| ext == POLICY_EXTENSION)
|
||||
&& file_type.is_file()
|
||||
{
|
||||
policy_paths.push(path);
|
||||
}
|
||||
}
|
||||
|
||||
policy_paths.sort();
|
||||
|
||||
Ok(policy_paths)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::features::Feature;
|
||||
use crate::features::Features;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use pretty_assertions::assert_eq;
|
||||
use std::fs;
|
||||
use tempfile::tempdir;
|
||||
|
||||
#[tokio::test]
|
||||
async fn returns_empty_policy_when_feature_disabled() {
|
||||
let mut features = Features::with_defaults();
|
||||
features.disable(Feature::ExecPolicy);
|
||||
let temp_dir = tempdir().expect("create temp dir");
|
||||
|
||||
let policy = exec_policy_for(&features, temp_dir.path())
|
||||
.await
|
||||
.expect("policy result");
|
||||
|
||||
let commands = [vec!["rm".to_string()]];
|
||||
assert!(matches!(
|
||||
policy.check_multiple(commands.iter()),
|
||||
Evaluation::NoMatch { .. }
|
||||
));
|
||||
assert!(!temp_dir.path().join(POLICY_DIR_NAME).exists());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn collect_policy_files_returns_empty_when_dir_missing() {
|
||||
let temp_dir = tempdir().expect("create temp dir");
|
||||
|
||||
let policy_dir = temp_dir.path().join(POLICY_DIR_NAME);
|
||||
let files = collect_policy_files(&policy_dir)
|
||||
.await
|
||||
.expect("collect policy files");
|
||||
|
||||
assert!(files.is_empty());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn loads_policies_from_policy_subdirectory() {
|
||||
let temp_dir = tempdir().expect("create temp dir");
|
||||
let policy_dir = temp_dir.path().join(POLICY_DIR_NAME);
|
||||
fs::create_dir_all(&policy_dir).expect("create policy dir");
|
||||
fs::write(
|
||||
policy_dir.join("deny.codexpolicy"),
|
||||
r#"prefix_rule(pattern=["rm"], decision="forbidden")"#,
|
||||
)
|
||||
.expect("write policy file");
|
||||
|
||||
let policy = exec_policy_for(&Features::with_defaults(), temp_dir.path())
|
||||
.await
|
||||
.expect("policy result");
|
||||
let command = [vec!["rm".to_string()]];
|
||||
assert!(matches!(
|
||||
policy.check_multiple(command.iter()),
|
||||
Evaluation::Match { .. }
|
||||
));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn ignores_policies_outside_policy_dir() {
|
||||
let temp_dir = tempdir().expect("create temp dir");
|
||||
fs::write(
|
||||
temp_dir.path().join("root.codexpolicy"),
|
||||
r#"prefix_rule(pattern=["ls"], decision="prompt")"#,
|
||||
)
|
||||
.expect("write policy file");
|
||||
|
||||
let policy = exec_policy_for(&Features::with_defaults(), temp_dir.path())
|
||||
.await
|
||||
.expect("policy result");
|
||||
let command = [vec!["ls".to_string()]];
|
||||
assert!(matches!(
|
||||
policy.check_multiple(command.iter()),
|
||||
Evaluation::NoMatch { .. }
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn evaluates_bash_lc_inner_commands() {
|
||||
let policy_src = r#"
|
||||
prefix_rule(pattern=["rm"], decision="forbidden")
|
||||
"#;
|
||||
let mut parser = PolicyParser::new();
|
||||
parser
|
||||
.parse("test.codexpolicy", policy_src)
|
||||
.expect("parse policy");
|
||||
let policy = parser.build();
|
||||
|
||||
let forbidden_script = vec![
|
||||
"bash".to_string(),
|
||||
"-lc".to_string(),
|
||||
"rm -rf /tmp".to_string(),
|
||||
];
|
||||
|
||||
let requirement =
|
||||
evaluate_with_policy(&policy, &forbidden_script, AskForApproval::OnRequest)
|
||||
.expect("expected match for forbidden command");
|
||||
|
||||
assert_eq!(
|
||||
requirement,
|
||||
ApprovalRequirement::Forbidden {
|
||||
reason: FORBIDDEN_REASON.to_string()
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn approval_requirement_prefers_execpolicy_match() {
|
||||
let policy_src = r#"prefix_rule(pattern=["rm"], decision="prompt")"#;
|
||||
let mut parser = PolicyParser::new();
|
||||
parser
|
||||
.parse("test.codexpolicy", policy_src)
|
||||
.expect("parse policy");
|
||||
let policy = parser.build();
|
||||
let command = vec!["rm".to_string()];
|
||||
|
||||
let requirement = create_approval_requirement_for_command(
|
||||
&policy,
|
||||
&command,
|
||||
AskForApproval::OnRequest,
|
||||
&SandboxPolicy::DangerFullAccess,
|
||||
SandboxPermissions::UseDefault,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
requirement,
|
||||
ApprovalRequirement::NeedsApproval {
|
||||
reason: Some(PROMPT_REASON.to_string())
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn approval_requirement_respects_approval_policy() {
|
||||
let policy_src = r#"prefix_rule(pattern=["rm"], decision="prompt")"#;
|
||||
let mut parser = PolicyParser::new();
|
||||
parser
|
||||
.parse("test.codexpolicy", policy_src)
|
||||
.expect("parse policy");
|
||||
let policy = parser.build();
|
||||
let command = vec!["rm".to_string()];
|
||||
|
||||
let requirement = create_approval_requirement_for_command(
|
||||
&policy,
|
||||
&command,
|
||||
AskForApproval::Never,
|
||||
&SandboxPolicy::DangerFullAccess,
|
||||
SandboxPermissions::UseDefault,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
requirement,
|
||||
ApprovalRequirement::Forbidden {
|
||||
reason: PROMPT_REASON.to_string()
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn approval_requirement_falls_back_to_heuristics() {
|
||||
let command = vec!["python".to_string()];
|
||||
|
||||
let empty_policy = Policy::empty();
|
||||
let requirement = create_approval_requirement_for_command(
|
||||
&empty_policy,
|
||||
&command,
|
||||
AskForApproval::UnlessTrusted,
|
||||
&SandboxPolicy::ReadOnly,
|
||||
SandboxPermissions::UseDefault,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
requirement,
|
||||
ApprovalRequirement::NeedsApproval { reason: None }
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -42,6 +42,8 @@ pub enum Feature {
|
||||
ViewImageTool,
|
||||
/// Allow the model to request web searches.
|
||||
WebSearchRequest,
|
||||
/// Gate the execpolicy enforcement for shell/unified exec.
|
||||
ExecPolicy,
|
||||
/// Enable the model-based risk assessments for sandboxed commands.
|
||||
SandboxCommandAssessment,
|
||||
/// Enable Windows sandbox (restricted token) on Windows.
|
||||
@@ -297,6 +299,12 @@ pub const FEATURES: &[FeatureSpec] = &[
|
||||
stage: Stage::Stable,
|
||||
default_enabled: false,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::ExecPolicy,
|
||||
key: "exec_policy",
|
||||
stage: Stage::Experimental,
|
||||
default_enabled: true,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::SandboxCommandAssessment,
|
||||
key: "experimental_sandbox_command_assessment",
|
||||
|
||||
@@ -25,6 +25,7 @@ mod environment_context;
|
||||
pub mod error;
|
||||
pub mod exec;
|
||||
pub mod exec_env;
|
||||
mod exec_policy;
|
||||
pub mod features;
|
||||
mod flags;
|
||||
pub mod git_info;
|
||||
|
||||
@@ -26,6 +26,28 @@ use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||
pub enum SandboxPermissions {
|
||||
UseDefault,
|
||||
RequireEscalated,
|
||||
}
|
||||
|
||||
impl SandboxPermissions {
|
||||
pub fn requires_escalated_permissions(self) -> bool {
|
||||
matches!(self, SandboxPermissions::RequireEscalated)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<bool> for SandboxPermissions {
|
||||
fn from(with_escalated_permissions: bool) -> Self {
|
||||
if with_escalated_permissions {
|
||||
SandboxPermissions::RequireEscalated
|
||||
} else {
|
||||
SandboxPermissions::UseDefault
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct CommandSpec {
|
||||
pub program: String,
|
||||
|
||||
@@ -9,9 +9,11 @@ use crate::apply_patch::convert_apply_patch_to_protocol;
|
||||
use crate::codex::TurnContext;
|
||||
use crate::exec::ExecParams;
|
||||
use crate::exec_env::create_env;
|
||||
use crate::exec_policy::create_approval_requirement_for_command;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::is_safe_command::is_known_safe_command;
|
||||
use crate::protocol::ExecCommandSource;
|
||||
use crate::sandboxing::SandboxPermissions;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolOutput;
|
||||
use crate::tools::context::ToolPayload;
|
||||
@@ -294,6 +296,13 @@ impl ShellHandler {
|
||||
env: exec_params.env.clone(),
|
||||
with_escalated_permissions: exec_params.with_escalated_permissions,
|
||||
justification: exec_params.justification.clone(),
|
||||
approval_requirement: create_approval_requirement_for_command(
|
||||
&turn.exec_policy,
|
||||
&exec_params.command,
|
||||
turn.approval_policy,
|
||||
&turn.sandbox_policy,
|
||||
SandboxPermissions::from(exec_params.with_escalated_permissions.unwrap_or(false)),
|
||||
),
|
||||
};
|
||||
let mut orchestrator = ToolOrchestrator::new();
|
||||
let mut runtime = ShellRuntime::new();
|
||||
|
||||
@@ -11,11 +11,13 @@ use crate::error::get_error_message_ui;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::sandboxing::SandboxManager;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::ApprovalRequirement;
|
||||
use crate::tools::sandboxing::ProvidesSandboxRetryData;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use crate::tools::sandboxing::ToolRuntime;
|
||||
use crate::tools::sandboxing::default_approval_requirement;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
|
||||
@@ -49,40 +51,52 @@ impl ToolOrchestrator {
|
||||
let otel_cfg = codex_otel::otel_event_manager::ToolDecisionSource::Config;
|
||||
|
||||
// 1) Approval
|
||||
let needs_initial_approval =
|
||||
tool.wants_initial_approval(req, approval_policy, &turn_ctx.sandbox_policy);
|
||||
let mut already_approved = false;
|
||||
|
||||
if needs_initial_approval {
|
||||
let mut risk = None;
|
||||
|
||||
if let Some(metadata) = req.sandbox_retry_data() {
|
||||
risk = tool_ctx
|
||||
.session
|
||||
.assess_sandbox_command(turn_ctx, &tool_ctx.call_id, &metadata.command, None)
|
||||
.await;
|
||||
let requirement = tool.approval_requirement(req).unwrap_or_else(|| {
|
||||
default_approval_requirement(approval_policy, &turn_ctx.sandbox_policy)
|
||||
});
|
||||
match requirement {
|
||||
ApprovalRequirement::Skip => {
|
||||
otel.tool_decision(otel_tn, otel_ci, ReviewDecision::Approved, otel_cfg);
|
||||
}
|
||||
ApprovalRequirement::Forbidden { reason } => {
|
||||
return Err(ToolError::Rejected(reason));
|
||||
}
|
||||
ApprovalRequirement::NeedsApproval { reason } => {
|
||||
let mut risk = None;
|
||||
|
||||
let approval_ctx = ApprovalCtx {
|
||||
session: tool_ctx.session,
|
||||
turn: turn_ctx,
|
||||
call_id: &tool_ctx.call_id,
|
||||
retry_reason: None,
|
||||
risk,
|
||||
};
|
||||
let decision = tool.start_approval_async(req, approval_ctx).await;
|
||||
|
||||
otel.tool_decision(otel_tn, otel_ci, decision, otel_user.clone());
|
||||
|
||||
match decision {
|
||||
ReviewDecision::Denied | ReviewDecision::Abort => {
|
||||
return Err(ToolError::Rejected("rejected by user".to_string()));
|
||||
if let Some(metadata) = req.sandbox_retry_data() {
|
||||
risk = tool_ctx
|
||||
.session
|
||||
.assess_sandbox_command(
|
||||
turn_ctx,
|
||||
&tool_ctx.call_id,
|
||||
&metadata.command,
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {}
|
||||
|
||||
let approval_ctx = ApprovalCtx {
|
||||
session: tool_ctx.session,
|
||||
turn: turn_ctx,
|
||||
call_id: &tool_ctx.call_id,
|
||||
retry_reason: reason,
|
||||
risk,
|
||||
};
|
||||
let decision = tool.start_approval_async(req, approval_ctx).await;
|
||||
|
||||
otel.tool_decision(otel_tn, otel_ci, decision, otel_user.clone());
|
||||
|
||||
match decision {
|
||||
ReviewDecision::Denied | ReviewDecision::Abort => {
|
||||
return Err(ToolError::Rejected("rejected by user".to_string()));
|
||||
}
|
||||
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {}
|
||||
}
|
||||
already_approved = true;
|
||||
}
|
||||
already_approved = true;
|
||||
} else {
|
||||
otel.tool_decision(otel_tn, otel_ci, ReviewDecision::Approved, otel_cfg);
|
||||
}
|
||||
|
||||
// 2) First attempt under the selected sandbox.
|
||||
|
||||
@@ -4,13 +4,12 @@ Runtime: shell
|
||||
Executes shell requests under the orchestrator: asks for approval when needed,
|
||||
builds a CommandSpec, and runs it under the current SandboxAttempt.
|
||||
*/
|
||||
use crate::command_safety::is_dangerous_command::requires_initial_appoval;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::sandboxing::execute_env;
|
||||
use crate::tools::runtimes::build_command_spec;
|
||||
use crate::tools::sandboxing::Approvable;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::ApprovalRequirement;
|
||||
use crate::tools::sandboxing::ProvidesSandboxRetryData;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::SandboxRetryData;
|
||||
@@ -20,7 +19,6 @@ use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use crate::tools::sandboxing::ToolRuntime;
|
||||
use crate::tools::sandboxing::with_cached_approval;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use futures::future::BoxFuture;
|
||||
use std::path::PathBuf;
|
||||
@@ -33,6 +31,7 @@ pub struct ShellRequest {
|
||||
pub env: std::collections::HashMap<String, String>,
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
pub approval_requirement: ApprovalRequirement,
|
||||
}
|
||||
|
||||
impl ProvidesSandboxRetryData for ShellRequest {
|
||||
@@ -114,18 +113,8 @@ impl Approvable<ShellRequest> for ShellRuntime {
|
||||
})
|
||||
}
|
||||
|
||||
fn wants_initial_approval(
|
||||
&self,
|
||||
req: &ShellRequest,
|
||||
policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
) -> bool {
|
||||
requires_initial_appoval(
|
||||
policy,
|
||||
sandbox_policy,
|
||||
&req.command,
|
||||
req.with_escalated_permissions.unwrap_or(false),
|
||||
)
|
||||
fn approval_requirement(&self, req: &ShellRequest) -> Option<ApprovalRequirement> {
|
||||
Some(req.approval_requirement.clone())
|
||||
}
|
||||
|
||||
fn wants_escalated_first_attempt(&self, req: &ShellRequest) -> bool {
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
use crate::command_safety::is_dangerous_command::requires_initial_appoval;
|
||||
/*
|
||||
Runtime: unified exec
|
||||
|
||||
@@ -10,6 +9,7 @@ use crate::error::SandboxErr;
|
||||
use crate::tools::runtimes::build_command_spec;
|
||||
use crate::tools::sandboxing::Approvable;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::ApprovalRequirement;
|
||||
use crate::tools::sandboxing::ProvidesSandboxRetryData;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::SandboxRetryData;
|
||||
@@ -22,9 +22,7 @@ use crate::tools::sandboxing::with_cached_approval;
|
||||
use crate::unified_exec::UnifiedExecError;
|
||||
use crate::unified_exec::UnifiedExecSession;
|
||||
use crate::unified_exec::UnifiedExecSessionManager;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use futures::future::BoxFuture;
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
@@ -36,6 +34,7 @@ pub struct UnifiedExecRequest {
|
||||
pub env: HashMap<String, String>,
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
pub approval_requirement: ApprovalRequirement,
|
||||
}
|
||||
|
||||
impl ProvidesSandboxRetryData for UnifiedExecRequest {
|
||||
@@ -65,6 +64,7 @@ impl UnifiedExecRequest {
|
||||
env: HashMap<String, String>,
|
||||
with_escalated_permissions: Option<bool>,
|
||||
justification: Option<String>,
|
||||
approval_requirement: ApprovalRequirement,
|
||||
) -> Self {
|
||||
Self {
|
||||
command,
|
||||
@@ -72,6 +72,7 @@ impl UnifiedExecRequest {
|
||||
env,
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
approval_requirement,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -129,18 +130,8 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
|
||||
})
|
||||
}
|
||||
|
||||
fn wants_initial_approval(
|
||||
&self,
|
||||
req: &UnifiedExecRequest,
|
||||
policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
) -> bool {
|
||||
requires_initial_appoval(
|
||||
policy,
|
||||
sandbox_policy,
|
||||
&req.command,
|
||||
req.with_escalated_permissions.unwrap_or(false),
|
||||
)
|
||||
fn approval_requirement(&self, req: &UnifiedExecRequest) -> Option<ApprovalRequirement> {
|
||||
Some(req.approval_requirement.clone())
|
||||
}
|
||||
|
||||
fn wants_escalated_first_attempt(&self, req: &UnifiedExecRequest) -> bool {
|
||||
|
||||
@@ -86,6 +86,37 @@ pub(crate) struct ApprovalCtx<'a> {
|
||||
pub risk: Option<SandboxCommandAssessment>,
|
||||
}
|
||||
|
||||
// Specifies what tool orchestrator should do with a given tool call.
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) enum ApprovalRequirement {
|
||||
/// No approval required for this tool call
|
||||
Skip,
|
||||
/// Approval required for this tool call
|
||||
NeedsApproval { reason: Option<String> },
|
||||
/// Execution forbidden for this tool call
|
||||
Forbidden { reason: String },
|
||||
}
|
||||
|
||||
/// - Never, OnFailure: do not ask
|
||||
/// - OnRequest: ask unless sandbox policy is DangerFullAccess
|
||||
/// - UnlessTrusted: always ask
|
||||
pub(crate) fn default_approval_requirement(
|
||||
policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
) -> ApprovalRequirement {
|
||||
let needs_approval = match policy {
|
||||
AskForApproval::Never | AskForApproval::OnFailure => false,
|
||||
AskForApproval::OnRequest => !matches!(sandbox_policy, SandboxPolicy::DangerFullAccess),
|
||||
AskForApproval::UnlessTrusted => true,
|
||||
};
|
||||
|
||||
if needs_approval {
|
||||
ApprovalRequirement::NeedsApproval { reason: None }
|
||||
} else {
|
||||
ApprovalRequirement::Skip
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) trait Approvable<Req> {
|
||||
type ApprovalKey: Hash + Eq + Clone + Debug + Serialize;
|
||||
|
||||
@@ -106,22 +137,11 @@ pub(crate) trait Approvable<Req> {
|
||||
matches!(policy, AskForApproval::Never)
|
||||
}
|
||||
|
||||
/// Decide whether an initial user approval should be requested before the
|
||||
/// first attempt. Defaults to the orchestrator's behavior (pre‑refactor):
|
||||
/// - Never, OnFailure: do not ask
|
||||
/// - OnRequest: ask unless sandbox policy is DangerFullAccess
|
||||
/// - UnlessTrusted: always ask
|
||||
fn wants_initial_approval(
|
||||
&self,
|
||||
_req: &Req,
|
||||
policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
) -> bool {
|
||||
match policy {
|
||||
AskForApproval::Never | AskForApproval::OnFailure => false,
|
||||
AskForApproval::OnRequest => !matches!(sandbox_policy, SandboxPolicy::DangerFullAccess),
|
||||
AskForApproval::UnlessTrusted => true,
|
||||
}
|
||||
/// Override the default approval requirement. Return `Some(_)` to specify
|
||||
/// a custom requirement, or `None` to fall back to
|
||||
/// policy-based default.
|
||||
fn approval_requirement(&self, _req: &Req) -> Option<ApprovalRequirement> {
|
||||
None
|
||||
}
|
||||
|
||||
/// Decide we can request an approval for no-sandbox execution.
|
||||
|
||||
@@ -11,10 +11,12 @@ use crate::codex::TurnContext;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::exec::StreamOutput;
|
||||
use crate::exec_env::create_env;
|
||||
use crate::exec_policy::create_approval_requirement_for_command;
|
||||
use crate::protocol::BackgroundEventEvent;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::protocol::ExecCommandSource;
|
||||
use crate::sandboxing::ExecEnv;
|
||||
use crate::sandboxing::SandboxPermissions;
|
||||
use crate::tools::events::ToolEmitter;
|
||||
use crate::tools::events::ToolEventCtx;
|
||||
use crate::tools::events::ToolEventFailure;
|
||||
@@ -449,6 +451,13 @@ impl UnifiedExecSessionManager {
|
||||
create_env(&context.turn.shell_environment_policy),
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
create_approval_requirement_for_command(
|
||||
&context.turn.exec_policy,
|
||||
command,
|
||||
context.turn.approval_policy,
|
||||
&context.turn.sandbox_policy,
|
||||
SandboxPermissions::from(with_escalated_permissions.unwrap_or(false)),
|
||||
),
|
||||
);
|
||||
let tool_ctx = ToolCtx {
|
||||
session: context.session.as_ref(),
|
||||
|
||||
101
codex-rs/core/tests/suite/exec_policy.rs
Normal file
101
codex-rs/core/tests/suite/exec_policy.rs
Normal file
@@ -0,0 +1,101 @@
|
||||
#![allow(clippy::unwrap_used, clippy::expect_used)]
|
||||
|
||||
use anyhow::Result;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::mount_sse_once;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use serde_json::json;
|
||||
use std::fs;
|
||||
|
||||
#[tokio::test]
|
||||
async fn execpolicy_blocks_shell_invocation() -> Result<()> {
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
let policy_path = config.codex_home.join("policy").join("policy.codexpolicy");
|
||||
fs::create_dir_all(
|
||||
policy_path
|
||||
.parent()
|
||||
.expect("policy directory must have a parent"),
|
||||
)
|
||||
.expect("create policy directory");
|
||||
fs::write(
|
||||
&policy_path,
|
||||
r#"prefix_rule(pattern=["echo"], decision="forbidden")"#,
|
||||
)
|
||||
.expect("write policy file");
|
||||
});
|
||||
let server = start_mock_server().await;
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-forbidden";
|
||||
let args = json!({
|
||||
"command": ["echo", "blocked"],
|
||||
"timeout_ms": 1_000,
|
||||
});
|
||||
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let session_model = test.session_configured.model.clone();
|
||||
test.codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "run shell command".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: test.cwd_path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
let EventMsg::ExecCommandEnd(end) = wait_for_event(&test.codex, |event| {
|
||||
matches!(event, EventMsg::ExecCommandEnd(_))
|
||||
})
|
||||
.await
|
||||
else {
|
||||
unreachable!()
|
||||
};
|
||||
wait_for_event(&test.codex, |event| {
|
||||
matches!(event, EventMsg::TaskComplete(_))
|
||||
})
|
||||
.await;
|
||||
|
||||
assert!(
|
||||
end.aggregated_output
|
||||
.contains("execpolicy forbids this command"),
|
||||
"unexpected output: {}",
|
||||
end.aggregated_output
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -28,6 +28,7 @@ mod compact_remote;
|
||||
mod compact_resume_fork;
|
||||
mod deprecation_notice;
|
||||
mod exec;
|
||||
mod exec_policy;
|
||||
mod fork_conversation;
|
||||
mod grep_files;
|
||||
mod items;
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
- This release covers only the prefix-rule subset of the planned execpolicy v2 language; a richer language will follow.
|
||||
- Tokens are matched in order; any `pattern` element may be a list to denote alternatives. `decision` defaults to `allow`; valid values: `allow`, `prompt`, `forbidden`.
|
||||
- `match` / `not_match` supply example invocations that are validated at load time (think of them as unit tests); examples can be token arrays or strings (strings are tokenized with `shlex`).
|
||||
- The CLI always prints the JSON serialization of the evaluation result (whether a match or not).
|
||||
- The CLI always prints the JSON serialization of the evaluation result.
|
||||
|
||||
## Policy shapes
|
||||
- Prefix rules use Starlark syntax:
|
||||
@@ -18,6 +18,24 @@ prefix_rule(
|
||||
)
|
||||
```
|
||||
|
||||
## CLI
|
||||
- From the Codex CLI, run `codex execpolicycheck` with one or more policy files (for example `src/default.codexpolicy`) to check a command:
|
||||
```bash
|
||||
codex execpolicycheck --policy path/to/policy.codexpolicy git status
|
||||
```
|
||||
- Pass multiple `--policy` flags to merge rules, evaluated in the order provided:
|
||||
```bash
|
||||
codex execpolicycheck --policy base.codexpolicy --policy overrides.codexpolicy git status
|
||||
```
|
||||
- Output is JSON by default; pass `--pretty` for pretty-printed JSON
|
||||
- You can also run the standalone dev binary directly during development:
|
||||
```bash
|
||||
cargo run -p codex-execpolicy2 -- check --policy path/to/policy.codexpolicy git status
|
||||
```
|
||||
- Example outcomes:
|
||||
- Match: `{"match": { ... "decision": "allow" ... }}`
|
||||
- No match: `{"noMatch": {}}`
|
||||
|
||||
## Response shapes
|
||||
- Match:
|
||||
```json
|
||||
@@ -38,22 +56,8 @@ prefix_rule(
|
||||
|
||||
- No match:
|
||||
```json
|
||||
"noMatch"
|
||||
{"noMatch": {}}
|
||||
```
|
||||
|
||||
- `matchedRules` lists every rule whose prefix matched the command; `matchedPrefix` is the exact prefix that matched.
|
||||
- The effective `decision` is the strictest severity across all matches (`forbidden` > `prompt` > `allow`).
|
||||
|
||||
## CLI
|
||||
- Provide one or more policy files (for example `src/default.codexpolicy`) to check a command:
|
||||
```bash
|
||||
cargo run -p codex-execpolicy2 -- check --policy path/to/policy.codexpolicy git status
|
||||
```
|
||||
- Pass multiple `--policy` flags to merge rules, evaluated in the order provided:
|
||||
```bash
|
||||
cargo run -p codex-execpolicy2 -- check --policy base.codexpolicy --policy overrides.codexpolicy git status
|
||||
```
|
||||
- Output is newline-delimited JSON by default; pass `--pretty` for pretty-printed JSON if desired.
|
||||
- Example outcomes:
|
||||
- Match: `{"match": { ... "decision": "allow" ... }}`
|
||||
- No match: `"noMatch"`
|
||||
|
||||
64
codex-rs/execpolicy2/src/execpolicycheck.rs
Normal file
64
codex-rs/execpolicy2/src/execpolicycheck.rs
Normal file
@@ -0,0 +1,64 @@
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use clap::Parser;
|
||||
|
||||
use crate::Evaluation;
|
||||
use crate::Policy;
|
||||
use crate::PolicyParser;
|
||||
|
||||
/// Arguments for evaluating a command against one or more execpolicy files.
|
||||
#[derive(Debug, Parser, Clone)]
|
||||
pub struct ExecPolicyCheckCommand {
|
||||
/// Paths to execpolicy files to evaluate (repeatable).
|
||||
#[arg(short, long = "policy", value_name = "PATH", required = true)]
|
||||
pub policies: Vec<PathBuf>,
|
||||
|
||||
/// Pretty-print the JSON output.
|
||||
#[arg(long)]
|
||||
pub pretty: bool,
|
||||
|
||||
/// Command tokens to check against the policy.
|
||||
#[arg(
|
||||
value_name = "COMMAND",
|
||||
required = true,
|
||||
trailing_var_arg = true,
|
||||
allow_hyphen_values = true
|
||||
)]
|
||||
pub command: Vec<String>,
|
||||
}
|
||||
|
||||
impl ExecPolicyCheckCommand {
|
||||
/// Load the policies for this command, evaluate the command, and render JSON output.
|
||||
pub fn run(&self) -> Result<String> {
|
||||
let policy = load_policies(&self.policies)?;
|
||||
let evaluation = policy.check(&self.command);
|
||||
|
||||
format_evaluation_json(&evaluation, self.pretty)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn format_evaluation_json(evaluation: &Evaluation, pretty: bool) -> Result<String> {
|
||||
if pretty {
|
||||
serde_json::to_string_pretty(evaluation).map_err(Into::into)
|
||||
} else {
|
||||
serde_json::to_string(evaluation).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn load_policies(policy_paths: &[PathBuf]) -> Result<Policy> {
|
||||
let mut parser = PolicyParser::new();
|
||||
|
||||
for policy_path in policy_paths {
|
||||
let policy_file_contents = fs::read_to_string(policy_path)
|
||||
.with_context(|| format!("failed to read policy at {}", policy_path.display()))?;
|
||||
let policy_identifier = policy_path.to_string_lossy().to_string();
|
||||
parser
|
||||
.parse(&policy_identifier, &policy_file_contents)
|
||||
.with_context(|| format!("failed to parse policy at {}", policy_path.display()))?;
|
||||
}
|
||||
|
||||
Ok(parser.build())
|
||||
}
|
||||
@@ -1,5 +1,6 @@
|
||||
pub mod decision;
|
||||
pub mod error;
|
||||
pub mod execpolicycheck;
|
||||
pub mod parser;
|
||||
pub mod policy;
|
||||
pub mod rule;
|
||||
@@ -7,6 +8,7 @@ pub mod rule;
|
||||
pub use decision::Decision;
|
||||
pub use error::Error;
|
||||
pub use error::Result;
|
||||
pub use execpolicycheck::ExecPolicyCheckCommand;
|
||||
pub use parser::PolicyParser;
|
||||
pub use policy::Evaluation;
|
||||
pub use policy::Policy;
|
||||
|
||||
@@ -1,66 +1,24 @@
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use clap::Parser;
|
||||
use codex_execpolicy2::PolicyParser;
|
||||
use codex_execpolicy2::ExecPolicyCheckCommand;
|
||||
|
||||
/// CLI for evaluating exec policies
|
||||
#[derive(Parser)]
|
||||
#[command(name = "codex-execpolicy2")]
|
||||
enum Cli {
|
||||
/// Evaluate a command against a policy.
|
||||
Check {
|
||||
#[arg(short, long = "policy", value_name = "PATH", required = true)]
|
||||
policies: Vec<PathBuf>,
|
||||
|
||||
/// Pretty-print the JSON output.
|
||||
#[arg(long)]
|
||||
pretty: bool,
|
||||
|
||||
/// Command tokens to check.
|
||||
#[arg(
|
||||
value_name = "COMMAND",
|
||||
required = true,
|
||||
trailing_var_arg = true,
|
||||
allow_hyphen_values = true
|
||||
)]
|
||||
command: Vec<String>,
|
||||
},
|
||||
Check(ExecPolicyCheckCommand),
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let cli = Cli::parse();
|
||||
match cli {
|
||||
Cli::Check {
|
||||
policies,
|
||||
command,
|
||||
pretty,
|
||||
} => cmd_check(policies, command, pretty),
|
||||
Cli::Check(cmd) => cmd_check(cmd),
|
||||
}
|
||||
}
|
||||
|
||||
fn cmd_check(policy_paths: Vec<PathBuf>, args: Vec<String>, pretty: bool) -> Result<()> {
|
||||
let policy = load_policies(&policy_paths)?;
|
||||
|
||||
let eval = policy.check(&args);
|
||||
let json = if pretty {
|
||||
serde_json::to_string_pretty(&eval)?
|
||||
} else {
|
||||
serde_json::to_string(&eval)?
|
||||
};
|
||||
fn cmd_check(cmd: ExecPolicyCheckCommand) -> Result<()> {
|
||||
let json = cmd.run()?;
|
||||
println!("{json}");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn load_policies(policy_paths: &[PathBuf]) -> Result<codex_execpolicy2::Policy> {
|
||||
let mut parser = PolicyParser::new();
|
||||
for policy_path in policy_paths {
|
||||
let policy_file_contents = fs::read_to_string(policy_path)
|
||||
.with_context(|| format!("failed to read policy at {}", policy_path.display()))?;
|
||||
let policy_identifier = policy_path.to_string_lossy().to_string();
|
||||
parser.parse(&policy_identifier, &policy_file_contents)?;
|
||||
}
|
||||
Ok(parser.build())
|
||||
}
|
||||
|
||||
@@ -15,6 +15,10 @@ impl Policy {
|
||||
Self { rules_by_program }
|
||||
}
|
||||
|
||||
pub fn empty() -> Self {
|
||||
Self::new(MultiMap::new())
|
||||
}
|
||||
|
||||
pub fn rules(&self) -> &MultiMap<String, RuleRef> {
|
||||
&self.rules_by_program
|
||||
}
|
||||
@@ -23,9 +27,9 @@ impl Policy {
|
||||
let rules = match cmd.first() {
|
||||
Some(first) => match self.rules_by_program.get_vec(first) {
|
||||
Some(rules) => rules,
|
||||
None => return Evaluation::NoMatch,
|
||||
None => return Evaluation::NoMatch {},
|
||||
},
|
||||
None => return Evaluation::NoMatch,
|
||||
None => return Evaluation::NoMatch {},
|
||||
};
|
||||
|
||||
let matched_rules: Vec<RuleMatch> =
|
||||
@@ -35,7 +39,7 @@ impl Policy {
|
||||
decision,
|
||||
matched_rules,
|
||||
},
|
||||
None => Evaluation::NoMatch,
|
||||
None => Evaluation::NoMatch {},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -48,7 +52,7 @@ impl Policy {
|
||||
.into_iter()
|
||||
.flat_map(|command| match self.check(command.as_ref()) {
|
||||
Evaluation::Match { matched_rules, .. } => matched_rules,
|
||||
Evaluation::NoMatch => Vec::new(),
|
||||
Evaluation::NoMatch { .. } => Vec::new(),
|
||||
})
|
||||
.collect();
|
||||
|
||||
@@ -57,7 +61,7 @@ impl Policy {
|
||||
decision,
|
||||
matched_rules,
|
||||
},
|
||||
None => Evaluation::NoMatch,
|
||||
None => Evaluation::NoMatch {},
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -65,7 +69,7 @@ impl Policy {
|
||||
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub enum Evaluation {
|
||||
NoMatch,
|
||||
NoMatch {},
|
||||
Match {
|
||||
decision: Decision,
|
||||
#[serde(rename = "matchedRules")]
|
||||
|
||||
@@ -10,6 +10,7 @@ use codex_execpolicy2::rule::PatternToken;
|
||||
use codex_execpolicy2::rule::PrefixPattern;
|
||||
use codex_execpolicy2::rule::PrefixRule;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::json;
|
||||
|
||||
fn tokens(cmd: &[&str]) -> Vec<String> {
|
||||
cmd.iter().map(std::string::ToString::to_string).collect()
|
||||
@@ -60,6 +61,14 @@ prefix_rule(
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn serializes_no_match_as_object() {
|
||||
let serialized =
|
||||
serde_json::to_value(&Evaluation::NoMatch {}).expect("should serialize evaluation");
|
||||
|
||||
assert_eq!(json!({"noMatch": {}}), serialized);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parses_multiple_policy_files() {
|
||||
let first_policy = r#"
|
||||
@@ -288,7 +297,7 @@ prefix_rule(
|
||||
"color.status=always",
|
||||
"status",
|
||||
]));
|
||||
assert_eq!(Evaluation::NoMatch, no_match_eval);
|
||||
assert_eq!(Evaluation::NoMatch {}, no_match_eval);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
Reference in New Issue
Block a user