mirror of
https://github.com/openai/codex.git
synced 2026-02-01 22:47:52 +00:00
Compare commits
3 Commits
main
...
dev/zhao/t
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e35cbb7acf | ||
|
|
2c87ca8c68 | ||
|
|
3aa9c0886f |
1
codex-rs/Cargo.lock
generated
1
codex-rs/Cargo.lock
generated
@@ -1085,6 +1085,7 @@ dependencies = [
|
||||
"codex-apply-patch",
|
||||
"codex-arg0",
|
||||
"codex-async-utils",
|
||||
"codex-execpolicy2",
|
||||
"codex-file-search",
|
||||
"codex-git",
|
||||
"codex-keyring-store",
|
||||
|
||||
@@ -66,6 +66,7 @@ codex-chatgpt = { path = "chatgpt" }
|
||||
codex-common = { path = "common" }
|
||||
codex-core = { path = "core" }
|
||||
codex-exec = { path = "exec" }
|
||||
codex-execpolicy2 = { path = "execpolicy2" }
|
||||
codex-feedback = { path = "feedback" }
|
||||
codex-file-search = { path = "file-search" }
|
||||
codex-git = { path = "utils/git" }
|
||||
|
||||
@@ -22,6 +22,7 @@ chrono = { workspace = true, features = ["serde"] }
|
||||
codex-app-server-protocol = { workspace = true }
|
||||
codex-apply-patch = { workspace = true }
|
||||
codex-async-utils = { workspace = true }
|
||||
codex-execpolicy2 = { workspace = true }
|
||||
codex-file-search = { workspace = true }
|
||||
codex-git = { workspace = true }
|
||||
codex-keyring-store = { workspace = true }
|
||||
|
||||
@@ -120,6 +120,7 @@ use crate::user_instructions::UserInstructions;
|
||||
use crate::user_notification::UserNotification;
|
||||
use crate::util::backoff;
|
||||
use codex_async_utils::OrCancelExt;
|
||||
use codex_execpolicy2::Policy as ExecPolicyV2;
|
||||
use codex_otel::otel_event_manager::OtelEventManager;
|
||||
use codex_protocol::config_types::ReasoningEffort as ReasoningEffortConfig;
|
||||
use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
|
||||
@@ -165,6 +166,10 @@ impl Codex {
|
||||
|
||||
let user_instructions = get_user_instructions(&config).await;
|
||||
|
||||
let exec_policy_v2 =
|
||||
crate::exec_policy::exec_policy_for(&config.features, &config.codex_home)
|
||||
.map_err(|err| CodexErr::Fatal(format!("failed to load execpolicy2: {err}")))?;
|
||||
|
||||
let config = Arc::new(config);
|
||||
|
||||
let session_configuration = SessionConfiguration {
|
||||
@@ -181,6 +186,7 @@ impl Codex {
|
||||
cwd: config.cwd.clone(),
|
||||
original_config_do_not_use: Arc::clone(&config),
|
||||
features: config.features.clone(),
|
||||
exec_policy_v2,
|
||||
session_source,
|
||||
};
|
||||
|
||||
@@ -278,6 +284,7 @@ pub(crate) struct TurnContext {
|
||||
pub(crate) final_output_json_schema: Option<Value>,
|
||||
pub(crate) codex_linux_sandbox_exe: Option<PathBuf>,
|
||||
pub(crate) tool_call_gate: Arc<ReadinessFlag>,
|
||||
pub(crate) exec_policy_v2: Option<Arc<ExecPolicyV2>>,
|
||||
}
|
||||
|
||||
impl TurnContext {
|
||||
@@ -334,6 +341,8 @@ pub(crate) struct SessionConfiguration {
|
||||
|
||||
/// Set of feature flags for this session
|
||||
features: Features,
|
||||
/// Optional execpolicy2 policy, applied only when enabled by feature flag.
|
||||
exec_policy_v2: Option<Arc<ExecPolicyV2>>,
|
||||
|
||||
// TODO(pakrym): Remove config from here
|
||||
original_config_do_not_use: Arc<Config>,
|
||||
@@ -434,6 +443,7 @@ impl Session {
|
||||
final_output_json_schema: None,
|
||||
codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(),
|
||||
tool_call_gate: Arc::new(ReadinessFlag::new()),
|
||||
exec_policy_v2: session_configuration.exec_policy_v2.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1758,6 +1768,7 @@ async fn spawn_review_thread(
|
||||
final_output_json_schema: None,
|
||||
codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(),
|
||||
tool_call_gate: Arc::new(ReadinessFlag::new()),
|
||||
exec_policy_v2: parent_turn_context.exec_policy_v2.clone(),
|
||||
};
|
||||
|
||||
// Seed the child task with the review prompt as the initial user message.
|
||||
@@ -2546,6 +2557,7 @@ mod tests {
|
||||
cwd: config.cwd.clone(),
|
||||
original_config_do_not_use: Arc::clone(&config),
|
||||
features: Features::default(),
|
||||
exec_policy_v2: None,
|
||||
session_source: SessionSource::Exec,
|
||||
};
|
||||
|
||||
@@ -2623,6 +2635,7 @@ mod tests {
|
||||
cwd: config.cwd.clone(),
|
||||
original_config_do_not_use: Arc::clone(&config),
|
||||
features: Features::default(),
|
||||
exec_policy_v2: None,
|
||||
session_source: SessionSource::Exec,
|
||||
};
|
||||
|
||||
|
||||
293
codex-rs/core/src/exec_policy.rs
Normal file
293
codex-rs/core/src/exec_policy.rs
Normal file
@@ -0,0 +1,293 @@
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::command_safety::is_dangerous_command::requires_initial_appoval;
|
||||
use codex_execpolicy2::Decision;
|
||||
use codex_execpolicy2::Evaluation;
|
||||
use codex_execpolicy2::Policy;
|
||||
use codex_execpolicy2::PolicyParser;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use thiserror::Error;
|
||||
|
||||
use crate::bash::parse_shell_lc_plain_commands;
|
||||
use crate::features::Feature;
|
||||
use crate::features::Features;
|
||||
use crate::tools::sandboxing::ApprovalRequirement;
|
||||
|
||||
const FORBIDDEN_REASON: &str = "execpolicy forbids this command";
|
||||
const PROMPT_REASON: &str = "execpolicy requires approval for this command";
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum ExecPolicyError {
|
||||
#[error("failed to read execpolicy files from {dir}: {source}")]
|
||||
ReadDir {
|
||||
dir: PathBuf,
|
||||
source: std::io::Error,
|
||||
},
|
||||
|
||||
#[error("failed to read execpolicy file {path}: {source}")]
|
||||
ReadFile {
|
||||
path: PathBuf,
|
||||
source: std::io::Error,
|
||||
},
|
||||
|
||||
#[error("failed to parse execpolicy file {path}: {source}")]
|
||||
ParsePolicy {
|
||||
path: String,
|
||||
source: codex_execpolicy2::Error,
|
||||
},
|
||||
}
|
||||
|
||||
pub(crate) fn exec_policy_for(
|
||||
features: &Features,
|
||||
codex_home: &Path,
|
||||
) -> Result<Option<Arc<Policy>>, ExecPolicyError> {
|
||||
if !features.enabled(Feature::ExecPolicyV2) {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let policy_dir = codex_home.to_path_buf();
|
||||
let entries = match fs::read_dir(&policy_dir) {
|
||||
Ok(entries) => entries,
|
||||
Err(source) if source.kind() == std::io::ErrorKind::NotFound => return Ok(None),
|
||||
Err(source) => {
|
||||
return Err(ExecPolicyError::ReadDir {
|
||||
dir: policy_dir,
|
||||
source,
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
let mut policy_paths: Vec<PathBuf> = Vec::new();
|
||||
for entry in entries {
|
||||
let entry = entry.map_err(|source| ExecPolicyError::ReadDir {
|
||||
dir: policy_dir.clone(),
|
||||
source,
|
||||
})?;
|
||||
let path = entry.path();
|
||||
if path
|
||||
.extension()
|
||||
.and_then(|ext| ext.to_str())
|
||||
.is_some_and(|ext| ext == "codexpolicy")
|
||||
&& path.is_file()
|
||||
{
|
||||
policy_paths.push(path);
|
||||
}
|
||||
}
|
||||
|
||||
policy_paths.sort();
|
||||
|
||||
let mut parser = PolicyParser::new();
|
||||
for policy_path in &policy_paths {
|
||||
let contents =
|
||||
fs::read_to_string(policy_path).map_err(|source| ExecPolicyError::ReadFile {
|
||||
path: policy_path.clone(),
|
||||
source,
|
||||
})?;
|
||||
let identifier = policy_path.to_string_lossy().to_string();
|
||||
parser
|
||||
.parse(&identifier, &contents)
|
||||
.map_err(|source| ExecPolicyError::ParsePolicy {
|
||||
path: identifier,
|
||||
source,
|
||||
})?;
|
||||
}
|
||||
|
||||
let policy = Arc::new(parser.build());
|
||||
tracing::debug!(
|
||||
file_count = policy_paths.len(),
|
||||
"loaded execpolicy2 from {}",
|
||||
policy_dir.display()
|
||||
);
|
||||
|
||||
Ok(Some(policy))
|
||||
}
|
||||
|
||||
fn evaluate_with_policy(
|
||||
policy: &Policy,
|
||||
command: &[String],
|
||||
approval_policy: AskForApproval,
|
||||
) -> Option<ApprovalRequirement> {
|
||||
let commands = parse_shell_lc_plain_commands(command).unwrap_or_else(|| vec![command.to_vec()]);
|
||||
let evaluation = policy.check_multiple(commands.iter());
|
||||
|
||||
match evaluation {
|
||||
Evaluation::Match { decision, .. } => match decision {
|
||||
Decision::Forbidden => Some(ApprovalRequirement::Forbidden {
|
||||
reason: FORBIDDEN_REASON.to_string(),
|
||||
}),
|
||||
Decision::Prompt => {
|
||||
let reason = PROMPT_REASON.to_string();
|
||||
if matches!(approval_policy, AskForApproval::Never) {
|
||||
Some(ApprovalRequirement::Forbidden { reason })
|
||||
} else {
|
||||
Some(ApprovalRequirement::NeedsApproval {
|
||||
reason: Some(reason),
|
||||
})
|
||||
}
|
||||
}
|
||||
Decision::Allow => Some(ApprovalRequirement::Skip),
|
||||
},
|
||||
Evaluation::NoMatch => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn approval_requirement_for_command(
|
||||
policy: Option<&Policy>,
|
||||
command: &[String],
|
||||
approval_policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
with_escalated_permissions: bool,
|
||||
) -> ApprovalRequirement {
|
||||
if let Some(policy) = policy
|
||||
&& let Some(requirement) = evaluate_with_policy(policy, command, approval_policy)
|
||||
{
|
||||
return requirement;
|
||||
}
|
||||
|
||||
if requires_initial_appoval(
|
||||
approval_policy,
|
||||
sandbox_policy,
|
||||
command,
|
||||
with_escalated_permissions,
|
||||
) {
|
||||
ApprovalRequirement::NeedsApproval { reason: None }
|
||||
} else {
|
||||
ApprovalRequirement::Skip
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::features::Feature;
|
||||
use crate::features::Features;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use pretty_assertions::assert_eq;
|
||||
use tempfile::tempdir;
|
||||
|
||||
#[test]
|
||||
fn returns_none_when_feature_disabled() {
|
||||
let features = Features::with_defaults();
|
||||
let temp_dir = tempdir().expect("create temp dir");
|
||||
|
||||
let policy = exec_policy_for(&features, temp_dir.path()).expect("policy result");
|
||||
|
||||
assert!(policy.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn returns_none_when_policy_dir_is_missing() {
|
||||
let mut features = Features::with_defaults();
|
||||
features.enable(Feature::ExecPolicyV2);
|
||||
let temp_dir = tempdir().expect("create temp dir");
|
||||
let missing_dir = temp_dir.path().join("missing");
|
||||
|
||||
let policy = exec_policy_for(&features, &missing_dir).expect("policy result");
|
||||
|
||||
assert!(policy.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn evaluates_bash_lc_inner_commands() {
|
||||
let policy_src = r#"
|
||||
prefix_rule(pattern=["rm"], decision="forbidden")
|
||||
"#;
|
||||
let mut parser = PolicyParser::new();
|
||||
parser
|
||||
.parse("test.codexpolicy", policy_src)
|
||||
.expect("parse policy");
|
||||
let policy = parser.build();
|
||||
|
||||
let forbidden_script = vec![
|
||||
"bash".to_string(),
|
||||
"-lc".to_string(),
|
||||
"rm -rf /tmp".to_string(),
|
||||
];
|
||||
|
||||
let requirement =
|
||||
evaluate_with_policy(&policy, &forbidden_script, AskForApproval::OnRequest)
|
||||
.expect("expected match for forbidden command");
|
||||
|
||||
assert_eq!(
|
||||
requirement,
|
||||
ApprovalRequirement::Forbidden {
|
||||
reason: FORBIDDEN_REASON.to_string()
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn approval_requirement_prefers_execpolicy_match() {
|
||||
let policy_src = r#"prefix_rule(pattern=["rm"], decision="prompt")"#;
|
||||
let mut parser = PolicyParser::new();
|
||||
parser
|
||||
.parse("test.codexpolicy", policy_src)
|
||||
.expect("parse policy");
|
||||
let policy = parser.build();
|
||||
let command = vec!["rm".to_string()];
|
||||
|
||||
let requirement = approval_requirement_for_command(
|
||||
Some(&policy),
|
||||
&command,
|
||||
AskForApproval::OnRequest,
|
||||
&SandboxPolicy::DangerFullAccess,
|
||||
false,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
requirement,
|
||||
ApprovalRequirement::NeedsApproval {
|
||||
reason: Some(PROMPT_REASON.to_string())
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn approval_requirement_respects_approval_policy() {
|
||||
let policy_src = r#"prefix_rule(pattern=["rm"], decision="prompt")"#;
|
||||
let mut parser = PolicyParser::new();
|
||||
parser
|
||||
.parse("test.codexpolicy", policy_src)
|
||||
.expect("parse policy");
|
||||
let policy = parser.build();
|
||||
let command = vec!["rm".to_string()];
|
||||
|
||||
let requirement = approval_requirement_for_command(
|
||||
Some(&policy),
|
||||
&command,
|
||||
AskForApproval::Never,
|
||||
&SandboxPolicy::DangerFullAccess,
|
||||
false,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
requirement,
|
||||
ApprovalRequirement::Forbidden {
|
||||
reason: PROMPT_REASON.to_string()
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn approval_requirement_falls_back_to_heuristics() {
|
||||
let command = vec!["python".to_string()];
|
||||
|
||||
let requirement = approval_requirement_for_command(
|
||||
None,
|
||||
&command,
|
||||
AskForApproval::UnlessTrusted,
|
||||
&SandboxPolicy::ReadOnly,
|
||||
false,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
requirement,
|
||||
ApprovalRequirement::NeedsApproval { reason: None }
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -40,6 +40,8 @@ pub enum Feature {
|
||||
ViewImageTool,
|
||||
/// Allow the model to request web searches.
|
||||
WebSearchRequest,
|
||||
/// Gate the execpolicy2 enforcement for shell/unified exec.
|
||||
ExecPolicyV2,
|
||||
/// Enable the model-based risk assessments for sandboxed commands.
|
||||
SandboxCommandAssessment,
|
||||
/// Create a ghost commit at each turn.
|
||||
@@ -285,6 +287,12 @@ pub const FEATURES: &[FeatureSpec] = &[
|
||||
stage: Stage::Stable,
|
||||
default_enabled: false,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::ExecPolicyV2,
|
||||
key: "exec_policy_v2",
|
||||
stage: Stage::Experimental,
|
||||
default_enabled: false,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::SandboxCommandAssessment,
|
||||
key: "experimental_sandbox_command_assessment",
|
||||
|
||||
@@ -24,6 +24,7 @@ mod environment_context;
|
||||
pub mod error;
|
||||
pub mod exec;
|
||||
pub mod exec_env;
|
||||
mod exec_policy;
|
||||
pub mod features;
|
||||
mod flags;
|
||||
pub mod git_info;
|
||||
|
||||
@@ -9,6 +9,7 @@ use crate::apply_patch::convert_apply_patch_to_protocol;
|
||||
use crate::codex::TurnContext;
|
||||
use crate::exec::ExecParams;
|
||||
use crate::exec_env::create_env;
|
||||
use crate::exec_policy::approval_requirement_for_command;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::is_safe_command::is_known_safe_command;
|
||||
use crate::protocol::ExecCommandSource;
|
||||
@@ -24,6 +25,7 @@ use crate::tools::runtimes::apply_patch::ApplyPatchRequest;
|
||||
use crate::tools::runtimes::apply_patch::ApplyPatchRuntime;
|
||||
use crate::tools::runtimes::shell::ShellRequest;
|
||||
use crate::tools::runtimes::shell::ShellRuntime;
|
||||
use crate::tools::sandboxing::ApprovalRequirement;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
|
||||
pub struct ShellHandler;
|
||||
@@ -303,6 +305,17 @@ impl ShellHandler {
|
||||
env: exec_params.env.clone(),
|
||||
with_escalated_permissions: exec_params.with_escalated_permissions,
|
||||
justification: exec_params.justification.clone(),
|
||||
approval_requirement: if is_user_shell_command {
|
||||
ApprovalRequirement::Skip
|
||||
} else {
|
||||
approval_requirement_for_command(
|
||||
turn.exec_policy_v2.as_deref(),
|
||||
&exec_params.command,
|
||||
turn.approval_policy,
|
||||
&turn.sandbox_policy,
|
||||
exec_params.with_escalated_permissions.unwrap_or(false),
|
||||
)
|
||||
},
|
||||
};
|
||||
let mut orchestrator = ToolOrchestrator::new();
|
||||
let mut runtime = ShellRuntime::new();
|
||||
|
||||
@@ -11,11 +11,13 @@ use crate::error::get_error_message_ui;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::sandboxing::SandboxManager;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::ApprovalRequirement;
|
||||
use crate::tools::sandboxing::ProvidesSandboxRetryData;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use crate::tools::sandboxing::ToolRuntime;
|
||||
use crate::tools::sandboxing::default_approval_requirement;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
|
||||
@@ -49,40 +51,52 @@ impl ToolOrchestrator {
|
||||
let otel_cfg = codex_otel::otel_event_manager::ToolDecisionSource::Config;
|
||||
|
||||
// 1) Approval
|
||||
let needs_initial_approval =
|
||||
tool.wants_initial_approval(req, approval_policy, &turn_ctx.sandbox_policy);
|
||||
let mut already_approved = false;
|
||||
|
||||
if needs_initial_approval {
|
||||
let mut risk = None;
|
||||
|
||||
if let Some(metadata) = req.sandbox_retry_data() {
|
||||
risk = tool_ctx
|
||||
.session
|
||||
.assess_sandbox_command(turn_ctx, &tool_ctx.call_id, &metadata.command, None)
|
||||
.await;
|
||||
let requirement = tool.approval_requirement(req).unwrap_or_else(|| {
|
||||
default_approval_requirement(approval_policy, &turn_ctx.sandbox_policy)
|
||||
});
|
||||
match requirement {
|
||||
ApprovalRequirement::Skip => {
|
||||
otel.tool_decision(otel_tn, otel_ci, ReviewDecision::Approved, otel_cfg);
|
||||
}
|
||||
ApprovalRequirement::Forbidden { reason } => {
|
||||
return Err(ToolError::Rejected(reason));
|
||||
}
|
||||
ApprovalRequirement::NeedsApproval { reason } => {
|
||||
let mut risk = None;
|
||||
|
||||
let approval_ctx = ApprovalCtx {
|
||||
session: tool_ctx.session,
|
||||
turn: turn_ctx,
|
||||
call_id: &tool_ctx.call_id,
|
||||
retry_reason: None,
|
||||
risk,
|
||||
};
|
||||
let decision = tool.start_approval_async(req, approval_ctx).await;
|
||||
|
||||
otel.tool_decision(otel_tn, otel_ci, decision, otel_user.clone());
|
||||
|
||||
match decision {
|
||||
ReviewDecision::Denied | ReviewDecision::Abort => {
|
||||
return Err(ToolError::Rejected("rejected by user".to_string()));
|
||||
if let Some(metadata) = req.sandbox_retry_data() {
|
||||
risk = tool_ctx
|
||||
.session
|
||||
.assess_sandbox_command(
|
||||
turn_ctx,
|
||||
&tool_ctx.call_id,
|
||||
&metadata.command,
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {}
|
||||
|
||||
let approval_ctx = ApprovalCtx {
|
||||
session: tool_ctx.session,
|
||||
turn: turn_ctx,
|
||||
call_id: &tool_ctx.call_id,
|
||||
retry_reason: reason,
|
||||
risk,
|
||||
};
|
||||
let decision = tool.start_approval_async(req, approval_ctx).await;
|
||||
|
||||
otel.tool_decision(otel_tn, otel_ci, decision, otel_user.clone());
|
||||
|
||||
match decision {
|
||||
ReviewDecision::Denied | ReviewDecision::Abort => {
|
||||
return Err(ToolError::Rejected("rejected by user".to_string()));
|
||||
}
|
||||
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {}
|
||||
}
|
||||
already_approved = true;
|
||||
}
|
||||
already_approved = true;
|
||||
} else {
|
||||
otel.tool_decision(otel_tn, otel_ci, ReviewDecision::Approved, otel_cfg);
|
||||
}
|
||||
|
||||
// 2) First attempt under the selected sandbox.
|
||||
|
||||
@@ -4,13 +4,12 @@ Runtime: shell
|
||||
Executes shell requests under the orchestrator: asks for approval when needed,
|
||||
builds a CommandSpec, and runs it under the current SandboxAttempt.
|
||||
*/
|
||||
use crate::command_safety::is_dangerous_command::requires_initial_appoval;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::sandboxing::execute_env;
|
||||
use crate::tools::runtimes::build_command_spec;
|
||||
use crate::tools::sandboxing::Approvable;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::ApprovalRequirement;
|
||||
use crate::tools::sandboxing::ProvidesSandboxRetryData;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::SandboxRetryData;
|
||||
@@ -20,7 +19,6 @@ use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use crate::tools::sandboxing::ToolRuntime;
|
||||
use crate::tools::sandboxing::with_cached_approval;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use futures::future::BoxFuture;
|
||||
use std::path::PathBuf;
|
||||
@@ -33,6 +31,7 @@ pub struct ShellRequest {
|
||||
pub env: std::collections::HashMap<String, String>,
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
pub approval_requirement: ApprovalRequirement,
|
||||
}
|
||||
|
||||
impl ProvidesSandboxRetryData for ShellRequest {
|
||||
@@ -114,18 +113,8 @@ impl Approvable<ShellRequest> for ShellRuntime {
|
||||
})
|
||||
}
|
||||
|
||||
fn wants_initial_approval(
|
||||
&self,
|
||||
req: &ShellRequest,
|
||||
policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
) -> bool {
|
||||
requires_initial_appoval(
|
||||
policy,
|
||||
sandbox_policy,
|
||||
&req.command,
|
||||
req.with_escalated_permissions.unwrap_or(false),
|
||||
)
|
||||
fn approval_requirement(&self, req: &ShellRequest) -> Option<ApprovalRequirement> {
|
||||
Some(req.approval_requirement.clone())
|
||||
}
|
||||
|
||||
fn wants_escalated_first_attempt(&self, req: &ShellRequest) -> bool {
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
use crate::command_safety::is_dangerous_command::requires_initial_appoval;
|
||||
/*
|
||||
Runtime: unified exec
|
||||
|
||||
@@ -10,6 +9,7 @@ use crate::error::SandboxErr;
|
||||
use crate::tools::runtimes::build_command_spec;
|
||||
use crate::tools::sandboxing::Approvable;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::ApprovalRequirement;
|
||||
use crate::tools::sandboxing::ProvidesSandboxRetryData;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::SandboxRetryData;
|
||||
@@ -22,9 +22,7 @@ use crate::tools::sandboxing::with_cached_approval;
|
||||
use crate::unified_exec::UnifiedExecError;
|
||||
use crate::unified_exec::UnifiedExecSession;
|
||||
use crate::unified_exec::UnifiedExecSessionManager;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use futures::future::BoxFuture;
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
@@ -36,6 +34,7 @@ pub struct UnifiedExecRequest {
|
||||
pub env: HashMap<String, String>,
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
pub approval_requirement: ApprovalRequirement,
|
||||
}
|
||||
|
||||
impl ProvidesSandboxRetryData for UnifiedExecRequest {
|
||||
@@ -65,6 +64,7 @@ impl UnifiedExecRequest {
|
||||
env: HashMap<String, String>,
|
||||
with_escalated_permissions: Option<bool>,
|
||||
justification: Option<String>,
|
||||
approval_requirement: ApprovalRequirement,
|
||||
) -> Self {
|
||||
Self {
|
||||
command,
|
||||
@@ -72,6 +72,7 @@ impl UnifiedExecRequest {
|
||||
env,
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
approval_requirement,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -129,18 +130,8 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
|
||||
})
|
||||
}
|
||||
|
||||
fn wants_initial_approval(
|
||||
&self,
|
||||
req: &UnifiedExecRequest,
|
||||
policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
) -> bool {
|
||||
requires_initial_appoval(
|
||||
policy,
|
||||
sandbox_policy,
|
||||
&req.command,
|
||||
req.with_escalated_permissions.unwrap_or(false),
|
||||
)
|
||||
fn approval_requirement(&self, req: &UnifiedExecRequest) -> Option<ApprovalRequirement> {
|
||||
Some(req.approval_requirement.clone())
|
||||
}
|
||||
|
||||
fn wants_escalated_first_attempt(&self, req: &UnifiedExecRequest) -> bool {
|
||||
|
||||
@@ -86,6 +86,34 @@ pub(crate) struct ApprovalCtx<'a> {
|
||||
pub risk: Option<SandboxCommandAssessment>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) enum ApprovalRequirement {
|
||||
Skip,
|
||||
NeedsApproval { reason: Option<String> },
|
||||
Forbidden { reason: String },
|
||||
}
|
||||
|
||||
/// Reflects the orchestrator's behavior (pre-refactor):
|
||||
/// - Never, OnFailure: do not ask
|
||||
/// - OnRequest: ask unless sandbox policy is DangerFullAccess
|
||||
/// - UnlessTrusted: always ask
|
||||
pub(crate) fn default_approval_requirement(
|
||||
policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
) -> ApprovalRequirement {
|
||||
let needs_approval = match policy {
|
||||
AskForApproval::Never | AskForApproval::OnFailure => false,
|
||||
AskForApproval::OnRequest => !matches!(sandbox_policy, SandboxPolicy::DangerFullAccess),
|
||||
AskForApproval::UnlessTrusted => true,
|
||||
};
|
||||
|
||||
if needs_approval {
|
||||
ApprovalRequirement::NeedsApproval { reason: None }
|
||||
} else {
|
||||
ApprovalRequirement::Skip
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) trait Approvable<Req> {
|
||||
type ApprovalKey: Hash + Eq + Clone + Debug + Serialize;
|
||||
|
||||
@@ -106,22 +134,11 @@ pub(crate) trait Approvable<Req> {
|
||||
matches!(policy, AskForApproval::Never)
|
||||
}
|
||||
|
||||
/// Decide whether an initial user approval should be requested before the
|
||||
/// first attempt. Defaults to the orchestrator's behavior (pre‑refactor):
|
||||
/// - Never, OnFailure: do not ask
|
||||
/// - OnRequest: ask unless sandbox policy is DangerFullAccess
|
||||
/// - UnlessTrusted: always ask
|
||||
fn wants_initial_approval(
|
||||
&self,
|
||||
_req: &Req,
|
||||
policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
) -> bool {
|
||||
match policy {
|
||||
AskForApproval::Never | AskForApproval::OnFailure => false,
|
||||
AskForApproval::OnRequest => !matches!(sandbox_policy, SandboxPolicy::DangerFullAccess),
|
||||
AskForApproval::UnlessTrusted => true,
|
||||
}
|
||||
/// Override the default approval requirement. Return `Some(_)` to specify
|
||||
/// a custom requirement, or `None` to fall back to
|
||||
/// policy-based default.
|
||||
fn approval_requirement(&self, _req: &Req) -> Option<ApprovalRequirement> {
|
||||
None
|
||||
}
|
||||
|
||||
/// Decide we can request an approval for no-sandbox execution.
|
||||
|
||||
@@ -11,6 +11,7 @@ use crate::codex::TurnContext;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::exec::StreamOutput;
|
||||
use crate::exec_env::create_env;
|
||||
use crate::exec_policy::approval_requirement_for_command;
|
||||
use crate::protocol::BackgroundEventEvent;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::protocol::ExecCommandSource;
|
||||
@@ -444,6 +445,13 @@ impl UnifiedExecSessionManager {
|
||||
create_env(&context.turn.shell_environment_policy),
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
approval_requirement_for_command(
|
||||
context.turn.exec_policy_v2.as_deref(),
|
||||
command,
|
||||
context.turn.approval_policy,
|
||||
&context.turn.sandbox_policy,
|
||||
with_escalated_permissions.unwrap_or(false),
|
||||
),
|
||||
);
|
||||
let tool_ctx = ToolCtx {
|
||||
session: context.session.as_ref(),
|
||||
|
||||
97
codex-rs/core/tests/suite/execpolicy2.rs
Normal file
97
codex-rs/core/tests/suite/execpolicy2.rs
Normal file
@@ -0,0 +1,97 @@
|
||||
#![allow(clippy::unwrap_used, clippy::expect_used)]
|
||||
|
||||
use anyhow::Result;
|
||||
use codex_core::features::Feature;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::mount_sse_once;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use serde_json::json;
|
||||
use std::fs;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn execpolicy2_blocks_shell_invocation() -> Result<()> {
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.features.enable(Feature::ExecPolicyV2);
|
||||
let policy_path = config.codex_home.join("policy.codexpolicy");
|
||||
fs::write(
|
||||
&policy_path,
|
||||
r#"prefix_rule(pattern=["echo"], decision="forbidden")"#,
|
||||
)
|
||||
.expect("write policy file");
|
||||
});
|
||||
let server = start_mock_server().await;
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-forbidden";
|
||||
let args = json!({
|
||||
"command": ["echo", "blocked"],
|
||||
"timeout_ms": 1_000,
|
||||
});
|
||||
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let session_model = test.session_configured.model.clone();
|
||||
test.codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "run shell command".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: test.cwd_path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
let EventMsg::ExecCommandEnd(end) = wait_for_event(&test.codex, |event| {
|
||||
matches!(event, EventMsg::ExecCommandEnd(_))
|
||||
})
|
||||
.await
|
||||
else {
|
||||
unreachable!()
|
||||
};
|
||||
wait_for_event(&test.codex, |event| {
|
||||
matches!(event, EventMsg::TaskComplete(_))
|
||||
})
|
||||
.await;
|
||||
|
||||
assert!(
|
||||
end.aggregated_output
|
||||
.contains("execpolicy forbids this command"),
|
||||
"unexpected output: {}",
|
||||
end.aggregated_output
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -27,6 +27,7 @@ mod compact;
|
||||
mod compact_resume_fork;
|
||||
mod deprecation_notice;
|
||||
mod exec;
|
||||
mod execpolicy2;
|
||||
mod fork_conversation;
|
||||
mod grep_files;
|
||||
mod items;
|
||||
|
||||
@@ -379,7 +379,7 @@ impl Serialize for FunctionCallOutputPayload {
|
||||
where
|
||||
S: Serializer,
|
||||
{
|
||||
tracing::debug!("Function call output payload: {:?}", self);
|
||||
tracing::error!("Payload: {:?}", self);
|
||||
if let Some(items) = &self.content_items {
|
||||
items.serialize(serializer)
|
||||
} else {
|
||||
|
||||
Reference in New Issue
Block a user