feat(core) RequestRule (#9489)

## Summary
Instead of trying to derive the prefix_rule for a command mechanically,
let's let the model decide for us.

## Testing
- [x] tested locally
This commit is contained in:
Dylan Hurd
2026-01-28 01:43:17 -07:00
committed by GitHub
parent 9f79365691
commit 996e09ca24
20 changed files with 696 additions and 302 deletions

2
codex-rs/Cargo.lock generated
View File

@@ -1077,6 +1077,7 @@ dependencies = [
"codex-chatgpt",
"codex-common",
"codex-core",
"codex-execpolicy",
"codex-feedback",
"codex-file-search",
"codex-login",
@@ -1756,6 +1757,7 @@ name = "codex-protocol"
version = "0.0.0"
dependencies = [
"anyhow",
"codex-execpolicy",
"codex-git",
"codex-utils-absolute-path",
"codex-utils-image",

View File

@@ -56,6 +56,7 @@ axum = { workspace = true, default-features = false, features = [
"tokio",
] }
base64 = { workspace = true }
codex-execpolicy = { workspace = true }
core_test_support = { workspace = true }
mcp-types = { workspace = true }
os_info = { workspace = true }

View File

@@ -11,6 +11,7 @@ use codex_app_server_protocol::NewConversationResponse;
use codex_app_server_protocol::RequestId;
use codex_app_server_protocol::SendUserMessageParams;
use codex_app_server_protocol::SendUserMessageResponse;
use codex_execpolicy::Policy;
use codex_protocol::ThreadId;
use codex_protocol::models::ContentItem;
use codex_protocol::models::DeveloperInstructions;
@@ -358,6 +359,8 @@ fn assert_permissions_message(item: &ResponseItem) {
let expected = DeveloperInstructions::from_policy(
&SandboxPolicy::DangerFullAccess,
AskForApproval::Never,
&Policy::empty(),
false,
&PathBuf::from("/tmp"),
)
.into_text();

View File

@@ -189,6 +189,9 @@
"remote_models": {
"type": "boolean"
},
"request_rule": {
"type": "boolean"
},
"responses_websockets": {
"type": "boolean"
},
@@ -1184,6 +1187,9 @@
"remote_models": {
"type": "boolean"
},
"request_rule": {
"type": "boolean"
},
"responses_websockets": {
"type": "boolean"
},

View File

@@ -190,6 +190,7 @@ use codex_protocol::models::ContentItem;
use codex_protocol::models::DeveloperInstructions;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::models::render_command_prefix_list;
use codex_protocol::protocol::CodexErrorInfo;
use codex_protocol::protocol::InitialHistory;
use codex_protocol::user_input::UserInput;
@@ -1226,6 +1227,8 @@ impl Session {
DeveloperInstructions::from_policy(
&next.sandbox_policy,
next.approval_policy,
self.services.exec_policy.current().as_ref(),
self.features.enabled(Feature::RequestRule),
&next.cwd,
)
.into(),
@@ -1416,6 +1419,44 @@ impl Session {
Ok(())
}
async fn turn_context_for_sub_id(&self, sub_id: &str) -> Option<Arc<TurnContext>> {
let active = self.active_turn.lock().await;
active
.as_ref()
.and_then(|turn| turn.tasks.get(sub_id))
.map(|task| Arc::clone(&task.turn_context))
}
pub(crate) async fn record_execpolicy_amendment_message(
&self,
sub_id: &str,
amendment: &ExecPolicyAmendment,
) {
let Some(prefixes) = render_command_prefix_list([amendment.command.as_slice()]) else {
warn!("execpolicy amendment for {sub_id} had no command prefix");
return;
};
let text = format!("Approved command prefix saved:\n{prefixes}");
let message: ResponseItem = DeveloperInstructions::new(text.clone()).into();
if let Some(turn_context) = self.turn_context_for_sub_id(sub_id).await {
self.record_conversation_items(&turn_context, std::slice::from_ref(&message))
.await;
return;
}
if self
.inject_response_items(vec![ResponseInputItem::Message {
role: "developer".to_string(),
content: vec![ContentItem::InputText { text }],
}])
.await
.is_err()
{
warn!("no active turn found to record execpolicy amendment message for {sub_id}");
}
}
/// Emit an exec approval request event and await the user's decision.
///
/// The request is keyed by `sub_id`/`call_id` so matching responses are delivered
@@ -1749,6 +1790,8 @@ impl Session {
DeveloperInstructions::from_policy(
&turn_context.sandbox_policy,
turn_context.approval_policy,
self.services.exec_policy.current().as_ref(),
self.features.enabled(Feature::RequestRule),
&turn_context.cwd,
)
.into(),
@@ -2595,18 +2638,26 @@ mod handlers {
if let ReviewDecision::ApprovedExecpolicyAmendment {
proposed_execpolicy_amendment,
} = &decision
&& let Err(err) = sess
{
match sess
.persist_execpolicy_amendment(proposed_execpolicy_amendment)
.await
{
let message = format!("Failed to apply execpolicy amendment: {err}");
tracing::warn!("{message}");
let warning = EventMsg::Warning(WarningEvent { message });
sess.send_event_raw(Event {
id: id.clone(),
msg: warning,
})
.await;
{
Ok(()) => {
sess.record_execpolicy_amendment_message(&id, proposed_execpolicy_amendment)
.await;
}
Err(err) => {
let message = format!("Failed to apply execpolicy amendment: {err}");
tracing::warn!("{message}");
let warning = EventMsg::Warning(WarningEvent { message });
sess.send_event_raw(Event {
id: id.clone(),
msg: warning,
})
.await;
}
}
}
match decision {
ReviewDecision::Abort => {

View File

@@ -87,6 +87,15 @@ pub(crate) struct ExecPolicyManager {
policy: ArcSwap<Policy>,
}
pub(crate) struct ExecApprovalRequest<'a> {
pub(crate) features: &'a Features,
pub(crate) command: &'a [String],
pub(crate) approval_policy: AskForApproval,
pub(crate) sandbox_policy: &'a SandboxPolicy,
pub(crate) sandbox_permissions: SandboxPermissions,
pub(crate) prefix_rule: Option<Vec<String>>,
}
impl ExecPolicyManager {
pub(crate) fn new(policy: Arc<Policy>) -> Self {
Self {
@@ -112,12 +121,16 @@ impl ExecPolicyManager {
pub(crate) async fn create_exec_approval_requirement_for_command(
&self,
features: &Features,
command: &[String],
approval_policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
sandbox_permissions: SandboxPermissions,
req: ExecApprovalRequest<'_>,
) -> ExecApprovalRequirement {
let ExecApprovalRequest {
features,
command,
approval_policy,
sandbox_policy,
sandbox_permissions,
prefix_rule,
} = req;
let exec_policy = self.current();
let commands =
parse_shell_lc_plain_commands(command).unwrap_or_else(|| vec![command.to_vec()]);
@@ -131,6 +144,12 @@ impl ExecPolicyManager {
};
let evaluation = exec_policy.check_multiple(commands.iter(), &exec_policy_fallback);
let requested_amendment = derive_requested_execpolicy_amendment(
features,
prefix_rule.as_ref(),
&evaluation.matched_rules,
);
match evaluation.decision {
Decision::Forbidden => ExecApprovalRequirement::Forbidden {
reason: derive_forbidden_reason(command, &evaluation),
@@ -144,9 +163,11 @@ impl ExecPolicyManager {
ExecApprovalRequirement::NeedsApproval {
reason: derive_prompt_reason(command, &evaluation),
proposed_execpolicy_amendment: if features.enabled(Feature::ExecPolicy) {
try_derive_execpolicy_amendment_for_prompt_rules(
&evaluation.matched_rules,
)
requested_amendment.or_else(|| {
try_derive_execpolicy_amendment_for_prompt_rules(
&evaluation.matched_rules,
)
})
} else {
None
},
@@ -382,6 +403,30 @@ fn try_derive_execpolicy_amendment_for_allow_rules(
})
}
fn derive_requested_execpolicy_amendment(
features: &Features,
prefix_rule: Option<&Vec<String>>,
matched_rules: &[RuleMatch],
) -> Option<ExecPolicyAmendment> {
if !features.enabled(Feature::ExecPolicy) {
return None;
}
let prefix_rule = prefix_rule?;
if prefix_rule.is_empty() {
return None;
}
if matched_rules
.iter()
.any(|rule_match| is_policy_match(rule_match) && rule_match.decision() == Decision::Prompt)
{
return None;
}
Some(ExecPolicyAmendment::new(prefix_rule.clone()))
}
/// Only return a reason when a policy rule drove the prompt decision.
fn derive_prompt_reason(command_args: &[String], evaluation: &Evaluation) -> Option<String> {
let command = render_shlex_command(command_args);
@@ -756,13 +801,14 @@ prefix_rule(pattern=["rm"], decision="forbidden")
let manager = ExecPolicyManager::new(policy);
let requirement = manager
.create_exec_approval_requirement_for_command(
&Features::with_defaults(),
&forbidden_script,
AskForApproval::OnRequest,
&SandboxPolicy::DangerFullAccess,
SandboxPermissions::UseDefault,
)
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
features: &Features::with_defaults(),
command: &forbidden_script,
approval_policy: AskForApproval::OnRequest,
sandbox_policy: &SandboxPolicy::DangerFullAccess,
sandbox_permissions: SandboxPermissions::UseDefault,
prefix_rule: None,
})
.await;
assert_eq!(
@@ -790,17 +836,18 @@ prefix_rule(
let manager = ExecPolicyManager::new(policy);
let requirement = manager
.create_exec_approval_requirement_for_command(
&Features::with_defaults(),
&[
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
features: &Features::with_defaults(),
command: &[
"rm".to_string(),
"-rf".to_string(),
"/some/important/folder".to_string(),
],
AskForApproval::OnRequest,
&SandboxPolicy::DangerFullAccess,
SandboxPermissions::UseDefault,
)
approval_policy: AskForApproval::OnRequest,
sandbox_policy: &SandboxPolicy::DangerFullAccess,
sandbox_permissions: SandboxPermissions::UseDefault,
prefix_rule: None,
})
.await;
assert_eq!(
@@ -823,13 +870,14 @@ prefix_rule(
let manager = ExecPolicyManager::new(policy);
let requirement = manager
.create_exec_approval_requirement_for_command(
&Features::with_defaults(),
&command,
AskForApproval::OnRequest,
&SandboxPolicy::DangerFullAccess,
SandboxPermissions::UseDefault,
)
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
features: &Features::with_defaults(),
command: &command,
approval_policy: AskForApproval::OnRequest,
sandbox_policy: &SandboxPolicy::DangerFullAccess,
sandbox_permissions: SandboxPermissions::UseDefault,
prefix_rule: None,
})
.await;
assert_eq!(
@@ -853,13 +901,14 @@ prefix_rule(
let manager = ExecPolicyManager::new(policy);
let requirement = manager
.create_exec_approval_requirement_for_command(
&Features::with_defaults(),
&command,
AskForApproval::Never,
&SandboxPolicy::DangerFullAccess,
SandboxPermissions::UseDefault,
)
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
features: &Features::with_defaults(),
command: &command,
approval_policy: AskForApproval::Never,
sandbox_policy: &SandboxPolicy::DangerFullAccess,
sandbox_permissions: SandboxPermissions::UseDefault,
prefix_rule: None,
})
.await;
assert_eq!(
@@ -876,13 +925,14 @@ prefix_rule(
let manager = ExecPolicyManager::default();
let requirement = manager
.create_exec_approval_requirement_for_command(
&Features::with_defaults(),
&command,
AskForApproval::UnlessTrusted,
&SandboxPolicy::ReadOnly,
SandboxPermissions::UseDefault,
)
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
features: &Features::with_defaults(),
command: &command,
approval_policy: AskForApproval::UnlessTrusted,
sandbox_policy: &SandboxPolicy::ReadOnly,
sandbox_permissions: SandboxPermissions::UseDefault,
prefix_rule: None,
})
.await;
assert_eq!(
@@ -894,6 +944,40 @@ prefix_rule(
);
}
#[tokio::test]
async fn request_rule_uses_prefix_rule() {
let command = vec![
"cargo".to_string(),
"install".to_string(),
"cargo-insta".to_string(),
];
let manager = ExecPolicyManager::default();
let mut features = Features::with_defaults();
features.enable(Feature::RequestRule);
let requirement = manager
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
features: &features,
command: &command,
approval_policy: AskForApproval::OnRequest,
sandbox_policy: &SandboxPolicy::ReadOnly,
sandbox_permissions: SandboxPermissions::RequireEscalated,
prefix_rule: Some(vec!["cargo".to_string(), "install".to_string()]),
})
.await;
assert_eq!(
requirement,
ExecApprovalRequirement::NeedsApproval {
reason: None,
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec![
"cargo".to_string(),
"install".to_string(),
])),
}
);
}
#[tokio::test]
async fn heuristics_apply_when_other_commands_match_policy() {
let policy_src = r#"prefix_rule(pattern=["apple"], decision="allow")"#;
@@ -910,13 +994,14 @@ prefix_rule(
assert_eq!(
ExecPolicyManager::new(policy)
.create_exec_approval_requirement_for_command(
&Features::with_defaults(),
&command,
AskForApproval::UnlessTrusted,
&SandboxPolicy::DangerFullAccess,
SandboxPermissions::UseDefault,
)
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
features: &Features::with_defaults(),
command: &command,
approval_policy: AskForApproval::UnlessTrusted,
sandbox_policy: &SandboxPolicy::DangerFullAccess,
sandbox_permissions: SandboxPermissions::UseDefault,
prefix_rule: None,
})
.await,
ExecApprovalRequirement::NeedsApproval {
reason: None,
@@ -984,13 +1069,14 @@ prefix_rule(
let manager = ExecPolicyManager::default();
let requirement = manager
.create_exec_approval_requirement_for_command(
&Features::with_defaults(),
&command,
AskForApproval::UnlessTrusted,
&SandboxPolicy::ReadOnly,
SandboxPermissions::UseDefault,
)
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
features: &Features::with_defaults(),
command: &command,
approval_policy: AskForApproval::UnlessTrusted,
sandbox_policy: &SandboxPolicy::ReadOnly,
sandbox_permissions: SandboxPermissions::UseDefault,
prefix_rule: None,
})
.await;
assert_eq!(
@@ -1011,13 +1097,14 @@ prefix_rule(
let manager = ExecPolicyManager::default();
let requirement = manager
.create_exec_approval_requirement_for_command(
&features,
&command,
AskForApproval::UnlessTrusted,
&SandboxPolicy::ReadOnly,
SandboxPermissions::UseDefault,
)
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
features: &features,
command: &command,
approval_policy: AskForApproval::UnlessTrusted,
sandbox_policy: &SandboxPolicy::ReadOnly,
sandbox_permissions: SandboxPermissions::UseDefault,
prefix_rule: None,
})
.await;
assert_eq!(
@@ -1041,13 +1128,14 @@ prefix_rule(
let manager = ExecPolicyManager::new(policy);
let requirement = manager
.create_exec_approval_requirement_for_command(
&Features::with_defaults(),
&command,
AskForApproval::OnRequest,
&SandboxPolicy::DangerFullAccess,
SandboxPermissions::UseDefault,
)
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
features: &Features::with_defaults(),
command: &command,
approval_policy: AskForApproval::OnRequest,
sandbox_policy: &SandboxPolicy::DangerFullAccess,
sandbox_permissions: SandboxPermissions::UseDefault,
prefix_rule: None,
})
.await;
assert_eq!(
@@ -1068,13 +1156,14 @@ prefix_rule(
];
let manager = ExecPolicyManager::default();
let requirement = manager
.create_exec_approval_requirement_for_command(
&Features::with_defaults(),
&command,
AskForApproval::UnlessTrusted,
&SandboxPolicy::ReadOnly,
SandboxPermissions::UseDefault,
)
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
features: &Features::with_defaults(),
command: &command,
approval_policy: AskForApproval::UnlessTrusted,
sandbox_policy: &SandboxPolicy::ReadOnly,
sandbox_permissions: SandboxPermissions::UseDefault,
prefix_rule: None,
})
.await;
assert_eq!(
@@ -1106,13 +1195,14 @@ prefix_rule(
assert_eq!(
ExecPolicyManager::new(policy)
.create_exec_approval_requirement_for_command(
&Features::with_defaults(),
&command,
AskForApproval::UnlessTrusted,
&SandboxPolicy::ReadOnly,
SandboxPermissions::UseDefault,
)
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
features: &Features::with_defaults(),
command: &command,
approval_policy: AskForApproval::UnlessTrusted,
sandbox_policy: &SandboxPolicy::ReadOnly,
sandbox_permissions: SandboxPermissions::UseDefault,
prefix_rule: None,
})
.await,
ExecApprovalRequirement::NeedsApproval {
reason: None,
@@ -1129,13 +1219,14 @@ prefix_rule(
let manager = ExecPolicyManager::default();
let requirement = manager
.create_exec_approval_requirement_for_command(
&Features::with_defaults(),
&command,
AskForApproval::OnRequest,
&SandboxPolicy::ReadOnly,
SandboxPermissions::UseDefault,
)
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
features: &Features::with_defaults(),
command: &command,
approval_policy: AskForApproval::OnRequest,
sandbox_policy: &SandboxPolicy::ReadOnly,
sandbox_permissions: SandboxPermissions::UseDefault,
prefix_rule: None,
})
.await;
assert_eq!(
@@ -1159,13 +1250,14 @@ prefix_rule(
let manager = ExecPolicyManager::new(policy);
let requirement = manager
.create_exec_approval_requirement_for_command(
&Features::with_defaults(),
&command,
AskForApproval::OnRequest,
&SandboxPolicy::ReadOnly,
SandboxPermissions::UseDefault,
)
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
features: &Features::with_defaults(),
command: &command,
approval_policy: AskForApproval::OnRequest,
sandbox_policy: &SandboxPolicy::ReadOnly,
sandbox_permissions: SandboxPermissions::UseDefault,
prefix_rule: None,
})
.await;
assert_eq!(
@@ -1226,13 +1318,14 @@ prefix_rule(
assert_eq!(
expected_req,
policy
.create_exec_approval_requirement_for_command(
&features,
&sneaky_command,
AskForApproval::OnRequest,
&SandboxPolicy::ReadOnly,
permissions,
)
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
features: &features,
command: &sneaky_command,
approval_policy: AskForApproval::OnRequest,
sandbox_policy: &SandboxPolicy::ReadOnly,
sandbox_permissions: permissions,
prefix_rule: None,
})
.await,
"{pwsh_approval_reason}"
);
@@ -1249,13 +1342,14 @@ prefix_rule(
]))),
},
policy
.create_exec_approval_requirement_for_command(
&features,
&dangerous_command,
AskForApproval::OnRequest,
&SandboxPolicy::ReadOnly,
permissions,
)
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
features: &features,
command: &dangerous_command,
approval_policy: AskForApproval::OnRequest,
sandbox_policy: &SandboxPolicy::ReadOnly,
sandbox_permissions: permissions,
prefix_rule: None,
})
.await,
r#"On all platforms, a forbidden command should require approval
(unless AskForApproval::Never is specified)."#
@@ -1268,13 +1362,14 @@ prefix_rule(
reason: "`rm -rf /important/data` rejected: blocked by policy".to_string(),
},
policy
.create_exec_approval_requirement_for_command(
&features,
&dangerous_command,
AskForApproval::Never,
&SandboxPolicy::ReadOnly,
permissions,
)
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
features: &features,
command: &dangerous_command,
approval_policy: AskForApproval::Never,
sandbox_policy: &SandboxPolicy::ReadOnly,
sandbox_permissions: permissions,
prefix_rule: None,
})
.await,
r#"On all platforms, a forbidden command should require approval
(unless AskForApproval::Never is specified)."#

View File

@@ -89,6 +89,8 @@ pub enum Feature {
WebSearchCached,
/// Gate the execpolicy enforcement for shell/unified exec.
ExecPolicy,
/// Allow the model to request approval and propose exec rules.
RequestRule,
/// Enable Windows sandbox (restricted token) on Windows.
WindowsSandbox,
/// Use the elevated Windows sandbox pipeline (setup + runner).
@@ -393,6 +395,12 @@ pub const FEATURES: &[FeatureSpec] = &[
stage: Stage::UnderDevelopment,
default_enabled: true,
},
FeatureSpec {
id: Feature::RequestRule,
key: "request_rule",
stage: Stage::UnderDevelopment,
default_enabled: false,
},
FeatureSpec {
id: Feature::WindowsSandbox,
key: "experimental_windows_sandbox",

View File

@@ -6,6 +6,7 @@ use std::sync::Arc;
use crate::codex::TurnContext;
use crate::exec::ExecParams;
use crate::exec_env::create_env;
use crate::exec_policy::ExecApprovalRequest;
use crate::function_tool::FunctionCallError;
use crate::is_safe_command::is_known_safe_command;
use crate::protocol::ExecCommandSource;
@@ -28,16 +29,27 @@ pub struct ShellHandler;
pub struct ShellCommandHandler;
struct RunExecLikeArgs {
tool_name: String,
exec_params: ExecParams,
prefix_rule: Option<Vec<String>>,
session: Arc<crate::codex::Session>,
turn: Arc<TurnContext>,
tracker: crate::tools::context::SharedTurnDiffTracker,
call_id: String,
freeform: bool,
}
impl ShellHandler {
fn to_exec_params(params: ShellToolCallParams, turn_context: &TurnContext) -> ExecParams {
fn to_exec_params(params: &ShellToolCallParams, turn_context: &TurnContext) -> ExecParams {
ExecParams {
command: params.command,
command: params.command.clone(),
cwd: turn_context.resolve_path(params.workdir.clone()),
expiration: params.timeout_ms.into(),
env: create_env(&turn_context.shell_environment_policy),
sandbox_permissions: params.sandbox_permissions.unwrap_or_default(),
windows_sandbox_level: turn_context.windows_sandbox_level,
justification: params.justification,
justification: params.justification.clone(),
arg0: None,
}
}
@@ -50,7 +62,7 @@ impl ShellCommandHandler {
}
fn to_exec_params(
params: ShellCommandToolCallParams,
params: &ShellCommandToolCallParams,
session: &crate::codex::Session,
turn_context: &TurnContext,
) -> ExecParams {
@@ -64,7 +76,7 @@ impl ShellCommandHandler {
env: create_env(&turn_context.shell_environment_policy),
sandbox_permissions: params.sandbox_permissions.unwrap_or_default(),
windows_sandbox_level: turn_context.windows_sandbox_level,
justification: params.justification,
justification: params.justification.clone(),
arg0: None,
}
}
@@ -108,29 +120,32 @@ impl ToolHandler for ShellHandler {
match payload {
ToolPayload::Function { arguments } => {
let params: ShellToolCallParams = parse_arguments(&arguments)?;
let exec_params = Self::to_exec_params(params, turn.as_ref());
Self::run_exec_like(
tool_name.as_str(),
let prefix_rule = params.prefix_rule.clone();
let exec_params = Self::to_exec_params(&params, turn.as_ref());
Self::run_exec_like(RunExecLikeArgs {
tool_name: tool_name.clone(),
exec_params,
prefix_rule,
session,
turn,
tracker,
call_id,
false,
)
freeform: false,
})
.await
}
ToolPayload::LocalShell { params } => {
let exec_params = Self::to_exec_params(params, turn.as_ref());
Self::run_exec_like(
tool_name.as_str(),
let exec_params = Self::to_exec_params(&params, turn.as_ref());
Self::run_exec_like(RunExecLikeArgs {
tool_name: tool_name.clone(),
exec_params,
prefix_rule: None,
session,
turn,
tracker,
call_id,
false,
)
freeform: false,
})
.await
}
_ => Err(FunctionCallError::RespondToModel(format!(
@@ -181,30 +196,43 @@ impl ToolHandler for ShellCommandHandler {
};
let params: ShellCommandToolCallParams = parse_arguments(&arguments)?;
let exec_params = Self::to_exec_params(params, session.as_ref(), turn.as_ref());
ShellHandler::run_exec_like(
tool_name.as_str(),
let prefix_rule = params.prefix_rule.clone();
let exec_params = Self::to_exec_params(&params, session.as_ref(), turn.as_ref());
ShellHandler::run_exec_like(RunExecLikeArgs {
tool_name,
exec_params,
prefix_rule,
session,
turn,
tracker,
call_id,
true,
)
freeform: true,
})
.await
}
}
impl ShellHandler {
async fn run_exec_like(
tool_name: &str,
exec_params: ExecParams,
session: Arc<crate::codex::Session>,
turn: Arc<TurnContext>,
tracker: crate::tools::context::SharedTurnDiffTracker,
call_id: String,
freeform: bool,
) -> Result<ToolOutput, FunctionCallError> {
async fn run_exec_like(args: RunExecLikeArgs) -> Result<ToolOutput, FunctionCallError> {
let RunExecLikeArgs {
tool_name,
exec_params,
prefix_rule,
session,
turn,
tracker,
call_id,
freeform,
} = args;
let features = session.features();
let request_rule_enabled = features.enabled(crate::features::Feature::RequestRule);
let prefix_rule = if request_rule_enabled {
prefix_rule
} else {
None
};
// Approval policy guard for explicit escalation in non-OnRequest modes.
if exec_params
.sandbox_permissions
@@ -214,9 +242,9 @@ impl ShellHandler {
codex_protocol::protocol::AskForApproval::OnRequest
)
{
let approval_policy = turn.approval_policy;
return Err(FunctionCallError::RespondToModel(format!(
"approval policy is {policy:?}; reject command — you should not ask for escalated permissions if the approval policy is {policy:?}",
policy = turn.approval_policy
"approval policy is {approval_policy:?}; reject command — you should not ask for escalated permissions if the approval policy is {approval_policy:?}"
)));
}
@@ -229,7 +257,7 @@ impl ShellHandler {
turn.as_ref(),
Some(&tracker),
&call_id,
tool_name,
tool_name.as_str(),
)
.await?
{
@@ -246,17 +274,17 @@ impl ShellHandler {
let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None);
emitter.begin(event_ctx).await;
let features = session.features();
let exec_approval_requirement = session
.services
.exec_policy
.create_exec_approval_requirement_for_command(
&features,
&exec_params.command,
turn.approval_policy,
&turn.sandbox_policy,
exec_params.sandbox_permissions,
)
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
features: &features,
command: &exec_params.command,
approval_policy: turn.approval_policy,
sandbox_policy: &turn.sandbox_policy,
sandbox_permissions: exec_params.sandbox_permissions,
prefix_rule,
})
.await;
let req = ShellRequest {
@@ -274,7 +302,7 @@ impl ShellHandler {
session: session.as_ref(),
turn: turn.as_ref(),
call_id: call_id.clone(),
tool_name: tool_name.to_string(),
tool_name,
};
let out = orchestrator
.run(&mut runtime, &req, &tool_ctx, &turn, turn.approval_policy)
@@ -377,10 +405,11 @@ mod tests {
login,
timeout_ms,
sandbox_permissions: Some(sandbox_permissions),
prefix_rule: None,
justification: justification.clone(),
};
let exec_params = ShellCommandHandler::to_exec_params(params, &session, &turn_context);
let exec_params = ShellCommandHandler::to_exec_params(&params, &session, &turn_context);
// ExecParams cannot derive Eq due to the CancellationToken field, so we manually compare the fields.
assert_eq!(exec_params.command, expected_command);

View File

@@ -43,6 +43,8 @@ struct ExecCommandArgs {
sandbox_permissions: SandboxPermissions,
#[serde(default)]
justification: Option<String>,
#[serde(default)]
prefix_rule: Option<Vec<String>>,
}
#[derive(Debug, Deserialize)]
@@ -135,19 +137,28 @@ impl ToolHandler for UnifiedExecHandler {
max_output_tokens,
sandbox_permissions,
justification,
prefix_rule,
..
} = args;
let features = session.features();
let request_rule_enabled = features.enabled(crate::features::Feature::RequestRule);
let prefix_rule = if request_rule_enabled {
prefix_rule
} else {
None
};
if sandbox_permissions.requires_escalated_permissions()
&& !matches!(
context.turn.approval_policy,
codex_protocol::protocol::AskForApproval::OnRequest
)
{
let approval_policy = context.turn.approval_policy;
manager.release_process_id(&process_id).await;
return Err(FunctionCallError::RespondToModel(format!(
"approval policy is {policy:?}; reject command — you cannot ask for escalated permissions if the approval policy is {policy:?}",
policy = context.turn.approval_policy
"approval policy is {approval_policy:?}; reject command — you cannot ask for escalated permissions if the approval policy is {approval_policy:?}"
)));
}
@@ -183,6 +194,7 @@ impl ToolHandler for UnifiedExecHandler {
tty,
sandbox_permissions,
justification,
prefix_rule,
},
&context,
)

View File

@@ -114,6 +114,7 @@ impl ToolRouter {
workdir: exec.working_directory,
timeout_ms: exec.timeout_ms,
sandbox_permissions: Some(SandboxPermissions::UseDefault),
prefix_rule: None,
justification: None,
};
Ok(Some(ToolCall {

View File

@@ -30,6 +30,7 @@ pub(crate) struct ToolsConfig {
pub web_search_mode: Option<WebSearchMode>,
pub collab_tools: bool,
pub collaboration_modes_tools: bool,
pub request_rule_enabled: bool,
pub experimental_supported_tools: Vec<String>,
}
@@ -49,6 +50,7 @@ impl ToolsConfig {
let include_apply_patch_tool = features.enabled(Feature::ApplyPatchFreeform);
let include_collab_tools = features.enabled(Feature::Collab);
let include_collaboration_modes_tools = features.enabled(Feature::CollaborationModes);
let request_rule_enabled = features.enabled(Feature::RequestRule);
let shell_type = if !features.enabled(Feature::ShellTool) {
ConfigShellToolType::Disabled
@@ -81,6 +83,7 @@ impl ToolsConfig {
web_search_mode: *web_search_mode,
collab_tools: include_collab_tools,
collaboration_modes_tools: include_collaboration_modes_tools,
request_rule_enabled,
experimental_supported_tools: model_info.experimental_supported_tools.clone(),
}
}
@@ -142,8 +145,50 @@ impl From<JsonSchema> for AdditionalProperties {
}
}
fn create_exec_command_tool() -> ToolSpec {
let properties = BTreeMap::from([
fn create_approval_parameters(include_prefix_rule: bool) -> BTreeMap<String, JsonSchema> {
let mut properties = BTreeMap::from([
(
"sandbox_permissions".to_string(),
JsonSchema::String {
description: Some(
"Sandbox permissions for the command. Set to \"require_escalated\" to request running without sandbox restrictions; defaults to \"use_default\"."
.to_string(),
),
},
),
(
"justification".to_string(),
JsonSchema::String {
description: Some(
r#"Only set if sandbox_permissions is \"require_escalated\".
Request approval from the user to run this command outside the sandbox.
Phrased as a simple question that summarizes the purpose of the
command as it relates to the task at hand - e.g. 'Do you want to
fetch and pull the latest version of this git branch?'"#
.to_string(),
),
},
),
]);
if include_prefix_rule {
properties.insert(
"prefix_rule".to_string(),
JsonSchema::Array {
items: Box::new(JsonSchema::String { description: None }),
description: Some(
r#"Only specify when sandbox_permissions is `require_escalated`.
Suggest a prefix command pattern that will allow you to fulfill similar requests from the user in the future.
Should be a short but reasonable prefix, e.g. [\"git\", \"pull\"] or [\"uv\", \"run\"] or [\"pytest\"]."#.to_string(),
),
});
}
properties
}
fn create_exec_command_tool(include_prefix_rule: bool) -> ToolSpec {
let mut properties = BTreeMap::from([
(
"cmd".to_string(),
JsonSchema::String {
@@ -199,25 +244,8 @@ fn create_exec_command_tool() -> ToolSpec {
),
},
),
(
"sandbox_permissions".to_string(),
JsonSchema::String {
description: Some(
"Sandbox permissions for the command. Set to \"require_escalated\" to request running without sandbox restrictions; defaults to \"use_default\"."
.to_string(),
),
},
),
(
"justification".to_string(),
JsonSchema::String {
description: Some(
"Only set if sandbox_permissions is \"require_escalated\". 1-sentence explanation of why we want to run this command."
.to_string(),
),
},
),
]);
properties.extend(create_approval_parameters(include_prefix_rule));
ToolSpec::Function(ResponsesApiTool {
name: "exec_command".to_string(),
@@ -280,8 +308,8 @@ fn create_write_stdin_tool() -> ToolSpec {
})
}
fn create_shell_tool() -> ToolSpec {
let properties = BTreeMap::from([
fn create_shell_tool(include_prefix_rule: bool) -> ToolSpec {
let mut properties = BTreeMap::from([
(
"command".to_string(),
JsonSchema::Array {
@@ -301,19 +329,8 @@ fn create_shell_tool() -> ToolSpec {
description: Some("The timeout for the command in milliseconds".to_string()),
},
),
(
"sandbox_permissions".to_string(),
JsonSchema::String {
description: Some("Sandbox permissions for the command. Set to \"require_escalated\" to request running without sandbox restrictions; defaults to \"use_default\".".to_string()),
},
),
(
"justification".to_string(),
JsonSchema::String {
description: Some("Only set if sandbox_permissions is \"require_escalated\". 1-sentence explanation of why we want to run this command.".to_string()),
},
),
]);
properties.extend(create_approval_parameters(include_prefix_rule));
let description = if cfg!(windows) {
r#"Runs a Powershell command (Windows) and returns its output. Arguments to `shell` will be passed to CreateProcessW(). Most commands should be prefixed with ["powershell.exe", "-Command"].
@@ -344,8 +361,8 @@ Examples of valid command strings:
})
}
fn create_shell_command_tool() -> ToolSpec {
let properties = BTreeMap::from([
fn create_shell_command_tool(include_prefix_rule: bool) -> ToolSpec {
let mut properties = BTreeMap::from([
(
"command".to_string(),
JsonSchema::String {
@@ -375,19 +392,8 @@ fn create_shell_command_tool() -> ToolSpec {
description: Some("The timeout for the command in milliseconds".to_string()),
},
),
(
"sandbox_permissions".to_string(),
JsonSchema::String {
description: Some("Sandbox permissions for the command. Set to \"require_escalated\" to request running without sandbox restrictions; defaults to \"use_default\".".to_string()),
},
),
(
"justification".to_string(),
JsonSchema::String {
description: Some("Only set if sandbox_permissions is \"require_escalated\". 1-sentence explanation of why we want to run this command.".to_string()),
},
),
]);
properties.extend(create_approval_parameters(include_prefix_rule));
let description = if cfg!(windows) {
r#"Runs a Powershell command (Windows) and returns its output.
@@ -1292,13 +1298,13 @@ pub(crate) fn build_specs(
match &config.shell_type {
ConfigShellToolType::Default => {
builder.push_spec(create_shell_tool());
builder.push_spec(create_shell_tool(config.request_rule_enabled));
}
ConfigShellToolType::Local => {
builder.push_spec(ToolSpec::LocalShell {});
}
ConfigShellToolType::UnifiedExec => {
builder.push_spec(create_exec_command_tool());
builder.push_spec(create_exec_command_tool(config.request_rule_enabled));
builder.push_spec(create_write_stdin_tool());
builder.register_handler("exec_command", unified_exec_handler.clone());
builder.register_handler("write_stdin", unified_exec_handler);
@@ -1307,7 +1313,7 @@ pub(crate) fn build_specs(
// Do nothing.
}
ConfigShellToolType::ShellCommand => {
builder.push_spec(create_shell_command_tool());
builder.push_spec(create_shell_command_tool(config.request_rule_enabled));
}
}
@@ -1579,7 +1585,7 @@ mod tests {
// Build expected from the same helpers used by the builder.
let mut expected: BTreeMap<String, ToolSpec> = BTreeMap::from([]);
for spec in [
create_exec_command_tool(),
create_exec_command_tool(false),
create_write_stdin_tool(),
create_list_mcp_resources_tool(),
create_list_mcp_resource_templates_tool(),
@@ -2413,7 +2419,7 @@ mod tests {
#[test]
fn test_shell_tool() {
let tool = super::create_shell_tool();
let tool = super::create_shell_tool(false);
let ToolSpec::Function(ResponsesApiTool {
description, name, ..
}) = &tool
@@ -2443,7 +2449,7 @@ Examples of valid command strings:
#[test]
fn test_shell_command_tool() {
let tool = super::create_shell_command_tool();
let tool = super::create_shell_command_tool(false);
let ToolSpec::Function(ResponsesApiTool {
description, name, ..
}) = &tool

View File

@@ -82,6 +82,7 @@ pub(crate) struct ExecCommandRequest {
pub tty: bool,
pub sandbox_permissions: SandboxPermissions,
pub justification: Option<String>,
pub prefix_rule: Option<Vec<String>>,
}
#[derive(Debug)]
@@ -205,6 +206,7 @@ mod tests {
tty: true,
sandbox_permissions: SandboxPermissions::UseDefault,
justification: None,
prefix_rule: None,
},
&context,
)

View File

@@ -11,9 +11,9 @@ use tokio::time::Instant;
use tokio_util::sync::CancellationToken;
use crate::exec_env::create_env;
use crate::exec_policy::ExecApprovalRequest;
use crate::protocol::ExecCommandSource;
use crate::sandboxing::ExecEnv;
use crate::sandboxing::SandboxPermissions;
use crate::tools::events::ToolEmitter;
use crate::tools::events::ToolEventCtx;
use crate::tools::events::ToolEventStage;
@@ -123,14 +123,7 @@ impl UnifiedExecProcessManager {
.unwrap_or_else(|| context.turn.cwd.clone());
let process = self
.open_session_with_sandbox(
&request.command,
cwd.clone(),
request.sandbox_permissions,
request.justification,
request.tty,
context,
)
.open_session_with_sandbox(&request, cwd.clone(), context)
.await;
let process = match process {
@@ -486,11 +479,8 @@ impl UnifiedExecProcessManager {
pub(super) async fn open_session_with_sandbox(
&self,
command: &[String],
request: &ExecCommandRequest,
cwd: PathBuf,
sandbox_permissions: SandboxPermissions,
justification: Option<String>,
tty: bool,
context: &UnifiedExecContext,
) -> Result<UnifiedExecProcess, UnifiedExecError> {
let env = apply_unified_exec_env(create_env(&context.turn.shell_environment_policy));
@@ -501,21 +491,22 @@ impl UnifiedExecProcessManager {
.session
.services
.exec_policy
.create_exec_approval_requirement_for_command(
&features,
command,
context.turn.approval_policy,
&context.turn.sandbox_policy,
sandbox_permissions,
)
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
features: &features,
command: &request.command,
approval_policy: context.turn.approval_policy,
sandbox_policy: &context.turn.sandbox_policy,
sandbox_permissions: request.sandbox_permissions,
prefix_rule: request.prefix_rule.clone(),
})
.await;
let req = UnifiedExecToolRequest::new(
command.to_vec(),
request.command.clone(),
cwd,
env,
tty,
sandbox_permissions,
justification,
request.tty,
request.sandbox_permissions,
request.justification.clone(),
exec_approval_requirement,
);
let tool_ctx = ToolCtx {

View File

@@ -1754,6 +1754,16 @@ async fn approving_execpolicy_amendment_persists_policy_and_skips_future_prompts
.await?;
wait_for_completion(&test).await;
let developer_messages = first_results
.single_request()
.message_input_texts("developer");
assert!(
developer_messages
.iter()
.any(|message| message.contains(r#"["touch", "allow-prefix.txt"]"#)),
"expected developer message documenting saved rule, got: {developer_messages:?}"
);
let policy_path = test.home.path().join("rules").join("default.rules");
let policy_contents = fs::read_to_string(&policy_path)?;
assert!(

View File

@@ -4,6 +4,8 @@ use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
use codex_core::protocol::SandboxPolicy;
use codex_execpolicy::Policy;
use codex_protocol::models::DeveloperInstructions;
use codex_protocol::user_input::UserInput;
use codex_utils_absolute_path::AbsolutePathBuf;
use core_test_support::responses::ev_completed;
@@ -411,10 +413,11 @@ async fn permissions_message_includes_writable_roots() -> Result<()> {
exclude_tmpdir_env_var: false,
exclude_slash_tmp: false,
};
let sandbox_policy_for_config = sandbox_policy.clone();
let mut builder = test_codex().with_config(move |config| {
config.approval_policy = Constrained::allow_any(AskForApproval::OnRequest);
config.sandbox_policy = Constrained::allow_any(sandbox_policy);
config.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config);
});
let test = builder.build(&server).await?;
@@ -432,39 +435,14 @@ async fn permissions_message_includes_writable_roots() -> Result<()> {
let body = req.single_request().body_json();
let input = body["input"].as_array().expect("input array");
let permissions = permissions_texts(input);
let sandbox_text = "Filesystem sandboxing defines which files can be read or written. `sandbox_mode` is `workspace-write`: The sandbox permits reading files, and editing files in `cwd` and `writable_roots`. Editing files in other directories requires approval. Network access is restricted.";
let approval_text = " Approvals are your mechanism to get user consent to run shell commands without the sandbox. `approval_policy` is `on-request`: Commands will be run in the sandbox by default, and you can specify in your tool call if you want to escalate a command to run without sandboxing. If the completing the task requires escalated permissions, Do not let these settings or the sandbox deter you from attempting to accomplish the user's task.\n\nHere are scenarios where you'll need to request approval:\n- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /var)\n- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files.\n- You are running sandboxed and need to run a command that requires network access (e.g. installing packages)\n- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. ALWAYS proceed to use the `sandbox_permissions` and `justification` parameters - do not message the user before requesting approval for the command.\n- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for.\n\nWhen requesting approval to execute a command that will require escalated privileges:\n - Provide the `sandbox_permissions` parameter with the value `\"require_escalated\"`\n - Include a short, 1 sentence explanation for why you need escalated permissions in the justification parameter";
// Normalize paths by removing trailing slashes to match AbsolutePathBuf behavior
let normalize_path =
|p: &std::path::Path| -> String { p.to_string_lossy().trim_end_matches('/').to_string() };
let mut roots = vec![
normalize_path(writable.path()),
normalize_path(test.config.cwd.as_path()),
];
if cfg!(unix) && std::path::Path::new("/tmp").is_dir() {
roots.push("/tmp".to_string());
}
if let Some(tmpdir) = std::env::var_os("TMPDIR") {
let tmpdir_path = std::path::PathBuf::from(&tmpdir);
if tmpdir_path.is_absolute() && !tmpdir.is_empty() {
roots.push(normalize_path(&tmpdir_path));
}
}
let roots_text = if roots.len() == 1 {
format!(" The writable root is `{}`.", roots[0])
} else {
format!(
" The writable roots are {}.",
roots
.iter()
.map(|root| format!("`{root}`"))
.collect::<Vec<_>>()
.join(", ")
)
};
let expected = format!(
"<permissions instructions>{sandbox_text}{approval_text}{roots_text}</permissions instructions>"
);
let expected = DeveloperInstructions::from_policy(
&sandbox_policy,
AskForApproval::OnRequest,
&Policy::empty(),
false,
test.config.cwd.as_path(),
)
.into_text();
// Normalize line endings to handle Windows vs Unix differences
let normalize_line_endings = |s: &str| s.replace("\r\n", "\n");
let expected_normalized = normalize_line_endings(&expected);

View File

@@ -31,6 +31,30 @@ impl Policy {
&self.rules_by_program
}
pub fn get_allowed_prefixes(&self) -> Vec<Vec<String>> {
let mut prefixes = Vec::new();
for (_program, rules) in self.rules_by_program.iter_all() {
for rule in rules {
let Some(prefix_rule) = rule.as_any().downcast_ref::<PrefixRule>() else {
continue;
};
if prefix_rule.decision != Decision::Allow {
continue;
}
let mut prefix = Vec::with_capacity(prefix_rule.pattern.rest.len() + 1);
prefix.push(prefix_rule.pattern.first.as_ref().to_string());
prefix.extend(prefix_rule.pattern.rest.iter().map(render_pattern_token));
prefixes.push(prefix);
}
}
prefixes.sort();
prefixes.dedup();
prefixes
}
pub fn add_prefix_rule(&mut self, prefix: &[String], decision: Decision) -> Result<()> {
let (first_token, rest) = prefix
.split_first()
@@ -116,6 +140,13 @@ impl Policy {
}
}
fn render_pattern_token(token: &PatternToken) -> String {
match token {
PatternToken::Single(value) => value.clone(),
PatternToken::Alts(alternatives) => format!("[{}]", alternatives.join("|")),
}
}
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Evaluation {

View File

@@ -96,6 +96,8 @@ pub trait Rule: Any + Debug + Send + Sync {
fn program(&self) -> &str;
fn matches(&self, cmd: &[String]) -> Option<RuleMatch>;
fn as_any(&self) -> &dyn Any;
}
pub type RuleRef = Arc<dyn Rule>;
@@ -114,6 +116,10 @@ impl Rule for PrefixRule {
justification: self.justification.clone(),
})
}
fn as_any(&self) -> &dyn Any {
self
}
}
/// Count how many rules match each provided example and error if any example is unmatched.

View File

@@ -13,6 +13,7 @@ workspace = true
[dependencies]
codex-git = { workspace = true }
codex-execpolicy = { workspace = true }
codex-utils-absolute-path = { workspace = true }
codex-utils-image = { workspace = true }
icu_decimal = { workspace = true }

View File

@@ -19,6 +19,7 @@ use crate::protocol::NetworkAccess;
use crate::protocol::SandboxPolicy;
use crate::protocol::WritableRoot;
use crate::user_input::UserInput;
use codex_execpolicy::Policy;
use codex_git::GhostCommit;
use codex_utils_image::error::ImageProcessingError;
use schemars::JsonSchema;
@@ -205,6 +206,8 @@ const APPROVAL_POLICY_ON_FAILURE: &str =
include_str!("prompts/permissions/approval_policy/on_failure.md");
const APPROVAL_POLICY_ON_REQUEST: &str =
include_str!("prompts/permissions/approval_policy/on_request.md");
const APPROVAL_POLICY_ON_REQUEST_RULE: &str =
include_str!("prompts/permissions/approval_policy/on_request_rule.md");
const SANDBOX_MODE_DANGER_FULL_ACCESS: &str =
include_str!("prompts/permissions/sandbox_mode/danger_full_access.md");
@@ -217,12 +220,42 @@ impl DeveloperInstructions {
Self { text: text.into() }
}
pub fn from(
approval_policy: AskForApproval,
exec_policy: &Policy,
request_rule_enabled: bool,
) -> DeveloperInstructions {
let text = match approval_policy {
AskForApproval::Never => APPROVAL_POLICY_NEVER.to_string(),
AskForApproval::UnlessTrusted => APPROVAL_POLICY_UNLESS_TRUSTED.to_string(),
AskForApproval::OnFailure => APPROVAL_POLICY_ON_FAILURE.to_string(),
AskForApproval::OnRequest => {
if !request_rule_enabled {
APPROVAL_POLICY_ON_REQUEST.to_string()
} else {
let command_prefixes = format_allow_prefixes(exec_policy);
match command_prefixes {
Some(prefixes) => {
format!("{APPROVAL_POLICY_ON_REQUEST_RULE}\n{prefixes}")
}
None => APPROVAL_POLICY_ON_REQUEST_RULE.to_string(),
}
}
}
};
DeveloperInstructions::new(text)
}
pub fn into_text(self) -> String {
self.text
}
pub fn concat(self, other: impl Into<DeveloperInstructions>) -> Self {
let mut text = self.text;
if !text.ends_with('\n') {
text.push('\n');
}
text.push_str(&other.into().text);
Self { text }
}
@@ -237,6 +270,8 @@ impl DeveloperInstructions {
pub fn from_policy(
sandbox_policy: &SandboxPolicy,
approval_policy: AskForApproval,
exec_policy: &Policy,
request_rule_enabled: bool,
cwd: &Path,
) -> Self {
let network_access = if sandbox_policy.has_full_network_access() {
@@ -259,6 +294,8 @@ impl DeveloperInstructions {
sandbox_mode,
network_access,
approval_policy,
exec_policy,
request_rule_enabled,
writable_roots,
)
}
@@ -281,6 +318,8 @@ impl DeveloperInstructions {
sandbox_mode: SandboxMode,
network_access: NetworkAccess,
approval_policy: AskForApproval,
exec_policy: &Policy,
request_rule_enabled: bool,
writable_roots: Option<Vec<WritableRoot>>,
) -> Self {
let start_tag = DeveloperInstructions::new("<permissions instructions>");
@@ -290,7 +329,11 @@ impl DeveloperInstructions {
sandbox_mode,
network_access,
))
.concat(DeveloperInstructions::from(approval_policy))
.concat(DeveloperInstructions::from(
approval_policy,
exec_policy,
request_rule_enabled,
))
.concat(DeveloperInstructions::from_writable_roots(writable_roots))
.concat(end_tag)
}
@@ -328,6 +371,37 @@ impl DeveloperInstructions {
}
}
pub fn render_command_prefix_list<I, P>(prefixes: I) -> Option<String>
where
I: IntoIterator<Item = P>,
P: AsRef<[String]>,
{
let lines = prefixes
.into_iter()
.map(|prefix| format!("- {}", render_command_prefix(prefix.as_ref())))
.collect::<Vec<_>>();
if lines.is_empty() {
return None;
}
Some(lines.join("\n"))
}
fn render_command_prefix(prefix: &[String]) -> String {
let tokens = prefix
.iter()
.map(|token| serde_json::to_string(token).unwrap_or_else(|_| format!("{token:?}")))
.collect::<Vec<_>>()
.join(", ");
format!("[{tokens}]")
}
fn format_allow_prefixes(exec_policy: &Policy) -> Option<String> {
let prefixes = exec_policy.get_allowed_prefixes();
let lines = render_command_prefix_list(prefixes)?;
Some(format!("Approved command prefixes:\n{lines}"))
}
impl From<DeveloperInstructions> for ResponseItem {
fn from(di: DeveloperInstructions) -> Self {
ResponseItem::Message {
@@ -352,19 +426,6 @@ impl From<SandboxMode> for DeveloperInstructions {
}
}
impl From<AskForApproval> for DeveloperInstructions {
fn from(mode: AskForApproval) -> Self {
let text = match mode {
AskForApproval::Never => APPROVAL_POLICY_NEVER.trim_end(),
AskForApproval::UnlessTrusted => APPROVAL_POLICY_UNLESS_TRUSTED.trim_end(),
AskForApproval::OnFailure => APPROVAL_POLICY_ON_FAILURE.trim_end(),
AskForApproval::OnRequest => APPROVAL_POLICY_ON_REQUEST.trim_end(),
};
DeveloperInstructions::new(text)
}
}
fn should_serialize_reasoning_content(content: &Option<Vec<ReasoningItemContent>>) -> bool {
match content {
Some(content) => !content
@@ -633,6 +694,10 @@ pub struct ShellToolCallParams {
#[serde(default, skip_serializing_if = "Option::is_none")]
#[ts(optional)]
pub sandbox_permissions: Option<SandboxPermissions>,
/// Suggests a command prefix to persist for future sessions
#[serde(default, skip_serializing_if = "Option::is_none")]
#[ts(optional)]
pub prefix_rule: Option<Vec<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub justification: Option<String>,
}
@@ -653,6 +718,9 @@ pub struct ShellCommandToolCallParams {
#[serde(default, skip_serializing_if = "Option::is_none")]
#[ts(optional)]
pub sandbox_permissions: Option<SandboxPermissions>,
#[serde(default, skip_serializing_if = "Option::is_none")]
#[ts(optional)]
pub prefix_rule: Option<Vec<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub justification: Option<String>,
}
@@ -836,6 +904,7 @@ mod tests {
use crate::config_types::SandboxMode;
use crate::protocol::AskForApproval;
use anyhow::Result;
use codex_execpolicy::Policy;
use mcp_types::ImageContent;
use mcp_types::TextContent;
use pretty_assertions::assert_eq;
@@ -844,15 +913,17 @@ mod tests {
#[test]
fn converts_sandbox_mode_into_developer_instructions() {
let workspace_write: DeveloperInstructions = SandboxMode::WorkspaceWrite.into();
assert_eq!(
DeveloperInstructions::from(SandboxMode::WorkspaceWrite),
workspace_write,
DeveloperInstructions::new(
"Filesystem sandboxing defines which files can be read or written. `sandbox_mode` is `workspace-write`: The sandbox permits reading files, and editing files in `cwd` and `writable_roots`. Editing files in other directories requires approval. Network access is restricted."
)
);
let read_only: DeveloperInstructions = SandboxMode::ReadOnly.into();
assert_eq!(
DeveloperInstructions::from(SandboxMode::ReadOnly),
read_only,
DeveloperInstructions::new(
"Filesystem sandboxing defines which files can be read or written. `sandbox_mode` is `read-only`: The sandbox only permits reading files. Network access is restricted."
)
@@ -865,6 +936,8 @@ mod tests {
SandboxMode::WorkspaceWrite,
NetworkAccess::Enabled,
AskForApproval::OnRequest,
&Policy::empty(),
false,
None,
);
@@ -891,6 +964,8 @@ mod tests {
let instructions = DeveloperInstructions::from_policy(
&policy,
AskForApproval::UnlessTrusted,
&Policy::empty(),
false,
&PathBuf::from("/tmp"),
);
let text = instructions.into_text();
@@ -898,6 +973,30 @@ mod tests {
assert!(text.contains("`approval_policy` is `unless-trusted`"));
}
#[test]
fn includes_request_rule_instructions_when_enabled() {
let mut exec_policy = Policy::empty();
exec_policy
.add_prefix_rule(
&["git".to_string(), "pull".to_string()],
codex_execpolicy::Decision::Allow,
)
.expect("add rule");
let instructions = DeveloperInstructions::from_permissions_with_network(
SandboxMode::WorkspaceWrite,
NetworkAccess::Enabled,
AskForApproval::OnRequest,
&exec_policy,
true,
None,
);
let text = instructions.into_text();
assert!(text.contains("prefix_rule"));
assert!(text.contains("Approved command prefixes"));
assert!(text.contains(r#"["git", "pull"]"#));
}
#[test]
fn serializes_success_as_plain_string() -> Result<()> {
let item = ResponseInputItem::FunctionCallOutput {
@@ -1126,6 +1225,7 @@ mod tests {
workdir: Some("/tmp".to_string()),
timeout_ms: Some(1000),
sandbox_permissions: None,
prefix_rule: None,
justification: None,
},
params

View File

@@ -0,0 +1,61 @@
# Escalation Requests
Commands are run outside the sandbox if they are approved by the user, or match an existing rule that allows it to run unrestricted. The command string is split into independent command segments at shell control operators, including but not limited to:
- Pipes: |
- Logical operators: &&, ||
- Command separators: ;
- Subshell boundaries: (...), $(...)
Each resulting segment is evaluated independently for sandbox restrictions and approval requirements.
Example:
git pull | tee output.txt
This is treated as two command segments:
["git", "pull"]
["tee", "output.txt"]
## How to request escalation
IMPORTANT: To request approval to execute a command that will require escalated privileges:
- Provide the `sandbox_permissions` parameter with the value `"require_escalated"`
- Include a short question asking the user if they want to allow the action in `justification` parameter. e.g. "Do you want to download and install dependencies for this project?"
- Suggest a `prefix_rule` - this will be shown to the user with an option to persist the rule approval for future sessions.
If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with "require_escalated". ALWAYS proceed to use the `justification` and `prefix_rule` parameters - do not message the user before requesting approval for the command.
## When to request escalation
While commands are running inside the sandbox, here are some scenarios that will require escalation outside the sandbox:
- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /var)
- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files.
- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with `require_escalated`. ALWAYS proceed to use the `sandbox_permissions` and `justification` parameters. do not message the user before requesting approval for the command.
- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for.
Only run commands that require approval if it is absolutely necessary to solve the user's query, don't try and circumvent approvals by using other tools.
## prefix_rule guidance
When choosing a `prefix_rule`, request one that will allow you to fulfill similar requests from the user in the future without re-requesting escalation. It should be categorical and reasonably scoped to similar capabilities. You MUST NOT pass the entire command into `prefix_rule`.
<good_example reason="frequently run command">
["npm", "run", "dev"]
</good_example>
<good_example reason="generic and reusable">
["gh", "pr", "checks"]
</good_example>
<good_example reason="helpful for development cycle">
["pytest"]
</good_example>
<bad_example reason="too specific">
["cargo", "test", "-p", "codex-app-server"]
<correction_to_good_example>
["cargo", "test"]
</correction_to_good_example>
</bad_example>