mirror of
https://github.com/openai/codex.git
synced 2026-02-01 22:47:52 +00:00
feat(core) RequestRule (#9489)
## Summary Instead of trying to derive the prefix_rule for a command mechanically, let's let the model decide for us. ## Testing - [x] tested locally
This commit is contained in:
2
codex-rs/Cargo.lock
generated
2
codex-rs/Cargo.lock
generated
@@ -1077,6 +1077,7 @@ dependencies = [
|
||||
"codex-chatgpt",
|
||||
"codex-common",
|
||||
"codex-core",
|
||||
"codex-execpolicy",
|
||||
"codex-feedback",
|
||||
"codex-file-search",
|
||||
"codex-login",
|
||||
@@ -1756,6 +1757,7 @@ name = "codex-protocol"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"codex-execpolicy",
|
||||
"codex-git",
|
||||
"codex-utils-absolute-path",
|
||||
"codex-utils-image",
|
||||
|
||||
@@ -56,6 +56,7 @@ axum = { workspace = true, default-features = false, features = [
|
||||
"tokio",
|
||||
] }
|
||||
base64 = { workspace = true }
|
||||
codex-execpolicy = { workspace = true }
|
||||
core_test_support = { workspace = true }
|
||||
mcp-types = { workspace = true }
|
||||
os_info = { workspace = true }
|
||||
|
||||
@@ -11,6 +11,7 @@ use codex_app_server_protocol::NewConversationResponse;
|
||||
use codex_app_server_protocol::RequestId;
|
||||
use codex_app_server_protocol::SendUserMessageParams;
|
||||
use codex_app_server_protocol::SendUserMessageResponse;
|
||||
use codex_execpolicy::Policy;
|
||||
use codex_protocol::ThreadId;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::DeveloperInstructions;
|
||||
@@ -358,6 +359,8 @@ fn assert_permissions_message(item: &ResponseItem) {
|
||||
let expected = DeveloperInstructions::from_policy(
|
||||
&SandboxPolicy::DangerFullAccess,
|
||||
AskForApproval::Never,
|
||||
&Policy::empty(),
|
||||
false,
|
||||
&PathBuf::from("/tmp"),
|
||||
)
|
||||
.into_text();
|
||||
|
||||
@@ -189,6 +189,9 @@
|
||||
"remote_models": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"request_rule": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"responses_websockets": {
|
||||
"type": "boolean"
|
||||
},
|
||||
@@ -1184,6 +1187,9 @@
|
||||
"remote_models": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"request_rule": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"responses_websockets": {
|
||||
"type": "boolean"
|
||||
},
|
||||
|
||||
@@ -190,6 +190,7 @@ use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::DeveloperInstructions;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::models::render_command_prefix_list;
|
||||
use codex_protocol::protocol::CodexErrorInfo;
|
||||
use codex_protocol::protocol::InitialHistory;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
@@ -1226,6 +1227,8 @@ impl Session {
|
||||
DeveloperInstructions::from_policy(
|
||||
&next.sandbox_policy,
|
||||
next.approval_policy,
|
||||
self.services.exec_policy.current().as_ref(),
|
||||
self.features.enabled(Feature::RequestRule),
|
||||
&next.cwd,
|
||||
)
|
||||
.into(),
|
||||
@@ -1416,6 +1419,44 @@ impl Session {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn turn_context_for_sub_id(&self, sub_id: &str) -> Option<Arc<TurnContext>> {
|
||||
let active = self.active_turn.lock().await;
|
||||
active
|
||||
.as_ref()
|
||||
.and_then(|turn| turn.tasks.get(sub_id))
|
||||
.map(|task| Arc::clone(&task.turn_context))
|
||||
}
|
||||
|
||||
pub(crate) async fn record_execpolicy_amendment_message(
|
||||
&self,
|
||||
sub_id: &str,
|
||||
amendment: &ExecPolicyAmendment,
|
||||
) {
|
||||
let Some(prefixes) = render_command_prefix_list([amendment.command.as_slice()]) else {
|
||||
warn!("execpolicy amendment for {sub_id} had no command prefix");
|
||||
return;
|
||||
};
|
||||
let text = format!("Approved command prefix saved:\n{prefixes}");
|
||||
let message: ResponseItem = DeveloperInstructions::new(text.clone()).into();
|
||||
|
||||
if let Some(turn_context) = self.turn_context_for_sub_id(sub_id).await {
|
||||
self.record_conversation_items(&turn_context, std::slice::from_ref(&message))
|
||||
.await;
|
||||
return;
|
||||
}
|
||||
|
||||
if self
|
||||
.inject_response_items(vec![ResponseInputItem::Message {
|
||||
role: "developer".to_string(),
|
||||
content: vec![ContentItem::InputText { text }],
|
||||
}])
|
||||
.await
|
||||
.is_err()
|
||||
{
|
||||
warn!("no active turn found to record execpolicy amendment message for {sub_id}");
|
||||
}
|
||||
}
|
||||
|
||||
/// Emit an exec approval request event and await the user's decision.
|
||||
///
|
||||
/// The request is keyed by `sub_id`/`call_id` so matching responses are delivered
|
||||
@@ -1749,6 +1790,8 @@ impl Session {
|
||||
DeveloperInstructions::from_policy(
|
||||
&turn_context.sandbox_policy,
|
||||
turn_context.approval_policy,
|
||||
self.services.exec_policy.current().as_ref(),
|
||||
self.features.enabled(Feature::RequestRule),
|
||||
&turn_context.cwd,
|
||||
)
|
||||
.into(),
|
||||
@@ -2595,18 +2638,26 @@ mod handlers {
|
||||
if let ReviewDecision::ApprovedExecpolicyAmendment {
|
||||
proposed_execpolicy_amendment,
|
||||
} = &decision
|
||||
&& let Err(err) = sess
|
||||
{
|
||||
match sess
|
||||
.persist_execpolicy_amendment(proposed_execpolicy_amendment)
|
||||
.await
|
||||
{
|
||||
let message = format!("Failed to apply execpolicy amendment: {err}");
|
||||
tracing::warn!("{message}");
|
||||
let warning = EventMsg::Warning(WarningEvent { message });
|
||||
sess.send_event_raw(Event {
|
||||
id: id.clone(),
|
||||
msg: warning,
|
||||
})
|
||||
.await;
|
||||
{
|
||||
Ok(()) => {
|
||||
sess.record_execpolicy_amendment_message(&id, proposed_execpolicy_amendment)
|
||||
.await;
|
||||
}
|
||||
Err(err) => {
|
||||
let message = format!("Failed to apply execpolicy amendment: {err}");
|
||||
tracing::warn!("{message}");
|
||||
let warning = EventMsg::Warning(WarningEvent { message });
|
||||
sess.send_event_raw(Event {
|
||||
id: id.clone(),
|
||||
msg: warning,
|
||||
})
|
||||
.await;
|
||||
}
|
||||
}
|
||||
}
|
||||
match decision {
|
||||
ReviewDecision::Abort => {
|
||||
|
||||
@@ -87,6 +87,15 @@ pub(crate) struct ExecPolicyManager {
|
||||
policy: ArcSwap<Policy>,
|
||||
}
|
||||
|
||||
pub(crate) struct ExecApprovalRequest<'a> {
|
||||
pub(crate) features: &'a Features,
|
||||
pub(crate) command: &'a [String],
|
||||
pub(crate) approval_policy: AskForApproval,
|
||||
pub(crate) sandbox_policy: &'a SandboxPolicy,
|
||||
pub(crate) sandbox_permissions: SandboxPermissions,
|
||||
pub(crate) prefix_rule: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
impl ExecPolicyManager {
|
||||
pub(crate) fn new(policy: Arc<Policy>) -> Self {
|
||||
Self {
|
||||
@@ -112,12 +121,16 @@ impl ExecPolicyManager {
|
||||
|
||||
pub(crate) async fn create_exec_approval_requirement_for_command(
|
||||
&self,
|
||||
features: &Features,
|
||||
command: &[String],
|
||||
approval_policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
sandbox_permissions: SandboxPermissions,
|
||||
req: ExecApprovalRequest<'_>,
|
||||
) -> ExecApprovalRequirement {
|
||||
let ExecApprovalRequest {
|
||||
features,
|
||||
command,
|
||||
approval_policy,
|
||||
sandbox_policy,
|
||||
sandbox_permissions,
|
||||
prefix_rule,
|
||||
} = req;
|
||||
let exec_policy = self.current();
|
||||
let commands =
|
||||
parse_shell_lc_plain_commands(command).unwrap_or_else(|| vec![command.to_vec()]);
|
||||
@@ -131,6 +144,12 @@ impl ExecPolicyManager {
|
||||
};
|
||||
let evaluation = exec_policy.check_multiple(commands.iter(), &exec_policy_fallback);
|
||||
|
||||
let requested_amendment = derive_requested_execpolicy_amendment(
|
||||
features,
|
||||
prefix_rule.as_ref(),
|
||||
&evaluation.matched_rules,
|
||||
);
|
||||
|
||||
match evaluation.decision {
|
||||
Decision::Forbidden => ExecApprovalRequirement::Forbidden {
|
||||
reason: derive_forbidden_reason(command, &evaluation),
|
||||
@@ -144,9 +163,11 @@ impl ExecPolicyManager {
|
||||
ExecApprovalRequirement::NeedsApproval {
|
||||
reason: derive_prompt_reason(command, &evaluation),
|
||||
proposed_execpolicy_amendment: if features.enabled(Feature::ExecPolicy) {
|
||||
try_derive_execpolicy_amendment_for_prompt_rules(
|
||||
&evaluation.matched_rules,
|
||||
)
|
||||
requested_amendment.or_else(|| {
|
||||
try_derive_execpolicy_amendment_for_prompt_rules(
|
||||
&evaluation.matched_rules,
|
||||
)
|
||||
})
|
||||
} else {
|
||||
None
|
||||
},
|
||||
@@ -382,6 +403,30 @@ fn try_derive_execpolicy_amendment_for_allow_rules(
|
||||
})
|
||||
}
|
||||
|
||||
fn derive_requested_execpolicy_amendment(
|
||||
features: &Features,
|
||||
prefix_rule: Option<&Vec<String>>,
|
||||
matched_rules: &[RuleMatch],
|
||||
) -> Option<ExecPolicyAmendment> {
|
||||
if !features.enabled(Feature::ExecPolicy) {
|
||||
return None;
|
||||
}
|
||||
|
||||
let prefix_rule = prefix_rule?;
|
||||
if prefix_rule.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
if matched_rules
|
||||
.iter()
|
||||
.any(|rule_match| is_policy_match(rule_match) && rule_match.decision() == Decision::Prompt)
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(ExecPolicyAmendment::new(prefix_rule.clone()))
|
||||
}
|
||||
|
||||
/// Only return a reason when a policy rule drove the prompt decision.
|
||||
fn derive_prompt_reason(command_args: &[String], evaluation: &Evaluation) -> Option<String> {
|
||||
let command = render_shlex_command(command_args);
|
||||
@@ -756,13 +801,14 @@ prefix_rule(pattern=["rm"], decision="forbidden")
|
||||
|
||||
let manager = ExecPolicyManager::new(policy);
|
||||
let requirement = manager
|
||||
.create_exec_approval_requirement_for_command(
|
||||
&Features::with_defaults(),
|
||||
&forbidden_script,
|
||||
AskForApproval::OnRequest,
|
||||
&SandboxPolicy::DangerFullAccess,
|
||||
SandboxPermissions::UseDefault,
|
||||
)
|
||||
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
|
||||
features: &Features::with_defaults(),
|
||||
command: &forbidden_script,
|
||||
approval_policy: AskForApproval::OnRequest,
|
||||
sandbox_policy: &SandboxPolicy::DangerFullAccess,
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(
|
||||
@@ -790,17 +836,18 @@ prefix_rule(
|
||||
|
||||
let manager = ExecPolicyManager::new(policy);
|
||||
let requirement = manager
|
||||
.create_exec_approval_requirement_for_command(
|
||||
&Features::with_defaults(),
|
||||
&[
|
||||
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
|
||||
features: &Features::with_defaults(),
|
||||
command: &[
|
||||
"rm".to_string(),
|
||||
"-rf".to_string(),
|
||||
"/some/important/folder".to_string(),
|
||||
],
|
||||
AskForApproval::OnRequest,
|
||||
&SandboxPolicy::DangerFullAccess,
|
||||
SandboxPermissions::UseDefault,
|
||||
)
|
||||
approval_policy: AskForApproval::OnRequest,
|
||||
sandbox_policy: &SandboxPolicy::DangerFullAccess,
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(
|
||||
@@ -823,13 +870,14 @@ prefix_rule(
|
||||
|
||||
let manager = ExecPolicyManager::new(policy);
|
||||
let requirement = manager
|
||||
.create_exec_approval_requirement_for_command(
|
||||
&Features::with_defaults(),
|
||||
&command,
|
||||
AskForApproval::OnRequest,
|
||||
&SandboxPolicy::DangerFullAccess,
|
||||
SandboxPermissions::UseDefault,
|
||||
)
|
||||
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
|
||||
features: &Features::with_defaults(),
|
||||
command: &command,
|
||||
approval_policy: AskForApproval::OnRequest,
|
||||
sandbox_policy: &SandboxPolicy::DangerFullAccess,
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(
|
||||
@@ -853,13 +901,14 @@ prefix_rule(
|
||||
|
||||
let manager = ExecPolicyManager::new(policy);
|
||||
let requirement = manager
|
||||
.create_exec_approval_requirement_for_command(
|
||||
&Features::with_defaults(),
|
||||
&command,
|
||||
AskForApproval::Never,
|
||||
&SandboxPolicy::DangerFullAccess,
|
||||
SandboxPermissions::UseDefault,
|
||||
)
|
||||
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
|
||||
features: &Features::with_defaults(),
|
||||
command: &command,
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: &SandboxPolicy::DangerFullAccess,
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(
|
||||
@@ -876,13 +925,14 @@ prefix_rule(
|
||||
|
||||
let manager = ExecPolicyManager::default();
|
||||
let requirement = manager
|
||||
.create_exec_approval_requirement_for_command(
|
||||
&Features::with_defaults(),
|
||||
&command,
|
||||
AskForApproval::UnlessTrusted,
|
||||
&SandboxPolicy::ReadOnly,
|
||||
SandboxPermissions::UseDefault,
|
||||
)
|
||||
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
|
||||
features: &Features::with_defaults(),
|
||||
command: &command,
|
||||
approval_policy: AskForApproval::UnlessTrusted,
|
||||
sandbox_policy: &SandboxPolicy::ReadOnly,
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(
|
||||
@@ -894,6 +944,40 @@ prefix_rule(
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn request_rule_uses_prefix_rule() {
|
||||
let command = vec![
|
||||
"cargo".to_string(),
|
||||
"install".to_string(),
|
||||
"cargo-insta".to_string(),
|
||||
];
|
||||
let manager = ExecPolicyManager::default();
|
||||
let mut features = Features::with_defaults();
|
||||
features.enable(Feature::RequestRule);
|
||||
|
||||
let requirement = manager
|
||||
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
|
||||
features: &features,
|
||||
command: &command,
|
||||
approval_policy: AskForApproval::OnRequest,
|
||||
sandbox_policy: &SandboxPolicy::ReadOnly,
|
||||
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
||||
prefix_rule: Some(vec!["cargo".to_string(), "install".to_string()]),
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(
|
||||
requirement,
|
||||
ExecApprovalRequirement::NeedsApproval {
|
||||
reason: None,
|
||||
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec![
|
||||
"cargo".to_string(),
|
||||
"install".to_string(),
|
||||
])),
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn heuristics_apply_when_other_commands_match_policy() {
|
||||
let policy_src = r#"prefix_rule(pattern=["apple"], decision="allow")"#;
|
||||
@@ -910,13 +994,14 @@ prefix_rule(
|
||||
|
||||
assert_eq!(
|
||||
ExecPolicyManager::new(policy)
|
||||
.create_exec_approval_requirement_for_command(
|
||||
&Features::with_defaults(),
|
||||
&command,
|
||||
AskForApproval::UnlessTrusted,
|
||||
&SandboxPolicy::DangerFullAccess,
|
||||
SandboxPermissions::UseDefault,
|
||||
)
|
||||
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
|
||||
features: &Features::with_defaults(),
|
||||
command: &command,
|
||||
approval_policy: AskForApproval::UnlessTrusted,
|
||||
sandbox_policy: &SandboxPolicy::DangerFullAccess,
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
})
|
||||
.await,
|
||||
ExecApprovalRequirement::NeedsApproval {
|
||||
reason: None,
|
||||
@@ -984,13 +1069,14 @@ prefix_rule(
|
||||
|
||||
let manager = ExecPolicyManager::default();
|
||||
let requirement = manager
|
||||
.create_exec_approval_requirement_for_command(
|
||||
&Features::with_defaults(),
|
||||
&command,
|
||||
AskForApproval::UnlessTrusted,
|
||||
&SandboxPolicy::ReadOnly,
|
||||
SandboxPermissions::UseDefault,
|
||||
)
|
||||
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
|
||||
features: &Features::with_defaults(),
|
||||
command: &command,
|
||||
approval_policy: AskForApproval::UnlessTrusted,
|
||||
sandbox_policy: &SandboxPolicy::ReadOnly,
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(
|
||||
@@ -1011,13 +1097,14 @@ prefix_rule(
|
||||
|
||||
let manager = ExecPolicyManager::default();
|
||||
let requirement = manager
|
||||
.create_exec_approval_requirement_for_command(
|
||||
&features,
|
||||
&command,
|
||||
AskForApproval::UnlessTrusted,
|
||||
&SandboxPolicy::ReadOnly,
|
||||
SandboxPermissions::UseDefault,
|
||||
)
|
||||
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
|
||||
features: &features,
|
||||
command: &command,
|
||||
approval_policy: AskForApproval::UnlessTrusted,
|
||||
sandbox_policy: &SandboxPolicy::ReadOnly,
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(
|
||||
@@ -1041,13 +1128,14 @@ prefix_rule(
|
||||
|
||||
let manager = ExecPolicyManager::new(policy);
|
||||
let requirement = manager
|
||||
.create_exec_approval_requirement_for_command(
|
||||
&Features::with_defaults(),
|
||||
&command,
|
||||
AskForApproval::OnRequest,
|
||||
&SandboxPolicy::DangerFullAccess,
|
||||
SandboxPermissions::UseDefault,
|
||||
)
|
||||
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
|
||||
features: &Features::with_defaults(),
|
||||
command: &command,
|
||||
approval_policy: AskForApproval::OnRequest,
|
||||
sandbox_policy: &SandboxPolicy::DangerFullAccess,
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(
|
||||
@@ -1068,13 +1156,14 @@ prefix_rule(
|
||||
];
|
||||
let manager = ExecPolicyManager::default();
|
||||
let requirement = manager
|
||||
.create_exec_approval_requirement_for_command(
|
||||
&Features::with_defaults(),
|
||||
&command,
|
||||
AskForApproval::UnlessTrusted,
|
||||
&SandboxPolicy::ReadOnly,
|
||||
SandboxPermissions::UseDefault,
|
||||
)
|
||||
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
|
||||
features: &Features::with_defaults(),
|
||||
command: &command,
|
||||
approval_policy: AskForApproval::UnlessTrusted,
|
||||
sandbox_policy: &SandboxPolicy::ReadOnly,
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(
|
||||
@@ -1106,13 +1195,14 @@ prefix_rule(
|
||||
|
||||
assert_eq!(
|
||||
ExecPolicyManager::new(policy)
|
||||
.create_exec_approval_requirement_for_command(
|
||||
&Features::with_defaults(),
|
||||
&command,
|
||||
AskForApproval::UnlessTrusted,
|
||||
&SandboxPolicy::ReadOnly,
|
||||
SandboxPermissions::UseDefault,
|
||||
)
|
||||
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
|
||||
features: &Features::with_defaults(),
|
||||
command: &command,
|
||||
approval_policy: AskForApproval::UnlessTrusted,
|
||||
sandbox_policy: &SandboxPolicy::ReadOnly,
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
})
|
||||
.await,
|
||||
ExecApprovalRequirement::NeedsApproval {
|
||||
reason: None,
|
||||
@@ -1129,13 +1219,14 @@ prefix_rule(
|
||||
|
||||
let manager = ExecPolicyManager::default();
|
||||
let requirement = manager
|
||||
.create_exec_approval_requirement_for_command(
|
||||
&Features::with_defaults(),
|
||||
&command,
|
||||
AskForApproval::OnRequest,
|
||||
&SandboxPolicy::ReadOnly,
|
||||
SandboxPermissions::UseDefault,
|
||||
)
|
||||
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
|
||||
features: &Features::with_defaults(),
|
||||
command: &command,
|
||||
approval_policy: AskForApproval::OnRequest,
|
||||
sandbox_policy: &SandboxPolicy::ReadOnly,
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(
|
||||
@@ -1159,13 +1250,14 @@ prefix_rule(
|
||||
|
||||
let manager = ExecPolicyManager::new(policy);
|
||||
let requirement = manager
|
||||
.create_exec_approval_requirement_for_command(
|
||||
&Features::with_defaults(),
|
||||
&command,
|
||||
AskForApproval::OnRequest,
|
||||
&SandboxPolicy::ReadOnly,
|
||||
SandboxPermissions::UseDefault,
|
||||
)
|
||||
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
|
||||
features: &Features::with_defaults(),
|
||||
command: &command,
|
||||
approval_policy: AskForApproval::OnRequest,
|
||||
sandbox_policy: &SandboxPolicy::ReadOnly,
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(
|
||||
@@ -1226,13 +1318,14 @@ prefix_rule(
|
||||
assert_eq!(
|
||||
expected_req,
|
||||
policy
|
||||
.create_exec_approval_requirement_for_command(
|
||||
&features,
|
||||
&sneaky_command,
|
||||
AskForApproval::OnRequest,
|
||||
&SandboxPolicy::ReadOnly,
|
||||
permissions,
|
||||
)
|
||||
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
|
||||
features: &features,
|
||||
command: &sneaky_command,
|
||||
approval_policy: AskForApproval::OnRequest,
|
||||
sandbox_policy: &SandboxPolicy::ReadOnly,
|
||||
sandbox_permissions: permissions,
|
||||
prefix_rule: None,
|
||||
})
|
||||
.await,
|
||||
"{pwsh_approval_reason}"
|
||||
);
|
||||
@@ -1249,13 +1342,14 @@ prefix_rule(
|
||||
]))),
|
||||
},
|
||||
policy
|
||||
.create_exec_approval_requirement_for_command(
|
||||
&features,
|
||||
&dangerous_command,
|
||||
AskForApproval::OnRequest,
|
||||
&SandboxPolicy::ReadOnly,
|
||||
permissions,
|
||||
)
|
||||
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
|
||||
features: &features,
|
||||
command: &dangerous_command,
|
||||
approval_policy: AskForApproval::OnRequest,
|
||||
sandbox_policy: &SandboxPolicy::ReadOnly,
|
||||
sandbox_permissions: permissions,
|
||||
prefix_rule: None,
|
||||
})
|
||||
.await,
|
||||
r#"On all platforms, a forbidden command should require approval
|
||||
(unless AskForApproval::Never is specified)."#
|
||||
@@ -1268,13 +1362,14 @@ prefix_rule(
|
||||
reason: "`rm -rf /important/data` rejected: blocked by policy".to_string(),
|
||||
},
|
||||
policy
|
||||
.create_exec_approval_requirement_for_command(
|
||||
&features,
|
||||
&dangerous_command,
|
||||
AskForApproval::Never,
|
||||
&SandboxPolicy::ReadOnly,
|
||||
permissions,
|
||||
)
|
||||
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
|
||||
features: &features,
|
||||
command: &dangerous_command,
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: &SandboxPolicy::ReadOnly,
|
||||
sandbox_permissions: permissions,
|
||||
prefix_rule: None,
|
||||
})
|
||||
.await,
|
||||
r#"On all platforms, a forbidden command should require approval
|
||||
(unless AskForApproval::Never is specified)."#
|
||||
|
||||
@@ -89,6 +89,8 @@ pub enum Feature {
|
||||
WebSearchCached,
|
||||
/// Gate the execpolicy enforcement for shell/unified exec.
|
||||
ExecPolicy,
|
||||
/// Allow the model to request approval and propose exec rules.
|
||||
RequestRule,
|
||||
/// Enable Windows sandbox (restricted token) on Windows.
|
||||
WindowsSandbox,
|
||||
/// Use the elevated Windows sandbox pipeline (setup + runner).
|
||||
@@ -393,6 +395,12 @@ pub const FEATURES: &[FeatureSpec] = &[
|
||||
stage: Stage::UnderDevelopment,
|
||||
default_enabled: true,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::RequestRule,
|
||||
key: "request_rule",
|
||||
stage: Stage::UnderDevelopment,
|
||||
default_enabled: false,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::WindowsSandbox,
|
||||
key: "experimental_windows_sandbox",
|
||||
|
||||
@@ -6,6 +6,7 @@ use std::sync::Arc;
|
||||
use crate::codex::TurnContext;
|
||||
use crate::exec::ExecParams;
|
||||
use crate::exec_env::create_env;
|
||||
use crate::exec_policy::ExecApprovalRequest;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::is_safe_command::is_known_safe_command;
|
||||
use crate::protocol::ExecCommandSource;
|
||||
@@ -28,16 +29,27 @@ pub struct ShellHandler;
|
||||
|
||||
pub struct ShellCommandHandler;
|
||||
|
||||
struct RunExecLikeArgs {
|
||||
tool_name: String,
|
||||
exec_params: ExecParams,
|
||||
prefix_rule: Option<Vec<String>>,
|
||||
session: Arc<crate::codex::Session>,
|
||||
turn: Arc<TurnContext>,
|
||||
tracker: crate::tools::context::SharedTurnDiffTracker,
|
||||
call_id: String,
|
||||
freeform: bool,
|
||||
}
|
||||
|
||||
impl ShellHandler {
|
||||
fn to_exec_params(params: ShellToolCallParams, turn_context: &TurnContext) -> ExecParams {
|
||||
fn to_exec_params(params: &ShellToolCallParams, turn_context: &TurnContext) -> ExecParams {
|
||||
ExecParams {
|
||||
command: params.command,
|
||||
command: params.command.clone(),
|
||||
cwd: turn_context.resolve_path(params.workdir.clone()),
|
||||
expiration: params.timeout_ms.into(),
|
||||
env: create_env(&turn_context.shell_environment_policy),
|
||||
sandbox_permissions: params.sandbox_permissions.unwrap_or_default(),
|
||||
windows_sandbox_level: turn_context.windows_sandbox_level,
|
||||
justification: params.justification,
|
||||
justification: params.justification.clone(),
|
||||
arg0: None,
|
||||
}
|
||||
}
|
||||
@@ -50,7 +62,7 @@ impl ShellCommandHandler {
|
||||
}
|
||||
|
||||
fn to_exec_params(
|
||||
params: ShellCommandToolCallParams,
|
||||
params: &ShellCommandToolCallParams,
|
||||
session: &crate::codex::Session,
|
||||
turn_context: &TurnContext,
|
||||
) -> ExecParams {
|
||||
@@ -64,7 +76,7 @@ impl ShellCommandHandler {
|
||||
env: create_env(&turn_context.shell_environment_policy),
|
||||
sandbox_permissions: params.sandbox_permissions.unwrap_or_default(),
|
||||
windows_sandbox_level: turn_context.windows_sandbox_level,
|
||||
justification: params.justification,
|
||||
justification: params.justification.clone(),
|
||||
arg0: None,
|
||||
}
|
||||
}
|
||||
@@ -108,29 +120,32 @@ impl ToolHandler for ShellHandler {
|
||||
match payload {
|
||||
ToolPayload::Function { arguments } => {
|
||||
let params: ShellToolCallParams = parse_arguments(&arguments)?;
|
||||
let exec_params = Self::to_exec_params(params, turn.as_ref());
|
||||
Self::run_exec_like(
|
||||
tool_name.as_str(),
|
||||
let prefix_rule = params.prefix_rule.clone();
|
||||
let exec_params = Self::to_exec_params(¶ms, turn.as_ref());
|
||||
Self::run_exec_like(RunExecLikeArgs {
|
||||
tool_name: tool_name.clone(),
|
||||
exec_params,
|
||||
prefix_rule,
|
||||
session,
|
||||
turn,
|
||||
tracker,
|
||||
call_id,
|
||||
false,
|
||||
)
|
||||
freeform: false,
|
||||
})
|
||||
.await
|
||||
}
|
||||
ToolPayload::LocalShell { params } => {
|
||||
let exec_params = Self::to_exec_params(params, turn.as_ref());
|
||||
Self::run_exec_like(
|
||||
tool_name.as_str(),
|
||||
let exec_params = Self::to_exec_params(¶ms, turn.as_ref());
|
||||
Self::run_exec_like(RunExecLikeArgs {
|
||||
tool_name: tool_name.clone(),
|
||||
exec_params,
|
||||
prefix_rule: None,
|
||||
session,
|
||||
turn,
|
||||
tracker,
|
||||
call_id,
|
||||
false,
|
||||
)
|
||||
freeform: false,
|
||||
})
|
||||
.await
|
||||
}
|
||||
_ => Err(FunctionCallError::RespondToModel(format!(
|
||||
@@ -181,30 +196,43 @@ impl ToolHandler for ShellCommandHandler {
|
||||
};
|
||||
|
||||
let params: ShellCommandToolCallParams = parse_arguments(&arguments)?;
|
||||
let exec_params = Self::to_exec_params(params, session.as_ref(), turn.as_ref());
|
||||
ShellHandler::run_exec_like(
|
||||
tool_name.as_str(),
|
||||
let prefix_rule = params.prefix_rule.clone();
|
||||
let exec_params = Self::to_exec_params(¶ms, session.as_ref(), turn.as_ref());
|
||||
ShellHandler::run_exec_like(RunExecLikeArgs {
|
||||
tool_name,
|
||||
exec_params,
|
||||
prefix_rule,
|
||||
session,
|
||||
turn,
|
||||
tracker,
|
||||
call_id,
|
||||
true,
|
||||
)
|
||||
freeform: true,
|
||||
})
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
||||
impl ShellHandler {
|
||||
async fn run_exec_like(
|
||||
tool_name: &str,
|
||||
exec_params: ExecParams,
|
||||
session: Arc<crate::codex::Session>,
|
||||
turn: Arc<TurnContext>,
|
||||
tracker: crate::tools::context::SharedTurnDiffTracker,
|
||||
call_id: String,
|
||||
freeform: bool,
|
||||
) -> Result<ToolOutput, FunctionCallError> {
|
||||
async fn run_exec_like(args: RunExecLikeArgs) -> Result<ToolOutput, FunctionCallError> {
|
||||
let RunExecLikeArgs {
|
||||
tool_name,
|
||||
exec_params,
|
||||
prefix_rule,
|
||||
session,
|
||||
turn,
|
||||
tracker,
|
||||
call_id,
|
||||
freeform,
|
||||
} = args;
|
||||
|
||||
let features = session.features();
|
||||
let request_rule_enabled = features.enabled(crate::features::Feature::RequestRule);
|
||||
let prefix_rule = if request_rule_enabled {
|
||||
prefix_rule
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Approval policy guard for explicit escalation in non-OnRequest modes.
|
||||
if exec_params
|
||||
.sandbox_permissions
|
||||
@@ -214,9 +242,9 @@ impl ShellHandler {
|
||||
codex_protocol::protocol::AskForApproval::OnRequest
|
||||
)
|
||||
{
|
||||
let approval_policy = turn.approval_policy;
|
||||
return Err(FunctionCallError::RespondToModel(format!(
|
||||
"approval policy is {policy:?}; reject command — you should not ask for escalated permissions if the approval policy is {policy:?}",
|
||||
policy = turn.approval_policy
|
||||
"approval policy is {approval_policy:?}; reject command — you should not ask for escalated permissions if the approval policy is {approval_policy:?}"
|
||||
)));
|
||||
}
|
||||
|
||||
@@ -229,7 +257,7 @@ impl ShellHandler {
|
||||
turn.as_ref(),
|
||||
Some(&tracker),
|
||||
&call_id,
|
||||
tool_name,
|
||||
tool_name.as_str(),
|
||||
)
|
||||
.await?
|
||||
{
|
||||
@@ -246,17 +274,17 @@ impl ShellHandler {
|
||||
let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None);
|
||||
emitter.begin(event_ctx).await;
|
||||
|
||||
let features = session.features();
|
||||
let exec_approval_requirement = session
|
||||
.services
|
||||
.exec_policy
|
||||
.create_exec_approval_requirement_for_command(
|
||||
&features,
|
||||
&exec_params.command,
|
||||
turn.approval_policy,
|
||||
&turn.sandbox_policy,
|
||||
exec_params.sandbox_permissions,
|
||||
)
|
||||
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
|
||||
features: &features,
|
||||
command: &exec_params.command,
|
||||
approval_policy: turn.approval_policy,
|
||||
sandbox_policy: &turn.sandbox_policy,
|
||||
sandbox_permissions: exec_params.sandbox_permissions,
|
||||
prefix_rule,
|
||||
})
|
||||
.await;
|
||||
|
||||
let req = ShellRequest {
|
||||
@@ -274,7 +302,7 @@ impl ShellHandler {
|
||||
session: session.as_ref(),
|
||||
turn: turn.as_ref(),
|
||||
call_id: call_id.clone(),
|
||||
tool_name: tool_name.to_string(),
|
||||
tool_name,
|
||||
};
|
||||
let out = orchestrator
|
||||
.run(&mut runtime, &req, &tool_ctx, &turn, turn.approval_policy)
|
||||
@@ -377,10 +405,11 @@ mod tests {
|
||||
login,
|
||||
timeout_ms,
|
||||
sandbox_permissions: Some(sandbox_permissions),
|
||||
prefix_rule: None,
|
||||
justification: justification.clone(),
|
||||
};
|
||||
|
||||
let exec_params = ShellCommandHandler::to_exec_params(params, &session, &turn_context);
|
||||
let exec_params = ShellCommandHandler::to_exec_params(¶ms, &session, &turn_context);
|
||||
|
||||
// ExecParams cannot derive Eq due to the CancellationToken field, so we manually compare the fields.
|
||||
assert_eq!(exec_params.command, expected_command);
|
||||
|
||||
@@ -43,6 +43,8 @@ struct ExecCommandArgs {
|
||||
sandbox_permissions: SandboxPermissions,
|
||||
#[serde(default)]
|
||||
justification: Option<String>,
|
||||
#[serde(default)]
|
||||
prefix_rule: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
@@ -135,19 +137,28 @@ impl ToolHandler for UnifiedExecHandler {
|
||||
max_output_tokens,
|
||||
sandbox_permissions,
|
||||
justification,
|
||||
prefix_rule,
|
||||
..
|
||||
} = args;
|
||||
|
||||
let features = session.features();
|
||||
let request_rule_enabled = features.enabled(crate::features::Feature::RequestRule);
|
||||
let prefix_rule = if request_rule_enabled {
|
||||
prefix_rule
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
if sandbox_permissions.requires_escalated_permissions()
|
||||
&& !matches!(
|
||||
context.turn.approval_policy,
|
||||
codex_protocol::protocol::AskForApproval::OnRequest
|
||||
)
|
||||
{
|
||||
let approval_policy = context.turn.approval_policy;
|
||||
manager.release_process_id(&process_id).await;
|
||||
return Err(FunctionCallError::RespondToModel(format!(
|
||||
"approval policy is {policy:?}; reject command — you cannot ask for escalated permissions if the approval policy is {policy:?}",
|
||||
policy = context.turn.approval_policy
|
||||
"approval policy is {approval_policy:?}; reject command — you cannot ask for escalated permissions if the approval policy is {approval_policy:?}"
|
||||
)));
|
||||
}
|
||||
|
||||
@@ -183,6 +194,7 @@ impl ToolHandler for UnifiedExecHandler {
|
||||
tty,
|
||||
sandbox_permissions,
|
||||
justification,
|
||||
prefix_rule,
|
||||
},
|
||||
&context,
|
||||
)
|
||||
|
||||
@@ -114,6 +114,7 @@ impl ToolRouter {
|
||||
workdir: exec.working_directory,
|
||||
timeout_ms: exec.timeout_ms,
|
||||
sandbox_permissions: Some(SandboxPermissions::UseDefault),
|
||||
prefix_rule: None,
|
||||
justification: None,
|
||||
};
|
||||
Ok(Some(ToolCall {
|
||||
|
||||
@@ -30,6 +30,7 @@ pub(crate) struct ToolsConfig {
|
||||
pub web_search_mode: Option<WebSearchMode>,
|
||||
pub collab_tools: bool,
|
||||
pub collaboration_modes_tools: bool,
|
||||
pub request_rule_enabled: bool,
|
||||
pub experimental_supported_tools: Vec<String>,
|
||||
}
|
||||
|
||||
@@ -49,6 +50,7 @@ impl ToolsConfig {
|
||||
let include_apply_patch_tool = features.enabled(Feature::ApplyPatchFreeform);
|
||||
let include_collab_tools = features.enabled(Feature::Collab);
|
||||
let include_collaboration_modes_tools = features.enabled(Feature::CollaborationModes);
|
||||
let request_rule_enabled = features.enabled(Feature::RequestRule);
|
||||
|
||||
let shell_type = if !features.enabled(Feature::ShellTool) {
|
||||
ConfigShellToolType::Disabled
|
||||
@@ -81,6 +83,7 @@ impl ToolsConfig {
|
||||
web_search_mode: *web_search_mode,
|
||||
collab_tools: include_collab_tools,
|
||||
collaboration_modes_tools: include_collaboration_modes_tools,
|
||||
request_rule_enabled,
|
||||
experimental_supported_tools: model_info.experimental_supported_tools.clone(),
|
||||
}
|
||||
}
|
||||
@@ -142,8 +145,50 @@ impl From<JsonSchema> for AdditionalProperties {
|
||||
}
|
||||
}
|
||||
|
||||
fn create_exec_command_tool() -> ToolSpec {
|
||||
let properties = BTreeMap::from([
|
||||
fn create_approval_parameters(include_prefix_rule: bool) -> BTreeMap<String, JsonSchema> {
|
||||
let mut properties = BTreeMap::from([
|
||||
(
|
||||
"sandbox_permissions".to_string(),
|
||||
JsonSchema::String {
|
||||
description: Some(
|
||||
"Sandbox permissions for the command. Set to \"require_escalated\" to request running without sandbox restrictions; defaults to \"use_default\"."
|
||||
.to_string(),
|
||||
),
|
||||
},
|
||||
),
|
||||
(
|
||||
"justification".to_string(),
|
||||
JsonSchema::String {
|
||||
description: Some(
|
||||
r#"Only set if sandbox_permissions is \"require_escalated\".
|
||||
Request approval from the user to run this command outside the sandbox.
|
||||
Phrased as a simple question that summarizes the purpose of the
|
||||
command as it relates to the task at hand - e.g. 'Do you want to
|
||||
fetch and pull the latest version of this git branch?'"#
|
||||
.to_string(),
|
||||
),
|
||||
},
|
||||
),
|
||||
]);
|
||||
|
||||
if include_prefix_rule {
|
||||
properties.insert(
|
||||
"prefix_rule".to_string(),
|
||||
JsonSchema::Array {
|
||||
items: Box::new(JsonSchema::String { description: None }),
|
||||
description: Some(
|
||||
r#"Only specify when sandbox_permissions is `require_escalated`.
|
||||
Suggest a prefix command pattern that will allow you to fulfill similar requests from the user in the future.
|
||||
Should be a short but reasonable prefix, e.g. [\"git\", \"pull\"] or [\"uv\", \"run\"] or [\"pytest\"]."#.to_string(),
|
||||
),
|
||||
});
|
||||
}
|
||||
|
||||
properties
|
||||
}
|
||||
|
||||
fn create_exec_command_tool(include_prefix_rule: bool) -> ToolSpec {
|
||||
let mut properties = BTreeMap::from([
|
||||
(
|
||||
"cmd".to_string(),
|
||||
JsonSchema::String {
|
||||
@@ -199,25 +244,8 @@ fn create_exec_command_tool() -> ToolSpec {
|
||||
),
|
||||
},
|
||||
),
|
||||
(
|
||||
"sandbox_permissions".to_string(),
|
||||
JsonSchema::String {
|
||||
description: Some(
|
||||
"Sandbox permissions for the command. Set to \"require_escalated\" to request running without sandbox restrictions; defaults to \"use_default\"."
|
||||
.to_string(),
|
||||
),
|
||||
},
|
||||
),
|
||||
(
|
||||
"justification".to_string(),
|
||||
JsonSchema::String {
|
||||
description: Some(
|
||||
"Only set if sandbox_permissions is \"require_escalated\". 1-sentence explanation of why we want to run this command."
|
||||
.to_string(),
|
||||
),
|
||||
},
|
||||
),
|
||||
]);
|
||||
properties.extend(create_approval_parameters(include_prefix_rule));
|
||||
|
||||
ToolSpec::Function(ResponsesApiTool {
|
||||
name: "exec_command".to_string(),
|
||||
@@ -280,8 +308,8 @@ fn create_write_stdin_tool() -> ToolSpec {
|
||||
})
|
||||
}
|
||||
|
||||
fn create_shell_tool() -> ToolSpec {
|
||||
let properties = BTreeMap::from([
|
||||
fn create_shell_tool(include_prefix_rule: bool) -> ToolSpec {
|
||||
let mut properties = BTreeMap::from([
|
||||
(
|
||||
"command".to_string(),
|
||||
JsonSchema::Array {
|
||||
@@ -301,19 +329,8 @@ fn create_shell_tool() -> ToolSpec {
|
||||
description: Some("The timeout for the command in milliseconds".to_string()),
|
||||
},
|
||||
),
|
||||
(
|
||||
"sandbox_permissions".to_string(),
|
||||
JsonSchema::String {
|
||||
description: Some("Sandbox permissions for the command. Set to \"require_escalated\" to request running without sandbox restrictions; defaults to \"use_default\".".to_string()),
|
||||
},
|
||||
),
|
||||
(
|
||||
"justification".to_string(),
|
||||
JsonSchema::String {
|
||||
description: Some("Only set if sandbox_permissions is \"require_escalated\". 1-sentence explanation of why we want to run this command.".to_string()),
|
||||
},
|
||||
),
|
||||
]);
|
||||
properties.extend(create_approval_parameters(include_prefix_rule));
|
||||
|
||||
let description = if cfg!(windows) {
|
||||
r#"Runs a Powershell command (Windows) and returns its output. Arguments to `shell` will be passed to CreateProcessW(). Most commands should be prefixed with ["powershell.exe", "-Command"].
|
||||
@@ -344,8 +361,8 @@ Examples of valid command strings:
|
||||
})
|
||||
}
|
||||
|
||||
fn create_shell_command_tool() -> ToolSpec {
|
||||
let properties = BTreeMap::from([
|
||||
fn create_shell_command_tool(include_prefix_rule: bool) -> ToolSpec {
|
||||
let mut properties = BTreeMap::from([
|
||||
(
|
||||
"command".to_string(),
|
||||
JsonSchema::String {
|
||||
@@ -375,19 +392,8 @@ fn create_shell_command_tool() -> ToolSpec {
|
||||
description: Some("The timeout for the command in milliseconds".to_string()),
|
||||
},
|
||||
),
|
||||
(
|
||||
"sandbox_permissions".to_string(),
|
||||
JsonSchema::String {
|
||||
description: Some("Sandbox permissions for the command. Set to \"require_escalated\" to request running without sandbox restrictions; defaults to \"use_default\".".to_string()),
|
||||
},
|
||||
),
|
||||
(
|
||||
"justification".to_string(),
|
||||
JsonSchema::String {
|
||||
description: Some("Only set if sandbox_permissions is \"require_escalated\". 1-sentence explanation of why we want to run this command.".to_string()),
|
||||
},
|
||||
),
|
||||
]);
|
||||
properties.extend(create_approval_parameters(include_prefix_rule));
|
||||
|
||||
let description = if cfg!(windows) {
|
||||
r#"Runs a Powershell command (Windows) and returns its output.
|
||||
@@ -1292,13 +1298,13 @@ pub(crate) fn build_specs(
|
||||
|
||||
match &config.shell_type {
|
||||
ConfigShellToolType::Default => {
|
||||
builder.push_spec(create_shell_tool());
|
||||
builder.push_spec(create_shell_tool(config.request_rule_enabled));
|
||||
}
|
||||
ConfigShellToolType::Local => {
|
||||
builder.push_spec(ToolSpec::LocalShell {});
|
||||
}
|
||||
ConfigShellToolType::UnifiedExec => {
|
||||
builder.push_spec(create_exec_command_tool());
|
||||
builder.push_spec(create_exec_command_tool(config.request_rule_enabled));
|
||||
builder.push_spec(create_write_stdin_tool());
|
||||
builder.register_handler("exec_command", unified_exec_handler.clone());
|
||||
builder.register_handler("write_stdin", unified_exec_handler);
|
||||
@@ -1307,7 +1313,7 @@ pub(crate) fn build_specs(
|
||||
// Do nothing.
|
||||
}
|
||||
ConfigShellToolType::ShellCommand => {
|
||||
builder.push_spec(create_shell_command_tool());
|
||||
builder.push_spec(create_shell_command_tool(config.request_rule_enabled));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1579,7 +1585,7 @@ mod tests {
|
||||
// Build expected from the same helpers used by the builder.
|
||||
let mut expected: BTreeMap<String, ToolSpec> = BTreeMap::from([]);
|
||||
for spec in [
|
||||
create_exec_command_tool(),
|
||||
create_exec_command_tool(false),
|
||||
create_write_stdin_tool(),
|
||||
create_list_mcp_resources_tool(),
|
||||
create_list_mcp_resource_templates_tool(),
|
||||
@@ -2413,7 +2419,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_shell_tool() {
|
||||
let tool = super::create_shell_tool();
|
||||
let tool = super::create_shell_tool(false);
|
||||
let ToolSpec::Function(ResponsesApiTool {
|
||||
description, name, ..
|
||||
}) = &tool
|
||||
@@ -2443,7 +2449,7 @@ Examples of valid command strings:
|
||||
|
||||
#[test]
|
||||
fn test_shell_command_tool() {
|
||||
let tool = super::create_shell_command_tool();
|
||||
let tool = super::create_shell_command_tool(false);
|
||||
let ToolSpec::Function(ResponsesApiTool {
|
||||
description, name, ..
|
||||
}) = &tool
|
||||
|
||||
@@ -82,6 +82,7 @@ pub(crate) struct ExecCommandRequest {
|
||||
pub tty: bool,
|
||||
pub sandbox_permissions: SandboxPermissions,
|
||||
pub justification: Option<String>,
|
||||
pub prefix_rule: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -205,6 +206,7 @@ mod tests {
|
||||
tty: true,
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
justification: None,
|
||||
prefix_rule: None,
|
||||
},
|
||||
&context,
|
||||
)
|
||||
|
||||
@@ -11,9 +11,9 @@ use tokio::time::Instant;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
|
||||
use crate::exec_env::create_env;
|
||||
use crate::exec_policy::ExecApprovalRequest;
|
||||
use crate::protocol::ExecCommandSource;
|
||||
use crate::sandboxing::ExecEnv;
|
||||
use crate::sandboxing::SandboxPermissions;
|
||||
use crate::tools::events::ToolEmitter;
|
||||
use crate::tools::events::ToolEventCtx;
|
||||
use crate::tools::events::ToolEventStage;
|
||||
@@ -123,14 +123,7 @@ impl UnifiedExecProcessManager {
|
||||
.unwrap_or_else(|| context.turn.cwd.clone());
|
||||
|
||||
let process = self
|
||||
.open_session_with_sandbox(
|
||||
&request.command,
|
||||
cwd.clone(),
|
||||
request.sandbox_permissions,
|
||||
request.justification,
|
||||
request.tty,
|
||||
context,
|
||||
)
|
||||
.open_session_with_sandbox(&request, cwd.clone(), context)
|
||||
.await;
|
||||
|
||||
let process = match process {
|
||||
@@ -486,11 +479,8 @@ impl UnifiedExecProcessManager {
|
||||
|
||||
pub(super) async fn open_session_with_sandbox(
|
||||
&self,
|
||||
command: &[String],
|
||||
request: &ExecCommandRequest,
|
||||
cwd: PathBuf,
|
||||
sandbox_permissions: SandboxPermissions,
|
||||
justification: Option<String>,
|
||||
tty: bool,
|
||||
context: &UnifiedExecContext,
|
||||
) -> Result<UnifiedExecProcess, UnifiedExecError> {
|
||||
let env = apply_unified_exec_env(create_env(&context.turn.shell_environment_policy));
|
||||
@@ -501,21 +491,22 @@ impl UnifiedExecProcessManager {
|
||||
.session
|
||||
.services
|
||||
.exec_policy
|
||||
.create_exec_approval_requirement_for_command(
|
||||
&features,
|
||||
command,
|
||||
context.turn.approval_policy,
|
||||
&context.turn.sandbox_policy,
|
||||
sandbox_permissions,
|
||||
)
|
||||
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
|
||||
features: &features,
|
||||
command: &request.command,
|
||||
approval_policy: context.turn.approval_policy,
|
||||
sandbox_policy: &context.turn.sandbox_policy,
|
||||
sandbox_permissions: request.sandbox_permissions,
|
||||
prefix_rule: request.prefix_rule.clone(),
|
||||
})
|
||||
.await;
|
||||
let req = UnifiedExecToolRequest::new(
|
||||
command.to_vec(),
|
||||
request.command.clone(),
|
||||
cwd,
|
||||
env,
|
||||
tty,
|
||||
sandbox_permissions,
|
||||
justification,
|
||||
request.tty,
|
||||
request.sandbox_permissions,
|
||||
request.justification.clone(),
|
||||
exec_approval_requirement,
|
||||
);
|
||||
let tool_ctx = ToolCtx {
|
||||
|
||||
@@ -1754,6 +1754,16 @@ async fn approving_execpolicy_amendment_persists_policy_and_skips_future_prompts
|
||||
.await?;
|
||||
wait_for_completion(&test).await;
|
||||
|
||||
let developer_messages = first_results
|
||||
.single_request()
|
||||
.message_input_texts("developer");
|
||||
assert!(
|
||||
developer_messages
|
||||
.iter()
|
||||
.any(|message| message.contains(r#"["touch", "allow-prefix.txt"]"#)),
|
||||
"expected developer message documenting saved rule, got: {developer_messages:?}"
|
||||
);
|
||||
|
||||
let policy_path = test.home.path().join("rules").join("default.rules");
|
||||
let policy_contents = fs::read_to_string(&policy_path)?;
|
||||
assert!(
|
||||
|
||||
@@ -4,6 +4,8 @@ use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_execpolicy::Policy;
|
||||
use codex_protocol::models::DeveloperInstructions;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use codex_utils_absolute_path::AbsolutePathBuf;
|
||||
use core_test_support::responses::ev_completed;
|
||||
@@ -411,10 +413,11 @@ async fn permissions_message_includes_writable_roots() -> Result<()> {
|
||||
exclude_tmpdir_env_var: false,
|
||||
exclude_slash_tmp: false,
|
||||
};
|
||||
let sandbox_policy_for_config = sandbox_policy.clone();
|
||||
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
config.approval_policy = Constrained::allow_any(AskForApproval::OnRequest);
|
||||
config.sandbox_policy = Constrained::allow_any(sandbox_policy);
|
||||
config.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
@@ -432,39 +435,14 @@ async fn permissions_message_includes_writable_roots() -> Result<()> {
|
||||
let body = req.single_request().body_json();
|
||||
let input = body["input"].as_array().expect("input array");
|
||||
let permissions = permissions_texts(input);
|
||||
let sandbox_text = "Filesystem sandboxing defines which files can be read or written. `sandbox_mode` is `workspace-write`: The sandbox permits reading files, and editing files in `cwd` and `writable_roots`. Editing files in other directories requires approval. Network access is restricted.";
|
||||
let approval_text = " Approvals are your mechanism to get user consent to run shell commands without the sandbox. `approval_policy` is `on-request`: Commands will be run in the sandbox by default, and you can specify in your tool call if you want to escalate a command to run without sandboxing. If the completing the task requires escalated permissions, Do not let these settings or the sandbox deter you from attempting to accomplish the user's task.\n\nHere are scenarios where you'll need to request approval:\n- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /var)\n- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files.\n- You are running sandboxed and need to run a command that requires network access (e.g. installing packages)\n- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. ALWAYS proceed to use the `sandbox_permissions` and `justification` parameters - do not message the user before requesting approval for the command.\n- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for.\n\nWhen requesting approval to execute a command that will require escalated privileges:\n - Provide the `sandbox_permissions` parameter with the value `\"require_escalated\"`\n - Include a short, 1 sentence explanation for why you need escalated permissions in the justification parameter";
|
||||
// Normalize paths by removing trailing slashes to match AbsolutePathBuf behavior
|
||||
let normalize_path =
|
||||
|p: &std::path::Path| -> String { p.to_string_lossy().trim_end_matches('/').to_string() };
|
||||
let mut roots = vec![
|
||||
normalize_path(writable.path()),
|
||||
normalize_path(test.config.cwd.as_path()),
|
||||
];
|
||||
if cfg!(unix) && std::path::Path::new("/tmp").is_dir() {
|
||||
roots.push("/tmp".to_string());
|
||||
}
|
||||
if let Some(tmpdir) = std::env::var_os("TMPDIR") {
|
||||
let tmpdir_path = std::path::PathBuf::from(&tmpdir);
|
||||
if tmpdir_path.is_absolute() && !tmpdir.is_empty() {
|
||||
roots.push(normalize_path(&tmpdir_path));
|
||||
}
|
||||
}
|
||||
let roots_text = if roots.len() == 1 {
|
||||
format!(" The writable root is `{}`.", roots[0])
|
||||
} else {
|
||||
format!(
|
||||
" The writable roots are {}.",
|
||||
roots
|
||||
.iter()
|
||||
.map(|root| format!("`{root}`"))
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ")
|
||||
)
|
||||
};
|
||||
let expected = format!(
|
||||
"<permissions instructions>{sandbox_text}{approval_text}{roots_text}</permissions instructions>"
|
||||
);
|
||||
let expected = DeveloperInstructions::from_policy(
|
||||
&sandbox_policy,
|
||||
AskForApproval::OnRequest,
|
||||
&Policy::empty(),
|
||||
false,
|
||||
test.config.cwd.as_path(),
|
||||
)
|
||||
.into_text();
|
||||
// Normalize line endings to handle Windows vs Unix differences
|
||||
let normalize_line_endings = |s: &str| s.replace("\r\n", "\n");
|
||||
let expected_normalized = normalize_line_endings(&expected);
|
||||
|
||||
@@ -31,6 +31,30 @@ impl Policy {
|
||||
&self.rules_by_program
|
||||
}
|
||||
|
||||
pub fn get_allowed_prefixes(&self) -> Vec<Vec<String>> {
|
||||
let mut prefixes = Vec::new();
|
||||
|
||||
for (_program, rules) in self.rules_by_program.iter_all() {
|
||||
for rule in rules {
|
||||
let Some(prefix_rule) = rule.as_any().downcast_ref::<PrefixRule>() else {
|
||||
continue;
|
||||
};
|
||||
if prefix_rule.decision != Decision::Allow {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut prefix = Vec::with_capacity(prefix_rule.pattern.rest.len() + 1);
|
||||
prefix.push(prefix_rule.pattern.first.as_ref().to_string());
|
||||
prefix.extend(prefix_rule.pattern.rest.iter().map(render_pattern_token));
|
||||
prefixes.push(prefix);
|
||||
}
|
||||
}
|
||||
|
||||
prefixes.sort();
|
||||
prefixes.dedup();
|
||||
prefixes
|
||||
}
|
||||
|
||||
pub fn add_prefix_rule(&mut self, prefix: &[String], decision: Decision) -> Result<()> {
|
||||
let (first_token, rest) = prefix
|
||||
.split_first()
|
||||
@@ -116,6 +140,13 @@ impl Policy {
|
||||
}
|
||||
}
|
||||
|
||||
fn render_pattern_token(token: &PatternToken) -> String {
|
||||
match token {
|
||||
PatternToken::Single(value) => value.clone(),
|
||||
PatternToken::Alts(alternatives) => format!("[{}]", alternatives.join("|")),
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Evaluation {
|
||||
|
||||
@@ -96,6 +96,8 @@ pub trait Rule: Any + Debug + Send + Sync {
|
||||
fn program(&self) -> &str;
|
||||
|
||||
fn matches(&self, cmd: &[String]) -> Option<RuleMatch>;
|
||||
|
||||
fn as_any(&self) -> &dyn Any;
|
||||
}
|
||||
|
||||
pub type RuleRef = Arc<dyn Rule>;
|
||||
@@ -114,6 +116,10 @@ impl Rule for PrefixRule {
|
||||
justification: self.justification.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Count how many rules match each provided example and error if any example is unmatched.
|
||||
|
||||
@@ -13,6 +13,7 @@ workspace = true
|
||||
|
||||
[dependencies]
|
||||
codex-git = { workspace = true }
|
||||
codex-execpolicy = { workspace = true }
|
||||
codex-utils-absolute-path = { workspace = true }
|
||||
codex-utils-image = { workspace = true }
|
||||
icu_decimal = { workspace = true }
|
||||
|
||||
@@ -19,6 +19,7 @@ use crate::protocol::NetworkAccess;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::protocol::WritableRoot;
|
||||
use crate::user_input::UserInput;
|
||||
use codex_execpolicy::Policy;
|
||||
use codex_git::GhostCommit;
|
||||
use codex_utils_image::error::ImageProcessingError;
|
||||
use schemars::JsonSchema;
|
||||
@@ -205,6 +206,8 @@ const APPROVAL_POLICY_ON_FAILURE: &str =
|
||||
include_str!("prompts/permissions/approval_policy/on_failure.md");
|
||||
const APPROVAL_POLICY_ON_REQUEST: &str =
|
||||
include_str!("prompts/permissions/approval_policy/on_request.md");
|
||||
const APPROVAL_POLICY_ON_REQUEST_RULE: &str =
|
||||
include_str!("prompts/permissions/approval_policy/on_request_rule.md");
|
||||
|
||||
const SANDBOX_MODE_DANGER_FULL_ACCESS: &str =
|
||||
include_str!("prompts/permissions/sandbox_mode/danger_full_access.md");
|
||||
@@ -217,12 +220,42 @@ impl DeveloperInstructions {
|
||||
Self { text: text.into() }
|
||||
}
|
||||
|
||||
pub fn from(
|
||||
approval_policy: AskForApproval,
|
||||
exec_policy: &Policy,
|
||||
request_rule_enabled: bool,
|
||||
) -> DeveloperInstructions {
|
||||
let text = match approval_policy {
|
||||
AskForApproval::Never => APPROVAL_POLICY_NEVER.to_string(),
|
||||
AskForApproval::UnlessTrusted => APPROVAL_POLICY_UNLESS_TRUSTED.to_string(),
|
||||
AskForApproval::OnFailure => APPROVAL_POLICY_ON_FAILURE.to_string(),
|
||||
AskForApproval::OnRequest => {
|
||||
if !request_rule_enabled {
|
||||
APPROVAL_POLICY_ON_REQUEST.to_string()
|
||||
} else {
|
||||
let command_prefixes = format_allow_prefixes(exec_policy);
|
||||
match command_prefixes {
|
||||
Some(prefixes) => {
|
||||
format!("{APPROVAL_POLICY_ON_REQUEST_RULE}\n{prefixes}")
|
||||
}
|
||||
None => APPROVAL_POLICY_ON_REQUEST_RULE.to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
DeveloperInstructions::new(text)
|
||||
}
|
||||
|
||||
pub fn into_text(self) -> String {
|
||||
self.text
|
||||
}
|
||||
|
||||
pub fn concat(self, other: impl Into<DeveloperInstructions>) -> Self {
|
||||
let mut text = self.text;
|
||||
if !text.ends_with('\n') {
|
||||
text.push('\n');
|
||||
}
|
||||
text.push_str(&other.into().text);
|
||||
Self { text }
|
||||
}
|
||||
@@ -237,6 +270,8 @@ impl DeveloperInstructions {
|
||||
pub fn from_policy(
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
approval_policy: AskForApproval,
|
||||
exec_policy: &Policy,
|
||||
request_rule_enabled: bool,
|
||||
cwd: &Path,
|
||||
) -> Self {
|
||||
let network_access = if sandbox_policy.has_full_network_access() {
|
||||
@@ -259,6 +294,8 @@ impl DeveloperInstructions {
|
||||
sandbox_mode,
|
||||
network_access,
|
||||
approval_policy,
|
||||
exec_policy,
|
||||
request_rule_enabled,
|
||||
writable_roots,
|
||||
)
|
||||
}
|
||||
@@ -281,6 +318,8 @@ impl DeveloperInstructions {
|
||||
sandbox_mode: SandboxMode,
|
||||
network_access: NetworkAccess,
|
||||
approval_policy: AskForApproval,
|
||||
exec_policy: &Policy,
|
||||
request_rule_enabled: bool,
|
||||
writable_roots: Option<Vec<WritableRoot>>,
|
||||
) -> Self {
|
||||
let start_tag = DeveloperInstructions::new("<permissions instructions>");
|
||||
@@ -290,7 +329,11 @@ impl DeveloperInstructions {
|
||||
sandbox_mode,
|
||||
network_access,
|
||||
))
|
||||
.concat(DeveloperInstructions::from(approval_policy))
|
||||
.concat(DeveloperInstructions::from(
|
||||
approval_policy,
|
||||
exec_policy,
|
||||
request_rule_enabled,
|
||||
))
|
||||
.concat(DeveloperInstructions::from_writable_roots(writable_roots))
|
||||
.concat(end_tag)
|
||||
}
|
||||
@@ -328,6 +371,37 @@ impl DeveloperInstructions {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn render_command_prefix_list<I, P>(prefixes: I) -> Option<String>
|
||||
where
|
||||
I: IntoIterator<Item = P>,
|
||||
P: AsRef<[String]>,
|
||||
{
|
||||
let lines = prefixes
|
||||
.into_iter()
|
||||
.map(|prefix| format!("- {}", render_command_prefix(prefix.as_ref())))
|
||||
.collect::<Vec<_>>();
|
||||
if lines.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(lines.join("\n"))
|
||||
}
|
||||
|
||||
fn render_command_prefix(prefix: &[String]) -> String {
|
||||
let tokens = prefix
|
||||
.iter()
|
||||
.map(|token| serde_json::to_string(token).unwrap_or_else(|_| format!("{token:?}")))
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ");
|
||||
format!("[{tokens}]")
|
||||
}
|
||||
|
||||
fn format_allow_prefixes(exec_policy: &Policy) -> Option<String> {
|
||||
let prefixes = exec_policy.get_allowed_prefixes();
|
||||
let lines = render_command_prefix_list(prefixes)?;
|
||||
Some(format!("Approved command prefixes:\n{lines}"))
|
||||
}
|
||||
|
||||
impl From<DeveloperInstructions> for ResponseItem {
|
||||
fn from(di: DeveloperInstructions) -> Self {
|
||||
ResponseItem::Message {
|
||||
@@ -352,19 +426,6 @@ impl From<SandboxMode> for DeveloperInstructions {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<AskForApproval> for DeveloperInstructions {
|
||||
fn from(mode: AskForApproval) -> Self {
|
||||
let text = match mode {
|
||||
AskForApproval::Never => APPROVAL_POLICY_NEVER.trim_end(),
|
||||
AskForApproval::UnlessTrusted => APPROVAL_POLICY_UNLESS_TRUSTED.trim_end(),
|
||||
AskForApproval::OnFailure => APPROVAL_POLICY_ON_FAILURE.trim_end(),
|
||||
AskForApproval::OnRequest => APPROVAL_POLICY_ON_REQUEST.trim_end(),
|
||||
};
|
||||
|
||||
DeveloperInstructions::new(text)
|
||||
}
|
||||
}
|
||||
|
||||
fn should_serialize_reasoning_content(content: &Option<Vec<ReasoningItemContent>>) -> bool {
|
||||
match content {
|
||||
Some(content) => !content
|
||||
@@ -633,6 +694,10 @@ pub struct ShellToolCallParams {
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
#[ts(optional)]
|
||||
pub sandbox_permissions: Option<SandboxPermissions>,
|
||||
/// Suggests a command prefix to persist for future sessions
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
#[ts(optional)]
|
||||
pub prefix_rule: Option<Vec<String>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub justification: Option<String>,
|
||||
}
|
||||
@@ -653,6 +718,9 @@ pub struct ShellCommandToolCallParams {
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
#[ts(optional)]
|
||||
pub sandbox_permissions: Option<SandboxPermissions>,
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
#[ts(optional)]
|
||||
pub prefix_rule: Option<Vec<String>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub justification: Option<String>,
|
||||
}
|
||||
@@ -836,6 +904,7 @@ mod tests {
|
||||
use crate::config_types::SandboxMode;
|
||||
use crate::protocol::AskForApproval;
|
||||
use anyhow::Result;
|
||||
use codex_execpolicy::Policy;
|
||||
use mcp_types::ImageContent;
|
||||
use mcp_types::TextContent;
|
||||
use pretty_assertions::assert_eq;
|
||||
@@ -844,15 +913,17 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn converts_sandbox_mode_into_developer_instructions() {
|
||||
let workspace_write: DeveloperInstructions = SandboxMode::WorkspaceWrite.into();
|
||||
assert_eq!(
|
||||
DeveloperInstructions::from(SandboxMode::WorkspaceWrite),
|
||||
workspace_write,
|
||||
DeveloperInstructions::new(
|
||||
"Filesystem sandboxing defines which files can be read or written. `sandbox_mode` is `workspace-write`: The sandbox permits reading files, and editing files in `cwd` and `writable_roots`. Editing files in other directories requires approval. Network access is restricted."
|
||||
)
|
||||
);
|
||||
|
||||
let read_only: DeveloperInstructions = SandboxMode::ReadOnly.into();
|
||||
assert_eq!(
|
||||
DeveloperInstructions::from(SandboxMode::ReadOnly),
|
||||
read_only,
|
||||
DeveloperInstructions::new(
|
||||
"Filesystem sandboxing defines which files can be read or written. `sandbox_mode` is `read-only`: The sandbox only permits reading files. Network access is restricted."
|
||||
)
|
||||
@@ -865,6 +936,8 @@ mod tests {
|
||||
SandboxMode::WorkspaceWrite,
|
||||
NetworkAccess::Enabled,
|
||||
AskForApproval::OnRequest,
|
||||
&Policy::empty(),
|
||||
false,
|
||||
None,
|
||||
);
|
||||
|
||||
@@ -891,6 +964,8 @@ mod tests {
|
||||
let instructions = DeveloperInstructions::from_policy(
|
||||
&policy,
|
||||
AskForApproval::UnlessTrusted,
|
||||
&Policy::empty(),
|
||||
false,
|
||||
&PathBuf::from("/tmp"),
|
||||
);
|
||||
let text = instructions.into_text();
|
||||
@@ -898,6 +973,30 @@ mod tests {
|
||||
assert!(text.contains("`approval_policy` is `unless-trusted`"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn includes_request_rule_instructions_when_enabled() {
|
||||
let mut exec_policy = Policy::empty();
|
||||
exec_policy
|
||||
.add_prefix_rule(
|
||||
&["git".to_string(), "pull".to_string()],
|
||||
codex_execpolicy::Decision::Allow,
|
||||
)
|
||||
.expect("add rule");
|
||||
let instructions = DeveloperInstructions::from_permissions_with_network(
|
||||
SandboxMode::WorkspaceWrite,
|
||||
NetworkAccess::Enabled,
|
||||
AskForApproval::OnRequest,
|
||||
&exec_policy,
|
||||
true,
|
||||
None,
|
||||
);
|
||||
|
||||
let text = instructions.into_text();
|
||||
assert!(text.contains("prefix_rule"));
|
||||
assert!(text.contains("Approved command prefixes"));
|
||||
assert!(text.contains(r#"["git", "pull"]"#));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn serializes_success_as_plain_string() -> Result<()> {
|
||||
let item = ResponseInputItem::FunctionCallOutput {
|
||||
@@ -1126,6 +1225,7 @@ mod tests {
|
||||
workdir: Some("/tmp".to_string()),
|
||||
timeout_ms: Some(1000),
|
||||
sandbox_permissions: None,
|
||||
prefix_rule: None,
|
||||
justification: None,
|
||||
},
|
||||
params
|
||||
|
||||
@@ -0,0 +1,61 @@
|
||||
# Escalation Requests
|
||||
|
||||
Commands are run outside the sandbox if they are approved by the user, or match an existing rule that allows it to run unrestricted. The command string is split into independent command segments at shell control operators, including but not limited to:
|
||||
|
||||
- Pipes: |
|
||||
- Logical operators: &&, ||
|
||||
- Command separators: ;
|
||||
- Subshell boundaries: (...), $(...)
|
||||
|
||||
Each resulting segment is evaluated independently for sandbox restrictions and approval requirements.
|
||||
|
||||
Example:
|
||||
|
||||
git pull | tee output.txt
|
||||
|
||||
This is treated as two command segments:
|
||||
|
||||
["git", "pull"]
|
||||
|
||||
["tee", "output.txt"]
|
||||
|
||||
## How to request escalation
|
||||
|
||||
IMPORTANT: To request approval to execute a command that will require escalated privileges:
|
||||
|
||||
- Provide the `sandbox_permissions` parameter with the value `"require_escalated"`
|
||||
- Include a short question asking the user if they want to allow the action in `justification` parameter. e.g. "Do you want to download and install dependencies for this project?"
|
||||
- Suggest a `prefix_rule` - this will be shown to the user with an option to persist the rule approval for future sessions.
|
||||
|
||||
If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with "require_escalated". ALWAYS proceed to use the `justification` and `prefix_rule` parameters - do not message the user before requesting approval for the command.
|
||||
|
||||
## When to request escalation
|
||||
|
||||
While commands are running inside the sandbox, here are some scenarios that will require escalation outside the sandbox:
|
||||
|
||||
- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /var)
|
||||
- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files.
|
||||
- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with `require_escalated`. ALWAYS proceed to use the `sandbox_permissions` and `justification` parameters. do not message the user before requesting approval for the command.
|
||||
- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for.
|
||||
|
||||
Only run commands that require approval if it is absolutely necessary to solve the user's query, don't try and circumvent approvals by using other tools.
|
||||
|
||||
## prefix_rule guidance
|
||||
|
||||
When choosing a `prefix_rule`, request one that will allow you to fulfill similar requests from the user in the future without re-requesting escalation. It should be categorical and reasonably scoped to similar capabilities. You MUST NOT pass the entire command into `prefix_rule`.
|
||||
|
||||
<good_example reason="frequently run command">
|
||||
["npm", "run", "dev"]
|
||||
</good_example>
|
||||
<good_example reason="generic and reusable">
|
||||
["gh", "pr", "checks"]
|
||||
</good_example>
|
||||
<good_example reason="helpful for development cycle">
|
||||
["pytest"]
|
||||
</good_example>
|
||||
<bad_example reason="too specific">
|
||||
["cargo", "test", "-p", "codex-app-server"]
|
||||
<correction_to_good_example>
|
||||
["cargo", "test"]
|
||||
</correction_to_good_example>
|
||||
</bad_example>
|
||||
Reference in New Issue
Block a user