mirror of
https://github.com/openai/codex.git
synced 2026-04-29 08:56:38 +00:00
guardian initial feedback / tweaks (#13897)
## Summary

- remove the remaining model-visible guardian-specific `on-request` prompt additions so enabling the feature does not change the main approval-policy instructions
- neutralize user-facing guardian wording to talk about automatic approval review / approval requests rather than a second reviewer or only sandbox escalations
- tighten guardian retry-context handling so agent-authored `justification` stays in the structured action JSON and is not also injected as raw retry context
- simplify guardian review plumbing in core by deleting dead prompt-append paths and trimming some request/transcript setup code

## Notable Changes

- delete the dead `permissions/approval_policy/guardian.md` append path and stop threading `guardian_approval_enabled` through model-facing developer-instruction builders
- rename the experimental feature copy to `Automatic approval review` and update the `/experimental` snapshot text accordingly
- make approval-review status strings generic across shell, patch, network, and MCP review types
- forward real sandbox/network retry reasons for shell and unified-exec guardian review, but do not pass agent-authored justification as raw retry context
- simplify `guardian.rs` by removing the one-field request wrapper, deduping reasoning-effort selection, and cleaning up transcript entry collection

## Testing

- `just fmt`
- full validation left to CI

---------

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
committed by
GitHub
parent
2bc3e52a91
commit
f23fcd6ced
@@ -408,8 +408,6 @@ const APPROVAL_POLICY_ON_REQUEST_RULE: &str =
|
||||
include_str!("prompts/permissions/approval_policy/on_request_rule.md");
|
||||
const APPROVAL_POLICY_ON_REQUEST_RULE_REQUEST_PERMISSION: &str =
|
||||
include_str!("prompts/permissions/approval_policy/on_request_rule_request_permission.md");
|
||||
const GUARDIAN_APPROVAL_FEATURE: &str =
|
||||
include_str!("prompts/permissions/approval_policy/guardian.md");
|
||||
|
||||
const SANDBOX_MODE_DANGER_FULL_ACCESS: &str =
|
||||
include_str!("prompts/permissions/sandbox_mode/danger_full_access.md");
|
||||
@@ -427,7 +425,6 @@ impl DeveloperInstructions {
|
||||
|
||||
pub fn from(
|
||||
approval_policy: AskForApproval,
|
||||
guardian_approval_enabled: bool,
|
||||
exec_policy: &Policy,
|
||||
request_permission_enabled: bool,
|
||||
) -> DeveloperInstructions {
|
||||
@@ -451,14 +448,7 @@ impl DeveloperInstructions {
|
||||
AskForApproval::Never => APPROVAL_POLICY_NEVER.to_string(),
|
||||
AskForApproval::UnlessTrusted => APPROVAL_POLICY_UNLESS_TRUSTED.to_string(),
|
||||
AskForApproval::OnFailure => APPROVAL_POLICY_ON_FAILURE.to_string(),
|
||||
AskForApproval::OnRequest => {
|
||||
let mut instructions = on_request_instructions();
|
||||
if guardian_approval_enabled {
|
||||
instructions.push_str("\n\n");
|
||||
instructions.push_str(GUARDIAN_APPROVAL_FEATURE);
|
||||
}
|
||||
instructions
|
||||
}
|
||||
AskForApproval::OnRequest => on_request_instructions(),
|
||||
AskForApproval::Reject(reject_config) => {
|
||||
let on_request_instructions = on_request_instructions();
|
||||
let sandbox_approval = reject_config.sandbox_approval;
|
||||
@@ -521,7 +511,6 @@ impl DeveloperInstructions {
|
||||
pub fn from_policy(
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
approval_policy: AskForApproval,
|
||||
guardian_approval_enabled: bool,
|
||||
exec_policy: &Policy,
|
||||
cwd: &Path,
|
||||
request_permission_enabled: bool,
|
||||
@@ -546,7 +535,6 @@ impl DeveloperInstructions {
|
||||
sandbox_mode,
|
||||
network_access,
|
||||
approval_policy,
|
||||
guardian_approval_enabled,
|
||||
exec_policy,
|
||||
writable_roots,
|
||||
request_permission_enabled,
|
||||
@@ -571,7 +559,6 @@ impl DeveloperInstructions {
|
||||
sandbox_mode: SandboxMode,
|
||||
network_access: NetworkAccess,
|
||||
approval_policy: AskForApproval,
|
||||
guardian_approval_enabled: bool,
|
||||
exec_policy: &Policy,
|
||||
writable_roots: Option<Vec<WritableRoot>>,
|
||||
request_permission_enabled: bool,
|
||||
@@ -585,7 +572,6 @@ impl DeveloperInstructions {
|
||||
))
|
||||
.concat(DeveloperInstructions::from(
|
||||
approval_policy,
|
||||
guardian_approval_enabled,
|
||||
exec_policy,
|
||||
request_permission_enabled,
|
||||
))
|
||||
@@ -1667,7 +1653,6 @@ mod tests {
|
||||
SandboxMode::WorkspaceWrite,
|
||||
NetworkAccess::Enabled,
|
||||
AskForApproval::OnRequest,
|
||||
false,
|
||||
&Policy::empty(),
|
||||
None,
|
||||
false,
|
||||
@@ -1697,7 +1682,6 @@ mod tests {
|
||||
let instructions = DeveloperInstructions::from_policy(
|
||||
&policy,
|
||||
AskForApproval::UnlessTrusted,
|
||||
false,
|
||||
&Policy::empty(),
|
||||
&PathBuf::from("/tmp"),
|
||||
false,
|
||||
@@ -1720,7 +1704,6 @@ mod tests {
|
||||
SandboxMode::WorkspaceWrite,
|
||||
NetworkAccess::Enabled,
|
||||
AskForApproval::OnRequest,
|
||||
false,
|
||||
&exec_policy,
|
||||
None,
|
||||
false,
|
||||
@@ -1738,7 +1721,6 @@ mod tests {
|
||||
SandboxMode::WorkspaceWrite,
|
||||
NetworkAccess::Enabled,
|
||||
AskForApproval::OnRequest,
|
||||
false,
|
||||
&Policy::empty(),
|
||||
None,
|
||||
true,
|
||||
@@ -1749,23 +1731,6 @@ mod tests {
|
||||
assert!(text.contains("additional_permissions"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn includes_guardian_feature_guidance_for_on_request_when_enabled() {
|
||||
let instructions = DeveloperInstructions::from_permissions_with_network(
|
||||
SandboxMode::WorkspaceWrite,
|
||||
NetworkAccess::Enabled,
|
||||
AskForApproval::OnRequest,
|
||||
true,
|
||||
&Policy::empty(),
|
||||
None,
|
||||
false,
|
||||
);
|
||||
|
||||
let text = instructions.into_text();
|
||||
assert!(text.contains("guardian subagent"));
|
||||
assert!(text.contains("approval prompts"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn render_command_prefix_list_sorts_by_len_then_total_len_then_alphabetical() {
|
||||
let prefixes = vec![
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
Guardian approvals are enabled. While `approval_policy` is still `on-request`, approval prompts are routed to a guardian subagent instead of the user. Use `sandbox_permissions: "require_escalated"` with a concise `justification` when you need unsandboxed execution, and use `sandbox_permissions: "with_additional_permissions"` plus `additional_permissions` when you need broader sandboxed access. Codex will ask the guardian subagent to assess the risk automatically.
|
||||
|
||||
Do not message the user before requesting escalation. If the guardian rejects an action, do not attempt the same outcome via workaround, indirect execution, or policy circumvention. Either choose a materially safer alternative or stop and ask the user for guidance.
|
||||
Reference in New Issue
Block a user