guardian initial feedback / tweaks (#13897)

## Summary

- remove the remaining model-visible guardian-specific `on-request` prompt additions so enabling the feature does not change the main approval-policy instructions
- neutralize user-facing guardian wording to talk about automatic approval review / approval requests rather than a second reviewer or only sandbox escalations
- tighten guardian retry-context handling so agent-authored `justification` stays in the structured action JSON and is not also injected as raw retry context
- simplify guardian review plumbing in core by deleting dead prompt-append paths and trimming some request/transcript setup code

## Notable Changes

- delete the dead `permissions/approval_policy/guardian.md` append path and stop threading `guardian_approval_enabled` through model-facing developer-instruction builders
- rename the experimental feature copy to `Automatic approval review` and update the `/experimental` snapshot text accordingly
- make approval-review status strings generic across shell, patch, network, and MCP review types
- forward real sandbox/network retry reasons for shell and unified-exec guardian review, but do not pass agent-authored justification as raw retry context
- simplify `guardian.rs` by removing the one-field request wrapper, deduping reasoning-effort selection, and cleaning up transcript entry collection

## Testing

- `just fmt`
- full validation left to CI

---------

Co-authored-by: Codex <noreply@openai.com>
2026-04-29 08:56:38 +00:00 · 2026-03-09 09:25:24 -07:00
parent 2bc3e52a91
commit f23fcd6ced
16 changed files with 421 additions and 352 deletions
--- a/codex-rs/protocol/src/models.rs
+++ b/codex-rs/protocol/src/models.rs
@@ -408,8 +408,6 @@ const APPROVAL_POLICY_ON_REQUEST_RULE: &str =
    include_str!("prompts/permissions/approval_policy/on_request_rule.md");
 const APPROVAL_POLICY_ON_REQUEST_RULE_REQUEST_PERMISSION: &str =
    include_str!("prompts/permissions/approval_policy/on_request_rule_request_permission.md");
-const GUARDIAN_APPROVAL_FEATURE: &str =
-    include_str!("prompts/permissions/approval_policy/guardian.md");

 const SANDBOX_MODE_DANGER_FULL_ACCESS: &str =
    include_str!("prompts/permissions/sandbox_mode/danger_full_access.md");
@@ -427,7 +425,6 @@ impl DeveloperInstructions {

    pub fn from(
        approval_policy: AskForApproval,
-        guardian_approval_enabled: bool,
        exec_policy: &Policy,
        request_permission_enabled: bool,
    ) -> DeveloperInstructions {
@@ -451,14 +448,7 @@ impl DeveloperInstructions {
            AskForApproval::Never => APPROVAL_POLICY_NEVER.to_string(),
            AskForApproval::UnlessTrusted => APPROVAL_POLICY_UNLESS_TRUSTED.to_string(),
            AskForApproval::OnFailure => APPROVAL_POLICY_ON_FAILURE.to_string(),
-            AskForApproval::OnRequest => {
-                let mut instructions = on_request_instructions();
-                if guardian_approval_enabled {
-                    instructions.push_str("\n\n");
-                    instructions.push_str(GUARDIAN_APPROVAL_FEATURE);
-                }
-                instructions
-            }
+            AskForApproval::OnRequest => on_request_instructions(),
            AskForApproval::Reject(reject_config) => {
                let on_request_instructions = on_request_instructions();
                let sandbox_approval = reject_config.sandbox_approval;
@@ -521,7 +511,6 @@ impl DeveloperInstructions {
    pub fn from_policy(
        sandbox_policy: &SandboxPolicy,
        approval_policy: AskForApproval,
-        guardian_approval_enabled: bool,
        exec_policy: &Policy,
        cwd: &Path,
        request_permission_enabled: bool,
@@ -546,7 +535,6 @@ impl DeveloperInstructions {
            sandbox_mode,
            network_access,
            approval_policy,
-            guardian_approval_enabled,
            exec_policy,
            writable_roots,
            request_permission_enabled,
@@ -571,7 +559,6 @@ impl DeveloperInstructions {
        sandbox_mode: SandboxMode,
        network_access: NetworkAccess,
        approval_policy: AskForApproval,
-        guardian_approval_enabled: bool,
        exec_policy: &Policy,
        writable_roots: Option<Vec<WritableRoot>>,
        request_permission_enabled: bool,
@@ -585,7 +572,6 @@ impl DeveloperInstructions {
            ))
            .concat(DeveloperInstructions::from(
                approval_policy,
-                guardian_approval_enabled,
                exec_policy,
                request_permission_enabled,
            ))
@@ -1667,7 +1653,6 @@ mod tests {
            SandboxMode::WorkspaceWrite,
            NetworkAccess::Enabled,
            AskForApproval::OnRequest,
-            false,
            &Policy::empty(),
            None,
            false,
@@ -1697,7 +1682,6 @@ mod tests {
        let instructions = DeveloperInstructions::from_policy(
            &policy,
            AskForApproval::UnlessTrusted,
-            false,
            &Policy::empty(),
            &PathBuf::from("/tmp"),
            false,
@@ -1720,7 +1704,6 @@ mod tests {
            SandboxMode::WorkspaceWrite,
            NetworkAccess::Enabled,
            AskForApproval::OnRequest,
-            false,
            &exec_policy,
            None,
            false,
@@ -1738,7 +1721,6 @@ mod tests {
            SandboxMode::WorkspaceWrite,
            NetworkAccess::Enabled,
            AskForApproval::OnRequest,
-            false,
            &Policy::empty(),
            None,
            true,
@@ -1749,23 +1731,6 @@ mod tests {
        assert!(text.contains("additional_permissions"));
    }

-    #[test]
-    fn includes_guardian_feature_guidance_for_on_request_when_enabled() {
-        let instructions = DeveloperInstructions::from_permissions_with_network(
-            SandboxMode::WorkspaceWrite,
-            NetworkAccess::Enabled,
-            AskForApproval::OnRequest,
-            true,
-            &Policy::empty(),
-            None,
-            false,
-        );
-
-        let text = instructions.into_text();
-        assert!(text.contains("guardian subagent"));
-        assert!(text.contains("approval prompts"));
-    }
-
    #[test]
    fn render_command_prefix_list_sorts_by_len_then_total_len_then_alphabetical() {
        let prefixes = vec![
--- a/codex-rs/protocol/src/prompts/permissions/approval_policy/guardian.md
+++ b/codex-rs/protocol/src/prompts/permissions/approval_policy/guardian.md
@@ -1,3 +0,0 @@
-Guardian approvals are enabled. While `approval_policy` is still `on-request`, approval prompts are routed to a guardian subagent instead of the user. Use `sandbox_permissions: "require_escalated"` with a concise `justification` when you need unsandboxed execution, and use `sandbox_permissions: "with_additional_permissions"` plus `additional_permissions` when you need broader sandboxed access. Codex will ask the guardian subagent to assess the risk automatically.
-
-Do not message the user before requesting escalation. If the guardian rejects an action, do not attempt the same outcome via workaround, indirect execution, or policy circumvention. Either choose a materially safer alternative or stop and ask the user for guidance.