Add guardian approval MVP (#13692)

## Summary - add the guardian reviewer flow for `on-request` approvals in command, patch, sandbox-retry, and managed-network approval paths - keep guardian behind `features.guardian_approval` instead of exposing a public `approval_policy = guardian` mode - route ordinary `OnRequest` approvals to the guardian subagent when the feature is enabled, without changing the public approval-mode surface ## Public model - public approval modes stay unchanged - guardian is enabled via `features.guardian_approval` - when that feature is on, `approval_policy = on-request` keeps the same approval boundaries but sends those approval requests to the guardian reviewer instead of the user - `/experimental` only persists the feature flag; it does not rewrite `approval_policy` - CLI and app-server no longer expose a separate `guardian` approval mode in this PR ## Guardian reviewer - the reviewer runs as a normal subagent and reuses the existing subagent/thread machinery - it is locked to a read-only sandbox and `approval_policy = never` - it does not inherit user/project exec-policy rules - it prefers `gpt-5.4` when the current provider exposes it, otherwise falls back to the parent turn's active model - it fail-closes on timeout, startup failure, malformed output, or any other review error - it currently auto-approves only when `risk_score < 80` ## Review context and policy - guardian mirrors `OnRequest` approval semantics rather than introducing a separate approval policy - explicit `require_escalated` requests follow the same approval surface as `OnRequest`; the difference is only who reviews them - managed-network allowlist misses that enter the approval flow are also reviewed by guardian - the review prompt includes bounded recent transcript history plus recent tool call/result evidence - transcript entries and planned-action strings are truncated with explicit `<guardian_truncated ... />` markers so large payloads stay bounded - apply-patch reviews include the full patch content (without duplicating the structured `changes` payload) - the guardian request layout is snapshot-tested using the same model-visible Responses request formatter used elsewhere in core ## Guardian network behavior - the guardian subagent inherits the parent session's managed-network allowlist when one exists, so it can use the same approved network surface while reviewing - exact session-scoped network approvals are copied into the guardian session with protocol/port scope preserved - those copied approvals are now seeded before the guardian's first turn is submitted, so inherited approvals are available during any immediate review-time checks ## Out of scope / follow-ups - the sandbox-permission validation split was pulled into a separate PR and is not part of this diff - a future follow-up can enable `serde_json` preserve-order in `codex-core` and then simplify the guardian action rendering further --------- Co-authored-by: Codex <noreply@openai.com>
2026-05-03 10:56:37 +00:00 · 2026-03-07 05:40:10 -08:00
parent cf143bf71e
commit e84ee33cc0
34 changed files with 2477 additions and 139 deletions
--- a/codex-rs/core/src/codex_tests_guardian.rs
+++ b/codex-rs/core/src/codex_tests_guardian.rs
@@ -0,0 +1,272 @@
+use super::*;
+use crate::config_loader::ConfigLayerEntry;
+use crate::config_loader::ConfigRequirements;
+use crate::config_loader::ConfigRequirementsToml;
+use crate::exec::ExecParams;
+use crate::exec_policy::ExecPolicyManager;
+use crate::features::Feature;
+use crate::guardian::GUARDIAN_SUBAGENT_NAME;
+use crate::protocol::AskForApproval;
+use crate::sandboxing::SandboxPermissions;
+use crate::turn_diff_tracker::TurnDiffTracker;
+use codex_app_server_protocol::ConfigLayerSource;
+use codex_execpolicy::Decision;
+use codex_execpolicy::Evaluation;
+use codex_execpolicy::RuleMatch;
+use codex_protocol::models::FunctionCallOutputBody;
+use codex_protocol::models::NetworkPermissions;
+use codex_protocol::models::PermissionProfile;
+use codex_utils_absolute_path::AbsolutePathBuf;
+use pretty_assertions::assert_eq;
+use serde::Deserialize;
+use std::collections::HashMap;
+use std::fs;
+use std::sync::Arc;
+use tempfile::tempdir;
+
+#[tokio::test]
+async fn guardian_allows_shell_additional_permissions_requests_past_policy_validation() {
+    let (mut session, mut turn_context_raw) = make_session_and_context().await;
+    turn_context_raw
+        .approval_policy
+        .set(AskForApproval::OnRequest)
+        .expect("test setup should allow updating approval policy");
+    turn_context_raw
+        .features
+        .enable(Feature::GuardianApproval)
+        .expect("test setup should allow enabling guardian approvals");
+    session
+        .features
+        .enable(Feature::RequestPermissions)
+        .expect("test setup should allow enabling request permissions");
+    turn_context_raw
+        .sandbox_policy
+        .set(SandboxPolicy::DangerFullAccess)
+        .expect("test setup should allow updating sandbox policy");
+    let session = Arc::new(session);
+    let turn_context = Arc::new(turn_context_raw);
+
+    let params = ExecParams {
+        command: if cfg!(windows) {
+            vec![
+                "cmd.exe".to_string(),
+                "/C".to_string(),
+                "echo hi".to_string(),
+            ]
+        } else {
+            vec![
+                "/bin/sh".to_string(),
+                "-c".to_string(),
+                "echo hi".to_string(),
+            ]
+        },
+        cwd: turn_context.cwd.clone(),
+        expiration: 1000.into(),
+        env: HashMap::new(),
+        network: None,
+        sandbox_permissions: SandboxPermissions::WithAdditionalPermissions,
+        windows_sandbox_level: turn_context.windows_sandbox_level,
+        justification: Some("test".to_string()),
+        arg0: None,
+    };
+
+    let handler = ShellHandler;
+    let resp = handler
+        .handle(ToolInvocation {
+            session: Arc::clone(&session),
+            turn: Arc::clone(&turn_context),
+            tracker: Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new())),
+            call_id: "test-call".to_string(),
+            tool_name: "shell".to_string(),
+            payload: ToolPayload::Function {
+                arguments: serde_json::json!({
+                    "command": params.command.clone(),
+                    "workdir": Some(turn_context.cwd.to_string_lossy().to_string()),
+                    "timeout_ms": params.expiration.timeout_ms(),
+                    "sandbox_permissions": params.sandbox_permissions,
+                    "additional_permissions": PermissionProfile {
+                        network: Some(NetworkPermissions {
+                            enabled: Some(true),
+                        }),
+                        file_system: None,
+                        macos: None,
+                    },
+                    "justification": params.justification.clone(),
+                })
+                .to_string(),
+            },
+        })
+        .await;
+
+    let output = match resp.expect("expected Ok result") {
+        ToolOutput::Function {
+            body: FunctionCallOutputBody::Text(content),
+            ..
+        } => content,
+        _ => panic!("unexpected tool output"),
+    };
+
+    #[derive(Deserialize, PartialEq, Eq, Debug)]
+    struct ResponseExecMetadata {
+        exit_code: i32,
+    }
+
+    #[derive(Deserialize)]
+    struct ResponseExecOutput {
+        output: String,
+        metadata: ResponseExecMetadata,
+    }
+
+    let exec_output: ResponseExecOutput =
+        serde_json::from_str(&output).expect("valid exec output json");
+
+    assert_eq!(exec_output.metadata, ResponseExecMetadata { exit_code: 0 });
+    assert!(exec_output.output.contains("hi"));
+}
+
+#[tokio::test]
+async fn guardian_allows_unified_exec_additional_permissions_requests_past_policy_validation() {
+    let (mut session, mut turn_context_raw) = make_session_and_context().await;
+    turn_context_raw
+        .approval_policy
+        .set(AskForApproval::OnRequest)
+        .expect("test setup should allow updating approval policy");
+    turn_context_raw
+        .features
+        .enable(Feature::GuardianApproval)
+        .expect("test setup should allow enabling guardian approvals");
+    session
+        .features
+        .enable(Feature::RequestPermissions)
+        .expect("test setup should allow enabling request permissions");
+    let session = Arc::new(session);
+    let turn_context = Arc::new(turn_context_raw);
+    let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
+
+    let handler = UnifiedExecHandler;
+    let resp = handler
+        .handle(ToolInvocation {
+            session: Arc::clone(&session),
+            turn: Arc::clone(&turn_context),
+            tracker: Arc::clone(&tracker),
+            call_id: "exec-call".to_string(),
+            tool_name: "exec_command".to_string(),
+            payload: ToolPayload::Function {
+                arguments: serde_json::json!({
+                    "cmd": "echo hi",
+                    "sandbox_permissions": SandboxPermissions::WithAdditionalPermissions,
+                    "justification": "need additional sandbox permissions",
+                })
+                .to_string(),
+            },
+        })
+        .await;
+
+    let Err(FunctionCallError::RespondToModel(output)) = resp else {
+        panic!("expected validation error result");
+    };
+
+    assert_eq!(
+        output,
+        "missing `additional_permissions`; provide at least one of `network`, `file_system`, or `macos` when using `with_additional_permissions`"
+    );
+}
+
+#[tokio::test]
+async fn guardian_subagent_does_not_inherit_parent_exec_policy_rules() {
+    let codex_home = tempdir().expect("create codex home");
+    let project_dir = tempdir().expect("create project dir");
+    let rules_dir = project_dir.path().join("rules");
+    fs::create_dir_all(&rules_dir).expect("create rules dir");
+    fs::write(
+        rules_dir.join("deny.rules"),
+        r#"prefix_rule(pattern=["rm"], decision="forbidden")"#,
+    )
+    .expect("write policy file");
+
+    let mut config = build_test_config(codex_home.path()).await;
+    config.cwd = project_dir.path().to_path_buf();
+    config.config_layer_stack = ConfigLayerStack::new(
+        vec![ConfigLayerEntry::new(
+            ConfigLayerSource::Project {
+                dot_codex_folder: AbsolutePathBuf::from_absolute_path(project_dir.path())
+                    .expect("absolute project path"),
+            },
+            toml::Value::Table(Default::default()),
+        )],
+        ConfigRequirements::default(),
+        ConfigRequirementsToml::default(),
+    )
+    .expect("config layer stack");
+
+    let command = [vec!["rm".to_string()]];
+    let parent_exec_policy = ExecPolicyManager::load(&config.config_layer_stack)
+        .await
+        .expect("load parent exec policy");
+    assert_eq!(
+        parent_exec_policy
+            .current()
+            .check_multiple(command.iter(), &|_| Decision::Allow),
+        Evaluation {
+            decision: Decision::Forbidden,
+            matched_rules: vec![RuleMatch::PrefixRuleMatch {
+                matched_prefix: vec!["rm".to_string()],
+                decision: Decision::Forbidden,
+                resolved_program: None,
+                justification: None,
+            }],
+        }
+    );
+
+    let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key"));
+    let models_manager = Arc::new(ModelsManager::new(
+        config.codex_home.clone(),
+        auth_manager.clone(),
+        None,
+        CollaborationModesConfig::default(),
+    ));
+    let plugins_manager = Arc::new(PluginsManager::new(config.codex_home.clone()));
+    let skills_manager = Arc::new(SkillsManager::new(
+        config.codex_home.clone(),
+        Arc::clone(&plugins_manager),
+    ));
+    let mcp_manager = Arc::new(McpManager::new(Arc::clone(&plugins_manager)));
+    let file_watcher = Arc::new(FileWatcher::noop());
+
+    let CodexSpawnOk { codex, .. } = Codex::spawn(
+        config,
+        auth_manager,
+        models_manager,
+        skills_manager,
+        plugins_manager,
+        mcp_manager,
+        file_watcher,
+        InitialHistory::New,
+        SessionSource::SubAgent(SubAgentSource::Other(GUARDIAN_SUBAGENT_NAME.to_string())),
+        AgentControl::default(),
+        Vec::new(),
+        false,
+        None,
+        None,
+    )
+    .await
+    .expect("spawn guardian subagent");
+
+    assert_eq!(
+        codex
+            .session
+            .services
+            .exec_policy
+            .current()
+            .check_multiple(command.iter(), &|_| Decision::Allow),
+        Evaluation {
+            decision: Decision::Allow,
+            matched_rules: vec![RuleMatch::HeuristicsRuleMatch {
+                command: vec!["rm".to_string()],
+                decision: Decision::Allow,
+            }],
+        }
+    );
+
+    drop(codex);
+}