mirror of
https://github.com/openai/codex.git
synced 2026-05-03 10:56:37 +00:00
Add guardian approval MVP (#13692)
## Summary - add the guardian reviewer flow for `on-request` approvals in command, patch, sandbox-retry, and managed-network approval paths - keep guardian behind `features.guardian_approval` instead of exposing a public `approval_policy = guardian` mode - route ordinary `OnRequest` approvals to the guardian subagent when the feature is enabled, without changing the public approval-mode surface ## Public model - public approval modes stay unchanged - guardian is enabled via `features.guardian_approval` - when that feature is on, `approval_policy = on-request` keeps the same approval boundaries but sends those approval requests to the guardian reviewer instead of the user - `/experimental` only persists the feature flag; it does not rewrite `approval_policy` - CLI and app-server no longer expose a separate `guardian` approval mode in this PR ## Guardian reviewer - the reviewer runs as a normal subagent and reuses the existing subagent/thread machinery - it is locked to a read-only sandbox and `approval_policy = never` - it does not inherit user/project exec-policy rules - it prefers `gpt-5.4` when the current provider exposes it, otherwise falls back to the parent turn's active model - it fail-closes on timeout, startup failure, malformed output, or any other review error - it currently auto-approves only when `risk_score < 80` ## Review context and policy - guardian mirrors `OnRequest` approval semantics rather than introducing a separate approval policy - explicit `require_escalated` requests follow the same approval surface as `OnRequest`; the difference is only who reviews them - managed-network allowlist misses that enter the approval flow are also reviewed by guardian - the review prompt includes bounded recent transcript history plus recent tool call/result evidence - transcript entries and planned-action strings are truncated with explicit `<guardian_truncated ... />` markers so large payloads stay bounded - apply-patch reviews include the full patch content (without duplicating the structured `changes` payload) - the guardian request layout is snapshot-tested using the same model-visible Responses request formatter used elsewhere in core ## Guardian network behavior - the guardian subagent inherits the parent session's managed-network allowlist when one exists, so it can use the same approved network surface while reviewing - exact session-scoped network approvals are copied into the guardian session with protocol/port scope preserved - those copied approvals are now seeded before the guardian's first turn is submitted, so inherited approvals are available during any immediate review-time checks ## Out of scope / follow-ups - the sandbox-permission validation split was pulled into a separate PR and is not part of this diff - a future follow-up can enable `serde_json` preserve-order in `codex-core` and then simplify the guardian action rendering further --------- Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
committed by
GitHub
parent
cf143bf71e
commit
e84ee33cc0
272
codex-rs/core/src/codex_tests_guardian.rs
Normal file
272
codex-rs/core/src/codex_tests_guardian.rs
Normal file
@@ -0,0 +1,272 @@
|
||||
use super::*;
|
||||
use crate::config_loader::ConfigLayerEntry;
|
||||
use crate::config_loader::ConfigRequirements;
|
||||
use crate::config_loader::ConfigRequirementsToml;
|
||||
use crate::exec::ExecParams;
|
||||
use crate::exec_policy::ExecPolicyManager;
|
||||
use crate::features::Feature;
|
||||
use crate::guardian::GUARDIAN_SUBAGENT_NAME;
|
||||
use crate::protocol::AskForApproval;
|
||||
use crate::sandboxing::SandboxPermissions;
|
||||
use crate::turn_diff_tracker::TurnDiffTracker;
|
||||
use codex_app_server_protocol::ConfigLayerSource;
|
||||
use codex_execpolicy::Decision;
|
||||
use codex_execpolicy::Evaluation;
|
||||
use codex_execpolicy::RuleMatch;
|
||||
use codex_protocol::models::FunctionCallOutputBody;
|
||||
use codex_protocol::models::NetworkPermissions;
|
||||
use codex_protocol::models::PermissionProfile;
|
||||
use codex_utils_absolute_path::AbsolutePathBuf;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde::Deserialize;
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::sync::Arc;
|
||||
use tempfile::tempdir;
|
||||
|
||||
#[tokio::test]
|
||||
async fn guardian_allows_shell_additional_permissions_requests_past_policy_validation() {
|
||||
let (mut session, mut turn_context_raw) = make_session_and_context().await;
|
||||
turn_context_raw
|
||||
.approval_policy
|
||||
.set(AskForApproval::OnRequest)
|
||||
.expect("test setup should allow updating approval policy");
|
||||
turn_context_raw
|
||||
.features
|
||||
.enable(Feature::GuardianApproval)
|
||||
.expect("test setup should allow enabling guardian approvals");
|
||||
session
|
||||
.features
|
||||
.enable(Feature::RequestPermissions)
|
||||
.expect("test setup should allow enabling request permissions");
|
||||
turn_context_raw
|
||||
.sandbox_policy
|
||||
.set(SandboxPolicy::DangerFullAccess)
|
||||
.expect("test setup should allow updating sandbox policy");
|
||||
let session = Arc::new(session);
|
||||
let turn_context = Arc::new(turn_context_raw);
|
||||
|
||||
let params = ExecParams {
|
||||
command: if cfg!(windows) {
|
||||
vec![
|
||||
"cmd.exe".to_string(),
|
||||
"/C".to_string(),
|
||||
"echo hi".to_string(),
|
||||
]
|
||||
} else {
|
||||
vec![
|
||||
"/bin/sh".to_string(),
|
||||
"-c".to_string(),
|
||||
"echo hi".to_string(),
|
||||
]
|
||||
},
|
||||
cwd: turn_context.cwd.clone(),
|
||||
expiration: 1000.into(),
|
||||
env: HashMap::new(),
|
||||
network: None,
|
||||
sandbox_permissions: SandboxPermissions::WithAdditionalPermissions,
|
||||
windows_sandbox_level: turn_context.windows_sandbox_level,
|
||||
justification: Some("test".to_string()),
|
||||
arg0: None,
|
||||
};
|
||||
|
||||
let handler = ShellHandler;
|
||||
let resp = handler
|
||||
.handle(ToolInvocation {
|
||||
session: Arc::clone(&session),
|
||||
turn: Arc::clone(&turn_context),
|
||||
tracker: Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new())),
|
||||
call_id: "test-call".to_string(),
|
||||
tool_name: "shell".to_string(),
|
||||
payload: ToolPayload::Function {
|
||||
arguments: serde_json::json!({
|
||||
"command": params.command.clone(),
|
||||
"workdir": Some(turn_context.cwd.to_string_lossy().to_string()),
|
||||
"timeout_ms": params.expiration.timeout_ms(),
|
||||
"sandbox_permissions": params.sandbox_permissions,
|
||||
"additional_permissions": PermissionProfile {
|
||||
network: Some(NetworkPermissions {
|
||||
enabled: Some(true),
|
||||
}),
|
||||
file_system: None,
|
||||
macos: None,
|
||||
},
|
||||
"justification": params.justification.clone(),
|
||||
})
|
||||
.to_string(),
|
||||
},
|
||||
})
|
||||
.await;
|
||||
|
||||
let output = match resp.expect("expected Ok result") {
|
||||
ToolOutput::Function {
|
||||
body: FunctionCallOutputBody::Text(content),
|
||||
..
|
||||
} => content,
|
||||
_ => panic!("unexpected tool output"),
|
||||
};
|
||||
|
||||
#[derive(Deserialize, PartialEq, Eq, Debug)]
|
||||
struct ResponseExecMetadata {
|
||||
exit_code: i32,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct ResponseExecOutput {
|
||||
output: String,
|
||||
metadata: ResponseExecMetadata,
|
||||
}
|
||||
|
||||
let exec_output: ResponseExecOutput =
|
||||
serde_json::from_str(&output).expect("valid exec output json");
|
||||
|
||||
assert_eq!(exec_output.metadata, ResponseExecMetadata { exit_code: 0 });
|
||||
assert!(exec_output.output.contains("hi"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn guardian_allows_unified_exec_additional_permissions_requests_past_policy_validation() {
|
||||
let (mut session, mut turn_context_raw) = make_session_and_context().await;
|
||||
turn_context_raw
|
||||
.approval_policy
|
||||
.set(AskForApproval::OnRequest)
|
||||
.expect("test setup should allow updating approval policy");
|
||||
turn_context_raw
|
||||
.features
|
||||
.enable(Feature::GuardianApproval)
|
||||
.expect("test setup should allow enabling guardian approvals");
|
||||
session
|
||||
.features
|
||||
.enable(Feature::RequestPermissions)
|
||||
.expect("test setup should allow enabling request permissions");
|
||||
let session = Arc::new(session);
|
||||
let turn_context = Arc::new(turn_context_raw);
|
||||
let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
|
||||
|
||||
let handler = UnifiedExecHandler;
|
||||
let resp = handler
|
||||
.handle(ToolInvocation {
|
||||
session: Arc::clone(&session),
|
||||
turn: Arc::clone(&turn_context),
|
||||
tracker: Arc::clone(&tracker),
|
||||
call_id: "exec-call".to_string(),
|
||||
tool_name: "exec_command".to_string(),
|
||||
payload: ToolPayload::Function {
|
||||
arguments: serde_json::json!({
|
||||
"cmd": "echo hi",
|
||||
"sandbox_permissions": SandboxPermissions::WithAdditionalPermissions,
|
||||
"justification": "need additional sandbox permissions",
|
||||
})
|
||||
.to_string(),
|
||||
},
|
||||
})
|
||||
.await;
|
||||
|
||||
let Err(FunctionCallError::RespondToModel(output)) = resp else {
|
||||
panic!("expected validation error result");
|
||||
};
|
||||
|
||||
assert_eq!(
|
||||
output,
|
||||
"missing `additional_permissions`; provide at least one of `network`, `file_system`, or `macos` when using `with_additional_permissions`"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn guardian_subagent_does_not_inherit_parent_exec_policy_rules() {
|
||||
let codex_home = tempdir().expect("create codex home");
|
||||
let project_dir = tempdir().expect("create project dir");
|
||||
let rules_dir = project_dir.path().join("rules");
|
||||
fs::create_dir_all(&rules_dir).expect("create rules dir");
|
||||
fs::write(
|
||||
rules_dir.join("deny.rules"),
|
||||
r#"prefix_rule(pattern=["rm"], decision="forbidden")"#,
|
||||
)
|
||||
.expect("write policy file");
|
||||
|
||||
let mut config = build_test_config(codex_home.path()).await;
|
||||
config.cwd = project_dir.path().to_path_buf();
|
||||
config.config_layer_stack = ConfigLayerStack::new(
|
||||
vec![ConfigLayerEntry::new(
|
||||
ConfigLayerSource::Project {
|
||||
dot_codex_folder: AbsolutePathBuf::from_absolute_path(project_dir.path())
|
||||
.expect("absolute project path"),
|
||||
},
|
||||
toml::Value::Table(Default::default()),
|
||||
)],
|
||||
ConfigRequirements::default(),
|
||||
ConfigRequirementsToml::default(),
|
||||
)
|
||||
.expect("config layer stack");
|
||||
|
||||
let command = [vec!["rm".to_string()]];
|
||||
let parent_exec_policy = ExecPolicyManager::load(&config.config_layer_stack)
|
||||
.await
|
||||
.expect("load parent exec policy");
|
||||
assert_eq!(
|
||||
parent_exec_policy
|
||||
.current()
|
||||
.check_multiple(command.iter(), &|_| Decision::Allow),
|
||||
Evaluation {
|
||||
decision: Decision::Forbidden,
|
||||
matched_rules: vec![RuleMatch::PrefixRuleMatch {
|
||||
matched_prefix: vec!["rm".to_string()],
|
||||
decision: Decision::Forbidden,
|
||||
resolved_program: None,
|
||||
justification: None,
|
||||
}],
|
||||
}
|
||||
);
|
||||
|
||||
let auth_manager = AuthManager::from_auth_for_testing(CodexAuth::from_api_key("Test API Key"));
|
||||
let models_manager = Arc::new(ModelsManager::new(
|
||||
config.codex_home.clone(),
|
||||
auth_manager.clone(),
|
||||
None,
|
||||
CollaborationModesConfig::default(),
|
||||
));
|
||||
let plugins_manager = Arc::new(PluginsManager::new(config.codex_home.clone()));
|
||||
let skills_manager = Arc::new(SkillsManager::new(
|
||||
config.codex_home.clone(),
|
||||
Arc::clone(&plugins_manager),
|
||||
));
|
||||
let mcp_manager = Arc::new(McpManager::new(Arc::clone(&plugins_manager)));
|
||||
let file_watcher = Arc::new(FileWatcher::noop());
|
||||
|
||||
let CodexSpawnOk { codex, .. } = Codex::spawn(
|
||||
config,
|
||||
auth_manager,
|
||||
models_manager,
|
||||
skills_manager,
|
||||
plugins_manager,
|
||||
mcp_manager,
|
||||
file_watcher,
|
||||
InitialHistory::New,
|
||||
SessionSource::SubAgent(SubAgentSource::Other(GUARDIAN_SUBAGENT_NAME.to_string())),
|
||||
AgentControl::default(),
|
||||
Vec::new(),
|
||||
false,
|
||||
None,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.expect("spawn guardian subagent");
|
||||
|
||||
assert_eq!(
|
||||
codex
|
||||
.session
|
||||
.services
|
||||
.exec_policy
|
||||
.current()
|
||||
.check_multiple(command.iter(), &|_| Decision::Allow),
|
||||
Evaluation {
|
||||
decision: Decision::Allow,
|
||||
matched_rules: vec![RuleMatch::HeuristicsRuleMatch {
|
||||
command: vec!["rm".to_string()],
|
||||
decision: Decision::Allow,
|
||||
}],
|
||||
}
|
||||
);
|
||||
|
||||
drop(codex);
|
||||
}
|
||||
Reference in New Issue
Block a user