mirror of
https://github.com/openai/codex.git
synced 2026-05-01 09:56:37 +00:00
Add guardian approval MVP (#13692)
## Summary

- add the guardian reviewer flow for `on-request` approvals in command, patch, sandbox-retry, and managed-network approval paths
- keep guardian behind `features.guardian_approval` instead of exposing a public `approval_policy = guardian` mode
- route ordinary `OnRequest` approvals to the guardian subagent when the feature is enabled, without changing the public approval-mode surface

## Public model

- public approval modes stay unchanged
- guardian is enabled via `features.guardian_approval`
- when that feature is on, `approval_policy = on-request` keeps the same approval boundaries but sends those approval requests to the guardian reviewer instead of the user
- `/experimental` only persists the feature flag; it does not rewrite `approval_policy`
- CLI and app-server no longer expose a separate `guardian` approval mode in this PR

## Guardian reviewer

- the reviewer runs as a normal subagent and reuses the existing subagent/thread machinery
- it is locked to a read-only sandbox and `approval_policy = never`
- it does not inherit user/project exec-policy rules
- it prefers `gpt-5.4` when the current provider exposes it, otherwise falls back to the parent turn's active model
- it fails closed on timeout, startup failure, malformed output, or any other review error
- it currently auto-approves only when `risk_score < 80`

## Review context and policy

- guardian mirrors `OnRequest` approval semantics rather than introducing a separate approval policy
- explicit `require_escalated` requests follow the same approval surface as `OnRequest`; the difference is only who reviews them
- managed-network allowlist misses that enter the approval flow are also reviewed by guardian
- the review prompt includes bounded recent transcript history plus recent tool call/result evidence
- transcript entries and planned-action strings are truncated with explicit `<guardian_truncated ... />` markers so large payloads stay bounded
- apply-patch reviews include the full patch content (without duplicating the structured `changes` payload)
- the guardian request layout is snapshot-tested using the same model-visible Responses request formatter used elsewhere in core

## Guardian network behavior

- the guardian subagent inherits the parent session's managed-network allowlist when one exists, so it can use the same approved network surface while reviewing
- exact session-scoped network approvals are copied into the guardian session with protocol/port scope preserved
- those copied approvals are now seeded before the guardian's first turn is submitted, so inherited approvals are available during any immediate review-time checks

## Out of scope / follow-ups

- the sandbox-permission validation split was pulled into a separate PR and is not part of this diff
- a future follow-up can enable `serde_json` preserve-order in `codex-core` and then simplify the guardian action rendering further

---------

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
committed by
GitHub
parent
cf143bf71e
commit
e84ee33cc0
@@ -404,6 +404,8 @@ const APPROVAL_POLICY_ON_REQUEST_RULE: &str =
|
||||
include_str!("prompts/permissions/approval_policy/on_request_rule.md");
|
||||
const APPROVAL_POLICY_ON_REQUEST_RULE_REQUEST_PERMISSION: &str =
|
||||
include_str!("prompts/permissions/approval_policy/on_request_rule_request_permission.md");
|
||||
const GUARDIAN_APPROVAL_FEATURE: &str =
|
||||
include_str!("prompts/permissions/approval_policy/guardian.md");
|
||||
|
||||
const SANDBOX_MODE_DANGER_FULL_ACCESS: &str =
|
||||
include_str!("prompts/permissions/sandbox_mode/danger_full_access.md");
|
||||
@@ -421,6 +423,7 @@ impl DeveloperInstructions {
|
||||
|
||||
pub fn from(
|
||||
approval_policy: AskForApproval,
|
||||
guardian_approval_enabled: bool,
|
||||
exec_policy: &Policy,
|
||||
request_permission_enabled: bool,
|
||||
) -> DeveloperInstructions {
|
||||
@@ -444,7 +447,14 @@ impl DeveloperInstructions {
|
||||
AskForApproval::Never => APPROVAL_POLICY_NEVER.to_string(),
|
||||
AskForApproval::UnlessTrusted => APPROVAL_POLICY_UNLESS_TRUSTED.to_string(),
|
||||
AskForApproval::OnFailure => APPROVAL_POLICY_ON_FAILURE.to_string(),
|
||||
AskForApproval::OnRequest => on_request_instructions(),
|
||||
AskForApproval::OnRequest => {
|
||||
let mut instructions = on_request_instructions();
|
||||
if guardian_approval_enabled {
|
||||
instructions.push_str("\n\n");
|
||||
instructions.push_str(GUARDIAN_APPROVAL_FEATURE);
|
||||
}
|
||||
instructions
|
||||
}
|
||||
AskForApproval::Reject(reject_config) => {
|
||||
let on_request_instructions = on_request_instructions();
|
||||
let sandbox_approval = reject_config.sandbox_approval;
|
||||
@@ -507,6 +517,7 @@ impl DeveloperInstructions {
|
||||
pub fn from_policy(
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
approval_policy: AskForApproval,
|
||||
guardian_approval_enabled: bool,
|
||||
exec_policy: &Policy,
|
||||
cwd: &Path,
|
||||
request_permission_enabled: bool,
|
||||
@@ -531,6 +542,7 @@ impl DeveloperInstructions {
|
||||
sandbox_mode,
|
||||
network_access,
|
||||
approval_policy,
|
||||
guardian_approval_enabled,
|
||||
exec_policy,
|
||||
writable_roots,
|
||||
request_permission_enabled,
|
||||
@@ -555,6 +567,7 @@ impl DeveloperInstructions {
|
||||
sandbox_mode: SandboxMode,
|
||||
network_access: NetworkAccess,
|
||||
approval_policy: AskForApproval,
|
||||
guardian_approval_enabled: bool,
|
||||
exec_policy: &Policy,
|
||||
writable_roots: Option<Vec<WritableRoot>>,
|
||||
request_permission_enabled: bool,
|
||||
@@ -568,6 +581,7 @@ impl DeveloperInstructions {
|
||||
))
|
||||
.concat(DeveloperInstructions::from(
|
||||
approval_policy,
|
||||
guardian_approval_enabled,
|
||||
exec_policy,
|
||||
request_permission_enabled,
|
||||
))
|
||||
@@ -1625,6 +1639,7 @@ mod tests {
|
||||
SandboxMode::WorkspaceWrite,
|
||||
NetworkAccess::Enabled,
|
||||
AskForApproval::OnRequest,
|
||||
false,
|
||||
&Policy::empty(),
|
||||
None,
|
||||
false,
|
||||
@@ -1654,6 +1669,7 @@ mod tests {
|
||||
let instructions = DeveloperInstructions::from_policy(
|
||||
&policy,
|
||||
AskForApproval::UnlessTrusted,
|
||||
false,
|
||||
&Policy::empty(),
|
||||
&PathBuf::from("/tmp"),
|
||||
false,
|
||||
@@ -1676,6 +1692,7 @@ mod tests {
|
||||
SandboxMode::WorkspaceWrite,
|
||||
NetworkAccess::Enabled,
|
||||
AskForApproval::OnRequest,
|
||||
false,
|
||||
&exec_policy,
|
||||
None,
|
||||
false,
|
||||
@@ -1693,6 +1710,7 @@ mod tests {
|
||||
SandboxMode::WorkspaceWrite,
|
||||
NetworkAccess::Enabled,
|
||||
AskForApproval::OnRequest,
|
||||
false,
|
||||
&Policy::empty(),
|
||||
None,
|
||||
true,
|
||||
@@ -1703,6 +1721,23 @@ mod tests {
|
||||
assert!(text.contains("additional_permissions"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn includes_guardian_feature_guidance_for_on_request_when_enabled() {
|
||||
let instructions = DeveloperInstructions::from_permissions_with_network(
|
||||
SandboxMode::WorkspaceWrite,
|
||||
NetworkAccess::Enabled,
|
||||
AskForApproval::OnRequest,
|
||||
true,
|
||||
&Policy::empty(),
|
||||
None,
|
||||
false,
|
||||
);
|
||||
|
||||
let text = instructions.into_text();
|
||||
assert!(text.contains("guardian subagent"));
|
||||
assert!(text.contains("approval prompts"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn render_command_prefix_list_sorts_by_len_then_total_len_then_alphabetical() {
|
||||
let prefixes = vec![
|
||||
|
||||
Reference in New Issue
Block a user