mirror of
https://github.com/openai/codex.git
synced 2026-06-01 19:02:59 +00:00
2395 lines
86 KiB
Rust
2395 lines
86 KiB
Rust
use super::*;
|
|
use crate::config::Config;
|
|
use crate::config::ConfigOverrides;
|
|
use crate::config::Constrained;
|
|
use crate::config::ManagedFeatures;
|
|
use crate::config::NetworkProxySpec;
|
|
use crate::config::test_config;
|
|
use crate::guardian::approval_request::guardian_request_target_item_id;
|
|
use crate::session::session::Session;
|
|
use crate::session::turn_context::TurnContext;
|
|
use crate::test_support;
|
|
use codex_analytics::GuardianApprovalRequestSource;
|
|
use codex_config::ConfigLayerStack;
|
|
use codex_config::FeatureRequirementsToml;
|
|
use codex_config::NetworkConstraints;
|
|
use codex_config::NetworkDomainPermissionToml;
|
|
use codex_config::NetworkDomainPermissionsToml;
|
|
use codex_config::RequirementSource;
|
|
use codex_config::Sourced;
|
|
use codex_config::config_toml::ConfigToml;
|
|
use codex_config::types::McpServerConfig;
|
|
use codex_exec_server::LOCAL_FS;
|
|
use codex_features::Feature;
|
|
use codex_model_provider::create_model_provider;
|
|
use codex_network_proxy::NetworkProxyConfig;
|
|
use codex_protocol::ThreadId;
|
|
use codex_protocol::approvals::NetworkApprovalProtocol;
|
|
use codex_protocol::config_types::ApprovalsReviewer;
|
|
use codex_protocol::models::ContentItem;
|
|
use codex_protocol::models::PermissionProfile;
|
|
use codex_protocol::models::ResponseItem;
|
|
use codex_protocol::protocol::AskForApproval;
|
|
use codex_protocol::protocol::Event;
|
|
use codex_protocol::protocol::EventMsg;
|
|
use codex_protocol::protocol::GranularApprovalConfig;
|
|
use codex_protocol::protocol::GuardianAssessmentStatus;
|
|
use codex_protocol::protocol::GuardianRiskLevel;
|
|
use codex_protocol::protocol::GuardianUserAuthorization;
|
|
use codex_protocol::protocol::ReviewDecision;
|
|
use codex_protocol::protocol::RolloutItem;
|
|
use codex_protocol::protocol::TurnCompleteEvent;
|
|
use core_test_support::PathBufExt;
|
|
use core_test_support::TempDirExt;
|
|
use core_test_support::context_snapshot;
|
|
use core_test_support::context_snapshot::ContextSnapshotOptions;
|
|
use core_test_support::responses::ev_assistant_message;
|
|
use core_test_support::responses::ev_completed;
|
|
use core_test_support::responses::ev_response_created;
|
|
use core_test_support::responses::mount_response_once;
|
|
use core_test_support::responses::mount_sse_once;
|
|
use core_test_support::responses::mount_sse_sequence;
|
|
use core_test_support::responses::sse;
|
|
use core_test_support::responses::start_mock_server;
|
|
use core_test_support::skip_if_no_network;
|
|
use core_test_support::streaming_sse::StreamingSseChunk;
|
|
use core_test_support::streaming_sse::start_streaming_sse_server;
|
|
use core_test_support::test_path_buf;
|
|
use insta::Settings;
|
|
use insta::assert_snapshot;
|
|
use pretty_assertions::assert_eq;
|
|
use std::collections::BTreeMap;
|
|
use std::collections::HashMap;
|
|
use std::sync::Arc;
|
|
use std::time::Duration;
|
|
use tempfile::TempDir;
|
|
use tokio_util::sync::CancellationToken;
|
|
|
|
fn fixed_guardian_parent_session_id() -> ThreadId {
|
|
ThreadId::from_string("11111111-1111-4111-8111-111111111111")
|
|
.expect("fixed parent session id should be a valid UUID")
|
|
}
|
|
|
|
/// Three denials in a row on the same turn should yield `InterruptTurn`, and
/// the denial recorded right after that should yield `Continue` again.
#[test]
fn guardian_rejection_circuit_breaker_interrupts_after_three_consecutive_denials() {
    let mut breaker = GuardianRejectionCircuitBreaker::default();

    // The first and second denials are expected to let the turn continue.
    for _ in 0..2 {
        let action = breaker.record_denial("turn-1");
        assert_eq!(action, GuardianRejectionCircuitBreakerAction::Continue);
    }

    let third = breaker.record_denial("turn-1");
    assert_eq!(
        third,
        GuardianRejectionCircuitBreakerAction::InterruptTurn {
            consecutive_denials: 3,
            total_denials: 3,
        }
    );

    let fourth = breaker.record_denial("turn-1");
    assert_eq!(fourth, GuardianRejectionCircuitBreakerAction::Continue);
}
|
|
|
|
/// A non-denial between denials should restart the consecutive count while
/// the total keeps growing: the interrupt reports 3 consecutive and 4 total.
#[test]
fn guardian_rejection_circuit_breaker_resets_consecutive_denials_on_non_denial() {
    let mut breaker = GuardianRejectionCircuitBreaker::default();

    let before_reset = breaker.record_denial("turn-1");
    assert_eq!(before_reset, GuardianRejectionCircuitBreakerAction::Continue);

    breaker.record_non_denial("turn-1");

    // Two more denials after the non-denial still continue.
    for _ in 0..2 {
        let action = breaker.record_denial("turn-1");
        assert_eq!(action, GuardianRejectionCircuitBreakerAction::Continue);
    }

    let tripping = breaker.record_denial("turn-1");
    assert_eq!(
        tripping,
        GuardianRejectionCircuitBreakerAction::InterruptTurn {
            consecutive_denials: 3,
            total_denials: 4,
        }
    );
}
|
|
|
|
/// Nine denials, each followed by a non-denial, should all continue; the
/// tenth denial should interrupt with 1 consecutive and 10 total denials.
#[test]
fn guardian_rejection_circuit_breaker_interrupts_after_ten_total_denials() {
    let mut breaker = GuardianRejectionCircuitBreaker::default();

    for _round in 0..9 {
        let action = breaker.record_denial("turn-1");
        assert_eq!(action, GuardianRejectionCircuitBreakerAction::Continue);
        // Break the streak so only the total keeps accumulating.
        breaker.record_non_denial("turn-1");
    }

    let tenth = breaker.record_denial("turn-1");
    let expected = GuardianRejectionCircuitBreakerAction::InterruptTurn {
        consecutive_denials: 1,
        total_denials: 10,
    };
    assert_eq!(tenth, expected);
}
|
|
|
|
async fn guardian_test_session_and_turn(
|
|
server: &wiremock::MockServer,
|
|
) -> (Arc<Session>, Arc<TurnContext>) {
|
|
guardian_test_session_and_turn_with_base_url(server.uri().as_str()).await
|
|
}
|
|
|
|
async fn guardian_test_session_and_turn_with_base_url(
|
|
base_url: &str,
|
|
) -> (Arc<Session>, Arc<TurnContext>) {
|
|
let (mut session, mut turn) = crate::session::tests::make_session_and_context().await;
|
|
session.conversation_id = fixed_guardian_parent_session_id();
|
|
let mut config = (*turn.config).clone();
|
|
config.model_provider.base_url = Some(format!("{base_url}/v1"));
|
|
config.user_instructions = None;
|
|
let config = Arc::new(config);
|
|
let models_manager = test_support::models_manager_with_provider(
|
|
config.codex_home.to_path_buf(),
|
|
Arc::clone(&session.services.auth_manager),
|
|
config.model_provider.clone(),
|
|
);
|
|
session.services.models_manager = models_manager;
|
|
turn.config = Arc::clone(&config);
|
|
turn.provider = create_model_provider(config.model_provider.clone(), turn.auth_manager.clone());
|
|
turn.user_instructions = None;
|
|
|
|
(Arc::new(session), Arc::new(turn))
|
|
}
|
|
|
|
async fn seed_guardian_parent_history(session: &Arc<Session>, turn: &Arc<TurnContext>) {
|
|
session
|
|
.record_into_history(
|
|
&[
|
|
ResponseItem::Message {
|
|
id: None,
|
|
role: "user".to_string(),
|
|
content: vec![ContentItem::InputText {
|
|
text: "Please check the repo visibility and push the docs fix if needed."
|
|
.to_string(),
|
|
}],
|
|
phase: None,
|
|
},
|
|
ResponseItem::FunctionCall {
|
|
id: None,
|
|
name: "gh_repo_view".to_string(),
|
|
namespace: None,
|
|
arguments: "{\"repo\":\"openai/codex\"}".to_string(),
|
|
call_id: "call-1".to_string(),
|
|
},
|
|
ResponseItem::FunctionCallOutput {
|
|
call_id: "call-1".to_string(),
|
|
output: codex_protocol::models::FunctionCallOutputPayload::from_text(
|
|
"repo visibility: public".to_string(),
|
|
),
|
|
},
|
|
ResponseItem::Message {
|
|
id: None,
|
|
role: "assistant".to_string(),
|
|
content: vec![ContentItem::OutputText {
|
|
text: "The repo is public; I now need approval to push the docs fix."
|
|
.to_string(),
|
|
}],
|
|
phase: None,
|
|
},
|
|
],
|
|
turn.as_ref(),
|
|
)
|
|
.await;
|
|
}
|
|
|
|
fn rollout_item_contains_message_text(item: &RolloutItem, needle: &str) -> bool {
|
|
let RolloutItem::ResponseItem(response_item) = item else {
|
|
return false;
|
|
};
|
|
response_item_contains_message_text(response_item, needle)
|
|
}
|
|
|
|
fn response_item_contains_message_text(item: &ResponseItem, needle: &str) -> bool {
|
|
let ResponseItem::Message { content, .. } = item else {
|
|
return false;
|
|
};
|
|
content.iter().any(|item| match item {
|
|
ContentItem::InputText { text } | ContentItem::OutputText { text } => text.contains(needle),
|
|
ContentItem::InputImage { .. } => false,
|
|
})
|
|
}
|
|
|
|
fn guardian_snapshot_options() -> ContextSnapshotOptions {
|
|
ContextSnapshotOptions::default()
|
|
.strip_capability_instructions()
|
|
.strip_agents_md_user_context()
|
|
}
|
|
|
|
fn normalize_guardian_snapshot_paths(text: String) -> String {
|
|
let mut text = text;
|
|
for canonical_path in ["/repo/codex-rs/core", "/repo"] {
|
|
let platform_path = test_path_buf(canonical_path).display().to_string();
|
|
if platform_path == canonical_path {
|
|
continue;
|
|
}
|
|
|
|
let escaped_platform_path = serde_json::to_string(&platform_path)
|
|
.expect("test path should serialize")
|
|
.trim_matches('"')
|
|
.to_string();
|
|
text = text
|
|
.replace(&escaped_platform_path, canonical_path)
|
|
.replace(&platform_path, canonical_path);
|
|
}
|
|
text
|
|
}
|
|
|
|
/// Concatenates the text of every `UserInput::Text` item in `items`, in
/// order. Items of any other variant contribute nothing.
fn guardian_prompt_text(items: &[codex_protocol::user_input::UserInput]) -> String {
    let mut combined = String::new();
    for item in items {
        if let codex_protocol::user_input::UserInput::Text { text, .. } = item {
            combined.push_str(text);
        }
    }
    combined
}
|
|
|
|
/// Extracts the concatenated `input_text` spans of the last user message in
/// a request body.
///
/// Panics if `body["input"]` is not an array, or if no entry with role
/// `"user"` has an array-valued `content`.
fn last_user_message_text_from_body(body: &serde_json::Value) -> String {
    let input = body["input"].as_array().expect("request input array");

    // Walk backwards so the first hit is the last user message with array content.
    let spans = input
        .iter()
        .rev()
        .find_map(|entry| {
            let role = entry.get("role").and_then(serde_json::Value::as_str);
            if role != Some("user") {
                return None;
            }
            entry.get("content").and_then(serde_json::Value::as_array)
        })
        .expect("user message content");

    let mut text = String::new();
    for span in spans {
        let kind = span.get("type").and_then(serde_json::Value::as_str);
        if kind != Some("input_text") {
            continue;
        }
        if let Some(chunk) = span.get("text").and_then(serde_json::Value::as_str) {
            text.push_str(chunk);
        }
    }
    text
}
|
|
|
|
/// Rendering the first two of three entries should produce lines numbered
/// `[1]` and `[2]` and no omission note.
#[test]
fn build_guardian_transcript_keeps_original_numbering() {
    let entries = [
        (GuardianTranscriptEntryKind::User, "first"),
        (GuardianTranscriptEntryKind::Assistant, "second"),
        (GuardianTranscriptEntryKind::Assistant, "third"),
    ]
    .map(|(kind, text)| GuardianTranscriptEntry {
        kind,
        text: text.to_string(),
    });

    let (rendered, omission_note) = render_guardian_transcript_entries(&entries[..2]);

    let expected = vec![
        "[1] user: first".to_string(),
        "[2] assistant: second".to_string(),
    ];
    assert_eq!(rendered, expected);
    assert!(omission_note.is_none());
}
|
|
|
|
#[tokio::test(flavor = "current_thread")]
|
|
async fn build_guardian_prompt_full_mode_preserves_initial_review_format() -> anyhow::Result<()> {
|
|
let (session, turn) = guardian_test_session_and_turn_with_base_url("http://localhost").await;
|
|
seed_guardian_parent_history(&session, &turn).await;
|
|
|
|
let prompt = build_guardian_prompt_items(
|
|
session.as_ref(),
|
|
Some("Sandbox denied outbound git push to github.com.".to_string()),
|
|
GuardianApprovalRequest::Shell {
|
|
id: "shell-1".to_string(),
|
|
command: vec!["git".to_string(), "push".to_string()],
|
|
cwd: test_path_buf("/repo/codex-rs/core").abs(),
|
|
sandbox_permissions: crate::sandboxing::SandboxPermissions::UseDefault,
|
|
additional_permissions: None,
|
|
justification: Some("Need to push the reviewed docs fix.".to_string()),
|
|
},
|
|
GuardianPromptMode::Full,
|
|
)
|
|
.await?;
|
|
|
|
let text = guardian_prompt_text(&prompt.items);
|
|
assert!(text.contains("whose request action you are assessing"));
|
|
assert!(text.contains(">>> TRANSCRIPT START\n"));
|
|
assert!(text.contains(">>> TRANSCRIPT END\n"));
|
|
assert!(text.contains("The Codex agent has requested the following action:\n"));
|
|
assert!(!text.contains("TRANSCRIPT DELTA"));
|
|
assert_eq!(prompt.transcript_cursor.transcript_entry_count, 4);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "current_thread")]
|
|
async fn build_guardian_prompt_delta_mode_preserves_original_numbering() -> anyhow::Result<()> {
|
|
let (session, turn) = guardian_test_session_and_turn_with_base_url("http://localhost").await;
|
|
seed_guardian_parent_history(&session, &turn).await;
|
|
session
|
|
.record_into_history(
|
|
&[
|
|
ResponseItem::Message {
|
|
id: None,
|
|
role: "user".to_string(),
|
|
content: vec![ContentItem::InputText {
|
|
text: "Please also push the second docs fix.".to_string(),
|
|
}],
|
|
phase: None,
|
|
},
|
|
ResponseItem::Message {
|
|
id: None,
|
|
role: "assistant".to_string(),
|
|
content: vec![ContentItem::OutputText {
|
|
text: "I need approval for the second push.".to_string(),
|
|
}],
|
|
phase: None,
|
|
},
|
|
],
|
|
turn.as_ref(),
|
|
)
|
|
.await;
|
|
|
|
let prompt = build_guardian_prompt_items(
|
|
session.as_ref(),
|
|
/*retry_reason*/ None,
|
|
GuardianApprovalRequest::Shell {
|
|
id: "shell-2".to_string(),
|
|
command: vec!["git".to_string(), "push".to_string()],
|
|
cwd: test_path_buf("/repo/codex-rs/core").abs(),
|
|
sandbox_permissions: crate::sandboxing::SandboxPermissions::UseDefault,
|
|
additional_permissions: None,
|
|
justification: Some("Need to push the second docs fix.".to_string()),
|
|
},
|
|
GuardianPromptMode::Delta {
|
|
cursor: GuardianTranscriptCursor {
|
|
parent_history_version: 0,
|
|
transcript_entry_count: 4,
|
|
},
|
|
},
|
|
)
|
|
.await?;
|
|
|
|
let text = guardian_prompt_text(&prompt.items);
|
|
assert!(text.contains("added since your last approval assessment"));
|
|
assert!(text.contains(">>> TRANSCRIPT DELTA START\n"));
|
|
assert!(text.contains("[5] user: Please also push the second docs fix."));
|
|
assert!(text.contains("[6] assistant: I need approval for the second push."));
|
|
assert!(text.contains(">>> TRANSCRIPT DELTA END\n"));
|
|
assert!(text.contains("The Codex agent has requested the following next action:\n"));
|
|
assert!(!text.contains("[1] user: Please check the repo visibility"));
|
|
assert_eq!(prompt.transcript_cursor.transcript_entry_count, 6);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "current_thread")]
|
|
async fn build_guardian_prompt_delta_mode_handles_empty_delta() -> anyhow::Result<()> {
|
|
let (session, turn) = guardian_test_session_and_turn_with_base_url("http://localhost").await;
|
|
seed_guardian_parent_history(&session, &turn).await;
|
|
|
|
let prompt = build_guardian_prompt_items(
|
|
session.as_ref(),
|
|
/*retry_reason*/ None,
|
|
GuardianApprovalRequest::Shell {
|
|
id: "shell-2".to_string(),
|
|
command: vec!["git".to_string(), "push".to_string()],
|
|
cwd: test_path_buf("/repo/codex-rs/core").abs(),
|
|
sandbox_permissions: crate::sandboxing::SandboxPermissions::UseDefault,
|
|
additional_permissions: None,
|
|
justification: Some("Need to push the second docs fix.".to_string()),
|
|
},
|
|
GuardianPromptMode::Delta {
|
|
cursor: GuardianTranscriptCursor {
|
|
parent_history_version: 0,
|
|
transcript_entry_count: 4,
|
|
},
|
|
},
|
|
)
|
|
.await?;
|
|
|
|
let text = guardian_prompt_text(&prompt.items);
|
|
assert!(text.contains(">>> TRANSCRIPT DELTA START\n"));
|
|
assert!(text.contains("<no retained transcript delta entries>"));
|
|
assert!(text.contains(">>> TRANSCRIPT DELTA END\n"));
|
|
assert_eq!(prompt.transcript_cursor.transcript_entry_count, 4);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "current_thread")]
|
|
async fn build_guardian_prompt_stale_delta_cursor_falls_back_to_full_prompt() -> anyhow::Result<()>
|
|
{
|
|
let (session, turn) = guardian_test_session_and_turn_with_base_url("http://localhost").await;
|
|
seed_guardian_parent_history(&session, &turn).await;
|
|
|
|
let prompt = build_guardian_prompt_items(
|
|
session.as_ref(),
|
|
/*retry_reason*/ None,
|
|
GuardianApprovalRequest::Shell {
|
|
id: "shell-3".to_string(),
|
|
command: vec!["git".to_string(), "push".to_string()],
|
|
cwd: test_path_buf("/repo/codex-rs/core").abs(),
|
|
sandbox_permissions: crate::sandboxing::SandboxPermissions::UseDefault,
|
|
additional_permissions: None,
|
|
justification: Some("Need to push the docs fix.".to_string()),
|
|
},
|
|
GuardianPromptMode::Delta {
|
|
cursor: GuardianTranscriptCursor {
|
|
parent_history_version: 0,
|
|
transcript_entry_count: 99,
|
|
},
|
|
},
|
|
)
|
|
.await?;
|
|
|
|
let text = guardian_prompt_text(&prompt.items);
|
|
assert!(text.contains("whose request action you are assessing"));
|
|
assert!(text.contains(">>> TRANSCRIPT START\n"));
|
|
assert!(!text.contains("TRANSCRIPT DELTA"));
|
|
assert_eq!(prompt.transcript_cursor.transcript_entry_count, 4);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "current_thread")]
|
|
async fn build_guardian_prompt_stale_delta_version_falls_back_to_full_prompt() -> anyhow::Result<()>
|
|
{
|
|
let (session, turn) = guardian_test_session_and_turn_with_base_url("http://localhost").await;
|
|
seed_guardian_parent_history(&session, &turn).await;
|
|
session
|
|
.replace_history(
|
|
vec![
|
|
ResponseItem::Message {
|
|
id: None,
|
|
role: "user".to_string(),
|
|
content: vec![ContentItem::InputText {
|
|
text: "Compacted retained user request.".to_string(),
|
|
}],
|
|
phase: None,
|
|
},
|
|
ResponseItem::Message {
|
|
id: None,
|
|
role: "assistant".to_string(),
|
|
content: vec![ContentItem::OutputText {
|
|
text: "Compacted summary of earlier guardian context.".to_string(),
|
|
}],
|
|
phase: None,
|
|
},
|
|
],
|
|
/*reference_context_item*/ None,
|
|
)
|
|
.await;
|
|
session
|
|
.record_into_history(
|
|
&[
|
|
ResponseItem::Message {
|
|
id: None,
|
|
role: "user".to_string(),
|
|
content: vec![ContentItem::InputText {
|
|
text: "Please push after the compaction.".to_string(),
|
|
}],
|
|
phase: None,
|
|
},
|
|
ResponseItem::Message {
|
|
id: None,
|
|
role: "assistant".to_string(),
|
|
content: vec![ContentItem::OutputText {
|
|
text: "I need approval for the post-compaction push.".to_string(),
|
|
}],
|
|
phase: None,
|
|
},
|
|
],
|
|
turn.as_ref(),
|
|
)
|
|
.await;
|
|
|
|
let prompt = build_guardian_prompt_items(
|
|
session.as_ref(),
|
|
/*retry_reason*/ None,
|
|
GuardianApprovalRequest::Shell {
|
|
id: "shell-4".to_string(),
|
|
command: vec!["git".to_string(), "push".to_string()],
|
|
cwd: test_path_buf("/repo/codex-rs/core").abs(),
|
|
sandbox_permissions: crate::sandboxing::SandboxPermissions::UseDefault,
|
|
additional_permissions: None,
|
|
justification: Some("Need to push after the compaction.".to_string()),
|
|
},
|
|
GuardianPromptMode::Delta {
|
|
cursor: GuardianTranscriptCursor {
|
|
parent_history_version: 0,
|
|
transcript_entry_count: 4,
|
|
},
|
|
},
|
|
)
|
|
.await?;
|
|
|
|
let text = guardian_prompt_text(&prompt.items);
|
|
assert!(text.contains("whose request action you are assessing"));
|
|
assert!(text.contains(">>> TRANSCRIPT START\n"));
|
|
assert!(!text.contains("TRANSCRIPT DELTA"));
|
|
assert!(text.contains("[3] user: Please push after the compaction."));
|
|
assert!(text.contains("[4] assistant: I need approval for the post-compaction push."));
|
|
assert_eq!(prompt.transcript_cursor.parent_history_version, 1);
|
|
assert_eq!(prompt.transcript_cursor.transcript_entry_count, 4);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// A user message holding only an `<environment_context>` block should be
/// left out of the transcript, leaving just the assistant entry.
#[test]
fn collect_guardian_transcript_entries_skips_contextual_user_messages() {
    let contextual_user = ResponseItem::Message {
        id: None,
        role: "user".to_string(),
        content: vec![ContentItem::InputText {
            text: "<environment_context>\n<cwd>/tmp</cwd>\n</environment_context>".to_string(),
        }],
        phase: None,
    };
    let assistant_reply = ResponseItem::Message {
        id: None,
        role: "assistant".to_string(),
        content: vec![ContentItem::OutputText {
            text: "hello".to_string(),
        }],
        phase: None,
    };
    let items = vec![contextual_user, assistant_reply];

    let collected = collect_guardian_transcript_entries(&items);

    assert_eq!(collected.len(), 1);
    let expected = GuardianTranscriptEntry {
        kind: GuardianTranscriptEntryKind::Assistant,
        text: "hello".to_string(),
    };
    assert_eq!(collected[0], expected);
}
|
|
|
|
/// Of two developer messages, only the one starting with the auto-review
/// denied-action approval prefix should be kept, as a `Developer` entry.
#[test]
fn collect_guardian_transcript_entries_keeps_manual_approval_developer_message() {
    let developer_message = |text: String| ResponseItem::Message {
        id: None,
        role: "developer".to_string(),
        content: vec![ContentItem::InputText { text }],
        phase: None,
    };

    let approval_text =
        format!("{AUTO_REVIEW_DENIED_ACTION_APPROVAL_DEVELOPER_PREFIX}\n\nApproved action:\n{{}}");
    let items = vec![
        developer_message("ordinary developer context".to_string()),
        developer_message(approval_text.clone()),
    ];

    let collected = collect_guardian_transcript_entries(&items);

    let expected = vec![GuardianTranscriptEntry {
        kind: GuardianTranscriptEntryKind::Developer,
        text: approval_text,
    }];
    assert_eq!(collected, expected);
}
|
|
|
|
/// A function call and its output should appear in the transcript as `Tool`
/// entries labelled `tool read_file call` and `tool read_file result`.
#[test]
fn collect_guardian_transcript_entries_includes_recent_tool_calls_and_output() {
    let user_request = ResponseItem::Message {
        id: None,
        role: "user".to_string(),
        content: vec![ContentItem::InputText {
            text: "check the repo".to_string(),
        }],
        phase: None,
    };
    let read_file_call = ResponseItem::FunctionCall {
        id: None,
        name: "read_file".to_string(),
        namespace: None,
        arguments: "{\"path\":\"README.md\"}".to_string(),
        call_id: "call-1".to_string(),
    };
    let read_file_output = ResponseItem::FunctionCallOutput {
        call_id: "call-1".to_string(),
        output: codex_protocol::models::FunctionCallOutputPayload::from_text(
            "repo is public".to_string(),
        ),
    };
    let assistant_reply = ResponseItem::Message {
        id: None,
        role: "assistant".to_string(),
        content: vec![ContentItem::OutputText {
            text: "I need to push a fix".to_string(),
        }],
        phase: None,
    };
    let items = vec![
        user_request,
        read_file_call,
        read_file_output,
        assistant_reply,
    ];

    let collected = collect_guardian_transcript_entries(&items);

    assert_eq!(collected.len(), 4);

    let expected_call = GuardianTranscriptEntry {
        kind: GuardianTranscriptEntryKind::Tool("tool read_file call".to_string()),
        text: "{\"path\":\"README.md\"}".to_string(),
    };
    assert_eq!(collected[1], expected_call);

    let expected_result = GuardianTranscriptEntry {
        kind: GuardianTranscriptEntryKind::Tool("tool read_file result".to_string()),
        text: "repo is public".to_string(),
    };
    assert_eq!(collected[2], expected_result);
}
|
|
|
|
/// Truncating a long string with a token cap of 20 should keep its start and
/// end, insert the `<truncated ...>` marker, and report that it truncated.
#[test]
fn guardian_truncate_text_keeps_prefix_suffix_and_xml_marker() {
    // 200 copies of "prefix " followed by 200 copies of " suffix".
    let mut content = "prefix ".repeat(200);
    content.push_str(&" suffix".repeat(200));

    let (shortened, did_truncate) = guardian_truncate_text(&content, /*token_cap*/ 20);

    assert!(shortened.starts_with("prefix"));
    assert!(shortened.contains("<truncated omitted_approx_tokens=\""));
    assert!(shortened.ends_with("suffix"));
    assert!(did_truncate);
}
|
|
|
|
/// A very large patch should be rendered with a truncation marker, come out
/// shorter than the patch itself, and be flagged as truncated.
#[test]
fn format_guardian_action_pretty_truncates_large_string_fields() -> serde_json::Result<()> {
    let patch = "line\n".repeat(100_000);
    // Remember the length up front so the patch can be moved into the request.
    let patch_len = patch.len();
    let action = GuardianApprovalRequest::ApplyPatch {
        id: "patch-1".to_string(),
        cwd: test_path_buf("/tmp").abs(),
        files: Vec::new(),
        patch,
    };

    let pretty = format_guardian_action_pretty(&action)?;

    assert!(pretty.text.contains("\"tool\": \"apply_patch\""));
    assert!(pretty.text.contains("<truncated omitted_approx_tokens="));
    assert!(pretty.text.len() < patch_len);
    assert!(pretty.truncated);
    Ok(())
}
|
|
|
|
/// A one-line patch should render with the `apply_patch` tool tag and not be
/// flagged as truncated.
#[test]
fn format_guardian_action_pretty_reports_no_truncation_for_small_payload() -> serde_json::Result<()>
{
    let small_patch = "line\n".to_string();
    let action = GuardianApprovalRequest::ApplyPatch {
        id: "patch-1".to_string(),
        cwd: test_path_buf("/tmp").abs(),
        files: Vec::new(),
        patch: small_patch,
    };

    let pretty = format_guardian_action_pretty(&action)?;

    assert!(pretty.text.contains("\"tool\": \"apply_patch\""));
    assert!(!pretty.truncated);
    Ok(())
}
|
|
|
|
/// An MCP tool call should serialize to the expected JSON object, in which
/// every field set to `None` on the request is absent.
#[test]
fn guardian_approval_request_to_json_renders_mcp_tool_call_shape() -> serde_json::Result<()> {
    let annotations = GuardianMcpAnnotations {
        destructive_hint: Some(true),
        open_world_hint: None,
        read_only_hint: Some(false),
    };
    let arguments = serde_json::json!({
        "url": "https://example.com",
    });
    let action = GuardianApprovalRequest::McpToolCall {
        id: "call-1".to_string(),
        server: "mcp_server".to_string(),
        tool_name: "browser_navigate".to_string(),
        arguments: Some(arguments),
        connector_id: None,
        connector_name: Some("Playwright".to_string()),
        connector_description: None,
        tool_title: Some("Navigate".to_string()),
        tool_description: None,
        annotations: Some(annotations),
    };

    let expected = serde_json::json!({
        "tool": "mcp_tool_call",
        "server": "mcp_server",
        "tool_name": "browser_navigate",
        "arguments": {
            "url": "https://example.com",
        },
        "connector_name": "Playwright",
        "tool_title": "Navigate",
        "annotations": {
            "destructive_hint": true,
            "read_only_hint": false,
        },
    });

    let actual = guardian_approval_request_to_json(&action)?;
    assert_eq!(actual, expected);
    Ok(())
}
|
|
|
|
/// A network access request with a trigger should serialize the trigger
/// under a `"trigger"` key with camelCase field names.
#[test]
fn guardian_approval_request_to_json_renders_network_access_trigger() -> serde_json::Result<()> {
    let cwd = test_path_buf("/repo").abs();
    let trigger = GuardianNetworkAccessTrigger {
        call_id: "call-1".to_string(),
        tool_name: "shell".to_string(),
        command: vec!["curl".to_string(), "https://example.com".to_string()],
        cwd: cwd.clone(),
        sandbox_permissions: crate::sandboxing::SandboxPermissions::UseDefault,
        additional_permissions: None,
        justification: Some("Fetch the release metadata.".to_string()),
        tty: None,
    };
    let action = GuardianApprovalRequest::NetworkAccess {
        id: "network-1".to_string(),
        turn_id: "turn-1".to_string(),
        target: "https://example.com:443".to_string(),
        host: "example.com".to_string(),
        protocol: NetworkApprovalProtocol::Https,
        port: 443,
        trigger: Some(trigger),
    };

    let expected = serde_json::json!({
        "tool": "network_access",
        "target": "https://example.com:443",
        "host": "example.com",
        "protocol": "https",
        "port": 443,
        "trigger": {
            "callId": "call-1",
            "toolName": "shell",
            "command": ["curl", "https://example.com"],
            "cwd": cwd.to_string_lossy().to_string(),
            "sandboxPermissions": "use_default",
            "justification": "Fetch the release metadata.",
        },
    });

    let actual = guardian_approval_request_to_json(&action)?;
    assert_eq!(actual, expected);

    Ok(())
}
|
|
|
|
#[tokio::test(flavor = "current_thread")]
|
|
async fn build_guardian_prompt_items_explains_network_access_review_scope() -> anyhow::Result<()> {
|
|
let (session, turn) = guardian_test_session_and_turn_with_base_url("http://localhost").await;
|
|
seed_guardian_parent_history(&session, &turn).await;
|
|
let cwd = test_path_buf("/repo").abs();
|
|
|
|
let prompt = build_guardian_prompt_items(
|
|
session.as_ref(),
|
|
Some("Network access to \"example.com\" is blocked by policy.".to_string()),
|
|
GuardianApprovalRequest::NetworkAccess {
|
|
id: "network-1".to_string(),
|
|
turn_id: "turn-1".to_string(),
|
|
target: "https://example.com:443".to_string(),
|
|
host: "example.com".to_string(),
|
|
protocol: NetworkApprovalProtocol::Https,
|
|
port: 443,
|
|
trigger: Some(GuardianNetworkAccessTrigger {
|
|
call_id: "call-1".to_string(),
|
|
tool_name: "shell".to_string(),
|
|
command: vec!["curl".to_string(), "https://example.com".to_string()],
|
|
cwd,
|
|
sandbox_permissions: crate::sandboxing::SandboxPermissions::UseDefault,
|
|
additional_permissions: None,
|
|
justification: Some("Fetch the release metadata.".to_string()),
|
|
tty: None,
|
|
}),
|
|
},
|
|
GuardianPromptMode::Full,
|
|
)
|
|
.await?;
|
|
|
|
let text = guardian_prompt_text(&prompt.items);
|
|
assert!(text.contains("Below is a proposed network access request under review."));
|
|
assert!(!text.contains("Network approval context:"));
|
|
assert!(
|
|
!text.contains(
|
|
"This approval request is about network access to the target in the network access JSON below"
|
|
)
|
|
);
|
|
assert!(
|
|
text.contains(
|
|
"When assessing this request, focus primarily on whether the triggering command is authorised by the user and whether it is within the rules."
|
|
)
|
|
);
|
|
assert!(
|
|
text.contains(
|
|
"The user does not need to have explicitly authorised this exact network connection, as long as the network access is a reasonable consequence of the triggering command."
|
|
)
|
|
);
|
|
assert!(text.contains("\"trigger\""));
|
|
assert!(text.contains("Network access JSON:"));
|
|
assert!(!text.contains("The Codex agent has requested the following action:"));
|
|
assert!(!text.contains("Planned action JSON:"));
|
|
assert!(!text.contains("Retry reason:"));
|
|
assert!(!text.contains("Network access to \"example.com\" is blocked by policy."));
|
|
|
|
let mut settings = Settings::clone_current();
|
|
settings.set_snapshot_path("snapshots");
|
|
settings.set_prepend_module_to_snapshot(false);
|
|
settings.bind(|| {
|
|
assert_snapshot!(
|
|
"codex_core__guardian__tests__network_access_guardian_prompt_layout",
|
|
normalize_guardian_snapshot_paths(text)
|
|
);
|
|
});
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// The assessment action for an apply-patch request should serialize to just
/// the type, cwd and file list, with no patch text.
#[test]
fn guardian_assessment_action_redacts_apply_patch_patch_text() {
    let cwd = test_path_buf("/tmp").abs();
    let file = test_path_buf("/tmp/guardian.txt").abs();
    let patch_text = "*** Begin Patch\n*** Update File: guardian.txt\n@@\n+secret\n*** End Patch"
        .to_string();
    let action = GuardianApprovalRequest::ApplyPatch {
        id: "patch-1".to_string(),
        cwd: cwd.clone(),
        files: vec![file.clone()],
        patch: patch_text,
    };

    let serialized =
        serde_json::to_value(guardian_assessment_action(&action)).expect("serialize action");
    let expected = serde_json::json!({
        "type": "apply_patch",
        "cwd": cwd,
        "files": [file],
    });

    assert_eq!(serialized, expected);
}
|
|
|
|
/// A network access request should resolve to its own `turn_id`, while an
/// apply-patch request should resolve to the fallback turn id.
#[test]
fn guardian_request_turn_id_prefers_network_access_owner_turn() {
    let apply_patch = GuardianApprovalRequest::ApplyPatch {
        id: "patch-1".to_string(),
        cwd: test_path_buf("/tmp").abs(),
        files: vec![test_path_buf("/tmp/guardian.txt").abs()],
        patch: "*** Begin Patch\n*** Update File: guardian.txt\n@@\n+hello\n*** End Patch"
            .to_string(),
    };
    let network_access = GuardianApprovalRequest::NetworkAccess {
        id: "network-1".to_string(),
        turn_id: "owner-turn".to_string(),
        target: "https://example.com:443".to_string(),
        host: "example.com".to_string(),
        protocol: NetworkApprovalProtocol::Https,
        port: 443,
        trigger: None,
    };

    let network_turn = guardian_request_turn_id(&network_access, "fallback-turn");
    assert_eq!(network_turn, "owner-turn");

    let patch_turn = guardian_request_turn_id(&apply_patch, "fallback-turn");
    assert_eq!(patch_turn, "fallback-turn");
}
|
|
|
|
/// A network access request should have no target item id even when its
/// trigger carries a call id.
#[test]
fn guardian_request_target_item_id_omits_network_access_trigger_call_id() {
    let trigger = GuardianNetworkAccessTrigger {
        call_id: "call-1".to_string(),
        tool_name: "shell".to_string(),
        command: vec!["curl".to_string(), "https://example.com".to_string()],
        cwd: test_path_buf("/repo").abs(),
        sandbox_permissions: crate::sandboxing::SandboxPermissions::UseDefault,
        additional_permissions: None,
        justification: None,
        tty: None,
    };
    let network_access = GuardianApprovalRequest::NetworkAccess {
        id: "network-1".to_string(),
        turn_id: "owner-turn".to_string(),
        target: "https://example.com:443".to_string(),
        host: "example.com".to_string(),
        protocol: NetworkApprovalProtocol::Https,
        port: 443,
        trigger: Some(trigger),
    };

    let target_item_id = guardian_request_target_item_id(&network_access);
    assert_eq!(target_item_id, None);
}
|
|
|
|
#[tokio::test]
|
|
async fn cancelled_guardian_review_emits_terminal_abort_without_warning() {
|
|
let (session, turn, rx) = crate::session::tests::make_session_and_context_with_rx().await;
|
|
let cancel_token = CancellationToken::new();
|
|
cancel_token.cancel();
|
|
|
|
let decision = review_approval_request_with_cancel(
|
|
&session,
|
|
&turn,
|
|
"review-cancelled-guardian".to_string(),
|
|
GuardianApprovalRequest::ApplyPatch {
|
|
id: "patch-1".to_string(),
|
|
cwd: test_path_buf("/tmp").abs(),
|
|
files: vec![test_path_buf("/tmp/guardian.txt").abs()],
|
|
patch: "*** Begin Patch\n*** Update File: guardian.txt\n@@\n+hello\n*** End Patch"
|
|
.to_string(),
|
|
},
|
|
/*retry_reason*/ None,
|
|
GuardianApprovalRequestSource::MainTurn,
|
|
cancel_token,
|
|
)
|
|
.await;
|
|
|
|
assert_eq!(decision, ReviewDecision::Abort);
|
|
|
|
let mut guardian_statuses = Vec::new();
|
|
let mut warnings = Vec::new();
|
|
while let Ok(event) = rx.try_recv() {
|
|
match event.msg {
|
|
EventMsg::GuardianAssessment(event) => guardian_statuses.push(event.status),
|
|
EventMsg::GuardianWarning(event) => warnings.push(event.message),
|
|
_ => {}
|
|
}
|
|
}
|
|
|
|
assert_eq!(
|
|
guardian_statuses,
|
|
vec![
|
|
GuardianAssessmentStatus::InProgress,
|
|
GuardianAssessmentStatus::Aborted,
|
|
]
|
|
);
|
|
assert!(warnings.is_empty());
|
|
}
|
|
|
|
/// Checks the wording of `guardian_timeout_message`: it must mention the
/// missed deadline and a single retry, and must not use the
/// "unacceptable risk" phrasing.
#[test]
fn guardian_timeout_message_distinguishes_timeout_from_policy_denial() {
    let timeout_text = guardian_timeout_message();

    // Phrases that have to be present in the timeout message.
    for required_phrase in ["did not finish before its deadline", "retry once"] {
        assert!(timeout_text.contains(required_phrase));
    }

    // Phrase that must be absent from the timeout message.
    assert!(!timeout_text.contains("unacceptable risk"));
}
|
|
|
|
#[tokio::test]
|
|
async fn routes_approval_to_guardian_requires_guardian_reviewer() {
|
|
let (_session, mut turn) = crate::session::tests::make_session_and_context().await;
|
|
let mut config = (*turn.config).clone();
|
|
config.approvals_reviewer = ApprovalsReviewer::User;
|
|
turn.config = Arc::new(config.clone());
|
|
|
|
assert!(!routes_approval_to_guardian(&turn));
|
|
|
|
config.approvals_reviewer = ApprovalsReviewer::AutoReview;
|
|
turn.config = Arc::new(config);
|
|
|
|
assert!(routes_approval_to_guardian(&turn));
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn routes_approval_to_guardian_allows_granular_review_policy() {
|
|
let (_session, mut turn) = crate::session::tests::make_session_and_context().await;
|
|
let mut config = (*turn.config).clone();
|
|
config.approvals_reviewer = ApprovalsReviewer::AutoReview;
|
|
turn.config = Arc::new(config);
|
|
turn.approval_policy
|
|
.set(AskForApproval::Granular(GranularApprovalConfig {
|
|
sandbox_approval: true,
|
|
rules: true,
|
|
skill_approval: true,
|
|
request_permissions: true,
|
|
mcp_elicitations: true,
|
|
}))
|
|
.expect("test setup should allow updating approval policy");
|
|
|
|
assert!(routes_approval_to_guardian(&turn));
|
|
}
|
|
|
|
/// Renders a transcript made of one short user entry, one short assistant
/// entry and twelve very large tool entries, then checks that the two
/// conversation entries are rendered verbatim, that the first two tool
/// entries are not rendered, and that an omission note is produced.
#[test]
fn build_guardian_transcript_reserves_separate_budget_for_tool_evidence() {
    // Large payload shared by every tool entry.
    let tool_payload = "signal ".repeat(8_000);

    let conversation = [
        GuardianTranscriptEntry {
            kind: GuardianTranscriptEntryKind::User,
            text: "please figure out if the repo is public".to_owned(),
        },
        GuardianTranscriptEntry {
            kind: GuardianTranscriptEntryKind::Assistant,
            text: "The public repo check is the main reason I want to escalate.".to_owned(),
        },
    ];
    let tool_calls = (0..12).map(|index| GuardianTranscriptEntry {
        kind: GuardianTranscriptEntryKind::Tool(format!("tool call {index}")),
        text: tool_payload.clone(),
    });
    let entries: Vec<GuardianTranscriptEntry> =
        conversation.into_iter().chain(tool_calls).collect();

    let (rendered, omission_note) = render_guardian_transcript_entries(&entries);

    // The short conversation entries are present exactly as written.
    let has_user_line = rendered
        .iter()
        .any(|line| line == "[1] user: please figure out if the repo is public");
    assert!(has_user_line);
    let has_assistant_line = rendered.iter().any(|line| {
        line == "[2] assistant: The public repo check is the main reason I want to escalate."
    });
    assert!(has_assistant_line);

    // The two oldest tool entries are absent from the rendered transcript.
    assert!(
        rendered
            .iter()
            .all(|line| !line.starts_with("[3] tool call 0:"))
    );
    assert!(
        rendered
            .iter()
            .all(|line| !line.starts_with("[4] tool call 1:"))
    );

    assert!(omission_note.is_some());
}
|
|
|
|
/// Renders a transcript made of eight very large user entries followed by a
/// shell tool call and its result, then checks that a user entry, the tool
/// call and the tool result all appear and that the generic omission note
/// is returned.
#[test]
fn build_guardian_transcript_preserves_recent_tool_context_when_user_history_is_large() {
    // Large payload shared by every user entry.
    let user_payload = "authorization ".repeat(6_000);

    let shell_call_text = serde_json::json!({
        "command": ["curl", "-X", "POST", "https://example.com/upload"],
        "cwd": "/repo",
    })
    .to_string();
    let recent_tool_entries = [
        GuardianTranscriptEntry {
            kind: GuardianTranscriptEntryKind::Tool("tool shell call".to_owned()),
            text: shell_call_text,
        },
        GuardianTranscriptEntry {
            kind: GuardianTranscriptEntryKind::Tool("tool shell result".to_owned()),
            text: "sandbox blocked outbound network access".to_owned(),
        },
    ];
    let entries: Vec<GuardianTranscriptEntry> = std::iter::repeat_with(|| {
        GuardianTranscriptEntry {
            kind: GuardianTranscriptEntryKind::User,
            text: user_payload.clone(),
        }
    })
    .take(8)
    .chain(recent_tool_entries)
    .collect();

    let (rendered, omission_note) = render_guardian_transcript_entries(&entries);

    // At least the first user entry is rendered.
    let has_first_user_line = rendered
        .iter()
        .any(|line| line.starts_with("[1] user: "));
    assert!(has_first_user_line);

    // The shell call is rendered with its command and URL.
    let has_shell_call = rendered.iter().any(|line| {
        ["tool shell call:", "curl", "https://example.com/upload"]
            .into_iter()
            .all(|needle| line.contains(needle))
    });
    assert!(has_shell_call);

    // The shell result is rendered with its full text.
    let has_shell_result = rendered
        .iter()
        .any(|line| line.contains("tool shell result: sandbox blocked outbound network access"));
    assert!(has_shell_result);

    assert_eq!(
        omission_note.as_deref(),
        Some("Some conversation entries were omitted.")
    );
}
|
|
|
|
/// Checks that `parse_guardian_assessment` accepts a JSON object that is
/// preceded by non-JSON text and returns all four fields from it.
#[test]
fn parse_guardian_assessment_extracts_embedded_json() {
    // The JSON object follows a plain-text "preface" word.
    let raw_output = "preface {\"risk_level\":\"medium\",\"user_authorization\":\"low\",\"outcome\":\"allow\",\"rationale\":\"ok\"}";
    let expected = GuardianAssessment {
        risk_level: GuardianRiskLevel::Medium,
        user_authorization: GuardianUserAuthorization::Low,
        outcome: GuardianAssessmentOutcome::Allow,
        rationale: String::from("ok"),
    };

    let assessment = parse_guardian_assessment(Some(raw_output)).expect("guardian assessment");

    assert_eq!(assessment, expected);
}
|
|
|
|
/// Checks the values `parse_guardian_assessment` fills in when the payload
/// contains only `"outcome": "allow"`: low risk, unknown authorization and
/// the stock allow rationale.
#[test]
fn parse_guardian_assessment_treats_bare_allow_as_low_risk() {
    let raw_output = r#"{"outcome":"allow"}"#;
    let expected = GuardianAssessment {
        risk_level: GuardianRiskLevel::Low,
        user_authorization: GuardianUserAuthorization::Unknown,
        outcome: GuardianAssessmentOutcome::Allow,
        rationale: String::from("Auto-review returned a low-risk allow decision."),
    };

    let assessment = parse_guardian_assessment(Some(raw_output)).expect("guardian assessment");

    assert_eq!(assessment, expected);
}
|
|
|
|
/// Checks the values `parse_guardian_assessment` fills in when the payload
/// contains only `"outcome": "deny"`: high risk, unknown authorization and
/// the stock deny rationale.
#[test]
fn parse_guardian_assessment_treats_bare_deny_as_high_risk() {
    let raw_output = r#"{"outcome":"deny"}"#;
    let expected = GuardianAssessment {
        risk_level: GuardianRiskLevel::High,
        user_authorization: GuardianUserAuthorization::Unknown,
        outcome: GuardianAssessmentOutcome::Deny,
        rationale: String::from("Auto-review returned a deny decision without a rationale."),
    };

    let assessment = parse_guardian_assessment(Some(raw_output)).expect("guardian assessment");

    assert_eq!(assessment, expected);
}
|
|
|
|
/// Pins the exact JSON schema returned by `guardian_output_schema`: four
/// string properties, no additional properties, and `outcome` as the only
/// required key.
#[test]
fn guardian_output_schema_requires_only_outcome_and_allows_optional_details() {
    let expected_schema = serde_json::json!({
        "type": "object",
        "additionalProperties": false,
        "properties": {
            "risk_level": {
                "type": "string",
                "enum": ["low", "medium", "high", "critical"]
            },
            "user_authorization": {
                "type": "string",
                "enum": ["unknown", "low", "medium", "high"]
            },
            "outcome": {
                "type": "string",
                "enum": ["allow", "deny"]
            },
            "rationale": {
                "type": "string"
            }
        },
        "required": ["outcome"]
    });

    let actual_schema = guardian_output_schema();

    assert_eq!(actual_schema, expected_schema);
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn guardian_review_request_layout_matches_model_visible_request_snapshot()
|
|
-> anyhow::Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = start_mock_server().await;
|
|
let guardian_assessment = serde_json::json!({
|
|
"risk_level": "medium",
|
|
"user_authorization": "high",
|
|
"outcome": "allow",
|
|
"rationale": "The user explicitly requested pushing the reviewed branch to the known remote.",
|
|
})
|
|
.to_string();
|
|
let request_log = mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-guardian"),
|
|
ev_assistant_message("msg-guardian", &guardian_assessment),
|
|
ev_completed("resp-guardian"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
let (mut session, mut turn) = crate::session::tests::make_session_and_context().await;
|
|
session.conversation_id = fixed_guardian_parent_session_id();
|
|
let temp_cwd = TempDir::new()?;
|
|
let mut config = (*turn.config).clone();
|
|
config.cwd = temp_cwd.abs();
|
|
config.model_provider.base_url = Some(format!("{}/v1", server.uri()));
|
|
let config = Arc::new(config);
|
|
let models_manager = test_support::models_manager_with_provider(
|
|
config.codex_home.to_path_buf(),
|
|
Arc::clone(&session.services.auth_manager),
|
|
config.model_provider.clone(),
|
|
);
|
|
session.services.models_manager = models_manager;
|
|
turn.config = Arc::clone(&config);
|
|
turn.provider = create_model_provider(config.model_provider.clone(), turn.auth_manager.clone());
|
|
let session = Arc::new(session);
|
|
let turn = Arc::new(turn);
|
|
seed_guardian_parent_history(&session, &turn).await;
|
|
|
|
let request = GuardianApprovalRequest::Shell {
|
|
id: "shell-1".to_string(),
|
|
command: vec![
|
|
"git".to_string(),
|
|
"push".to_string(),
|
|
"origin".to_string(),
|
|
"guardian-approval-mvp".to_string(),
|
|
],
|
|
cwd: test_path_buf("/repo/codex-rs/core").abs(),
|
|
sandbox_permissions: crate::sandboxing::SandboxPermissions::UseDefault,
|
|
additional_permissions: None,
|
|
justification: Some("Need to push the reviewed docs fix to the repo remote.".to_string()),
|
|
};
|
|
|
|
let outcome = run_guardian_review_session_for_test(
|
|
Arc::clone(&session),
|
|
Arc::clone(&turn),
|
|
request,
|
|
Some("Sandbox denied outbound git push to github.com.".to_string()),
|
|
guardian_output_schema(),
|
|
/*external_cancel*/ None,
|
|
)
|
|
.await;
|
|
let (GuardianReviewOutcome::Completed(assessment), metadata) = outcome else {
|
|
panic!("expected guardian assessment");
|
|
};
|
|
let guardian_thread_id = metadata
|
|
.guardian_thread_id
|
|
.as_deref()
|
|
.expect("guardian thread id");
|
|
assert_eq!(assessment.outcome, GuardianAssessmentOutcome::Allow);
|
|
assert_ne!(guardian_thread_id, session.conversation_id.to_string());
|
|
ThreadId::from_string(guardian_thread_id).expect("guardian thread id should be a valid UUID");
|
|
assert!(matches!(
|
|
metadata.guardian_session_kind,
|
|
Some(codex_analytics::GuardianReviewSessionKind::TrunkNew)
|
|
));
|
|
let request = request_log.single_request();
|
|
let request_body = request.body_json();
|
|
assert_eq!(
|
|
request_body.pointer("/text/format/strict"),
|
|
Some(&serde_json::json!(false))
|
|
);
|
|
assert_eq!(
|
|
request_body.pointer("/text/format/schema"),
|
|
Some(&serde_json::json!({
|
|
"type": "object",
|
|
"additionalProperties": false,
|
|
"properties": {
|
|
"risk_level": {
|
|
"type": "string",
|
|
"enum": ["low", "medium", "high", "critical"]
|
|
},
|
|
"user_authorization": {
|
|
"type": "string",
|
|
"enum": ["unknown", "low", "medium", "high"]
|
|
},
|
|
"outcome": {
|
|
"type": "string",
|
|
"enum": ["allow", "deny"]
|
|
},
|
|
"rationale": {
|
|
"type": "string"
|
|
}
|
|
},
|
|
"required": ["outcome"]
|
|
}))
|
|
);
|
|
let request_model = request_body
|
|
.get("model")
|
|
.and_then(|value| value.as_str())
|
|
.expect("guardian request should include a model");
|
|
let request_reasoning_effort = request_body
|
|
.get("reasoning")
|
|
.and_then(|reasoning| reasoning.get("effort"))
|
|
.and_then(|value| value.as_str());
|
|
assert_eq!(metadata.guardian_model.as_deref(), Some(request_model));
|
|
assert_eq!(
|
|
metadata.guardian_reasoning_effort.as_deref(),
|
|
request_reasoning_effort
|
|
);
|
|
assert_eq!(metadata.had_prior_review_context, Some(false));
|
|
assert!(
|
|
metadata.time_to_first_token_ms.is_some(),
|
|
"guardian review metadata should capture TTFT when the nested turn completes"
|
|
);
|
|
|
|
let mut settings = Settings::clone_current();
|
|
settings.set_snapshot_path("snapshots");
|
|
settings.set_prepend_module_to_snapshot(false);
|
|
settings.bind(|| {
|
|
assert_snapshot!(
|
|
"codex_core__guardian__tests__guardian_review_request_layout",
|
|
normalize_guardian_snapshot_paths(context_snapshot::format_labeled_requests_snapshot(
|
|
"Guardian review request layout",
|
|
&[("Guardian Review Request", &request)],
|
|
&guardian_snapshot_options(),
|
|
))
|
|
);
|
|
});
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn build_guardian_prompt_items_includes_parent_session_id() -> anyhow::Result<()> {
|
|
let (session, _) = crate::session::tests::make_session_and_context().await;
|
|
let prompt = build_guardian_prompt_items(
|
|
&session,
|
|
/*retry_reason*/ None,
|
|
GuardianApprovalRequest::Shell {
|
|
id: "shell-1".to_string(),
|
|
command: vec!["git".to_string(), "status".to_string()],
|
|
cwd: test_path_buf("/repo").abs(),
|
|
sandbox_permissions: crate::sandboxing::SandboxPermissions::UseDefault,
|
|
additional_permissions: None,
|
|
justification: None,
|
|
},
|
|
GuardianPromptMode::Full,
|
|
)
|
|
.await?;
|
|
let prompt_text = prompt
|
|
.items
|
|
.into_iter()
|
|
.map(|item| match item {
|
|
codex_protocol::user_input::UserInput::Text { text, .. } => text,
|
|
codex_protocol::user_input::UserInput::Image { .. } => String::new(),
|
|
_ => String::new(),
|
|
})
|
|
.collect::<String>();
|
|
|
|
assert!(
|
|
prompt_text.contains(&format!(
|
|
">>> TRANSCRIPT END\nReviewed Codex session id: {}\n",
|
|
session.conversation_id
|
|
)),
|
|
"guardian prompt should expose the parent session id immediately after the transcript end"
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn guardian_reuses_prompt_cache_key_and_appends_prior_reviews() -> anyhow::Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = start_mock_server().await;
|
|
let first_rationale = "first guardian rationale from the prior review";
|
|
let request_log = mount_sse_sequence(
|
|
&server,
|
|
vec![
|
|
sse(vec![
|
|
ev_response_created("resp-guardian-1"),
|
|
ev_assistant_message(
|
|
"msg-guardian-1",
|
|
&format!(
|
|
"{{\"risk_level\":\"low\",\"user_authorization\":\"high\",\"outcome\":\"allow\",\"rationale\":\"{first_rationale}\"}}"
|
|
),
|
|
),
|
|
ev_completed("resp-guardian-1"),
|
|
]),
|
|
sse(vec![
|
|
ev_response_created("resp-guardian-2"),
|
|
ev_assistant_message(
|
|
"msg-guardian-2",
|
|
"{\"risk_level\":\"low\",\"user_authorization\":\"high\",\"outcome\":\"allow\",\"rationale\":\"second guardian rationale\"}",
|
|
),
|
|
ev_completed("resp-guardian-2"),
|
|
]),
|
|
sse(vec![
|
|
ev_response_created("resp-guardian-3"),
|
|
ev_assistant_message(
|
|
"msg-guardian-3",
|
|
"{\"risk_level\":\"low\",\"user_authorization\":\"high\",\"outcome\":\"allow\",\"rationale\":\"third guardian rationale\"}",
|
|
),
|
|
ev_completed("resp-guardian-3"),
|
|
]),
|
|
],
|
|
)
|
|
.await;
|
|
|
|
let (session, turn) = guardian_test_session_and_turn(&server).await;
|
|
seed_guardian_parent_history(&session, &turn).await;
|
|
|
|
let first_request = GuardianApprovalRequest::Shell {
|
|
id: "shell-1".to_string(),
|
|
command: vec!["git".to_string(), "push".to_string()],
|
|
cwd: test_path_buf("/repo/codex-rs/core").abs(),
|
|
sandbox_permissions: crate::sandboxing::SandboxPermissions::UseDefault,
|
|
additional_permissions: None,
|
|
justification: Some("Need to push the first docs fix.".to_string()),
|
|
};
|
|
let first_outcome = run_guardian_review_session_for_test(
|
|
Arc::clone(&session),
|
|
Arc::clone(&turn),
|
|
first_request,
|
|
Some("First retry reason".to_string()),
|
|
guardian_output_schema(),
|
|
/*external_cancel*/ None,
|
|
)
|
|
.await;
|
|
session
|
|
.record_into_history(
|
|
&[
|
|
ResponseItem::Message {
|
|
id: None,
|
|
role: "user".to_string(),
|
|
content: vec![ContentItem::InputText {
|
|
text: "Please push the second docs fix too.".to_string(),
|
|
}],
|
|
phase: None,
|
|
},
|
|
ResponseItem::Message {
|
|
id: None,
|
|
role: "assistant".to_string(),
|
|
content: vec![ContentItem::OutputText {
|
|
text: "I need approval for the second docs fix.".to_string(),
|
|
}],
|
|
phase: None,
|
|
},
|
|
],
|
|
turn.as_ref(),
|
|
)
|
|
.await;
|
|
let second_request = GuardianApprovalRequest::Shell {
|
|
id: "shell-2".to_string(),
|
|
command: vec![
|
|
"git".to_string(),
|
|
"push".to_string(),
|
|
"--force-with-lease".to_string(),
|
|
],
|
|
cwd: test_path_buf("/repo/codex-rs/core").abs(),
|
|
sandbox_permissions: crate::sandboxing::SandboxPermissions::UseDefault,
|
|
additional_permissions: None,
|
|
justification: Some("Need to push the second docs fix.".to_string()),
|
|
};
|
|
let second_outcome = run_guardian_review_session_for_test(
|
|
Arc::clone(&session),
|
|
Arc::clone(&turn),
|
|
second_request,
|
|
Some("Second retry reason".to_string()),
|
|
guardian_output_schema(),
|
|
/*external_cancel*/ None,
|
|
)
|
|
.await;
|
|
session
|
|
.record_into_history(
|
|
&[
|
|
ResponseItem::Message {
|
|
id: None,
|
|
role: "user".to_string(),
|
|
content: vec![ContentItem::InputText {
|
|
text: "Please push the third docs fix too.".to_string(),
|
|
}],
|
|
phase: None,
|
|
},
|
|
ResponseItem::Message {
|
|
id: None,
|
|
role: "assistant".to_string(),
|
|
content: vec![ContentItem::OutputText {
|
|
text: "I need approval for the third docs fix.".to_string(),
|
|
}],
|
|
phase: None,
|
|
},
|
|
],
|
|
turn.as_ref(),
|
|
)
|
|
.await;
|
|
let third_request = GuardianApprovalRequest::Shell {
|
|
id: "shell-3".to_string(),
|
|
command: vec!["git".to_string(), "push".to_string()],
|
|
cwd: test_path_buf("/repo/codex-rs/core").abs(),
|
|
sandbox_permissions: crate::sandboxing::SandboxPermissions::UseDefault,
|
|
additional_permissions: None,
|
|
justification: Some("Need to push the third docs fix.".to_string()),
|
|
};
|
|
let third_outcome = run_guardian_review_session_for_test(
|
|
Arc::clone(&session),
|
|
Arc::clone(&turn),
|
|
third_request,
|
|
Some("Third retry reason".to_string()),
|
|
guardian_output_schema(),
|
|
/*external_cancel*/ None,
|
|
)
|
|
.await;
|
|
|
|
let (GuardianReviewOutcome::Completed(first_assessment), first_metadata) = first_outcome else {
|
|
panic!("expected first guardian assessment");
|
|
};
|
|
let (GuardianReviewOutcome::Completed(second_assessment), second_metadata) = second_outcome
|
|
else {
|
|
panic!("expected second guardian assessment");
|
|
};
|
|
let (GuardianReviewOutcome::Completed(third_assessment), third_metadata) = third_outcome else {
|
|
panic!("expected third guardian assessment");
|
|
};
|
|
assert_eq!(first_assessment.outcome, GuardianAssessmentOutcome::Allow);
|
|
assert_eq!(second_assessment.outcome, GuardianAssessmentOutcome::Allow);
|
|
assert_eq!(third_assessment.outcome, GuardianAssessmentOutcome::Allow);
|
|
assert!(matches!(
|
|
first_metadata.guardian_session_kind,
|
|
Some(codex_analytics::GuardianReviewSessionKind::TrunkNew)
|
|
));
|
|
assert!(matches!(
|
|
second_metadata.guardian_session_kind,
|
|
Some(codex_analytics::GuardianReviewSessionKind::TrunkReused)
|
|
));
|
|
assert!(matches!(
|
|
third_metadata.guardian_session_kind,
|
|
Some(codex_analytics::GuardianReviewSessionKind::TrunkReused)
|
|
));
|
|
ThreadId::from_string(
|
|
first_metadata
|
|
.guardian_thread_id
|
|
.as_deref()
|
|
.expect("first guardian thread id"),
|
|
)
|
|
.expect("first guardian thread id should be a valid UUID");
|
|
ThreadId::from_string(
|
|
second_metadata
|
|
.guardian_thread_id
|
|
.as_deref()
|
|
.expect("second guardian thread id"),
|
|
)
|
|
.expect("second guardian thread id should be a valid UUID");
|
|
ThreadId::from_string(
|
|
third_metadata
|
|
.guardian_thread_id
|
|
.as_deref()
|
|
.expect("third guardian thread id"),
|
|
)
|
|
.expect("third guardian thread id should be a valid UUID");
|
|
assert_eq!(first_metadata.had_prior_review_context, Some(false));
|
|
assert_eq!(second_metadata.had_prior_review_context, Some(true));
|
|
assert_eq!(third_metadata.had_prior_review_context, Some(true));
|
|
assert_eq!(
|
|
first_metadata.guardian_thread_id,
|
|
second_metadata.guardian_thread_id
|
|
);
|
|
assert_eq!(
|
|
second_metadata.guardian_thread_id,
|
|
third_metadata.guardian_thread_id
|
|
);
|
|
|
|
let requests = request_log.requests();
|
|
assert_eq!(requests.len(), 3);
|
|
|
|
let first_body = requests[0].body_json();
|
|
let second_body = requests[1].body_json();
|
|
let third_body = requests[2].body_json();
|
|
assert_eq!(
|
|
first_body["prompt_cache_key"],
|
|
second_body["prompt_cache_key"]
|
|
);
|
|
assert!(
|
|
second_body.to_string().contains(concat!(
|
|
"Use prior reviews as context, not binding precedent. ",
|
|
"Follow the Workspace Policy. ",
|
|
"If the user explicitly approves a previously rejected action after being ",
|
|
"informed of the concrete risks, set outcome to \\\"allow\\\" unless the policy ",
|
|
"explicitly disallows user overwrites in such cases."
|
|
)),
|
|
"follow-up guardian request should include the follow-up reminder"
|
|
);
|
|
assert!(
|
|
second_body.to_string().contains(first_rationale),
|
|
"guardian session should append earlier reviews into the follow-up request"
|
|
);
|
|
assert_eq!(
|
|
third_body
|
|
.to_string()
|
|
.matches("Use prior reviews as context, not binding precedent.")
|
|
.count(),
|
|
1,
|
|
"later follow-up guardian requests should not append the reminder again"
|
|
);
|
|
let committed_rollout_items = session
|
|
.guardian_review_session
|
|
.committed_fork_rollout_items_for_test()
|
|
.await
|
|
.expect("committed guardian fork snapshot");
|
|
assert_eq!(
|
|
committed_rollout_items
|
|
.iter()
|
|
.filter(|item| rollout_item_contains_message_text(
|
|
item,
|
|
"Use prior reviews as context, not binding precedent."
|
|
))
|
|
.count(),
|
|
1,
|
|
"follow-up reminder should be persisted for guardian forks"
|
|
);
|
|
let second_user_message = requests[1]
|
|
.message_input_text_groups("user")
|
|
.last()
|
|
.expect("follow-up guardian user message")
|
|
.join("");
|
|
assert!(second_user_message.contains(">>> TRANSCRIPT DELTA START\n"));
|
|
assert!(second_user_message.contains("[5] user: Please push the second docs fix too."));
|
|
assert!(
|
|
second_user_message.contains("[6] assistant: I need approval for the second docs fix.")
|
|
);
|
|
assert!(!second_user_message.contains("[1] user: Please check the repo visibility"));
|
|
|
|
let mut settings = Settings::clone_current();
|
|
settings.set_snapshot_path("snapshots");
|
|
settings.set_prepend_module_to_snapshot(false);
|
|
settings.bind(|| {
|
|
assert_snapshot!(
|
|
"codex_core__guardian__tests__guardian_followup_review_request_layout",
|
|
format!(
|
|
"{}\n\nshared_prompt_cache_key: {}\nfollowup_contains_first_rationale: {}",
|
|
normalize_guardian_snapshot_paths(
|
|
context_snapshot::format_labeled_requests_snapshot(
|
|
"Guardian follow-up review request layout",
|
|
&[
|
|
("Initial Guardian Review Request", &requests[0]),
|
|
("Follow-up Guardian Review Request", &requests[1]),
|
|
],
|
|
&guardian_snapshot_options(),
|
|
)
|
|
),
|
|
first_body["prompt_cache_key"] == second_body["prompt_cache_key"],
|
|
second_body.to_string().contains(first_rationale),
|
|
)
|
|
);
|
|
});
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn guardian_reused_trunk_ignores_stale_prior_turn_completion() -> anyhow::Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = start_mock_server().await;
|
|
let request_log = mount_sse_sequence(
|
|
&server,
|
|
vec![
|
|
sse(vec![
|
|
ev_response_created("resp-guardian-1"),
|
|
ev_assistant_message(
|
|
"msg-guardian-1",
|
|
"{\"risk_level\":\"low\",\"user_authorization\":\"high\",\"outcome\":\"allow\",\"rationale\":\"first guardian rationale\"}",
|
|
),
|
|
ev_completed("resp-guardian-1"),
|
|
]),
|
|
sse(vec![
|
|
ev_response_created("resp-guardian-2"),
|
|
ev_assistant_message(
|
|
"msg-guardian-2",
|
|
"{\"risk_level\":\"low\",\"user_authorization\":\"high\",\"outcome\":\"allow\",\"rationale\":\"second guardian rationale\"}",
|
|
),
|
|
ev_completed("resp-guardian-2"),
|
|
]),
|
|
],
|
|
)
|
|
.await;
|
|
|
|
let (session, turn) = guardian_test_session_and_turn(&server).await;
|
|
let first_outcome = run_guardian_review_session_for_test(
|
|
Arc::clone(&session),
|
|
Arc::clone(&turn),
|
|
GuardianApprovalRequest::Shell {
|
|
id: "shell-1".to_string(),
|
|
command: vec!["git".to_string(), "push".to_string()],
|
|
cwd: test_path_buf("/repo/codex-rs/core").abs(),
|
|
sandbox_permissions: crate::sandboxing::SandboxPermissions::UseDefault,
|
|
additional_permissions: None,
|
|
justification: Some("Need to push the first docs fix.".to_string()),
|
|
},
|
|
/*retry_reason*/ None,
|
|
guardian_output_schema(),
|
|
/*external_cancel*/ None,
|
|
)
|
|
.await;
|
|
let (GuardianReviewOutcome::Completed(first_assessment), first_metadata) = first_outcome else {
|
|
panic!("expected first guardian assessment");
|
|
};
|
|
assert_eq!(first_assessment.rationale, "first guardian rationale");
|
|
assert!(matches!(
|
|
first_metadata.guardian_session_kind,
|
|
Some(codex_analytics::GuardianReviewSessionKind::TrunkNew)
|
|
));
|
|
|
|
session
|
|
.guardian_review_session
|
|
.send_trunk_event_raw_for_test(Event {
|
|
id: "stale-turn".to_string(),
|
|
msg: EventMsg::TurnComplete(TurnCompleteEvent {
|
|
turn_id: "stale-turn".to_string(),
|
|
last_agent_message: Some(
|
|
"{\"risk_level\":\"high\",\"user_authorization\":\"low\",\"outcome\":\"deny\",\"rationale\":\"stale guardian rationale\"}"
|
|
.to_string(),
|
|
),
|
|
completed_at: None,
|
|
duration_ms: None,
|
|
time_to_first_token_ms: Some(1),
|
|
}),
|
|
})
|
|
.await;
|
|
|
|
let second_outcome = run_guardian_review_session_for_test(
|
|
Arc::clone(&session),
|
|
Arc::clone(&turn),
|
|
GuardianApprovalRequest::Shell {
|
|
id: "shell-2".to_string(),
|
|
command: vec!["git".to_string(), "push".to_string()],
|
|
cwd: test_path_buf("/repo/codex-rs/core").abs(),
|
|
sandbox_permissions: crate::sandboxing::SandboxPermissions::UseDefault,
|
|
additional_permissions: None,
|
|
justification: Some("Need to push the second docs fix.".to_string()),
|
|
},
|
|
/*retry_reason*/ None,
|
|
guardian_output_schema(),
|
|
/*external_cancel*/ None,
|
|
)
|
|
.await;
|
|
let (GuardianReviewOutcome::Completed(second_assessment), second_metadata) = second_outcome
|
|
else {
|
|
panic!("expected second guardian assessment");
|
|
};
|
|
assert_eq!(second_assessment.outcome, GuardianAssessmentOutcome::Allow);
|
|
assert_eq!(second_assessment.rationale, "second guardian rationale");
|
|
assert!(matches!(
|
|
second_metadata.guardian_session_kind,
|
|
Some(codex_analytics::GuardianReviewSessionKind::TrunkReused)
|
|
));
|
|
|
|
assert_eq!(
|
|
request_log.requests().len(),
|
|
2,
|
|
"the reused trunk should wait for the real follow-up review"
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn guardian_review_surfaces_responses_api_errors_in_rejection_reason() -> anyhow::Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = start_mock_server().await;
|
|
let error_message =
|
|
"Item 'rs_test' of type 'reasoning' was provided without its required following item.";
|
|
let _request_log = mount_response_once(
|
|
&server,
|
|
wiremock::ResponseTemplate::new(400).set_body_json(serde_json::json!({
|
|
"error": {
|
|
"message": error_message,
|
|
"type": "invalid_request_error",
|
|
"param": "input"
|
|
}
|
|
})),
|
|
)
|
|
.await;
|
|
|
|
let (mut session, mut turn, rx) =
|
|
crate::session::tests::make_session_and_context_with_rx().await;
|
|
let mut config = (*turn.config).clone();
|
|
config.model_provider.base_url = Some(format!("{}/v1", server.uri()));
|
|
config.user_instructions = None;
|
|
let config = Arc::new(config);
|
|
let models_manager = test_support::models_manager_with_provider(
|
|
config.codex_home.to_path_buf(),
|
|
Arc::clone(&session.services.auth_manager),
|
|
config.model_provider.clone(),
|
|
);
|
|
Arc::get_mut(&mut session)
|
|
.expect("session should be uniquely owned")
|
|
.services
|
|
.models_manager = models_manager;
|
|
let turn_mut = Arc::get_mut(&mut turn).expect("turn should be uniquely owned");
|
|
turn_mut.config = Arc::clone(&config);
|
|
turn_mut.provider =
|
|
create_model_provider(config.model_provider.clone(), turn_mut.auth_manager.clone());
|
|
turn_mut.user_instructions = None;
|
|
|
|
seed_guardian_parent_history(&session, &turn).await;
|
|
|
|
let decision = review_approval_request(
|
|
&session,
|
|
&turn,
|
|
"review-shell-guardian-error".to_string(),
|
|
GuardianApprovalRequest::Shell {
|
|
id: "shell-guardian-error".to_string(),
|
|
command: vec!["git".to_string(), "push".to_string()],
|
|
cwd: test_path_buf("/repo/codex-rs/core").abs(),
|
|
sandbox_permissions: crate::sandboxing::SandboxPermissions::UseDefault,
|
|
additional_permissions: None,
|
|
justification: Some("Need to push the reviewed docs fix.".to_string()),
|
|
},
|
|
/*retry_reason*/ None,
|
|
)
|
|
.await;
|
|
|
|
assert_eq!(decision, ReviewDecision::Denied);
|
|
|
|
let mut warnings = Vec::new();
|
|
let mut denial_rationales = Vec::new();
|
|
while let Ok(event) = rx.try_recv() {
|
|
match event.msg {
|
|
EventMsg::GuardianWarning(event) => warnings.push(event.message),
|
|
EventMsg::GuardianAssessment(event)
|
|
if event.status == GuardianAssessmentStatus::Denied =>
|
|
{
|
|
denial_rationales.push(event.rationale)
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
|
|
assert!(
|
|
warnings
|
|
.iter()
|
|
.any(|message| message.contains(error_message)),
|
|
"warning should include the underlying responses api error"
|
|
);
|
|
assert!(
|
|
denial_rationales
|
|
.iter()
|
|
.flatten()
|
|
.any(|message| message.contains(error_message)),
|
|
"denial rationale should include the underlying responses api error"
|
|
);
|
|
assert!(
|
|
denial_rationales.iter().flatten().all(|message| {
|
|
!message.contains("guardian review completed without an assessment payload")
|
|
}),
|
|
"denial rationale should not fall back to the generic missing payload error"
|
|
);
|
|
{
|
|
let rationales = session.services.guardian_rejections.lock().await;
|
|
assert!(rationales.contains_key("review-shell-guardian-error"));
|
|
assert!(!rationales.contains_key("shell-guardian-error"));
|
|
}
|
|
let rejection_message =
|
|
guardian_rejection_message(session.as_ref(), "review-shell-guardian-error").await;
|
|
assert!(
|
|
rejection_message.contains("Reason: Automatic approval review failed:")
|
|
&& rejection_message.contains(error_message),
|
|
"rejection message should include guardian rationale: {rejection_message}"
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn guardian_parallel_reviews_fork_from_last_committed_trunk_history() -> anyhow::Result<()> {
|
|
const TEST_STACK_SIZE_BYTES: usize = 2 * 1024 * 1024;
|
|
|
|
let handle =
|
|
std::thread::Builder::new()
|
|
.name("guardian_parallel_reviews_fork_from_last_committed_trunk_history".to_string())
|
|
.stack_size(TEST_STACK_SIZE_BYTES)
|
|
.spawn(|| -> anyhow::Result<()> {
|
|
let runtime = tokio::runtime::Builder::new_current_thread()
|
|
.enable_all()
|
|
.build()?;
|
|
runtime.block_on(Box::pin(async {
|
|
let first_assessment = serde_json::json!({
|
|
"risk_level": "low",
|
|
"user_authorization": "high",
|
|
"outcome": "allow",
|
|
"rationale": "first guardian rationale",
|
|
})
|
|
.to_string();
|
|
let second_assessment = serde_json::json!({
|
|
"risk_level": "low",
|
|
"user_authorization": "high",
|
|
"outcome": "allow",
|
|
"rationale": "second guardian rationale",
|
|
})
|
|
.to_string();
|
|
let third_assessment = serde_json::json!({
|
|
"risk_level": "low",
|
|
"user_authorization": "high",
|
|
"outcome": "allow",
|
|
"rationale": "third guardian rationale",
|
|
})
|
|
.to_string();
|
|
let (gate_tx, gate_rx) = tokio::sync::oneshot::channel();
|
|
let (server, _) = start_streaming_sse_server(vec![
|
|
vec![StreamingSseChunk {
|
|
gate: None,
|
|
body: sse(vec![
|
|
ev_response_created("resp-guardian-1"),
|
|
ev_assistant_message("msg-guardian-1", &first_assessment),
|
|
ev_completed("resp-guardian-1"),
|
|
]),
|
|
}],
|
|
vec![
|
|
StreamingSseChunk {
|
|
gate: None,
|
|
body: sse(vec![ev_response_created("resp-guardian-2")]),
|
|
},
|
|
StreamingSseChunk {
|
|
gate: Some(gate_rx),
|
|
body: sse(vec![
|
|
ev_assistant_message("msg-guardian-2", &second_assessment),
|
|
ev_completed("resp-guardian-2"),
|
|
]),
|
|
},
|
|
],
|
|
vec![StreamingSseChunk {
|
|
gate: None,
|
|
body: sse(vec![
|
|
ev_response_created("resp-guardian-3"),
|
|
ev_assistant_message("msg-guardian-3", &third_assessment),
|
|
ev_completed("resp-guardian-3"),
|
|
]),
|
|
}],
|
|
])
|
|
.await;
|
|
|
|
let (session, turn) = guardian_test_session_and_turn_with_base_url(server.uri()).await;
|
|
seed_guardian_parent_history(&session, &turn).await;
|
|
|
|
let initial_request = GuardianApprovalRequest::Shell {
|
|
id: "shell-guardian-1".to_string(),
|
|
command: vec!["git".to_string(), "status".to_string()],
|
|
cwd: test_path_buf("/repo/codex-rs/core").abs(),
|
|
sandbox_permissions: crate::sandboxing::SandboxPermissions::UseDefault,
|
|
additional_permissions: None,
|
|
justification: Some("Inspect repo state before proceeding.".to_string()),
|
|
};
|
|
assert_eq!(
|
|
review_approval_request(
|
|
&session,
|
|
&turn,
|
|
"review-shell-guardian-1".to_string(),
|
|
initial_request,
|
|
/*retry_reason*/ None
|
|
)
|
|
.await,
|
|
ReviewDecision::Approved
|
|
);
|
|
session
|
|
.record_into_history(
|
|
&[
|
|
ResponseItem::Message {
|
|
id: None,
|
|
role: "user".to_string(),
|
|
content: vec![ContentItem::InputText {
|
|
text: "Please inspect pending changes before pushing.".to_string(),
|
|
}],
|
|
phase: None,
|
|
},
|
|
ResponseItem::Message {
|
|
id: None,
|
|
role: "assistant".to_string(),
|
|
content: vec![ContentItem::OutputText {
|
|
text: "I need approval to run git diff.".to_string(),
|
|
}],
|
|
phase: None,
|
|
},
|
|
],
|
|
turn.as_ref(),
|
|
)
|
|
.await;
|
|
|
|
let second_request = GuardianApprovalRequest::Shell {
|
|
id: "shell-guardian-2".to_string(),
|
|
command: vec!["git".to_string(), "diff".to_string()],
|
|
cwd: test_path_buf("/repo/codex-rs/core").abs(),
|
|
sandbox_permissions: crate::sandboxing::SandboxPermissions::UseDefault,
|
|
additional_permissions: None,
|
|
justification: Some("Inspect pending changes before proceeding.".to_string()),
|
|
};
|
|
let third_request = GuardianApprovalRequest::Shell {
|
|
id: "shell-guardian-3".to_string(),
|
|
command: vec!["git".to_string(), "push".to_string()],
|
|
cwd: test_path_buf("/repo/codex-rs/core").abs(),
|
|
sandbox_permissions: crate::sandboxing::SandboxPermissions::UseDefault,
|
|
additional_permissions: None,
|
|
justification: Some("Inspect whether pushing is safe before proceeding.".to_string()),
|
|
};
|
|
|
|
let session_for_second = Arc::clone(&session);
|
|
let turn_for_second = Arc::clone(&turn);
|
|
let mut second_review = tokio::spawn(async move {
|
|
review_approval_request(
|
|
&session_for_second,
|
|
&turn_for_second,
|
|
"review-shell-guardian-2".to_string(),
|
|
second_request,
|
|
Some("trunk follow-up".to_string()),
|
|
)
|
|
.await
|
|
});
|
|
|
|
let second_request_observed = tokio::time::timeout(Duration::from_secs(5), async {
|
|
loop {
|
|
if server.requests().await.len() >= 2 {
|
|
break;
|
|
}
|
|
tokio::task::yield_now().await;
|
|
}
|
|
})
|
|
.await;
|
|
assert!(
|
|
second_request_observed.is_ok(),
|
|
"second guardian request was not observed"
|
|
);
|
|
session
|
|
.record_into_history(
|
|
&[
|
|
ResponseItem::Message {
|
|
id: None,
|
|
role: "user".to_string(),
|
|
content: vec![ContentItem::InputText {
|
|
text: "Now inspect whether pushing is safe.".to_string(),
|
|
}],
|
|
phase: None,
|
|
},
|
|
ResponseItem::Message {
|
|
id: None,
|
|
role: "assistant".to_string(),
|
|
content: vec![ContentItem::OutputText {
|
|
text: "I need approval to push after the diff check.".to_string(),
|
|
}],
|
|
phase: None,
|
|
},
|
|
],
|
|
turn.as_ref(),
|
|
)
|
|
.await;
|
|
|
|
let third_decision = review_approval_request(
|
|
&session,
|
|
&turn,
|
|
"review-shell-guardian-3".to_string(),
|
|
third_request,
|
|
Some("parallel follow-up".to_string()),
|
|
)
|
|
.await;
|
|
assert_eq!(third_decision, ReviewDecision::Approved);
|
|
let requests = server.requests().await;
|
|
assert_eq!(requests.len(), 3);
|
|
let third_request_body = serde_json::from_slice::<serde_json::Value>(&requests[2])?;
|
|
let third_request_body_text = third_request_body.to_string();
|
|
assert!(
|
|
third_request_body_text.contains("first guardian rationale"),
|
|
"forked guardian review should include the last committed trunk assessment"
|
|
);
|
|
let third_user_message = last_user_message_text_from_body(&third_request_body);
|
|
assert!(third_user_message.contains(">>> TRANSCRIPT DELTA START\n"));
|
|
assert!(
|
|
third_user_message.contains("[5] user: Please inspect pending changes before pushing.")
|
|
);
|
|
assert!(third_user_message.contains("[7] user: Now inspect whether pushing is safe."));
|
|
assert!(!third_user_message.contains("[1] user: Please check the repo visibility"));
|
|
assert!(
|
|
!third_request_body_text.contains("second guardian rationale"),
|
|
"forked guardian review should not include the still in-flight trunk assessment"
|
|
);
|
|
assert!(
|
|
tokio::time::timeout(Duration::from_millis(100), &mut second_review)
|
|
.await
|
|
.is_err(),
|
|
"the trunk guardian review should still be blocked on its gated response"
|
|
);
|
|
|
|
gate_tx
|
|
.send(())
|
|
.expect("second guardian review gate should still be open");
|
|
assert_eq!(second_review.await?, ReviewDecision::Approved);
|
|
server.shutdown().await;
|
|
|
|
Ok(())
|
|
}))
|
|
})?;
|
|
|
|
match handle.join() {
|
|
Ok(result) => result,
|
|
Err(_) => Err(anyhow::anyhow!(
|
|
"guardian_parallel_reviews_fork_from_last_committed_trunk_history thread panicked"
|
|
)),
|
|
}
|
|
}
|
|
#[tokio::test]
|
|
async fn guardian_review_session_config_preserves_parent_network_proxy() {
|
|
let mut parent_config = test_config().await;
|
|
let network = NetworkProxySpec::from_config_and_constraints(
|
|
NetworkProxyConfig::default(),
|
|
Some(NetworkConstraints {
|
|
enabled: Some(true),
|
|
domains: Some(NetworkDomainPermissionsToml {
|
|
entries: std::collections::BTreeMap::from([(
|
|
"github.com".to_string(),
|
|
NetworkDomainPermissionToml::Allow,
|
|
)]),
|
|
}),
|
|
..Default::default()
|
|
}),
|
|
parent_config.permissions.permission_profile.get(),
|
|
)
|
|
.expect("network proxy spec");
|
|
parent_config.permissions.network = Some(network.clone());
|
|
|
|
let guardian_config = build_guardian_review_session_config_for_test(
|
|
&parent_config,
|
|
/*live_network_config*/ None,
|
|
"parent-active-model",
|
|
Some(codex_protocol::openai_models::ReasoningEffort::Low),
|
|
)
|
|
.expect("guardian config");
|
|
|
|
assert_eq!(guardian_config.permissions.network, Some(network));
|
|
assert_eq!(
|
|
guardian_config.model,
|
|
Some("parent-active-model".to_string())
|
|
);
|
|
assert_eq!(
|
|
guardian_config.model_reasoning_effort,
|
|
Some(codex_protocol::openai_models::ReasoningEffort::Low)
|
|
);
|
|
assert_eq!(
|
|
guardian_config.permissions.approval_policy,
|
|
Constrained::allow_only(AskForApproval::Never)
|
|
);
|
|
assert_eq!(
|
|
guardian_config.permissions.permission_profile,
|
|
Constrained::allow_only(PermissionProfile::read_only())
|
|
);
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn guardian_review_session_config_clears_parent_developer_instructions() {
|
|
let mut parent_config = test_config().await;
|
|
parent_config.developer_instructions =
|
|
Some("parent or managed config should not replace guardian policy".to_string());
|
|
|
|
let guardian_config = build_guardian_review_session_config_for_test(
|
|
&parent_config,
|
|
/*live_network_config*/ None,
|
|
"active-model",
|
|
/*reasoning_effort*/ None,
|
|
)
|
|
.expect("guardian config");
|
|
|
|
assert_eq!(guardian_config.developer_instructions, None);
|
|
assert_eq!(
|
|
guardian_config.base_instructions,
|
|
Some(guardian_policy_prompt())
|
|
);
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn guardian_review_session_config_uses_live_network_proxy_state() {
|
|
let mut parent_config = test_config().await;
|
|
let mut parent_network = NetworkProxyConfig::default();
|
|
parent_network.network.enabled = true;
|
|
parent_network
|
|
.network
|
|
.set_allowed_domains(vec!["parent.example".to_string()]);
|
|
parent_config.permissions.network = Some(
|
|
NetworkProxySpec::from_config_and_constraints(
|
|
parent_network,
|
|
/*requirements*/ None,
|
|
parent_config.permissions.permission_profile.get(),
|
|
)
|
|
.expect("parent network proxy spec"),
|
|
);
|
|
|
|
let mut live_network = NetworkProxyConfig::default();
|
|
live_network.network.enabled = true;
|
|
live_network
|
|
.network
|
|
.set_allowed_domains(vec!["github.com".to_string()]);
|
|
|
|
let guardian_config = build_guardian_review_session_config_for_test(
|
|
&parent_config,
|
|
Some(live_network.clone()),
|
|
"active-model",
|
|
/*reasoning_effort*/ None,
|
|
)
|
|
.expect("guardian config");
|
|
|
|
assert_eq!(
|
|
guardian_config.permissions.network,
|
|
Some(
|
|
NetworkProxySpec::from_config_and_constraints(
|
|
live_network,
|
|
/*requirements*/ None,
|
|
&PermissionProfile::read_only(),
|
|
)
|
|
.expect("live network proxy spec")
|
|
)
|
|
);
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn guardian_review_session_config_disables_mcp_apps_and_plugins() {
|
|
let mut parent_config = test_config().await;
|
|
let server: McpServerConfig =
|
|
toml::from_str("command = \"docs-server\"").expect("deserialize MCP server");
|
|
parent_config
|
|
.mcp_servers
|
|
.set(HashMap::from([("docs".to_string(), server)]))
|
|
.expect("parent MCP servers are configurable");
|
|
parent_config
|
|
.features
|
|
.enable(Feature::Apps)
|
|
.expect("apps feature is configurable");
|
|
parent_config
|
|
.features
|
|
.enable(Feature::Plugins)
|
|
.expect("plugins feature is configurable");
|
|
parent_config.include_apps_instructions = true;
|
|
|
|
let guardian_config = build_guardian_review_session_config_for_test(
|
|
&parent_config,
|
|
/*live_network_config*/ None,
|
|
"active-model",
|
|
/*reasoning_effort*/ None,
|
|
)
|
|
.expect("guardian config");
|
|
|
|
assert!(guardian_config.mcp_servers.get().is_empty());
|
|
assert!(!guardian_config.features.enabled(Feature::Apps));
|
|
assert!(!guardian_config.features.enabled(Feature::Plugins));
|
|
assert!(!guardian_config.include_apps_instructions);
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn guardian_review_session_config_allows_pinned_disabled_feature() {
|
|
let mut parent_config = test_config().await;
|
|
parent_config.features = ManagedFeatures::from_configured(
|
|
parent_config.features.get().clone(),
|
|
Some(Sourced {
|
|
value: FeatureRequirementsToml {
|
|
entries: BTreeMap::from([("multi_agent".to_string(), true)]),
|
|
},
|
|
source: RequirementSource::Unknown,
|
|
}),
|
|
)
|
|
.expect("managed features");
|
|
|
|
let guardian_config = build_guardian_review_session_config_for_test(
|
|
&parent_config,
|
|
/*live_network_config*/ None,
|
|
"active-model",
|
|
/*reasoning_effort*/ None,
|
|
)
|
|
.expect("guardian config should continue when a disabled feature is pinned on");
|
|
|
|
assert!(guardian_config.features.enabled(Feature::Collab));
|
|
assert!(guardian_config.mcp_servers.get().is_empty());
|
|
assert!(!guardian_config.include_apps_instructions);
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn guardian_review_session_config_uses_parent_active_model_instead_of_hardcoded_slug() {
|
|
let mut parent_config = test_config().await;
|
|
parent_config.model = Some("configured-model".to_string());
|
|
|
|
let guardian_config = build_guardian_review_session_config_for_test(
|
|
&parent_config,
|
|
/*live_network_config*/ None,
|
|
"active-model",
|
|
/*reasoning_effort*/ None,
|
|
)
|
|
.expect("guardian config");
|
|
|
|
assert_eq!(guardian_config.model, Some("active-model".to_string()));
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn guardian_review_session_config_uses_requirements_guardian_policy_config() {
|
|
let codex_home = tempfile::tempdir().expect("create temp dir");
|
|
let workspace = tempfile::tempdir().expect("create temp dir");
|
|
let config_layer_stack = ConfigLayerStack::new(
|
|
Vec::new(),
|
|
Default::default(),
|
|
codex_config::ConfigRequirementsToml {
|
|
guardian_policy_config: Some(
|
|
" Use the workspace-managed guardian policy. ".to_string(),
|
|
),
|
|
..Default::default()
|
|
},
|
|
)
|
|
.expect("config layer stack");
|
|
let parent_config = Config::load_config_with_layer_stack(
|
|
LOCAL_FS.as_ref(),
|
|
ConfigToml::default(),
|
|
ConfigOverrides {
|
|
cwd: Some(workspace.path().to_path_buf()),
|
|
..Default::default()
|
|
},
|
|
codex_home.abs(),
|
|
config_layer_stack,
|
|
)
|
|
.await
|
|
.expect("load config");
|
|
|
|
let guardian_config = build_guardian_review_session_config_for_test(
|
|
&parent_config,
|
|
/*live_network_config*/ None,
|
|
"active-model",
|
|
/*reasoning_effort*/ None,
|
|
)
|
|
.expect("guardian config");
|
|
|
|
assert_eq!(guardian_config.developer_instructions, None);
|
|
assert_eq!(
|
|
guardian_config.base_instructions,
|
|
Some(guardian_policy_prompt_with_config(
|
|
"Use the workspace-managed guardian policy."
|
|
))
|
|
);
|
|
}
|
|
|
|
#[tokio::test]
|
|
async fn guardian_review_session_config_uses_default_guardian_policy_without_requirements_override()
|
|
{
|
|
let codex_home = tempfile::tempdir().expect("create temp dir");
|
|
let workspace = tempfile::tempdir().expect("create temp dir");
|
|
let config_layer_stack =
|
|
ConfigLayerStack::new(Vec::new(), Default::default(), Default::default())
|
|
.expect("config layer stack");
|
|
let parent_config = Config::load_config_with_layer_stack(
|
|
LOCAL_FS.as_ref(),
|
|
ConfigToml::default(),
|
|
ConfigOverrides {
|
|
cwd: Some(workspace.path().to_path_buf()),
|
|
..Default::default()
|
|
},
|
|
codex_home.abs(),
|
|
config_layer_stack,
|
|
)
|
|
.await
|
|
.expect("load config");
|
|
|
|
let guardian_config = build_guardian_review_session_config_for_test(
|
|
&parent_config,
|
|
/*live_network_config*/ None,
|
|
"active-model",
|
|
/*reasoning_effort*/ None,
|
|
)
|
|
.expect("guardian config");
|
|
|
|
assert_eq!(guardian_config.developer_instructions, None);
|
|
assert_eq!(
|
|
guardian_config.base_instructions,
|
|
Some(guardian_policy_prompt())
|
|
);
|
|
}
|