Compare commits

...

4 Commits

Author SHA1 Message Date
won
7369cb67c0 analytics compatibility, accurate logging, etc 2026-05-04 16:44:04 -07:00
won
a26a3d2095 compatibility issue 2026-05-01 12:41:43 -07:00
won
9f842a9ab1 ooops 2026-05-01 12:39:11 -07:00
won
3e5266aeb6 first draft 2026-05-01 12:28:53 -07:00
17 changed files with 597 additions and 272 deletions

View File

@@ -33,11 +33,13 @@ pub(crate) use review::is_guardian_reviewer_source;
pub(crate) use review::new_guardian_review_id;
#[cfg(test)]
pub(crate) use review::record_guardian_denial_for_test;
pub(crate) use review::reset_auto_review_rejection_circuit_breaker;
pub(crate) use review::review_approval_request;
#[cfg(test)]
pub(crate) use review::review_approval_request_with_cancel;
pub(crate) use review::routes_approval_to_guardian;
pub(crate) use review::spawn_approval_request_review;
pub(crate) use review::take_pending_auto_review_escalation;
pub(crate) use review_session::GuardianReviewSessionManager;
const GUARDIAN_PREFERRED_MODEL: &str = "codex-auto-review";
@@ -79,13 +81,13 @@ pub(crate) struct GuardianRejectionCircuitBreaker {
struct GuardianRejectionCircuitBreakerTurn {
consecutive_denials: u32,
total_denials: u32,
interrupt_triggered: bool,
pending_auto_review_escalation: bool,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum GuardianRejectionCircuitBreakerAction {
Continue,
InterruptTurn {
EscalateToUser {
consecutive_denials: u32,
total_denials: u32,
},
@@ -100,12 +102,12 @@ impl GuardianRejectionCircuitBreaker {
let turn = self.turns.entry(turn_id.to_string()).or_default();
turn.consecutive_denials = turn.consecutive_denials.saturating_add(1);
turn.total_denials = turn.total_denials.saturating_add(1);
if !turn.interrupt_triggered
if !turn.pending_auto_review_escalation
&& (turn.consecutive_denials >= MAX_CONSECUTIVE_GUARDIAN_DENIALS_PER_TURN
|| turn.total_denials >= MAX_TOTAL_GUARDIAN_DENIALS_PER_TURN)
{
turn.interrupt_triggered = true;
GuardianRejectionCircuitBreakerAction::InterruptTurn {
turn.pending_auto_review_escalation = true;
GuardianRejectionCircuitBreakerAction::EscalateToUser {
consecutive_denials: turn.consecutive_denials,
total_denials: turn.total_denials,
}
@@ -118,6 +120,12 @@ impl GuardianRejectionCircuitBreaker {
let turn = self.turns.entry(turn_id.to_string()).or_default();
turn.consecutive_denials = 0;
}
pub(crate) fn take_pending_auto_review_escalation(&mut self, turn_id: &str) -> bool {
self.turns
.get_mut(turn_id)
.is_some_and(|turn| std::mem::take(&mut turn.pending_auto_review_escalation))
}
}
#[cfg(test)]

View File

@@ -16,7 +16,6 @@ use codex_protocol::protocol::GuardianRiskLevel;
use codex_protocol::protocol::GuardianUserAuthorization;
use codex_protocol::protocol::ReviewDecision;
use codex_protocol::protocol::SubAgentSource;
use codex_protocol::protocol::TurnAbortReason;
use codex_protocol::protocol::WarningEvent;
use tokio::sync::oneshot;
use tokio_util::sync::CancellationToken;
@@ -185,7 +184,7 @@ async fn record_guardian_denial(session: &Arc<Session>, turn: &Arc<TurnContext>,
.lock()
.await
.record_denial(turn_id);
let GuardianRejectionCircuitBreakerAction::InterruptTurn {
let GuardianRejectionCircuitBreakerAction::EscalateToUser {
consecutive_denials,
total_denials,
} = action
@@ -202,20 +201,35 @@ async fn record_guardian_denial(session: &Arc<Session>, turn: &Arc<TurnContext>,
turn.as_ref(),
EventMsg::GuardianWarning(WarningEvent {
message: format!(
"Automatic approval review rejected too many approval requests for this turn ({consecutive_denials} consecutive, {total_denials} total); interrupting the turn."
"Automatic approval review rejected too many approval requests for this turn ({consecutive_denials} consecutive, {total_denials} total); requesting manual approval for the current request."
),
}),
)
.await;
}
let runtime_handle = session.services.runtime_handle.clone();
let session = Arc::clone(session);
let turn_id = turn_id.to_string();
let _abort_task = runtime_handle.spawn(async move {
session
.abort_turn_if_active(&turn_id, TurnAbortReason::Interrupted)
.await;
});
pub(crate) async fn reset_auto_review_rejection_circuit_breaker(
session: &Arc<Session>,
turn_id: &str,
) {
session
.services
.guardian_rejection_circuit_breaker
.lock()
.await
.clear_turn(turn_id);
}
pub(crate) async fn take_pending_auto_review_escalation(
session: &Arc<Session>,
turn_id: &str,
) -> bool {
session
.services
.guardian_rejection_circuit_breaker
.lock()
.await
.take_pending_auto_review_escalation(turn_id)
}
#[cfg(test)]

View File

@@ -70,7 +70,7 @@ fn fixed_guardian_parent_session_id() -> ThreadId {
}
#[test]
fn guardian_rejection_circuit_breaker_interrupts_after_three_consecutive_denials() {
fn guardian_rejection_circuit_breaker_escalates_after_three_consecutive_denials() {
let mut circuit_breaker = GuardianRejectionCircuitBreaker::default();
assert_eq!(
circuit_breaker.record_denial("turn-1"),
@@ -82,7 +82,7 @@ fn guardian_rejection_circuit_breaker_interrupts_after_three_consecutive_denials
);
assert_eq!(
circuit_breaker.record_denial("turn-1"),
GuardianRejectionCircuitBreakerAction::InterruptTurn {
GuardianRejectionCircuitBreakerAction::EscalateToUser {
consecutive_denials: 3,
total_denials: 3,
}
@@ -111,7 +111,7 @@ fn guardian_rejection_circuit_breaker_resets_consecutive_denials_on_non_denial()
);
assert_eq!(
circuit_breaker.record_denial("turn-1"),
GuardianRejectionCircuitBreakerAction::InterruptTurn {
GuardianRejectionCircuitBreakerAction::EscalateToUser {
consecutive_denials: 3,
total_denials: 4,
}
@@ -119,7 +119,7 @@ fn guardian_rejection_circuit_breaker_resets_consecutive_denials_on_non_denial()
}
#[test]
fn guardian_rejection_circuit_breaker_interrupts_after_ten_total_denials() {
fn guardian_rejection_circuit_breaker_escalates_after_ten_total_denials() {
let mut circuit_breaker = GuardianRejectionCircuitBreaker::default();
for _ in 0..9 {
assert_eq!(
@@ -130,13 +130,33 @@ fn guardian_rejection_circuit_breaker_interrupts_after_ten_total_denials() {
}
assert_eq!(
circuit_breaker.record_denial("turn-1"),
GuardianRejectionCircuitBreakerAction::InterruptTurn {
GuardianRejectionCircuitBreakerAction::EscalateToUser {
consecutive_denials: 1,
total_denials: 10,
}
);
}
#[test]
fn guardian_rejection_circuit_breaker_tracks_pending_escalation_once() {
let mut circuit_breaker = GuardianRejectionCircuitBreaker::default();
for _ in 0..2 {
assert_eq!(
circuit_breaker.record_denial("turn-1"),
GuardianRejectionCircuitBreakerAction::Continue
);
}
assert_eq!(
circuit_breaker.record_denial("turn-1"),
GuardianRejectionCircuitBreakerAction::EscalateToUser {
consecutive_denials: 3,
total_denials: 3,
}
);
assert!(circuit_breaker.take_pending_auto_review_escalation("turn-1"));
assert!(!circuit_breaker.take_pending_auto_review_escalation("turn-1"));
}
async fn guardian_test_session_and_turn(
server: &wiremock::MockServer,
) -> (Arc<Session>, Arc<TurnContext>) {

View File

@@ -23,8 +23,10 @@ use crate::guardian::guardian_approval_request_to_json;
use crate::guardian::guardian_rejection_message;
use crate::guardian::guardian_timeout_message;
use crate::guardian::new_guardian_review_id;
use crate::guardian::reset_auto_review_rejection_circuit_breaker;
use crate::guardian::review_approval_request;
use crate::guardian::routes_approval_to_guardian;
use crate::guardian::take_pending_auto_review_escalation;
use crate::hook_runtime::run_permission_request_hooks;
use crate::mcp_openai_file::rewrite_mcp_tool_arguments_for_openai_files;
use crate::mcp_tool_approval_templates::RenderedMcpToolApprovalParam;
@@ -958,6 +960,7 @@ async fn maybe_request_mcp_tool_approval(
.features
.enabled(Feature::ToolCallMcpElicitation);
let mut escalated_from_auto_review = false;
if routes_approval_to_guardian(turn_context) {
let review_id = new_guardian_review_id();
let decision = review_approval_request(
@@ -968,16 +971,21 @@ async fn maybe_request_mcp_tool_approval(
monitor_reason.clone(),
)
.await;
let decision = mcp_tool_approval_decision_from_guardian(sess, &review_id, decision).await;
apply_mcp_tool_approval_decision(
sess,
turn_context,
&decision,
session_approval_key,
persistent_approval_key,
)
.await;
return Some(decision);
escalated_from_auto_review = matches!(decision, ReviewDecision::Denied)
&& take_pending_auto_review_escalation(sess, &turn_context.sub_id).await;
if !escalated_from_auto_review {
let decision =
mcp_tool_approval_decision_from_guardian(sess, &review_id, decision).await;
apply_mcp_tool_approval_decision(
sess,
turn_context,
&decision,
session_approval_key,
persistent_approval_key,
)
.await;
return Some(decision);
}
}
let prompt_options = mcp_tool_approval_prompt_options(
@@ -1009,7 +1017,7 @@ async fn maybe_request_mcp_tool_approval(
);
question.question =
mcp_tool_approval_question_text(question.question, monitor_reason.as_deref());
if tool_call_mcp_elicitation_enabled {
let decision = if tool_call_mcp_elicitation_enabled {
let request_id = rmcp::model::RequestId::String(
format!("{MCP_TOOL_APPROVAL_QUESTION_ID_PREFIX}_{call_id}").into(),
);
@@ -1038,28 +1046,22 @@ async fn maybe_request_mcp_tool_approval(
.await,
&question_id,
);
let decision = normalize_approval_decision_for_mode(decision, approval_mode);
apply_mcp_tool_approval_decision(
sess,
turn_context,
&decision,
session_approval_key,
persistent_approval_key,
normalize_approval_decision_for_mode(decision, approval_mode)
} else {
let args = RequestUserInputArgs {
questions: vec![question],
};
let response = sess
.request_user_input(turn_context.as_ref(), call_id.to_string(), args)
.await;
normalize_approval_decision_for_mode(
parse_mcp_tool_approval_response(response, &question_id),
approval_mode,
)
.await;
return Some(decision);
}
let args = RequestUserInputArgs {
questions: vec![question],
};
let response = sess
.request_user_input(turn_context.as_ref(), call_id.to_string(), args)
.await;
let decision = normalize_approval_decision_for_mode(
parse_mcp_tool_approval_response(response, &question_id),
approval_mode,
);
if escalated_from_auto_review {
reset_auto_review_rejection_circuit_breaker(sess, &turn_context.sub_id).await;
}
apply_mcp_tool_approval_decision(
sess,
turn_context,

View File

@@ -1988,7 +1988,9 @@ impl Session {
}
let requested_permissions = args.permissions;
let request_reason = args.reason;
let mut escalated_from_auto_review = false;
if crate::guardian::routes_approval_to_guardian(turn_context.as_ref()) {
let originating_turn_state = {
let active = self.active_turn.lock().await;
@@ -1998,9 +2000,9 @@ impl Session {
let session = Arc::clone(self);
let turn = Arc::clone(turn_context);
let request = crate::guardian::GuardianApprovalRequest::RequestPermissions {
id: call_id,
id: call_id.clone(),
turn_id: turn_context.sub_id.clone(),
reason: args.reason,
reason: request_reason.clone(),
permissions: requested_permissions.clone(),
};
let review_rx = crate::guardian::spawn_approval_request_review(
@@ -2017,52 +2019,58 @@ impl Session {
_ = cancellation_token.cancelled() => return None,
decision = review_rx => decision.unwrap_or(ReviewDecision::Denied),
};
let response = match decision {
ReviewDecision::Approved | ReviewDecision::ApprovedExecpolicyAmendment { .. } => {
RequestPermissionsResponse {
permissions: requested_permissions.clone(),
scope: PermissionGrantScope::Turn,
strict_auto_review: false,
escalated_from_auto_review = matches!(decision, ReviewDecision::Denied)
&& crate::guardian::take_pending_auto_review_escalation(self, &turn_context.sub_id)
.await;
if !escalated_from_auto_review {
let response = match decision {
ReviewDecision::Approved
| ReviewDecision::ApprovedExecpolicyAmendment { .. } => {
RequestPermissionsResponse {
permissions: requested_permissions.clone(),
scope: PermissionGrantScope::Turn,
strict_auto_review: false,
}
}
}
ReviewDecision::ApprovedForSession => RequestPermissionsResponse {
permissions: requested_permissions.clone(),
scope: PermissionGrantScope::Session,
strict_auto_review: false,
},
ReviewDecision::NetworkPolicyAmendment {
network_policy_amendment,
} => match network_policy_amendment.action {
NetworkPolicyRuleAction::Allow => RequestPermissionsResponse {
ReviewDecision::ApprovedForSession => RequestPermissionsResponse {
permissions: requested_permissions.clone(),
scope: PermissionGrantScope::Turn,
scope: PermissionGrantScope::Session,
strict_auto_review: false,
},
NetworkPolicyRuleAction::Deny => RequestPermissionsResponse {
permissions: RequestPermissionProfile::default(),
scope: PermissionGrantScope::Turn,
strict_auto_review: false,
ReviewDecision::NetworkPolicyAmendment {
network_policy_amendment,
} => match network_policy_amendment.action {
NetworkPolicyRuleAction::Allow => RequestPermissionsResponse {
permissions: requested_permissions.clone(),
scope: PermissionGrantScope::Turn,
strict_auto_review: false,
},
NetworkPolicyRuleAction::Deny => RequestPermissionsResponse {
permissions: RequestPermissionProfile::default(),
scope: PermissionGrantScope::Turn,
strict_auto_review: false,
},
},
},
ReviewDecision::Abort | ReviewDecision::Denied | ReviewDecision::TimedOut => {
RequestPermissionsResponse {
permissions: RequestPermissionProfile::default(),
scope: PermissionGrantScope::Turn,
strict_auto_review: false,
ReviewDecision::Abort | ReviewDecision::Denied | ReviewDecision::TimedOut => {
RequestPermissionsResponse {
permissions: RequestPermissionProfile::default(),
scope: PermissionGrantScope::Turn,
strict_auto_review: false,
}
}
}
};
let response = Self::normalize_request_permissions_response(
requested_permissions,
response,
cwd.as_path(),
);
self.record_granted_request_permissions_for_turn(
&response,
originating_turn_state.as_ref(),
)
.await;
return Some(response);
};
let response = Self::normalize_request_permissions_response(
requested_permissions,
response,
cwd.as_path(),
);
self.record_granted_request_permissions_for_turn(
&response,
originating_turn_state.as_ref(),
)
.await;
return Some(response);
}
}
let (tx_response, rx_response) = oneshot::channel();
@@ -2090,12 +2098,12 @@ impl Session {
let event = EventMsg::RequestPermissions(RequestPermissionsEvent {
call_id: call_id.clone(),
turn_id: turn_context.sub_id.clone(),
reason: args.reason,
reason: request_reason,
permissions: requested_permissions,
cwd: Some(cwd),
});
self.send_event(turn_context.as_ref(), event).await;
tokio::select! {
let response = tokio::select! {
biased;
_ = cancellation_token.cancelled() => {
let mut active = self.active_turn.lock().await;
@@ -2106,7 +2114,15 @@ impl Session {
None
}
response = rx_response => response.ok(),
};
if escalated_from_auto_review {
crate::guardian::reset_auto_review_rejection_circuit_breaker(
self,
&turn_context.sub_id,
)
.await;
}
response
}
#[expect(

View File

@@ -817,7 +817,6 @@ impl Session {
tool_approvals: Mutex::new(ApprovalStore::default()),
guardian_rejections: Mutex::new(HashMap::new()),
guardian_rejection_circuit_breaker: Mutex::new(Default::default()),
runtime_handle: tokio::runtime::Handle::current(),
skills_manager,
plugins_manager: Arc::clone(&plugins_manager),
mcp_manager: Arc::clone(&mcp_manager),

View File

@@ -893,8 +893,10 @@ async fn danger_full_access_tool_attempts_do_not_enforce_managed_network() -> an
&'a mut self,
_req: &'a (),
_ctx: crate::tools::sandboxing::ApprovalCtx<'a>,
) -> futures::future::BoxFuture<'a, ReviewDecision> {
Box::pin(async { ReviewDecision::Approved })
) -> futures::future::BoxFuture<'a, crate::tools::sandboxing::ToolApprovalOutcome> {
Box::pin(async {
crate::tools::sandboxing::ToolApprovalOutcome::from_user(ReviewDecision::Approved)
})
}
}
@@ -3486,7 +3488,6 @@ pub(crate) async fn make_session_and_context() -> (Session, TurnContext) {
tool_approvals: Mutex::new(ApprovalStore::default()),
guardian_rejections: Mutex::new(std::collections::HashMap::new()),
guardian_rejection_circuit_breaker: Mutex::new(Default::default()),
runtime_handle: tokio::runtime::Handle::current(),
skills_manager,
plugins_manager,
mcp_manager,
@@ -4912,7 +4913,6 @@ where
tool_approvals: Mutex::new(ApprovalStore::default()),
guardian_rejections: Mutex::new(std::collections::HashMap::new()),
guardian_rejection_circuit_breaker: Mutex::new(Default::default()),
runtime_handle: tokio::runtime::Handle::current(),
skills_manager,
plugins_manager,
mcp_manager,
@@ -6315,7 +6315,7 @@ impl SessionTask for GuardianDeniedApprovalTask {
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn guardian_auto_review_interrupts_after_three_consecutive_denials() {
async fn guardian_auto_review_warns_after_three_consecutive_denials() {
let (sess, tc, rx) = make_session_and_context_with_rx().await;
let input = vec![UserInput::Text {
text: "trigger guardian denials".to_string(),
@@ -6325,12 +6325,12 @@ async fn guardian_auto_review_interrupts_after_three_consecutive_denials() {
.await;
let mut observed = Vec::new();
let aborted = tokio::time::timeout(std::time::Duration::from_secs(5), async {
let warning = tokio::time::timeout(std::time::Duration::from_secs(5), async {
loop {
let event = rx.recv().await.expect("event");
if let EventMsg::TurnAborted(event) = &event.msg {
if let EventMsg::GuardianWarning(event) = &event.msg {
let event = event.clone();
observed.push(EventMsg::TurnAborted(event.clone()));
observed.push(EventMsg::GuardianWarning(event.clone()));
break event;
}
observed.push(event.msg);
@@ -6339,14 +6339,18 @@ async fn guardian_auto_review_interrupts_after_three_consecutive_denials() {
.await
.unwrap_or_else(|_| {
panic!(
"guardian denial circuit breaker should interrupt the turn; observed events: {observed:?}"
"guardian denial circuit breaker should warn about escalation; observed events: {observed:?}"
)
});
assert_eq!(aborted.reason, TurnAbortReason::Interrupted);
assert!(
warning
.message
.contains("requesting manual approval for the current request")
);
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn guardian_helper_review_interrupts_after_three_consecutive_denials() {
async fn guardian_helper_review_warns_after_three_consecutive_denials() {
let (sess, tc, rx) = make_session_and_context_with_rx().await;
let input = vec![UserInput::Text {
text: "keep turn active for helper reviews".to_string(),
@@ -6384,12 +6388,12 @@ async fn guardian_helper_review_interrupts_after_three_consecutive_denials() {
review_thread.join().expect("helper review thread");
let mut observed = Vec::new();
let aborted = timeout(StdDuration::from_secs(5), async {
let warning = timeout(StdDuration::from_secs(5), async {
loop {
let event = rx.recv().await.expect("event");
if let EventMsg::TurnAborted(event) = &event.msg {
if let EventMsg::GuardianWarning(event) = &event.msg {
let event = event.clone();
observed.push(EventMsg::TurnAborted(event.clone()));
observed.push(EventMsg::GuardianWarning(event.clone()));
break event;
}
observed.push(event.msg);
@@ -6398,10 +6402,14 @@ async fn guardian_helper_review_interrupts_after_three_consecutive_denials() {
.await
.unwrap_or_else(|_| {
panic!(
"helper review circuit breaker should interrupt the turn; observed events: {observed:?}"
"helper review circuit breaker should warn about escalation; observed events: {observed:?}"
)
});
assert_eq!(aborted.reason, TurnAbortReason::Interrupted);
assert!(
warning
.message
.contains("requesting manual approval for the current request")
);
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]

View File

@@ -27,7 +27,6 @@ use codex_rollout_trace::ThreadTraceContext;
use codex_thread_store::LiveThread;
use codex_thread_store::ThreadStore;
use std::path::PathBuf;
use tokio::runtime::Handle;
use tokio::sync::Mutex;
use tokio::sync::RwLock;
use tokio::sync::watch;
@@ -54,7 +53,6 @@ pub(crate) struct SessionServices {
pub(crate) tool_approvals: Mutex<ApprovalStore>,
pub(crate) guardian_rejections: Mutex<HashMap<String, GuardianRejection>>,
pub(crate) guardian_rejection_circuit_breaker: Mutex<GuardianRejectionCircuitBreaker>,
pub(crate) runtime_handle: Handle,
pub(crate) skills_manager: Arc<SkillsManager>,
pub(crate) plugins_manager: Arc<PluginsManager>,
pub(crate) mcp_manager: Arc<McpManager>,

View File

@@ -508,51 +508,6 @@ impl Session {
}
}
pub(crate) async fn abort_turn_if_active(
self: &Arc<Self>,
turn_id: &str,
reason: TurnAbortReason,
) -> bool {
let active_turn = {
let mut active = self.active_turn.lock().await;
if active
.as_ref()
.is_some_and(|active_turn| active_turn.tasks.contains_key(turn_id))
{
active.take()
} else {
None
}
};
let Some(mut active_turn) = active_turn else {
return false;
};
let tasks = active_turn.drain_tasks();
let turn_context = tasks.first().map(|task| Arc::clone(&task.turn_context));
for task in tasks {
self.handle_task_abort(task, reason.clone()).await;
}
if let Err(err) = self
.goal_runtime_apply(GoalRuntimeEvent::TaskAborted {
turn_context: turn_context.as_deref(),
reason: reason.clone(),
})
.await
{
warn!("failed to apply goal runtime abort event: {err}");
}
// Let interrupted tasks observe cancellation before dropping pending approvals, or an
// in-flight approval wait can surface as a model-visible rejection before TurnAborted.
active_turn.clear_pending().await;
if reason == TurnAbortReason::Interrupted {
self.maybe_start_turn_for_pending_work().await;
}
true
}
pub async fn on_task_finished(
self: &Arc<Self>,
turn_context: Arc<TurnContext>,

View File

@@ -3,11 +3,14 @@ use crate::guardian::GuardianNetworkAccessTrigger;
use crate::guardian::guardian_rejection_message;
use crate::guardian::guardian_timeout_message;
use crate::guardian::new_guardian_review_id;
use crate::guardian::reset_auto_review_rejection_circuit_breaker;
use crate::guardian::review_approval_request;
use crate::guardian::routes_approval_to_guardian;
use crate::guardian::take_pending_auto_review_escalation;
use crate::hook_runtime::run_permission_request_hooks;
use crate::network_policy_decision::denied_network_policy_message;
use crate::session::session::Session;
use crate::tools::sandboxing::FinalApprovalDecisionSource;
use crate::tools::sandboxing::PermissionRequestPayload;
use crate::tools::sandboxing::ToolError;
use codex_hooks::PermissionRequestDecision;
@@ -498,11 +501,12 @@ impl NetworkApprovalService {
}
let use_guardian = routes_approval_to_guardian(&turn_context);
let guardian_review_id = use_guardian.then(new_guardian_review_id);
let approval_decision = if let Some(review_id) = guardian_review_id.clone() {
review_approval_request(
let mut escalated_from_auto_review = false;
let (approval_decision, final_source) = if let Some(review_id) = guardian_review_id {
let decision = review_approval_request(
&session,
&turn_context,
review_id,
review_id.clone(),
GuardianApprovalRequest::NetworkAccess {
id: guardian_approval_id.clone(),
turn_id: owner_call
@@ -516,24 +520,57 @@ impl NetworkApprovalService {
},
Some(policy_denial_message.clone()),
)
.await
.await;
escalated_from_auto_review = matches!(decision, ReviewDecision::Denied)
&& take_pending_auto_review_escalation(&session, &turn_context.sub_id).await;
if escalated_from_auto_review {
let available_decisions = None;
(
session
.request_command_approval(
turn_context.as_ref(),
guardian_approval_id,
/*approval_id*/ None,
prompt_command,
turn_context.cwd.clone(),
Some(prompt_reason),
Some(network_approval_context.clone()),
/*proposed_execpolicy_amendment*/ None,
/*additional_permissions*/ None,
available_decisions,
)
.await,
FinalApprovalDecisionSource::User,
)
} else {
(
decision,
FinalApprovalDecisionSource::AutoReview { review_id },
)
}
} else {
let available_decisions = None;
session
.request_command_approval(
turn_context.as_ref(),
guardian_approval_id,
/*approval_id*/ None,
prompt_command,
turn_context.cwd.clone(),
Some(prompt_reason),
Some(network_approval_context.clone()),
/*proposed_execpolicy_amendment*/ None,
/*additional_permissions*/ None,
available_decisions,
)
.await
(
session
.request_command_approval(
turn_context.as_ref(),
guardian_approval_id,
/*approval_id*/ None,
prompt_command,
turn_context.cwd.clone(),
Some(prompt_reason),
Some(network_approval_context.clone()),
/*proposed_execpolicy_amendment*/ None,
/*additional_permissions*/ None,
available_decisions,
)
.await,
FinalApprovalDecisionSource::User,
)
};
if escalated_from_auto_review {
reset_auto_review_rejection_circuit_breaker(&session, &turn_context.sub_id).await;
}
let mut cache_session_deny = false;
let resolved = match approval_decision {
@@ -614,7 +651,7 @@ impl NetworkApprovalService {
}
},
ReviewDecision::Denied | ReviewDecision::Abort => {
if let Some(review_id) = guardian_review_id.as_deref() {
if let Some(review_id) = final_source.guardian_review_id() {
if let Some(owner_call) = owner_call.as_ref() {
let message = guardian_rejection_message(session.as_ref(), review_id).await;
self.record_call_outcome(

View File

@@ -20,8 +20,11 @@ use crate::tools::network_approval::finish_deferred_network_approval;
use crate::tools::network_approval::finish_immediate_network_approval;
use crate::tools::sandboxing::ApprovalCtx;
use crate::tools::sandboxing::ExecApprovalRequirement;
use crate::tools::sandboxing::FinalApprovalDecisionSource;
use crate::tools::sandboxing::PriorApprovalDecision;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::SandboxOverride;
use crate::tools::sandboxing::ToolApprovalOutcome;
use crate::tools::sandboxing::ToolCtx;
use crate::tools::sandboxing::ToolError;
use crate::tools::sandboxing::ToolRuntime;
@@ -160,7 +163,7 @@ impl ToolOrchestrator {
retry_reason: None,
network_approval_context: None,
};
let decision = Self::request_approval(
let (decision, source) = Self::request_approval(
tool,
req,
tool_ctx.call_id.as_str(),
@@ -170,7 +173,7 @@ impl ToolOrchestrator {
&otel,
)
.await?;
Self::reject_if_not_approved(tool_ctx, guardian_review_id.as_deref(), decision)
Self::reject_if_not_approved(tool_ctx, source.guardian_review_id(), decision)
.await?;
already_approved = true;
} else {
@@ -195,7 +198,7 @@ impl ToolOrchestrator {
retry_reason: reason,
network_approval_context: None,
};
let decision = Self::request_approval(
let (decision, source) = Self::request_approval(
tool,
req,
tool_ctx.call_id.as_str(),
@@ -206,7 +209,7 @@ impl ToolOrchestrator {
)
.await?;
Self::reject_if_not_approved(tool_ctx, guardian_review_id.as_deref(), decision)
Self::reject_if_not_approved(tool_ctx, source.guardian_review_id(), decision)
.await?;
already_approved = true;
}
@@ -330,7 +333,7 @@ impl ToolOrchestrator {
};
let permission_request_run_id = format!("{}:retry", tool_ctx.call_id);
let decision = Self::request_approval(
let (decision, source) = Self::request_approval(
tool,
req,
&permission_request_run_id,
@@ -341,7 +344,7 @@ impl ToolOrchestrator {
)
.await?;
Self::reject_if_not_approved(tool_ctx, guardian_review_id.as_deref(), decision)
Self::reject_if_not_approved(tool_ctx, source.guardian_review_id(), decision)
.await?;
}
@@ -390,7 +393,7 @@ impl ToolOrchestrator {
tool_ctx: &ToolCtx,
evaluate_permission_request_hooks: bool,
otel: &codex_otel::SessionTelemetry,
) -> Result<ReviewDecision, ToolError>
) -> Result<(ReviewDecision, FinalApprovalDecisionSource), ToolError>
where
T: ToolRuntime<Rq, Out>,
{
@@ -413,7 +416,7 @@ impl ToolOrchestrator {
&decision,
ToolDecisionSource::Config,
);
return Ok(decision);
return Ok((decision, FinalApprovalDecisionSource::Config));
}
Some(PermissionRequestDecision::Deny { message }) => {
let decision = ReviewDecision::Denied;
@@ -429,19 +432,34 @@ impl ToolOrchestrator {
}
}
let otel_source = if approval_ctx.guardian_review_id.is_some() {
ToolDecisionSource::AutomatedReviewer
} else {
ToolDecisionSource::User
let ToolApprovalOutcome {
decision,
source,
prior_decision,
} = tool.start_approval_async(req, approval_ctx).await;
if matches!(
prior_decision,
Some(PriorApprovalDecision::AutoReviewDenied)
) {
otel.tool_decision(
&tool_ctx.tool_name,
&tool_ctx.call_id,
&ReviewDecision::Denied,
ToolDecisionSource::AutomatedReviewer,
);
}
let otel_source = match &source {
FinalApprovalDecisionSource::AutoReview { .. } => ToolDecisionSource::AutomatedReviewer,
FinalApprovalDecisionSource::Config => ToolDecisionSource::Config,
FinalApprovalDecisionSource::User => ToolDecisionSource::User,
};
let decision = tool.start_approval_async(req, approval_ctx).await;
otel.tool_decision(
&tool_ctx.tool_name,
&tool_ctx.call_id,
&decision,
otel_source,
);
Ok(decision)
Ok((decision, source))
}
async fn reject_if_not_approved(

View File

@@ -5,7 +5,9 @@
//! sandboxing enforced by the explicit filesystem sandbox context.
use crate::exec::is_likely_sandbox_denied;
use crate::guardian::GuardianApprovalRequest;
use crate::guardian::reset_auto_review_rejection_circuit_breaker;
use crate::guardian::review_approval_request;
use crate::guardian::take_pending_auto_review_escalation;
use crate::tools::hook_names::HookToolName;
use crate::tools::sandboxing::Approvable;
use crate::tools::sandboxing::ApprovalCtx;
@@ -13,6 +15,7 @@ use crate::tools::sandboxing::ExecApprovalRequirement;
use crate::tools::sandboxing::PermissionRequestPayload;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::Sandboxable;
use crate::tools::sandboxing::ToolApprovalOutcome;
use crate::tools::sandboxing::ToolCtx;
use crate::tools::sandboxing::ToolError;
use crate::tools::sandboxing::ToolRuntime;
@@ -125,22 +128,38 @@ impl Approvable<ApplyPatchRequest> for ApplyPatchRuntime {
&'a mut self,
req: &'a ApplyPatchRequest,
ctx: ApprovalCtx<'a>,
) -> BoxFuture<'a, ReviewDecision> {
) -> BoxFuture<'a, ToolApprovalOutcome> {
let session = ctx.session;
let turn = ctx.turn;
let call_id = ctx.call_id.to_string();
let retry_reason = ctx.retry_reason.clone();
let approval_keys = self.approval_keys(req);
let mut approval_keys = self.approval_keys(req);
let changes = req.changes.clone();
let guardian_review_id = ctx.guardian_review_id.clone();
Box::pin(async move {
if let Some(review_id) = guardian_review_id {
let escalated_from_auto_review = if let Some(review_id) = guardian_review_id {
let action = ApplyPatchRuntime::build_guardian_review_request(req, ctx.call_id);
return review_approval_request(session, turn, review_id, action, retry_reason)
.await;
}
if req.permissions_preapproved && retry_reason.is_none() {
return ReviewDecision::Approved;
let decision = review_approval_request(
session,
turn,
review_id.clone(),
action,
retry_reason.clone(),
)
.await;
let escalated_from_auto_review = matches!(decision, ReviewDecision::Denied)
&& take_pending_auto_review_escalation(session, &turn.sub_id).await;
if !escalated_from_auto_review {
return ToolApprovalOutcome::from_auto_review(decision, review_id);
}
approval_keys.clear();
true
} else {
false
};
if req.permissions_preapproved && retry_reason.is_none() && !escalated_from_auto_review
{
return ToolApprovalOutcome::from_user(ReviewDecision::Approved);
}
if let Some(reason) = retry_reason {
let rx_approve = session
@@ -152,10 +171,16 @@ impl Approvable<ApplyPatchRequest> for ApplyPatchRuntime {
/*grant_root*/ None,
)
.await;
return rx_approve.await.unwrap_or_default();
let decision = rx_approve.await.unwrap_or_default();
return if escalated_from_auto_review {
reset_auto_review_rejection_circuit_breaker(session, &turn.sub_id).await;
ToolApprovalOutcome::from_user_after_auto_review_denial(decision)
} else {
ToolApprovalOutcome::from_user(decision)
};
}
with_cached_approval(
let decision = with_cached_approval(
&session.services,
"apply_patch",
approval_keys,
@@ -168,7 +193,13 @@ impl Approvable<ApplyPatchRequest> for ApplyPatchRuntime {
rx_approve.await.unwrap_or_default()
},
)
.await
.await;
if escalated_from_auto_review {
reset_auto_review_rejection_circuit_breaker(session, &turn.sub_id).await;
ToolApprovalOutcome::from_user_after_auto_review_denial(decision)
} else {
ToolApprovalOutcome::from_user(decision)
}
})
}

View File

@@ -12,7 +12,9 @@ use crate::command_canonicalization::canonicalize_command_for_approval;
use crate::exec::ExecCapturePolicy;
use crate::guardian::GuardianApprovalRequest;
use crate::guardian::GuardianNetworkAccessTrigger;
use crate::guardian::reset_auto_review_rejection_circuit_breaker;
use crate::guardian::review_approval_request;
use crate::guardian::take_pending_auto_review_escalation;
use crate::sandboxing::ExecOptions;
use crate::sandboxing::SandboxPermissions;
use crate::sandboxing::execute_env;
@@ -29,6 +31,7 @@ use crate::tools::sandboxing::PermissionRequestPayload;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::SandboxOverride;
use crate::tools::sandboxing::Sandboxable;
use crate::tools::sandboxing::ToolApprovalOutcome;
use crate::tools::sandboxing::ToolCtx;
use crate::tools::sandboxing::ToolError;
use crate::tools::sandboxing::ToolRuntime;
@@ -147,8 +150,8 @@ impl Approvable<ShellRequest> for ShellRuntime {
&'a mut self,
req: &'a ShellRequest,
ctx: ApprovalCtx<'a>,
) -> BoxFuture<'a, ReviewDecision> {
let keys = self.approval_keys(req);
) -> BoxFuture<'a, ToolApprovalOutcome> {
let mut keys = self.approval_keys(req);
let command = req.command.clone();
let cwd = req.cwd.clone();
let retry_reason = ctx.retry_reason.clone();
@@ -158,43 +161,59 @@ impl Approvable<ShellRequest> for ShellRuntime {
let call_id = ctx.call_id.to_string();
let guardian_review_id = ctx.guardian_review_id.clone();
Box::pin(async move {
if let Some(review_id) = guardian_review_id {
return review_approval_request(
let escalated_from_auto_review = if let Some(review_id) = guardian_review_id {
let decision = review_approval_request(
session,
turn,
review_id,
review_id.clone(),
GuardianApprovalRequest::Shell {
id: call_id,
command,
id: call_id.clone(),
command: command.clone(),
cwd: cwd.clone(),
sandbox_permissions: req.sandbox_permissions,
additional_permissions: req.additional_permissions.clone(),
justification: req.justification.clone(),
},
retry_reason,
retry_reason.clone(),
)
.await;
let escalated_from_auto_review = matches!(decision, ReviewDecision::Denied)
&& take_pending_auto_review_escalation(session, &turn.sub_id).await;
if !escalated_from_auto_review {
return ToolApprovalOutcome::from_auto_review(decision, review_id);
}
keys.clear();
true
} else {
false
};
let decision =
with_cached_approval(&session.services, "shell", keys, move || async move {
let available_decisions = None;
session
.request_command_approval(
turn,
call_id,
/*approval_id*/ None,
command,
cwd,
reason,
ctx.network_approval_context.clone(),
req.exec_approval_requirement
.proposed_execpolicy_amendment()
.cloned(),
req.additional_permissions.clone(),
available_decisions,
)
.await
})
.await;
if escalated_from_auto_review {
reset_auto_review_rejection_circuit_breaker(session, &turn.sub_id).await;
ToolApprovalOutcome::from_user_after_auto_review_denial(decision)
} else {
ToolApprovalOutcome::from_user(decision)
}
with_cached_approval(&session.services, "shell", keys, move || async move {
let available_decisions = None;
session
.request_command_approval(
turn,
call_id,
/*approval_id*/ None,
command,
cwd,
reason,
ctx.network_approval_context.clone(),
req.exec_approval_requirement
.proposed_execpolicy_amendment()
.cloned(),
req.additional_permissions.clone(),
available_decisions,
)
.await
})
.await
})
}

View File

@@ -7,8 +7,10 @@ use crate::guardian::GuardianApprovalRequest;
use crate::guardian::guardian_rejection_message;
use crate::guardian::guardian_timeout_message;
use crate::guardian::new_guardian_review_id;
use crate::guardian::reset_auto_review_rejection_circuit_breaker;
use crate::guardian::review_approval_request;
use crate::guardian::routes_approval_to_guardian;
use crate::guardian::take_pending_auto_review_escalation;
use crate::hook_runtime::run_permission_request_hooks;
use crate::sandboxing::ExecOptions;
use crate::sandboxing::ExecRequest;
@@ -16,6 +18,7 @@ use crate::sandboxing::SandboxPermissions;
use crate::shell::ShellType;
use crate::tools::runtimes::build_sandbox_command;
use crate::tools::runtimes::exec_env_for_sandbox_permissions;
use crate::tools::sandboxing::FinalApprovalDecisionSource;
use crate::tools::sandboxing::PermissionRequestPayload;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::ToolCtx;
@@ -332,7 +335,7 @@ enum DecisionSource {
struct PromptDecision {
decision: ReviewDecision,
guardian_review_id: Option<String>,
source: FinalApprovalDecisionSource,
rejection_message: Option<String>,
}
@@ -423,14 +426,14 @@ impl CoreShellActionProvider {
Some(PermissionRequestDecision::Allow) => {
return PromptDecision {
decision: ReviewDecision::Approved,
guardian_review_id: None,
source: FinalApprovalDecisionSource::Config,
rejection_message: None,
};
}
Some(PermissionRequestDecision::Deny { message }) => {
return PromptDecision {
decision: ReviewDecision::Denied,
guardian_review_id: None,
source: FinalApprovalDecisionSource::Config,
rejection_message: Some(message),
};
}
@@ -449,16 +452,20 @@ impl CoreShellActionProvider {
program: program.to_string_lossy().into_owned(),
argv: argv.to_vec(),
cwd: workdir.clone(),
additional_permissions,
additional_permissions: additional_permissions.clone(),
},
/*retry_reason*/ None,
)
.await;
return PromptDecision {
decision,
guardian_review_id,
rejection_message: None,
};
let escalated_from_auto_review = matches!(decision, ReviewDecision::Denied)
&& take_pending_auto_review_escalation(&session, &turn.sub_id).await;
if !escalated_from_auto_review {
return PromptDecision {
decision,
source: FinalApprovalDecisionSource::AutoReview { review_id },
rejection_message: None,
};
}
}
// 3) Fall back to regular user prompt
@@ -476,9 +483,12 @@ impl CoreShellActionProvider {
Some(vec![ReviewDecision::Approved, ReviewDecision::Abort]),
)
.await;
if guardian_review_id.is_some() {
reset_auto_review_rejection_circuit_breaker(&session, &turn.sub_id).await;
}
PromptDecision {
decision,
guardian_review_id: None,
source: FinalApprovalDecisionSource::User,
rejection_message: None,
}
})
@@ -540,7 +550,7 @@ impl CoreShellActionProvider {
{
message
} else if let Some(review_id) =
prompt_decision.guardian_review_id.as_deref()
prompt_decision.source.guardian_review_id()
{
guardian_rejection_message(self.session.as_ref(), review_id).await
} else {

View File

@@ -9,7 +9,9 @@ use crate::exec::ExecCapturePolicy;
use crate::exec::ExecExpiration;
use crate::guardian::GuardianApprovalRequest;
use crate::guardian::GuardianNetworkAccessTrigger;
use crate::guardian::reset_auto_review_rejection_circuit_breaker;
use crate::guardian::review_approval_request;
use crate::guardian::take_pending_auto_review_escalation;
use crate::sandboxing::ExecOptions;
use crate::sandboxing::ExecServerEnvConfig;
use crate::sandboxing::SandboxPermissions;
@@ -27,6 +29,7 @@ use crate::tools::sandboxing::PermissionRequestPayload;
use crate::tools::sandboxing::SandboxAttempt;
use crate::tools::sandboxing::SandboxOverride;
use crate::tools::sandboxing::Sandboxable;
use crate::tools::sandboxing::ToolApprovalOutcome;
use crate::tools::sandboxing::ToolCtx;
use crate::tools::sandboxing::ToolError;
use crate::tools::sandboxing::ToolRuntime;
@@ -139,8 +142,8 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
&'b mut self,
req: &'b UnifiedExecRequest,
ctx: ApprovalCtx<'b>,
) -> BoxFuture<'b, ReviewDecision> {
let keys = self.approval_keys(req);
) -> BoxFuture<'b, ToolApprovalOutcome> {
let mut keys = self.approval_keys(req);
let session = ctx.session;
let turn = ctx.turn;
let call_id = ctx.call_id.to_string();
@@ -150,44 +153,60 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
let reason = retry_reason.clone().or_else(|| req.justification.clone());
let guardian_review_id = ctx.guardian_review_id.clone();
Box::pin(async move {
if let Some(review_id) = guardian_review_id {
return review_approval_request(
let escalated_from_auto_review = if let Some(review_id) = guardian_review_id {
let decision = review_approval_request(
session,
turn,
review_id,
review_id.clone(),
GuardianApprovalRequest::ExecCommand {
id: call_id,
command,
id: call_id.clone(),
command: command.clone(),
cwd: cwd.clone(),
sandbox_permissions: req.sandbox_permissions,
additional_permissions: req.additional_permissions.clone(),
justification: req.justification.clone(),
tty: req.tty,
},
retry_reason,
retry_reason.clone(),
)
.await;
let escalated_from_auto_review = matches!(decision, ReviewDecision::Denied)
&& take_pending_auto_review_escalation(session, &turn.sub_id).await;
if !escalated_from_auto_review {
return ToolApprovalOutcome::from_auto_review(decision, review_id);
}
keys.clear();
true
} else {
false
};
let decision =
with_cached_approval(&session.services, "unified_exec", keys, || async move {
let available_decisions = None;
session
.request_command_approval(
turn,
call_id,
/*approval_id*/ None,
command,
cwd.clone(),
reason,
ctx.network_approval_context.clone(),
req.exec_approval_requirement
.proposed_execpolicy_amendment()
.cloned(),
req.additional_permissions.clone(),
available_decisions,
)
.await
})
.await;
if escalated_from_auto_review {
reset_auto_review_rejection_circuit_breaker(session, &turn.sub_id).await;
ToolApprovalOutcome::from_user_after_auto_review_denial(decision)
} else {
ToolApprovalOutcome::from_user(decision)
}
with_cached_approval(&session.services, "unified_exec", keys, || async move {
let available_decisions = None;
session
.request_command_approval(
turn,
call_id,
/*approval_id*/ None,
command,
cwd.clone(),
reason,
ctx.network_approval_context.clone(),
req.exec_approval_requirement
.proposed_execpolicy_amendment()
.cloned(),
req.additional_permissions.clone(),
available_decisions,
)
.await
})
.await
})
}

View File

@@ -132,6 +132,60 @@ pub(crate) struct ApprovalCtx<'a> {
pub network_approval_context: Option<NetworkApprovalContext>,
}
#[derive(Debug)]
pub(crate) enum FinalApprovalDecisionSource {
AutoReview { review_id: String },
Config,
User,
}
#[derive(Debug)]
pub(crate) enum PriorApprovalDecision {
AutoReviewDenied,
}
#[derive(Debug)]
pub(crate) struct ToolApprovalOutcome {
pub(crate) decision: ReviewDecision,
pub(crate) source: FinalApprovalDecisionSource,
pub(crate) prior_decision: Option<PriorApprovalDecision>,
}
impl FinalApprovalDecisionSource {
pub(crate) fn guardian_review_id(&self) -> Option<&str> {
match self {
Self::AutoReview { review_id } => Some(review_id),
Self::Config | Self::User => None,
}
}
}
impl ToolApprovalOutcome {
pub(crate) fn from_auto_review(decision: ReviewDecision, review_id: String) -> Self {
Self {
decision,
source: FinalApprovalDecisionSource::AutoReview { review_id },
prior_decision: None,
}
}
pub(crate) fn from_user(decision: ReviewDecision) -> Self {
Self {
decision,
source: FinalApprovalDecisionSource::User,
prior_decision: None,
}
}
pub(crate) fn from_user_after_auto_review_denial(decision: ReviewDecision) -> Self {
Self {
decision,
source: FinalApprovalDecisionSource::User,
prior_decision: Some(PriorApprovalDecision::AutoReviewDenied),
}
}
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct PermissionRequestPayload {
pub tool_name: HookToolName,
@@ -330,7 +384,7 @@ pub(crate) trait Approvable<Req> {
&'a mut self,
req: &'a Req,
ctx: ApprovalCtx<'a>,
) -> BoxFuture<'a, ReviewDecision>;
) -> BoxFuture<'a, ToolApprovalOutcome>;
}
pub(crate) trait Sandboxable {

View File

@@ -1,3 +1,4 @@
use codex_config::types::ApprovalsReviewer;
use codex_core::config::Constrained;
use codex_features::Feature;
use codex_protocol::models::PermissionProfile;
@@ -20,6 +21,7 @@ use core_test_support::responses::ev_reasoning_text_delta;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::mount_response_once;
use core_test_support::responses::mount_sse_once;
use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::sse;
use core_test_support::responses::sse_response;
use core_test_support::responses::start_mock_server;
@@ -1166,6 +1168,25 @@ fn tool_decision_assertion<'a>(
}
}
fn auto_review_denial(response_id: &str) -> String {
sse(vec![
ev_response_created(response_id),
ev_assistant_message(
&format!("{response_id}-message"),
"{\"risk_level\":\"low\",\"user_authorization\":\"unknown\",\"outcome\":\"deny\",\"rationale\":\"guardian test denial\"}",
),
ev_completed(response_id),
])
}
fn escalated_exec_command_call(call_id: &str) -> serde_json::Value {
ev_function_call(
call_id,
"exec_command",
r#"{"cmd":"/usr/bin/touch codex-otel-escalation-test","sandbox_permissions":"require_escalated","justification":"Exercise Auto-review escalation telemetry."}"#,
)
}
#[tokio::test]
#[traced_test]
async fn handle_container_exec_autoapprove_from_config_records_tool_decision() {
@@ -1298,6 +1319,102 @@ async fn handle_container_exec_user_approved_records_tool_decision() {
));
}
#[tokio::test]
#[traced_test]
async fn escalated_auto_review_denial_records_auto_review_then_user_tool_decisions() {
let server = start_mock_server().await;
mount_sse_sequence(
&server,
vec![
sse(vec![
ev_response_created("resp-main-1"),
escalated_exec_command_call("auto_review_call_1"),
ev_completed("resp-main-1"),
]),
auto_review_denial("resp-guardian-1"),
sse(vec![
ev_response_created("resp-main-2"),
escalated_exec_command_call("auto_review_call_2"),
ev_completed("resp-main-2"),
]),
auto_review_denial("resp-guardian-2"),
sse(vec![
ev_response_created("resp-main-3"),
escalated_exec_command_call("auto_review_call_3"),
ev_completed("resp-main-3"),
]),
auto_review_denial("resp-guardian-3"),
sse(vec![
ev_response_created("resp-main-4"),
ev_assistant_message("msg-main-4", "done"),
ev_completed("resp-main-4"),
]),
],
)
.await;
let TestCodex { codex, .. } = test_codex()
.with_config(|config| {
config.permissions.approval_policy = Constrained::allow_any(AskForApproval::OnRequest);
config.approvals_reviewer = ApprovalsReviewer::AutoReview;
})
.build(&server)
.await
.unwrap();
codex
.submit(Op::UserInput {
environments: None,
items: vec![UserInput::Text {
text: "retry until Auto-review escalates".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
responsesapi_client_metadata: None,
})
.await
.unwrap();
let approval_event =
wait_for_event(&codex, |ev| matches!(ev, EventMsg::ExecApprovalRequest(_))).await;
let EventMsg::ExecApprovalRequest(approval) = approval_event else {
panic!("expected ExecApprovalRequest event");
};
assert_eq!(approval.call_id, "auto_review_call_3");
codex
.submit(Op::ExecApproval {
id: approval.effective_approval_id(),
turn_id: None,
decision: ReviewDecision::Approved,
})
.await
.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
logs_assert(|lines: &[&str]| {
let decisions = lines
.iter()
.filter(|line| {
line.contains("codex.tool_decision") && line.contains("call_id=auto_review_call_3")
})
.collect::<Vec<_>>();
if decisions.len() != 2 {
return Err(format!("expected 2 tool decisions, got {decisions:?}"));
}
let first = decisions[0].to_lowercase();
if !first.contains("decision=denied") || !first.contains("source=automatedreviewer") {
return Err(format!("unexpected first tool decision: {}", decisions[0]));
}
let second = decisions[1].to_lowercase();
if !second.contains("decision=approved") || !second.contains("source=user") {
return Err(format!("unexpected second tool decision: {}", decisions[1]));
}
Ok(())
});
}
#[tokio::test]
#[traced_test]
async fn handle_container_exec_user_approved_for_session_records_tool_decision() {