From 0dbad2a34816ce9aa6cb6872ad828918fad68a7a Mon Sep 17 00:00:00 2001 From: Won Park Date: Mon, 11 May 2026 11:03:11 -0700 Subject: [PATCH] Make auto-review denial short-circuit use a rolling review window (#22110) ## Why Long-running turns can accumulate enough denied auto-review decisions to trip the global short-circuit even when those denials are spread far apart. The breaker should still stop genuinely bad loops, but it should judge recent behavior instead of lifetime turn history. ## What changed - Replaced the lifetime `10 total denials` threshold with `10 denials in the last 50 reviews`. - Kept the existing `3 consecutive denials` interrupt behavior unchanged. - Tracked recent auto-review outcomes in the circuit breaker and updated the warning copy to report the rolling-window count. - Renamed the new rolling-window coverage to `auto_review_*` test names. - Added coverage that confirms older denials fall out of the 50-review window and no longer trigger the breaker. ## Validation - `just fmt` - `cargo test -p codex-core guardian_rejection_circuit_breaker --lib` - `cargo test -p codex-core auto_review_rejection_circuit_breaker --lib` --- codex-rs/core/src/guardian/mod.rs | 22 ++++++++++++++++------ codex-rs/core/src/guardian/review.rs | 5 +++-- codex-rs/core/src/guardian/tests.rs | 27 +++++++++++++++++++++++---- 3 files changed, 42 insertions(+), 12 deletions(-) diff --git a/codex-rs/core/src/guardian/mod.rs b/codex-rs/core/src/guardian/mod.rs index 256a616b97..ba116ec0e8 100644 --- a/codex-rs/core/src/guardian/mod.rs +++ b/codex-rs/core/src/guardian/mod.rs @@ -44,7 +44,8 @@ const GUARDIAN_PREFERRED_MODEL: &str = "codex-auto-review"; pub(crate) const GUARDIAN_REVIEW_TIMEOUT: Duration = Duration::from_secs(90); pub(crate) const GUARDIAN_REVIEWER_NAME: &str = "guardian"; pub(crate) const MAX_CONSECUTIVE_GUARDIAN_DENIALS_PER_TURN: u32 = 3; -pub(crate) const MAX_TOTAL_GUARDIAN_DENIALS_PER_TURN: u32 = 10; +pub(crate) const MAX_RECENT_AUTO_REVIEW_DENIALS_PER_TURN: u32 = 10; +pub(crate) const AUTO_REVIEW_DENIAL_WINDOW_SIZE: usize = 50; pub(crate) const AUTO_REVIEW_DENIED_ACTION_APPROVAL_DEVELOPER_PREFIX: &str = "The user has manually approved a specific action that was previously `Rejected`."; const GUARDIAN_MAX_MESSAGE_TRANSCRIPT_TOKENS: usize = 10_000; @@ -78,7 +79,7 @@ pub(crate) struct GuardianRejectionCircuitBreaker { #[derive(Debug, Default)] struct GuardianRejectionCircuitBreakerTurn { consecutive_denials: u32, - total_denials: u32, + recent_denials: std::collections::VecDeque, interrupt_triggered: bool, } @@ -87,7 +88,7 @@ pub(crate) enum GuardianRejectionCircuitBreakerAction { Continue, InterruptTurn { consecutive_denials: u32, - total_denials: u32, + recent_denials: u32, }, } @@ -99,15 +100,16 @@ impl GuardianRejectionCircuitBreaker { pub(crate) fn record_denial(&mut self, turn_id: &str) -> GuardianRejectionCircuitBreakerAction { let turn = self.turns.entry(turn_id.to_string()).or_default(); turn.consecutive_denials = turn.consecutive_denials.saturating_add(1); - turn.total_denials = turn.total_denials.saturating_add(1); + Self::record_recent_review(turn, /*denied*/ true); + let recent_denials = turn.recent_denials.iter().filter(|denied| **denied).count() as u32; if !turn.interrupt_triggered && (turn.consecutive_denials >= MAX_CONSECUTIVE_GUARDIAN_DENIALS_PER_TURN - || turn.total_denials >= MAX_TOTAL_GUARDIAN_DENIALS_PER_TURN) + || recent_denials >= MAX_RECENT_AUTO_REVIEW_DENIALS_PER_TURN) { turn.interrupt_triggered = true; GuardianRejectionCircuitBreakerAction::InterruptTurn { consecutive_denials: turn.consecutive_denials, - total_denials: turn.total_denials, + recent_denials, } } else { GuardianRejectionCircuitBreakerAction::Continue @@ -117,6 +119,14 @@ impl GuardianRejectionCircuitBreaker { pub(crate) fn record_non_denial(&mut self, turn_id: &str) { let turn = self.turns.entry(turn_id.to_string()).or_default(); turn.consecutive_denials = 0; + Self::record_recent_review(turn, /*denied*/ false); + } + + fn record_recent_review(turn: &mut GuardianRejectionCircuitBreakerTurn, denied: bool) { + turn.recent_denials.push_back(denied); + if turn.recent_denials.len() > AUTO_REVIEW_DENIAL_WINDOW_SIZE { + turn.recent_denials.pop_front(); + } } } diff --git a/codex-rs/core/src/guardian/review.rs b/codex-rs/core/src/guardian/review.rs index bba2167cef..4b3c0caefc 100644 --- a/codex-rs/core/src/guardian/review.rs +++ b/codex-rs/core/src/guardian/review.rs @@ -25,6 +25,7 @@ use crate::session::session::Session; use crate::session::turn_context::TurnContext; use crate::turn_timing::now_unix_timestamp_ms; +use super::AUTO_REVIEW_DENIAL_WINDOW_SIZE; use super::GUARDIAN_REVIEW_TIMEOUT; use super::GUARDIAN_REVIEWER_NAME; use super::GuardianApprovalRequest; @@ -188,7 +189,7 @@ async fn record_guardian_denial(session: &Arc, turn: &Arc, .record_denial(turn_id); let GuardianRejectionCircuitBreakerAction::InterruptTurn { consecutive_denials, - total_denials, + recent_denials, } = action else { return; @@ -203,7 +204,7 @@ async fn record_guardian_denial(session: &Arc, turn: &Arc, turn.as_ref(), EventMsg::GuardianWarning(WarningEvent { message: format!( - "Automatic approval review rejected too many approval requests for this turn ({consecutive_denials} consecutive, {total_denials} total); interrupting the turn." + "Automatic approval review rejected too many approval requests for this turn ({consecutive_denials} consecutive, {recent_denials} in the last {AUTO_REVIEW_DENIAL_WINDOW_SIZE} reviews); interrupting the turn." ), }), ) diff --git a/codex-rs/core/src/guardian/tests.rs b/codex-rs/core/src/guardian/tests.rs index 6d03dfa813..e647796cbf 100644 --- a/codex-rs/core/src/guardian/tests.rs +++ b/codex-rs/core/src/guardian/tests.rs @@ -86,7 +86,7 @@ fn guardian_rejection_circuit_breaker_interrupts_after_three_consecutive_denials circuit_breaker.record_denial("turn-1"), GuardianRejectionCircuitBreakerAction::InterruptTurn { consecutive_denials: 3, - total_denials: 3, + recent_denials: 3, } ); assert_eq!( @@ -115,13 +115,13 @@ fn guardian_rejection_circuit_breaker_resets_consecutive_denials_on_non_denial() circuit_breaker.record_denial("turn-1"), GuardianRejectionCircuitBreakerAction::InterruptTurn { consecutive_denials: 3, - total_denials: 4, + recent_denials: 4, } ); } #[test] -fn guardian_rejection_circuit_breaker_interrupts_after_ten_total_denials() { +fn auto_review_rejection_circuit_breaker_interrupts_after_ten_recent_denials() { let mut circuit_breaker = GuardianRejectionCircuitBreaker::default(); for _ in 0..9 { assert_eq!( @@ -134,11 +134,30 @@ fn guardian_rejection_circuit_breaker_interrupts_after_ten_total_denials() { circuit_breaker.record_denial("turn-1"), GuardianRejectionCircuitBreakerAction::InterruptTurn { consecutive_denials: 1, - total_denials: 10, + recent_denials: 10, } ); } +#[test] +fn auto_review_rejection_circuit_breaker_forgets_denials_outside_recent_review_window() { + let mut circuit_breaker = GuardianRejectionCircuitBreaker::default(); + for _ in 0..9 { + assert_eq!( + circuit_breaker.record_denial("turn-1"), + GuardianRejectionCircuitBreakerAction::Continue + ); + circuit_breaker.record_non_denial("turn-1"); + } + for _ in 0..(AUTO_REVIEW_DENIAL_WINDOW_SIZE - 18) { + circuit_breaker.record_non_denial("turn-1"); + } + assert_eq!( + circuit_breaker.record_denial("turn-1"), + GuardianRejectionCircuitBreakerAction::Continue + ); +} + async fn guardian_test_session_and_turn( server: &wiremock::MockServer, ) -> (Arc, Arc) {