Make auto-review denial short-circuit use a rolling review window (#22110)

## Why

Long-running turns can accumulate enough denied auto-review decisions to
trip the global short-circuit even when those denials are spread far
apart. The breaker should still stop genuinely bad loops, but it should
judge recent behavior instead of lifetime turn history.

## What changed

- Replaced the lifetime `10 total denials` threshold with `10 denials in
the last 50 reviews`.
- Kept the existing `3 consecutive denials` interrupt behavior
unchanged.
- Tracked recent auto-review outcomes in the circuit breaker and updated
the warning copy to report the rolling-window count.
- Renamed the new rolling-window coverage to `auto_review_*` test names.
- Added coverage that confirms older denials fall out of the 50-review
window and no longer trigger the breaker.

## Validation

- `just fmt`
- `cargo test -p codex-core guardian_rejection_circuit_breaker --lib`
- `cargo test -p codex-core auto_review_rejection_circuit_breaker --lib`
This commit is contained in:
Won Park
2026-05-11 11:03:11 -07:00
committed by GitHub
parent 1e65b3e0af
commit 0dbad2a348
3 changed files with 42 additions and 12 deletions

View File

@@ -44,7 +44,8 @@ const GUARDIAN_PREFERRED_MODEL: &str = "codex-auto-review";
pub(crate) const GUARDIAN_REVIEW_TIMEOUT: Duration = Duration::from_secs(90);
pub(crate) const GUARDIAN_REVIEWER_NAME: &str = "guardian";
pub(crate) const MAX_CONSECUTIVE_GUARDIAN_DENIALS_PER_TURN: u32 = 3;
pub(crate) const MAX_TOTAL_GUARDIAN_DENIALS_PER_TURN: u32 = 10;
pub(crate) const MAX_RECENT_AUTO_REVIEW_DENIALS_PER_TURN: u32 = 10;
pub(crate) const AUTO_REVIEW_DENIAL_WINDOW_SIZE: usize = 50;
pub(crate) const AUTO_REVIEW_DENIED_ACTION_APPROVAL_DEVELOPER_PREFIX: &str =
"The user has manually approved a specific action that was previously `Rejected`.";
const GUARDIAN_MAX_MESSAGE_TRANSCRIPT_TOKENS: usize = 10_000;
@@ -78,7 +79,7 @@ pub(crate) struct GuardianRejectionCircuitBreaker {
#[derive(Debug, Default)]
struct GuardianRejectionCircuitBreakerTurn {
consecutive_denials: u32,
total_denials: u32,
recent_denials: std::collections::VecDeque<bool>,
interrupt_triggered: bool,
}
@@ -87,7 +88,7 @@ pub(crate) enum GuardianRejectionCircuitBreakerAction {
Continue,
InterruptTurn {
consecutive_denials: u32,
total_denials: u32,
recent_denials: u32,
},
}
@@ -99,15 +100,16 @@ impl GuardianRejectionCircuitBreaker {
pub(crate) fn record_denial(&mut self, turn_id: &str) -> GuardianRejectionCircuitBreakerAction {
let turn = self.turns.entry(turn_id.to_string()).or_default();
turn.consecutive_denials = turn.consecutive_denials.saturating_add(1);
turn.total_denials = turn.total_denials.saturating_add(1);
Self::record_recent_review(turn, /*denied*/ true);
let recent_denials = turn.recent_denials.iter().filter(|denied| **denied).count() as u32;
if !turn.interrupt_triggered
&& (turn.consecutive_denials >= MAX_CONSECUTIVE_GUARDIAN_DENIALS_PER_TURN
|| turn.total_denials >= MAX_TOTAL_GUARDIAN_DENIALS_PER_TURN)
|| recent_denials >= MAX_RECENT_AUTO_REVIEW_DENIALS_PER_TURN)
{
turn.interrupt_triggered = true;
GuardianRejectionCircuitBreakerAction::InterruptTurn {
consecutive_denials: turn.consecutive_denials,
total_denials: turn.total_denials,
recent_denials,
}
} else {
GuardianRejectionCircuitBreakerAction::Continue
@@ -117,6 +119,14 @@ impl GuardianRejectionCircuitBreaker {
pub(crate) fn record_non_denial(&mut self, turn_id: &str) {
let turn = self.turns.entry(turn_id.to_string()).or_default();
turn.consecutive_denials = 0;
Self::record_recent_review(turn, /*denied*/ false);
}
fn record_recent_review(turn: &mut GuardianRejectionCircuitBreakerTurn, denied: bool) {
turn.recent_denials.push_back(denied);
if turn.recent_denials.len() > AUTO_REVIEW_DENIAL_WINDOW_SIZE {
turn.recent_denials.pop_front();
}
}
}

View File

@@ -25,6 +25,7 @@ use crate::session::session::Session;
use crate::session::turn_context::TurnContext;
use crate::turn_timing::now_unix_timestamp_ms;
use super::AUTO_REVIEW_DENIAL_WINDOW_SIZE;
use super::GUARDIAN_REVIEW_TIMEOUT;
use super::GUARDIAN_REVIEWER_NAME;
use super::GuardianApprovalRequest;
@@ -188,7 +189,7 @@ async fn record_guardian_denial(session: &Arc<Session>, turn: &Arc<TurnContext>,
.record_denial(turn_id);
let GuardianRejectionCircuitBreakerAction::InterruptTurn {
consecutive_denials,
total_denials,
recent_denials,
} = action
else {
return;
@@ -203,7 +204,7 @@ async fn record_guardian_denial(session: &Arc<Session>, turn: &Arc<TurnContext>,
turn.as_ref(),
EventMsg::GuardianWarning(WarningEvent {
message: format!(
"Automatic approval review rejected too many approval requests for this turn ({consecutive_denials} consecutive, {total_denials} total); interrupting the turn."
"Automatic approval review rejected too many approval requests for this turn ({consecutive_denials} consecutive, {recent_denials} in the last {AUTO_REVIEW_DENIAL_WINDOW_SIZE} reviews); interrupting the turn."
),
}),
)

View File

@@ -86,7 +86,7 @@ fn guardian_rejection_circuit_breaker_interrupts_after_three_consecutive_denials
circuit_breaker.record_denial("turn-1"),
GuardianRejectionCircuitBreakerAction::InterruptTurn {
consecutive_denials: 3,
total_denials: 3,
recent_denials: 3,
}
);
assert_eq!(
@@ -115,13 +115,13 @@ fn guardian_rejection_circuit_breaker_resets_consecutive_denials_on_non_denial()
circuit_breaker.record_denial("turn-1"),
GuardianRejectionCircuitBreakerAction::InterruptTurn {
consecutive_denials: 3,
total_denials: 4,
recent_denials: 4,
}
);
}
#[test]
fn guardian_rejection_circuit_breaker_interrupts_after_ten_total_denials() {
fn auto_review_rejection_circuit_breaker_interrupts_after_ten_recent_denials() {
let mut circuit_breaker = GuardianRejectionCircuitBreaker::default();
for _ in 0..9 {
assert_eq!(
@@ -134,11 +134,30 @@ fn guardian_rejection_circuit_breaker_interrupts_after_ten_total_denials() {
circuit_breaker.record_denial("turn-1"),
GuardianRejectionCircuitBreakerAction::InterruptTurn {
consecutive_denials: 1,
total_denials: 10,
recent_denials: 10,
}
);
}
#[test]
fn auto_review_rejection_circuit_breaker_forgets_denials_outside_recent_review_window() {
let mut circuit_breaker = GuardianRejectionCircuitBreaker::default();
for _ in 0..9 {
assert_eq!(
circuit_breaker.record_denial("turn-1"),
GuardianRejectionCircuitBreakerAction::Continue
);
circuit_breaker.record_non_denial("turn-1");
}
for _ in 0..(AUTO_REVIEW_DENIAL_WINDOW_SIZE - 18) {
circuit_breaker.record_non_denial("turn-1");
}
assert_eq!(
circuit_breaker.record_denial("turn-1"),
GuardianRejectionCircuitBreakerAction::Continue
);
}
async fn guardian_test_session_and_turn(
server: &wiremock::MockServer,
) -> (Arc<Session>, Arc<TurnContext>) {