mirror of
https://github.com/openai/codex.git
synced 2026-06-01 19:02:59 +00:00
Make auto-review denial short-circuit use a rolling review window (#22110)
## Why Long-running turns can accumulate enough denied auto-review decisions to trip the global short-circuit even when those denials are spread far apart. The breaker should still stop genuinely bad loops, but it should judge recent behavior instead of lifetime turn history. ## What changed - Replaced the lifetime `10 total denials` threshold with `10 denials in the last 50 reviews`. - Kept the existing `3 consecutive denials` interrupt behavior unchanged. - Tracked recent auto-review outcomes in the circuit breaker and updated the warning copy to report the rolling-window count. - Renamed the new rolling-window coverage to `auto_review_*` test names. - Added coverage that confirms older denials fall out of the 50-review window and no longer trigger the breaker. ## Validation - `just fmt` - `cargo test -p codex-core guardian_rejection_circuit_breaker --lib` - `cargo test -p codex-core auto_review_rejection_circuit_breaker --lib`
This commit is contained in:
@@ -44,7 +44,8 @@ const GUARDIAN_PREFERRED_MODEL: &str = "codex-auto-review";
|
||||
pub(crate) const GUARDIAN_REVIEW_TIMEOUT: Duration = Duration::from_secs(90);
|
||||
pub(crate) const GUARDIAN_REVIEWER_NAME: &str = "guardian";
|
||||
pub(crate) const MAX_CONSECUTIVE_GUARDIAN_DENIALS_PER_TURN: u32 = 3;
|
||||
pub(crate) const MAX_TOTAL_GUARDIAN_DENIALS_PER_TURN: u32 = 10;
|
||||
pub(crate) const MAX_RECENT_AUTO_REVIEW_DENIALS_PER_TURN: u32 = 10;
|
||||
pub(crate) const AUTO_REVIEW_DENIAL_WINDOW_SIZE: usize = 50;
|
||||
pub(crate) const AUTO_REVIEW_DENIED_ACTION_APPROVAL_DEVELOPER_PREFIX: &str =
|
||||
"The user has manually approved a specific action that was previously `Rejected`.";
|
||||
const GUARDIAN_MAX_MESSAGE_TRANSCRIPT_TOKENS: usize = 10_000;
|
||||
@@ -78,7 +79,7 @@ pub(crate) struct GuardianRejectionCircuitBreaker {
|
||||
#[derive(Debug, Default)]
|
||||
struct GuardianRejectionCircuitBreakerTurn {
|
||||
consecutive_denials: u32,
|
||||
total_denials: u32,
|
||||
recent_denials: std::collections::VecDeque<bool>,
|
||||
interrupt_triggered: bool,
|
||||
}
|
||||
|
||||
@@ -87,7 +88,7 @@ pub(crate) enum GuardianRejectionCircuitBreakerAction {
|
||||
Continue,
|
||||
InterruptTurn {
|
||||
consecutive_denials: u32,
|
||||
total_denials: u32,
|
||||
recent_denials: u32,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -99,15 +100,16 @@ impl GuardianRejectionCircuitBreaker {
|
||||
pub(crate) fn record_denial(&mut self, turn_id: &str) -> GuardianRejectionCircuitBreakerAction {
|
||||
let turn = self.turns.entry(turn_id.to_string()).or_default();
|
||||
turn.consecutive_denials = turn.consecutive_denials.saturating_add(1);
|
||||
turn.total_denials = turn.total_denials.saturating_add(1);
|
||||
Self::record_recent_review(turn, /*denied*/ true);
|
||||
let recent_denials = turn.recent_denials.iter().filter(|denied| **denied).count() as u32;
|
||||
if !turn.interrupt_triggered
|
||||
&& (turn.consecutive_denials >= MAX_CONSECUTIVE_GUARDIAN_DENIALS_PER_TURN
|
||||
|| turn.total_denials >= MAX_TOTAL_GUARDIAN_DENIALS_PER_TURN)
|
||||
|| recent_denials >= MAX_RECENT_AUTO_REVIEW_DENIALS_PER_TURN)
|
||||
{
|
||||
turn.interrupt_triggered = true;
|
||||
GuardianRejectionCircuitBreakerAction::InterruptTurn {
|
||||
consecutive_denials: turn.consecutive_denials,
|
||||
total_denials: turn.total_denials,
|
||||
recent_denials,
|
||||
}
|
||||
} else {
|
||||
GuardianRejectionCircuitBreakerAction::Continue
|
||||
@@ -117,6 +119,14 @@ impl GuardianRejectionCircuitBreaker {
|
||||
pub(crate) fn record_non_denial(&mut self, turn_id: &str) {
|
||||
let turn = self.turns.entry(turn_id.to_string()).or_default();
|
||||
turn.consecutive_denials = 0;
|
||||
Self::record_recent_review(turn, /*denied*/ false);
|
||||
}
|
||||
|
||||
fn record_recent_review(turn: &mut GuardianRejectionCircuitBreakerTurn, denied: bool) {
|
||||
turn.recent_denials.push_back(denied);
|
||||
if turn.recent_denials.len() > AUTO_REVIEW_DENIAL_WINDOW_SIZE {
|
||||
turn.recent_denials.pop_front();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@ use crate::session::session::Session;
|
||||
use crate::session::turn_context::TurnContext;
|
||||
use crate::turn_timing::now_unix_timestamp_ms;
|
||||
|
||||
use super::AUTO_REVIEW_DENIAL_WINDOW_SIZE;
|
||||
use super::GUARDIAN_REVIEW_TIMEOUT;
|
||||
use super::GUARDIAN_REVIEWER_NAME;
|
||||
use super::GuardianApprovalRequest;
|
||||
@@ -188,7 +189,7 @@ async fn record_guardian_denial(session: &Arc<Session>, turn: &Arc<TurnContext>,
|
||||
.record_denial(turn_id);
|
||||
let GuardianRejectionCircuitBreakerAction::InterruptTurn {
|
||||
consecutive_denials,
|
||||
total_denials,
|
||||
recent_denials,
|
||||
} = action
|
||||
else {
|
||||
return;
|
||||
@@ -203,7 +204,7 @@ async fn record_guardian_denial(session: &Arc<Session>, turn: &Arc<TurnContext>,
|
||||
turn.as_ref(),
|
||||
EventMsg::GuardianWarning(WarningEvent {
|
||||
message: format!(
|
||||
"Automatic approval review rejected too many approval requests for this turn ({consecutive_denials} consecutive, {total_denials} total); interrupting the turn."
|
||||
"Automatic approval review rejected too many approval requests for this turn ({consecutive_denials} consecutive, {recent_denials} in the last {AUTO_REVIEW_DENIAL_WINDOW_SIZE} reviews); interrupting the turn."
|
||||
),
|
||||
}),
|
||||
)
|
||||
|
||||
@@ -86,7 +86,7 @@ fn guardian_rejection_circuit_breaker_interrupts_after_three_consecutive_denials
|
||||
circuit_breaker.record_denial("turn-1"),
|
||||
GuardianRejectionCircuitBreakerAction::InterruptTurn {
|
||||
consecutive_denials: 3,
|
||||
total_denials: 3,
|
||||
recent_denials: 3,
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
@@ -115,13 +115,13 @@ fn guardian_rejection_circuit_breaker_resets_consecutive_denials_on_non_denial()
|
||||
circuit_breaker.record_denial("turn-1"),
|
||||
GuardianRejectionCircuitBreakerAction::InterruptTurn {
|
||||
consecutive_denials: 3,
|
||||
total_denials: 4,
|
||||
recent_denials: 4,
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn guardian_rejection_circuit_breaker_interrupts_after_ten_total_denials() {
|
||||
fn auto_review_rejection_circuit_breaker_interrupts_after_ten_recent_denials() {
|
||||
let mut circuit_breaker = GuardianRejectionCircuitBreaker::default();
|
||||
for _ in 0..9 {
|
||||
assert_eq!(
|
||||
@@ -134,11 +134,30 @@ fn guardian_rejection_circuit_breaker_interrupts_after_ten_total_denials() {
|
||||
circuit_breaker.record_denial("turn-1"),
|
||||
GuardianRejectionCircuitBreakerAction::InterruptTurn {
|
||||
consecutive_denials: 1,
|
||||
total_denials: 10,
|
||||
recent_denials: 10,
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn auto_review_rejection_circuit_breaker_forgets_denials_outside_recent_review_window() {
|
||||
let mut circuit_breaker = GuardianRejectionCircuitBreaker::default();
|
||||
for _ in 0..9 {
|
||||
assert_eq!(
|
||||
circuit_breaker.record_denial("turn-1"),
|
||||
GuardianRejectionCircuitBreakerAction::Continue
|
||||
);
|
||||
circuit_breaker.record_non_denial("turn-1");
|
||||
}
|
||||
for _ in 0..(AUTO_REVIEW_DENIAL_WINDOW_SIZE - 18) {
|
||||
circuit_breaker.record_non_denial("turn-1");
|
||||
}
|
||||
assert_eq!(
|
||||
circuit_breaker.record_denial("turn-1"),
|
||||
GuardianRejectionCircuitBreakerAction::Continue
|
||||
);
|
||||
}
|
||||
|
||||
async fn guardian_test_session_and_turn(
|
||||
server: &wiremock::MockServer,
|
||||
) -> (Arc<Session>, Arc<TurnContext>) {
|
||||
|
||||
Reference in New Issue
Block a user