Thread guardian Responses API errors into denial rationale (#15516)

## Summary
- capture the last guardian `EventMsg::Error` while waiting for review
completion
- reuse that error as the denial rationale when the review turn
completes without an assessment payload
- add a regression test for the `/responses` HTTP 400 path

## Testing
- `just fmt`
- `cargo test -p codex-core
guardian_review_surfaces_responses_api_errors_in_rejection_reason`
- `just argument-comment-lint -p codex-core`

## Notes
- `cargo test -p codex-core` still fails on the pre-existing unrelated
test
`tools::js_repl::tests::js_repl_imported_local_files_can_access_repl_globals`
in this environment (`mktemp ... Operation not permitted` while
downloading `dotslash`)

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Charley Cunningham
2026-03-23 12:46:49 -07:00
committed by GitHub
parent 450dc289c3
commit 332edba78e
2 changed files with 107 additions and 0 deletions

View File

@@ -577,6 +577,7 @@ async fn wait_for_guardian_review(
) -> (GuardianReviewSessionOutcome, bool) {
let timeout = tokio::time::sleep_until(deadline);
tokio::pin!(timeout);
let mut last_error_message: Option<String> = None;
loop {
tokio::select! {
@@ -598,11 +599,22 @@ async fn wait_for_guardian_review(
match event {
Ok(event) => match event.msg {
EventMsg::TurnComplete(turn_complete) => {
if turn_complete.last_agent_message.is_none()
&& let Some(error_message) = last_error_message
{
return (
GuardianReviewSessionOutcome::Completed(Err(anyhow!(error_message))),
true,
);
}
return (
GuardianReviewSessionOutcome::Completed(Ok(turn_complete.last_agent_message)),
true,
);
}
EventMsg::Error(error) => {
last_error_message = Some(error.message);
}
EventMsg::TurnAborted(_) => {
return (GuardianReviewSessionOutcome::Aborted, true);
}

View File

@@ -31,6 +31,7 @@ use core_test_support::context_snapshot::ContextSnapshotOptions;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::mount_response_once;
use core_test_support::responses::mount_sse_once;
use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::sse;
@@ -717,6 +718,100 @@ async fn guardian_reuses_prompt_cache_key_and_appends_prior_reviews() -> anyhow:
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn guardian_review_surfaces_responses_api_errors_in_rejection_reason() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let error_message =
"Item 'rs_test' of type 'reasoning' was provided without its required following item.";
let _request_log = mount_response_once(
&server,
wiremock::ResponseTemplate::new(400).set_body_json(serde_json::json!({
"error": {
"message": error_message,
"type": "invalid_request_error",
"param": "input"
}
})),
)
.await;
let (mut session, mut turn, rx) = crate::codex::make_session_and_context_with_rx().await;
let mut config = (*turn.config).clone();
config.model_provider.base_url = Some(format!("{}/v1", server.uri()));
config.user_instructions = None;
let config = Arc::new(config);
let models_manager = Arc::new(test_support::models_manager_with_provider(
config.codex_home.clone(),
Arc::clone(&session.services.auth_manager),
config.model_provider.clone(),
));
Arc::get_mut(&mut session)
.expect("session should be uniquely owned")
.services
.models_manager = models_manager;
let turn_mut = Arc::get_mut(&mut turn).expect("turn should be uniquely owned");
turn_mut.config = Arc::clone(&config);
turn_mut.provider = config.model_provider.clone();
turn_mut.user_instructions = None;
seed_guardian_parent_history(&session, &turn).await;
let decision = review_approval_request(
&session,
&turn,
GuardianApprovalRequest::Shell {
id: "shell-guardian-error".to_string(),
command: vec!["git".to_string(), "push".to_string()],
cwd: PathBuf::from("/repo/codex-rs/core"),
sandbox_permissions: crate::sandboxing::SandboxPermissions::UseDefault,
additional_permissions: None,
justification: Some("Need to push the reviewed docs fix.".to_string()),
},
None,
)
.await;
assert_eq!(decision, ReviewDecision::Denied);
let mut warnings = Vec::new();
let mut denial_rationales = Vec::new();
while let Ok(event) = rx.try_recv() {
match event.msg {
EventMsg::Warning(event) => warnings.push(event.message),
EventMsg::GuardianAssessment(event)
if event.status == GuardianAssessmentStatus::Denied =>
{
denial_rationales.push(event.rationale)
}
_ => {}
}
}
assert!(
warnings
.iter()
.any(|message| message.contains(error_message)),
"warning should include the underlying responses api error"
);
assert!(
denial_rationales
.iter()
.flatten()
.any(|message| message.contains(error_message)),
"denial rationale should include the underlying responses api error"
);
assert!(
denial_rationales.iter().flatten().all(|message| {
!message.contains("guardian review completed without an assessment payload")
}),
"denial rationale should not fall back to the generic missing payload error"
);
Ok(())
}
#[tokio::test(flavor = "current_thread")]
async fn guardian_parallel_reviews_fork_from_last_committed_trunk_history() -> anyhow::Result<()> {
let first_assessment = serde_json::json!({