mirror of
https://github.com/openai/codex.git
synced 2026-04-26 15:45:02 +00:00
Stabilize thread resume replay tests (#13885)
## What changed - The thread-resume replay tests now use unchecked mock sequencing so the replay flow can complete before the test asserts. - They also poll outbound `/responses` request counts and fail immediately if replay emits an unexpected extra request. ## Why this fixes the flake - The previous version asserted while the replay machinery was still mid-flight, so the test was sometimes checking an intermediate state instead of the completed behavior. - Strict mock sequencing made that problem worse by forcing the test to care about exact sub-step timing rather than about the end result. - Letting replay settle and then asserting on stabilized request counts makes the test validate the real contract: the replay path finishes and does not send extra model requests. ## Scope - Test-only change.
This commit is contained in:
@@ -4,7 +4,6 @@ use app_test_support::create_apply_patch_sse_response;
|
||||
use app_test_support::create_fake_rollout_with_text_elements;
|
||||
use app_test_support::create_final_assistant_message_sse_response;
|
||||
use app_test_support::create_mock_responses_server_repeating_assistant;
|
||||
use app_test_support::create_mock_responses_server_sequence;
|
||||
use app_test_support::create_mock_responses_server_sequence_unchecked;
|
||||
use app_test_support::create_shell_command_sse_response;
|
||||
use app_test_support::rollout_path;
|
||||
@@ -60,6 +59,36 @@ use uuid::Uuid;
|
||||
const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
|
||||
const CODEX_5_2_INSTRUCTIONS_TEMPLATE_DEFAULT: &str = "You are Codex, a coding agent based on GPT-5. You and the user share the same workspace and collaborate to achieve the user's goals.";
|
||||
|
||||
async fn wait_for_responses_request_count(
|
||||
server: &wiremock::MockServer,
|
||||
expected_count: usize,
|
||||
) -> Result<()> {
|
||||
timeout(DEFAULT_READ_TIMEOUT, async {
|
||||
loop {
|
||||
let Some(requests) = server.received_requests().await else {
|
||||
anyhow::bail!("wiremock did not record requests");
|
||||
};
|
||||
let responses_request_count = requests
|
||||
.iter()
|
||||
.filter(|request| {
|
||||
request.method == "POST" && request.url.path().ends_with("/responses")
|
||||
})
|
||||
.count();
|
||||
if responses_request_count == expected_count {
|
||||
return Ok::<(), anyhow::Error>(());
|
||||
}
|
||||
if responses_request_count > expected_count {
|
||||
anyhow::bail!(
|
||||
"expected exactly {expected_count} /responses requests, got {responses_request_count}"
|
||||
);
|
||||
}
|
||||
tokio::time::sleep(std::time::Duration::from_millis(10)).await;
|
||||
}
|
||||
})
|
||||
.await??;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn thread_resume_rejects_unmaterialized_thread() -> Result<()> {
|
||||
let server = create_mock_responses_server_repeating_assistant("Done").await;
|
||||
@@ -982,6 +1011,7 @@ async fn thread_resume_replays_pending_command_execution_request_approval() -> R
|
||||
primary.read_stream_until_notification_message("turn/completed"),
|
||||
)
|
||||
.await??;
|
||||
wait_for_responses_request_count(&server, 3).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1004,7 +1034,7 @@ async fn thread_resume_replays_pending_file_change_request_approval() -> Result<
|
||||
create_apply_patch_sse_response(patch, "patch-call")?,
|
||||
create_final_assistant_message_sse_response("done")?,
|
||||
];
|
||||
let server = create_mock_responses_server_sequence(responses).await;
|
||||
let server = create_mock_responses_server_sequence_unchecked(responses).await;
|
||||
create_config_toml(&codex_home, &server.uri())?;
|
||||
|
||||
let mut primary = McpProcess::new(&codex_home).await?;
|
||||
@@ -1147,6 +1177,7 @@ async fn thread_resume_replays_pending_file_change_request_approval() -> Result<
|
||||
primary.read_stream_until_notification_message("turn/completed"),
|
||||
)
|
||||
.await??;
|
||||
wait_for_responses_request_count(&server, 3).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user