Stabilize thread resume replay tests (#13885)

## What changed - The thread-resume replay tests now use unchecked mock sequencing so the replay flow can complete before the test asserts. - They also poll outbound `/responses` request counts and fail immediately if replay emits an unexpected extra request. ## Why this fixes the flake - The previous version asserted while the replay machinery was still mid-flight, so the test was sometimes checking an intermediate state instead of the completed behavior. - Strict mock sequencing made that problem worse by forcing the test to care about exact sub-step timing rather than about the end result. - Letting replay settle and then asserting on stabilized request counts makes the test validate the real contract: the replay path finishes and does not send extra model requests. ## Scope - Test-only change.
2026-04-26 15:45:02 +00:00 · 2026-03-09 10:41:23 -07:00
parent 0dc242a672
commit 10bf6008f4
1 changed files with 33 additions and 2 deletions
--- a/codex-rs/app-server/tests/suite/v2/thread_resume.rs
+++ b/codex-rs/app-server/tests/suite/v2/thread_resume.rs
@@ -4,7 +4,6 @@ use app_test_support::create_apply_patch_sse_response;
 use app_test_support::create_fake_rollout_with_text_elements;
 use app_test_support::create_final_assistant_message_sse_response;
 use app_test_support::create_mock_responses_server_repeating_assistant;
-use app_test_support::create_mock_responses_server_sequence;
 use app_test_support::create_mock_responses_server_sequence_unchecked;
 use app_test_support::create_shell_command_sse_response;
 use app_test_support::rollout_path;
@@ -60,6 +59,36 @@ use uuid::Uuid;
 const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
 const CODEX_5_2_INSTRUCTIONS_TEMPLATE_DEFAULT: &str = "You are Codex, a coding agent based on GPT-5. You and the user share the same workspace and collaborate to achieve the user's goals.";

+async fn wait_for_responses_request_count(
+    server: &wiremock::MockServer,
+    expected_count: usize,
+) -> Result<()> {
+    timeout(DEFAULT_READ_TIMEOUT, async {
+        loop {
+            let Some(requests) = server.received_requests().await else {
+                anyhow::bail!("wiremock did not record requests");
+            };
+            let responses_request_count = requests
+                .iter()
+                .filter(|request| {
+                    request.method == "POST" && request.url.path().ends_with("/responses")
+                })
+                .count();
+            if responses_request_count == expected_count {
+                return Ok::<(), anyhow::Error>(());
+            }
+            if responses_request_count > expected_count {
+                anyhow::bail!(
+                    "expected exactly {expected_count} /responses requests, got {responses_request_count}"
+                );
+            }
+            tokio::time::sleep(std::time::Duration::from_millis(10)).await;
+        }
+    })
+    .await??;
+    Ok(())
+}
+
 #[tokio::test]
 async fn thread_resume_rejects_unmaterialized_thread() -> Result<()> {
    let server = create_mock_responses_server_repeating_assistant("Done").await;
@@ -982,6 +1011,7 @@ async fn thread_resume_replays_pending_command_execution_request_approval() -> R
        primary.read_stream_until_notification_message("turn/completed"),
    )
    .await??;
+    wait_for_responses_request_count(&server, 3).await?;

    Ok(())
 }
@@ -1004,7 +1034,7 @@ async fn thread_resume_replays_pending_file_change_request_approval() -> Result<
        create_apply_patch_sse_response(patch, "patch-call")?,
        create_final_assistant_message_sse_response("done")?,
    ];
-    let server = create_mock_responses_server_sequence(responses).await;
+    let server = create_mock_responses_server_sequence_unchecked(responses).await;
    create_config_toml(&codex_home, &server.uri())?;

    let mut primary = McpProcess::new(&codex_home).await?;
@@ -1147,6 +1177,7 @@ async fn thread_resume_replays_pending_file_change_request_approval() -> Result<
        primary.read_stream_until_notification_message("turn/completed"),
    )
    .await??;
+    wait_for_responses_request_count(&server, 3).await?;

    Ok(())
 }