Stabilize Bazel tests (timeout tweaks and flake fixes) (#17791)

This commit is contained in:
David de Regt
2026-04-16 07:57:51 -07:00
committed by GitHub
parent 895e2d056f
commit 6adba99f4d
34 changed files with 200 additions and 60 deletions

View File

@@ -226,12 +226,11 @@ impl ActionKind {
let _ = fs::remove_file(&path);
let patch = build_add_file_patch(&patch_path, content);
let command = shell_apply_patch_command(&patch);
let event = shell_event(
call_id,
&command,
/*timeout_ms*/ 30_000,
sandbox_permissions,
)?;
// Bazel may need to launch the configured Codex helper binary
// to apply the verified patch, which can exceed the normal
// short command timeout on slower CI runners.
let timeout_ms = 30_000;
let event = shell_event(call_id, &command, timeout_ms, sandbox_permissions)?;
Ok((event, Some(command)))
}
}

View File

@@ -2838,10 +2838,12 @@ async fn auto_compact_counts_encrypted_reasoning_before_last_user() {
];
let compact_mock =
mount_compact_json_once(&server, serde_json::json!({ "output": compacted_history })).await;
let chatgpt_base_url = format!("{}/backend-api", server.uri());
let codex = test_codex()
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
.with_config(|config| {
.with_config(move |config| {
config.chatgpt_base_url = chatgpt_base_url;
set_test_compact_prompt(config);
config.model_auto_compact_token_limit = Some(300);
})

View File

@@ -34,8 +34,10 @@ use core_test_support::test_codex::TestCodexHarness;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use core_test_support::wait_for_event_match;
use core_test_support::wait_for_event_with_timeout;
use pretty_assertions::assert_eq;
use serde_json::json;
use tokio::time::Duration;
use wiremock::ResponseTemplate;
fn approx_token_count(text: &str) -> i64 {
@@ -55,6 +57,7 @@ fn estimate_compact_payload_tokens(request: &responses::ResponsesRequest) -> i64
const PRETURN_CONTEXT_DIFF_CWD: &str = "/tmp/PRETURN_CONTEXT_DIFF_CWD";
const DUMMY_FUNCTION_NAME: &str = "test_tool";
const REMOTE_COMPACT_TURN_COMPLETE_TIMEOUT: Duration = Duration::from_secs(30);
fn summary_with_prefix(summary: &str) -> String {
format!("{SUMMARY_PREFIX}\n{summary}")
@@ -197,6 +200,15 @@ fn assert_request_contains_realtime_end(request: &responses::ResponsesRequest) {
);
}
/// Waits for `codex` to emit an `EventMsg::TurnComplete` event.
///
/// Delegates to `wait_for_event_with_timeout`, passing
/// `REMOTE_COMPACT_TURN_COMPLETE_TIMEOUT` as the timeout so every caller in
/// this file shares one limit instead of repeating the predicate and duration.
async fn wait_for_turn_complete(codex: &codex_core::CodexThread) {
    // Single shared limit for all remote-compact turn-completion waits.
    let turn_timeout = REMOTE_COMPACT_TURN_COMPLETE_TIMEOUT;
    wait_for_event_with_timeout(
        codex,
        |event| matches!(event, EventMsg::TurnComplete(_)),
        turn_timeout,
    )
    .await;
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_compact_replaces_history_for_followups() -> Result<()> {
skip_if_no_network!(Ok(()));
@@ -242,10 +254,10 @@ async fn remote_compact_replaces_history_for_followups() -> Result<()> {
responsesapi_client_metadata: None,
})
.await?;
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
wait_for_turn_complete(&codex).await;
codex.submit(Op::Compact).await?;
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
wait_for_turn_complete(&codex).await;
codex
.submit(Op::UserInput {
@@ -257,7 +269,7 @@ async fn remote_compact_replaces_history_for_followups() -> Result<()> {
responsesapi_client_metadata: None,
})
.await?;
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
wait_for_turn_complete(&codex).await;
let compact_request = compact_mock.single_request();
assert_eq!(compact_request.path(), "/v1/responses/compact");

View File

@@ -39,6 +39,8 @@ use serde_json::Value;
use serde_json::json;
use tokio::time::Duration;
const UNIFIED_EXEC_LAGGED_OUTPUT_TIMEOUT: Duration = Duration::from_secs(30);
fn extract_output_text(item: &Value) -> Option<&str> {
item.get("output").and_then(|value| match value {
Value::String(text) => Some(text.as_str()),
@@ -2055,11 +2057,12 @@ PY
SandboxPolicy::DangerFullAccess,
)
.await?;
// This is a worst case scenario for the truncate logic.
// This is a worst case scenario for the truncate logic, and CI can spend a
// while draining the lagged tail before the follow-up tool call completes.
wait_for_event_with_timeout(
&test.codex,
|event| matches!(event, EventMsg::TurnComplete(_)),
Duration::from_secs(10),
UNIFIED_EXEC_LAGGED_OUTPUT_TIMEOUT,
)
.await;

View File

@@ -30,7 +30,6 @@ use core_test_support::responses::start_mock_server;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex::TestCodex;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use core_test_support::wait_for_event_with_timeout;
use image::DynamicImage;
use image::GenericImageView;
@@ -49,6 +48,8 @@ use wiremock::ResponseTemplate;
#[cfg(not(debug_assertions))]
use wiremock::matchers::body_string_contains;
const VIEW_IMAGE_TURN_COMPLETE_TIMEOUT: Duration = Duration::from_secs(30);
fn image_messages(body: &Value) -> Vec<&Value> {
body.get("input")
.and_then(Value::as_array)
@@ -180,7 +181,7 @@ async fn user_turn_with_local_image_attaches_image() -> anyhow::Result<()> {
codex,
|event| matches!(event, EventMsg::TurnComplete(_)),
// Empirically, image attachment can be slow under Bazel/RBE.
Duration::from_secs(10),
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
)
.await;
@@ -298,7 +299,7 @@ async fn view_image_tool_attaches_local_image() -> anyhow::Result<()> {
},
// Empirically, we have seen this run slow when run under
// Bazel on arm Linux.
Duration::from_secs(10),
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
)
.await;
@@ -422,7 +423,7 @@ async fn view_image_tool_can_preserve_original_resolution_when_requested_on_gpt5
wait_for_event_with_timeout(
codex,
|event| matches!(event, EventMsg::TurnComplete(_)),
Duration::from_secs(10),
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
)
.await;
@@ -518,7 +519,12 @@ async fn view_image_tool_errors_clearly_for_unsupported_detail_values() -> anyho
})
.await?;
wait_for_event(codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
wait_for_event_with_timeout(
codex,
|event| matches!(event, EventMsg::TurnComplete(_)),
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
)
.await;
let req = mock.single_request();
let body_with_tool_output = req.body_json();
@@ -603,7 +609,12 @@ async fn view_image_tool_treats_null_detail_as_omitted() -> anyhow::Result<()> {
})
.await?;
wait_for_event(codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
wait_for_event_with_timeout(
codex,
|event| matches!(event, EventMsg::TurnComplete(_)),
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
)
.await;
let req = mock.single_request();
let function_output = req.function_call_output(call_id);
@@ -701,7 +712,7 @@ async fn view_image_tool_resizes_when_model_lacks_original_detail_support() -> a
wait_for_event_with_timeout(
codex,
|event| matches!(event, EventMsg::TurnComplete(_)),
Duration::from_secs(10),
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
)
.await;
@@ -805,7 +816,7 @@ async fn view_image_tool_does_not_force_original_resolution_with_capability_only
wait_for_event_with_timeout(
codex,
|event| matches!(event, EventMsg::TurnComplete(_)),
Duration::from_secs(10),
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
)
.await;
@@ -915,7 +926,7 @@ await codex.emitImage(out);
EventMsg::TurnComplete(_) => true,
_ => false,
},
Duration::from_secs(10),
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
)
.await;
let tool_event = match tool_event {
@@ -1035,7 +1046,7 @@ console.log(out.type);
EventMsg::TurnComplete(_) => true,
_ => false,
},
Duration::from_secs(10),
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
)
.await;
let tool_event = match tool_event {
@@ -1117,7 +1128,12 @@ async fn view_image_tool_errors_when_path_is_directory() -> anyhow::Result<()> {
})
.await?;
wait_for_event(codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
wait_for_event_with_timeout(
codex,
|event| matches!(event, EventMsg::TurnComplete(_)),
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
)
.await;
let req = mock.single_request();
let body_with_tool_output = req.body_json();
@@ -1193,7 +1209,12 @@ async fn view_image_tool_errors_for_non_image_files() -> anyhow::Result<()> {
})
.await?;
wait_for_event(codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
wait_for_event_with_timeout(
codex,
|event| matches!(event, EventMsg::TurnComplete(_)),
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
)
.await;
let request = mock.single_request();
assert!(
@@ -1274,7 +1295,12 @@ async fn view_image_tool_errors_when_file_missing() -> anyhow::Result<()> {
})
.await?;
wait_for_event(codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
wait_for_event_with_timeout(
codex,
|event| matches!(event, EventMsg::TurnComplete(_)),
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
)
.await;
let req = mock.single_request();
let body_with_tool_output = req.body_json();
@@ -1405,7 +1431,12 @@ async fn view_image_tool_returns_unsupported_message_for_text_only_model() -> an
})
.await?;
wait_for_event(codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
wait_for_event_with_timeout(
codex,
|event| matches!(event, EventMsg::TurnComplete(_)),
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
)
.await;
let output_text = mock
.single_request()
@@ -1480,7 +1511,12 @@ async fn replaces_invalid_local_image_after_bad_request() -> anyhow::Result<()>
})
.await?;
wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
wait_for_event_with_timeout(
&codex,
|event| matches!(event, EventMsg::TurnComplete(_)),
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
)
.await;
let first_body = invalid_image_mock.single_request().body_json();
assert!(