mirror of
https://github.com/openai/codex.git
synced 2026-04-30 09:26:44 +00:00
Stabilize Bazel tests (timeout tweaks and flake fixes) (#17791)
This commit is contained in:
@@ -226,12 +226,11 @@ impl ActionKind {
|
||||
let _ = fs::remove_file(&path);
|
||||
let patch = build_add_file_patch(&patch_path, content);
|
||||
let command = shell_apply_patch_command(&patch);
|
||||
let event = shell_event(
|
||||
call_id,
|
||||
&command,
|
||||
/*timeout_ms*/ 30_000,
|
||||
sandbox_permissions,
|
||||
)?;
|
||||
// Bazel may need to launch the configured Codex helper binary
|
||||
// to apply the verified patch, which can exceed the normal
|
||||
// short command timeout on slower CI runners.
|
||||
let timeout_ms = 30_000;
|
||||
let event = shell_event(call_id, &command, timeout_ms, sandbox_permissions)?;
|
||||
Ok((event, Some(command)))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2838,10 +2838,12 @@ async fn auto_compact_counts_encrypted_reasoning_before_last_user() {
|
||||
];
|
||||
let compact_mock =
|
||||
mount_compact_json_once(&server, serde_json::json!({ "output": compacted_history })).await;
|
||||
let chatgpt_base_url = format!("{}/backend-api", server.uri());
|
||||
|
||||
let codex = test_codex()
|
||||
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
|
||||
.with_config(|config| {
|
||||
.with_config(move |config| {
|
||||
config.chatgpt_base_url = chatgpt_base_url;
|
||||
set_test_compact_prompt(config);
|
||||
config.model_auto_compact_token_limit = Some(300);
|
||||
})
|
||||
|
||||
@@ -34,8 +34,10 @@ use core_test_support::test_codex::TestCodexHarness;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use core_test_support::wait_for_event_match;
|
||||
use core_test_support::wait_for_event_with_timeout;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::json;
|
||||
use tokio::time::Duration;
|
||||
use wiremock::ResponseTemplate;
|
||||
|
||||
fn approx_token_count(text: &str) -> i64 {
|
||||
@@ -55,6 +57,7 @@ fn estimate_compact_payload_tokens(request: &responses::ResponsesRequest) -> i64
|
||||
|
||||
const PRETURN_CONTEXT_DIFF_CWD: &str = "/tmp/PRETURN_CONTEXT_DIFF_CWD";
|
||||
const DUMMY_FUNCTION_NAME: &str = "test_tool";
|
||||
/// Timeout (30 seconds) handed to `wait_for_event_with_timeout` when the
/// remote-compact tests wait for a turn to complete.
const REMOTE_COMPACT_TURN_COMPLETE_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
|
||||
fn summary_with_prefix(summary: &str) -> String {
|
||||
format!("{SUMMARY_PREFIX}\n{summary}")
|
||||
@@ -197,6 +200,15 @@ fn assert_request_contains_realtime_end(request: &responses::ResponsesRequest) {
|
||||
);
|
||||
}
|
||||
|
||||
/// Waits on `codex` until an `EventMsg::TurnComplete` event is observed.
///
/// Delegates to `wait_for_event_with_timeout`, passing
/// `REMOTE_COMPACT_TURN_COMPLETE_TIMEOUT` as the timeout. The value produced
/// by the awaited call is discarded.
///
/// NOTE(review): what happens when the timeout elapses is decided by
/// `wait_for_event_with_timeout`, which is not visible here — confirm.
async fn wait_for_turn_complete(codex: &codex_core::CodexThread) {
|
||||
wait_for_event_with_timeout(
|
||||
codex,
|
||||
|ev| matches!(ev, EventMsg::TurnComplete(_)),
|
||||
REMOTE_COMPACT_TURN_COMPLETE_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn remote_compact_replaces_history_for_followups() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
@@ -242,10 +254,10 @@ async fn remote_compact_replaces_history_for_followups() -> Result<()> {
|
||||
responsesapi_client_metadata: None,
|
||||
})
|
||||
.await?;
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
wait_for_turn_complete(&codex).await;
|
||||
|
||||
codex.submit(Op::Compact).await?;
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
wait_for_turn_complete(&codex).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
@@ -257,7 +269,7 @@ async fn remote_compact_replaces_history_for_followups() -> Result<()> {
|
||||
responsesapi_client_metadata: None,
|
||||
})
|
||||
.await?;
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
|
||||
wait_for_turn_complete(&codex).await;
|
||||
|
||||
let compact_request = compact_mock.single_request();
|
||||
assert_eq!(compact_request.path(), "/v1/responses/compact");
|
||||
|
||||
@@ -39,6 +39,8 @@ use serde_json::Value;
|
||||
use serde_json::json;
|
||||
use tokio::time::Duration;
|
||||
|
||||
/// Timeout (30 seconds) used when waiting for `EventMsg::TurnComplete` in the
/// unified-exec test that exercises the worst case for the truncate logic.
const UNIFIED_EXEC_LAGGED_OUTPUT_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
|
||||
fn extract_output_text(item: &Value) -> Option<&str> {
|
||||
item.get("output").and_then(|value| match value {
|
||||
Value::String(text) => Some(text.as_str()),
|
||||
@@ -2055,11 +2057,12 @@ PY
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
)
|
||||
.await?;
|
||||
// This is a worst case scenario for the truncate logic.
|
||||
// This is a worst case scenario for the truncate logic, and CI can spend a
|
||||
// while draining the lagged tail before the follow-up tool call completes.
|
||||
wait_for_event_with_timeout(
|
||||
&test.codex,
|
||||
|event| matches!(event, EventMsg::TurnComplete(_)),
|
||||
Duration::from_secs(10),
|
||||
UNIFIED_EXEC_LAGGED_OUTPUT_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
|
||||
|
||||
@@ -30,7 +30,6 @@ use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::TestCodex;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use core_test_support::wait_for_event_with_timeout;
|
||||
use image::DynamicImage;
|
||||
use image::GenericImageView;
|
||||
@@ -49,6 +48,8 @@ use wiremock::ResponseTemplate;
|
||||
#[cfg(not(debug_assertions))]
|
||||
use wiremock::matchers::body_string_contains;
|
||||
|
||||
/// Timeout (30 seconds) passed to `wait_for_event_with_timeout` by the
/// image tests in this file when they wait for `EventMsg::TurnComplete`.
const VIEW_IMAGE_TURN_COMPLETE_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
|
||||
fn image_messages(body: &Value) -> Vec<&Value> {
|
||||
body.get("input")
|
||||
.and_then(Value::as_array)
|
||||
@@ -180,7 +181,7 @@ async fn user_turn_with_local_image_attaches_image() -> anyhow::Result<()> {
|
||||
codex,
|
||||
|event| matches!(event, EventMsg::TurnComplete(_)),
|
||||
// Empirically, image attachment can be slow under Bazel/RBE.
|
||||
Duration::from_secs(10),
|
||||
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -298,7 +299,7 @@ async fn view_image_tool_attaches_local_image() -> anyhow::Result<()> {
|
||||
},
|
||||
// Empirically, we have seen this run slow when run under
|
||||
// Bazel on arm Linux.
|
||||
Duration::from_secs(10),
|
||||
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -422,7 +423,7 @@ async fn view_image_tool_can_preserve_original_resolution_when_requested_on_gpt5
|
||||
wait_for_event_with_timeout(
|
||||
codex,
|
||||
|event| matches!(event, EventMsg::TurnComplete(_)),
|
||||
Duration::from_secs(10),
|
||||
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -518,7 +519,12 @@ async fn view_image_tool_errors_clearly_for_unsupported_detail_values() -> anyho
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
|
||||
wait_for_event_with_timeout(
|
||||
codex,
|
||||
|event| matches!(event, EventMsg::TurnComplete(_)),
|
||||
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
|
||||
let req = mock.single_request();
|
||||
let body_with_tool_output = req.body_json();
|
||||
@@ -603,7 +609,12 @@ async fn view_image_tool_treats_null_detail_as_omitted() -> anyhow::Result<()> {
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
|
||||
wait_for_event_with_timeout(
|
||||
codex,
|
||||
|event| matches!(event, EventMsg::TurnComplete(_)),
|
||||
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
|
||||
let req = mock.single_request();
|
||||
let function_output = req.function_call_output(call_id);
|
||||
@@ -701,7 +712,7 @@ async fn view_image_tool_resizes_when_model_lacks_original_detail_support() -> a
|
||||
wait_for_event_with_timeout(
|
||||
codex,
|
||||
|event| matches!(event, EventMsg::TurnComplete(_)),
|
||||
Duration::from_secs(10),
|
||||
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -805,7 +816,7 @@ async fn view_image_tool_does_not_force_original_resolution_with_capability_only
|
||||
wait_for_event_with_timeout(
|
||||
codex,
|
||||
|event| matches!(event, EventMsg::TurnComplete(_)),
|
||||
Duration::from_secs(10),
|
||||
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
|
||||
@@ -915,7 +926,7 @@ await codex.emitImage(out);
|
||||
EventMsg::TurnComplete(_) => true,
|
||||
_ => false,
|
||||
},
|
||||
Duration::from_secs(10),
|
||||
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
let tool_event = match tool_event {
|
||||
@@ -1035,7 +1046,7 @@ console.log(out.type);
|
||||
EventMsg::TurnComplete(_) => true,
|
||||
_ => false,
|
||||
},
|
||||
Duration::from_secs(10),
|
||||
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
let tool_event = match tool_event {
|
||||
@@ -1117,7 +1128,12 @@ async fn view_image_tool_errors_when_path_is_directory() -> anyhow::Result<()> {
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
|
||||
wait_for_event_with_timeout(
|
||||
codex,
|
||||
|event| matches!(event, EventMsg::TurnComplete(_)),
|
||||
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
|
||||
let req = mock.single_request();
|
||||
let body_with_tool_output = req.body_json();
|
||||
@@ -1193,7 +1209,12 @@ async fn view_image_tool_errors_for_non_image_files() -> anyhow::Result<()> {
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
|
||||
wait_for_event_with_timeout(
|
||||
codex,
|
||||
|event| matches!(event, EventMsg::TurnComplete(_)),
|
||||
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
|
||||
let request = mock.single_request();
|
||||
assert!(
|
||||
@@ -1274,7 +1295,12 @@ async fn view_image_tool_errors_when_file_missing() -> anyhow::Result<()> {
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
|
||||
wait_for_event_with_timeout(
|
||||
codex,
|
||||
|event| matches!(event, EventMsg::TurnComplete(_)),
|
||||
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
|
||||
let req = mock.single_request();
|
||||
let body_with_tool_output = req.body_json();
|
||||
@@ -1405,7 +1431,12 @@ async fn view_image_tool_returns_unsupported_message_for_text_only_model() -> an
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
|
||||
wait_for_event_with_timeout(
|
||||
codex,
|
||||
|event| matches!(event, EventMsg::TurnComplete(_)),
|
||||
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
|
||||
let output_text = mock
|
||||
.single_request()
|
||||
@@ -1480,7 +1511,12 @@ async fn replaces_invalid_local_image_after_bad_request() -> anyhow::Result<()>
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
|
||||
wait_for_event_with_timeout(
|
||||
&codex,
|
||||
|event| matches!(event, EventMsg::TurnComplete(_)),
|
||||
VIEW_IMAGE_TURN_COMPLETE_TIMEOUT,
|
||||
)
|
||||
.await;
|
||||
|
||||
let first_body = invalid_image_mock.single_request().body_json();
|
||||
assert!(
|
||||
|
||||
Reference in New Issue
Block a user