mirror of
https://github.com/openai/codex.git
synced 2026-05-26 05:55:36 +00:00
## Why

`rust-ci-full` was paying the full Cargo nextest build-and-run cost once per platform, with Windows ARM64 as the long pole. This change moves the heavy work into one reusable per-platform flow: build a nextest archive once, then replay it across four shards so the platform lane spends less time running tests serially. For Windows ARM64, the archive is cross-compiled on Windows x64 and replayed on native Windows ARM64 shards so the slow ARM64 machine is used for execution rather than compilation.

## What changed

- split the `rust-ci-full` nextest matrix into five explicit per-platform reusable-workflow calls
- add `.github/workflows/rust-ci-full-nextest-platform.yml` to build one archive, upload timings/helpers, replay four nextest shards, upload per-shard JUnit, and roll the shard status back up per platform
- add Windows CI helpers for Dev Drive setup and MSVC ARM64 linker environment export so the Windows ARM64 archive can be produced on Windows x64
- keep the existing Cargo git CLI fetch hardening inside the reusable workflow, since caller workflow-level `env` does not flow through `workflow_call`
- document the archive-backed shard shape in `.github/workflows/README.md`
- raise the default nextest slow timeout to 30s so the sharded full-CI path does not treat every >15s test as stuck

## Verification

- validated the archive/shard flow with live GitHub Actions runs on this PR branch
- Windows ARM64 cross-compile latency on completed runs:
  - https://github.com/openai/codex/actions/runs/26118759651: `34m30s` lane e2e, `17m16s` archive build, `9m55s` shard phase
  - https://github.com/openai/codex/actions/runs/26120777976: `30m36s` lane e2e, `17m21s` archive build, `6m50s` shard phase
  - comparable pre-cross-compile sharded Windows ARM64 runs were `55m01s`, `50m21s`, and `46m42s`, so the completed cross-compile runs improved the lane by roughly `12m` to `24m` versus the prior range
- latest corrected cross-compile run: https://github.com/openai/codex/actions/runs/26120777976
  - Windows ARM64 archive built successfully on Windows x64
  - native Windows ARM64 shards started immediately after the archive upload
  - 3/4 Windows ARM64 shards passed; the failing shard hit the same existing `code_mode` test failure seen outside this lane
- downloaded failed-shard JUnit XML from the validation runs and confirmed the remaining red is from known test failures, not archive/shard wiring
- no local Codex tests run per repo guidance

## Notes

- this PR does not change developers.openai.com documentation
71 lines
3.0 KiB
TOML
71 lines
3.0 KiB
TOML
[profile.default]
# Sharded full-CI fans tests out across runners. Report a test as slow after
# 30s, but only terminate it after two slow periods (60s) so the shards keep
# moving without treating every >30s test as stuck. Keep this aligned with the
# broader timeout budget we give sharded CI.
slow-timeout = { period = "30s", terminate-after = 2 }
# Retry once so one transient failure does not fail full-CI outright.
retries = 1

[profile.default.junit]
# Emit a JUnit XML report. nextest resolves this path relative to its
# per-profile store directory (target/nextest/default/ unless store.dir is
# overridden).
path = "junit.xml"

[test-groups.app_server_protocol_codegen]
# Runs the codex-app-server-protocol fixture/codegen tests one at a time; the
# members are selected by the override that sets this test-group.
max-threads = 1

[test-groups.app_server_integration]
# Runs the codex-app-server integration-test binaries one test at a time; the
# members are selected by the override that sets this test-group.
max-threads = 1

[test-groups.core_apply_patch_cli_integration]
# Runs the codex-core apply_patch_cli integration tests one at a time; the
# members are selected by the override that sets this test-group.
max-threads = 1

[test-groups.windows_sandbox_legacy_sessions]
# Runs the codex-windows-sandbox legacy_* tests one at a time; the members are
# selected by the override that sets this test-group.
max-threads = 1

[test-groups.windows_process_heavy]
# Allows at most two of the Windows process-heavy tests to run concurrently;
# the members are selected by the cfg(windows) override that sets this
# test-group.
max-threads = 2

[[profile.default.overrides]]
# Do not add new tests here
# Matching tests are reported slow after 1m and terminated after four slow
# periods (4m) instead of the profile default.
filter = 'test(rmcp_client) | test(humanlike_typing_1000_chars_appears_live_no_placeholder)'
slow-timeout = { period = "1m", terminate-after = 4 }

[[profile.default.overrides]]
# NOTE(review): this budget is identical to the [profile.default] slow-timeout
# (30s x 2), so the override currently has no effect. Confirm whether it should
# be removed or raised above the default.
filter = 'test(approval_matrix_covers_all_modes)'
slow-timeout = { period = "30s", terminate-after = 2 }

[[profile.default.overrides]]
# Route the listed codex-app-server-protocol schema-fixture and generator tests
# into the app_server_protocol_codegen group (max-threads = 1) so they never
# run concurrently with each other.
filter = 'package(codex-app-server-protocol) & (test(typescript_schema_fixtures_match_generated) | test(json_schema_fixtures_match_generated) | test(generate_ts_with_experimental_api_retains_experimental_entries) | test(generated_ts_optional_nullable_fields_only_in_params) | test(generate_json_filters_experimental_fields_and_methods))'
test-group = 'app_server_protocol_codegen'

[[profile.default.overrides]]
# These integration tests spawn a fresh app-server subprocess per case.
# Keep the library unit tests parallel: kind(test) restricts the match to
# integration-test binaries, so only those are serialized.
filter = 'package(codex-app-server) & kind(test)'
test-group = 'app_server_integration'

[[profile.default.overrides]]
# These tests exercise full Codex turns and apply_patch execution, and they are
# sensitive to Windows runner process-startup stalls when many cases launch at
# once, so they are serialized through a max-threads = 1 group.
filter = 'package(codex-core) & kind(test) & test(apply_patch_cli)'
test-group = 'core_apply_patch_cli_integration'

[[profile.default.overrides]]
# These tests create restricted-token Windows child processes and private
# desktops. Serialize them to avoid exhausting Windows session/global desktop
# resources in CI.
filter = 'package(codex-windows-sandbox) & test(legacy_)'
test-group = 'windows_sandbox_legacy_sessions'

[[profile.default.overrides]]
# This Codex-home startup path still exceeded the broader Windows-heavy ceiling
# in both Windows full-CI lanes after contention was reduced.
#
# Ordering matters: the windows_process_heavy override further down also
# matches this test and sets a 45s slow-timeout. nextest takes each setting
# from the first matching override, so this entry must stay above that one for
# the 1m period to apply.
platform = 'cfg(windows)'
filter = 'test(start_thread_uses_all_default_environments_from_codex_home)'
slow-timeout = { period = "1m", terminate-after = 2 }

[[profile.default.overrides]]
# These Windows-heavy tests spawn subprocesses, session files, or JSON-RPC
# clients and have been the dominant source of 30s full-CI timeouts.
#
# start_thread_uses_all_default_environments_from_codex_home is also matched by
# an earlier cfg(windows) override that sets a 1m slow-timeout; because the
# first matching override wins per setting, that test takes only its test-group
# from this entry, not the 45s period.
platform = 'cfg(windows)'
filter = 'test(suite::resume::) | test(suite::cli_stream::) | test(suite::auth_env::) | test(start_thread_uses_all_default_environments_from_codex_home) | test(connect_stdio_command_initializes_json_rpc_client_on_windows)'
test-group = 'windows_process_heavy'
slow-timeout = { period = "45s", terminate-after = 2 }