Compare commits

...

5 Commits

Author SHA1 Message Date
starr-openai
cd78334caf Run targeted nextest tests in place 2026-05-29 09:30:52 -07:00
starr-openai
84b728bafb Remove targeted workflow smoke trigger 2026-05-29 07:42:06 -07:00
starr-openai
4bba43fa16 Fix targeted test filter count 2026-05-29 05:56:40 -07:00
starr-openai
c4e59dba4b Add targeted workflow smoke trigger 2026-05-29 01:09:36 -07:00
starr-openai
b6e746629c Add targeted rust full CI test workflow 2026-05-29 00:56:21 -07:00
4 changed files with 308 additions and 13 deletions

View File

@@ -26,6 +26,10 @@ The workflows in this directory are split so that pull requests get fast, review
- release-profile Cargo builds
- cross-platform `argument-comment-lint`
- Linux remote-env tests
- `rust-ci-full-targeted-test.yml` is the manual one-test repro path for the
`rust-ci-full` nextest lanes. Dispatch it with one platform and one exact
cargo-nextest filterset such as `test(=module::test)`; the workflow rejects
filtersets that resolve to zero or multiple tests.
## Rule Of Thumb

View File

@@ -47,6 +47,22 @@ on:
required: false
default: false
type: boolean
# cargo-nextest filterset passed to `--filterset` by the list and run steps.
# The default "all()" leaves the selection unrestricted.
nextest_filterset:
required: false
default: "all()"
type: string
# true: the shard job fans out over shards 1-4 with `--partition hash:N/4`.
# false: the shard matrix is the single shard 0, which runs without --partition.
partition_tests:
required: false
default: true
type: boolean
# true: the matching tests are counted from the `cargo nextest list` JSON output
# and the step fails unless exactly one test matches the filterset.
require_single_test:
required: false
default: false
type: boolean
# true: the archive job resolves and runs the selected tests itself instead of
# building and uploading a nextest archive, and the shard job is skipped.
run_tests_in_place:
required: false
default: false
type: boolean
# Caller workflow-level env does not flow through workflow_call, so keep the
# Cargo git transport hardening on the archive and shard jobs directly here.
@@ -55,7 +71,7 @@ env:
jobs:
archive:
name: Build nextest archive
name: ${{ inputs.run_tests_in_place && 'Run selected test in place' || 'Build nextest archive' }}
runs-on: ${{ inputs.archive_runner_group != '' && fromJSON(format('{{"group":"{0}","labels":"{1}"}}', inputs.archive_runner_group, inputs.archive_runner_labels)) || inputs.archive_runner != '' && inputs.archive_runner || inputs.runner_group != '' && fromJSON(format('{{"group":"{0}","labels":"{1}"}}', inputs.runner_group, inputs.runner_labels)) || inputs.runner }}
timeout-minutes: 60
defaults:
@@ -186,7 +202,37 @@ jobs:
sudo sysctl -w kernel.apparmor_restrict_unprivileged_userns=0
fi
# In-place mode only: enumerate the tests selected by the filterset before they
# are run, optionally enforcing that exactly one test matches.
- name: Resolve selected test in place
  if: ${{ inputs.run_tests_in_place }}
  shell: bash
  env:
    NEXTEST_FILTERSET: ${{ inputs.nextest_filterset }}
    REQUIRE_SINGLE_TEST: ${{ inputs.require_single_test && 'true' || 'false' }}
  run: |
    set -euo pipefail

    list_cmd=(
      cargo nextest list
      --target ${{ inputs.target }}
      --cargo-profile ${{ inputs.profile }}
      --filterset "${NEXTEST_FILTERSET}"
      --message-format json
    )

    # Without the single-test requirement the listing only has to succeed;
    # its output is discarded.
    if [[ "${REQUIRE_SINGLE_TEST}" != "true" ]]; then
      "${list_cmd[@]}" >/dev/null
      exit 0
    fi

    # Count the test cases whose filter-match status is "matches" in the JSON listing.
    count_matches='import json, sys; test_list = json.load(sys.stdin); print(sum(testcase["filter-match"]["status"] == "matches" for suite in test_list["rust-suites"].values() for testcase in suite["testcases"].values()))'
    matched="$("${list_cmd[@]}" | python3 -c "${count_matches}")"

    if [[ "${matched}" != "1" ]]; then
      echo "nextest filterset must resolve to exactly one test; got ${matched}" >&2
      exit 1
    fi
- name: Build nextest archive
if: ${{ !inputs.run_tests_in_place }}
shell: bash
run: |
set -euo pipefail
@@ -224,6 +270,88 @@ jobs:
cp "target/${{ inputs.target }}/${{ inputs.profile }}/codex-command-runner.exe" "${helper_dir}/"
fi
# In-place Linux runs with remote_env enabled: source the remote-env helper
# script against the codex binary under target/<target>/<profile>, then publish
# the resulting variables to later steps through GITHUB_ENV.
- name: Set up remote test env (Docker)
  if: ${{ inputs.run_tests_in_place && runner.os == 'Linux' && inputs.remote_env }}
  shell: bash
  run: |
    set -euo pipefail

    # Inputs consumed by scripts/test-remote-env.sh.
    export CODEX_TEST_REMOTE_ENV_CONTAINER_NAME="codex-remote-test-env-${{ github.run_id }}-in-place"
    export CODEX_TEST_REMOTE_ENV_CODEX_BINARY_PATH="$(pwd)/target/${{ inputs.target }}/${{ inputs.profile }}/codex"

    # NOTE(review): the sourced script is expected to define CODEX_TEST_REMOTE_ENV
    # and CODEX_TEST_REMOTE_EXEC_SERVER_URL; under `set -u` the lines below fail
    # if it does not.
    . "${GITHUB_WORKSPACE}/scripts/test-remote-env.sh"

    {
      echo "CODEX_TEST_REMOTE_ENV=${CODEX_TEST_REMOTE_ENV}"
      echo "CODEX_TEST_REMOTE_EXEC_SERVER_URL=${CODEX_TEST_REMOTE_EXEC_SERVER_URL}"
    } >> "$GITHUB_ENV"
# In-place mode only: run the selected tests with cargo-nextest directly from
# the workspace build, pointing the sandbox helper CARGO_BIN_EXE_* variables at
# the binaries under target/<target>/<profile> on Linux and Windows.
- name: tests
  if: ${{ inputs.run_tests_in_place }}
  id: test_in_place
  shell: bash
  env:
    NEXTEST_FILTERSET: ${{ inputs.nextest_filterset }}
    RUST_BACKTRACE: 1
    RUST_MIN_STACK: "8388608" # 8 MiB
    NEXTEST_STATUS_LEVEL: leak
  run: |
    set -euo pipefail

    bin_dir="$(pwd)/target/${{ inputs.target }}/${{ inputs.profile }}"

    run_args=(
      run
      --no-fail-fast
      --target ${{ inputs.target }}
      --cargo-profile ${{ inputs.profile }}
      --filterset "${NEXTEST_FILTERSET}"
    )
    # A test_threads value of "0" means no --test-threads flag is passed.
    if [[ "${{ inputs.test_threads }}" != "0" ]]; then
      run_args+=(--test-threads "${{ inputs.test_threads }}")
    fi

    invocation=(cargo nextest "${run_args[@]}")

    # Wrap the invocation in `env` so the helper paths apply to this run only.
    case "${RUNNER_OS}" in
      Linux)
        linux_sandbox="${bin_dir}/codex-linux-sandbox"
        invocation=(
          env
          "CARGO_BIN_EXE_codex-linux-sandbox=${linux_sandbox}"
          "CARGO_BIN_EXE_codex_linux_sandbox=${linux_sandbox}"
          "${invocation[@]}"
        )
        ;;
      Windows)
        # cygpath -w converts the helper paths to Windows form.
        win_setup="$(cygpath -w "${bin_dir}/codex-windows-sandbox-setup.exe")"
        win_runner="$(cygpath -w "${bin_dir}/codex-command-runner.exe")"
        invocation=(
          env
          "CARGO_BIN_EXE_codex_windows_sandbox_setup=${win_setup}"
          "CARGO_BIN_EXE_codex_command_runner=${win_runner}"
          "${invocation[@]}"
        )
        ;;
    esac

    "${invocation[@]}"
# In-place mode only: publish the JUnit XML produced by the test run.
# always() keeps the upload running after a failed test step, and a missing
# report only produces a warning. The artifact name uses the "-shard-0" suffix.
# NOTE(review): the path is written relative to the repository root
# (codex-rs/...); confirm it matches where nextest writes junit.xml for this job.
- name: Upload nextest JUnit report
if: ${{ always() && inputs.run_tests_in_place }}
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
with:
name: nextest-junit-rust-ci-${{ inputs.artifact_id }}-shard-0
path: codex-rs/target/nextest/default/junit.xml
if-no-files-found: warn
# Best-effort cleanup of the remote test container for in-place Linux runs.
# always() makes it run after failures too; every command tolerates errors.
- name: Tear down remote test env
  if: ${{ always() && inputs.run_tests_in_place && runner.os == 'Linux' && inputs.remote_env }}
  shell: bash
  env:
    STEPS_TEST_OUTCOME: ${{ steps.test_in_place.outcome }}
  run: |
    set +e
    # Dump the container logs unless the test step succeeded.
    [[ "${STEPS_TEST_OUTCOME}" == "success" ]] || docker logs "${CODEX_TEST_REMOTE_ENV}" || true
    docker rm -f "${CODEX_TEST_REMOTE_ENV}" >/dev/null 2>&1 || true
# Print an explicit failure marker after cleanup when the in-place test step
# failed. failure() is required here: a step `if` without a status-check
# function gets an implicit success() check, so a condition that only matches
# a failed earlier step would otherwise never run.
- name: verify tests passed
  if: ${{ failure() && inputs.run_tests_in_place && steps.test_in_place.outcome == 'failure' }}
  run: |
    echo "Tests failed. See logs for details."
    exit 1
- name: Upload Cargo timings (nextest)
if: always()
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
@@ -233,6 +361,7 @@ jobs:
if-no-files-found: warn
- name: Upload nextest archive
if: ${{ !inputs.run_tests_in_place }}
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
with:
name: nextest-archive-${{ inputs.artifact_id }}
@@ -241,7 +370,7 @@ jobs:
retention-days: 1
- name: Upload runtime test helpers
if: ${{ runner.os == 'Linux' || runner.os == 'Windows' }}
if: ${{ !inputs.run_tests_in_place && (runner.os == 'Linux' || runner.os == 'Windows') }}
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
with:
name: ${{ env.TEST_HELPERS_ARTIFACT }}
@@ -287,7 +416,8 @@ jobs:
} >> "$GITHUB_STEP_SUMMARY"
shard:
name: Tests shard ${{ matrix.shard }}/4
if: ${{ !inputs.run_tests_in_place }}
name: Tests ${{ matrix.shard == 0 && 'selected test' || format('shard {0}/4', matrix.shard) }}
needs: archive
runs-on: ${{ inputs.runner_group != '' && fromJSON(format('{{"group":"{0}","labels":"{1}"}}', inputs.runner_group, inputs.runner_labels)) || inputs.runner }}
timeout-minutes: 60
@@ -300,7 +430,7 @@ jobs:
strategy:
fail-fast: false
matrix:
shard: [1, 2, 3, 4]
shard: ${{ inputs.partition_tests && fromJSON('[1, 2, 3, 4]') || fromJSON('[0]') }}
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
@@ -391,12 +521,30 @@ jobs:
--no-fail-fast
--archive-file "${archive_file}"
--workspace-remap "${workspace_root}"
--partition "hash:${{ matrix.shard }}/4"
--filterset "${NEXTEST_FILTERSET}"
)
if [[ "${{ matrix.shard }}" != "0" ]]; then
nextest_args+=(--partition "hash:${{ matrix.shard }}/4")
fi
if [[ "${{ inputs.test_threads }}" != "0" ]]; then
nextest_args+=(--test-threads "${{ inputs.test_threads }}")
fi
if [[ "${REQUIRE_SINGLE_TEST}" == "true" ]]; then
test_count="$(
cargo nextest list \
--archive-file "${archive_file}" \
--workspace-remap "${workspace_root}" \
--filterset "${NEXTEST_FILTERSET}" \
--message-format json |
python3 -c 'import json, sys; test_list = json.load(sys.stdin); print(sum(testcase["filter-match"]["status"] == "matches" for suite in test_list["rust-suites"].values() for testcase in suite["testcases"].values()))'
)"
if [[ "${test_count}" != "1" ]]; then
echo "nextest filterset must resolve to exactly one test; got ${test_count}" >&2
exit 1
fi
fi
test_command=(cargo nextest "${nextest_args[@]}")
if [[ "${RUNNER_OS}" == "Linux" ]]; then
sandbox_helper="${helper_target_dir}/codex-linux-sandbox"
@@ -419,6 +567,8 @@ jobs:
"${test_command[@]}"
env:
NEXTEST_FILTERSET: ${{ inputs.nextest_filterset }}
REQUIRE_SINGLE_TEST: ${{ inputs.require_single_test && 'true' || 'false' }}
RUST_BACKTRACE: 1
RUST_MIN_STACK: "8388608" # 8 MiB
NEXTEST_STATUS_LEVEL: leak
@@ -451,14 +601,15 @@ jobs:
result:
name: Platform result
needs: shard
needs: [archive, shard]
if: always()
runs-on: ubuntu-24.04
steps:
- name: Confirm test shards passed
shell: bash
run: |
if [[ "${{ needs.shard.result }}" != "success" ]]; then
echo "Nextest shards finished with result: ${{ needs.shard.result }}" >&2
result="${{ inputs.run_tests_in_place && needs.archive.result || needs.shard.result }}"
if [[ "${result}" != "success" ]]; then
echo "Nextest tests finished with result: ${result}" >&2
exit 1
fi

View File

@@ -0,0 +1,138 @@
# Manually dispatched workflow that runs one selected nextest test on one
# platform. Each job below is gated on `inputs.platform`, so a dispatch runs
# exactly one platform job; `nextest_filterset` is forwarded to the reusable
# platform workflow unchanged.
name: rust-ci-full targeted test
on:
workflow_dispatch:
inputs:
# Selects which of the platform jobs below runs.
platform:
description: Full CI test platform to run.
required: true
type: choice
options:
- macos-aarch64
- linux-x64-remote
- linux-arm64
- windows-x64
- windows-arm64
# Passed as-is to every platform job's `nextest_filterset` input.
nextest_filterset:
description: cargo-nextest filterset that must resolve to exactly one test, for example test(=module::test).
required: true
type: string
jobs:
# Runs only when the dispatched platform is macos-aarch64. Calls the reusable
# nextest platform workflow unpartitioned, requiring exactly one matching test,
# and runs that test in place on the build runner.
tests_macos_aarch64:
if: ${{ inputs.platform == 'macos-aarch64' }}
name: Targeted test — macos-15-xlarge - aarch64-apple-darwin
uses: ./.github/workflows/rust-ci-full-nextest-platform.yml
with:
runner: macos-15-xlarge
target: aarch64-apple-darwin
profile: ci-test
artifact_id: targeted-macos-aarch64
use_sccache: true
nextest_filterset: ${{ inputs.nextest_filterset }}
partition_tests: false
require_single_test: true
run_tests_in_place: true
secrets: inherit
# Runs only when the dispatched platform is linux-x64-remote. Same single-test,
# in-place configuration as the other platforms, plus `remote_env: true`, which
# enables the Docker remote test environment steps in the reusable workflow.
tests_linux_x64_remote:
if: ${{ inputs.platform == 'linux-x64-remote' }}
name: Targeted test — ubuntu-24.04 - x86_64-unknown-linux-gnu (remote)
uses: ./.github/workflows/rust-ci-full-nextest-platform.yml
with:
runner: ubuntu-24.04
runner_group: codex-runners
runner_labels: codex-linux-x64
target: x86_64-unknown-linux-gnu
profile: ci-test
artifact_id: targeted-linux-x64-remote
remote_env: true
use_sccache: true
nextest_filterset: ${{ inputs.nextest_filterset }}
partition_tests: false
require_single_test: true
run_tests_in_place: true
secrets: inherit
# Runs only when the dispatched platform is linux-arm64. Calls the reusable
# nextest platform workflow unpartitioned, requiring exactly one matching test,
# and runs that test in place on the build runner.
tests_linux_arm64:
if: ${{ inputs.platform == 'linux-arm64' }}
name: Targeted test — ubuntu-24.04-arm - aarch64-unknown-linux-gnu
uses: ./.github/workflows/rust-ci-full-nextest-platform.yml
with:
runner: ubuntu-24.04-arm
runner_group: codex-runners
runner_labels: codex-linux-arm64
target: aarch64-unknown-linux-gnu
profile: ci-test
artifact_id: targeted-linux-arm64
use_sccache: true
nextest_filterset: ${{ inputs.nextest_filterset }}
partition_tests: false
require_single_test: true
run_tests_in_place: true
secrets: inherit
# Runs only when the dispatched platform is windows-x64. Single-test, in-place
# configuration; `test_threads: 8` is forwarded to nextest as `--test-threads 8`.
# NOTE(review): unlike the other platform jobs this one does not set
# `use_sccache` — confirm that is intentional.
tests_windows_x64:
if: ${{ inputs.platform == 'windows-x64' }}
name: Targeted test — windows-x64 - x86_64-pc-windows-msvc
uses: ./.github/workflows/rust-ci-full-nextest-platform.yml
with:
runner: windows-x64
runner_group: codex-runners
runner_labels: codex-windows-x64
target: x86_64-pc-windows-msvc
profile: ci-test
artifact_id: targeted-windows-x64
test_threads: 8
nextest_filterset: ${{ inputs.nextest_filterset }}
partition_tests: false
require_single_test: true
run_tests_in_place: true
secrets: inherit
# Runs only when the dispatched platform is windows-arm64. This job does not
# set `run_tests_in_place`, so it keeps the default (false): the archive job
# builds a nextest archive on the windows-x64 archive runner and the shard job
# then runs it on the windows-arm64 runner as the single unpartitioned shard 0
# (`partition_tests: false`).
# NOTE(review): presumably in-place mode is omitted because the archive runner
# differs from the test runner — confirm this is intentional.
tests_windows_arm64:
if: ${{ inputs.platform == 'windows-arm64' }}
name: Targeted test — windows-arm64 - aarch64-pc-windows-msvc
uses: ./.github/workflows/rust-ci-full-nextest-platform.yml
with:
runner: windows-arm64
runner_group: codex-runners
runner_labels: codex-windows-arm64
archive_runner: windows-x64
archive_runner_group: codex-runners
archive_runner_labels: codex-windows-x64
target: aarch64-pc-windows-msvc
profile: ci-test
artifact_id: targeted-windows-arm64
test_threads: 8
use_sccache: true
nextest_filterset: ${{ inputs.nextest_filterset }}
partition_tests: false
require_single_test: true
secrets: inherit
# Single pass/fail gate for the dispatch. Only the platform job whose `if`
# matches `inputs.platform` runs and the others are skipped, so this job looks
# up the selected platform's result and fails unless it is `success`.
results:
  name: Targeted test results
  needs:
    - tests_macos_aarch64
    - tests_linux_x64_remote
    - tests_linux_arm64
    - tests_windows_x64
    - tests_windows_arm64
  # always() so the gate still runs when needed jobs failed or were skipped.
  if: always()
  runs-on: ubuntu-24.04
  steps:
    - name: Confirm selected targeted test passed
      shell: bash
      # Expression values reach the script as environment variables rather than
      # being spliced into the script text, so they can never be parsed as shell.
      env:
        PLATFORM: ${{ inputs.platform }}
        RESULT_MACOS_AARCH64: ${{ needs.tests_macos_aarch64.result }}
        RESULT_LINUX_X64_REMOTE: ${{ needs.tests_linux_x64_remote.result }}
        RESULT_LINUX_ARM64: ${{ needs.tests_linux_arm64.result }}
        RESULT_WINDOWS_X64: ${{ needs.tests_windows_x64.result }}
        RESULT_WINDOWS_ARM64: ${{ needs.tests_windows_arm64.result }}
      run: |
        set -euo pipefail
        case "${PLATFORM}" in
          macos-aarch64) result="${RESULT_MACOS_AARCH64}" ;;
          linux-x64-remote) result="${RESULT_LINUX_X64_REMOTE}" ;;
          linux-arm64) result="${RESULT_LINUX_ARM64}" ;;
          windows-x64) result="${RESULT_WINDOWS_X64}" ;;
          windows-arm64) result="${RESULT_WINDOWS_ARM64}" ;;
          *)
            echo 'unknown platform' >&2
            exit 1
            ;;
        esac
        if [[ "${result}" != 'success' ]]; then
          echo "selected targeted test failed: ${result}" >&2
          exit 1
        fi

View File

@@ -25,7 +25,7 @@ setup_remote_env() {
local remote_exec_server_stdout_path
container_name="${CODEX_TEST_REMOTE_ENV_CONTAINER_NAME:-codex-remote-test-env-local-$(date +%s)-${RANDOM}}"
codex_binary_path="${REPO_ROOT}/codex-rs/target/debug/codex"
codex_binary_path="${CODEX_TEST_REMOTE_ENV_CODEX_BINARY_PATH:-${REPO_ROOT}/codex-rs/target/debug/codex}"
if ! command -v docker >/dev/null 2>&1; then
echo "docker is required (Colima or Docker Desktop)" >&2
@@ -42,10 +42,12 @@ setup_remote_env() {
return 1
fi
(
cd "${REPO_ROOT}/codex-rs"
cargo build -p codex-cli --bin codex
)
if [[ -z "${CODEX_TEST_REMOTE_ENV_CODEX_BINARY_PATH:-}" ]]; then
(
cd "${REPO_ROOT}/codex-rs"
cargo build -p codex-cli --bin codex
)
fi
if [[ ! -f "${codex_binary_path}" ]]; then
echo "codex binary not found at ${codex_binary_path}" >&2