Allow targeted rust CI repros without sharding

Pass repro filterset through env
Add targeted rust CI repro workflow
2026-05-22 12:04:19 +00:00 · 2026-05-19 21:36:03 -07:00 · 2026-05-19 21:06:44 -07:00 · 2026-05-19 21:06:43 -07:00 · 2026-05-19 20:29:44 -07:00
3 changed files with 184 additions and 47 deletions
--- a/.github/workflows/rust-ci-full-nextest-platform.yml
+++ b/.github/workflows/rust-ci-full-nextest-platform.yml
@@ -47,6 +47,18 @@ on:
        required: false
        default: false
        type: boolean
+      nextest_filterset:
+        required: false
+        default: "all()"
+        type: string
+      selected_shard:
+        required: false
+        default: 0
+        type: number
+      repeat_count:
+        required: false
+        default: 1
+        type: number

 # Caller workflow-level env does not flow through workflow_call, so keep the
 # Cargo git transport hardening on the archive and shard jobs directly here.
@@ -76,6 +88,19 @@ jobs:
        with:
          persist-credentials: false

+      # Fail fast on out-of-range or non-integer repro inputs before any expensive work runs.
+      - name: Validate targeted test inputs
+        shell: bash
+        env:
+          # Passed through env so the values are never spliced into the script text.
+          SELECTED_SHARD: ${{ inputs.selected_shard }}
+          REPEAT_COUNT: ${{ inputs.repeat_count }}
+        run: |
+          set -euo pipefail
+          # Regex checks also reject fractional values (e.g. 1.5) that bash arithmetic cannot parse.
+          [[ "${SELECTED_SHARD}" =~ ^[0-4]$ ]] || { echo "selected_shard must be an integer between 0 and 4" >&2; exit 1; }
+          [[ "${REPEAT_COUNT}" =~ ^[1-9][0-9]*$ ]] || { echo "repeat_count must be an integer of at least 1" >&2; exit 1; }
+
      - name: Configure Dev Drive (Windows)
        if: ${{ runner.os == 'Windows' }}
        shell: pwsh
@@ -287,7 +312,7 @@ jobs:
          } >> "$GITHUB_STEP_SUMMARY"

  shard:
-    name: Tests shard ${{ matrix.shard }}/4
+    name: Tests ${{ matrix.shard == 0 && 'all selected tests' || format('shard {0}/4', matrix.shard) }}
    needs: archive
    runs-on: ${{ inputs.runner_group != '' && fromJSON(format('{{"group":"{0}","labels":"{1}"}}', inputs.runner_group, inputs.runner_labels)) || inputs.runner }}
    timeout-minutes: 60
@@ -300,7 +325,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        shard: [1, 2, 3, 4]
+        shard: ${{ inputs.selected_shard == 0 && inputs.nextest_filterset == 'all()' && fromJSON('[1, 2, 3, 4]') || fromJSON(format('[{0}]', inputs.selected_shard)) }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
@@ -391,8 +416,11 @@ jobs:
            --no-fail-fast
            --archive-file "${archive_file}"
            --workspace-remap "${workspace_root}"
-            --partition "hash:${{ matrix.shard }}/4"
+            --filterset "${NEXTEST_FILTERSET}"
          )
+          if [[ "${{ matrix.shard }}" != "0" ]]; then
+            nextest_args+=(--partition "hash:${{ matrix.shard }}/4")
+          fi
          if [[ "${{ inputs.test_threads }}" != "0" ]]; then
            nextest_args+=(--test-threads "${{ inputs.test_threads }}")
          fi
@@ -417,8 +445,12 @@ jobs:
            )
          fi

-          "${test_command[@]}"
+          for attempt in $(seq 1 "${{ inputs.repeat_count }}"); do
+            echo "nextest attempt ${attempt}/${{ inputs.repeat_count }}"
+            "${test_command[@]}"
+          done
        env:
+          NEXTEST_FILTERSET: ${{ inputs.nextest_filterset }}
          RUST_BACKTRACE: 1
          RUST_MIN_STACK: "8388608" # 8 MiB
          NEXTEST_STATUS_LEVEL: leak
--- a/.github/workflows/rust-ci-full.yml
+++ b/.github/workflows/rust-ci-full.yml
@@ -5,6 +5,34 @@ on:
      - main
      - "**full-ci**"
  workflow_dispatch:
+    inputs:
+      repro_platform:
+        description: Platform lane to run. Use all for the normal full workflow.
+        required: true
+        default: all
+        type: choice
+        options:
+          - all
+          - macos-aarch64
+          - linux-x64-remote
+          - linux-arm64
+          - windows-x64
+          - windows-arm64
+      nextest_filterset:
+        description: cargo-nextest filterset selecting the tests to run.
+        required: true
+        default: all()
+        type: string
+      shard:
+        description: Full-CI shard to reproduce. Use 0 to run selected tests without sharding.
+        required: true
+        default: 0
+        type: number
+      repeat_count:
+        description: Number of times to rerun the selected shard/filterset in one job.
+        required: true
+        default: 1
+        type: number

 # CI builds in debug (dev) for faster signal.
 env:
@@ -16,6 +44,7 @@ env:
 jobs:
  # --- CI that doesn't need specific targets ---------------------------------
  general:
+    if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.repro_platform == 'all' }}
    name: Format / etc
    runs-on: ubuntu-24.04
    defaults:
@@ -32,6 +61,7 @@ jobs:
        run: cargo fmt -- --config imports_granularity=Item --check

  cargo_shear:
+    if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.repro_platform == 'all' }}
    name: cargo shear
    runs-on: ubuntu-24.04
    defaults:
@@ -49,6 +79,7 @@ jobs:
        run: cargo shear --deny-warnings

  argument_comment_lint_package:
+    if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.repro_platform == 'all' }}
    name: Argument comment lint package
    runs-on: ubuntu-24.04
    env:
@@ -90,6 +121,7 @@ jobs:
          RUST_MIN_STACK: "8388608" # 8 MiB

  argument_comment_lint_prebuilt:
+    if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.repro_platform == 'all' }}
    name: Argument comment lint - ${{ matrix.name }}
    runs-on: ${{ matrix.runs_on || matrix.runner }}
    timeout-minutes: 30
@@ -149,6 +181,7 @@ jobs:

  # --- CI to validate on different os/targets --------------------------------
  lint_build:
+    if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.repro_platform == 'all' }}
    name: Lint/Build — ${{ matrix.runner }} - ${{ matrix.target }}${{ matrix.profile == 'release' && ' (release)' || '' }}
    runs-on: ${{ matrix.runs_on || matrix.runner }}
    timeout-minutes: 30
@@ -522,6 +555,7 @@ jobs:
          key: apt-${{ matrix.runner }}-${{ matrix.target }}-v1

  tests_macos_aarch64:
+    if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.repro_platform == 'all' || github.event.inputs.repro_platform == 'macos-aarch64' }}
    name: Tests — macos-15-xlarge - aarch64-apple-darwin
    uses: ./.github/workflows/rust-ci-full-nextest-platform.yml
    with:
@@ -530,9 +564,13 @@ jobs:
      profile: ci-test
      artifact_id: macos-aarch64
      use_sccache: true
+      nextest_filterset: ${{ github.event.inputs.nextest_filterset || 'all()' }}
+      selected_shard: ${{ fromJSON(github.event.inputs.shard || '0') }}
+      repeat_count: ${{ fromJSON(github.event.inputs.repeat_count || '1') }}
    secrets: inherit

  tests_linux_x64_remote:
+    if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.repro_platform == 'all' || github.event.inputs.repro_platform == 'linux-x64-remote' }}
    name: Tests — ubuntu-24.04 - x86_64-unknown-linux-gnu (remote)
    uses: ./.github/workflows/rust-ci-full-nextest-platform.yml
    with:
@@ -544,9 +582,13 @@ jobs:
      artifact_id: linux-x64-remote
      remote_env: true
      use_sccache: true
+      nextest_filterset: ${{ github.event.inputs.nextest_filterset || 'all()' }}
+      selected_shard: ${{ fromJSON(github.event.inputs.shard || '0') }}
+      repeat_count: ${{ fromJSON(github.event.inputs.repeat_count || '1') }}
    secrets: inherit

  tests_linux_arm64:
+    if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.repro_platform == 'all' || github.event.inputs.repro_platform == 'linux-arm64' }}
    name: Tests — ubuntu-24.04-arm - aarch64-unknown-linux-gnu
    uses: ./.github/workflows/rust-ci-full-nextest-platform.yml
    with:
@@ -557,9 +599,13 @@ jobs:
      profile: ci-test
      artifact_id: linux-arm64
      use_sccache: true
+      nextest_filterset: ${{ github.event.inputs.nextest_filterset || 'all()' }}
+      selected_shard: ${{ fromJSON(github.event.inputs.shard || '0') }}
+      repeat_count: ${{ fromJSON(github.event.inputs.repeat_count || '1') }}
    secrets: inherit

  tests_windows_x64:
+    if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.repro_platform == 'all' || github.event.inputs.repro_platform == 'windows-x64' }}
    name: Tests — windows-x64 - x86_64-pc-windows-msvc
    uses: ./.github/workflows/rust-ci-full-nextest-platform.yml
    with:
@@ -570,9 +616,13 @@ jobs:
      profile: ci-test
      artifact_id: windows-x64
      test_threads: 8
+      nextest_filterset: ${{ github.event.inputs.nextest_filterset || 'all()' }}
+      selected_shard: ${{ fromJSON(github.event.inputs.shard || '0') }}
+      repeat_count: ${{ fromJSON(github.event.inputs.repeat_count || '1') }}
    secrets: inherit

  tests_windows_arm64:
+    if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.repro_platform == 'all' || github.event.inputs.repro_platform == 'windows-arm64' }}
    name: Tests — windows-arm64 - aarch64-pc-windows-msvc
    uses: ./.github/workflows/rust-ci-full-nextest-platform.yml
    with:
@@ -587,6 +637,9 @@ jobs:
      artifact_id: windows-arm64
      test_threads: 8
      use_sccache: true
+      nextest_filterset: ${{ github.event.inputs.nextest_filterset || 'all()' }}
+      selected_shard: ${{ fromJSON(github.event.inputs.shard || '0') }}
+      repeat_count: ${{ fromJSON(github.event.inputs.repeat_count || '1') }}
    secrets: inherit

  # --- Gatherer job for the full post-merge workflow --------------------------
@@ -621,16 +674,29 @@ jobs:
          echo "test arm64 : ${{ needs.tests_linux_arm64.result }}"
          echo "test winx64: ${{ needs.tests_windows_x64.result }}"
          echo "test winarm: ${{ needs.tests_windows_arm64.result }}"
-          [[ '${{ needs.argument_comment_lint_package.result }}' == 'success' ]] || { echo 'argument_comment_lint_package failed'; exit 1; }
-          [[ '${{ needs.argument_comment_lint_prebuilt.result }}' == 'success' ]] || { echo 'argument_comment_lint_prebuilt failed'; exit 1; }
-          [[ '${{ needs.general.result }}' == 'success' ]] || { echo 'general failed'; exit 1; }
-          [[ '${{ needs.cargo_shear.result }}' == 'success' ]] || { echo 'cargo_shear failed'; exit 1; }
-          [[ '${{ needs.lint_build.result }}' == 'success' ]] || { echo 'lint_build failed'; exit 1; }
-          [[ '${{ needs.tests_macos_aarch64.result }}' == 'success' ]] || { echo 'tests_macos_aarch64 failed'; exit 1; }
-          [[ '${{ needs.tests_linux_x64_remote.result }}' == 'success' ]] || { echo 'tests_linux_x64_remote failed'; exit 1; }
-          [[ '${{ needs.tests_linux_arm64.result }}' == 'success' ]] || { echo 'tests_linux_arm64 failed'; exit 1; }
-          [[ '${{ needs.tests_windows_x64.result }}' == 'success' ]] || { echo 'tests_windows_x64 failed'; exit 1; }
-          [[ '${{ needs.tests_windows_arm64.result }}' == 'success' ]] || { echo 'tests_windows_arm64 failed'; exit 1; }
+          if [[ '${{ github.event_name }}' == 'workflow_dispatch' && '${{ github.event.inputs.repro_platform }}' != 'all' ]]; then
+            selected_result=''
+            case '${{ github.event.inputs.repro_platform }}' in
+              macos-aarch64) selected_result='${{ needs.tests_macos_aarch64.result }}' ;;
+              linux-x64-remote) selected_result='${{ needs.tests_linux_x64_remote.result }}' ;;
+              linux-arm64) selected_result='${{ needs.tests_linux_arm64.result }}' ;;
+              windows-x64) selected_result='${{ needs.tests_windows_x64.result }}' ;;
+              windows-arm64) selected_result='${{ needs.tests_windows_arm64.result }}' ;;
+              *) echo 'unknown repro_platform'; exit 1 ;;
+            esac
+            [[ "${selected_result}" == 'success' ]] || { echo "selected repro platform failed: ${selected_result}"; exit 1; }
+          else
+            [[ '${{ needs.argument_comment_lint_package.result }}' == 'success' ]] || { echo 'argument_comment_lint_package failed'; exit 1; }
+            [[ '${{ needs.argument_comment_lint_prebuilt.result }}' == 'success' ]] || { echo 'argument_comment_lint_prebuilt failed'; exit 1; }
+            [[ '${{ needs.general.result }}' == 'success' ]] || { echo 'general failed'; exit 1; }
+            [[ '${{ needs.cargo_shear.result }}' == 'success' ]] || { echo 'cargo_shear failed'; exit 1; }
+            [[ '${{ needs.lint_build.result }}' == 'success' ]] || { echo 'lint_build failed'; exit 1; }
+            [[ '${{ needs.tests_macos_aarch64.result }}' == 'success' ]] || { echo 'tests_macos_aarch64 failed'; exit 1; }
+            [[ '${{ needs.tests_linux_x64_remote.result }}' == 'success' ]] || { echo 'tests_linux_x64_remote failed'; exit 1; }
+            [[ '${{ needs.tests_linux_arm64.result }}' == 'success' ]] || { echo 'tests_linux_arm64 failed'; exit 1; }
+            [[ '${{ needs.tests_windows_x64.result }}' == 'success' ]] || { echo 'tests_windows_x64 failed'; exit 1; }
+            [[ '${{ needs.tests_windows_arm64.result }}' == 'success' ]] || { echo 'tests_windows_arm64 failed'; exit 1; }
+          fi

      - name: sccache summary note
        if: always()
--- a/codex-rs/exec/tests/suite/resume.rs
+++ b/codex-rs/exec/tests/suite/resume.rs
@@ -234,12 +234,12 @@ async fn exec_resume_last_accepts_prompt_after_flag_in_json_mode() -> anyhow::Re
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn exec_resume_last_respects_cwd_filter_and_all_flag() -> anyhow::Result<()> {
+async fn exec_resume_last_all_ignores_cwd_filter() -> anyhow::Result<()> {
    skip_if_no_network!(Ok(()));

    let test = test_codex_exec();
    let server = MockServer::start().await;
-    let _response_mock = mount_exec_responses(&server, /*count*/ 5).await;
+    let _response_mock = mount_exec_responses(&server, /*count*/ 3).await;

    let dir_a = TempDir::new()?;
    let dir_b = TempDir::new()?;
@@ -254,6 +254,10 @@ async fn exec_resume_last_respects_cwd_filter_and_all_flag() -> anyhow::Result<(
        .assert()
        .success();

+    // `updated_at` is second-granularity, so ensure thread B is created in a later second than
+    // thread A on fast CI (especially Windows).
+    std::thread::sleep(std::time::Duration::from_millis(1100));
+
    let marker_b = format!("resume-cwd-b-{}", Uuid::new_v4());
    let prompt_b = format!("echo {marker_b}");
    test.cmd_with_server(&server)
@@ -270,29 +274,6 @@ async fn exec_resume_last_respects_cwd_filter_and_all_flag() -> anyhow::Result<(
    let path_b = find_session_file_containing_marker(&sessions_dir, &marker_b)
        .expect("no session file found for marker_b");

-    // `updated_at` is second-granularity, so ensure the touch lands in a later second
-    // than the initial session creation on fast CI (especially Windows).
-    std::thread::sleep(std::time::Duration::from_millis(1100));
-
-    // Make thread B deterministically newest according to rollout metadata.
-    let session_id_b = extract_conversation_id(&path_b);
-    let marker_b_touch = format!("resume-cwd-b-touch-{}", Uuid::new_v4());
-    let prompt_b_touch = format!("echo {marker_b_touch}");
-    test.cmd_with_server(&server)
-        .arg("--skip-git-repo-check")
-        .arg("-C")
-        .arg(dir_b.path())
-        .arg("resume")
-        .arg(&session_id_b)
-        .arg(&prompt_b_touch)
-        .assert()
-        .success();
-
-    // `resume --last` sorts by `updated_at`, which is second-granularity. Sleep so
-    // the upcoming `resume --last --all` write lands in a later second and becomes
-    // deterministically newest (instead of tying and falling back to UUID order).
-    std::thread::sleep(std::time::Duration::from_millis(1100));
-
    let marker_b2 = format!("resume-cwd-b-2-{}", Uuid::new_v4());
    let prompt_b2 = format!("echo {marker_b2}");
    test.cmd_with_server(&server)
@@ -313,27 +294,85 @@ async fn exec_resume_last_respects_cwd_filter_and_all_flag() -> anyhow::Result<(
        "resume --last --all should pick newest session"
    );

-    let marker_a2 = format!("resume-cwd-a-2-{}", Uuid::new_v4());
-    let prompt_a2 = format!("echo {marker_a2}");
+    Ok(())
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn exec_resume_last_prefers_latest_matching_cwd() -> anyhow::Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let test = test_codex_exec();
+    let server = MockServer::start().await;
+    let _response_mock = mount_exec_responses(&server, /*count*/ 4).await;
+
+    let dir_a = TempDir::new()?;
+    let dir_b = TempDir::new()?;
+
+    let marker_a = format!("resume-cwd-a-{}", Uuid::new_v4());
+    let prompt_a = format!("echo {marker_a}");
+    test.cmd_with_server(&server)
+        .arg("--skip-git-repo-check")
+        .arg("-C")
+        .arg(dir_a.path())
+        .arg(&prompt_a)
+        .assert()
+        .success();
+
+    // `updated_at` is second-granularity, so ensure thread B is created in a later second than
+    // thread A on fast CI (especially Windows).
+    std::thread::sleep(std::time::Duration::from_millis(1100));
+
+    let marker_b = format!("resume-cwd-b-{}", Uuid::new_v4());
+    let prompt_b = format!("echo {marker_b}");
+    test.cmd_with_server(&server)
+        .arg("--skip-git-repo-check")
+        .arg("-C")
+        .arg(dir_b.path())
+        .arg(&prompt_b)
+        .assert()
+        .success();
+
+    let sessions_dir = test.home_path().join("sessions");
+    let path_a = find_session_file_containing_marker(&sessions_dir, &marker_a)
+        .expect("no session file found for marker_a");
+    let path_b = find_session_file_containing_marker(&sessions_dir, &marker_b)
+        .expect("no session file found for marker_b");
+
+    let session_id_b = extract_conversation_id(&path_b);
+    let marker_b_touch = format!("resume-cwd-b-touch-{}", Uuid::new_v4());
+    let prompt_b_touch = format!("echo {marker_b_touch}");
+    test.cmd_with_server(&server)
+        .arg("--skip-git-repo-check")
+        .arg("-C")
+        .arg(dir_a.path())
+        .arg("resume")
+        .arg(&session_id_b)
+        .arg(&prompt_b_touch)
+        .assert()
+        .success();
+
+    let marker_c = format!("resume-cwd-c-{}", Uuid::new_v4());
+    let prompt_c = format!("echo {marker_c}");
    test.cmd_with_server(&server)
        .arg("--skip-git-repo-check")
        .arg("-C")
        .arg(dir_a.path())
        .arg("resume")
        .arg("--last")
-        .arg(&prompt_a2)
+        .arg(&prompt_c)
        .assert()
        .success();

-    let resumed_path_cwd = find_session_file_containing_marker(&sessions_dir, &marker_a2)
-        .expect("no resumed session file containing marker_a2");
-    // The `--all` resume above appends a new turn to `path_b` while running from `dir_a`, so the
-    // session's latest cwd now matches `dir_a`. A subsequent `resume --last` should therefore pick
-    // the newest matching session (`path_b`).
+    let resumed_path_cwd = find_session_file_containing_marker(&sessions_dir, &marker_c)
+        .expect("no resumed session file containing marker_c");
    assert_eq!(
        resumed_path_cwd, path_b,
        "resume --last should prefer sessions whose latest turn context matches the current cwd"
    );
+    assert_ne!(
+        resumed_path_cwd, path_a,
+        "resume --last should not fall back to the older matching cwd session"
+    );

    Ok(())
 }
Author	SHA1	Message	Date
starr-openai	5ccd682e2d	Allow targeted rust CI repros without sharding	2026-05-19 21:36:03 -07:00
starr-openai	8cab6a6dfe	Pass repro filterset through env	2026-05-19 21:06:44 -07:00
starr-openai	acc894d2fd	Add targeted rust CI repro workflow	2026-05-19 21:06:43 -07:00
starr-openai	ce7ce756c4	Split slow exec resume cwd test	2026-05-19 20:29:44 -07:00