Pass repro filterset through env

Add targeted rust CI repro workflow
Preserve unified exec startup rejections
2026-05-23 12:34:25 +00:00 · 2026-05-19 21:06:18 -07:00 · 2026-05-19 21:06:18 -07:00 · 2026-05-19 20:29:33 -07:00 · 2026-05-19 20:29:33 -07:00
5 changed files with 284 additions and 12 deletions
--- a/.github/workflows/rust-ci-full-nextest-platform.yml
+++ b/.github/workflows/rust-ci-full-nextest-platform.yml
@@ -47,6 +47,18 @@ on:
        required: false
        default: false
        type: boolean
+      nextest_filterset:
+        required: false
+        default: "all()"
+        type: string
+      selected_shard:
+        required: false
+        default: 0
+        type: number
+      repeat_count:
+        required: false
+        default: 1
+        type: number

 # Caller workflow-level env does not flow through workflow_call, so keep the
 # Cargo git transport hardening on the archive and shard jobs directly here.
@@ -76,6 +88,19 @@ jobs:
        with:
          persist-credentials: false

+      - name: Validate targeted test inputs
+        shell: bash
+        # Pass the inputs through the environment instead of expanding them
+        # inside the script, so their text is never parsed as shell code.
+        env:
+          SELECTED_SHARD: ${{ inputs.selected_shard }}
+          REPEAT_COUNT: ${{ inputs.repeat_count }}
+        run: |
+          set -euo pipefail
+          # An arithmetic (( )) test that fails to parse (e.g. "1.5") counts as
+          # "false" inside an if-condition, which would let a non-integer value
+          # through; match the digits explicitly instead.
+          if [[ ! "${SELECTED_SHARD}" =~ ^[0-4]$ ]]; then
+            echo "selected_shard must be between 0 and 4" >&2
+            exit 1
+          fi
+          if [[ ! "${REPEAT_COUNT}" =~ ^[1-9][0-9]*$ ]]; then
+            echo "repeat_count must be at least 1" >&2
+            exit 1
+          fi
+
      - name: Configure Dev Drive (Windows)
        if: ${{ runner.os == 'Windows' }}
        shell: pwsh
@@ -300,7 +325,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        shard: [1, 2, 3, 4]
+        shard: ${{ inputs.selected_shard == 0 && fromJSON('[1,2,3,4]') || fromJSON(format('[{0}]', inputs.selected_shard)) }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
@@ -392,6 +417,7 @@ jobs:
            --archive-file "${archive_file}"
            --workspace-remap "${workspace_root}"
            --partition "hash:${{ matrix.shard }}/4"
+            --filterset "${NEXTEST_FILTERSET}"
          )
          if [[ "${{ inputs.test_threads }}" != "0" ]]; then
            nextest_args+=(--test-threads "${{ inputs.test_threads }}")
@@ -417,8 +443,12 @@ jobs:
            )
          fi

-          "${test_command[@]}"
+          for attempt in $(seq 1 "${{ inputs.repeat_count }}"); do
+            echo "nextest attempt ${attempt}/${{ inputs.repeat_count }}"
+            "${test_command[@]}"
+          done
        env:
+          NEXTEST_FILTERSET: ${{ inputs.nextest_filterset }}
          RUST_BACKTRACE: 1
          RUST_MIN_STACK: "8388608" # 8 MiB
          NEXTEST_STATUS_LEVEL: leak
--- a/.github/workflows/rust-ci-full.yml
+++ b/.github/workflows/rust-ci-full.yml
@@ -5,6 +5,34 @@ on:
      - main
      - "**full-ci**"
  workflow_dispatch:
+    inputs:
+      repro_platform:
+        description: Platform lane to run. Use all for the normal full workflow.
+        required: true
+        default: all
+        type: choice
+        options:
+          - all
+          - macos-aarch64
+          - linux-x64-remote
+          - linux-arm64
+          - windows-x64
+          - windows-arm64
+      nextest_filterset:
+        description: cargo-nextest filterset selecting the tests to run.
+        required: true
+        default: all()
+        type: string
+      shard:
+        description: Full-CI shard to reproduce. Use 0 to run all four shards.
+        required: true
+        default: 0
+        type: number
+      repeat_count:
+        description: Number of times to rerun the selected shard/filterset in one job.
+        required: true
+        default: 1
+        type: number

 # CI builds in debug (dev) for faster signal.
 env:
@@ -16,6 +44,7 @@ env:
 jobs:
  # --- CI that doesn't need specific targets ---------------------------------
  general:
+    if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.repro_platform == 'all' }}
    name: Format / etc
    runs-on: ubuntu-24.04
    defaults:
@@ -32,6 +61,7 @@ jobs:
        run: cargo fmt -- --config imports_granularity=Item --check

  cargo_shear:
+    if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.repro_platform == 'all' }}
    name: cargo shear
    runs-on: ubuntu-24.04
    defaults:
@@ -49,6 +79,7 @@ jobs:
        run: cargo shear --deny-warnings

  argument_comment_lint_package:
+    if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.repro_platform == 'all' }}
    name: Argument comment lint package
    runs-on: ubuntu-24.04
    env:
@@ -90,6 +121,7 @@ jobs:
          RUST_MIN_STACK: "8388608" # 8 MiB

  argument_comment_lint_prebuilt:
+    if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.repro_platform == 'all' }}
    name: Argument comment lint - ${{ matrix.name }}
    runs-on: ${{ matrix.runs_on || matrix.runner }}
    timeout-minutes: 30
@@ -149,6 +181,7 @@ jobs:

  # --- CI to validate on different os/targets --------------------------------
  lint_build:
+    if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.repro_platform == 'all' }}
    name: Lint/Build — ${{ matrix.runner }} - ${{ matrix.target }}${{ matrix.profile == 'release' && ' (release)' || '' }}
    runs-on: ${{ matrix.runs_on || matrix.runner }}
    timeout-minutes: 30
@@ -522,6 +555,7 @@ jobs:
          key: apt-${{ matrix.runner }}-${{ matrix.target }}-v1

  tests_macos_aarch64:
+    if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.repro_platform == 'all' || github.event.inputs.repro_platform == 'macos-aarch64' }}
    name: Tests — macos-15-xlarge - aarch64-apple-darwin
    uses: ./.github/workflows/rust-ci-full-nextest-platform.yml
    with:
@@ -530,9 +564,13 @@ jobs:
      profile: ci-test
      artifact_id: macos-aarch64
      use_sccache: true
+      nextest_filterset: ${{ github.event.inputs.nextest_filterset || 'all()' }}
+      selected_shard: ${{ fromJSON(github.event.inputs.shard || '0') }}
+      repeat_count: ${{ fromJSON(github.event.inputs.repeat_count || '1') }}
    secrets: inherit

  tests_linux_x64_remote:
+    if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.repro_platform == 'all' || github.event.inputs.repro_platform == 'linux-x64-remote' }}
    name: Tests — ubuntu-24.04 - x86_64-unknown-linux-gnu (remote)
    uses: ./.github/workflows/rust-ci-full-nextest-platform.yml
    with:
@@ -544,9 +582,13 @@ jobs:
      artifact_id: linux-x64-remote
      remote_env: true
      use_sccache: true
+      nextest_filterset: ${{ github.event.inputs.nextest_filterset || 'all()' }}
+      selected_shard: ${{ fromJSON(github.event.inputs.shard || '0') }}
+      repeat_count: ${{ fromJSON(github.event.inputs.repeat_count || '1') }}
    secrets: inherit

  tests_linux_arm64:
+    if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.repro_platform == 'all' || github.event.inputs.repro_platform == 'linux-arm64' }}
    name: Tests — ubuntu-24.04-arm - aarch64-unknown-linux-gnu
    uses: ./.github/workflows/rust-ci-full-nextest-platform.yml
    with:
@@ -557,9 +599,13 @@ jobs:
      profile: ci-test
      artifact_id: linux-arm64
      use_sccache: true
+      nextest_filterset: ${{ github.event.inputs.nextest_filterset || 'all()' }}
+      selected_shard: ${{ fromJSON(github.event.inputs.shard || '0') }}
+      repeat_count: ${{ fromJSON(github.event.inputs.repeat_count || '1') }}
    secrets: inherit

  tests_windows_x64:
+    if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.repro_platform == 'all' || github.event.inputs.repro_platform == 'windows-x64' }}
    name: Tests — windows-x64 - x86_64-pc-windows-msvc
    uses: ./.github/workflows/rust-ci-full-nextest-platform.yml
    with:
@@ -570,9 +616,13 @@ jobs:
      profile: ci-test
      artifact_id: windows-x64
      test_threads: 8
+      nextest_filterset: ${{ github.event.inputs.nextest_filterset || 'all()' }}
+      selected_shard: ${{ fromJSON(github.event.inputs.shard || '0') }}
+      repeat_count: ${{ fromJSON(github.event.inputs.repeat_count || '1') }}
    secrets: inherit

  tests_windows_arm64:
+    if: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.repro_platform == 'all' || github.event.inputs.repro_platform == 'windows-arm64' }}
    name: Tests — windows-arm64 - aarch64-pc-windows-msvc
    uses: ./.github/workflows/rust-ci-full-nextest-platform.yml
    with:
@@ -587,6 +637,9 @@ jobs:
      artifact_id: windows-arm64
      test_threads: 8
      use_sccache: true
+      nextest_filterset: ${{ github.event.inputs.nextest_filterset || 'all()' }}
+      selected_shard: ${{ fromJSON(github.event.inputs.shard || '0') }}
+      repeat_count: ${{ fromJSON(github.event.inputs.repeat_count || '1') }}
    secrets: inherit

  # --- Gatherer job for the full post-merge workflow --------------------------
@@ -621,16 +674,29 @@ jobs:
          echo "test arm64 : ${{ needs.tests_linux_arm64.result }}"
          echo "test winx64: ${{ needs.tests_windows_x64.result }}"
          echo "test winarm: ${{ needs.tests_windows_arm64.result }}"
-          [[ '${{ needs.argument_comment_lint_package.result }}' == 'success' ]] || { echo 'argument_comment_lint_package failed'; exit 1; }
-          [[ '${{ needs.argument_comment_lint_prebuilt.result }}' == 'success' ]] || { echo 'argument_comment_lint_prebuilt failed'; exit 1; }
-          [[ '${{ needs.general.result }}' == 'success' ]] || { echo 'general failed'; exit 1; }
-          [[ '${{ needs.cargo_shear.result }}' == 'success' ]] || { echo 'cargo_shear failed'; exit 1; }
-          [[ '${{ needs.lint_build.result }}' == 'success' ]] || { echo 'lint_build failed'; exit 1; }
-          [[ '${{ needs.tests_macos_aarch64.result }}' == 'success' ]] || { echo 'tests_macos_aarch64 failed'; exit 1; }
-          [[ '${{ needs.tests_linux_x64_remote.result }}' == 'success' ]] || { echo 'tests_linux_x64_remote failed'; exit 1; }
-          [[ '${{ needs.tests_linux_arm64.result }}' == 'success' ]] || { echo 'tests_linux_arm64 failed'; exit 1; }
-          [[ '${{ needs.tests_windows_x64.result }}' == 'success' ]] || { echo 'tests_windows_x64 failed'; exit 1; }
-          [[ '${{ needs.tests_windows_arm64.result }}' == 'success' ]] || { echo 'tests_windows_arm64 failed'; exit 1; }
+          if [[ '${{ github.event_name }}' == 'workflow_dispatch' && '${{ github.event.inputs.repro_platform }}' != 'all' ]]; then
+            selected_result=''
+            case '${{ github.event.inputs.repro_platform }}' in
+              macos-aarch64) selected_result='${{ needs.tests_macos_aarch64.result }}' ;;
+              linux-x64-remote) selected_result='${{ needs.tests_linux_x64_remote.result }}' ;;
+              linux-arm64) selected_result='${{ needs.tests_linux_arm64.result }}' ;;
+              windows-x64) selected_result='${{ needs.tests_windows_x64.result }}' ;;
+              windows-arm64) selected_result='${{ needs.tests_windows_arm64.result }}' ;;
+              *) echo 'unknown repro_platform'; exit 1 ;;
+            esac
+            [[ "${selected_result}" == 'success' ]] || { echo "selected repro platform failed: ${selected_result}"; exit 1; }
+          else
+            [[ '${{ needs.argument_comment_lint_package.result }}' == 'success' ]] || { echo 'argument_comment_lint_package failed'; exit 1; }
+            [[ '${{ needs.argument_comment_lint_prebuilt.result }}' == 'success' ]] || { echo 'argument_comment_lint_prebuilt failed'; exit 1; }
+            [[ '${{ needs.general.result }}' == 'success' ]] || { echo 'general failed'; exit 1; }
+            [[ '${{ needs.cargo_shear.result }}' == 'success' ]] || { echo 'cargo_shear failed'; exit 1; }
+            [[ '${{ needs.lint_build.result }}' == 'success' ]] || { echo 'lint_build failed'; exit 1; }
+            [[ '${{ needs.tests_macos_aarch64.result }}' == 'success' ]] || { echo 'tests_macos_aarch64 failed'; exit 1; }
+            [[ '${{ needs.tests_linux_x64_remote.result }}' == 'success' ]] || { echo 'tests_linux_x64_remote failed'; exit 1; }
+            [[ '${{ needs.tests_linux_arm64.result }}' == 'success' ]] || { echo 'tests_linux_arm64 failed'; exit 1; }
+            [[ '${{ needs.tests_windows_x64.result }}' == 'success' ]] || { echo 'tests_windows_x64 failed'; exit 1; }
+            [[ '${{ needs.tests_windows_arm64.result }}' == 'success' ]] || { echo 'tests_windows_arm64 failed'; exit 1; }
+          fi

      - name: sccache summary note
        if: always()
--- a/codex-rs/core/src/unified_exec/errors.rs
+++ b/codex-rs/core/src/unified_exec/errors.rs
@@ -23,6 +23,8 @@ pub(crate) enum UnifiedExecError {
        message: String,
        output: ExecToolCallOutput,
    },
+    #[error("Command rejected: {message}")]
+    Rejected { message: String },
 }

 impl UnifiedExecError {
@@ -37,4 +39,8 @@ impl UnifiedExecError {
    pub(crate) fn sandbox_denied(message: String, output: ExecToolCallOutput) -> Self {
        Self::SandboxDenied { message, output }
    }
+
+    /// Builds the [`UnifiedExecError::Rejected`] variant carrying `message`.
+    pub(crate) fn rejected(message: String) -> Self {
+        Self::Rejected { message }
+    }
 }
--- a/codex-rs/core/src/unified_exec/process_manager.rs
+++ b/codex-rs/core/src/unified_exec/process_manager.rs
@@ -19,6 +19,7 @@ use crate::sandboxing::ExecServerEnvConfig;
 use crate::tools::context::ExecCommandToolOutput;
 use crate::tools::events::ToolEmitter;
 use crate::tools::events::ToolEventCtx;
+use crate::tools::events::ToolEventFailure;
 use crate::tools::events::ToolEventStage;
 use crate::tools::network_approval::DeferredNetworkApproval;
 use crate::tools::network_approval::finish_deferred_network_approval;
@@ -381,6 +382,25 @@ impl UnifiedExecProcessManager {
                (Arc::new(process), deferred_network_approval)
            }
            Err(err) => {
+                let event_ctx = ToolEventCtx::new(
+                    context.session.as_ref(),
+                    context.turn.as_ref(),
+                    &context.call_id,
+                    /*turn_diff_tracker*/ None,
+                );
+                let emitter = ToolEmitter::unified_exec(
+                    &request.command,
+                    cwd.clone(),
+                    ExecCommandSource::UnifiedExecStartup,
+                    Some(request.process_id.to_string()),
+                );
+                emitter.emit(event_ctx, ToolEventStage::Begin).await;
+                emitter
+                    .emit(
+                        event_ctx,
+                        ToolEventStage::Failure(startup_failure_event(&err)),
+                    )
+                    .await;
                self.release_process_id(request.process_id).await;
                return Err(err);
            }
@@ -1084,6 +1104,7 @@ impl UnifiedExecProcessManager {
                    };
                    UnifiedExecError::sandbox_denied(message, output)
                }
+                ToolError::Rejected(message) => UnifiedExecError::rejected(message),
                other => UnifiedExecError::create_process(format!("{other:?}")),
            })
    }
@@ -1279,6 +1300,17 @@ impl UnifiedExecProcessManager {
    }
 }

+/// Translates a startup error into the payload passed to
+/// `ToolEventStage::Failure`.
+///
+/// A sandbox denial forwards a copy of its captured output, a rejection keeps
+/// its message (with no patch delta), and every other error is reported as an
+/// "execution error" message containing the error's `Debug` rendering.
+fn startup_failure_event(err: &UnifiedExecError) -> ToolEventFailure<'_> {
+    if let UnifiedExecError::SandboxDenied { output, .. } = err {
+        return ToolEventFailure::Output(output.clone());
+    }
+    if let UnifiedExecError::Rejected { message } = err {
+        return ToolEventFailure::Rejected {
+            message: message.clone(),
+            applied_patch_delta: None,
+        };
+    }
+    ToolEventFailure::Message(format!("execution error: {err:?}"))
+}
+
 enum ProcessStatus {
    Alive {
        exit_code: Option<i32>,
--- a/codex-rs/core/src/unified_exec/process_manager_tests.rs
+++ b/codex-rs/core/src/unified_exec/process_manager_tests.rs
@@ -1,8 +1,95 @@
 use super::*;
+use crate::session::turn_context::TurnContext;
+use codex_protocol::exec_output::ExecToolCallOutput;
+use codex_protocol::exec_output::StreamOutput;
 use pretty_assertions::assert_eq;
 use tokio::time::Duration;
 use tokio::time::Instant;

+/// Builds the `ExecCommandRequest` used by the startup-failure tests.
+///
+/// The command is empty, both working directories are cloned from `turn.cwd`,
+/// and the environment is the turn's primary environment; `process_id` is
+/// passed through unchanged.
+///
+/// # Panics
+///
+/// Panics if `turn.environments.primary_environment()` does not yield a value.
+fn startup_failure_test_request(turn: &TurnContext, process_id: i32) -> ExecCommandRequest {
+    ExecCommandRequest {
+        command: Vec::new(),
+        shell_type: crate::shell::ShellType::Sh,
+        hook_command: String::new(),
+        process_id,
+        yield_time_ms: 1000,
+        max_output_tokens: None,
+        // NOTE(review): presumably `turn.cwd` is deprecated and the lint is
+        // silenced per field here — confirm against `TurnContext`.
+        #[allow(deprecated)]
+        cwd: turn.cwd.clone(),
+        #[allow(deprecated)]
+        sandbox_cwd: turn.cwd.clone(),
+        environment: turn
+            .environments
+            .primary_environment()
+            .expect("primary environment"),
+        network: None,
+        tty: true,
+        sandbox_permissions: crate::sandboxing::SandboxPermissions::UseDefault,
+        additional_permissions: None,
+        additional_permissions_preapproved: false,
+        justification: None,
+        prefix_rule: None,
+    }
+}
+
+/// Emits a `Begin` stage followed by a `Failure` stage built from `err`, then
+/// returns the `ExecCommandEnd` event received for it.
+///
+/// The helper drives a `ToolEmitter` directly against a fresh test session, so
+/// it checks how `startup_failure_event(err)` is surfaced as events rather
+/// than going through the process manager's own startup path.
+///
+/// # Panics
+///
+/// Panics if either event is not received within one second, if the event
+/// channel closes, if the events are not `ExecCommandBegin` followed by
+/// `ExecCommandEnd`, or if the begin event's call id differs from `call_id`.
+async fn emit_startup_failure_end_event(
+    call_id: &str,
+    err: &UnifiedExecError,
+) -> codex_protocol::protocol::ExecCommandEndEvent {
+    let (session, turn, rx_event) = crate::session::tests::make_session_and_context_with_rx().await;
+    let context =
+        UnifiedExecContext::new(Arc::clone(&session), Arc::clone(&turn), call_id.to_string());
+    let process_id = session
+        .services
+        .unified_exec_manager
+        .allocate_process_id()
+        .await;
+    let request = startup_failure_test_request(turn.as_ref(), process_id);
+    // The emitter's cwd is read from the turn's primary environment rather
+    // than from the request built above.
+    let selected_cwd = turn
+        .environments
+        .primary()
+        .expect("primary turn environment")
+        .cwd
+        .clone();
+    let event_ctx = ToolEventCtx::new(
+        context.session.as_ref(),
+        context.turn.as_ref(),
+        &context.call_id,
+        /*turn_diff_tracker*/ None,
+    );
+    let emitter = ToolEmitter::unified_exec(
+        &request.command,
+        selected_cwd,
+        ExecCommandSource::UnifiedExecStartup,
+        Some(request.process_id.to_string()),
+    );
+    // Emit both stages first; the events are read back from the channel below.
+    emitter.emit(event_ctx, ToolEventStage::Begin).await;
+    emitter
+        .emit(
+            event_ctx,
+            ToolEventStage::Failure(startup_failure_event(err)),
+        )
+        .await;
+
+    // The first event on the channel must be the begin event for this call.
+    let begin_event = tokio::time::timeout(Duration::from_secs(1), rx_event.recv())
+        .await
+        .expect("timed out waiting for begin event")
+        .expect("event channel closed");
+    let codex_protocol::protocol::EventMsg::ExecCommandBegin(begin_event) = begin_event.msg else {
+        panic!("expected ExecCommandBegin event");
+    };
+    assert_eq!(begin_event.call_id, call_id);
+
+    // The next event must be the end event, which is handed back to the caller.
+    let end_event = tokio::time::timeout(Duration::from_secs(1), rx_event.recv())
+        .await
+        .expect("timed out waiting for end event")
+        .expect("event channel closed");
+    let codex_protocol::protocol::EventMsg::ExecCommandEnd(end_event) = end_event.msg else {
+        panic!("expected ExecCommandEnd event");
+    };
+    end_event
+}
+
 #[test]
 fn unified_exec_env_injects_defaults() {
    let env = apply_unified_exec_env(HashMap::new());
@@ -226,6 +313,57 @@ async fn failed_initial_end_for_unstored_process_uses_fallback_output() {
    );
 }

+/// A process-creation error produces a begin event followed by an end event
+/// with `Failed` status for the same call id.
+#[tokio::test]
+async fn startup_create_process_failure_emits_begin_then_failed_end() {
+    let err = crate::unified_exec::UnifiedExecError::create_process(
+        "missing command line for PTY".to_string(),
+    );
+    let end_event = emit_startup_failure_end_event("call-unified-empty", &err).await;
+    assert_eq!(end_event.call_id, "call-unified-empty");
+    assert_eq!(
+        end_event.status,
+        codex_protocol::protocol::ExecCommandStatus::Failed
+    );
+}
+
+/// A sandbox denial's exit code, stderr, and aggregated output carry over to
+/// the `Failed` end event.
+#[tokio::test]
+async fn startup_sandbox_denied_failure_preserves_captured_output() {
+    let err = crate::unified_exec::UnifiedExecError::sandbox_denied(
+        "sandbox denied".to_string(),
+        ExecToolCallOutput {
+            exit_code: 13,
+            stderr: StreamOutput::new("stderr marker".to_string()),
+            aggregated_output: StreamOutput::new("captured denial output".to_string()),
+            ..Default::default()
+        },
+    );
+
+    let end_event = emit_startup_failure_end_event("call-unified-sandbox-denied", &err).await;
+
+    assert_eq!(end_event.call_id, "call-unified-sandbox-denied");
+    assert_eq!(
+        end_event.status,
+        codex_protocol::protocol::ExecCommandStatus::Failed
+    );
+    assert_eq!(end_event.exit_code, 13);
+    assert_eq!(end_event.stderr, "stderr marker");
+    assert_eq!(end_event.aggregated_output, "captured denial output");
+}
+
+/// A rejection produces a `Declined` end event whose aggregated output is the
+/// rejection message.
+#[tokio::test]
+async fn startup_rejected_failure_emits_declined_end() {
+    let err = crate::unified_exec::UnifiedExecError::rejected("rejected by user".to_string());
+
+    let end_event = emit_startup_failure_end_event("call-unified-rejected", &err).await;
+
+    assert_eq!(end_event.call_id, "call-unified-rejected");
+    assert_eq!(
+        end_event.status,
+        codex_protocol::protocol::ExecCommandStatus::Declined
+    );
+    assert_eq!(end_event.aggregated_output, "rejected by user");
+}
+
 #[test]
 fn pruning_prefers_exited_processes_outside_recently_used() {
    let now = Instant::now();
Author	SHA1	Message	Date
starr-openai	b4353b970a	Pass repro filterset through env	2026-05-19 21:06:18 -07:00
starr-openai	dfa682ccd7	Add targeted rust CI repro workflow	2026-05-19 21:06:18 -07:00
starr-openai	a96805c3a6	Preserve unified exec startup rejections	2026-05-19 20:29:33 -07:00
starr-openai	6afa5e57ca	Emit unified exec end on startup failure	2026-05-19 20:29:33 -07:00