Stabilize zsh fork app-server tests (#13872)

## What changed - `turn_start_shell_zsh_fork_executes_command_v2` now keeps the shell command alive with a file marker until the interrupt arrives instead of using a command that can finish too quickly. - `turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2` now waits for `turn/completed` before sending a fallback interrupt and accepts the real terminal outcomes observed across platforms. ## Why this fixes the flake - The original tests assumed a narrow ordering window: the child command would still be running when the interrupt happened, and completion would always arrive in one specific order. - In CI, especially across different shells and runner speeds, those assumptions break. Sometimes the child finishes before the interrupt; sometimes the protocol completes while the fallback path is still arming itself. - Holding the command open until the interrupt and waiting for the explicit protocol completion event makes the tests synchronize on the behavior under test instead of on wall-clock timing. ## Scope - Test-only change.
2026-04-24 14:45:27 +00:00 · 2026-03-09 09:38:16 -07:00
parent 3f1280ce1c
commit 615ed0e437
1 changed files with 53 additions and 12 deletions
--- a/codex-rs/app-server/tests/suite/v2/turn_start_zsh_fork.rs
+++ b/codex-rs/app-server/tests/suite/v2/turn_start_zsh_fork.rs
@@ -54,6 +54,7 @@ async fn turn_start_shell_zsh_fork_executes_command_v2() -> Result<()> {
    std::fs::create_dir(&codex_home)?;
    let workspace = tmp.path().join("workspace");
    std::fs::create_dir(&workspace)?;
+    let release_marker = workspace.join("interrupt-release");

    let Some(zsh_path) = find_test_zsh_path()? else {
        eprintln!("skipping zsh fork test: no zsh executable found");
@@ -61,8 +62,15 @@ async fn turn_start_shell_zsh_fork_executes_command_v2() -> Result<()> {
    };
    eprintln!("using zsh path for zsh-fork test: {}", zsh_path.display());

+    // Keep the shell command in flight until we interrupt it. A fast command
+    // like `echo hi` can finish before the interrupt arrives on faster runners,
+    // which turns this into a test for post-command follow-up behavior instead
+    // of interrupting an active zsh-fork command.
+    let release_marker_escaped = release_marker.to_string_lossy().replace('\'', r#"'\''"#);
+    let wait_for_interrupt =
+        format!("while [ ! -f '{release_marker_escaped}' ]; do sleep 0.01; done");
    let response = create_shell_command_sse_response(
-        vec!["echo".to_string(), "hi".to_string()],
+        vec!["/bin/sh".to_string(), "-c".to_string(), wait_for_interrupt],
        None,
        Some(5000),
        "call-zsh-fork",
@@ -155,7 +163,9 @@ async fn turn_start_shell_zsh_fork_executes_command_v2() -> Result<()> {
    assert_eq!(id, "call-zsh-fork");
    assert_eq!(status, CommandExecutionStatus::InProgress);
    assert!(command.starts_with(&zsh_path.display().to_string()));
-    assert!(command.contains(" -lc 'echo hi'"));
+    assert!(command.contains("/bin/sh -c"));
+    assert!(command.contains("sleep 0.01"));
+    assert!(command.contains(&release_marker.display().to_string()));
    assert_eq!(cwd, workspace);

    mcp.interrupt_turn_and_wait_for_aborted(thread.id, turn.id, DEFAULT_READ_TIMEOUT)
@@ -663,22 +673,50 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
            };
            assert_eq!(id, "call-zsh-fork-subcommand-decline");
            assert_eq!(status, CommandExecutionStatus::Declined);
-            assert!(
-                aggregated_output.is_none()
-                    || aggregated_output == Some("exec command rejected by user".to_string())
-            );
+            if let Some(output) = aggregated_output.as_deref() {
+                assert!(
+                    output == "exec command rejected by user"
+                        || output.contains("sandbox denied exec error"),
+                    "unexpected aggregated output: {output}"
+                );
+            }

-            mcp.interrupt_turn_and_wait_for_aborted(
-                thread.id.clone(),
-                turn.id.clone(),
+            match timeout(
                DEFAULT_READ_TIMEOUT,
+                mcp.read_stream_until_notification_message("turn/completed"),
            )
-            .await?;
+            .await
+            {
+                Ok(Ok(completed_notif)) => {
+                    let completed: TurnCompletedNotification = serde_json::from_value(
+                        completed_notif
+                            .params
+                            .expect("turn/completed params must be present"),
+                    )?;
+                    assert_eq!(completed.thread_id, thread.id);
+                    assert_eq!(completed.turn.id, turn.id);
+                    assert!(matches!(
+                        completed.turn.status,
+                        TurnStatus::Interrupted | TurnStatus::Completed
+                    ));
+                }
+                Ok(Err(error)) => return Err(error),
+                Err(_) => {
+                    mcp.interrupt_turn_and_wait_for_aborted(
+                        thread.id.clone(),
+                        turn.id.clone(),
+                        DEFAULT_READ_TIMEOUT,
+                    )
+                    .await?;
+                }
+            }
        }
        Ok(Err(error)) => return Err(error),
        Err(_) => {
            // Some zsh builds abort the turn immediately after the rejected
-            // subcommand without emitting a parent `item/completed`.
+            // subcommand without emitting a parent `item/completed`, and Linux
+            // sandbox failures can also complete the turn before the parent
+            // completion item is observed.
            let completed_notif = timeout(
                DEFAULT_READ_TIMEOUT,
                mcp.read_stream_until_notification_message("turn/completed"),
@@ -691,7 +729,10 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2()
            )?;
            assert_eq!(completed.thread_id, thread.id);
            assert_eq!(completed.turn.id, turn.id);
-            assert_eq!(completed.turn.status, TurnStatus::Interrupted);
+            assert!(matches!(
+                completed.turn.status,
+                TurnStatus::Interrupted | TurnStatus::Completed
+            ));
        }
    }