Implement user shell command review

2026-04-24 22:54:54 +00:00 · 2026-02-03 13:51:04 +00:00
parent 88598b9402
commit 8622225ae8
4 changed files with 384 additions and 177 deletions
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -1513,6 +1513,17 @@ impl Session {
            .map(|task| Arc::clone(&task.turn_context))
    }

+    async fn active_turn_context_and_cancellation_token(
+        &self,
+    ) -> Option<(Arc<TurnContext>, CancellationToken)> {
+        let active = self.active_turn.lock().await;
+        let (_, task) = active.as_ref()?.tasks.first()?;
+        Some((
+            Arc::clone(&task.turn_context),
+            task.cancellation_token.child_token(),
+        ))
+    }
+
    pub(crate) async fn record_execpolicy_amendment_message(
        &self,
        sub_id: &str,
@@ -2528,7 +2539,9 @@ mod handlers {
    use crate::tasks::CompactTask;
    use crate::tasks::RegularTask;
    use crate::tasks::UndoTask;
+    use crate::tasks::UserShellCommandMode;
    use crate::tasks::UserShellCommandTask;
+    use crate::tasks::execute_user_shell_command;
    use codex_protocol::custom_prompts::CustomPrompt;
    use codex_protocol::protocol::CodexErrorInfo;
    use codex_protocol::protocol::ErrorEvent;
@@ -2671,6 +2684,23 @@ mod handlers {
        command: String,
        previous_context: &mut Option<Arc<TurnContext>>,
    ) {
+        if let Some((turn_context, cancellation_token)) =
+            sess.active_turn_context_and_cancellation_token().await
+        {
+            let session = Arc::clone(sess);
+            tokio::spawn(async move {
+                execute_user_shell_command(
+                    session,
+                    turn_context,
+                    command,
+                    cancellation_token,
+                    UserShellCommandMode::ActiveTurnAuxiliary,
+                )
+                .await;
+            });
+            return;
+        }
+
        let turn_context = sess.new_default_turn_with_sub_id(sub_id).await;
        sess.spawn_task(
            Arc::clone(&turn_context),
--- a/codex-rs/core/src/tasks/mod.rs
+++ b/codex-rs/core/src/tasks/mod.rs
@@ -40,7 +40,9 @@ pub(crate) use ghost_snapshot::GhostSnapshotTask;
 pub(crate) use regular::RegularTask;
 pub(crate) use review::ReviewTask;
 pub(crate) use undo::UndoTask;
+pub(crate) use user_shell::UserShellCommandMode;
 pub(crate) use user_shell::UserShellCommandTask;
+pub(crate) use user_shell::execute_user_shell_command;

 const GRACEFULL_INTERRUPTION_TIMEOUT_MS: u64 = 100;
 const TURN_ABORTED_INTERRUPTED_GUIDANCE: &str = "The user interrupted the previous turn on purpose. If any tools/commands were aborted, they may have partially executed; verify current state before retrying.";
--- a/codex-rs/core/src/tasks/user_shell.rs
+++ b/codex-rs/core/src/tasks/user_shell.rs
@@ -32,9 +32,22 @@ use crate::user_shell_command::user_shell_command_record_item;

 use super::SessionTask;
 use super::SessionTaskContext;
+use crate::codex::Session;
+use codex_protocol::models::ResponseInputItem;
+use codex_protocol::models::ResponseItem;

 const USER_SHELL_TIMEOUT_MS: u64 = 60 * 60 * 1000; // 1 hour

+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub(crate) enum UserShellCommandMode {
+    /// Executes as an independent turn: `execute_user_shell_command` emits
+    /// TurnStarted itself; TurnComplete is left to the task lifecycle plumbing.
+    StandaloneTurn,
+    /// Executes while another turn is already active. This mode must not emit a
+    /// second TurnStarted/TurnComplete pair for the same active turn.
+    ActiveTurnAuxiliary,
+}
+
 #[derive(Clone)]
 pub(crate) struct UserShellCommandTask {
    command: String,
@@ -59,195 +72,243 @@ impl SessionTask for UserShellCommandTask {
        _input: Vec<UserInput>,
        cancellation_token: CancellationToken,
    ) -> Option<String> {
-        let _ = session
-            .session
-            .services
-            .otel_manager
-            .counter("codex.task.user_shell", 1, &[]);
+        execute_user_shell_command(
+            session.clone_session(),
+            turn_context,
+            self.command.clone(),
+            cancellation_token,
+            UserShellCommandMode::StandaloneTurn,
+        )
+        .await;
+        None
+    }
+}

+pub(crate) async fn execute_user_shell_command(
+    session: Arc<Session>,
+    turn_context: Arc<TurnContext>,
+    command: String,
+    cancellation_token: CancellationToken,
+    mode: UserShellCommandMode,
+) {
+    session
+        .services
+        .otel_manager
+        .counter("codex.task.user_shell", 1, &[]);
+
+    if mode == UserShellCommandMode::StandaloneTurn {
+        // Only a standalone turn announces TurnStarted here. In auxiliary mode the
+        // active turn has already emitted it, and a second TurnStarted would
+        // duplicate turn lifecycle events and confuse clients.
        let event = EventMsg::TurnStarted(TurnStartedEvent {
            model_context_window: turn_context.client.get_model_context_window(),
            collaboration_mode_kind: turn_context.collaboration_mode.mode,
        });
-        let session = session.clone_session();
        session.send_event(turn_context.as_ref(), event).await;
+    }

-        // Execute the user's script under their default shell when known; this
-        // allows commands that use shell features (pipes, &&, redirects, etc.).
-        // We do not source rc files or otherwise reformat the script.
-        let use_login_shell = true;
-        let session_shell = session.user_shell();
-        let display_command = session_shell.derive_exec_args(&self.command, use_login_shell);
-        let exec_command =
-            maybe_wrap_shell_lc_with_snapshot(&display_command, session_shell.as_ref());
+    // Execute the user's script under their default shell when known; this
+    // allows commands that use shell features (pipes, &&, redirects, etc.).
+    // We do not source rc files or otherwise reformat the script.
+    let use_login_shell = true;
+    let session_shell = session.user_shell();
+    let display_command = session_shell.derive_exec_args(&command, use_login_shell);
+    let exec_command = maybe_wrap_shell_lc_with_snapshot(&display_command, session_shell.as_ref());

-        let call_id = Uuid::new_v4().to_string();
-        let raw_command = self.command.clone();
-        let cwd = turn_context.cwd.clone();
+    let call_id = Uuid::new_v4().to_string();
+    let raw_command = command;
+    let cwd = turn_context.cwd.clone();

-        let parsed_cmd = parse_command(&display_command);
-        session
-            .send_event(
+    let parsed_cmd = parse_command(&display_command);
+    session
+        .send_event(
+            turn_context.as_ref(),
+            EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
+                call_id: call_id.clone(),
+                process_id: None,
+                turn_id: turn_context.sub_id.clone(),
+                command: display_command.clone(),
+                cwd: cwd.clone(),
+                parsed_cmd: parsed_cmd.clone(),
+                source: ExecCommandSource::UserShell,
+                interaction_input: None,
+            }),
+        )
+        .await;
+
+    let exec_env = ExecEnv {
+        command: exec_command.clone(),
+        cwd: cwd.clone(),
+        env: create_env(&turn_context.shell_environment_policy),
+        // TODO(zhao-oai): Now that we have ExecExpiration::Cancellation, we
+        // should use that instead of an "arbitrarily large" timeout here.
+        expiration: USER_SHELL_TIMEOUT_MS.into(),
+        sandbox: SandboxType::None,
+        windows_sandbox_level: turn_context.windows_sandbox_level,
+        sandbox_permissions: SandboxPermissions::UseDefault,
+        justification: None,
+        arg0: None,
+    };
+
+    let stdout_stream = Some(StdoutStream {
+        sub_id: turn_context.sub_id.clone(),
+        call_id: call_id.clone(),
+        tx_event: session.get_tx_event(),
+    });
+
+    let sandbox_policy = SandboxPolicy::DangerFullAccess;
+    let exec_result = execute_exec_env(exec_env, &sandbox_policy, stdout_stream)
+        .or_cancel(&cancellation_token)
+        .await;
+
+    match exec_result {
+        Err(CancelErr::Cancelled) => {
+            let aborted_message = "command aborted by user".to_string();
+            let exec_output = ExecToolCallOutput {
+                exit_code: -1,
+                stdout: StreamOutput::new(String::new()),
+                stderr: StreamOutput::new(aborted_message.clone()),
+                aggregated_output: StreamOutput::new(aborted_message.clone()),
+                duration: Duration::ZERO,
+                timed_out: false,
+            };
+            persist_user_shell_output(
+                &session,
                turn_context.as_ref(),
-                EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
-                    call_id: call_id.clone(),
-                    process_id: None,
-                    turn_id: turn_context.sub_id.clone(),
-                    command: display_command.clone(),
-                    cwd: cwd.clone(),
-                    parsed_cmd: parsed_cmd.clone(),
-                    source: ExecCommandSource::UserShell,
-                    interaction_input: None,
-                }),
+                &raw_command,
+                &exec_output,
+                mode,
+            )
+            .await;
+            session
+                .send_event(
+                    turn_context.as_ref(),
+                    EventMsg::ExecCommandEnd(ExecCommandEndEvent {
+                        call_id,
+                        process_id: None,
+                        turn_id: turn_context.sub_id.clone(),
+                        command: display_command.clone(),
+                        cwd: cwd.clone(),
+                        parsed_cmd: parsed_cmd.clone(),
+                        source: ExecCommandSource::UserShell,
+                        interaction_input: None,
+                        stdout: String::new(),
+                        stderr: aborted_message.clone(),
+                        aggregated_output: aborted_message.clone(),
+                        exit_code: -1,
+                        duration: Duration::ZERO,
+                        formatted_output: aborted_message,
+                    }),
+                )
+                .await;
+        }
+        Ok(Ok(output)) => {
+            session
+                .send_event(
+                    turn_context.as_ref(),
+                    EventMsg::ExecCommandEnd(ExecCommandEndEvent {
+                        call_id: call_id.clone(),
+                        process_id: None,
+                        turn_id: turn_context.sub_id.clone(),
+                        command: display_command.clone(),
+                        cwd: cwd.clone(),
+                        parsed_cmd: parsed_cmd.clone(),
+                        source: ExecCommandSource::UserShell,
+                        interaction_input: None,
+                        stdout: output.stdout.text.clone(),
+                        stderr: output.stderr.text.clone(),
+                        aggregated_output: output.aggregated_output.text.clone(),
+                        exit_code: output.exit_code,
+                        duration: output.duration,
+                        formatted_output: format_exec_output_str(
+                            &output,
+                            turn_context.truncation_policy,
+                        ),
+                    }),
+                )
+                .await;
+
+            persist_user_shell_output(&session, turn_context.as_ref(), &raw_command, &output, mode)
+                .await;
+        }
+        Ok(Err(err)) => {
+            error!("user shell command failed: {err:?}");
+            let message = format!("execution error: {err:?}");
+            let exec_output = ExecToolCallOutput {
+                exit_code: -1,
+                stdout: StreamOutput::new(String::new()),
+                stderr: StreamOutput::new(message.clone()),
+                aggregated_output: StreamOutput::new(message.clone()),
+                duration: Duration::ZERO,
+                timed_out: false,
+            };
+            session
+                .send_event(
+                    turn_context.as_ref(),
+                    EventMsg::ExecCommandEnd(ExecCommandEndEvent {
+                        call_id,
+                        process_id: None,
+                        turn_id: turn_context.sub_id.clone(),
+                        command: display_command,
+                        cwd,
+                        parsed_cmd,
+                        source: ExecCommandSource::UserShell,
+                        interaction_input: None,
+                        stdout: exec_output.stdout.text.clone(),
+                        stderr: exec_output.stderr.text.clone(),
+                        aggregated_output: exec_output.aggregated_output.text.clone(),
+                        exit_code: exec_output.exit_code,
+                        duration: exec_output.duration,
+                        formatted_output: format_exec_output_str(
+                            &exec_output,
+                            turn_context.truncation_policy,
+                        ),
+                    }),
+                )
+                .await;
+            persist_user_shell_output(
+                &session,
+                turn_context.as_ref(),
+                &raw_command,
+                &exec_output,
+                mode,
            )
            .await;
-
-        let exec_env = ExecEnv {
-            command: exec_command.clone(),
-            cwd: cwd.clone(),
-            env: create_env(&turn_context.shell_environment_policy),
-            // TODO(zhao-oai): Now that we have ExecExpiration::Cancellation, we
-            // should use that instead of an "arbitrarily large" timeout here.
-            expiration: USER_SHELL_TIMEOUT_MS.into(),
-            sandbox: SandboxType::None,
-            windows_sandbox_level: turn_context.windows_sandbox_level,
-            sandbox_permissions: SandboxPermissions::UseDefault,
-            justification: None,
-            arg0: None,
-        };
-
-        let stdout_stream = Some(StdoutStream {
-            sub_id: turn_context.sub_id.clone(),
-            call_id: call_id.clone(),
-            tx_event: session.get_tx_event(),
-        });
-
-        let sandbox_policy = SandboxPolicy::DangerFullAccess;
-        let exec_result = execute_exec_env(exec_env, &sandbox_policy, stdout_stream)
-            .or_cancel(&cancellation_token)
-            .await;
-
-        match exec_result {
-            Err(CancelErr::Cancelled) => {
-                let aborted_message = "command aborted by user".to_string();
-                let exec_output = ExecToolCallOutput {
-                    exit_code: -1,
-                    stdout: StreamOutput::new(String::new()),
-                    stderr: StreamOutput::new(aborted_message.clone()),
-                    aggregated_output: StreamOutput::new(aborted_message.clone()),
-                    duration: Duration::ZERO,
-                    timed_out: false,
-                };
-                let output_items = [user_shell_command_record_item(
-                    &raw_command,
-                    &exec_output,
-                    &turn_context,
-                )];
-                session
-                    .record_conversation_items(turn_context.as_ref(), &output_items)
-                    .await;
-                session
-                    .send_event(
-                        turn_context.as_ref(),
-                        EventMsg::ExecCommandEnd(ExecCommandEndEvent {
-                            call_id,
-                            process_id: None,
-                            turn_id: turn_context.sub_id.clone(),
-                            command: display_command.clone(),
-                            cwd: cwd.clone(),
-                            parsed_cmd: parsed_cmd.clone(),
-                            source: ExecCommandSource::UserShell,
-                            interaction_input: None,
-                            stdout: String::new(),
-                            stderr: aborted_message.clone(),
-                            aggregated_output: aborted_message.clone(),
-                            exit_code: -1,
-                            duration: Duration::ZERO,
-                            formatted_output: aborted_message,
-                        }),
-                    )
-                    .await;
-            }
-            Ok(Ok(output)) => {
-                session
-                    .send_event(
-                        turn_context.as_ref(),
-                        EventMsg::ExecCommandEnd(ExecCommandEndEvent {
-                            call_id: call_id.clone(),
-                            process_id: None,
-                            turn_id: turn_context.sub_id.clone(),
-                            command: display_command.clone(),
-                            cwd: cwd.clone(),
-                            parsed_cmd: parsed_cmd.clone(),
-                            source: ExecCommandSource::UserShell,
-                            interaction_input: None,
-                            stdout: output.stdout.text.clone(),
-                            stderr: output.stderr.text.clone(),
-                            aggregated_output: output.aggregated_output.text.clone(),
-                            exit_code: output.exit_code,
-                            duration: output.duration,
-                            formatted_output: format_exec_output_str(
-                                &output,
-                                turn_context.truncation_policy,
-                            ),
-                        }),
-                    )
-                    .await;
-
-                let output_items = [user_shell_command_record_item(
-                    &raw_command,
-                    &output,
-                    &turn_context,
-                )];
-                session
-                    .record_conversation_items(turn_context.as_ref(), &output_items)
-                    .await;
-            }
-            Ok(Err(err)) => {
-                error!("user shell command failed: {err:?}");
-                let message = format!("execution error: {err:?}");
-                let exec_output = ExecToolCallOutput {
-                    exit_code: -1,
-                    stdout: StreamOutput::new(String::new()),
-                    stderr: StreamOutput::new(message.clone()),
-                    aggregated_output: StreamOutput::new(message.clone()),
-                    duration: Duration::ZERO,
-                    timed_out: false,
-                };
-                session
-                    .send_event(
-                        turn_context.as_ref(),
-                        EventMsg::ExecCommandEnd(ExecCommandEndEvent {
-                            call_id,
-                            process_id: None,
-                            turn_id: turn_context.sub_id.clone(),
-                            command: display_command,
-                            cwd,
-                            parsed_cmd,
-                            source: ExecCommandSource::UserShell,
-                            interaction_input: None,
-                            stdout: exec_output.stdout.text.clone(),
-                            stderr: exec_output.stderr.text.clone(),
-                            aggregated_output: exec_output.aggregated_output.text.clone(),
-                            exit_code: exec_output.exit_code,
-                            duration: exec_output.duration,
-                            formatted_output: format_exec_output_str(
-                                &exec_output,
-                                turn_context.truncation_policy,
-                            ),
-                        }),
-                    )
-                    .await;
-                let output_items = [user_shell_command_record_item(
-                    &raw_command,
-                    &exec_output,
-                    &turn_context,
-                )];
-                session
-                    .record_conversation_items(turn_context.as_ref(), &output_items)
-                    .await;
-            }
        }
-        None
+    }
+}
+
+async fn persist_user_shell_output(
+    session: &Session,
+    turn_context: &TurnContext,
+    raw_command: &str,
+    exec_output: &ExecToolCallOutput,
+    mode: UserShellCommandMode,
+) {
+    let output_item = user_shell_command_record_item(raw_command, exec_output, turn_context);
+
+    if mode == UserShellCommandMode::StandaloneTurn {
+        session
+            .record_conversation_items(turn_context, std::slice::from_ref(&output_item))
+            .await;
+        return;
+    }
+
+    let response_input_item = match output_item {
+        ResponseItem::Message { role, content, .. } => ResponseInputItem::Message { role, content },
+        _ => unreachable!("user shell command output record should always be a message"),
+    };
+
+    if let Err(items) = session
+        .inject_response_items(vec![response_input_item])
+        .await
+    {
+        let response_items = items
+            .into_iter()
+            .map(ResponseItem::from)
+            .collect::<Vec<_>>();
+        session
+            .record_conversation_items(turn_context, &response_items)
+            .await;
    }
 }
--- a/codex-rs/core/tests/suite/user_shell_cmd.rs
+++ b/codex-rs/core/tests/suite/user_shell_cmd.rs
@@ -1,5 +1,6 @@
 use anyhow::Context;
 use codex_core::features::Feature;
+use codex_core::protocol::AskForApproval;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::ExecCommandEndEvent;
 use codex_core::protocol::ExecCommandSource;
@@ -7,6 +8,8 @@ use codex_core::protocol::ExecOutputStream;
 use codex_core::protocol::Op;
 use codex_core::protocol::SandboxPolicy;
 use codex_core::protocol::TurnAbortReason;
+use codex_protocol::config_types::ReasoningSummary;
+use codex_protocol::user_input::UserInput;
 use core_test_support::assert_regex_match;
 use core_test_support::responses;
 use core_test_support::responses::ev_assistant_message;
@@ -23,6 +26,8 @@ use core_test_support::wait_for_event_match;
 use regex_lite::escape;
 use std::path::PathBuf;
 use tempfile::TempDir;
+use tokio::time::Duration;
+use tokio::time::timeout;

 #[tokio::test]
 async fn user_shell_cmd_ls_and_cat_in_temp_dir() {
@@ -119,6 +124,115 @@ async fn user_shell_cmd_can_be_interrupted() {
    assert_eq!(ev.reason, TurnAbortReason::Interrupted);
 }

+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn user_shell_command_does_not_replace_active_turn() -> anyhow::Result<()> {
+    let server = start_mock_server().await;
+    let mut builder = test_codex().with_model("gpt-5.1");
+    let fixture = builder.build(&server).await?;
+
+    let call_id = "active-turn-shell-call";
+    let args = if cfg!(windows) {
+        serde_json::json!({
+            "command": "Start-Sleep -Seconds 2; Write-Output model-shell",
+            "timeout_ms": 10_000,
+        })
+    } else {
+        serde_json::json!({
+            "command": "sleep 2; echo model-shell",
+            "timeout_ms": 10_000,
+        })
+    };
+    let first = sse(vec![
+        ev_response_created("resp-1"),
+        ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
+        ev_completed("resp-1"),
+    ]);
+    let second = sse(vec![
+        ev_assistant_message("msg-1", "done"),
+        ev_completed("resp-2"),
+    ]);
+    let mock = responses::mount_sse_sequence(&server, vec![first, second]).await;
+
+    fixture
+        .codex
+        .submit(Op::UserTurn {
+            items: vec![UserInput::Text {
+                text: "run model shell command".to_string(),
+                text_elements: Vec::new(),
+            }],
+            final_output_json_schema: None,
+            cwd: fixture.cwd.path().to_path_buf(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy: SandboxPolicy::DangerFullAccess,
+            model: fixture.session_configured.model.clone(),
+            effort: None,
+            summary: ReasoningSummary::Auto,
+            collaboration_mode: None,
+            personality: None,
+        })
+        .await?;
+
+    let _ = wait_for_event_match(&fixture.codex, |ev| match ev {
+        EventMsg::ExecCommandBegin(event) if event.source == ExecCommandSource::Agent => {
+            Some(event.clone())
+        }
+        _ => None,
+    })
+    .await;
+
+    #[cfg(windows)]
+    let user_shell_command = "Write-Output user-shell".to_string();
+    #[cfg(not(windows))]
+    let user_shell_command = "printf user-shell".to_string();
+    fixture
+        .codex
+        .submit(Op::RunUserShellCommand {
+            command: user_shell_command,
+        })
+        .await?;
+
+    let mut saw_replaced_abort = false;
+    let mut saw_user_shell_end = false;
+    let mut saw_turn_complete = false;
+    for _ in 0..200 {
+        let event = timeout(Duration::from_secs(20), fixture.codex.next_event())
+            .await
+            .context("timed out waiting for event")?
+            .context("event stream ended unexpectedly")?;
+        match event.msg {
+            EventMsg::TurnAborted(ev) if ev.reason == TurnAbortReason::Replaced => {
+                saw_replaced_abort = true;
+            }
+            EventMsg::ExecCommandEnd(ev) if ev.source == ExecCommandSource::UserShell => {
+                saw_user_shell_end = true;
+            }
+            EventMsg::TurnComplete(_) => {
+                saw_turn_complete = true;
+                break;
+            }
+            _ => {}
+        }
+    }
+
+    assert!(saw_turn_complete, "expected turn to complete");
+    assert!(
+        saw_user_shell_end,
+        "expected user shell command to finish while turn was active"
+    );
+    assert!(
+        !saw_replaced_abort,
+        "user shell command should not replace the active turn"
+    );
+
+    assert_eq!(
+        mock.requests().len(),
+        2,
+        "active turn should continue and issue the follow-up model request"
+    );
+
+    Ok(())
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn user_shell_command_history_is_persisted_and_shared_with_model() -> anyhow::Result<()> {
    let server = responses::start_mock_server().await;