Files
codex/prs/bolinfest/PR-1908.md
2025-09-02 15:17:45 -07:00

351 lines
12 KiB
Markdown
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# PR #1908: Ensure exec command end always emitted
- URL: https://github.com/openai/codex/pull/1908
- Author: aibrahim-oai
- Created: 2025-08-07 01:08:48 UTC
- Updated: 2025-08-07 06:26:05 UTC
- Changes: +120/-73, Files changed: 2, Commits: 12
## Description
## Summary
- defer ExecCommandEnd emission until after sandbox resolution
- make sandbox error handler return final exec output and response
- align sandbox error stderr with response content and rename to `final_output`
- replace unstable `let` chains in client command header logic
## Testing
- `just fmt`
- `just fix`
- `cargo test --all-features` *(fails: NotPresent in core/tests/client.rs)*
------
https://chatgpt.com/codex/tasks/task_i_6893e63b0c408321a8e1ff2a052c4c51
## Full Diff
```diff
diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index 98d13b4cd6..4a4faa84ee 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -46,6 +46,7 @@ use crate::conversation_history::ConversationHistory;
use crate::error::CodexErr;
use crate::error::Result as CodexResult;
use crate::error::SandboxErr;
+use crate::error::get_error_message_ui;
use crate::exec::ExecParams;
use crate::exec::ExecToolCallOutput;
use crate::exec::SandboxType;
@@ -468,6 +469,57 @@ impl Session {
}
}
}
+ /// Runs the exec tool call and emits events for the begin and end of the
+ /// command even on error.
+ ///
+ /// Returns the output of the exec tool call.
+ async fn run_exec_with_events<'a>(
+ &self,
+ turn_diff_tracker: &mut TurnDiffTracker,
+ begin_ctx: ExecCommandContext,
+ exec_args: ExecInvokeArgs<'a>,
+ ) -> crate::error::Result<ExecToolCallOutput> {
+ let is_apply_patch = begin_ctx.apply_patch.is_some();
+ let sub_id = begin_ctx.sub_id.clone();
+ let call_id = begin_ctx.call_id.clone();
+
+ self.on_exec_command_begin(turn_diff_tracker, begin_ctx.clone())
+ .await;
+
+ let result = process_exec_tool_call(
+ exec_args.params,
+ exec_args.sandbox_type,
+ exec_args.ctrl_c,
+ exec_args.sandbox_policy,
+ exec_args.codex_linux_sandbox_exe,
+ exec_args.stdout_stream,
+ )
+ .await;
+
+ let output_stderr;
+ let borrowed: &ExecToolCallOutput = match &result {
+ Ok(output) => output,
+ Err(e) => {
+ output_stderr = ExecToolCallOutput {
+ exit_code: -1,
+ stdout: String::new(),
+ stderr: get_error_message_ui(e),
+ duration: Duration::default(),
+ };
+ &output_stderr
+ }
+ };
+ self.on_exec_command_end(
+ turn_diff_tracker,
+ &sub_id,
+ &call_id,
+ borrowed,
+ is_apply_patch,
+ )
+ .await;
+
+ result
+ }
/// Helper that emits a BackgroundEvent with the given message. This keeps
/// the callsites terse so adding more diagnostics does not clutter the
@@ -1717,6 +1769,15 @@ fn parse_container_exec_arguments(
}
}
+pub struct ExecInvokeArgs<'a> {
+ pub params: ExecParams,
+ pub sandbox_type: SandboxType,
+ pub ctrl_c: Arc<Notify>,
+ pub sandbox_policy: &'a SandboxPolicy,
+ pub codex_linux_sandbox_exe: &'a Option<PathBuf>,
+ pub stdout_stream: Option<StdoutStream>,
+}
+
fn maybe_run_with_user_profile(params: ExecParams, sess: &Session) -> ExecParams {
if sess.shell_environment_policy.use_profile {
let command = sess
@@ -1887,23 +1948,26 @@ async fn handle_container_exec_with_params(
},
),
};
- sess.on_exec_command_begin(turn_diff_tracker, exec_command_context.clone())
- .await;
let params = maybe_run_with_user_profile(params, sess);
- let output_result = process_exec_tool_call(
- params.clone(),
- sandbox_type,
- sess.ctrl_c.clone(),
- &sess.sandbox_policy,
- &sess.codex_linux_sandbox_exe,
- Some(StdoutStream {
- sub_id: sub_id.clone(),
- call_id: call_id.clone(),
- tx_event: sess.tx_event.clone(),
- }),
- )
- .await;
+ let output_result = sess
+ .run_exec_with_events(
+ turn_diff_tracker,
+ exec_command_context.clone(),
+ ExecInvokeArgs {
+ params: params.clone(),
+ sandbox_type,
+ ctrl_c: sess.ctrl_c.clone(),
+ sandbox_policy: &sess.sandbox_policy,
+ codex_linux_sandbox_exe: &sess.codex_linux_sandbox_exe,
+ stdout_stream: Some(StdoutStream {
+ sub_id: sub_id.clone(),
+ call_id: call_id.clone(),
+ tx_event: sess.tx_event.clone(),
+ }),
+ },
+ )
+ .await;
match output_result {
Ok(output) => {
@@ -1914,24 +1978,14 @@ async fn handle_container_exec_with_params(
duration,
} = &output;
- sess.on_exec_command_end(
- turn_diff_tracker,
- &sub_id,
- &call_id,
- &output,
- exec_command_context.apply_patch.is_some(),
- )
- .await;
-
let is_success = *exit_code == 0;
let content = format_exec_output(
if is_success { stdout } else { stderr },
*exit_code,
*duration,
);
-
ResponseInputItem::FunctionCallOutput {
- call_id,
+ call_id: call_id.clone(),
output: FunctionCallOutputPayload {
content,
success: Some(is_success),
@@ -1949,16 +2003,13 @@ async fn handle_container_exec_with_params(
)
.await
}
- Err(e) => {
- // Handle non-sandbox errors
- ResponseInputItem::FunctionCallOutput {
- call_id,
- output: FunctionCallOutputPayload {
- content: format!("execution error: {e}"),
- success: None,
- },
- }
- }
+ Err(e) => ResponseInputItem::FunctionCallOutput {
+ call_id: call_id.clone(),
+ output: FunctionCallOutputPayload {
+ content: format!("execution error: {e}"),
+ success: None,
+ },
+ },
}
}
@@ -1973,7 +2024,6 @@ async fn handle_sandbox_error(
let call_id = exec_command_context.call_id.clone();
let sub_id = exec_command_context.sub_id.clone();
let cwd = exec_command_context.cwd.clone();
- let is_apply_patch = exec_command_context.apply_patch.is_some();
// Early out if either the user never wants to be asked for approval, or
// we're letting the model manage escalation requests. Otherwise, continue
@@ -2039,24 +2089,26 @@ async fn handle_sandbox_error(
sess.notify_background_event(&sub_id, "retrying command without sandbox")
.await;
- sess.on_exec_command_begin(turn_diff_tracker, exec_command_context)
- .await;
-
// This is an escalated retry; the policy will not be
// examined and the sandbox has been set to `None`.
- let retry_output_result = process_exec_tool_call(
- params,
- SandboxType::None,
- sess.ctrl_c.clone(),
- &sess.sandbox_policy,
- &sess.codex_linux_sandbox_exe,
- Some(StdoutStream {
- sub_id: sub_id.clone(),
- call_id: call_id.clone(),
- tx_event: sess.tx_event.clone(),
- }),
- )
- .await;
+ let retry_output_result = sess
+ .run_exec_with_events(
+ turn_diff_tracker,
+ exec_command_context.clone(),
+ ExecInvokeArgs {
+ params,
+ sandbox_type: SandboxType::None,
+ ctrl_c: sess.ctrl_c.clone(),
+ sandbox_policy: &sess.sandbox_policy,
+ codex_linux_sandbox_exe: &sess.codex_linux_sandbox_exe,
+ stdout_stream: Some(StdoutStream {
+ sub_id: sub_id.clone(),
+ call_id: call_id.clone(),
+ tx_event: sess.tx_event.clone(),
+ }),
+ },
+ )
+ .await;
match retry_output_result {
Ok(retry_output) => {
@@ -2067,15 +2119,6 @@ async fn handle_sandbox_error(
duration,
} = &retry_output;
- sess.on_exec_command_end(
- turn_diff_tracker,
- &sub_id,
- &call_id,
- &retry_output,
- is_apply_patch,
- )
- .await;
-
let is_success = *exit_code == 0;
let content = format_exec_output(
if is_success { stdout } else { stderr },
@@ -2084,23 +2127,20 @@ async fn handle_sandbox_error(
);
ResponseInputItem::FunctionCallOutput {
- call_id,
+ call_id: call_id.clone(),
output: FunctionCallOutputPayload {
content,
success: Some(is_success),
},
}
}
- Err(e) => {
- // Handle retry failure
- ResponseInputItem::FunctionCallOutput {
- call_id,
- output: FunctionCallOutputPayload {
- content: format!("retry failed: {e}"),
- success: None,
- },
- }
- }
+ Err(e) => ResponseInputItem::FunctionCallOutput {
+ call_id: call_id.clone(),
+ output: FunctionCallOutputPayload {
+ content: format!("retry failed: {e}"),
+ success: None,
+ },
+ },
}
}
ReviewDecision::Denied | ReviewDecision::Abort => {
diff --git a/codex-rs/core/src/error.rs b/codex-rs/core/src/error.rs
index 9cdc4eb544..537f4a0361 100644
--- a/codex-rs/core/src/error.rs
+++ b/codex-rs/core/src/error.rs
@@ -132,3 +132,10 @@ impl CodexErr {
(self as &dyn std::any::Any).downcast_ref::<T>()
}
}
+
+pub fn get_error_message_ui(e: &CodexErr) -> String {
+ match e {
+ CodexErr::Sandbox(SandboxErr::Denied(_, _, stderr)) => stderr.to_string(),
+ _ => e.to_string(),
+ }
+}
```
## Review Comments
### codex-rs/core/src/codex.rs
- Created: 2025-08-07 06:05:20 UTC | Link: https://github.com/openai/codex/pull/1908#discussion_r2259168708
```diff
@@ -1717,6 +1766,15 @@ fn parse_container_exec_arguments(
}
}
+pub struct ExecInvokeArgs<'a> {
+ pub params: crate::exec::ExecParams,
```
> I would make more of these top-level imports so this is a bit easier to read.
- Created: 2025-08-07 06:11:56 UTC | Link: https://github.com/openai/codex/pull/1908#discussion_r2259189591
```diff
@@ -469,6 +470,54 @@ impl Session {
}
}
+ async fn run_exec_with_events<'a>(
```
> This function returns `Result`, so can you add a docstring to comment that we guarantee that both `on_exec_command_begin()` and `on_exec_command_end()` are called, even if this function returns `Err`?
>
> Also update to note that this is used both when the shell call is done originally as well as for the sandbox retry.