Files
codex/prs/bolinfest/PR-1610.md
2025-09-02 15:17:45 -07:00

430 lines
18 KiB
Markdown
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# PR #1610: Interrupt bug
- URL: https://github.com/openai/codex/pull/1610
- Author: aibrahim-oai
- Created: 2025-07-18 07:13:16 UTC
- Updated: 2025-07-18 17:04:41 UTC
- Changes: +164/-5, Files changed: 2, Commits: 2
## Description
Interrupt is currently buggy. It uses the buffered deltas. (Description appears truncated in this export; see the PR link above for the full text.)
## Full Diff
```diff
diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index 2c5baf152f..d42d056a76 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -990,6 +990,52 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
sess.tx_event.send(event).await.ok();
}
+// ---
+// Helpers --------------------------------------------------------------------
+//
+// When a turn is interrupted before Codex can deliver tool output(s) back to
+// the model, the next request can fail with a 400 from the OpenAI API:
+// {"error": {"message": "No tool output found for function call call_XXXXX", ...}}
+// Historically this manifested as a confusing retry loop ("stream error: 400 …")
+// because we never learned about the missing `call_id` (the stream was aborted
+// before we observed the `ResponseEvent::OutputItemDone` that would have let us
+// record it in `pending_call_ids`).
+//
+// To make interruption robust we parse the error body for the offending call id
+// and add it to `pending_call_ids` so the very next retry can inject a synthetic
+// `FunctionCallOutput { content: "aborted" }` and satisfy the API contract.
+// -----------------------------------------------------------------------------
+fn extract_missing_tool_call_id(body: &str) -> Option<String> {
+ // Try to parse the canonical JSON error shape first.
+ if let Ok(v) = serde_json::from_str::<serde_json::Value>(body) {
+ if let Some(msg) = v
+ .get("error")
+ .and_then(|e| e.get("message"))
+ .and_then(|m| m.as_str())
+ {
+ if let Some(id) = extract_missing_tool_call_id_from_msg(msg) {
+ return Some(id);
+ }
+ }
+ }
+ // Fallback: scan the raw body.
+ extract_missing_tool_call_id_from_msg(body)
+}
+
+fn extract_missing_tool_call_id_from_msg(msg: &str) -> Option<String> {
+ const NEEDLE: &str = "No tool output found for function call";
+ let idx = msg.find(NEEDLE)?;
+ let rest = &msg[idx + NEEDLE.len()..];
+ // Find the beginning of the call id (typically starts with "call_").
+ let start = rest.find("call_")?;
+ let rest = &rest[start..];
+ // Capture valid id chars [A-Za-z0-9_-/]. Hyphen shows up in some IDs; be permissive.
+ let end = rest
+ .find(|c: char| !(c.is_ascii_alphanumeric() || c == '_' || c == '-' || c == '/'))
+ .unwrap_or(rest.len());
+ Some(rest[..end].to_string())
+}
+
async fn run_turn(
sess: &Session,
sub_id: String,
@@ -1024,6 +1070,50 @@ async fn run_turn(
Ok(output) => return Ok(output),
Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted),
Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)),
+ Err(CodexErr::UnexpectedStatus(status, body)) => {
+ // Detect the specific 400 "No tool output found for function call ..." error that
+ // occurs when a user interrupted before Codex could answer a tool call.
+ if status == reqwest::StatusCode::BAD_REQUEST {
+ if let Some(call_id) = extract_missing_tool_call_id(&body) {
+ {
+ let mut state = sess.state.lock().unwrap();
+ state.pending_call_ids.insert(call_id.clone());
+ }
+ // Surface a friendlier background event so users understand the recovery.
+ sess
+ .notify_background_event(
+ &sub_id,
+ format!(
+ "previous turn interrupted before responding to tool {call_id}; sending aborted output and retrying…",
+ ),
+ )
+ .await;
+ // Immediately retry the turn without consuming a provider stream retry budget.
+ continue;
+ }
+ }
+ // Fall through to generic retry path if we could not autorecover.
+ let e = CodexErr::UnexpectedStatus(status, body);
+ // Use the configured provider-specific stream retry budget.
+ let max_retries = sess.client.get_provider().stream_max_retries();
+ if retries < max_retries {
+ retries += 1;
+ let delay = backoff(retries);
+ warn!(
+ "stream disconnected - retrying turn ({retries}/{max_retries} in {delay:?})...",
+ );
+ sess.notify_background_event(
+ &sub_id,
+ format!(
+ "stream error: {e}; retrying {retries}/{max_retries} in {delay:?}…",
+ ),
+ )
+ .await;
+ tokio::time::sleep(delay).await;
+ } else {
+ return Err(e);
+ }
+ }
Err(e) => {
// Use the configured provider-specific stream retry budget.
let max_retries = sess.client.get_provider().stream_max_retries();
@@ -1040,7 +1130,7 @@ async fn run_turn(
sess.notify_background_event(
&sub_id,
format!(
- "stream error: {e}; retrying {retries}/{max_retries} in {delay:?}…"
+ "stream error: {e}; retrying {retries}/{max_retries} in {delay:?}…",
),
)
.await;
diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs
index 7c825acd41..0f72f417dc 100644
--- a/codex-rs/tui/src/chatwidget.rs
+++ b/codex-rs/tui/src/chatwidget.rs
@@ -53,6 +53,7 @@ pub(crate) struct ChatWidget<'a> {
token_usage: TokenUsage,
reasoning_buffer: String,
answer_buffer: String,
+ active_task_id: Option<String>,
}
#[derive(Clone, Copy, Eq, PartialEq)]
@@ -141,6 +142,7 @@ impl ChatWidget<'_> {
token_usage: TokenUsage::default(),
reasoning_buffer: String::new(),
answer_buffer: String::new(),
+ active_task_id: None,
}
}
@@ -222,10 +224,30 @@ impl ChatWidget<'_> {
self.conversation_history.add_user_message(text);
}
self.conversation_history.scroll_to_bottom();
+
+ // IMPORTANT: Starting a *new* user turn. Clear any partially streamed
+ // answer from a previous turn (e.g., one that was interrupted) so that
+ // the next AgentMessageDelta spawns a fresh agent message cell instead
+ // of overwriting the last one.
+ self.answer_buffer.clear();
+ self.reasoning_buffer.clear();
}
pub(crate) fn handle_codex_event(&mut self, event: Event) {
- let Event { id, msg } = event;
+ // Retain the event ID so we can refer to it after destructuring.
+ let event_id = event.id.clone();
+ let Event { id: _, msg } = event;
+
+ // When we are in the middle of a task (active_task_id is Some) we drop
+ // streaming text/reasoning events for *other* task IDs. This prevents
+ // late tokens from an interrupted run from bleeding into the current
+ // answer.
+ let should_drop_streaming = self
+ .active_task_id
+ .as_ref()
+ .map(|active| active != &event_id)
+ .unwrap_or(false);
+
match msg {
EventMsg::SessionConfigured(event) => {
// Record session information at the top of the conversation.
@@ -246,6 +268,9 @@ impl ChatWidget<'_> {
self.request_redraw();
}
EventMsg::AgentMessage(AgentMessageEvent { message }) => {
+ if should_drop_streaming {
+ return;
+ }
// if the answer buffer is empty, this means we haven't received any
// delta. Thus, we need to print the message as a new answer.
if self.answer_buffer.is_empty() {
@@ -259,6 +284,9 @@ impl ChatWidget<'_> {
self.request_redraw();
}
EventMsg::AgentMessageDelta(AgentMessageDeltaEvent { delta }) => {
+ if should_drop_streaming {
+ return;
+ }
if self.answer_buffer.is_empty() {
self.conversation_history
.add_agent_message(&self.config, "".to_string());
@@ -269,6 +297,9 @@ impl ChatWidget<'_> {
self.request_redraw();
}
EventMsg::AgentReasoningDelta(AgentReasoningDeltaEvent { delta }) => {
+ if should_drop_streaming {
+ return;
+ }
if self.reasoning_buffer.is_empty() {
self.conversation_history
.add_agent_reasoning(&self.config, "".to_string());
@@ -279,6 +310,9 @@ impl ChatWidget<'_> {
self.request_redraw();
}
EventMsg::AgentReasoning(AgentReasoningEvent { text }) => {
+ if should_drop_streaming {
+ return;
+ }
// if the reasoning buffer is empty, this means we haven't received any
// delta. Thus, we need to print the message as a new reasoning.
if self.reasoning_buffer.is_empty() {
@@ -293,6 +327,10 @@ impl ChatWidget<'_> {
self.request_redraw();
}
EventMsg::TaskStarted => {
+ // New task has begun update state and clear any stale buffers.
+ self.active_task_id = Some(event_id);
+ self.answer_buffer.clear();
+ self.reasoning_buffer.clear();
self.bottom_pane.clear_ctrl_c_quit_hint();
self.bottom_pane.set_task_running(true);
self.request_redraw();
@@ -300,6 +338,10 @@ impl ChatWidget<'_> {
EventMsg::TaskComplete(TaskCompleteEvent {
last_agent_message: _,
}) => {
+ // Task finished; clear active_task_id so that subsequent events are processed.
+ if self.active_task_id.as_ref() == Some(&event_id) {
+ self.active_task_id = None;
+ }
self.bottom_pane.set_task_running(false);
self.request_redraw();
}
@@ -309,16 +351,25 @@ impl ChatWidget<'_> {
.set_token_usage(self.token_usage.clone(), self.config.model_context_window);
}
EventMsg::Error(ErrorEvent { message }) => {
+ // Error events always get surfaced (even for stale task IDs) so that the user sees
+ // why a run stopped. However, only clear the running indicator if this is the
+ // active task.
+ if self.active_task_id.as_ref() == Some(&event_id) {
+ self.bottom_pane.set_task_running(false);
+ self.active_task_id = None;
+ }
self.conversation_history.add_error(message);
- self.bottom_pane.set_task_running(false);
}
EventMsg::ExecApprovalRequest(ExecApprovalRequestEvent {
command,
cwd,
reason,
}) => {
+ if should_drop_streaming {
+ return;
+ }
let request = ApprovalRequest::Exec {
- id,
+ id: event_id,
command,
cwd,
reason,
@@ -330,6 +381,9 @@ impl ChatWidget<'_> {
reason,
grant_root,
}) => {
+ if should_drop_streaming {
+ return;
+ }
// ------------------------------------------------------------------
// Before we even prompt the user for approval we surface the patch
// summary in the main conversation so that the dialog appears in a
@@ -348,7 +402,7 @@ impl ChatWidget<'_> {
// Now surface the approval request in the BottomPane as before.
let request = ApprovalRequest::ApplyPatch {
- id,
+ id: event_id,
reason,
grant_root,
};
@@ -360,6 +414,9 @@ impl ChatWidget<'_> {
command,
cwd: _,
}) => {
+ if should_drop_streaming {
+ return;
+ }
self.conversation_history
.add_active_exec_command(call_id, command);
self.request_redraw();
@@ -369,6 +426,9 @@ impl ChatWidget<'_> {
auto_approved,
changes,
}) => {
+ if should_drop_streaming {
+ return;
+ }
// Even when a patch is autoapproved we still display the
// summary so the user can follow along.
self.conversation_history
@@ -384,6 +444,9 @@ impl ChatWidget<'_> {
stdout,
stderr,
}) => {
+ if should_drop_streaming {
+ return;
+ }
self.conversation_history
.record_completed_exec_command(call_id, stdout, stderr, exit_code);
self.request_redraw();
@@ -394,11 +457,17 @@ impl ChatWidget<'_> {
tool,
arguments,
}) => {
+ if should_drop_streaming {
+ return;
+ }
self.conversation_history
.add_active_mcp_tool_call(call_id, server, tool, arguments);
self.request_redraw();
}
EventMsg::McpToolCallEnd(mcp_tool_call_end_event) => {
+ if should_drop_streaming {
+ return;
+ }
let success = mcp_tool_call_end_event.is_success();
let McpToolCallEndEvent { call_id, result } = mcp_tool_call_end_event;
self.conversation_history
```
## Review Comments
### codex-rs/tui/src/chatwidget.rs
- Created: 2025-07-18 07:16:35 UTC | Link: https://github.com/openai/codex/pull/1610#discussion_r2215214532
```diff
@@ -222,10 +224,30 @@ impl ChatWidget<'_> {
self.conversation_history.add_user_message(text);
}
self.conversation_history.scroll_to_bottom();
+
+ // IMPORTANT: Starting a *new* user turn. Clear any partially streamed
+ // answer from a previous turn (e.g., one that was interrupted) so that
+ // the next AgentMessageDelta spawns a fresh agent message cell instead
+ // of overwriting the last one.
+ self.answer_buffer.clear();
+ self.reasoning_buffer.clear();
}
pub(crate) fn handle_codex_event(&mut self, event: Event) {
- let Event { id, msg } = event;
+ // Retain the event ID so we can refer to it after destructuring.
+ let event_id = event.id.clone();
+ let Event { id: _, msg } = event;
```
> Why did this change? Why not destructure as before without cloning? I would defer the `clone()` until it is necessary (i.e., you need to pass the id by value to another function).
>
> If you just want to change the name:
>
> ```rust
> let Event { id: event_id, msg } = event;
> ```
- Created: 2025-07-18 07:18:25 UTC | Link: https://github.com/openai/codex/pull/1610#discussion_r2215218104
```diff
@@ -246,6 +268,9 @@ impl ChatWidget<'_> {
self.request_redraw();
}
EventMsg::AgentMessage(AgentMessageEvent { message }) => {
+ if should_drop_streaming {
+ return;
+ }
```
> Admittedly, this is my personal style, but I think it has merit: anytime you have an early `return` from a block of code, I would put a blank line after the closing `}` of the early `return` to help call attention to the fact that it is not straight-line code.
- Created: 2025-07-18 07:20:56 UTC | Link: https://github.com/openai/codex/pull/1610#discussion_r2215223225
```diff
@@ -309,16 +351,25 @@ impl ChatWidget<'_> {
.set_token_usage(self.token_usage.clone(), self.config.model_context_window);
}
EventMsg::Error(ErrorEvent { message }) => {
+ // Error events always get surfaced (even for stale task IDs) so that the user sees
+ // why a run stopped. However, only clear the running indicator if this is the
+ // active task.
+ if self.active_task_id.as_ref() == Some(&event_id) {
+ self.bottom_pane.set_task_running(false);
+ self.active_task_id = None;
+ }
self.conversation_history.add_error(message);
- self.bottom_pane.set_task_running(false);
}
EventMsg::ExecApprovalRequest(ExecApprovalRequestEvent {
command,
cwd,
reason,
}) => {
+ if should_drop_streaming {
```
> Are you sure we should drop in this case: don't we need to ensure this request is displayed to the user? Am I misunderstanding?
- Created: 2025-07-18 07:22:12 UTC | Link: https://github.com/openai/codex/pull/1610#discussion_r2215226140
```diff
@@ -330,6 +381,9 @@ impl ChatWidget<'_> {
reason,
grant_root,
}) => {
+ if should_drop_streaming {
```
> Seeing this `if` in what feels like the majority of cases makes me wonder if there's a cleaner way to do this so we don't have to copy/paste this so much?