mirror of
https://github.com/openai/codex.git
synced 2026-04-30 09:26:44 +00:00
fix: prevent repeating interrupted turns (#9043)
## What

Record a model-visible `<turn_aborted>` marker in history when a turn is interrupted, and treat it as a session prefix.

## Why

When a turn is interrupted, Codex emits `TurnAborted` but previously did not persist anything model-visible in the conversation history. On the next user turn, the model can’t tell the previous work was aborted and may resume/repeat earlier actions (including duplicated side effects like re-opening PRs).

Fixes: https://github.com/openai/codex/issues/9042

## How

On `TurnAbortReason::Interrupted`, append a hidden user message containing a `<turn_aborted>…</turn_aborted>` marker and flush.

Treat `<turn_aborted>` like `<environment_context>` for session-prefix filtering.

Add a regression test to ensure follow-up turns don’t repeat side effects from an aborted turn.

## Testing

- `just fmt`
- `just fix -p codex-core`
- `cargo test -p codex-core -- --test-threads=1`
- `cargo test --all-features -- --test-threads=1`

---------

Co-authored-by: Skylar Graika <sgraika127@gmail.com>
Co-authored-by: jif-oai <jif@openai.com>
Co-authored-by: Eric Traut <etraut@openai.com>
This commit is contained in:
@@ -4340,6 +4340,8 @@ mod tests {
|
||||
|
||||
sess.abort_all_tasks(TurnAbortReason::Interrupted).await;
|
||||
|
||||
// Interrupts persist a model-visible `<turn_aborted>` marker into history, but there is no
|
||||
// separate client-visible event for that marker (only `EventMsg::TurnAborted`).
|
||||
let evt = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv())
|
||||
.await
|
||||
.expect("timeout waiting for event")
|
||||
@@ -4348,6 +4350,7 @@ mod tests {
|
||||
EventMsg::TurnAborted(e) => assert_eq!(TurnAbortReason::Interrupted, e.reason),
|
||||
other => panic!("unexpected event: {other:?}"),
|
||||
}
|
||||
// No extra events should be emitted after an abort.
|
||||
assert!(rx.try_recv().is_err());
|
||||
}
|
||||
|
||||
@@ -4370,11 +4373,17 @@ mod tests {
|
||||
|
||||
sess.abort_all_tasks(TurnAbortReason::Interrupted).await;
|
||||
|
||||
let evt = rx.recv().await.expect("event");
|
||||
// Even if tasks handle cancellation gracefully, interrupts still result in `TurnAborted`
|
||||
// being the only client-visible signal.
|
||||
let evt = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv())
|
||||
.await
|
||||
.expect("timeout waiting for event")
|
||||
.expect("event");
|
||||
match evt.msg {
|
||||
EventMsg::TurnAborted(e) => assert_eq!(TurnAbortReason::Interrupted, e.reason),
|
||||
other => panic!("unexpected event: {other:?}"),
|
||||
}
|
||||
// No extra events should be emitted after an abort.
|
||||
assert!(rx.try_recv().is_err());
|
||||
}
|
||||
|
||||
@@ -4390,42 +4399,67 @@ mod tests {
|
||||
|
||||
sess.abort_all_tasks(TurnAbortReason::Interrupted).await;
|
||||
|
||||
// Drain events until we observe ExitedReviewMode; earlier
|
||||
// RawResponseItem entries (e.g., environment context) may arrive first.
|
||||
loop {
|
||||
let evt = tokio::time::timeout(std::time::Duration::from_secs(1), rx.recv())
|
||||
// Aborting a review task should exit review mode before surfacing the abort to the client.
|
||||
// We scan for these events (rather than relying on fixed ordering) since unrelated events
|
||||
// may interleave.
|
||||
let mut exited_review_mode_idx = None;
|
||||
let mut turn_aborted_idx = None;
|
||||
let mut idx = 0usize;
|
||||
let deadline = tokio::time::Instant::now() + std::time::Duration::from_secs(3);
|
||||
while tokio::time::Instant::now() < deadline {
|
||||
let remaining = deadline.saturating_duration_since(tokio::time::Instant::now());
|
||||
let evt = tokio::time::timeout(remaining, rx.recv())
|
||||
.await
|
||||
.expect("timeout waiting for first event")
|
||||
.expect("first event");
|
||||
.expect("timeout waiting for event")
|
||||
.expect("event");
|
||||
let event_idx = idx;
|
||||
idx = idx.saturating_add(1);
|
||||
match evt.msg {
|
||||
EventMsg::ExitedReviewMode(ev) => {
|
||||
assert!(ev.review_output.is_none());
|
||||
exited_review_mode_idx = Some(event_idx);
|
||||
}
|
||||
EventMsg::TurnAborted(ev) => {
|
||||
assert_eq!(TurnAbortReason::Interrupted, ev.reason);
|
||||
turn_aborted_idx = Some(event_idx);
|
||||
break;
|
||||
}
|
||||
// Ignore any non-critical events before exit.
|
||||
_ => continue,
|
||||
}
|
||||
}
|
||||
loop {
|
||||
let evt = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv())
|
||||
.await
|
||||
.expect("timeout waiting for next event")
|
||||
.expect("event");
|
||||
match evt.msg {
|
||||
EventMsg::RawResponseItem(_) => continue,
|
||||
EventMsg::ItemStarted(_) | EventMsg::ItemCompleted(_) => continue,
|
||||
EventMsg::AgentMessage(_) => continue,
|
||||
EventMsg::TurnAborted(e) => {
|
||||
assert_eq!(TurnAbortReason::Interrupted, e.reason);
|
||||
break;
|
||||
}
|
||||
other => panic!("unexpected second event: {other:?}"),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
assert!(
|
||||
exited_review_mode_idx.is_some(),
|
||||
"expected ExitedReviewMode after abort"
|
||||
);
|
||||
assert!(
|
||||
turn_aborted_idx.is_some(),
|
||||
"expected TurnAborted after abort"
|
||||
);
|
||||
assert!(
|
||||
exited_review_mode_idx.unwrap() < turn_aborted_idx.unwrap(),
|
||||
"expected ExitedReviewMode before TurnAborted"
|
||||
);
|
||||
|
||||
// TODO(jif) investigate what is this?
|
||||
let history = sess.clone_history().await;
|
||||
let _ = history.raw_items();
|
||||
// The `<turn_aborted>` marker is silent in the event stream, so verify it is still
|
||||
// recorded in history for the model.
|
||||
assert!(
|
||||
history.raw_items().iter().any(|item| {
|
||||
let ResponseItem::Message { role, content, .. } = item else {
|
||||
return false;
|
||||
};
|
||||
if role != "user" {
|
||||
return false;
|
||||
}
|
||||
content.iter().any(|content_item| {
|
||||
let ContentItem::InputText { text } = content_item else {
|
||||
return false;
|
||||
};
|
||||
text.contains(crate::session_prefix::TURN_ABORTED_OPEN_TAG)
|
||||
})
|
||||
}),
|
||||
"expected a model-visible turn aborted marker in history after interrupt"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
||||
Reference in New Issue
Block a user