This commit is contained in:
Ahmed Ibrahim
2025-10-22 16:17:05 -07:00
parent 838d82e19c
commit d63e920ea4
2 changed files with 122 additions and 1 deletions

View File

@@ -1965,7 +1965,6 @@ async fn try_run_turn(
let event = match stream.next().or_cancel(&cancellation_token).await {
Ok(event) => event,
Err(codex_async_utils::CancelErr::Cancelled) => {
info!("Turn aborted");
let processed_items = output.try_collect().await?;
return Err(CodexErr::TurnAborted {
dangling_artifacts: processed_items,

View File

@@ -1,3 +1,4 @@
use std::sync::Arc;
use std::time::Duration;
use codex_core::protocol::EventMsg;
@@ -5,7 +6,9 @@ use codex_core::protocol::Op;
use codex_protocol::user_input::UserInput;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_function_call;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::mount_sse_once;
use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use core_test_support::test_codex::test_codex;
@@ -67,3 +70,122 @@ async fn interrupt_long_running_tool_emits_turn_aborted() {
)
.await;
}
/// After an interrupt we expect the next request to the model to include both
/// the original tool call and an `"aborted"` `function_call_output`. This test
/// exercises the follow-up flow: it sends another user turn, inspects the mock
/// responses server, and ensures the model receives the synthesized abort.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn interrupt_tool_records_history_entries() {
let command = vec![
"bash".to_string(),
"-lc".to_string(),
"sleep 60".to_string(),
];
let call_id = "call-history";
let args = json!({
"command": command,
"timeout_ms": 60_000
})
.to_string();
let first_body = sse(vec![
ev_response_created("resp-history"),
ev_function_call(call_id, "shell", &args),
ev_completed("resp-history"),
]);
let follow_up_body = sse(vec![
ev_response_created("resp-followup"),
ev_completed("resp-followup"),
]);
let server = start_mock_server().await;
let response_mock = mount_sse_sequence(&server, vec![first_body, follow_up_body]).await;
let fixture = test_codex().build(&server).await.unwrap();
let codex = Arc::clone(&fixture.codex);
let wait_timeout = Duration::from_secs(5);
codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "start history recording".into(),
}],
})
.await
.unwrap();
wait_for_event_with_timeout(
&codex,
|ev| matches!(ev, EventMsg::ExecCommandBegin(_)),
wait_timeout,
)
.await;
codex.submit(Op::Interrupt).await.unwrap();
wait_for_event_with_timeout(
&codex,
|ev| matches!(ev, EventMsg::TurnAborted(_)),
wait_timeout,
)
.await;
codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "follow up".into(),
}],
})
.await
.unwrap();
wait_for_event_with_timeout(
&codex,
|ev| matches!(ev, EventMsg::TaskComplete(_)),
wait_timeout,
)
.await;
let requests = response_mock.requests();
assert!(
requests.len() >= 2,
"expected at least two calls to the responses API"
);
let mut call_seen = false;
let mut abort_seen = false;
for request in requests {
let input = request.input();
for window in input.windows(2) {
let current = &window[0];
let next = &window[1];
if current.get("type").and_then(|v| v.as_str()) == Some("function_call")
&& current.get("call_id").and_then(|v| v.as_str()) == Some(call_id)
{
call_seen = true;
if next.get("type").and_then(|v| v.as_str()) == Some("function_call_output")
&& next.get("call_id").and_then(|v| v.as_str()) == Some(call_id)
{
let content_matches =
next.get("output").and_then(serde_json::Value::as_str) == Some("aborted");
if content_matches {
abort_seen = true;
break;
}
}
}
}
if call_seen && abort_seen {
break;
}
}
assert!(call_seen, "function call not recorded in responses payload");
assert!(
abort_seen,
"aborted function call output not recorded in responses payload"
);
}