mirror of
https://github.com/openai/codex.git
synced 2026-04-29 00:55:38 +00:00
Treat compaction failure as failure state (#10927)
- Return compaction errors from local and remote compaction flows.
- Stop turns/tasks when auto-compaction fails instead of continuing execution.
This commit is contained in:
@@ -245,12 +245,13 @@ async fn remote_compact_trims_function_call_history_to_fit_context_window() -> R
|
||||
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
|
||||
.with_config(|config| {
|
||||
config.model_context_window = Some(2_000);
|
||||
config.model_auto_compact_token_limit = Some(200_000);
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
let codex = harness.test().codex.clone();
|
||||
|
||||
let response_log = responses::mount_sse_sequence(
|
||||
responses::mount_sse_sequence(
|
||||
harness.server(),
|
||||
vec![
|
||||
sse(vec![
|
||||
@@ -299,17 +300,139 @@ async fn remote_compact_trims_function_call_history_to_fit_context_window() -> R
|
||||
codex.submit(Op::Compact).await?;
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
let compact_request = compact_mock.single_request();
|
||||
let user_messages = compact_request.message_input_texts("user");
|
||||
assert!(
|
||||
response_log
|
||||
.function_call_output_text(retained_call_id)
|
||||
.is_some(),
|
||||
"expected retained shell call to produce function_call_output before compaction"
|
||||
user_messages
|
||||
.iter()
|
||||
.any(|message| message == first_user_message),
|
||||
"expected compact request to retain earlier user history"
|
||||
);
|
||||
assert!(
|
||||
response_log
|
||||
.function_call_output_text(trimmed_call_id)
|
||||
.is_some(),
|
||||
"expected trimmed shell call to produce function_call_output before compaction"
|
||||
user_messages
|
||||
.iter()
|
||||
.any(|message| message == second_user_message),
|
||||
"expected compact request to retain the user boundary message"
|
||||
);
|
||||
|
||||
assert!(
|
||||
compact_request.has_function_call(retained_call_id)
|
||||
&& compact_request
|
||||
.function_call_output_text(retained_call_id)
|
||||
.is_some(),
|
||||
"expected compact request to keep the older function call/result pair"
|
||||
);
|
||||
assert!(
|
||||
!compact_request.has_function_call(trimmed_call_id)
|
||||
&& compact_request
|
||||
.function_call_output_text(trimmed_call_id)
|
||||
.is_none(),
|
||||
"expected compact request to drop the trailing function call/result pair past the boundary"
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
compact_request.inputs_of_type("function_call").len(),
|
||||
1,
|
||||
"expected exactly one function call after trimming"
|
||||
);
|
||||
assert_eq!(
|
||||
compact_request.inputs_of_type("function_call_output").len(),
|
||||
1,
|
||||
"expected exactly one function call output after trimming"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg_attr(target_os = "windows", ignore)]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn auto_remote_compact_trims_function_call_history_to_fit_context_window() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let first_user_message = "turn with retained shell call";
|
||||
let second_user_message = "turn with trimmed shell call";
|
||||
let retained_call_id = "retained-call";
|
||||
let trimmed_call_id = "trimmed-call";
|
||||
let retained_command = "echo retained-shell-output";
|
||||
let trimmed_command = "yes x | head -n 3000";
|
||||
let harness = TestCodexHarness::with_builder(
|
||||
test_codex()
|
||||
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
|
||||
.with_config(|config| {
|
||||
config.model_context_window = Some(2_000);
|
||||
config.model_auto_compact_token_limit = Some(200_000);
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
let codex = harness.test().codex.clone();
|
||||
|
||||
responses::mount_sse_sequence(
|
||||
harness.server(),
|
||||
vec![
|
||||
sse(vec![
|
||||
responses::ev_shell_command_call(retained_call_id, retained_command),
|
||||
responses::ev_completed_with_tokens("retained-call-response", 100),
|
||||
]),
|
||||
sse(vec![
|
||||
responses::ev_assistant_message("retained-assistant", "retained complete"),
|
||||
responses::ev_completed("retained-final-response"),
|
||||
]),
|
||||
sse(vec![
|
||||
responses::ev_shell_command_call(trimmed_call_id, trimmed_command),
|
||||
responses::ev_completed_with_tokens("trimmed-call-response", 100),
|
||||
]),
|
||||
sse(vec![responses::ev_completed_with_tokens(
|
||||
"trimmed-final-response",
|
||||
500_000,
|
||||
)]),
|
||||
sse(vec![
|
||||
responses::ev_assistant_message("post-compact-assistant", "post compact complete"),
|
||||
responses::ev_completed("post-compact-final-response"),
|
||||
]),
|
||||
],
|
||||
)
|
||||
.await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: first_user_message.into(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await?;
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: second_user_message.into(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await?;
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
let compact_mock =
|
||||
responses::mount_compact_json_once(harness.server(), serde_json::json!({ "output": [] }))
|
||||
.await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "turn that triggers auto compact".into(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await?;
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
|
||||
assert_eq!(
|
||||
compact_mock.requests().len(),
|
||||
1,
|
||||
"expected exactly one remote compact request"
|
||||
);
|
||||
|
||||
let compact_request = compact_mock.single_request();
|
||||
@@ -356,6 +479,88 @@ async fn remote_compact_trims_function_call_history_to_fit_context_window() -> R
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Checks that a failed automatic remote compaction ends the turn instead of
/// letting the agent loop continue.
///
/// Flow: one ordinary turn whose mocked response reports a large token count,
/// then a second turn during which the mocked compact endpoint returns a
/// malformed payload. The test then asserts that an error event mentioning the
/// remote compact task was emitted, that the compact endpoint was hit exactly
/// once, and that the follow-up model response mock was never requested.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn auto_remote_compact_failure_stops_agent_loop() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
// Harness with dummy ChatGPT auth and an auto-compact token limit of 120.
// NOTE(review): presumably far below the 500_000 tokens reported by the first
// mocked response, so the next turn should attempt auto-compaction — confirm.
let harness = TestCodexHarness::with_builder(
|
||||
test_codex()
|
||||
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
|
||||
.with_config(|config| {
|
||||
config.model_auto_compact_token_limit = Some(120);
|
||||
}),
|
||||
)
|
||||
.await?;
|
||||
let codex = harness.test().codex.clone();
|
||||
|
||||
// Response for the first turn: an assistant message plus a completion event
// that reports 500_000 tokens.
mount_sse_once(
|
||||
harness.server(),
|
||||
sse(vec![
|
||||
responses::ev_assistant_message("initial-assistant", "initial turn complete"),
|
||||
responses::ev_completed_with_tokens("initial-response", 500_000),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
// Compact endpoint mock whose "output" is a string rather than an array.
// NOTE(review): presumably this is what makes the compaction fail — confirm
// against the compact response parser.
let compact_mock = responses::mount_compact_json_once(
|
||||
harness.server(),
|
||||
serde_json::json!({ "output": "invalid compact payload shape" }),
|
||||
)
|
||||
.await;
// Model response that would only be consumed if the loop kept going after the
// compaction attempt; asserted below to have received no requests.
let post_compact_turn_mock = mount_sse_once(
|
||||
harness.server(),
|
||||
sse(vec![
|
||||
responses::ev_assistant_message("post-compact-assistant", "should not run"),
|
||||
responses::ev_completed("post-compact-response"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
// First turn: submit user input and wait for the turn to complete.
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "turn that exceeds token threshold".into(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await?;
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
// Second turn: the one expected to trigger the auto-compaction attempt.
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![UserInput::Text {
|
||||
text: "turn that triggers auto compact".into(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
})
|
||||
.await?;
|
||||
|
||||
// Capture the message of the first error event, then wait for the turn to
// finish so the request counts checked below are taken after it ends.
let error_message = wait_for_event_match(&codex, |event| match event {
|
||||
EventMsg::Error(err) => Some(err.message.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await;
|
||||
|
||||
// The surfaced error must identify the remote compact task.
assert!(
|
||||
error_message.contains("Error running remote compact task"),
|
||||
"expected compact failure error, got {error_message}"
|
||||
);
// The compact endpoint must have been requested exactly once.
assert_eq!(
|
||||
compact_mock.requests().len(),
|
||||
1,
|
||||
"expected exactly one remote compact attempt"
|
||||
);
// No model request may follow the failed compaction.
assert!(
|
||||
post_compact_turn_mock.requests().is_empty(),
|
||||
"expected agent loop to stop after compaction failure"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg_attr(target_os = "windows", ignore)]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn remote_compact_trim_estimate_uses_session_base_instructions() -> Result<()> {
|
||||
@@ -806,11 +1011,8 @@ async fn remote_compact_and_resume_refresh_stale_developer_instructions() -> Res
|
||||
let server = wiremock::MockServer::start().await;
|
||||
let stale_developer_message = "STALE_DEVELOPER_INSTRUCTIONS_SHOULD_BE_REMOVED";
|
||||
|
||||
let mut start_builder = test_codex()
|
||||
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
|
||||
.with_config(|config| {
|
||||
config.features.enable(Feature::RemoteCompaction);
|
||||
});
|
||||
let mut start_builder =
|
||||
test_codex().with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing());
|
||||
let initial = start_builder.build(&server).await?;
|
||||
let home = initial.home.clone();
|
||||
let rollout_path = initial
|
||||
@@ -900,11 +1102,8 @@ async fn remote_compact_and_resume_refresh_stale_developer_instructions() -> Res
|
||||
})
|
||||
.await;
|
||||
|
||||
let mut resume_builder = test_codex()
|
||||
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
|
||||
.with_config(|config| {
|
||||
config.features.enable(Feature::RemoteCompaction);
|
||||
});
|
||||
let mut resume_builder =
|
||||
test_codex().with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing());
|
||||
let resumed = resume_builder.resume(&server, home, rollout_path).await?;
|
||||
|
||||
resumed
|
||||
@@ -964,11 +1163,7 @@ async fn remote_compact_refreshes_stale_developer_instructions_without_resume()
|
||||
let server = wiremock::MockServer::start().await;
|
||||
let stale_developer_message = "STALE_DEVELOPER_INSTRUCTIONS_SHOULD_BE_REMOVED";
|
||||
|
||||
let mut builder = test_codex()
|
||||
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
|
||||
.with_config(|config| {
|
||||
config.features.enable(Feature::RemoteCompaction);
|
||||
});
|
||||
let mut builder = test_codex().with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing());
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let responses_mock = responses::mount_sse_sequence(
|
||||
|
||||
Reference in New Issue
Block a user