Preserve persisted history when compact retries trim prompts

Build compaction replacement history from persisted session history instead of the retry-local prompt buffer, so ContextWindowExceeded trimming does not drop earlier real user messages or ghost snapshots from replacement history.

Add a regression test covering repeated compaction retries and verifying follow-up requests retain earlier user messages even after retry prompt trimming.

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Charles Cunningham
2026-03-06 01:38:49 -08:00
parent ff975f2f62
commit 97e64b8e4d
2 changed files with 106 additions and 21 deletions

View File

@@ -93,19 +93,13 @@ async fn run_compact_task_inner(
input: Vec<UserInput>,
initial_context_injection: InitialContextInjection,
) -> CodexResult<()> {
let has_synthetic_compact_prompt = matches!(
input.as_slice(),
[UserInput::Text {
text,
text_elements,
}] if text == turn_context.compact_prompt() && text_elements.is_empty()
);
let compaction_item = TurnItem::ContextCompaction(ContextCompactionItem::new());
sess.emit_turn_item_started(&turn_context, &compaction_item)
.await;
let initial_input_for_turn: ResponseInputItem = ResponseInputItem::from(input);
let mut history = sess.clone_history().await;
let persisted_history_items = history.raw_items().to_vec();
history.record_items(
&[initial_input_for_turn.into()],
turn_context.truncation_policy,
@@ -195,24 +189,15 @@ async fn run_compact_task_inner(
}
}
let compaction_history_items = history.raw_items();
let summary_suffix = {
let history_snapshot = sess.clone_history().await;
get_last_assistant_message_from_turn(history_snapshot.raw_items()).unwrap_or_default()
};
let summary_text = format!("{SUMMARY_PREFIX}\n{summary_suffix}");
let mut user_messages = collect_user_messages(compaction_history_items);
if has_synthetic_compact_prompt
&& user_messages
.last()
.is_some_and(|message| message == turn_context.compact_prompt())
{
// Local inline compaction appends one synthetic user prompt for the compaction model call.
// Rebuild from the local prompt history so we can drop only that trailing synthetic input
// and preserve earlier real user messages, including ones whose text matches the prompt.
user_messages.pop();
}
// Build replacement history from persisted session history, not the retry-local
// prompt buffer. Retries may trim oldest prompt items to fit context limits, but
// replacement history must preserve prior real user messages and ghost snapshots.
let user_messages = collect_user_messages(&persisted_history_items);
let mut new_history = build_compacted_history(Vec::new(), &user_messages, &summary_text);
if matches!(
@@ -223,7 +208,7 @@ async fn run_compact_task_inner(
new_history =
insert_initial_context_before_last_real_user_or_summary(new_history, initial_context);
}
let ghost_snapshots: Vec<ResponseItem> = compaction_history_items
let ghost_snapshots: Vec<ResponseItem> = persisted_history_items
.iter()
.filter(|item| matches!(item, ResponseItem::GhostSnapshot { .. }))
.cloned()

View File

@@ -2383,6 +2383,106 @@ async fn manual_compact_retries_after_context_window_error() {
}
}
/// Regression test for manual compaction when the compact request has to be
/// retried after `context_length_exceeded` failures.
///
/// The mock server is scripted with: one normal user turn, five failed compact
/// attempts, one successful compact, and one follow-up turn. The test then
/// checks that:
/// - exactly eight requests were sent,
/// - at least one compact attempt request no longer contains the first user
///   message, and
/// - the follow-up request contains both the first user message and the new
///   follow-up message.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn manual_compact_retry_trimming_preserves_full_persisted_user_messages() {
    skip_if_no_network!();

    let server = start_mock_server().await;

    let first_user_message = "first turn should survive replacement history";
    let follow_up_user_message = "follow up after compact";

    // Scripted SSE streams, in the order the mock server will serve them.
    let first_turn_stream = sse(vec![
        ev_assistant_message("m1", FIRST_REPLY),
        ev_completed("r1"),
    ]);
    let compact_success_stream = sse(vec![
        ev_assistant_message("m2", SUMMARY_TEXT),
        ev_completed("r2"),
    ]);
    let follow_up_stream = sse(vec![ev_completed("r3")]);
    // Five consecutive context-window failures precede the successful compact.
    let failed_compact_attempts = (0..5).map(|attempt| {
        sse_failed(
            &format!("resp-fail-{attempt}"),
            "context_length_exceeded",
            CONTEXT_LIMIT_MESSAGE,
        )
    });
    let scripted_responses: Vec<_> = std::iter::once(first_turn_stream)
        .chain(failed_compact_attempts)
        .chain([compact_success_stream, follow_up_stream])
        .collect();
    let request_log = mount_sse_sequence(&server, scripted_responses).await;

    let model_provider = non_openai_model_provider(&server);
    let codex = test_codex()
        .with_config(move |config| {
            config.model_provider = model_provider;
            set_test_compact_prompt(config);
            config.model_auto_compact_token_limit = Some(200_000);
        })
        .build(&server)
        .await
        .expect("build codex")
        .codex;

    // Builds a plain-text user input op; both user turns share this shape.
    let user_text_op = |text: &str| Op::UserInput {
        items: vec![UserInput::Text {
            text: text.to_string(),
            text_elements: Vec::new(),
        }],
        final_output_json_schema: None,
    };

    // Turn 1: ordinary user message.
    codex
        .submit(user_text_op(first_user_message))
        .await
        .expect("submit first user turn");
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;

    // Turn 2: manual compaction (served five failures, then a success).
    codex.submit(Op::Compact).await.expect("submit compact");
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;

    // Turn 3: follow-up user message after compaction.
    codex
        .submit(user_text_op(follow_up_user_message))
        .await
        .expect("submit follow-up turn");
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;

    let requests = request_log.requests();
    assert_eq!(
        requests.len(),
        8,
        "expected first turn, compact retries, and follow-up turn"
    );

    // Request 0 is the first turn, 1..=6 are the compact attempts, 7 is the follow-up.
    let compact_attempt_requests = &requests[1..7];
    let follow_up_request = &requests[7];

    let saw_retry_trim_message = compact_attempt_requests
        .iter()
        .map(|request| request.body_json().to_string())
        .any(|body| !body_contains_text(&body, first_user_message));
    assert!(
        saw_retry_trim_message,
        "expected at least one compact retry request to trim the oldest user message from prompt history"
    );

    let follow_up_body = follow_up_request.body_json().to_string();
    assert!(
        body_contains_text(&follow_up_body, first_user_message),
        "follow-up request should keep the full persisted user messages in compact replacement history"
    );
    assert!(
        body_contains_text(&follow_up_body, follow_up_user_message),
        "follow-up request should include the incoming user message"
    );
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
// TODO(ccunningham): Re-enable after the follow-up compaction behavior PR lands.
// Current main behavior around non-context manual /compact failures is known-incorrect.