core: account for all post-response items in auto-compact token checks (#11132)

## Summary
- change compaction pre-check accounting to include all items added
after the last model-generated item, not only trailing codex-generated
outputs
- use that boundary consistently in get_total_token_usage() and
get_total_token_usage_breakdown()
- update history tests to cover user/tool-output items after the last
model item

## Why
last_token_usage.total_tokens is API-reported for the last successful
model response. After that point, local history may gain additional
items (user messages, injected context, tool outputs). Compaction
triggering must account for all of those items to avoid late compaction
attempts that can overflow context.

## Testing
- just fmt
- cargo test -p codex-core
This commit is contained in:
Charley Cunningham
2026-02-09 08:34:38 -08:00
committed by GitHub
parent 9fe925b15a
commit 0883e5d3e5
2 changed files with 58 additions and 49 deletions

View File

@@ -236,15 +236,23 @@ impl ContextManager {
})
}
fn get_trailing_codex_generated_items_tokens(&self) -> i64 {
let mut total = 0i64;
for item in self.items.iter().rev() {
if !is_codex_generated_item(item) {
break;
}
total = total.saturating_add(estimate_item_token_count(item));
}
total
// These are local items added after the most recent model-emitted item.
// They are not reflected in `last_token_usage.total_tokens`.
fn items_after_last_model_generated_item(&self) -> &[ResponseItem] {
    // Scan from the end for the newest model-generated entry; everything
    // after it is a local addition the API has not yet accounted for.
    match self.items.iter().rposition(is_model_generated_item) {
        Some(last) => &self.items[last.saturating_add(1)..],
        // No model output in history yet: the unaccounted suffix is empty.
        None => &[],
    }
}
// Estimated token total for every item added after the last model-generated
// item, summed with saturation so oversized estimates cannot wrap around.
fn get_items_after_last_model_generated_tokens(&self) -> i64 {
    let mut total: i64 = 0;
    for item in self.items_after_last_model_generated_item() {
        total = total.saturating_add(estimate_item_token_count(item));
    }
    total
}
/// When true, the server already accounted for past reasoning tokens and
@@ -255,13 +263,14 @@ impl ContextManager {
.as_ref()
.map(|info| info.last_token_usage.total_tokens)
.unwrap_or(0);
let trailing_codex_generated_tokens = self.get_trailing_codex_generated_items_tokens();
let items_after_last_model_generated_tokens =
self.get_items_after_last_model_generated_tokens();
if server_reasoning_included {
last_tokens.saturating_add(trailing_codex_generated_tokens)
last_tokens.saturating_add(items_after_last_model_generated_tokens)
} else {
last_tokens
.saturating_add(self.get_non_last_reasoning_items_tokens())
.saturating_add(trailing_codex_generated_tokens)
.saturating_add(items_after_last_model_generated_tokens)
}
}
@@ -367,6 +376,22 @@ fn estimate_item_token_count(item: &ResponseItem) -> i64 {
}
}
/// True when the item was emitted by the model itself — i.e. the API's
/// reported `total_tokens` already covers it. Locally produced items
/// (tool outputs, snapshots, opaque entries) return false.
fn is_model_generated_item(item: &ResponseItem) -> bool {
    match item {
        // Locally generated entries — never part of a model response.
        ResponseItem::FunctionCallOutput { .. }
        | ResponseItem::CustomToolCallOutput { .. }
        | ResponseItem::GhostSnapshot { .. }
        | ResponseItem::Other => false,
        // A message counts only when the assistant authored it.
        ResponseItem::Message { role, .. } => role == "assistant",
        // Everything the model produces as part of a turn.
        ResponseItem::Reasoning { .. }
        | ResponseItem::FunctionCall { .. }
        | ResponseItem::WebSearchCall { .. }
        | ResponseItem::CustomToolCall { .. }
        | ResponseItem::LocalShellCall { .. }
        | ResponseItem::Compaction { .. } => true,
    }
}
pub(crate) fn is_codex_generated_item(item: &ResponseItem) -> bool {
matches!(
item,

View File

@@ -62,13 +62,6 @@ fn user_input_text_msg(text: &str) -> ResponseItem {
}
}
/// Test helper: builds a `FunctionCallOutput` response item carrying the
/// given call id and a plain-text payload.
fn function_call_output(call_id: &str, content: &str) -> ResponseItem {
    let output = FunctionCallOutputPayload::from_text(content.to_owned());
    ResponseItem::FunctionCallOutput {
        call_id: call_id.to_owned(),
        output,
    }
}
fn custom_tool_call_output(call_id: &str, output: &str) -> ResponseItem {
ResponseItem::CustomToolCallOutput {
call_id: call_id.to_string(),
@@ -189,48 +182,32 @@ fn non_last_reasoning_tokens_ignore_entries_after_last_user() {
}
#[test]
fn trailing_codex_generated_tokens_stop_at_first_non_generated_item() {
let earlier_output = function_call_output("call-earlier", "earlier output");
let trailing_function_output = function_call_output("call-tail-1", "tail function output");
let trailing_custom_output = custom_tool_call_output("call-tail-2", "tail custom output");
fn items_after_last_model_generated_tokens_include_user_and_tool_output() {
let history = create_history_with_items(vec![
earlier_output,
user_msg("boundary item"),
trailing_function_output.clone(),
trailing_custom_output.clone(),
assistant_msg("already counted by API"),
user_msg("new user message"),
custom_tool_call_output("call-tail", "new tool output"),
]);
let expected_tokens = estimate_item_token_count(&trailing_function_output)
.saturating_add(estimate_item_token_count(&trailing_custom_output));
let expected_tokens = estimate_item_token_count(&user_msg("new user message")).saturating_add(
estimate_item_token_count(&custom_tool_call_output("call-tail", "new tool output")),
);
assert_eq!(
history.get_trailing_codex_generated_items_tokens(),
history.get_items_after_last_model_generated_tokens(),
expected_tokens
);
}
#[test]
fn trailing_codex_generated_tokens_exclude_function_call_tail() {
let history = create_history_with_items(vec![ResponseItem::FunctionCall {
id: None,
name: "not-generated".to_string(),
arguments: "{}".to_string(),
call_id: "call-tail".to_string(),
}]);
fn items_after_last_model_generated_tokens_are_zero_without_model_generated_items() {
let history = create_history_with_items(vec![user_msg("no model output yet")]);
assert_eq!(history.get_trailing_codex_generated_items_tokens(), 0);
assert_eq!(history.get_items_after_last_model_generated_tokens(), 0);
}
#[test]
fn total_token_usage_includes_only_trailing_codex_generated_items() {
let non_trailing_output = function_call_output("call-before-message", "not trailing");
let trailing_assistant = assistant_msg("assistant boundary");
let trailing_output = custom_tool_call_output("tool-tail", "trailing output");
let mut history = create_history_with_items(vec![
non_trailing_output,
user_msg("boundary"),
trailing_assistant,
trailing_output.clone(),
]);
fn total_token_usage_includes_all_items_after_last_model_generated_item() {
let mut history = create_history_with_items(vec![assistant_msg("already counted by API")]);
history.update_token_info(
&TokenUsage {
total_tokens: 100,
@@ -238,10 +215,17 @@ fn total_token_usage_includes_only_trailing_codex_generated_items() {
},
None,
);
let added_user = user_msg("new user message");
let added_tool_output = custom_tool_call_output("tool-tail", "new tool output");
history.record_items(
[&added_user, &added_tool_output],
TruncationPolicy::Tokens(10_000),
);
assert_eq!(
history.get_total_token_usage(true),
100 + estimate_item_token_count(&trailing_output)
100 + estimate_item_token_count(&added_user)
+ estimate_item_token_count(&added_tool_output)
);
}