mirror of
https://github.com/openai/codex.git
synced 2026-04-24 14:45:27 +00:00
core: account for all post-response items in auto-compact token checks (#11132)
## Summary

- Change compaction pre-check accounting to include all items added after the last model-generated item, not only trailing codex-generated outputs.
- Use that boundary consistently in `get_total_token_usage()` and `get_total_token_usage_breakdown()`.
- Update history tests to cover user/tool-output items after the last model item.

## Why

`last_token_usage.total_tokens` is API-reported for the last successful model response. After that point, local history may gain additional items (user messages, injected context, tool outputs). Compaction triggering must account for all of those items to avoid late compaction attempts that can overflow context.

## Testing

- `just fmt`
- `cargo test -p codex-core`
This commit is contained in:
committed by
GitHub
parent
9fe925b15a
commit
0883e5d3e5
@@ -236,15 +236,23 @@ impl ContextManager {
|
||||
})
|
||||
}
|
||||
|
||||
fn get_trailing_codex_generated_items_tokens(&self) -> i64 {
|
||||
let mut total = 0i64;
|
||||
for item in self.items.iter().rev() {
|
||||
if !is_codex_generated_item(item) {
|
||||
break;
|
||||
}
|
||||
total = total.saturating_add(estimate_item_token_count(item));
|
||||
}
|
||||
total
|
||||
// These are local items added after the most recent model-emitted item.
|
||||
// They are not reflected in `last_token_usage.total_tokens`.
|
||||
fn items_after_last_model_generated_item(&self) -> &[ResponseItem] {
|
||||
let start = self
|
||||
.items
|
||||
.iter()
|
||||
.rposition(is_model_generated_item)
|
||||
.map_or(self.items.len(), |index| index.saturating_add(1));
|
||||
&self.items[start..]
|
||||
}
|
||||
|
||||
fn get_items_after_last_model_generated_tokens(&self) -> i64 {
|
||||
self.items_after_last_model_generated_item()
|
||||
.iter()
|
||||
.fold(0i64, |acc, item| {
|
||||
acc.saturating_add(estimate_item_token_count(item))
|
||||
})
|
||||
}
|
||||
|
||||
/// When true, the server already accounted for past reasoning tokens and
|
||||
@@ -255,13 +263,14 @@ impl ContextManager {
|
||||
.as_ref()
|
||||
.map(|info| info.last_token_usage.total_tokens)
|
||||
.unwrap_or(0);
|
||||
let trailing_codex_generated_tokens = self.get_trailing_codex_generated_items_tokens();
|
||||
let items_after_last_model_generated_tokens =
|
||||
self.get_items_after_last_model_generated_tokens();
|
||||
if server_reasoning_included {
|
||||
last_tokens.saturating_add(trailing_codex_generated_tokens)
|
||||
last_tokens.saturating_add(items_after_last_model_generated_tokens)
|
||||
} else {
|
||||
last_tokens
|
||||
.saturating_add(self.get_non_last_reasoning_items_tokens())
|
||||
.saturating_add(trailing_codex_generated_tokens)
|
||||
.saturating_add(items_after_last_model_generated_tokens)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -367,6 +376,22 @@ fn estimate_item_token_count(item: &ResponseItem) -> i64 {
|
||||
}
|
||||
}
|
||||
|
||||
fn is_model_generated_item(item: &ResponseItem) -> bool {
|
||||
match item {
|
||||
ResponseItem::Message { role, .. } => role == "assistant",
|
||||
ResponseItem::Reasoning { .. }
|
||||
| ResponseItem::FunctionCall { .. }
|
||||
| ResponseItem::WebSearchCall { .. }
|
||||
| ResponseItem::CustomToolCall { .. }
|
||||
| ResponseItem::LocalShellCall { .. }
|
||||
| ResponseItem::Compaction { .. } => true,
|
||||
ResponseItem::FunctionCallOutput { .. }
|
||||
| ResponseItem::CustomToolCallOutput { .. }
|
||||
| ResponseItem::GhostSnapshot { .. }
|
||||
| ResponseItem::Other => false,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn is_codex_generated_item(item: &ResponseItem) -> bool {
|
||||
matches!(
|
||||
item,
|
||||
|
||||
@@ -62,13 +62,6 @@ fn user_input_text_msg(text: &str) -> ResponseItem {
|
||||
}
|
||||
}
|
||||
|
||||
fn function_call_output(call_id: &str, content: &str) -> ResponseItem {
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.to_string(),
|
||||
output: FunctionCallOutputPayload::from_text(content.to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
fn custom_tool_call_output(call_id: &str, output: &str) -> ResponseItem {
|
||||
ResponseItem::CustomToolCallOutput {
|
||||
call_id: call_id.to_string(),
|
||||
@@ -189,48 +182,32 @@ fn non_last_reasoning_tokens_ignore_entries_after_last_user() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn trailing_codex_generated_tokens_stop_at_first_non_generated_item() {
|
||||
let earlier_output = function_call_output("call-earlier", "earlier output");
|
||||
let trailing_function_output = function_call_output("call-tail-1", "tail function output");
|
||||
let trailing_custom_output = custom_tool_call_output("call-tail-2", "tail custom output");
|
||||
fn items_after_last_model_generated_tokens_include_user_and_tool_output() {
|
||||
let history = create_history_with_items(vec![
|
||||
earlier_output,
|
||||
user_msg("boundary item"),
|
||||
trailing_function_output.clone(),
|
||||
trailing_custom_output.clone(),
|
||||
assistant_msg("already counted by API"),
|
||||
user_msg("new user message"),
|
||||
custom_tool_call_output("call-tail", "new tool output"),
|
||||
]);
|
||||
let expected_tokens = estimate_item_token_count(&trailing_function_output)
|
||||
.saturating_add(estimate_item_token_count(&trailing_custom_output));
|
||||
let expected_tokens = estimate_item_token_count(&user_msg("new user message")).saturating_add(
|
||||
estimate_item_token_count(&custom_tool_call_output("call-tail", "new tool output")),
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
history.get_trailing_codex_generated_items_tokens(),
|
||||
history.get_items_after_last_model_generated_tokens(),
|
||||
expected_tokens
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn trailing_codex_generated_tokens_exclude_function_call_tail() {
|
||||
let history = create_history_with_items(vec![ResponseItem::FunctionCall {
|
||||
id: None,
|
||||
name: "not-generated".to_string(),
|
||||
arguments: "{}".to_string(),
|
||||
call_id: "call-tail".to_string(),
|
||||
}]);
|
||||
fn items_after_last_model_generated_tokens_are_zero_without_model_generated_items() {
|
||||
let history = create_history_with_items(vec![user_msg("no model output yet")]);
|
||||
|
||||
assert_eq!(history.get_trailing_codex_generated_items_tokens(), 0);
|
||||
assert_eq!(history.get_items_after_last_model_generated_tokens(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn total_token_usage_includes_only_trailing_codex_generated_items() {
|
||||
let non_trailing_output = function_call_output("call-before-message", "not trailing");
|
||||
let trailing_assistant = assistant_msg("assistant boundary");
|
||||
let trailing_output = custom_tool_call_output("tool-tail", "trailing output");
|
||||
let mut history = create_history_with_items(vec![
|
||||
non_trailing_output,
|
||||
user_msg("boundary"),
|
||||
trailing_assistant,
|
||||
trailing_output.clone(),
|
||||
]);
|
||||
fn total_token_usage_includes_all_items_after_last_model_generated_item() {
|
||||
let mut history = create_history_with_items(vec![assistant_msg("already counted by API")]);
|
||||
history.update_token_info(
|
||||
&TokenUsage {
|
||||
total_tokens: 100,
|
||||
@@ -238,10 +215,17 @@ fn total_token_usage_includes_only_trailing_codex_generated_items() {
|
||||
},
|
||||
None,
|
||||
);
|
||||
let added_user = user_msg("new user message");
|
||||
let added_tool_output = custom_tool_call_output("tool-tail", "new tool output");
|
||||
history.record_items(
|
||||
[&added_user, &added_tool_output],
|
||||
TruncationPolicy::Tokens(10_000),
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
history.get_total_token_usage(true),
|
||||
100 + estimate_item_token_count(&trailing_output)
|
||||
100 + estimate_item_token_count(&added_user)
|
||||
+ estimate_item_token_count(&added_tool_output)
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user