core: account for all post-response items in auto-compact token checks (#11132)

## Summary
- change compaction pre-check accounting to include all items added
after the last model-generated item, not only trailing codex-generated
outputs
- use that boundary consistently in get_total_token_usage() and
get_total_token_usage_breakdown()
- update history tests to cover user/tool-output items after the last
model item

## Why
last_token_usage.total_tokens is API-reported for the last successful
model response. After that point, local history may gain additional
items (user messages, injected context, tool outputs). Compaction
triggering must account for all of those items to avoid late compaction
attempts that can overflow context.

## Testing
- just fmt
- cargo test -p codex-core
This commit is contained in:
Charley Cunningham
2026-02-09 08:34:38 -08:00
committed by GitHub
parent 9fe925b15a
commit 0883e5d3e5
2 changed files with 58 additions and 49 deletions

View File

@@ -236,15 +236,23 @@ impl ContextManager {
})
}
fn get_trailing_codex_generated_items_tokens(&self) -> i64 {
let mut total = 0i64;
for item in self.items.iter().rev() {
if !is_codex_generated_item(item) {
break;
}
total = total.saturating_add(estimate_item_token_count(item));
}
total
// These are local items added after the most recent model-emitted item.
// They are not reflected in `last_token_usage.total_tokens`.
fn items_after_last_model_generated_item(&self) -> &[ResponseItem] {
    // Scan from the end for the newest model-generated entry; everything
    // after it is a local addition the API has not yet accounted for.
    match self.items.iter().rposition(is_model_generated_item) {
        Some(last) => &self.items[last.saturating_add(1)..],
        // No model output in history yet: the unaccounted suffix is empty.
        None => &[],
    }
}
// Estimated token total for every item added after the last model-generated
// item, summed with saturation so oversized estimates cannot wrap around.
fn get_items_after_last_model_generated_tokens(&self) -> i64 {
    let mut total: i64 = 0;
    for item in self.items_after_last_model_generated_item() {
        total = total.saturating_add(estimate_item_token_count(item));
    }
    total
}
/// When true, the server already accounted for past reasoning tokens and
@@ -255,13 +263,14 @@ impl ContextManager {
.as_ref()
.map(|info| info.last_token_usage.total_tokens)
.unwrap_or(0);
let trailing_codex_generated_tokens = self.get_trailing_codex_generated_items_tokens();
let items_after_last_model_generated_tokens =
self.get_items_after_last_model_generated_tokens();
if server_reasoning_included {
last_tokens.saturating_add(trailing_codex_generated_tokens)
last_tokens.saturating_add(items_after_last_model_generated_tokens)
} else {
last_tokens
.saturating_add(self.get_non_last_reasoning_items_tokens())
.saturating_add(trailing_codex_generated_tokens)
.saturating_add(items_after_last_model_generated_tokens)
}
}
@@ -367,6 +376,22 @@ fn estimate_item_token_count(item: &ResponseItem) -> i64 {
}
}
/// True when the item was emitted by the model itself — i.e. the API's
/// reported `total_tokens` already covers it. Locally produced items
/// (tool outputs, snapshots, opaque entries) return false.
fn is_model_generated_item(item: &ResponseItem) -> bool {
    match item {
        // Locally generated entries — never part of a model response.
        ResponseItem::FunctionCallOutput { .. }
        | ResponseItem::CustomToolCallOutput { .. }
        | ResponseItem::GhostSnapshot { .. }
        | ResponseItem::Other => false,
        // A message counts only when the assistant authored it.
        ResponseItem::Message { role, .. } => role == "assistant",
        // Everything the model produces as part of a turn.
        ResponseItem::Reasoning { .. }
        | ResponseItem::FunctionCall { .. }
        | ResponseItem::WebSearchCall { .. }
        | ResponseItem::CustomToolCall { .. }
        | ResponseItem::LocalShellCall { .. }
        | ResponseItem::Compaction { .. } => true,
    }
}
pub(crate) fn is_codex_generated_item(item: &ResponseItem) -> bool {
matches!(
item,

View File

@@ -62,13 +62,6 @@ fn user_input_text_msg(text: &str) -> ResponseItem {
}
}
/// Test helper: builds a `FunctionCallOutput` response item carrying the
/// given call id and a plain-text payload.
fn function_call_output(call_id: &str, content: &str) -> ResponseItem {
    let output = FunctionCallOutputPayload::from_text(content.to_owned());
    ResponseItem::FunctionCallOutput {
        call_id: call_id.to_owned(),
        output,
    }
}
fn custom_tool_call_output(call_id: &str, output: &str) -> ResponseItem {
ResponseItem::CustomToolCallOutput {
call_id: call_id.to_string(),
@@ -189,48 +182,32 @@ fn non_last_reasoning_tokens_ignore_entries_after_last_user() {
}
#[test]
fn trailing_codex_generated_tokens_stop_at_first_non_generated_item() {
let earlier_output = function_call_output("call-earlier", "earlier output");
let trailing_function_output = function_call_output("call-tail-1", "tail function output");
let trailing_custom_output = custom_tool_call_output("call-tail-2", "tail custom output");
fn items_after_last_model_generated_tokens_include_user_and_tool_output() {
let history = create_history_with_items(vec![
earlier_output,
user_msg("boundary item"),
trailing_function_output.clone(),
trailing_custom_output.clone(),
assistant_msg("already counted by API"),
user_msg("new user message"),
custom_tool_call_output("call-tail", "new tool output"),
]);
let expected_tokens = estimate_item_token_count(&trailing_function_output)
.saturating_add(estimate_item_token_count(&trailing_custom_output));
let expected_tokens = estimate_item_token_count(&user_msg("new user message")).saturating_add(
estimate_item_token_count(&custom_tool_call_output("call-tail", "new tool output")),
);
assert_eq!(
history.get_trailing_codex_generated_items_tokens(),
history.get_items_after_last_model_generated_tokens(),
expected_tokens
);
}
#[test]
fn trailing_codex_generated_tokens_exclude_function_call_tail() {
let history = create_history_with_items(vec![ResponseItem::FunctionCall {
id: None,
name: "not-generated".to_string(),
arguments: "{}".to_string(),
call_id: "call-tail".to_string(),
}]);
fn items_after_last_model_generated_tokens_are_zero_without_model_generated_items() {
let history = create_history_with_items(vec![user_msg("no model output yet")]);
assert_eq!(history.get_trailing_codex_generated_items_tokens(), 0);
assert_eq!(history.get_items_after_last_model_generated_tokens(), 0);
}
#[test]
fn total_token_usage_includes_only_trailing_codex_generated_items() {
let non_trailing_output = function_call_output("call-before-message", "not trailing");
let trailing_assistant = assistant_msg("assistant boundary");
let trailing_output = custom_tool_call_output("tool-tail", "trailing output");
let mut history = create_history_with_items(vec![
non_trailing_output,
user_msg("boundary"),
trailing_assistant,
trailing_output.clone(),
]);
fn total_token_usage_includes_all_items_after_last_model_generated_item() {
let mut history = create_history_with_items(vec![assistant_msg("already counted by API")]);
history.update_token_info(
&TokenUsage {
total_tokens: 100,
@@ -238,10 +215,17 @@ fn total_token_usage_includes_only_trailing_codex_generated_items() {
},
None,
);
let added_user = user_msg("new user message");
let added_tool_output = custom_tool_call_output("tool-tail", "new tool output");
history.record_items(
[&added_user, &added_tool_output],
TruncationPolicy::Tokens(10_000),
);
assert_eq!(
history.get_total_token_usage(true),
100 + estimate_item_token_count(&trailing_output)
100 + estimate_item_token_count(&added_user)
+ estimate_item_token_count(&added_tool_output)
);
}