Trim compaction input (#10374)

Two fixes: 1. Include trailing tool output in the total context size calculation. Otherwise when checking whether compaction should run we ignore newly added outputs. 2. Trim trailing tool output/tool calls until we can fit the request into the model context size. Otherwise the compaction endpoint will fail to compact. We only trim items that can be reproduced again by the model (tool calls, tool call outputs).
2026-05-02 10:26:45 +00:00 · 2026-02-02 19:03:11 -08:00
parent 7e07ec8f73
commit cbfd2a37cc
6 changed files with 354 additions and 40 deletions
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -1051,6 +1051,11 @@ impl Session {
        state.get_total_token_usage(state.server_reasoning_included())
    }

+    async fn get_estimated_token_count(&self, turn_context: &TurnContext) -> Option<i64> {
+        let state = self.state.lock().await;
+        state.history.estimate_token_count(turn_context)
+    }
+
    pub(crate) async fn get_base_instructions(&self) -> BaseInstructions {
        let state = self.state.lock().await;
        BaseInstructions {
@@ -3310,6 +3315,7 @@ pub(crate) async fn run_turn(
    let model_info = turn_context.client.get_model_info();
    let auto_compact_limit = model_info.auto_compact_token_limit().unwrap_or(i64::MAX);
    let total_usage_tokens = sess.get_total_token_usage().await;
+
    let event = EventMsg::TurnStarted(TurnStartedEvent {
        model_context_window: turn_context.client.get_model_context_window(),
        collaboration_mode_kind: turn_context.collaboration_mode.mode,
@@ -3465,6 +3471,19 @@ pub(crate) async fn run_turn(
                let total_usage_tokens = sess.get_total_token_usage().await;
                let token_limit_reached = total_usage_tokens >= auto_compact_limit;

+                let estimated_token_count =
+                    sess.get_estimated_token_count(turn_context.as_ref()).await;
+
+                info!(
+                    turn_id = %turn_context.sub_id,
+                    total_usage_tokens,
+                    estimated_token_count = ?estimated_token_count,
+                    auto_compact_limit,
+                    token_limit_reached,
+                    needs_follow_up,
+                    "post sampling token usage"
+                );
+
                // as long as compaction works well in getting us way below the token limit, we shouldn't worry about being in an infinite loop.
                if token_limit_reached && needs_follow_up {
                    run_auto_compact(&sess, &turn_context).await;