Revert initial context trimming during compaction

This commit is contained in:
Charles Cunningham
2026-02-05 16:22:04 -08:00
parent 4868ee7f8a
commit 5c50dd2f24

View File

@@ -9,7 +9,6 @@ use crate::codex::get_last_assistant_message_from_turn;
use crate::error::CodexErr;
use crate::error::Result as CodexResult;
use crate::features::Feature;
use crate::instructions::UserInstructions;
use crate::protocol::CompactedItem;
use crate::protocol::EventMsg;
use crate::protocol::TurnContextItem;
@@ -32,20 +31,6 @@ use tracing::error;
pub const SUMMARIZATION_PROMPT: &str = include_str!("../templates/compact/prompt.md");
pub const SUMMARY_PREFIX: &str = include_str!("../templates/compact/summary_prefix.md");
const COMPACT_USER_MESSAGE_MAX_TOKENS: usize = 20_000;
// Turn context messages are re-injected after compaction so the next turn sees
// canonical session state (permissions, environment context, instructions).
//
// Some of those entries can be very large (for example AGENTS.md or custom
// developer instructions), and their size is otherwise unbounded here. Without
// a cap, re-injection can dominate post-compaction history and quickly push
// token usage back toward auto-compaction thresholds.
//
// We therefore budget the reinjected context separately and drop only
// droppable context items (largest first) when needed. Using half of the
// existing compact user-message budget keeps this heuristic simple and local to
// compaction behavior.
const REINJECTED_INITIAL_CONTEXT_MAX_TOKENS: usize = COMPACT_USER_MESSAGE_MAX_TOKENS / 2;
const PERMISSIONS_INSTRUCTIONS_OPEN_TAG: &str = "<permissions instructions>";
pub(crate) fn should_use_remote_compact_task(
session: &Session,
@@ -264,7 +249,7 @@ pub(crate) fn process_compacted_history(
) -> Vec<ResponseItem> {
compacted_history.retain(should_keep_compacted_history_item);
let initial_context = initial_context_for_reinjection(initial_context);
let initial_context = initial_context.to_vec();
// Re-inject canonical context from the current session since we stripped it
// from the pre-compaction history. Keep it right before the last user
@@ -305,65 +290,6 @@ fn should_keep_compacted_history_item(item: &ResponseItem) -> bool {
}
}
fn initial_context_for_reinjection(initial_context: &[ResponseItem]) -> Vec<ResponseItem> {
let mut selected: Vec<Option<ResponseItem>> =
initial_context.iter().cloned().map(Some).collect();
let mut total_tokens: usize = initial_context
.iter()
.map(estimate_response_item_tokens)
.sum();
if total_tokens <= REINJECTED_INITIAL_CONTEXT_MAX_TOKENS {
return initial_context.to_vec();
}
let mut droppable_items: Vec<(usize, usize)> = initial_context
.iter()
.enumerate()
.filter(|(_, item)| is_droppable_initial_context_item(item))
.map(|(idx, item)| (idx, estimate_response_item_tokens(item)))
.collect();
// Prefer dropping the largest droppable context chunks first.
droppable_items.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
for (idx, item_tokens) in droppable_items {
if total_tokens <= REINJECTED_INITIAL_CONTEXT_MAX_TOKENS {
break;
}
selected[idx] = None;
total_tokens = total_tokens.saturating_sub(item_tokens);
}
selected.into_iter().flatten().collect()
}
fn is_droppable_initial_context_item(item: &ResponseItem) -> bool {
let ResponseItem::Message { role, content, .. } = item else {
return false;
};
// We keep permissions and environment context stable, and allow large
// instruction wrappers to be omitted since compaction can summarize them.
if role == "user" {
return UserInstructions::is_user_instructions(content);
}
if role == "developer" {
return !is_permissions_developer_message(content);
}
false
}
fn is_permissions_developer_message(content: &[ContentItem]) -> bool {
let [ContentItem::InputText { text }] = content else {
return false;
};
text.starts_with(PERMISSIONS_INSTRUCTIONS_OPEN_TAG)
}
fn estimate_response_item_tokens(item: &ResponseItem) -> usize {
serde_json::to_string(item)
.map(|s| approx_token_count(&s))
.unwrap_or_default()
}
pub(crate) fn build_compacted_history(
initial_context: Vec<ResponseItem>,
user_messages: &[String],
@@ -843,131 +769,6 @@ mod tests {
assert_eq!(refreshed, expected);
}
#[test]
fn process_compacted_history_drops_largest_uncapped_context_items_to_fit_budget() {
let compacted_history = vec![ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText {
text: "summary".to_string(),
}],
end_turn: None,
phase: None,
}];
let oversized_user_instructions = format!(
"# AGENTS.md instructions for /repo\n\n<INSTRUCTIONS>\n{}\n</INSTRUCTIONS>",
"u".repeat(48_000)
);
let medium_developer_instructions = "d".repeat(8_000);
let initial_context = vec![
ResponseItem::Message {
id: None,
role: "developer".to_string(),
content: vec![ContentItem::InputText {
text: "<permissions instructions>\nallowed\n</permissions instructions>"
.to_string(),
}],
end_turn: None,
phase: None,
},
ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText {
text: oversized_user_instructions.clone(),
}],
end_turn: None,
phase: None,
},
ResponseItem::Message {
id: None,
role: "developer".to_string(),
content: vec![ContentItem::InputText {
text: medium_developer_instructions.clone(),
}],
end_turn: None,
phase: None,
},
ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText {
text: "<environment_context>\n <cwd>/repo</cwd>\n <shell>zsh</shell>\n</environment_context>".to_string(),
}],
end_turn: None,
phase: None,
},
];
let initial_context_tokens: usize = initial_context
.iter()
.map(estimate_response_item_tokens)
.sum();
assert!(
initial_context_tokens > REINJECTED_INITIAL_CONTEXT_MAX_TOKENS,
"test setup should exceed reinjected initial context budget"
);
let refreshed = process_compacted_history(compacted_history, &initial_context);
let reinjected_context_tokens: usize = refreshed
.iter()
.filter(|item| {
!matches!(
item,
ResponseItem::Message { role, content, .. }
if role == "user"
&& content_items_to_text(content).as_deref() == Some("summary")
)
})
.map(estimate_response_item_tokens)
.sum();
assert!(
reinjected_context_tokens <= REINJECTED_INITIAL_CONTEXT_MAX_TOKENS,
"re-injected context should respect budget"
);
assert!(
refreshed.iter().any(|item| matches!(
item,
ResponseItem::Message { role, content, .. }
if role == "developer"
&& content_items_to_text(content).as_deref() == Some(
"<permissions instructions>\nallowed\n</permissions instructions>"
)
)),
"permissions instructions should always be re-injected"
);
assert!(
refreshed.iter().any(|item| matches!(
item,
ResponseItem::Message { role, content, .. }
if role == "user"
&& content_items_to_text(content).as_deref() == Some(
"<environment_context>\n <cwd>/repo</cwd>\n <shell>zsh</shell>\n</environment_context>"
)
)),
"environment context should always be re-injected"
);
assert!(
refreshed.iter().all(|item| !matches!(
item,
ResponseItem::Message { role, content, .. }
if role == "user"
&& content_items_to_text(content).as_deref()
== Some(oversized_user_instructions.as_str())
)),
"largest droppable context item should be removed first"
);
assert!(
refreshed.iter().any(|item| matches!(
item,
ResponseItem::Message { role, content, .. }
if role == "developer"
&& content_items_to_text(content).as_deref()
== Some(medium_developer_instructions.as_str())
)),
"smaller droppable context items should remain when budget allows"
);
}
#[test]
fn process_compacted_history_drops_non_user_content_messages() {
let compacted_history = vec![