mirror of
https://github.com/openai/codex.git
synced 2026-04-24 14:45:27 +00:00
Revert initial context trimming during compaction
This commit is contained in:
@@ -9,7 +9,6 @@ use crate::codex::get_last_assistant_message_from_turn;
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::Result as CodexResult;
|
||||
use crate::features::Feature;
|
||||
use crate::instructions::UserInstructions;
|
||||
use crate::protocol::CompactedItem;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::protocol::TurnContextItem;
|
||||
@@ -32,20 +31,6 @@ use tracing::error;
|
||||
pub const SUMMARIZATION_PROMPT: &str = include_str!("../templates/compact/prompt.md");
|
||||
pub const SUMMARY_PREFIX: &str = include_str!("../templates/compact/summary_prefix.md");
|
||||
const COMPACT_USER_MESSAGE_MAX_TOKENS: usize = 20_000;
|
||||
// Turn context messages are re-injected after compaction so the next turn sees
|
||||
// canonical session state (permissions, environment context, instructions).
|
||||
//
|
||||
// Some of those entries can be very large (for example AGENTS.md or custom
|
||||
// developer instructions), and their size is otherwise unbounded here. Without
|
||||
// a cap, re-injection can dominate post-compaction history and quickly push
|
||||
// token usage back toward auto-compaction thresholds.
|
||||
//
|
||||
// We therefore budget the reinjected context separately and drop only
|
||||
// droppable context items (largest first) when needed. Using half of the
|
||||
// existing compact user-message budget keeps this heuristic simple and local to
|
||||
// compaction behavior.
|
||||
const REINJECTED_INITIAL_CONTEXT_MAX_TOKENS: usize = COMPACT_USER_MESSAGE_MAX_TOKENS / 2;
|
||||
const PERMISSIONS_INSTRUCTIONS_OPEN_TAG: &str = "<permissions instructions>";
|
||||
|
||||
pub(crate) fn should_use_remote_compact_task(
|
||||
session: &Session,
|
||||
@@ -264,7 +249,7 @@ pub(crate) fn process_compacted_history(
|
||||
) -> Vec<ResponseItem> {
|
||||
compacted_history.retain(should_keep_compacted_history_item);
|
||||
|
||||
let initial_context = initial_context_for_reinjection(initial_context);
|
||||
let initial_context = initial_context.to_vec();
|
||||
|
||||
// Re-inject canonical context from the current session since we stripped it
|
||||
// from the pre-compaction history. Keep it right before the last user
|
||||
@@ -305,65 +290,6 @@ fn should_keep_compacted_history_item(item: &ResponseItem) -> bool {
|
||||
}
|
||||
}
|
||||
|
||||
fn initial_context_for_reinjection(initial_context: &[ResponseItem]) -> Vec<ResponseItem> {
|
||||
let mut selected: Vec<Option<ResponseItem>> =
|
||||
initial_context.iter().cloned().map(Some).collect();
|
||||
let mut total_tokens: usize = initial_context
|
||||
.iter()
|
||||
.map(estimate_response_item_tokens)
|
||||
.sum();
|
||||
if total_tokens <= REINJECTED_INITIAL_CONTEXT_MAX_TOKENS {
|
||||
return initial_context.to_vec();
|
||||
}
|
||||
|
||||
let mut droppable_items: Vec<(usize, usize)> = initial_context
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(_, item)| is_droppable_initial_context_item(item))
|
||||
.map(|(idx, item)| (idx, estimate_response_item_tokens(item)))
|
||||
.collect();
|
||||
// Prefer dropping the largest droppable context chunks first.
|
||||
droppable_items.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
|
||||
|
||||
for (idx, item_tokens) in droppable_items {
|
||||
if total_tokens <= REINJECTED_INITIAL_CONTEXT_MAX_TOKENS {
|
||||
break;
|
||||
}
|
||||
selected[idx] = None;
|
||||
total_tokens = total_tokens.saturating_sub(item_tokens);
|
||||
}
|
||||
|
||||
selected.into_iter().flatten().collect()
|
||||
}
|
||||
|
||||
fn is_droppable_initial_context_item(item: &ResponseItem) -> bool {
|
||||
let ResponseItem::Message { role, content, .. } = item else {
|
||||
return false;
|
||||
};
|
||||
// We keep permissions and environment context stable, and allow large
|
||||
// instruction wrappers to be omitted since compaction can summarize them.
|
||||
if role == "user" {
|
||||
return UserInstructions::is_user_instructions(content);
|
||||
}
|
||||
if role == "developer" {
|
||||
return !is_permissions_developer_message(content);
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn is_permissions_developer_message(content: &[ContentItem]) -> bool {
|
||||
let [ContentItem::InputText { text }] = content else {
|
||||
return false;
|
||||
};
|
||||
text.starts_with(PERMISSIONS_INSTRUCTIONS_OPEN_TAG)
|
||||
}
|
||||
|
||||
fn estimate_response_item_tokens(item: &ResponseItem) -> usize {
|
||||
serde_json::to_string(item)
|
||||
.map(|s| approx_token_count(&s))
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
pub(crate) fn build_compacted_history(
|
||||
initial_context: Vec<ResponseItem>,
|
||||
user_messages: &[String],
|
||||
@@ -843,131 +769,6 @@ mod tests {
|
||||
assert_eq!(refreshed, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn process_compacted_history_drops_largest_uncapped_context_items_to_fit_budget() {
|
||||
let compacted_history = vec![ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputText {
|
||||
text: "summary".to_string(),
|
||||
}],
|
||||
end_turn: None,
|
||||
phase: None,
|
||||
}];
|
||||
let oversized_user_instructions = format!(
|
||||
"# AGENTS.md instructions for /repo\n\n<INSTRUCTIONS>\n{}\n</INSTRUCTIONS>",
|
||||
"u".repeat(48_000)
|
||||
);
|
||||
let medium_developer_instructions = "d".repeat(8_000);
|
||||
let initial_context = vec![
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
role: "developer".to_string(),
|
||||
content: vec![ContentItem::InputText {
|
||||
text: "<permissions instructions>\nallowed\n</permissions instructions>"
|
||||
.to_string(),
|
||||
}],
|
||||
end_turn: None,
|
||||
phase: None,
|
||||
},
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputText {
|
||||
text: oversized_user_instructions.clone(),
|
||||
}],
|
||||
end_turn: None,
|
||||
phase: None,
|
||||
},
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
role: "developer".to_string(),
|
||||
content: vec![ContentItem::InputText {
|
||||
text: medium_developer_instructions.clone(),
|
||||
}],
|
||||
end_turn: None,
|
||||
phase: None,
|
||||
},
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputText {
|
||||
text: "<environment_context>\n <cwd>/repo</cwd>\n <shell>zsh</shell>\n</environment_context>".to_string(),
|
||||
}],
|
||||
end_turn: None,
|
||||
phase: None,
|
||||
},
|
||||
];
|
||||
let initial_context_tokens: usize = initial_context
|
||||
.iter()
|
||||
.map(estimate_response_item_tokens)
|
||||
.sum();
|
||||
assert!(
|
||||
initial_context_tokens > REINJECTED_INITIAL_CONTEXT_MAX_TOKENS,
|
||||
"test setup should exceed reinjected initial context budget"
|
||||
);
|
||||
|
||||
let refreshed = process_compacted_history(compacted_history, &initial_context);
|
||||
let reinjected_context_tokens: usize = refreshed
|
||||
.iter()
|
||||
.filter(|item| {
|
||||
!matches!(
|
||||
item,
|
||||
ResponseItem::Message { role, content, .. }
|
||||
if role == "user"
|
||||
&& content_items_to_text(content).as_deref() == Some("summary")
|
||||
)
|
||||
})
|
||||
.map(estimate_response_item_tokens)
|
||||
.sum();
|
||||
assert!(
|
||||
reinjected_context_tokens <= REINJECTED_INITIAL_CONTEXT_MAX_TOKENS,
|
||||
"re-injected context should respect budget"
|
||||
);
|
||||
assert!(
|
||||
refreshed.iter().any(|item| matches!(
|
||||
item,
|
||||
ResponseItem::Message { role, content, .. }
|
||||
if role == "developer"
|
||||
&& content_items_to_text(content).as_deref() == Some(
|
||||
"<permissions instructions>\nallowed\n</permissions instructions>"
|
||||
)
|
||||
)),
|
||||
"permissions instructions should always be re-injected"
|
||||
);
|
||||
assert!(
|
||||
refreshed.iter().any(|item| matches!(
|
||||
item,
|
||||
ResponseItem::Message { role, content, .. }
|
||||
if role == "user"
|
||||
&& content_items_to_text(content).as_deref() == Some(
|
||||
"<environment_context>\n <cwd>/repo</cwd>\n <shell>zsh</shell>\n</environment_context>"
|
||||
)
|
||||
)),
|
||||
"environment context should always be re-injected"
|
||||
);
|
||||
assert!(
|
||||
refreshed.iter().all(|item| !matches!(
|
||||
item,
|
||||
ResponseItem::Message { role, content, .. }
|
||||
if role == "user"
|
||||
&& content_items_to_text(content).as_deref()
|
||||
== Some(oversized_user_instructions.as_str())
|
||||
)),
|
||||
"largest droppable context item should be removed first"
|
||||
);
|
||||
assert!(
|
||||
refreshed.iter().any(|item| matches!(
|
||||
item,
|
||||
ResponseItem::Message { role, content, .. }
|
||||
if role == "developer"
|
||||
&& content_items_to_text(content).as_deref()
|
||||
== Some(medium_developer_instructions.as_str())
|
||||
)),
|
||||
"smaller droppable context items should remain when budget allows"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn process_compacted_history_drops_non_user_content_messages() {
|
||||
let compacted_history = vec![
|
||||
|
||||
Reference in New Issue
Block a user