feat: truncate with model infos (#11577)

This commit is contained in:
jif-oai
2026-02-12 13:16:40 +00:00
committed by GitHub
parent 2a409ca67c
commit 44b92f9a85
3 changed files with 76 additions and 8 deletions

View File

@@ -25,6 +25,18 @@ const MAX_ROLLOUTS_PER_STARTUP: usize = 64;
// NOTE(review): presumably caps concurrent phase-1 rollout processing at the
// per-startup rollout cap — confirm against the executor that reads it.
const PHASE_ONE_CONCURRENCY_LIMIT: usize = MAX_ROLLOUTS_PER_STARTUP;
/// Maximum number of recent raw memories retained for global consolidation.
const MAX_RAW_MEMORIES_FOR_GLOBAL: usize = 1_024;
/// Fallback stage-1 rollout truncation limit (tokens) when model metadata
/// does not include a valid context window.
const DEFAULT_STAGE_ONE_ROLLOUT_TOKEN_LIMIT: usize = 150_000;
/// Maximum number of tokens from `memory_summary.md` injected into memory tool
/// developer instructions.
const MEMORY_TOOL_DEVELOPER_INSTRUCTIONS_SUMMARY_TOKEN_LIMIT: usize = 5_000;
/// Portion of the model effective input window reserved for the stage-1 rollout
/// input.
///
/// Keeping this below 100% leaves room for system instructions, prompt framing,
/// and model output.
///
/// Declared `i64` so it multiplies the model's `i64` context window directly
/// (see `build_stage_one_input_message`) before the final `usize` conversion.
const STAGE_ONE_CONTEXT_WINDOW_PERCENT: i64 = 70;
/// Maximum rollout age considered for phase-1 extraction.
const PHASE_ONE_MAX_ROLLOUT_AGE_DAYS: i64 = 30;
/// Minimum rollout idle time required before phase-1 extraction.

View File

@@ -1,7 +1,11 @@
use crate::memories::DEFAULT_STAGE_ONE_ROLLOUT_TOKEN_LIMIT;
use crate::memories::MEMORY_TOOL_DEVELOPER_INSTRUCTIONS_SUMMARY_TOKEN_LIMIT;
use crate::memories::STAGE_ONE_CONTEXT_WINDOW_PERCENT;
use crate::memories::memory_root;
use crate::truncate::TruncationPolicy;
use crate::truncate::truncate_text;
use askama::Template;
use codex_protocol::openai_models::ModelInfo;
use std::path::Path;
use tokio::fs;
use tracing::warn;
@@ -42,15 +46,25 @@ pub(super) fn build_consolidation_prompt(memory_root: &Path) -> String {
/// Builds the stage-1 user message containing rollout metadata and content.
///
/// Large rollout payloads are truncated to a bounded byte budget while keeping
/// both head and tail context.
/// Large rollout payloads are truncated to 70% of the active model's effective
/// input window token budget while keeping both head and tail context.
pub(super) fn build_stage_one_input_message(
model_info: &ModelInfo,
rollout_path: &Path,
rollout_cwd: &Path,
rollout_contents: &str,
) -> anyhow::Result<String> {
let truncated_rollout_contents =
truncate_text(rollout_contents, TruncationPolicy::Tokens(150_000));
let rollout_token_limit = model_info
.context_window
.and_then(|limit| (limit > 0).then_some(limit))
.map(|limit| limit.saturating_mul(model_info.effective_context_window_percent) / 100)
.map(|limit| (limit.saturating_mul(STAGE_ONE_CONTEXT_WINDOW_PERCENT) / 100).max(1))
.and_then(|limit| usize::try_from(limit).ok())
.unwrap_or(DEFAULT_STAGE_ONE_ROLLOUT_TOKEN_LIMIT);
let truncated_rollout_contents = truncate_text(
rollout_contents,
TruncationPolicy::Tokens(rollout_token_limit),
);
let rollout_path = rollout_path.display().to_string();
let rollout_cwd = rollout_cwd.display().to_string();
@@ -70,6 +84,10 @@ pub(crate) async fn build_memory_tool_developer_instructions(codex_home: &Path)
.ok()?
.trim()
.to_string();
let memory_summary = truncate_text(
&memory_summary,
TruncationPolicy::Tokens(MEMORY_TOOL_DEVELOPER_INSTRUCTIONS_SUMMARY_TOKEN_LIMIT),
);
if memory_summary.is_empty() {
return None;
}
@@ -84,12 +102,25 @@ pub(crate) async fn build_memory_tool_developer_instructions(codex_home: &Path)
#[cfg(test)]
mod tests {
use super::*;
use crate::models_manager::model_info::model_info_from_slug;
#[test]
fn build_stage_one_input_message_truncates_rollout_with_standard_policy() {
fn build_stage_one_input_message_truncates_rollout_using_model_context_window() {
let input = format!("{}{}{}", "a".repeat(700_000), "middle", "z".repeat(700_000));
let expected_truncated = truncate_text(&input, TruncationPolicy::Tokens(150_000));
let mut model_info = model_info_from_slug("gpt-5.2-codex");
model_info.context_window = Some(123_000);
let expected_rollout_token_limit = usize::try_from(
((123_000_i64 * model_info.effective_context_window_percent) / 100)
* STAGE_ONE_CONTEXT_WINDOW_PERCENT
/ 100,
)
.unwrap();
let expected_truncated = truncate_text(
&input,
TruncationPolicy::Tokens(expected_rollout_token_limit),
);
let message = build_stage_one_input_message(
&model_info,
Path::new("/tmp/rollout.jsonl"),
Path::new("/tmp"),
&input,
@@ -101,4 +132,24 @@ mod tests {
assert!(expected_truncated.ends_with('z'));
assert!(message.contains(&expected_truncated));
}
// Fallback path: when `ModelInfo::context_window` is `None`, stage-1 rollout
// truncation must use `DEFAULT_STAGE_ONE_ROLLOUT_TOKEN_LIMIT` instead of a
// budget derived from the model's window.
#[test]
fn build_stage_one_input_message_uses_default_limit_when_model_context_window_missing() {
// Oversized payload (~1.4M chars) with distinct head/tail markers so the
// head-and-tail truncation is observable.
let input = format!("{}{}{}", "a".repeat(700_000), "middle", "z".repeat(700_000));
let mut model_info = model_info_from_slug("gpt-5.2-codex");
// Simulate model metadata that carries no usable context window.
model_info.context_window = None;
// Expected text: the same input truncated with the hard-coded fallback budget.
let expected_truncated = truncate_text(
&input,
TruncationPolicy::Tokens(DEFAULT_STAGE_ONE_ROLLOUT_TOKEN_LIMIT),
);
let message = build_stage_one_input_message(
&model_info,
Path::new("/tmp/rollout.jsonl"),
Path::new("/tmp"),
&input,
)
.unwrap();
// The built message must embed exactly the fallback-truncated rollout text.
assert!(message.contains(&expected_truncated));
}
}

View File

@@ -61,8 +61,13 @@ pub(super) async fn extract_stage_one_output(
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText {
text: build_stage_one_input_message(rollout_path, rollout_cwd, &rollout_contents)
.map_err(|_e| "error while building the prompt")?,
text: build_stage_one_input_message(
&stage_one_context.model_info,
rollout_path,
rollout_cwd,
&rollout_contents,
)
.map_err(|_e| "error while building the prompt")?,
}],
end_turn: None,
phase: None,