feat: truncate with model infos (#11577)

This commit is contained in:
jif-oai
2026-02-12 13:16:40 +00:00
committed by GitHub
parent 2a409ca67c
commit 44b92f9a85
3 changed files with 76 additions and 8 deletions

View File

@@ -25,6 +25,18 @@ const MAX_ROLLOUTS_PER_STARTUP: usize = 64;
// NOTE(review): presumably caps concurrent phase-1 rollout processing at the
// per-startup rollout cap — confirm against the executor that reads it.
const PHASE_ONE_CONCURRENCY_LIMIT: usize = MAX_ROLLOUTS_PER_STARTUP;
/// Maximum number of recent raw memories retained for global consolidation.
const MAX_RAW_MEMORIES_FOR_GLOBAL: usize = 1_024;
/// Fallback stage-1 rollout truncation limit (tokens) when model metadata
/// does not include a valid context window.
const DEFAULT_STAGE_ONE_ROLLOUT_TOKEN_LIMIT: usize = 150_000;
/// Maximum number of tokens from `memory_summary.md` injected into memory tool
/// developer instructions.
const MEMORY_TOOL_DEVELOPER_INSTRUCTIONS_SUMMARY_TOKEN_LIMIT: usize = 5_000;
/// Portion of the model effective input window reserved for the stage-1 rollout
/// input.
///
/// Keeping this below 100% leaves room for system instructions, prompt framing,
/// and model output.
///
/// Declared `i64` so it multiplies the model's `i64` context window directly
/// (see `build_stage_one_input_message`) before the final `usize` conversion.
const STAGE_ONE_CONTEXT_WINDOW_PERCENT: i64 = 70;
/// Maximum rollout age considered for phase-1 extraction.
const PHASE_ONE_MAX_ROLLOUT_AGE_DAYS: i64 = 30;
/// Minimum rollout idle time required before phase-1 extraction.

View File

@@ -1,7 +1,11 @@
use crate::memories::DEFAULT_STAGE_ONE_ROLLOUT_TOKEN_LIMIT;
use crate::memories::MEMORY_TOOL_DEVELOPER_INSTRUCTIONS_SUMMARY_TOKEN_LIMIT;
use crate::memories::STAGE_ONE_CONTEXT_WINDOW_PERCENT;
use crate::memories::memory_root;
use crate::truncate::TruncationPolicy;
use crate::truncate::truncate_text;
use askama::Template;
use codex_protocol::openai_models::ModelInfo;
use std::path::Path;
use tokio::fs;
use tracing::warn;
@@ -42,15 +46,25 @@ pub(super) fn build_consolidation_prompt(memory_root: &Path) -> String {
/// Builds the stage-1 user message containing rollout metadata and content.
///
/// Large rollout payloads are truncated to a bounded byte budget while keeping
/// both head and tail context.
/// Large rollout payloads are truncated to 70% of the active model's effective
/// input window token budget while keeping both head and tail context.
pub(super) fn build_stage_one_input_message(
model_info: &ModelInfo,
rollout_path: &Path,
rollout_cwd: &Path,
rollout_contents: &str,
) -> anyhow::Result<String> {
let truncated_rollout_contents =
truncate_text(rollout_contents, TruncationPolicy::Tokens(150_000));
let rollout_token_limit = model_info
.context_window
.and_then(|limit| (limit > 0).then_some(limit))
.map(|limit| limit.saturating_mul(model_info.effective_context_window_percent) / 100)
.map(|limit| (limit.saturating_mul(STAGE_ONE_CONTEXT_WINDOW_PERCENT) / 100).max(1))
.and_then(|limit| usize::try_from(limit).ok())
.unwrap_or(DEFAULT_STAGE_ONE_ROLLOUT_TOKEN_LIMIT);
let truncated_rollout_contents = truncate_text(
rollout_contents,
TruncationPolicy::Tokens(rollout_token_limit),
);
let rollout_path = rollout_path.display().to_string();
let rollout_cwd = rollout_cwd.display().to_string();
@@ -70,6 +84,10 @@ pub(crate) async fn build_memory_tool_developer_instructions(codex_home: &Path)
.ok()?
.trim()
.to_string();
let memory_summary = truncate_text(
&memory_summary,
TruncationPolicy::Tokens(MEMORY_TOOL_DEVELOPER_INSTRUCTIONS_SUMMARY_TOKEN_LIMIT),
);
if memory_summary.is_empty() {
return None;
}
@@ -84,12 +102,25 @@ pub(crate) async fn build_memory_tool_developer_instructions(codex_home: &Path)
#[cfg(test)]
mod tests {
use super::*;
use crate::models_manager::model_info::model_info_from_slug;
#[test]
fn build_stage_one_input_message_truncates_rollout_with_standard_policy() {
fn build_stage_one_input_message_truncates_rollout_using_model_context_window() {
let input = format!("{}{}{}", "a".repeat(700_000), "middle", "z".repeat(700_000));
let expected_truncated = truncate_text(&input, TruncationPolicy::Tokens(150_000));
let mut model_info = model_info_from_slug("gpt-5.2-codex");
model_info.context_window = Some(123_000);
let expected_rollout_token_limit = usize::try_from(
((123_000_i64 * model_info.effective_context_window_percent) / 100)
* STAGE_ONE_CONTEXT_WINDOW_PERCENT
/ 100,
)
.unwrap();
let expected_truncated = truncate_text(
&input,
TruncationPolicy::Tokens(expected_rollout_token_limit),
);
let message = build_stage_one_input_message(
&model_info,
Path::new("/tmp/rollout.jsonl"),
Path::new("/tmp"),
&input,
@@ -101,4 +132,24 @@ mod tests {
assert!(expected_truncated.ends_with('z'));
assert!(message.contains(&expected_truncated));
}
// Fallback path: when `ModelInfo::context_window` is `None`, stage-1 rollout
// truncation must use `DEFAULT_STAGE_ONE_ROLLOUT_TOKEN_LIMIT` instead of a
// budget derived from the model's window.
#[test]
fn build_stage_one_input_message_uses_default_limit_when_model_context_window_missing() {
// Oversized payload (~1.4M chars) with distinct head/tail markers so the
// head-and-tail truncation is observable.
let input = format!("{}{}{}", "a".repeat(700_000), "middle", "z".repeat(700_000));
let mut model_info = model_info_from_slug("gpt-5.2-codex");
// Simulate model metadata that carries no usable context window.
model_info.context_window = None;
// Expected text: the same input truncated with the hard-coded fallback budget.
let expected_truncated = truncate_text(
&input,
TruncationPolicy::Tokens(DEFAULT_STAGE_ONE_ROLLOUT_TOKEN_LIMIT),
);
let message = build_stage_one_input_message(
&model_info,
Path::new("/tmp/rollout.jsonl"),
Path::new("/tmp"),
&input,
)
.unwrap();
// The built message must embed exactly the fallback-truncated rollout text.
assert!(message.contains(&expected_truncated));
}
}

View File

@@ -61,8 +61,13 @@ pub(super) async fn extract_stage_one_output(
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText {
text: build_stage_one_input_message(rollout_path, rollout_cwd, &rollout_contents)
.map_err(|_e| "error while building the prompt")?,
text: build_stage_one_input_message(
&stage_one_context.model_info,
rollout_path,
rollout_cwd,
&rollout_contents,
)
.map_err(|_e| "error while building the prompt")?,
}],
end_turn: None,
phase: None,