From 37bf42d5d560854b8e04801cc62685e73e87ae8b Mon Sep 17 00:00:00 2001
From: bxie-openai
Date: Thu, 16 Apr 2026 13:51:43 -0700
Subject: [PATCH] [codex] Make realtime startup context truncation deterministic (#18172)

## Summary

- remove the final whole-blob truncation pass from realtime startup-context assembly
- enforce fixed per-section budgets, including each section heading
- keep the existing per-section caps and raise the overall realtime startup-context budget to `5300`, matching the sum of those section budgets
- add focused tests for the new wrapping and section-budget behavior

## Why

The previous flow truncated each section and then middle-truncated the final combined startup-context blob again. Small input changes could shift that combined cut point, which made retained context unstable and caused nondeterministic tests.

## Impact

Startup context now preserves section boundaries and ordering deterministically. Each section is still budgeted independently, but the final assembled blob is no longer truncated again as a single opaque string. To match that design, the overall startup-context token budget is updated to the sum of the existing section budgets rather than lowering the section caps.
## Validation - `cargo +1.93.0 test -p codex-core realtime_context` - `cargo +1.93.0 test -p codex-core --test all suite::realtime_conversation::conversation_start_injects_startup_context_from_thread_history -- --exact` - `cargo +1.93.0 test -p codex-core --test all suite::realtime_conversation::conversation_startup_context_current_thread_selects_many_turns_by_budget -- --exact` - `cargo +1.93.0 test -p codex-core --test all suite::realtime_conversation::conversation_startup_context_falls_back_to_workspace_map -- --exact` - `cargo +1.93.0 test -p codex-core --test all suite::realtime_conversation::conversation_startup_context_is_truncated_and_sent_once_per_start -- --exact` --- codex-rs/core/src/realtime_context.rs | 40 ++++++-------- codex-rs/core/src/realtime_context_tests.rs | 60 +++++++++++++++++---- codex-rs/core/src/realtime_conversation.rs | 2 +- 3 files changed, 68 insertions(+), 34 deletions(-) diff --git a/codex-rs/core/src/realtime_context.rs b/codex-rs/core/src/realtime_context.rs index fc17dab99b..c23394fa37 100644 --- a/codex-rs/core/src/realtime_context.rs +++ b/codex-rs/core/src/realtime_context.rs @@ -108,9 +108,10 @@ pub(crate) async fn build_realtime_startup_context( parts.push(section); } - let context = format_startup_context_blob(&parts.join("\n\n"), budget_tokens); + let context = format_startup_context_blob(&parts.join("\n\n")); debug!( approx_tokens = approx_token_count(&context), + requested_budget_tokens = budget_tokens, bytes = context.len(), has_current_thread_section, has_recent_work_section, @@ -440,31 +441,22 @@ fn format_section(title: &str, body: Option, budget_tokens: usize) -> Op return None; } - Some(format!( - "## {title}\n{}", - truncate_text(body, TruncationPolicy::Tokens(budget_tokens)) - )) + let heading = format!("## {title}\n"); + let body_budget = budget_tokens.saturating_sub(approx_token_count(&heading)); + if body_budget == 0 { + return None; + } + + let body = truncate_realtime_text_to_token_budget(body, body_budget); 
+ if body.is_empty() { + return None; + } + + Some(format!("{heading}{body}")) } -fn format_startup_context_blob(body: &str, budget_tokens: usize) -> String { - let wrapper = format!("{STARTUP_CONTEXT_OPEN_TAG}\n\n{STARTUP_CONTEXT_CLOSE_TAG}"); - let mut body_budget = budget_tokens.saturating_sub(approx_token_count(&wrapper)); - - loop { - let body = truncate_text(body, TruncationPolicy::Tokens(body_budget)); - let wrapped = format!("{STARTUP_CONTEXT_OPEN_TAG}\n{body}\n{STARTUP_CONTEXT_CLOSE_TAG}"); - let wrapped_tokens = approx_token_count(&wrapped); - if wrapped_tokens <= budget_tokens || body_budget == 0 { - return wrapped; - } - - let excess_tokens = wrapped_tokens.saturating_sub(budget_tokens); - let next_budget = body_budget.saturating_sub(excess_tokens.max(1)); - if next_budget == body_budget { - return wrapped; - } - body_budget = next_budget; - } +fn format_startup_context_blob(body: &str) -> String { + format!("{STARTUP_CONTEXT_OPEN_TAG}\n{body}\n{STARTUP_CONTEXT_CLOSE_TAG}") } async fn format_thread_group( diff --git a/codex-rs/core/src/realtime_context_tests.rs b/codex-rs/core/src/realtime_context_tests.rs index 4f62affcae..99cb00f0de 100644 --- a/codex-rs/core/src/realtime_context_tests.rs +++ b/codex-rs/core/src/realtime_context_tests.rs @@ -1,6 +1,12 @@ +use super::CURRENT_THREAD_SECTION_TOKEN_BUDGET; +use super::NOTES_SECTION_TOKEN_BUDGET; +use super::RECENT_WORK_SECTION_TOKEN_BUDGET; +use super::STARTUP_CONTEXT_HEADER; +use super::WORKSPACE_SECTION_TOKEN_BUDGET; use super::build_current_thread_section; use super::build_recent_work_section; use super::build_workspace_section_with_user_root; +use super::format_section; use super::format_startup_context_blob; use chrono::TimeZone; use chrono::Utc; @@ -172,20 +178,56 @@ fn current_thread_section_keeps_latest_turns_when_history_exceeds_budget() { } #[test] -fn startup_context_blob_is_wrapped_in_tags_and_fits_budget() { - let body = format!( - "Startup context from Codex.\n{}\n{}", - "recent work 
".repeat(1_200), - "workspace tree ".repeat(800), - ); +fn startup_context_blob_is_wrapped_in_tags_without_final_truncation() { + let body = "Startup context from Codex.\n## Current Thread\nhello"; + let wrapped = format_startup_context_blob(body); - let wrapped = format_startup_context_blob(&body, /*budget_tokens*/ 200); + assert_eq!( + wrapped, + "\nStartup context from Codex.\n## Current Thread\nhello\n" + ); +} + +#[test] +fn fixed_section_budgets_apply_per_section_without_total_blob_truncation() { + let body = [ + STARTUP_CONTEXT_HEADER.to_string(), + format_section( + "Current Thread", + Some("current thread ".repeat(2_000)), + CURRENT_THREAD_SECTION_TOKEN_BUDGET, + ) + .expect("current thread section"), + format_section( + "Recent Work", + Some("recent work ".repeat(3_000)), + RECENT_WORK_SECTION_TOKEN_BUDGET, + ) + .expect("recent work section"), + format_section( + "Machine / Workspace Map", + Some("workspace map ".repeat(2_500)), + WORKSPACE_SECTION_TOKEN_BUDGET, + ) + .expect("workspace section"), + format_section( + "Notes", + Some("notes ".repeat(500)), + NOTES_SECTION_TOKEN_BUDGET, + ) + .expect("notes section"), + ] + .join("\n\n"); + + let wrapped = format_startup_context_blob(&body); assert!(wrapped.starts_with("\n")); assert!(wrapped.ends_with("\n")); - assert!(wrapped.contains("Startup context from Codex.")); assert!(wrapped.contains("tokens truncated")); - assert!(wrapped.len().div_ceil(4) <= 200); + assert!(wrapped.contains("## Current Thread")); + assert!(wrapped.contains("## Recent Work")); + assert!(wrapped.contains("## Machine / Workspace Map")); + assert!(wrapped.contains("## Notes")); } #[tokio::test] diff --git a/codex-rs/core/src/realtime_conversation.rs b/codex-rs/core/src/realtime_conversation.rs index b79060259c..234dd78f38 100644 --- a/codex-rs/core/src/realtime_conversation.rs +++ b/codex-rs/core/src/realtime_conversation.rs @@ -63,7 +63,7 @@ const AUDIO_IN_QUEUE_CAPACITY: usize = 256; const USER_TEXT_IN_QUEUE_CAPACITY: usize = 64; 
const HANDOFF_OUT_QUEUE_CAPACITY: usize = 64; const OUTPUT_EVENTS_QUEUE_CAPACITY: usize = 256; -const REALTIME_STARTUP_CONTEXT_TOKEN_BUDGET: usize = 5_000; +const REALTIME_STARTUP_CONTEXT_TOKEN_BUDGET: usize = 5_300; const DEFAULT_REALTIME_MODEL: &str = "gpt-realtime-1.5"; pub(crate) const REALTIME_USER_TEXT_PREFIX: &str = "[USER] "; pub(crate) const REALTIME_BACKEND_TEXT_PREFIX: &str = "[BACKEND] ";