[codex] Make realtime startup context truncation deterministic (#18172)

## Summary

- remove the final whole-blob truncation pass from realtime
startup-context assembly
- enforce fixed per-section budgets that also count each section's heading
- keep the existing per-section caps and raise the overall realtime
startup-context budget to `5300`, matching the sum of those section
budgets
- add focused tests for the new wrapping and section-budget behavior

## Why

The previous flow truncated each section and then middle-truncated the
final combined startup-context blob again. Small input changes could
shift that combined cut point, which made retained context unstable and
caused nondeterministic tests.

## Impact

Startup context now preserves section boundaries and ordering
deterministically. Each section is still budgeted independently, but the
final assembled blob is no longer truncated again as a single opaque
string. To match that design, the overall startup-context token budget
is raised to the sum of the existing section budgets instead of
lowering the section caps.

## Validation

- `cargo +1.93.0 test -p codex-core realtime_context`
- `cargo +1.93.0 test -p codex-core --test all
suite::realtime_conversation::conversation_start_injects_startup_context_from_thread_history
-- --exact`
- `cargo +1.93.0 test -p codex-core --test all
suite::realtime_conversation::conversation_startup_context_current_thread_selects_many_turns_by_budget
-- --exact`
- `cargo +1.93.0 test -p codex-core --test all
suite::realtime_conversation::conversation_startup_context_falls_back_to_workspace_map
-- --exact`
- `cargo +1.93.0 test -p codex-core --test all
suite::realtime_conversation::conversation_startup_context_is_truncated_and_sent_once_per_start
-- --exact`
This commit is contained in:
bxie-openai
2026-04-16 13:51:43 -07:00
committed by GitHub
parent ec8d4bfc77
commit 37bf42d5d5
3 changed files with 68 additions and 34 deletions

View File

@@ -108,9 +108,10 @@ pub(crate) async fn build_realtime_startup_context(
parts.push(section);
}
let context = format_startup_context_blob(&parts.join("\n\n"), budget_tokens);
let context = format_startup_context_blob(&parts.join("\n\n"));
debug!(
approx_tokens = approx_token_count(&context),
requested_budget_tokens = budget_tokens,
bytes = context.len(),
has_current_thread_section,
has_recent_work_section,
@@ -440,31 +441,22 @@ fn format_section(title: &str, body: Option<String>, budget_tokens: usize) -> Op
return None;
}
Some(format!(
"## {title}\n{}",
truncate_text(body, TruncationPolicy::Tokens(budget_tokens))
))
let heading = format!("## {title}\n");
let body_budget = budget_tokens.saturating_sub(approx_token_count(&heading));
if body_budget == 0 {
return None;
}
let body = truncate_realtime_text_to_token_budget(body, body_budget);
if body.is_empty() {
return None;
}
Some(format!("{heading}{body}"))
}
fn format_startup_context_blob(body: &str, budget_tokens: usize) -> String {
let wrapper = format!("{STARTUP_CONTEXT_OPEN_TAG}\n\n{STARTUP_CONTEXT_CLOSE_TAG}");
let mut body_budget = budget_tokens.saturating_sub(approx_token_count(&wrapper));
loop {
let body = truncate_text(body, TruncationPolicy::Tokens(body_budget));
let wrapped = format!("{STARTUP_CONTEXT_OPEN_TAG}\n{body}\n{STARTUP_CONTEXT_CLOSE_TAG}");
let wrapped_tokens = approx_token_count(&wrapped);
if wrapped_tokens <= budget_tokens || body_budget == 0 {
return wrapped;
}
let excess_tokens = wrapped_tokens.saturating_sub(budget_tokens);
let next_budget = body_budget.saturating_sub(excess_tokens.max(1));
if next_budget == body_budget {
return wrapped;
}
body_budget = next_budget;
}
fn format_startup_context_blob(body: &str) -> String {
format!("{STARTUP_CONTEXT_OPEN_TAG}\n{body}\n{STARTUP_CONTEXT_CLOSE_TAG}")
}
async fn format_thread_group(

View File

@@ -1,6 +1,12 @@
use super::CURRENT_THREAD_SECTION_TOKEN_BUDGET;
use super::NOTES_SECTION_TOKEN_BUDGET;
use super::RECENT_WORK_SECTION_TOKEN_BUDGET;
use super::STARTUP_CONTEXT_HEADER;
use super::WORKSPACE_SECTION_TOKEN_BUDGET;
use super::build_current_thread_section;
use super::build_recent_work_section;
use super::build_workspace_section_with_user_root;
use super::format_section;
use super::format_startup_context_blob;
use chrono::TimeZone;
use chrono::Utc;
@@ -172,20 +178,56 @@ fn current_thread_section_keeps_latest_turns_when_history_exceeds_budget() {
}
#[test]
fn startup_context_blob_is_wrapped_in_tags_and_fits_budget() {
let body = format!(
"Startup context from Codex.\n{}\n{}",
"recent work ".repeat(1_200),
"workspace tree ".repeat(800),
);
fn startup_context_blob_is_wrapped_in_tags_without_final_truncation() {
let body = "Startup context from Codex.\n## Current Thread\nhello";
let wrapped = format_startup_context_blob(body);
let wrapped = format_startup_context_blob(&body, /*budget_tokens*/ 200);
assert_eq!(
wrapped,
"<startup_context>\nStartup context from Codex.\n## Current Thread\nhello\n</startup_context>"
);
}
#[test]
fn fixed_section_budgets_apply_per_section_without_total_blob_truncation() {
let body = [
STARTUP_CONTEXT_HEADER.to_string(),
format_section(
"Current Thread",
Some("current thread ".repeat(2_000)),
CURRENT_THREAD_SECTION_TOKEN_BUDGET,
)
.expect("current thread section"),
format_section(
"Recent Work",
Some("recent work ".repeat(3_000)),
RECENT_WORK_SECTION_TOKEN_BUDGET,
)
.expect("recent work section"),
format_section(
"Machine / Workspace Map",
Some("workspace map ".repeat(2_500)),
WORKSPACE_SECTION_TOKEN_BUDGET,
)
.expect("workspace section"),
format_section(
"Notes",
Some("notes ".repeat(500)),
NOTES_SECTION_TOKEN_BUDGET,
)
.expect("notes section"),
]
.join("\n\n");
let wrapped = format_startup_context_blob(&body);
assert!(wrapped.starts_with("<startup_context>\n"));
assert!(wrapped.ends_with("\n</startup_context>"));
assert!(wrapped.contains("Startup context from Codex."));
assert!(wrapped.contains("tokens truncated"));
assert!(wrapped.len().div_ceil(4) <= 200);
assert!(wrapped.contains("## Current Thread"));
assert!(wrapped.contains("## Recent Work"));
assert!(wrapped.contains("## Machine / Workspace Map"));
assert!(wrapped.contains("## Notes"));
}
#[tokio::test]

View File

@@ -63,7 +63,7 @@ const AUDIO_IN_QUEUE_CAPACITY: usize = 256;
const USER_TEXT_IN_QUEUE_CAPACITY: usize = 64;
const HANDOFF_OUT_QUEUE_CAPACITY: usize = 64;
const OUTPUT_EVENTS_QUEUE_CAPACITY: usize = 256;
const REALTIME_STARTUP_CONTEXT_TOKEN_BUDGET: usize = 5_000;
const REALTIME_STARTUP_CONTEXT_TOKEN_BUDGET: usize = 5_300;
const DEFAULT_REALTIME_MODEL: &str = "gpt-realtime-1.5";
pub(crate) const REALTIME_USER_TEXT_PREFIX: &str = "[USER] ";
pub(crate) const REALTIME_BACKEND_TEXT_PREFIX: &str = "[BACKEND] ";