mirror of
https://github.com/openai/codex.git
synced 2026-06-01 19:02:59 +00:00
[codex] Make realtime startup context truncation deterministic (#18172)
## Summary

- remove the final whole-blob truncation pass from realtime startup-context assembly
- enforce fixed per-section budgets, including each section heading
- keep the existing per-section caps and raise the overall realtime startup-context budget to `5300`, matching the sum of those section budgets
- add focused tests for the new wrapping and section-budget behavior

## Why

The previous flow truncated each section and then middle-truncated the final combined startup-context blob again. Small input changes could shift that combined cut point, which made retained context unstable and caused nondeterministic tests.

## Impact

Startup context now preserves section boundaries and ordering deterministically. Each section is still budgeted independently, but the final assembled blob is no longer truncated again as a single opaque string. To match that design, the overall startup-context token budget is updated to the sum of the existing section budgets rather than lowering the section caps.

## Validation

- `cargo +1.93.0 test -p codex-core realtime_context`
- `cargo +1.93.0 test -p codex-core --test all suite::realtime_conversation::conversation_start_injects_startup_context_from_thread_history -- --exact`
- `cargo +1.93.0 test -p codex-core --test all suite::realtime_conversation::conversation_startup_context_current_thread_selects_many_turns_by_budget -- --exact`
- `cargo +1.93.0 test -p codex-core --test all suite::realtime_conversation::conversation_startup_context_falls_back_to_workspace_map -- --exact`
- `cargo +1.93.0 test -p codex-core --test all suite::realtime_conversation::conversation_startup_context_is_truncated_and_sent_once_per_start -- --exact`
This commit is contained in:
@@ -108,9 +108,10 @@ pub(crate) async fn build_realtime_startup_context(
|
||||
parts.push(section);
|
||||
}
|
||||
|
||||
let context = format_startup_context_blob(&parts.join("\n\n"), budget_tokens);
|
||||
let context = format_startup_context_blob(&parts.join("\n\n"));
|
||||
debug!(
|
||||
approx_tokens = approx_token_count(&context),
|
||||
requested_budget_tokens = budget_tokens,
|
||||
bytes = context.len(),
|
||||
has_current_thread_section,
|
||||
has_recent_work_section,
|
||||
@@ -440,31 +441,22 @@ fn format_section(title: &str, body: Option<String>, budget_tokens: usize) -> Op
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(format!(
|
||||
"## {title}\n{}",
|
||||
truncate_text(body, TruncationPolicy::Tokens(budget_tokens))
|
||||
))
|
||||
let heading = format!("## {title}\n");
|
||||
let body_budget = budget_tokens.saturating_sub(approx_token_count(&heading));
|
||||
if body_budget == 0 {
|
||||
return None;
|
||||
}
|
||||
|
||||
let body = truncate_realtime_text_to_token_budget(body, body_budget);
|
||||
if body.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(format!("{heading}{body}"))
|
||||
}
|
||||
|
||||
fn format_startup_context_blob(body: &str, budget_tokens: usize) -> String {
|
||||
let wrapper = format!("{STARTUP_CONTEXT_OPEN_TAG}\n\n{STARTUP_CONTEXT_CLOSE_TAG}");
|
||||
let mut body_budget = budget_tokens.saturating_sub(approx_token_count(&wrapper));
|
||||
|
||||
loop {
|
||||
let body = truncate_text(body, TruncationPolicy::Tokens(body_budget));
|
||||
let wrapped = format!("{STARTUP_CONTEXT_OPEN_TAG}\n{body}\n{STARTUP_CONTEXT_CLOSE_TAG}");
|
||||
let wrapped_tokens = approx_token_count(&wrapped);
|
||||
if wrapped_tokens <= budget_tokens || body_budget == 0 {
|
||||
return wrapped;
|
||||
}
|
||||
|
||||
let excess_tokens = wrapped_tokens.saturating_sub(budget_tokens);
|
||||
let next_budget = body_budget.saturating_sub(excess_tokens.max(1));
|
||||
if next_budget == body_budget {
|
||||
return wrapped;
|
||||
}
|
||||
body_budget = next_budget;
|
||||
}
|
||||
fn format_startup_context_blob(body: &str) -> String {
|
||||
format!("{STARTUP_CONTEXT_OPEN_TAG}\n{body}\n{STARTUP_CONTEXT_CLOSE_TAG}")
|
||||
}
|
||||
|
||||
async fn format_thread_group(
|
||||
|
||||
@@ -1,6 +1,12 @@
|
||||
use super::CURRENT_THREAD_SECTION_TOKEN_BUDGET;
|
||||
use super::NOTES_SECTION_TOKEN_BUDGET;
|
||||
use super::RECENT_WORK_SECTION_TOKEN_BUDGET;
|
||||
use super::STARTUP_CONTEXT_HEADER;
|
||||
use super::WORKSPACE_SECTION_TOKEN_BUDGET;
|
||||
use super::build_current_thread_section;
|
||||
use super::build_recent_work_section;
|
||||
use super::build_workspace_section_with_user_root;
|
||||
use super::format_section;
|
||||
use super::format_startup_context_blob;
|
||||
use chrono::TimeZone;
|
||||
use chrono::Utc;
|
||||
@@ -172,20 +178,56 @@ fn current_thread_section_keeps_latest_turns_when_history_exceeds_budget() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn startup_context_blob_is_wrapped_in_tags_and_fits_budget() {
|
||||
let body = format!(
|
||||
"Startup context from Codex.\n{}\n{}",
|
||||
"recent work ".repeat(1_200),
|
||||
"workspace tree ".repeat(800),
|
||||
);
|
||||
fn startup_context_blob_is_wrapped_in_tags_without_final_truncation() {
|
||||
let body = "Startup context from Codex.\n## Current Thread\nhello";
|
||||
let wrapped = format_startup_context_blob(body);
|
||||
|
||||
let wrapped = format_startup_context_blob(&body, /*budget_tokens*/ 200);
|
||||
assert_eq!(
|
||||
wrapped,
|
||||
"<startup_context>\nStartup context from Codex.\n## Current Thread\nhello\n</startup_context>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fixed_section_budgets_apply_per_section_without_total_blob_truncation() {
|
||||
let body = [
|
||||
STARTUP_CONTEXT_HEADER.to_string(),
|
||||
format_section(
|
||||
"Current Thread",
|
||||
Some("current thread ".repeat(2_000)),
|
||||
CURRENT_THREAD_SECTION_TOKEN_BUDGET,
|
||||
)
|
||||
.expect("current thread section"),
|
||||
format_section(
|
||||
"Recent Work",
|
||||
Some("recent work ".repeat(3_000)),
|
||||
RECENT_WORK_SECTION_TOKEN_BUDGET,
|
||||
)
|
||||
.expect("recent work section"),
|
||||
format_section(
|
||||
"Machine / Workspace Map",
|
||||
Some("workspace map ".repeat(2_500)),
|
||||
WORKSPACE_SECTION_TOKEN_BUDGET,
|
||||
)
|
||||
.expect("workspace section"),
|
||||
format_section(
|
||||
"Notes",
|
||||
Some("notes ".repeat(500)),
|
||||
NOTES_SECTION_TOKEN_BUDGET,
|
||||
)
|
||||
.expect("notes section"),
|
||||
]
|
||||
.join("\n\n");
|
||||
|
||||
let wrapped = format_startup_context_blob(&body);
|
||||
|
||||
assert!(wrapped.starts_with("<startup_context>\n"));
|
||||
assert!(wrapped.ends_with("\n</startup_context>"));
|
||||
assert!(wrapped.contains("Startup context from Codex."));
|
||||
assert!(wrapped.contains("tokens truncated"));
|
||||
assert!(wrapped.len().div_ceil(4) <= 200);
|
||||
assert!(wrapped.contains("## Current Thread"));
|
||||
assert!(wrapped.contains("## Recent Work"));
|
||||
assert!(wrapped.contains("## Machine / Workspace Map"));
|
||||
assert!(wrapped.contains("## Notes"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
||||
@@ -63,7 +63,7 @@ const AUDIO_IN_QUEUE_CAPACITY: usize = 256;
|
||||
const USER_TEXT_IN_QUEUE_CAPACITY: usize = 64;
|
||||
const HANDOFF_OUT_QUEUE_CAPACITY: usize = 64;
|
||||
const OUTPUT_EVENTS_QUEUE_CAPACITY: usize = 256;
|
||||
const REALTIME_STARTUP_CONTEXT_TOKEN_BUDGET: usize = 5_000;
|
||||
const REALTIME_STARTUP_CONTEXT_TOKEN_BUDGET: usize = 5_300;
|
||||
const DEFAULT_REALTIME_MODEL: &str = "gpt-realtime-1.5";
|
||||
pub(crate) const REALTIME_USER_TEXT_PREFIX: &str = "[USER] ";
|
||||
pub(crate) const REALTIME_BACKEND_TEXT_PREFIX: &str = "[BACKEND] ";
|
||||
|
||||
Reference in New Issue
Block a user