Compare commits

...

6 Commits

Author SHA1 Message Date
Charles Cunningham
72aa904a82 core: remove misleading history delta comment 2026-02-08 15:37:46 -08:00
Charles Cunningham
706ba0ab25 core: share model-visible byte estimator for history metrics 2026-02-08 15:23:30 -08:00
Charles Cunningham
6c616e5d0c core tests: fix compact suite for model-item delta boundary 2026-02-07 12:32:02 -08:00
Charles Cunningham
267ad8806f core: estimate post-usage delta after last model item 2026-02-07 12:32:02 -08:00
Charles Cunningham
027f4318dd core: estimate post-usage tokens from all added items 2026-02-07 12:32:02 -08:00
Charles Cunningham
055c82c98b core: log compaction payload and token usage breakdown on failure 2026-02-07 12:32:02 -08:00
6 changed files with 286 additions and 68 deletions

View File

@@ -114,6 +114,7 @@ use crate::config::resolve_web_search_mode_for_turn;
use crate::config::types::McpServerConfig;
use crate::config::types::ShellEnvironmentPolicy;
use crate::context_manager::ContextManager;
use crate::context_manager::TotalTokenUsageBreakdown;
use crate::environment_context::EnvironmentContext;
use crate::error::CodexErr;
use crate::error::Result as CodexResult;
@@ -1214,12 +1215,20 @@ impl Session {
format!("auto-compact-{id}")
}
async fn get_total_token_usage(&self) -> i64 {
pub(crate) async fn get_total_token_usage(&self) -> i64 {
let state = self.state.lock().await;
state.get_total_token_usage(state.server_reasoning_included())
}
async fn get_estimated_token_count(&self, turn_context: &TurnContext) -> Option<i64> {
/// Returns the token-usage breakdown tracked by the session's context
/// manager (last API response totals plus estimates for items added since).
///
/// Takes the session state lock for the duration of the read.
pub(crate) async fn get_total_token_usage_breakdown(&self) -> TotalTokenUsageBreakdown {
    let state = self.state.lock().await;
    state.get_total_token_usage_breakdown()
}
/// Estimates the token count of the current conversation history for the
/// given turn's model, delegating to `ContextManager::estimate_token_count`.
///
/// Returns `None` when no estimate is available. Takes the session state
/// lock for the duration of the read.
pub(crate) async fn get_estimated_token_count(
    &self,
    turn_context: &TurnContext,
) -> Option<i64> {
    let state = self.state.lock().await;
    state.history.estimate_token_count(turn_context)
}

View File

@@ -4,16 +4,20 @@ use crate::Prompt;
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::context_manager::ContextManager;
use crate::context_manager::TotalTokenUsageBreakdown;
use crate::context_manager::is_codex_generated_item;
use crate::error::CodexErr;
use crate::error::Result as CodexResult;
use crate::protocol::CompactedItem;
use crate::protocol::EventMsg;
use crate::protocol::RolloutItem;
use crate::protocol::TurnStartedEvent;
use codex_api::CompactionInput as ApiCompactionInput;
use codex_protocol::items::ContextCompactionItem;
use codex_protocol::items::TurnItem;
use codex_protocol::models::BaseInstructions;
use codex_protocol::models::ResponseItem;
use tracing::error;
use tracing::info;
pub(crate) async fn run_inline_remote_auto_compact_task(
@@ -90,7 +94,7 @@ async fn run_remote_compact_task_inner_impl(
output_schema: None,
};
let mut new_history = sess
let compact_result = sess
.services
.model_client
.compact_conversation_history(
@@ -98,7 +102,25 @@ async fn run_remote_compact_task_inner_impl(
&turn_context.model_info,
&turn_context.otel_manager,
)
.await?;
.await;
let mut new_history = match compact_result {
Ok(history) => history,
Err(err) => {
let total_usage_breakdown = sess.get_total_token_usage_breakdown().await;
let compact_request_metrics = build_compact_request_metrics(
&turn_context.model_info.slug,
&prompt.input,
&prompt.base_instructions.text,
);
log_remote_compact_failure(
turn_context,
&compact_request_metrics,
total_usage_breakdown,
&err,
);
return Err(err);
}
};
new_history = sess
.process_compacted_history(turn_context, new_history)
.await;
@@ -121,6 +143,66 @@ async fn run_remote_compact_task_inner_impl(
Ok(())
}
/// Snapshot of the compaction request payload, captured for diagnostic
/// logging when a remote compaction call fails.
#[derive(Debug)]
struct CompactRequestMetrics {
    /// Serialized request body as JSON, or a serialization-error stub if
    /// serialization itself failed.
    failing_compaction_request_body_json: String,
    /// Byte length of `failing_compaction_request_body_json`.
    failing_compaction_request_body_bytes: usize,
}
/// Serializes the compaction request payload and records its byte size,
/// for inclusion in failure logs.
///
/// If serialization fails, the JSON field carries a small error object
/// describing the failure instead of the payload.
fn build_compact_request_metrics(
    model: &str,
    input: &[ResponseItem],
    instructions: &str,
) -> CompactRequestMetrics {
    let payload = ApiCompactionInput {
        model,
        input,
        instructions,
    };
    let failing_compaction_request_body_json =
        serde_json::to_string(&payload).unwrap_or_else(|err| {
            // Build the fallback via serde_json so the error text is properly
            // JSON-escaped; the previous hand-built format string produced
            // invalid JSON when the error message contained quotes or
            // backslashes.
            serde_json::json!({ "compact_request_serialization_error": err.to_string() })
                .to_string()
        });
    let failing_compaction_request_body_bytes = failing_compaction_request_body_json.len();
    CompactRequestMetrics {
        failing_compaction_request_body_json,
        failing_compaction_request_body_bytes,
    }
}
/// Maps a compaction failure to the HTTP status code it corresponds to,
/// when one is known.
fn compact_error_status_code(err: &CodexErr) -> Option<u16> {
    match err {
        // Both variants represent client-side request problems (HTTP 400).
        CodexErr::InvalidRequest(_) | CodexErr::ContextWindowExceeded => Some(400),
        // Surface the server-reported status verbatim.
        CodexErr::UnexpectedStatus(status) => Some(status.status.as_u16()),
        _ => None,
    }
}
/// Emits structured error logs describing a failed remote compaction attempt.
///
/// Two separate events are emitted: one carrying the full serialized request
/// payload, and one carrying the token/byte accounting and the error itself
/// (presumably kept separate because the payload can be very large — TODO
/// confirm against log-pipeline limits).
fn log_remote_compact_failure(
    turn_context: &TurnContext,
    metrics: &CompactRequestMetrics,
    total_usage_breakdown: TotalTokenUsageBreakdown,
    err: &CodexErr,
) {
    // Event 1: the exact request body that was sent before the failure.
    error!(
        turn_id = %turn_context.sub_id,
        failing_compaction_request_body_json = %metrics.failing_compaction_request_body_json,
        "remote compaction request payload before failure"
    );
    // Event 2: usage breakdown, context-window size, payload size, and the
    // error — everything needed to correlate the failure with context
    // pressure.
    error!(
        turn_id = %turn_context.sub_id,
        compact_error_status = ?compact_error_status_code(err),
        last_api_response_total_tokens = total_usage_breakdown.last_api_response_total_tokens,
        last_api_response_total_bytes_estimate = total_usage_breakdown.last_api_response_total_bytes_estimate,
        estimated_tokens_of_items_added_since_last_successful_api_response = total_usage_breakdown.estimated_tokens_of_items_added_since_last_successful_api_response,
        estimated_bytes_of_items_added_since_last_successful_api_response = total_usage_breakdown.estimated_bytes_of_items_added_since_last_successful_api_response,
        model_context_window_tokens = ?turn_context.model_context_window(),
        failing_compaction_request_body_bytes = metrics.failing_compaction_request_body_bytes,
        compact_error = %err,
        "remote compaction failed"
    );
}
fn trim_function_call_history_to_fit_context_window(
history: &mut ContextManager,
turn_context: &TurnContext,

View File

@@ -4,6 +4,7 @@ use crate::instructions::SkillInstructions;
use crate::instructions::UserInstructions;
use crate::session_prefix::is_session_prefix;
use crate::truncate::TruncationPolicy;
use crate::truncate::approx_bytes_for_tokens;
use crate::truncate::approx_token_count;
use crate::truncate::approx_tokens_from_byte_count;
use crate::truncate::truncate_function_output_items_with_policy;
@@ -27,6 +28,14 @@ pub(crate) struct ContextManager {
token_info: Option<TokenUsageInfo>,
}
/// Breakdown of total token usage: what the last API response reported,
/// plus local estimates for items added to history since then.
#[derive(Debug, Clone, Copy, Default)]
pub(crate) struct TotalTokenUsageBreakdown {
    /// `total_tokens` from the most recent API response (0 if none yet).
    pub last_api_response_total_tokens: i64,
    /// Approximate byte size corresponding to the tokens above.
    pub last_api_response_total_bytes_estimate: usize,
    /// Estimated tokens for items appended after the last model-emitted item.
    pub estimated_tokens_of_items_added_since_last_successful_api_response: i64,
    /// Estimated bytes for items appended after the last model-emitted item.
    pub estimated_bytes_of_items_added_since_last_successful_api_response: usize,
}
impl ContextManager {
pub(crate) fn new() -> Self {
Self {
@@ -236,15 +245,31 @@ impl ContextManager {
})
}
fn get_trailing_codex_generated_items_tokens(&self) -> i64 {
let mut total = 0i64;
for item in self.items.iter().rev() {
if !is_codex_generated_item(item) {
break;
}
total = total.saturating_add(estimate_item_token_count(item));
}
total
// Local items appended after the most recent model-emitted item; these are
// not yet reflected in `last_token_usage.total_tokens`. When history has no
// model-generated item at all, the result is the empty slice.
fn items_after_last_model_generated_item(&self) -> &[ResponseItem] {
    match self.items.iter().rposition(is_model_generated_item) {
        Some(last) => &self.items[last.saturating_add(1)..],
        None => &[],
    }
}
fn get_items_after_last_model_generated_tokens(&self) -> i64 {
self.items_after_last_model_generated_item()
.iter()
.fold(0i64, |acc, item| {
acc.saturating_add(estimate_item_token_count(item))
})
}
fn get_items_after_last_model_generated_bytes(&self) -> usize {
self.items_after_last_model_generated_item()
.iter()
.fold(0usize, |acc, item| {
acc.saturating_add(estimate_item_model_visible_bytes(item))
})
}
/// When true, the server already accounted for past reasoning tokens and
@@ -255,13 +280,37 @@ impl ContextManager {
.as_ref()
.map(|info| info.last_token_usage.total_tokens)
.unwrap_or(0);
let trailing_codex_generated_tokens = self.get_trailing_codex_generated_items_tokens();
let items_after_last_model_generated_tokens =
self.get_items_after_last_model_generated_tokens();
if server_reasoning_included {
last_tokens.saturating_add(trailing_codex_generated_tokens)
last_tokens.saturating_add(items_after_last_model_generated_tokens)
} else {
last_tokens
.saturating_add(self.get_non_last_reasoning_items_tokens())
.saturating_add(trailing_codex_generated_tokens)
.saturating_add(items_after_last_model_generated_tokens)
}
}
/// Builds the usage breakdown: last API-reported totals plus local
/// estimates for items recorded since the last model-emitted item.
pub(crate) fn get_total_token_usage_breakdown(&self) -> TotalTokenUsageBreakdown {
    let last_usage = match self.token_info.as_ref() {
        Some(info) => info.last_token_usage.clone(),
        None => Default::default(),
    };
    let last_tokens = last_usage.total_tokens;
    // Convert the token total to an approximate byte count; a non-positive
    // total maps to zero bytes, and an unconvertible total saturates.
    let last_bytes_estimate = if last_tokens > 0 {
        usize::try_from(last_tokens).map_or(usize::MAX, approx_bytes_for_tokens)
    } else {
        0
    };
    TotalTokenUsageBreakdown {
        last_api_response_total_tokens: last_tokens,
        last_api_response_total_bytes_estimate: last_bytes_estimate,
        estimated_tokens_of_items_added_since_last_successful_api_response: self
            .get_items_after_last_model_generated_tokens(),
        estimated_bytes_of_items_added_since_last_successful_api_response: self
            .get_items_after_last_model_generated_bytes(),
    }
}
@@ -347,7 +396,11 @@ fn estimate_reasoning_length(encoded_len: usize) -> usize {
.saturating_sub(650)
}
fn estimate_item_token_count(item: &ResponseItem) -> i64 {
/// Serializes an item to its JSON text form; an empty string on failure.
fn model_visible_text(item: &ResponseItem) -> String {
    match serde_json::to_string(item) {
        Ok(json) => json,
        Err(_) => String::new(),
    }
}
fn estimate_item_model_visible_bytes(item: &ResponseItem) -> usize {
match item {
ResponseItem::GhostSnapshot { .. } => 0,
ResponseItem::Reasoning {
@@ -356,14 +409,29 @@ fn estimate_item_token_count(item: &ResponseItem) -> i64 {
}
| ResponseItem::Compaction {
encrypted_content: content,
} => {
let reasoning_bytes = estimate_reasoning_length(content.len());
i64::try_from(approx_tokens_from_byte_count(reasoning_bytes)).unwrap_or(i64::MAX)
}
item => {
let serialized = serde_json::to_string(item).unwrap_or_default();
i64::try_from(approx_token_count(&serialized)).unwrap_or(i64::MAX)
}
} => estimate_reasoning_length(content.len()),
item => model_visible_text(item).len(),
}
}
/// Estimates an item's token count from its model-visible byte size,
/// saturating at `i64::MAX` if the conversion overflows.
fn estimate_item_token_count(item: &ResponseItem) -> i64 {
    let approx_tokens = approx_tokens_from_byte_count(estimate_item_model_visible_bytes(item));
    i64::try_from(approx_tokens).unwrap_or(i64::MAX)
}
/// Whether the item was emitted by the model (as opposed to produced
/// locally, e.g. tool outputs or user messages). The match is deliberately
/// exhaustive so new `ResponseItem` variants force a decision here.
fn is_model_generated_item(item: &ResponseItem) -> bool {
    match item {
        // Messages are model-generated only when authored by the assistant.
        ResponseItem::Message { role, .. } => role == "assistant",
        // Locally produced items.
        ResponseItem::FunctionCallOutput { .. }
        | ResponseItem::CustomToolCallOutput { .. }
        | ResponseItem::GhostSnapshot { .. }
        | ResponseItem::Other => false,
        // Everything the model itself emits.
        ResponseItem::Reasoning { .. }
        | ResponseItem::FunctionCall { .. }
        | ResponseItem::WebSearchCall { .. }
        | ResponseItem::CustomToolCall { .. }
        | ResponseItem::LocalShellCall { .. }
        | ResponseItem::Compaction { .. } => true,
    }
}

View File

@@ -62,13 +62,6 @@ fn user_input_text_msg(text: &str) -> ResponseItem {
}
}
fn function_call_output(call_id: &str, content: &str) -> ResponseItem {
ResponseItem::FunctionCallOutput {
call_id: call_id.to_string(),
output: FunctionCallOutputPayload::from_text(content.to_string()),
}
}
fn custom_tool_call_output(call_id: &str, output: &str) -> ResponseItem {
ResponseItem::CustomToolCallOutput {
call_id: call_id.to_string(),
@@ -189,48 +182,51 @@ fn non_last_reasoning_tokens_ignore_entries_after_last_user() {
}
#[test]
fn trailing_codex_generated_tokens_stop_at_first_non_generated_item() {
let earlier_output = function_call_output("call-earlier", "earlier output");
let trailing_function_output = function_call_output("call-tail-1", "tail function output");
let trailing_custom_output = custom_tool_call_output("call-tail-2", "tail custom output");
let history = create_history_with_items(vec![
earlier_output,
user_msg("boundary item"),
trailing_function_output.clone(),
trailing_custom_output.clone(),
]);
let expected_tokens = estimate_item_token_count(&trailing_function_output)
.saturating_add(estimate_item_token_count(&trailing_custom_output));
fn usage_breakdown_counts_all_items_after_last_model_generated_item() {
let mut history = create_history_with_items(vec![assistant_msg("already counted by API")]);
history.update_token_info(
&TokenUsage {
total_tokens: 100,
..Default::default()
},
None,
);
let added_user = user_msg("new user message");
let added_tool_output = custom_tool_call_output("call-tail", "new tool output");
history.record_items(
[&added_user, &added_tool_output],
TruncationPolicy::Tokens(10_000),
);
let expected_tokens = estimate_item_token_count(&added_user)
.saturating_add(estimate_item_token_count(&added_tool_output));
let expected_bytes = serde_json::to_vec(&added_user)
.map(|bytes| bytes.len())
.unwrap_or_default()
.saturating_add(
serde_json::to_vec(&added_tool_output)
.map(|bytes| bytes.len())
.unwrap_or_default(),
);
assert_eq!(
history.get_trailing_codex_generated_items_tokens(),
history
.get_total_token_usage_breakdown()
.estimated_tokens_of_items_added_since_last_successful_api_response,
expected_tokens
);
assert_eq!(
history
.get_total_token_usage_breakdown()
.estimated_bytes_of_items_added_since_last_successful_api_response,
expected_bytes
);
}
#[test]
fn trailing_codex_generated_tokens_exclude_function_call_tail() {
let history = create_history_with_items(vec![ResponseItem::FunctionCall {
id: None,
name: "not-generated".to_string(),
arguments: "{}".to_string(),
call_id: "call-tail".to_string(),
}]);
assert_eq!(history.get_trailing_codex_generated_items_tokens(), 0);
}
#[test]
fn total_token_usage_includes_only_trailing_codex_generated_items() {
let non_trailing_output = function_call_output("call-before-message", "not trailing");
let trailing_assistant = assistant_msg("assistant boundary");
let trailing_output = custom_tool_call_output("tool-tail", "trailing output");
let mut history = create_history_with_items(vec![
non_trailing_output,
user_msg("boundary"),
trailing_assistant,
trailing_output.clone(),
]);
fn usage_breakdown_counts_no_items_after_last_model_generated_item() {
let mut history = create_history_with_items(vec![assistant_msg("already counted by API")]);
history.update_token_info(
&TokenUsage {
total_tokens: 100,
@@ -239,9 +235,66 @@ fn total_token_usage_includes_only_trailing_codex_generated_items() {
None,
);
assert_eq!(
history
.get_total_token_usage_breakdown()
.estimated_tokens_of_items_added_since_last_successful_api_response,
0
);
assert_eq!(
history
.get_total_token_usage_breakdown()
.estimated_bytes_of_items_added_since_last_successful_api_response,
0
);
}
#[test]
fn usage_breakdown_is_zero_without_model_generated_items() {
    // History contains only a user message — no model-emitted item at all.
    let mut history = create_history_with_items(vec![user_msg("no model output yet")]);
    history.update_token_info(
        &TokenUsage {
            total_tokens: 100,
            ..Default::default()
        },
        None,
    );
    // With no model-generated boundary item, the "added since last
    // successful API response" estimates are zero for both tokens and bytes.
    assert_eq!(
        history
            .get_total_token_usage_breakdown()
            .estimated_tokens_of_items_added_since_last_successful_api_response,
        0
    );
    assert_eq!(
        history
            .get_total_token_usage_breakdown()
            .estimated_bytes_of_items_added_since_last_successful_api_response,
        0
    );
}
#[test]
fn total_token_usage_includes_all_items_after_last_model_generated_item() {
let mut history = create_history_with_items(vec![assistant_msg("already counted by API")]);
history.update_token_info(
&TokenUsage {
total_tokens: 100,
..Default::default()
},
None,
);
let added_user = user_msg("new user message");
let added_tool_output = custom_tool_call_output("tool-tail", "new tool output");
history.record_items(
[&added_user, &added_tool_output],
TruncationPolicy::Tokens(10_000),
);
assert_eq!(
history.get_total_token_usage(true),
100 + estimate_item_token_count(&trailing_output)
100 + estimate_item_token_count(&added_user)
+ estimate_item_token_count(&added_tool_output)
);
}

View File

@@ -2,5 +2,6 @@ mod history;
mod normalize;
pub(crate) use history::ContextManager;
pub(crate) use history::TotalTokenUsageBreakdown;
pub(crate) use history::is_codex_generated_item;
pub(crate) use history::is_user_turn_boundary;

View File

@@ -6,6 +6,7 @@ use std::collections::HashSet;
use crate::codex::SessionConfiguration;
use crate::context_manager::ContextManager;
use crate::context_manager::TotalTokenUsageBreakdown;
use crate::protocol::RateLimitSnapshot;
use crate::protocol::TokenUsage;
use crate::protocol::TokenUsageInfo;
@@ -100,6 +101,10 @@ impl SessionState {
.get_total_token_usage(server_reasoning_included)
}
/// Delegates to the history's `ContextManager` for the token-usage breakdown.
pub(crate) fn get_total_token_usage_breakdown(&self) -> TotalTokenUsageBreakdown {
    self.history.get_total_token_usage_breakdown()
}
pub(crate) fn set_server_reasoning_included(&mut self, included: bool) {
self.server_reasoning_included = included;
}