mirror of
https://github.com/openai/codex.git
synced 2026-05-18 18:22:39 +00:00
- move the shared byte-based middle truncation logic from `core` into `codex-utils-string` - keep token-specific truncation in `codex-core` so rollout can reuse the shared helper in the next stacked PR --------- Co-authored-by: Codex <noreply@openai.com>
143 lines
5.0 KiB
Rust
143 lines
5.0 KiB
Rust
//! Helpers for truncating tool and exec output using [`TruncationPolicy`](codex_protocol::protocol::TruncationPolicy).
|
|
|
|
use codex_protocol::models::FunctionCallOutputContentItem;
|
|
pub use codex_utils_string::approx_bytes_for_tokens;
|
|
pub use codex_utils_string::approx_token_count;
|
|
pub use codex_utils_string::approx_tokens_from_byte_count;
|
|
use codex_utils_string::truncate_middle_chars;
|
|
use codex_utils_string::truncate_middle_with_token_budget;
|
|
|
|
pub use codex_protocol::protocol::TruncationPolicy;
|
|
|
|
pub fn formatted_truncate_text(content: &str, policy: TruncationPolicy) -> String {
|
|
if content.len() <= policy.byte_budget() {
|
|
return content.to_string();
|
|
}
|
|
|
|
let total_lines = content.lines().count();
|
|
let result = truncate_text(content, policy);
|
|
format!("Total output lines: {total_lines}\n\n{result}")
|
|
}
|
|
|
|
pub fn truncate_text(content: &str, policy: TruncationPolicy) -> String {
|
|
match policy {
|
|
TruncationPolicy::Bytes(bytes) => truncate_middle_chars(content, bytes),
|
|
TruncationPolicy::Tokens(tokens) => truncate_middle_with_token_budget(content, tokens).0,
|
|
}
|
|
}
|
|
|
|
pub fn formatted_truncate_text_content_items_with_policy(
|
|
items: &[FunctionCallOutputContentItem],
|
|
policy: TruncationPolicy,
|
|
) -> (Vec<FunctionCallOutputContentItem>, Option<usize>) {
|
|
let text_segments = items
|
|
.iter()
|
|
.filter_map(|item| match item {
|
|
FunctionCallOutputContentItem::InputText { text } => Some(text.as_str()),
|
|
FunctionCallOutputContentItem::InputImage { .. } => None,
|
|
})
|
|
.collect::<Vec<_>>();
|
|
|
|
if text_segments.is_empty() {
|
|
return (items.to_vec(), None);
|
|
}
|
|
|
|
let mut combined = String::new();
|
|
for text in &text_segments {
|
|
if !combined.is_empty() {
|
|
combined.push('\n');
|
|
}
|
|
combined.push_str(text);
|
|
}
|
|
|
|
if combined.len() <= policy.byte_budget() {
|
|
return (items.to_vec(), None);
|
|
}
|
|
|
|
let mut out = vec![FunctionCallOutputContentItem::InputText {
|
|
text: formatted_truncate_text(&combined, policy),
|
|
}];
|
|
out.extend(items.iter().filter_map(|item| match item {
|
|
FunctionCallOutputContentItem::InputImage { image_url, detail } => {
|
|
Some(FunctionCallOutputContentItem::InputImage {
|
|
image_url: image_url.clone(),
|
|
detail: *detail,
|
|
})
|
|
}
|
|
FunctionCallOutputContentItem::InputText { .. } => None,
|
|
}));
|
|
|
|
(out, Some(approx_token_count(&combined)))
|
|
}
|
|
|
|
pub fn truncate_function_output_items_with_policy(
|
|
items: &[FunctionCallOutputContentItem],
|
|
policy: TruncationPolicy,
|
|
) -> Vec<FunctionCallOutputContentItem> {
|
|
let mut out: Vec<FunctionCallOutputContentItem> = Vec::with_capacity(items.len());
|
|
let mut remaining_budget = match policy {
|
|
TruncationPolicy::Bytes(_) => policy.byte_budget(),
|
|
TruncationPolicy::Tokens(_) => policy.token_budget(),
|
|
};
|
|
let mut omitted_text_items = 0usize;
|
|
|
|
for item in items {
|
|
match item {
|
|
FunctionCallOutputContentItem::InputText { text } => {
|
|
if remaining_budget == 0 {
|
|
omitted_text_items += 1;
|
|
continue;
|
|
}
|
|
|
|
let cost = match policy {
|
|
TruncationPolicy::Bytes(_) => text.len(),
|
|
TruncationPolicy::Tokens(_) => approx_token_count(text),
|
|
};
|
|
|
|
if cost <= remaining_budget {
|
|
out.push(FunctionCallOutputContentItem::InputText { text: text.clone() });
|
|
remaining_budget = remaining_budget.saturating_sub(cost);
|
|
} else {
|
|
let snippet_policy = match policy {
|
|
TruncationPolicy::Bytes(_) => TruncationPolicy::Bytes(remaining_budget),
|
|
TruncationPolicy::Tokens(_) => TruncationPolicy::Tokens(remaining_budget),
|
|
};
|
|
let snippet = truncate_text(text, snippet_policy);
|
|
if snippet.is_empty() {
|
|
omitted_text_items += 1;
|
|
} else {
|
|
out.push(FunctionCallOutputContentItem::InputText { text: snippet });
|
|
}
|
|
remaining_budget = 0;
|
|
}
|
|
}
|
|
FunctionCallOutputContentItem::InputImage { image_url, detail } => {
|
|
out.push(FunctionCallOutputContentItem::InputImage {
|
|
image_url: image_url.clone(),
|
|
detail: *detail,
|
|
});
|
|
}
|
|
}
|
|
}
|
|
|
|
if omitted_text_items > 0 {
|
|
out.push(FunctionCallOutputContentItem::InputText {
|
|
text: format!("[omitted {omitted_text_items} text items ...]"),
|
|
});
|
|
}
|
|
|
|
out
|
|
}
|
|
|
|
pub fn approx_tokens_from_byte_count_i64(bytes: i64) -> i64 {
|
|
if bytes <= 0 {
|
|
return 0;
|
|
}
|
|
|
|
let bytes = usize::try_from(bytes).unwrap_or(usize::MAX);
|
|
i64::try_from(approx_tokens_from_byte_count(bytes)).unwrap_or(i64::MAX)
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod truncate_tests;
|