mirror of
https://github.com/openai/codex.git
synced 2026-04-26 07:35:29 +00:00
core: snapshot tests for compaction requests, post-compaction layout, some additional compaction tests (#11487)
This PR keeps compaction context-layout test coverage separate from runtime compaction behavior changes, so runtime logic review can stay focused.

## Included

- Adds reusable context snapshot helpers in `core/tests/common/context_snapshot.rs` for rendering model-visible request/history shapes.
- Standardizes helper naming for readability:
  - `format_request_input_snapshot`
  - `format_response_items_snapshot`
  - `format_labeled_requests_snapshot`
  - `format_labeled_items_snapshot`
- Expands snapshot coverage for both local and remote compaction flows:
  - pre-turn auto-compaction
  - pre-turn failure/context-window-exceeded paths
  - mid-turn continuation compaction
  - manual `/compact` with and without prior user turns
- Captures both sides where relevant:
  - compaction request shape
  - post-compaction history layout shape
- Adds/uses shared request-inspection helpers so assertions target structured request content instead of ad-hoc JSON string parsing.
- Aligns snapshots/assertions to current behavior and leaves explicit `TODO(ccunningham)` notes where behavior is known and intentionally deferred.

## Not Included

- No runtime compaction logic changes.
- No model-visible context/state behavior changes.
This commit is contained in:
committed by
GitHub
parent
fce4ad9cf4
commit
85034b189e
339
codex-rs/core/tests/common/context_snapshot.rs
Normal file
339
codex-rs/core/tests/common/context_snapshot.rs
Normal file
@@ -0,0 +1,339 @@
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::responses::ResponsesRequest;
|
||||
|
||||
/// Selects how much of each item's text the snapshot helpers render.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum ContextSnapshotRenderMode {
    /// Replace recognized boilerplate text with placeholders and escape newlines.
    #[default]
    RedactedText,
    /// Emit the text unchanged apart from escaping newlines.
    FullText,
    /// Emit only the item type (plus the role for messages); text is never rendered.
    KindOnly,
    /// Like `RedactedText`, but cut to at most `max_chars` characters followed by `...`.
    KindWithTextPrefix {
        /// Number of characters kept before the `...` suffix is appended.
        max_chars: usize,
    },
}

/// Options consumed by the snapshot formatting helpers.
///
/// `Default` is derived, so the default render mode is whichever variant of
/// [`ContextSnapshotRenderMode`] is marked `#[default]` (`RedactedText`).
#[derive(Debug, Clone, Default)]
pub struct ContextSnapshotOptions {
    render_mode: ContextSnapshotRenderMode,
}

impl ContextSnapshotOptions {
    /// Builder-style setter: returns the options with `render_mode` replaced.
    pub fn render_mode(mut self, render_mode: ContextSnapshotRenderMode) -> Self {
        self.render_mode = render_mode;
        self
    }
}
|
||||
|
||||
pub fn format_request_input_snapshot(
|
||||
request: &ResponsesRequest,
|
||||
options: &ContextSnapshotOptions,
|
||||
) -> String {
|
||||
let items = request.input();
|
||||
format_response_items_snapshot(items.as_slice(), options)
|
||||
}
|
||||
|
||||
pub fn format_response_items_snapshot(items: &[Value], options: &ContextSnapshotOptions) -> String {
|
||||
items
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(idx, item)| {
|
||||
let Some(item_type) = item.get("type").and_then(Value::as_str) else {
|
||||
return format!("{idx:02}:<MISSING_TYPE>");
|
||||
};
|
||||
|
||||
if options.render_mode == ContextSnapshotRenderMode::KindOnly {
|
||||
return if item_type == "message" {
|
||||
let role = item.get("role").and_then(Value::as_str).unwrap_or("unknown");
|
||||
format!("{idx:02}:message/{role}")
|
||||
} else {
|
||||
format!("{idx:02}:{item_type}")
|
||||
};
|
||||
}
|
||||
|
||||
match item_type {
|
||||
"message" => {
|
||||
let role = item.get("role").and_then(Value::as_str).unwrap_or("unknown");
|
||||
let text = item
|
||||
.get("content")
|
||||
.and_then(Value::as_array)
|
||||
.map(|content| {
|
||||
content
|
||||
.iter()
|
||||
.map(|entry| {
|
||||
if let Some(text) = entry.get("text").and_then(Value::as_str) {
|
||||
return format_snapshot_text(text, options);
|
||||
}
|
||||
let Some(content_type) =
|
||||
entry.get("type").and_then(Value::as_str)
|
||||
else {
|
||||
return "<UNKNOWN_CONTENT_ITEM>".to_string();
|
||||
};
|
||||
let Some(content_object) = entry.as_object() else {
|
||||
return format!("<{content_type}>");
|
||||
};
|
||||
let mut extra_keys = content_object
|
||||
.keys()
|
||||
.filter(|key| *key != "type" && *key != "text")
|
||||
.cloned()
|
||||
.collect::<Vec<String>>();
|
||||
extra_keys.sort();
|
||||
if extra_keys.is_empty() {
|
||||
format!("<{content_type}>")
|
||||
} else {
|
||||
format!("<{content_type}:{}>", extra_keys.join(","))
|
||||
}
|
||||
})
|
||||
.collect::<Vec<String>>()
|
||||
.join(" | ")
|
||||
})
|
||||
.filter(|text| !text.is_empty())
|
||||
.unwrap_or_else(|| "<NO_TEXT>".to_string());
|
||||
format!("{idx:02}:message/{role}:{text}")
|
||||
}
|
||||
"function_call" => {
|
||||
let name = item.get("name").and_then(Value::as_str).unwrap_or("unknown");
|
||||
format!("{idx:02}:function_call/{name}")
|
||||
}
|
||||
"function_call_output" => {
|
||||
let output = item
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.map(|output| format_snapshot_text(output, options))
|
||||
.unwrap_or_else(|| "<NON_STRING_OUTPUT>".to_string());
|
||||
format!("{idx:02}:function_call_output:{output}")
|
||||
}
|
||||
"local_shell_call" => {
|
||||
let command = item
|
||||
.get("action")
|
||||
.and_then(|action| action.get("command"))
|
||||
.and_then(Value::as_array)
|
||||
.map(|parts| {
|
||||
parts
|
||||
.iter()
|
||||
.filter_map(Value::as_str)
|
||||
.collect::<Vec<&str>>()
|
||||
.join(" ")
|
||||
})
|
||||
.map(|command| format_snapshot_text(&command, options))
|
||||
.filter(|cmd| !cmd.is_empty())
|
||||
.unwrap_or_else(|| "<NO_COMMAND>".to_string());
|
||||
format!("{idx:02}:local_shell_call:{command}")
|
||||
}
|
||||
"reasoning" => {
|
||||
let summary_text = item
|
||||
.get("summary")
|
||||
.and_then(Value::as_array)
|
||||
.and_then(|summary| summary.first())
|
||||
.and_then(|entry| entry.get("text"))
|
||||
.and_then(Value::as_str)
|
||||
.map(|text| format_snapshot_text(text, options))
|
||||
.unwrap_or_else(|| "<NO_SUMMARY>".to_string());
|
||||
let has_encrypted_content = item
|
||||
.get("encrypted_content")
|
||||
.and_then(Value::as_str)
|
||||
.is_some_and(|value| !value.is_empty());
|
||||
format!(
|
||||
"{idx:02}:reasoning:summary={summary_text}:encrypted={has_encrypted_content}"
|
||||
)
|
||||
}
|
||||
"compaction" => {
|
||||
let has_encrypted_content = item
|
||||
.get("encrypted_content")
|
||||
.and_then(Value::as_str)
|
||||
.is_some_and(|value| !value.is_empty());
|
||||
format!("{idx:02}:compaction:encrypted={has_encrypted_content}")
|
||||
}
|
||||
other => format!("{idx:02}:{other}"),
|
||||
}
|
||||
})
|
||||
.collect::<Vec<String>>()
|
||||
.join("\n")
|
||||
}
|
||||
|
||||
pub fn format_labeled_requests_snapshot(
|
||||
scenario: &str,
|
||||
sections: &[(&str, &ResponsesRequest)],
|
||||
options: &ContextSnapshotOptions,
|
||||
) -> String {
|
||||
let sections = sections
|
||||
.iter()
|
||||
.map(|(title, request)| {
|
||||
format!(
|
||||
"## {title}\n{}",
|
||||
format_request_input_snapshot(request, options)
|
||||
)
|
||||
})
|
||||
.collect::<Vec<String>>()
|
||||
.join("\n\n");
|
||||
format!("Scenario: {scenario}\n\n{sections}")
|
||||
}
|
||||
|
||||
pub fn format_labeled_items_snapshot(
|
||||
scenario: &str,
|
||||
sections: &[(&str, &[Value])],
|
||||
options: &ContextSnapshotOptions,
|
||||
) -> String {
|
||||
let sections = sections
|
||||
.iter()
|
||||
.map(|(title, items)| {
|
||||
format!(
|
||||
"## {title}\n{}",
|
||||
format_response_items_snapshot(items, options)
|
||||
)
|
||||
})
|
||||
.collect::<Vec<String>>()
|
||||
.join("\n\n");
|
||||
format!("Scenario: {scenario}\n\n{sections}")
|
||||
}
|
||||
|
||||
fn format_snapshot_text(text: &str, options: &ContextSnapshotOptions) -> String {
|
||||
match options.render_mode {
|
||||
ContextSnapshotRenderMode::RedactedText => {
|
||||
canonicalize_snapshot_text(text).replace('\n', "\\n")
|
||||
}
|
||||
ContextSnapshotRenderMode::FullText => text.replace('\n', "\\n"),
|
||||
ContextSnapshotRenderMode::KindWithTextPrefix { max_chars } => {
|
||||
let normalized = canonicalize_snapshot_text(text).replace('\n', "\\n");
|
||||
if normalized.chars().count() <= max_chars {
|
||||
normalized
|
||||
} else {
|
||||
let prefix = normalized.chars().take(max_chars).collect::<String>();
|
||||
format!("{prefix}...")
|
||||
}
|
||||
}
|
||||
ContextSnapshotRenderMode::KindOnly => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Replaces well-known, environment-dependent text with stable placeholders.
///
/// Recognized inputs, checked in order by prefix:
/// - `<permissions instructions>` becomes `<PERMISSIONS_INSTRUCTIONS>`.
/// - `# AGENTS.md instructions for ` becomes `<AGENTS_MD>`.
/// - `<environment_context>` becomes `<ENVIRONMENT_CONTEXT...>`. When a
///   `<cwd>...</cwd>` pair is present the placeholder carries
///   `cwd=PRETURN_CONTEXT_DIFF_CWD` if the cwd ends with that marker and
///   `cwd=<CWD>` otherwise.
/// - The summarization prompt becomes `<SUMMARIZATION_PROMPT>`.
/// - A compaction summary has its first line replaced by `<COMPACTION_SUMMARY>`.
///
/// Any other text is returned unchanged.
fn canonicalize_snapshot_text(text: &str) -> String {
    if text.starts_with("<permissions instructions>") {
        return "<PERMISSIONS_INSTRUCTIONS>".to_string();
    }
    if text.starts_with("# AGENTS.md instructions for ") {
        return "<AGENTS_MD>".to_string();
    }
    if text.starts_with("<environment_context>") {
        // Look for the closing tag only after the opening tag. Searching for
        // both independently and slicing between them panics when a stray
        // `</cwd>` appears before the first `<cwd>`.
        let cwd = text
            .split_once("<cwd>")
            .and_then(|(_, after_open)| after_open.split_once("</cwd>"))
            .map(|(cwd, _)| cwd);
        return match cwd {
            Some(cwd) if cwd.ends_with("PRETURN_CONTEXT_DIFF_CWD") => {
                "<ENVIRONMENT_CONTEXT:cwd=PRETURN_CONTEXT_DIFF_CWD>".to_string()
            }
            Some(_) => "<ENVIRONMENT_CONTEXT:cwd=<CWD>>".to_string(),
            None => "<ENVIRONMENT_CONTEXT>".to_string(),
        };
    }
    if text.starts_with("You are performing a CONTEXT CHECKPOINT COMPACTION.") {
        return "<SUMMARIZATION_PROMPT>".to_string();
    }
    match text.split_once('\n') {
        // Keep everything after the first line; only the preamble is replaced.
        Some((_, summary))
            if text.starts_with("Another language model started to solve this problem") =>
        {
            format!("<COMPACTION_SUMMARY>\n{summary}")
        }
        _ => text.to_string(),
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::ContextSnapshotOptions;
    use super::ContextSnapshotRenderMode;
    use super::format_response_items_snapshot;
    use pretty_assertions::assert_eq;
    use serde_json::json;

    /// Wraps `content` in a single user `message` item.
    fn user_message(content: serde_json::Value) -> Vec<serde_json::Value> {
        vec![json!({
            "type": "message",
            "role": "user",
            "content": content
        })]
    }

    /// A user message whose only text begins with the AGENTS.md preamble.
    fn agents_md_message() -> Vec<serde_json::Value> {
        user_message(json!([{
            "type": "input_text",
            "text": "# AGENTS.md instructions for /tmp/example"
        }]))
    }

    /// Renders `items` with default options switched to `mode`.
    fn render_with_mode(items: &[serde_json::Value], mode: ContextSnapshotRenderMode) -> String {
        let options = ContextSnapshotOptions::default().render_mode(mode);
        format_response_items_snapshot(items, &options)
    }

    #[test]
    fn full_text_mode_preserves_unredacted_text() {
        let rendered = render_with_mode(&agents_md_message(), ContextSnapshotRenderMode::FullText);

        assert_eq!(
            rendered,
            "00:message/user:# AGENTS.md instructions for /tmp/example"
        );
    }

    #[test]
    fn redacted_text_mode_keeps_canonical_placeholders() {
        let rendered = render_with_mode(
            &agents_md_message(),
            ContextSnapshotRenderMode::RedactedText,
        );

        assert_eq!(rendered, "00:message/user:<AGENTS_MD>");
    }

    #[test]
    fn image_only_message_is_rendered_as_non_text_span() {
        let items = user_message(json!([{
            "type": "input_image",
            "image_url": "data:image/png;base64,AAAA"
        }]));

        let rendered = format_response_items_snapshot(&items, &ContextSnapshotOptions::default());

        assert_eq!(rendered, "00:message/user:<input_image:image_url>");
    }

    #[test]
    fn mixed_text_and_image_message_keeps_image_span() {
        let items = user_message(json!([
            {
                "type": "input_text",
                "text": "<image>"
            },
            {
                "type": "input_image",
                "image_url": "data:image/png;base64,AAAA"
            },
            {
                "type": "input_text",
                "text": "</image>"
            }
        ]));

        let rendered = format_response_items_snapshot(&items, &ContextSnapshotOptions::default());

        assert_eq!(
            rendered,
            "00:message/user:<image> | <input_image:image_url> | </image>"
        );
    }
}
|
||||
@@ -12,6 +12,7 @@ use codex_utils_absolute_path::AbsolutePathBuf;
|
||||
use regex_lite::Regex;
|
||||
use std::path::PathBuf;
|
||||
|
||||
pub mod context_snapshot;
|
||||
pub mod process;
|
||||
pub mod responses;
|
||||
pub mod streaming_sse;
|
||||
|
||||
@@ -5,6 +5,8 @@ use std::time::Duration;
|
||||
|
||||
use anyhow::Result;
|
||||
use base64::Engine;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::openai_models::ModelsResponse;
|
||||
use futures::SinkExt;
|
||||
use futures::StreamExt;
|
||||
@@ -112,6 +114,14 @@ impl ResponsesRequest {
|
||||
self.0.body.clone()
|
||||
}
|
||||
|
||||
pub fn body_contains_text(&self, text: &str) -> bool {
|
||||
let json_fragment = serde_json::to_string(text)
|
||||
.expect("serialize text to JSON")
|
||||
.trim_matches('"')
|
||||
.to_string();
|
||||
self.body_json().to_string().contains(&json_fragment)
|
||||
}
|
||||
|
||||
pub fn instructions_text(&self) -> String {
|
||||
self.body_json()["instructions"]
|
||||
.as_str()
|
||||
@@ -131,6 +141,22 @@ impl ResponsesRequest {
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Returns all `input_image` `image_url` spans from `message` inputs for the provided role.
|
||||
pub fn message_input_image_urls(&self, role: &str) -> Vec<String> {
|
||||
self.inputs_of_type("message")
|
||||
.into_iter()
|
||||
.filter(|item| item.get("role").and_then(Value::as_str) == Some(role))
|
||||
.filter_map(|item| item.get("content").and_then(Value::as_array).cloned())
|
||||
.flatten()
|
||||
.filter(|span| span.get("type").and_then(Value::as_str) == Some("input_image"))
|
||||
.filter_map(|span| {
|
||||
span.get("image_url")
|
||||
.and_then(Value::as_str)
|
||||
.map(str::to_owned)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn input(&self) -> Vec<Value> {
|
||||
self.body_json()["input"]
|
||||
.as_array()
|
||||
@@ -480,6 +506,18 @@ pub fn ev_assistant_message(id: &str, text: &str) -> Value {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn user_message_item(text: &str) -> ResponseItem {
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputText {
|
||||
text: text.to_string(),
|
||||
}],
|
||||
end_turn: None,
|
||||
phase: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ev_message_item_added(id: &str, text: &str) -> Value {
|
||||
serde_json::json!({
|
||||
"type": "response.output_item.added",
|
||||
@@ -818,15 +856,24 @@ where
|
||||
}
|
||||
|
||||
pub async fn mount_compact_json_once(server: &MockServer, body: serde_json::Value) -> ResponseMock {
|
||||
let (mock, response_mock) = compact_mock();
|
||||
mock.respond_with(
|
||||
mount_compact_response_once(
|
||||
server,
|
||||
ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "application/json")
|
||||
.set_body_json(body.clone()),
|
||||
.set_body_json(body),
|
||||
)
|
||||
.up_to_n_times(1)
|
||||
.mount(server)
|
||||
.await;
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn mount_compact_response_once(
|
||||
server: &MockServer,
|
||||
response: ResponseTemplate,
|
||||
) -> ResponseMock {
|
||||
let (mock, response_mock) = compact_mock();
|
||||
mock.respond_with(response)
|
||||
.up_to_n_times(1)
|
||||
.mount(server)
|
||||
.await;
|
||||
response_mock
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user