Compare commits

...

4 Commits

Author SHA1 Message Date
Charles Cunningham
11dc9766bd Unify contextual user-message classification 2026-02-12 20:06:56 -08:00
Charles Cunningham
9f99335661 nit 2026-02-12 20:06:56 -08:00
Charles Cunningham
7244ce2120 protocol: always serialize TurnContextItem.network 2026-02-12 20:06:56 -08:00
Charles Cunningham
2e5f259b75 Persist complete TurnContextItem state via canonical conversion 2026-02-12 20:06:55 -08:00
5 changed files with 127 additions and 49 deletions

View File

@@ -1,16 +1,12 @@
use crate::codex::TurnContext;
use crate::context_manager::normalize;
use crate::instructions::SkillInstructions;
use crate::instructions::UserInstructions;
use crate::session_prefix::is_session_prefix;
use crate::session_prefix::is_contextual_user_message;
use crate::truncate::TruncationPolicy;
use crate::truncate::approx_token_count;
use crate::truncate::approx_tokens_from_byte_count_i64;
use crate::truncate::truncate_function_output_items_with_policy;
use crate::truncate::truncate_text;
use crate::user_shell_command::is_user_shell_command_text;
use codex_protocol::models::BaseInstructions;
use codex_protocol::models::ContentItem;
use codex_protocol::models::FunctionCallOutputBody;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
@@ -448,29 +444,7 @@ pub(crate) fn is_user_turn_boundary(item: &ResponseItem) -> bool {
return false;
}
if UserInstructions::is_user_instructions(content)
|| SkillInstructions::is_skill_instructions(content)
{
return false;
}
for content_item in content {
match content_item {
ContentItem::InputText { text } => {
if is_session_prefix(text) || is_user_shell_command_text(text) {
return false;
}
}
ContentItem::OutputText { text } => {
if is_session_prefix(text) {
return false;
}
}
ContentItem::InputImage { .. } => {}
}
}
true
!is_contextual_user_message(content)
}
fn user_message_positions(items: &[ResponseItem]) -> Vec<usize> {

View File

@@ -561,6 +561,8 @@ fn drop_last_n_user_turns_preserves_prefix() {
#[test]
fn drop_last_n_user_turns_ignores_session_prefix_user_messages() {
let context_update =
"<context_update>\n<environment_context>delta</environment_context>\n</context_update>";
let items = vec![
user_input_text_msg("<environment_context>ctx</environment_context>"),
user_input_text_msg("<user_instructions>do the thing</user_instructions>"),
@@ -571,6 +573,7 @@ fn drop_last_n_user_turns_ignores_session_prefix_user_messages() {
"<skill>\n<name>demo</name>\n<path>skills/demo/SKILL.md</path>\nbody\n</skill>",
),
user_input_text_msg("<user_shell_command>echo 42</user_shell_command>"),
user_input_text_msg(context_update),
user_input_text_msg("turn 1 user"),
assistant_msg("turn 1 assistant"),
user_input_text_msg("turn 2 user"),
@@ -591,6 +594,7 @@ fn drop_last_n_user_turns_ignores_session_prefix_user_messages() {
"<skill>\n<name>demo</name>\n<path>skills/demo/SKILL.md</path>\nbody\n</skill>",
),
user_input_text_msg("<user_shell_command>echo 42</user_shell_command>"),
user_input_text_msg(context_update),
user_input_text_msg("turn 1 user"),
assistant_msg("turn 1 assistant"),
];
@@ -610,6 +614,7 @@ fn drop_last_n_user_turns_ignores_session_prefix_user_messages() {
"<skill>\n<name>demo</name>\n<path>skills/demo/SKILL.md</path>\nbody\n</skill>",
),
user_input_text_msg("<user_shell_command>echo 42</user_shell_command>"),
user_input_text_msg(context_update),
];
let mut history = create_history_with_items(vec![
@@ -622,6 +627,7 @@ fn drop_last_n_user_turns_ignores_session_prefix_user_messages() {
"<skill>\n<name>demo</name>\n<path>skills/demo/SKILL.md</path>\nbody\n</skill>",
),
user_input_text_msg("<user_shell_command>echo 42</user_shell_command>"),
user_input_text_msg(context_update),
user_input_text_msg("turn 1 user"),
assistant_msg("turn 1 assistant"),
user_input_text_msg("turn 2 user"),
@@ -640,6 +646,7 @@ fn drop_last_n_user_turns_ignores_session_prefix_user_messages() {
"<skill>\n<name>demo</name>\n<path>skills/demo/SKILL.md</path>\nbody\n</skill>",
),
user_input_text_msg("<user_shell_command>echo 42</user_shell_command>"),
user_input_text_msg(context_update),
user_input_text_msg("turn 1 user"),
assistant_msg("turn 1 assistant"),
user_input_text_msg("turn 2 user"),

View File

@@ -18,16 +18,11 @@ use codex_protocol::user_input::UserInput;
use tracing::warn;
use uuid::Uuid;
use crate::instructions::SkillInstructions;
use crate::instructions::UserInstructions;
use crate::session_prefix::is_session_prefix;
use crate::user_shell_command::is_user_shell_command_text;
use crate::session_prefix::is_contextual_user_message;
use crate::web_search::web_search_action_detail;
fn parse_user_message(message: &[ContentItem]) -> Option<UserMessageItem> {
if UserInstructions::is_user_instructions(message)
|| SkillInstructions::is_skill_instructions(message)
{
if is_contextual_user_message(message) {
return None;
}
@@ -44,9 +39,6 @@ fn parse_user_message(message: &[ContentItem]) -> Option<UserMessageItem> {
{
continue;
}
if is_session_prefix(text) || is_user_shell_command_text(text) {
return None;
}
content.push(UserInput::Text {
text: text.clone(),
// Model input content does not carry UI element ranges.
@@ -59,9 +51,6 @@ fn parse_user_message(message: &[ContentItem]) -> Option<UserMessageItem> {
});
}
ContentItem::OutputText { text } => {
if is_session_prefix(text) {
return None;
}
warn!("Output text in user message: {}", text);
}
}
@@ -339,7 +328,16 @@ mod tests {
text: "<user_shell_command>echo 42</user_shell_command>".to_string(),
}],
end_turn: None,
phase: None,
phase: None,
},
ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText {
text: "<context_update>\n<environment_context>test_text</environment_context>\n</context_update>".to_string(),
}],
end_turn: None,
phase: None,
},
];

View File

@@ -194,6 +194,10 @@ mod tests {
async fn ignores_session_prefix_messages_when_truncating_rollout_from_start() {
let (session, turn_context) = make_session_and_context().await;
let mut items = session.build_initial_context(&turn_context).await;
items.push(user_msg(
"<context_update>\n<environment_context>delta</environment_context>\n</context_update>",
));
let prefix_and_context_count = items.len();
items.push(user_msg("feature request"));
items.push(assistant_msg("ack"));
items.push(user_msg("second question"));
@@ -206,12 +210,12 @@ mod tests {
.collect();
let truncated = truncate_rollout_before_nth_user_message_from_start(&rollout_items, 1);
let expected: Vec<RolloutItem> = vec![
RolloutItem::ResponseItem(items[0].clone()),
RolloutItem::ResponseItem(items[1].clone()),
RolloutItem::ResponseItem(items[2].clone()),
RolloutItem::ResponseItem(items[3].clone()),
];
let expected: Vec<RolloutItem> = items
.iter()
.take(prefix_and_context_count + 2)
.cloned()
.map(RolloutItem::ResponseItem)
.collect();
assert_eq!(
serde_json::to_value(&truncated).unwrap(),

View File

@@ -1,3 +1,9 @@
use codex_protocol::models::ContentItem;
use crate::instructions::SkillInstructions;
use crate::instructions::UserInstructions;
use crate::user_shell_command::is_user_shell_command_text;
/// Helpers for identifying model-visible "session prefix" messages.
///
/// A session prefix is a user-role message that carries configuration or state needed by
@@ -6,10 +12,99 @@
/// boundaries.
pub(crate) const ENVIRONMENT_CONTEXT_OPEN_TAG: &str = "<environment_context>";
/// Opening tag recognized by [`is_session_prefix`] for turn-aborted markers.
pub(crate) const TURN_ABORTED_OPEN_TAG: &str = "<turn_aborted>";
/// Opening tag recognized by [`is_session_prefix`] for context-update wrappers.
pub(crate) const CONTEXT_UPDATE_OPEN_TAG: &str = "<context_update>";

/// Returns true if `text` starts with a session prefix marker (case-insensitive).
///
/// Leading whitespace is ignored. The recognized markers are
/// [`ENVIRONMENT_CONTEXT_OPEN_TAG`], [`TURN_ABORTED_OPEN_TAG`] and
/// [`CONTEXT_UPDATE_OPEN_TAG`]; only ASCII case is folded when comparing.
pub(crate) fn is_session_prefix(text: &str) -> bool {
    // Compare on bytes so that slicing to the tag length can never land inside a
    // multi-byte character, and so that no lowercase copy of the (possibly large)
    // message has to be allocated just to inspect its first few bytes.
    let trimmed = text.trim_start().as_bytes();
    [
        ENVIRONMENT_CONTEXT_OPEN_TAG,
        TURN_ABORTED_OPEN_TAG,
        CONTEXT_UPDATE_OPEN_TAG,
    ]
    .iter()
    .any(|tag| {
        trimmed
            .get(..tag.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(tag.as_bytes()))
    })
}
/// Reports whether a user-role message consists of contextual content.
///
/// Returns `true` when the message as a whole is recognized by
/// [`UserInstructions::is_user_instructions`] or
/// [`SkillInstructions::is_skill_instructions`], or when at least one content
/// item is a marker:
/// - input text that is a session prefix or user shell command text,
/// - output text that is a session prefix.
///
/// Image items never make a message contextual on their own.
pub(crate) fn is_contextual_user_message(content: &[ContentItem]) -> bool {
    let is_instruction_wrapper = UserInstructions::is_user_instructions(content)
        || SkillInstructions::is_skill_instructions(content);
    if is_instruction_wrapper {
        return true;
    }

    for item in content {
        // Exhaustive on purpose: a new `ContentItem` variant must be classified here.
        let is_marker = match item {
            ContentItem::InputText { text } => {
                is_session_prefix(text) || is_user_shell_command_text(text)
            }
            ContentItem::OutputText { text } => is_session_prefix(text),
            ContentItem::InputImage { .. } => false,
        };
        if is_marker {
            return true;
        }
    }
    false
}
#[cfg(test)]
mod tests {
    //! Unit tests for session-prefix detection and contextual user-message
    //! classification.

    use super::*;
    use codex_protocol::models::ContentItem;

    /// The `<context_update>` tag is detected regardless of leading whitespace or case.
    #[test]
    fn recognizes_context_update_session_prefix() {
        assert!(is_session_prefix(
            "<context_update>\nfoo\n</context_update>"
        ));
        assert!(is_session_prefix(
            " <context_update>\nfoo\n</context_update>"
        ));
        assert!(is_session_prefix(
            "<CONTEXT_UPDATE>\nfoo\n</CONTEXT_UPDATE>"
        ));
    }

    /// The `<environment_context>` and `<turn_aborted>` tags are still detected.
    #[test]
    fn recognizes_legacy_session_prefixes() {
        assert!(is_session_prefix(
            "<environment_context>foo</environment_context>"
        ));
        assert!(is_session_prefix("<turn_aborted>foo</turn_aborted>"));
    }

    /// Text that does not start with a marker is not a session prefix.
    #[test]
    fn does_not_treat_plain_text_as_session_prefix() {
        assert!(!is_session_prefix("normal user message"));
    }

    /// Shell-command, context-update, user-instruction and skill messages are
    /// all classified as contextual.
    #[test]
    fn contextual_user_message_detects_context_markers_and_wrappers() {
        let shell = [ContentItem::InputText {
            text: "<user_shell_command>echo hi</user_shell_command>".to_string(),
        }];
        let context_update = [ContentItem::InputText {
            text: "<context_update>\nfoo\n</context_update>".to_string(),
        }];
        let user_instructions = [ContentItem::InputText {
            text: "# AGENTS.md instructions for test\n\n<INSTRUCTIONS>\nfoo\n</INSTRUCTIONS>"
                .to_string(),
        }];
        let skill = [ContentItem::InputText {
            text: "<skill>\n<name>demo</name>\n<path>skills/demo/SKILL.md</path>\nbody\n</skill>"
                .to_string(),
        }];

        assert!(is_contextual_user_message(&shell));
        assert!(is_contextual_user_message(&context_update));
        assert!(is_contextual_user_message(&user_instructions));
        assert!(is_contextual_user_message(&skill));
    }

    /// A message made of plain text plus an image is not contextual.
    #[test]
    fn contextual_user_message_keeps_real_user_content() {
        let real_message = [
            ContentItem::InputText {
                text: "normal user message".to_string(),
            },
            ContentItem::InputImage {
                image_url: "https://example.com/img.png".to_string(),
            },
        ];

        assert!(!is_contextual_user_message(&real_message));
    }
}