Compare commits

...

7 Commits

Author SHA1 Message Date
jif-oai
2cbeb29855 make it cleaner 2 2026-05-18 20:26:21 +02:00
jif-oai
59eef04727 Fix clippy warning in fork history sanitization 2026-05-18 19:26:21 +02:00
jif-oai
6616a2faba Sanitize compacted fork histories 2026-05-18 19:14:53 +02:00
jif-oai
1b4a94f70b Merge branch 'main' into jif/drop-parent-fork-setup-context 2026-05-18 09:59:47 +02:00
jif-oai
01ae2987e3 Strip parent startup context from forked history 2026-05-17 15:12:01 +02:00
jif-oai
2d7fb209cc Preserve parent fork context while deduplicating instructions 2026-05-17 14:51:00 +02:00
jif-oai
a926bb004a Drop parent setup context from forked agents 2026-05-17 14:13:59 +02:00
2 changed files with 363 additions and 63 deletions

View File

@@ -21,6 +21,7 @@ use codex_protocol::error::Result as CodexResult;
use codex_protocol::models::ContentItem;
use codex_protocol::models::MessagePhase;
use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::InitialHistory;
use codex_protocol::protocol::InterAgentCommunication;
use codex_protocol::protocol::Op;
@@ -96,34 +97,193 @@ fn agent_nickname_candidates(
.collect()
}
fn keep_forked_rollout_item(item: &RolloutItem) -> bool {
/// Decides whether a single parent `ResponseItem` is carried into a forked child's history.
///
/// Returns `false` for a developer message whose content is exactly one `InputText` equal to
/// one of `multi_agent_v2_usage_hint_texts_to_filter`. Otherwise system, developer, and user
/// messages are kept, assistant messages are kept only when their phase is
/// `MessagePhase::FinalAnswer`, messages with any other role are dropped, and every
/// non-message item kind is dropped.
fn keep_forked_response_item(
item: &ResponseItem,
multi_agent_v2_usage_hint_texts_to_filter: &[String],
) -> bool {
// A usage hint is matched by exact text equality on a single-element content list.
let is_multi_agent_v2_usage_hint = match item {
ResponseItem::Message { role, content, .. } if role == "developer" => {
matches!(
content.as_slice(),
[ContentItem::InputText { text }]
if multi_agent_v2_usage_hint_texts_to_filter
.iter()
.any(|usage_hint_text| usage_hint_text == text)
)
}
_ => false,
};
if is_multi_agent_v2_usage_hint {
return false;
}
match item {
RolloutItem::ResponseItem(ResponseItem::Message { role, phase, .. }) => match role.as_str()
{
ResponseItem::Message { role, phase, .. } => match role.as_str() {
"system" | "developer" | "user" => true,
"assistant" => *phase == Some(MessagePhase::FinalAnswer),
_ => false,
},
RolloutItem::ResponseItem(
ResponseItem::Reasoning { .. }
| ResponseItem::LocalShellCall { .. }
| ResponseItem::FunctionCall { .. }
| ResponseItem::ToolSearchCall { .. }
| ResponseItem::FunctionCallOutput { .. }
| ResponseItem::CustomToolCall { .. }
| ResponseItem::CustomToolCallOutput { .. }
| ResponseItem::ToolSearchOutput { .. }
| ResponseItem::WebSearchCall { .. }
| ResponseItem::ImageGenerationCall { .. }
| ResponseItem::Compaction { .. }
| ResponseItem::CompactionTrigger
| ResponseItem::ContextCompaction { .. }
| ResponseItem::Other,
) => false,
// Every variant is listed explicitly rather than using a catch-all arm.
ResponseItem::Reasoning { .. }
| ResponseItem::LocalShellCall { .. }
| ResponseItem::FunctionCall { .. }
| ResponseItem::ToolSearchCall { .. }
| ResponseItem::FunctionCallOutput { .. }
| ResponseItem::CustomToolCall { .. }
| ResponseItem::CustomToolCallOutput { .. }
| ResponseItem::ToolSearchOutput { .. }
| ResponseItem::WebSearchCall { .. }
| ResponseItem::ImageGenerationCall { .. }
| ResponseItem::Compaction { .. }
| ResponseItem::CompactionTrigger
| ResponseItem::ContextCompaction { .. }
| ResponseItem::Other => false,
}
}
/// Sanitizes a compacted replacement history in place before a forked child inherits it.
///
/// The history is scanned front to back. At every scan position:
/// - developer messages and contextual user messages (as classified by
///   `crate::event_mapping::is_contextual_user_message_content`) are dropped;
/// - if at least one developer message was just dropped and the next two items are both
///   non-contextual user messages, the first of the two is dropped as well;
/// - the item the scan then lands on is kept only if `keep_forked_response_item` accepts it.
///
/// Retained items keep their relative order. The filtering happens in place, so retained
/// items are moved rather than cloned.
fn sanitize_forked_replacement_history(
    items: &mut Vec<ResponseItem>,
    multi_agent_v2_usage_hint_texts_to_filter: &[String],
) {
    let is_contextual_user_response_item = |item: &ResponseItem| {
        matches!(
            item,
            ResponseItem::Message { role, content, .. }
                if role == "user"
                    && crate::event_mapping::is_contextual_user_message_content(content)
        )
    };
    let is_non_contextual_user_response_item = |item: &ResponseItem| {
        matches!(
            item,
            ResponseItem::Message { role, content, .. }
                if role == "user"
                    && !crate::event_mapping::is_contextual_user_message_content(content)
        )
    };

    // First pass: decide per index whether the item survives. Positions the scan skips over
    // stay `false`. The lookahead needs random access, so decisions are recorded in a mask
    // and applied afterwards.
    let mut keep = vec![false; items.len()];
    let mut index = 0;
    while index < items.len() {
        let mut skipped_developer_context = false;
        while let Some(item) = items.get(index) {
            if matches!(item, ResponseItem::Message { role, .. } if role == "developer") {
                skipped_developer_context = true;
            } else if !is_contextual_user_response_item(item) {
                break;
            }
            index += 1;
        }

        if skipped_developer_context
            && items
                .get(index)
                .is_some_and(is_non_contextual_user_response_item)
            && items
                .get(index + 1)
                .is_some_and(is_non_contextual_user_response_item)
        {
            // Extension-contributed PromptSlot::ContextualUser fragments do not carry the
            // built-in contextual markers. In compacted histories, the remaining structural
            // cue is that such a startup user item sits between developer setup and the
            // first real user turn.
            index += 1;
        }

        let Some(item) = items.get(index) else {
            break;
        };
        keep[index] = keep_forked_response_item(item, multi_agent_v2_usage_hint_texts_to_filter);
        index += 1;
    }

    // Second pass: `retain` visits each element exactly once in original order, so the mask
    // can be consumed in lockstep with it.
    let mut keep = keep.into_iter();
    items.retain(|_| keep.next().unwrap_or(false));
}
/// Removes the parent's startup context bundle from a rollout that is about to be forked.
///
/// The bundle is the contiguous run of developer/user messages sitting directly before the
/// first `RolloutItem::TurnContext`, together with that `TurnContext` itself. It is removed
/// only when the run is non-empty and either
/// - the item directly before the run is the `TurnStarted` event whose `turn_id` equals the
///   `TurnContext`'s `turn_id`, or
/// - nothing but `SessionMeta` items precede the run.
///
/// In every other case `items` is left untouched. Only the first `TurnContext` is
/// considered; later ones are never removed here.
fn strip_parent_startup_context_bundle_from_forked_rollout(items: &mut Vec<RolloutItem>) {
    let is_startup_context_item = |item: &RolloutItem| {
        matches!(
            item,
            RolloutItem::ResponseItem(ResponseItem::Message { role, .. })
                if role == "developer" || role == "user"
        )
    };

    // Locate the first turn context and grab its turn id in the same pass.
    let Some((turn_context_idx, turn_context_turn_id)) =
        items.iter().enumerate().find_map(|(idx, item)| match item {
            RolloutItem::TurnContext(turn_context) => {
                Some((idx, turn_context.turn_id.as_deref()))
            }
            _ => None,
        })
    else {
        return;
    };

    // Walk backwards over the developer/user messages that directly precede the context.
    let bundle_len = items[..turn_context_idx]
        .iter()
        .rev()
        .take_while(|item| is_startup_context_item(*item))
        .count();
    if bundle_len == 0 {
        return;
    }
    let context_start = turn_context_idx - bundle_len;

    let preceding = &items[..context_start];
    let started_immediately_before_context = matches!(
        preceding.last(),
        Some(RolloutItem::EventMsg(EventMsg::TurnStarted(started)))
            if Some(started.turn_id.as_str()) == turn_context_turn_id
    );
    let starts_at_rollout_front = preceding
        .iter()
        .all(|item| matches!(item, RolloutItem::SessionMeta(_)));
    if !started_immediately_before_context && !starts_at_rollout_front {
        return;
    }

    // Every item in `context_start..turn_context_idx` is a developer/user message by
    // construction of `bundle_len`, so the whole range plus the turn context can go.
    items.drain(context_start..=turn_context_idx);
}
/// Decides whether one rollout item is kept when forking, sanitizing it in place if needed.
///
/// Returns `true` to keep the item and `false` to drop it. Response items are judged by
/// `keep_forked_response_item`; turn contexts are always dropped; a compacted item is kept
/// after its `replacement_history`, when present, has been run through
/// `sanitize_forked_replacement_history`; event messages and session metadata are kept as-is.
fn sanitize_forked_rollout_item(
item: &mut RolloutItem,
multi_agent_v2_usage_hint_texts_to_filter: &[String],
) -> bool {
match item {
RolloutItem::ResponseItem(response_item) => {
keep_forked_response_item(response_item, multi_agent_v2_usage_hint_texts_to_filter)
}
// A forked child gets its own runtime config, including spawned-agent
// instructions, so it must establish a fresh context diff baseline.
RolloutItem::TurnContext(_) => false,
RolloutItem::Compacted(_) | RolloutItem::EventMsg(_) | RolloutItem::SessionMeta(_) => true,
RolloutItem::Compacted(compacted) => {
// Only the embedded replacement history is rewritten; the compacted item itself stays.
if let Some(replacement_history) = &mut compacted.replacement_history {
sanitize_forked_replacement_history(
replacement_history,
multi_agent_v2_usage_hint_texts_to_filter,
);
}
true
}
RolloutItem::EventMsg(_) | RolloutItem::SessionMeta(_) => true,
}
}
@@ -396,6 +556,7 @@ impl AgentControl {
forked_rollout_items =
truncate_rollout_to_last_n_fork_turns(&forked_rollout_items, *last_n_turns);
}
strip_parent_startup_context_bundle_from_forked_rollout(&mut forked_rollout_items);
// MultiAgentV2 root/subagent usage hints are injected as standalone developer
// messages at thread start. When forking history, drop hints from the parent
// so the child gets a fresh hint that matches its own session source/config.
@@ -417,18 +578,8 @@ impl AgentControl {
} else {
Vec::new()
};
forked_rollout_items.retain(|item| {
if let RolloutItem::ResponseItem(ResponseItem::Message { role, content, .. }) = item
&& role == "developer"
&& let [ContentItem::InputText { text }] = content.as_slice()
&& multi_agent_v2_usage_hint_texts_to_filter
.iter()
.any(|usage_hint_text| usage_hint_text == text)
{
return false;
}
keep_forked_rollout_item(item)
forked_rollout_items.retain_mut(|item| {
sanitize_forked_rollout_item(item, &multi_agent_v2_usage_hint_texts_to_filter)
});
state

View File

@@ -14,18 +14,24 @@ use codex_features::Feature;
use codex_login::CodexAuth;
use codex_protocol::AgentPath;
use codex_protocol::config_types::ModeKind;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::models::ContentItem;
use codex_protocol::models::MessagePhase;
use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::ErrorEvent;
use codex_protocol::protocol::EventMsg;
use codex_protocol::protocol::InterAgentCommunication;
use codex_protocol::protocol::RolloutItem;
use codex_protocol::protocol::SandboxPolicy;
use codex_protocol::protocol::SessionSource;
use codex_protocol::protocol::SubAgentSource;
use codex_protocol::protocol::TurnAbortReason;
use codex_protocol::protocol::TurnAbortedEvent;
use codex_protocol::protocol::TurnCompleteEvent;
use codex_protocol::protocol::TurnContextItem;
use codex_protocol::protocol::TurnStartedEvent;
use codex_protocol::protocol::UserMessageEvent;
use codex_thread_store::ArchiveThreadParams;
use codex_thread_store::LocalThreadStore;
use codex_thread_store::LocalThreadStoreConfig;
@@ -83,6 +89,126 @@ fn spawn_agent_call(call_id: &str) -> ResponseItem {
}
}
/// Builds a minimal `TurnContextItem` fixture tagged with `turn_id`.
///
/// Every optional field is `None`; the remaining fields use fixed placeholder values
/// (`/tmp` cwd, `gpt-test` model, on-request approvals, read-only sandbox without network).
fn turn_context_item_for_test(turn_id: &str) -> TurnContextItem {
    let turn_id = Some(turn_id.to_owned());
    let cwd = std::path::PathBuf::from("/tmp");
    let model = String::from("gpt-test");
    let sandbox_policy = SandboxPolicy::ReadOnly {
        network_access: false,
    };

    TurnContextItem {
        turn_id,
        trace_id: None,
        cwd,
        current_date: None,
        timezone: None,
        approval_policy: AskForApproval::OnRequest,
        sandbox_policy,
        permission_profile: None,
        network: None,
        file_system_sandbox_policy: None,
        model,
        personality: None,
        collaboration_mode: None,
        realtime_active: None,
        effort: None,
        summary: ReasoningSummary::Auto,
        user_instructions: None,
        developer_instructions: None,
        final_output_json_schema: None,
        truncation_policy: None,
    }
}
/// The startup bundle (developer setup, environment context, and the first turn context) is
/// removed, while inherited history before it and later context updates after it survive.
#[test]
fn strips_only_parent_startup_context_bundle() {
    // Builds an id-less, phase-less single-text message for the given role.
    let text_message = |role: &str, text: &str| ResponseItem::Message {
        id: None,
        role: role.to_string(),
        content: vec![ContentItem::InputText {
            text: text.to_string(),
        }],
        phase: None,
    };

    let parent_user_message = text_message("user", "parent seed context");
    let parent_later_context_update = text_message("developer", "Parent later context update.");
    let inherited_user_message = text_message("user", "inherited parent task");

    let inherited_user_event = EventMsg::UserMessage(UserMessageEvent {
        message: "inherited parent task".to_string(),
        ..Default::default()
    });
    let startup_turn_started = EventMsg::TurnStarted(TurnStartedEvent {
        turn_id: "startup".to_string(),
        started_at: None,
        model_context_window: None,
        collaboration_mode_kind: ModeKind::Default,
    });

    let mut structural_rollout = vec![
        // Inherited history that predates the parent's startup turn.
        RolloutItem::ResponseItem(inherited_user_message.clone()),
        RolloutItem::EventMsg(inherited_user_event),
        // Startup turn: lifecycle event, setup messages, then the turn context.
        RolloutItem::EventMsg(startup_turn_started),
        RolloutItem::ResponseItem(text_message("developer", "Parent startup context.")),
        RolloutItem::ResponseItem(text_message(
            "user",
            "<environment_context>\n<cwd>/tmp</cwd>\n</environment_context>",
        )),
        RolloutItem::TurnContext(turn_context_item_for_test("startup")),
        // Later parent activity that must be left alone.
        RolloutItem::ResponseItem(parent_user_message.clone()),
        RolloutItem::ResponseItem(parent_later_context_update.clone()),
        RolloutItem::TurnContext(turn_context_item_for_test("later")),
    ];

    strip_parent_startup_context_bundle_from_forked_rollout(&mut structural_rollout);

    assert_eq!(structural_rollout.len(), 6);

    let survived_inherited_message = matches!(
        &structural_rollout[0],
        RolloutItem::ResponseItem(item) if item == &inherited_user_message
    );
    assert!(
        survived_inherited_message,
        "inherited fork history before the parent startup should survive"
    );

    let survived_inherited_event = matches!(
        &structural_rollout[1],
        RolloutItem::EventMsg(EventMsg::UserMessage(_))
    );
    assert!(
        survived_inherited_event,
        "inherited fork events before the parent startup should survive"
    );

    let survived_turn_started = matches!(
        &structural_rollout[2],
        RolloutItem::EventMsg(EventMsg::TurnStarted(_))
    );
    assert!(
        survived_turn_started,
        "parent turn lifecycle events should not block startup-bundle stripping"
    );

    assert!(matches!(
        &structural_rollout[3],
        RolloutItem::ResponseItem(item) if item == &parent_user_message
    ));
    assert!(matches!(
        &structural_rollout[4],
        RolloutItem::ResponseItem(item) if item == &parent_later_context_update
    ));

    let survived_later_context = matches!(&structural_rollout[5], RolloutItem::TurnContext(_));
    assert!(
        survived_later_context,
        "later parent context updates should survive startup-bundle stripping"
    );
}
struct AgentControlHarness {
_home: TempDir,
config: Config,
@@ -608,6 +734,7 @@ async fn spawn_agent_can_fork_parent_thread_history_with_sanitized_items() {
Some("Parent root guidance.".to_string());
parent_config.multi_agent_v2.subagent_usage_hint_text =
Some("Parent subagent guidance.".to_string());
parent_config.developer_instructions = Some("Parent developer instructions.".to_string());
let mut child_config = harness.config.clone();
let _ = child_config.features.enable(Feature::MultiAgentV2);
child_config.multi_agent_v2.root_agent_usage_hint_text =
@@ -621,10 +748,15 @@ async fn spawn_agent_can_fork_parent_thread_history_with_sanitized_items() {
.expect("start parent thread");
let parent_thread_id = new_thread.thread_id;
let parent_thread = new_thread.thread;
let turn_context = parent_thread.codex.session.new_default_turn().await;
parent_thread
.codex
.session
.record_context_updates_and_set_reference_context_item(turn_context.as_ref())
.await;
parent_thread
.inject_user_message_without_turn("parent seed context".to_string())
.await;
let turn_context = parent_thread.codex.session.new_default_turn().await;
let parent_spawn_call_id = "spawn-call-history".to_string();
let trigger_message = InterAgentCommunication::new(
AgentPath::root(),
@@ -639,22 +771,6 @@ async fn spawn_agent_can_fork_parent_thread_history_with_sanitized_items() {
.record_conversation_items(
turn_context.as_ref(),
&[
ResponseItem::Message {
id: None,
role: "developer".to_string(),
content: vec![ContentItem::InputText {
text: "Parent root guidance.".to_string(),
}],
phase: None,
},
ResponseItem::Message {
id: None,
role: "developer".to_string(),
content: vec![ContentItem::InputText {
text: "Parent subagent guidance.".to_string(),
}],
phase: None,
},
assistant_message("parent commentary", Some(MessagePhase::Commentary)),
assistant_message("parent final answer", Some(MessagePhase::FinalAnswer)),
assistant_message("parent unknown phase", /*phase*/ None),
@@ -710,23 +826,56 @@ async fn spawn_agent_can_fork_parent_thread_history_with_sanitized_items() {
.expect("child thread should be registered");
assert_ne!(child_thread_id, parent_thread_id);
let history = child_thread.codex.session.clone_history().await;
let expected_history = [
let message_summary = |item: &ResponseItem| match item {
ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText {
text: "parent seed context".to_string(),
}],
phase: None,
},
assistant_message("parent final answer", Some(MessagePhase::FinalAnswer)),
];
role,
content,
phase,
..
} => {
let text = content
.iter()
.map(|content_item| match content_item {
ContentItem::InputText { text } | ContentItem::OutputText { text } => {
text.as_str()
}
_ => "",
})
.collect::<Vec<_>>()
.join("\n");
(role.clone(), text, phase.clone())
}
_ => panic!("expected only message items in forked child history"),
};
assert_eq!(
history.raw_items(),
&expected_history,
"forked child history should keep only parent user messages and assistant final answers"
history
.raw_items()
.iter()
.map(message_summary)
.collect::<Vec<_>>(),
vec![
("user".to_string(), "parent seed context".to_string(), None,),
(
"assistant".to_string(),
"parent final answer".to_string(),
Some(MessagePhase::FinalAnswer),
),
],
"forked child history should drop parent startup context while keeping parent conversation items"
);
let child_rollout_path = child_thread
.rollout_path()
.expect("forked child rollout path");
let child_rollout = std::fs::read_to_string(&child_rollout_path)
.expect("forked child rollout should be readable");
assert!(
!child_rollout.contains("Parent developer instructions."),
"forked child rollout should not retain parent developer instructions from setup context"
);
assert!(
!child_rollout.contains("Parent root guidance."),
"forked child rollout should not retain parent multi-agent setup guidance"
);
let expected = (
child_thread_id,
Op::UserInput {