core: bundle settings diff updates into one dev/user envelope (#12417)

## Summary
- bundle contextual prompt injection into at most one developer message
plus one contextual user message in both:
  - per-turn settings updates
  - initial context insertion
- preserve `<model_switch>` across compaction by rebuilding it through
canonical initial-context injection, instead of relying on
strip/reattach hacks
- centralize contextual user fragment detection in one shared definition
table and reuse it for parsing/compaction logic
- keep `AGENTS.md` in its natural serialized format:
  - `# AGENTS.md instructions for {dirname}`
  - `<INSTRUCTIONS>...</INSTRUCTIONS>`
- simplify related tests/helpers and accept the expected snapshot/layout
updates from bundled multi-part messages

## Why
The goal is to converge toward a simpler, more intentional prompt shape
where contextual updates are consistently represented as one developer
envelope plus one contextual user envelope, while keeping parsing and
compaction behavior aligned with that representation.

## Notable details
- the temporary `SettingsUpdateEnvelope` wrapper was removed; these
paths now return `Vec<ResponseItem>` directly
- local/remote compaction no longer rely on model-switch strip/restore
helpers
- contextual user detection is now driven by shared fragment definitions
instead of ad hoc matcher assembly
- AGENTS/user instructions are still the same logical context; only the
synthetic `<user_instructions>` wrapper was replaced by the natural
AGENTS text format

## Testing
- `just fmt`
- `cargo test -p codex-app-server
codex_message_processor::tests::extract_conversation_summary_prefers_plain_user_messages
-- --exact`
- `cargo test -p codex-core
compact::tests::collect_user_messages_filters_session_prefix_entries
--lib -- --exact`
- `cargo test -p codex-core --test all
'suite::compact::snapshot_request_shape_pre_turn_compaction_strips_incoming_model_switch'
-- --exact`
- `cargo test -p codex-core --test all
'suite::compact_remote::snapshot_request_shape_remote_pre_turn_compaction_strips_incoming_model_switch'
-- --exact`
- `cargo test -p codex-core --test all
'suite::client::includes_apps_guidance_as_developer_message_when_enabled'
-- --exact`
- `cargo test -p codex-core --test all
'suite::client::includes_developer_instructions_message_in_request' --
--exact`
- `cargo test -p codex-core --test all
'suite::client::includes_user_instructions_message_in_request' --
--exact`
- `cargo test -p codex-core --test all
'suite::client::resume_includes_initial_messages_and_sends_prior_items'
-- --exact`
- `cargo test -p codex-core --test all
'suite::review::review_input_isolated_from_parent_history' -- --exact`
- `cargo test -p codex-exec --test all
'suite::resume::exec_resume_last_respects_cwd_filter_and_all_flag' --
--exact`
- `cargo test -p core_test_support
context_snapshot::tests::full_text_mode_preserves_unredacted_text --
--exact`

## Notes
- I also ran several targeted `compact`, `compact_remote`,
`prompt_caching`, `model_visible_layout`, and `event_mapping` tests
while iterating on prompt-shape changes.
- I am not claiming a clean full-workspace `cargo test` run from this
environment, because local sandbox/resource conditions have previously
produced unrelated failures in large workspace runs.
This commit is contained in:
Charley Cunningham
2026-02-26 00:12:08 -08:00
committed by GitHub
parent 28bfbb8f2b
commit 07aefffb1f
47 changed files with 966 additions and 813 deletions

View File

@@ -74,40 +74,14 @@ fn assert_message_role(request_body: &serde_json::Value, role: &str) {
assert_eq!(request_body["role"].as_str().unwrap(), role);
}
#[expect(clippy::expect_used)]
fn assert_message_equals(request_body: &serde_json::Value, text: &str) {
let content = request_body["content"][0]["text"]
.as_str()
.expect("invalid message content");
assert_eq!(
content, text,
"expected message content '{content}' to equal '{text}'"
);
}
#[expect(clippy::expect_used)]
fn assert_message_starts_with(request_body: &serde_json::Value, text: &str) {
let content = request_body["content"][0]["text"]
.as_str()
.expect("invalid message content");
assert!(
content.starts_with(text),
"expected message content '{content}' to start with '{text}'"
);
}
#[expect(clippy::expect_used)]
fn assert_message_ends_with(request_body: &serde_json::Value, text: &str) {
let content = request_body["content"][0]["text"]
.as_str()
.expect("invalid message content");
assert!(
content.ends_with(text),
"expected message content '{content}' to end with '{text}'"
);
#[expect(clippy::unwrap_used)]
fn message_input_texts(item: &serde_json::Value) -> Vec<&str> {
item["content"]
.as_array()
.unwrap()
.iter()
.filter_map(|entry| entry.get("text").and_then(|text| text.as_str()))
.collect()
}
/// Writes an `auth.json` into the provided `codex_home` with the specified parameters.
@@ -305,19 +279,15 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
let request = resp_mock.single_request();
let request_body = request.body_json();
let input = request_body["input"].as_array().expect("input array");
let messages: Vec<(String, String)> = input
.iter()
.filter_map(|item| {
let role = item.get("role")?.as_str()?;
let text = item
.get("content")?
.as_array()?
.first()?
.get("text")?
.as_str()?;
Some((role.to_string(), text.to_string()))
})
.collect();
let mut messages: Vec<(String, String)> = Vec::new();
for item in input {
let Some(role) = item.get("role").and_then(|role| role.as_str()) else {
continue;
};
for text in message_input_texts(item) {
messages.push((role.to_string(), text.to_string()));
}
}
let pos_prior_user = messages
.iter()
.position(|(role, text)| role == "user" && text == "resumed user message")
@@ -354,8 +324,7 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
.position(|(role, text)| {
role == "user"
&& text.contains("be nice")
&& (text.starts_with("# AGENTS.md instructions for ")
|| text.starts_with("<user_instructions>"))
&& (text.starts_with("# AGENTS.md instructions for "))
})
.expect("user instructions");
let pos_environment = messages
@@ -664,16 +633,27 @@ async fn includes_user_instructions_message_in_request() {
);
assert_message_role(&request_body["input"][1], "user");
assert_message_starts_with(&request_body["input"][1], "# AGENTS.md instructions for ");
assert_message_ends_with(&request_body["input"][1], "</INSTRUCTIONS>");
let ui_text = request_body["input"][1]["content"][0]["text"]
.as_str()
let user_context_texts = message_input_texts(&request_body["input"][1]);
assert!(
user_context_texts
.iter()
.any(|text| text.starts_with("# AGENTS.md instructions for ")),
"expected AGENTS text in contextual user message, got {user_context_texts:?}"
);
let ui_text = user_context_texts
.iter()
.copied()
.find(|text| text.contains("<INSTRUCTIONS>"))
.expect("invalid message content");
assert!(ui_text.contains("<INSTRUCTIONS>"));
assert!(ui_text.contains("be nice"));
assert_message_role(&request_body["input"][2], "user");
assert_message_starts_with(&request_body["input"][2], "<environment_context>");
assert_message_ends_with(&request_body["input"][2], "</environment_context>");
assert!(
user_context_texts
.iter()
.any(|text| text.starts_with("<environment_context>")
&& text.ends_with("</environment_context>")),
"expected environment context in contextual user message, got {user_context_texts:?}"
);
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
@@ -727,10 +707,14 @@ async fn includes_apps_guidance_as_developer_message_when_enabled() {
&& item
.get("content")
.and_then(|value| value.as_array())
.and_then(|value| value.first())
.and_then(|value| value.get("text"))
.and_then(|value| value.as_str())
.is_some_and(|text| text.contains(apps_snippet))
.is_some_and(|content| {
content.iter().any(|entry| {
entry
.get("text")
.and_then(|value| value.as_str())
.is_some_and(|text| text.contains(apps_snippet))
})
})
});
assert!(
has_developer_apps_guidance,
@@ -742,10 +726,14 @@ async fn includes_apps_guidance_as_developer_message_when_enabled() {
&& item
.get("content")
.and_then(|value| value.as_array())
.and_then(|value| value.first())
.and_then(|value| value.get("text"))
.and_then(|value| value.as_str())
.is_some_and(|text| text.contains(apps_snippet))
.is_some_and(|content| {
content.iter().any(|entry| {
entry
.get("text")
.and_then(|value| value.as_str())
.is_some_and(|text| text.contains(apps_snippet))
})
})
});
assert!(
!has_user_apps_guidance,
@@ -1283,19 +1271,42 @@ async fn includes_developer_instructions_message_in_request() {
"expected permissions message to mention sandbox_mode, got {permissions_text:?}"
);
assert_message_role(&request_body["input"][1], "developer");
assert_message_equals(&request_body["input"][1], "be useful");
assert_message_role(&request_body["input"][2], "user");
assert_message_starts_with(&request_body["input"][2], "# AGENTS.md instructions for ");
assert_message_ends_with(&request_body["input"][2], "</INSTRUCTIONS>");
let ui_text = request_body["input"][2]["content"][0]["text"]
.as_str()
let developer_messages: Vec<&serde_json::Value> = request_body["input"]
.as_array()
.expect("input array")
.iter()
.filter(|item| item.get("role").and_then(|role| role.as_str()) == Some("developer"))
.collect();
assert!(
developer_messages
.iter()
.any(|item| message_input_texts(item).contains(&"be useful")),
"expected developer instructions in a developer message, got {:?}",
request_body["input"]
);
assert_message_role(&request_body["input"][1], "user");
let user_context_texts = message_input_texts(&request_body["input"][1]);
assert!(
user_context_texts
.iter()
.any(|text| text.starts_with("# AGENTS.md instructions for ")),
"expected AGENTS text in contextual user message, got {user_context_texts:?}"
);
let ui_text = user_context_texts
.iter()
.copied()
.find(|text| text.contains("<INSTRUCTIONS>"))
.expect("invalid message content");
assert!(ui_text.contains("<INSTRUCTIONS>"));
assert!(ui_text.contains("be nice"));
assert_message_role(&request_body["input"][3], "user");
assert_message_starts_with(&request_body["input"][3], "<environment_context>");
assert_message_ends_with(&request_body["input"][3], "</environment_context>");
assert!(
user_context_texts
.iter()
.any(|text| text.starts_with("<environment_context>")
&& text.ends_with("</environment_context>")),
"expected environment context in contextual user message, got {user_context_texts:?}"
);
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]