core: bundle settings diff updates into one dev/user envelope (#12417)

## Summary
- bundle contextual prompt injection into at most one developer message
plus one contextual user message in both:
  - per-turn settings updates
  - initial context insertion
- preserve `<model_switch>` across compaction by rebuilding it through
canonical initial-context injection, instead of relying on
strip/reattach hacks
- centralize contextual user fragment detection in one shared definition
table and reuse it for parsing/compaction logic
- keep `AGENTS.md` in its natural serialized format:
  - `# AGENTS.md instructions for {dirname}`
  - `<INSTRUCTIONS>...</INSTRUCTIONS>`
- simplify related tests/helpers and accept the expected snapshot/layout
updates from bundled multi-part messages

## Why
The goal is to converge toward a simpler, more intentional prompt shape
where contextual updates are consistently represented as one developer
envelope plus one contextual user envelope, while keeping parsing and
compaction behavior aligned with that representation.

## Notable details
- the temporary `SettingsUpdateEnvelope` wrapper was removed; these
paths now return `Vec<ResponseItem>` directly
- local and remote compaction no longer rely on model-switch
strip/restore helpers
- contextual user detection is now driven by shared fragment definitions
instead of ad hoc matcher assembly
- AGENTS/user instructions are still the same logical context; only the
synthetic `<user_instructions>` wrapper was replaced by the natural
AGENTS text format

## Testing
- `just fmt`
- `cargo test -p codex-app-server
codex_message_processor::tests::extract_conversation_summary_prefers_plain_user_messages
-- --exact`
- `cargo test -p codex-core
compact::tests::collect_user_messages_filters_session_prefix_entries
--lib -- --exact`
- `cargo test -p codex-core --test all
'suite::compact::snapshot_request_shape_pre_turn_compaction_strips_incoming_model_switch'
-- --exact`
- `cargo test -p codex-core --test all
'suite::compact_remote::snapshot_request_shape_remote_pre_turn_compaction_strips_incoming_model_switch'
-- --exact`
- `cargo test -p codex-core --test all
'suite::client::includes_apps_guidance_as_developer_message_when_enabled'
-- --exact`
- `cargo test -p codex-core --test all
'suite::client::includes_developer_instructions_message_in_request' --
--exact`
- `cargo test -p codex-core --test all
'suite::client::includes_user_instructions_message_in_request' --
--exact`
- `cargo test -p codex-core --test all
'suite::client::resume_includes_initial_messages_and_sends_prior_items'
-- --exact`
- `cargo test -p codex-core --test all
'suite::review::review_input_isolated_from_parent_history' -- --exact`
- `cargo test -p codex-exec --test all
'suite::resume::exec_resume_last_respects_cwd_filter_and_all_flag' --
--exact`
- `cargo test -p core_test_support
context_snapshot::tests::full_text_mode_preserves_unredacted_text --
--exact`

## Notes
- I also ran several targeted `compact`, `compact_remote`,
`prompt_caching`, `model_visible_layout`, and `event_mapping` tests
while iterating on prompt-shape changes.
- I am not claiming a clean full-workspace `cargo test` run from this
environment, because local sandbox/resource conditions have previously
produced unrelated failures in large workspace runs.
This commit is contained in:
Charley Cunningham
2026-02-26 00:12:08 -08:00
committed by GitHub
parent 28bfbb8f2b
commit 07aefffb1f
47 changed files with 966 additions and 813 deletions

View File

@@ -756,16 +756,40 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() {
let body = requests_payloads[0].body_json();
let input = body.get("input").and_then(|v| v.as_array()).unwrap();
/// Returns a copy of `value` whose `"content"` array omits every part that
/// carries serialized AGENTS.md instructions.
///
/// A part is dropped when its `"text"` field is a string starting with
/// `# AGENTS.md instructions for `; parts without a string `"text"` field are
/// kept as-is. Yields `None` when `value` has no `"content"` array, or when
/// no part survives the filtering.
fn strip_agents_parts_from_user_message(
    value: &serde_json::Value,
) -> Option<serde_json::Value> {
    const AGENTS_PREFIX: &str = "# AGENTS.md instructions for ";

    let parts = value.get("content")?.as_array()?;

    let mut kept = Vec::new();
    for part in parts {
        let is_agents_part = match part.get("text").and_then(|text| text.as_str()) {
            Some(text) => text.starts_with(AGENTS_PREFIX),
            None => false,
        };
        if !is_agents_part {
            kept.push(part.clone());
        }
    }

    // A message made up solely of AGENTS.md parts is removed entirely rather
    // than being returned with an empty content array.
    if kept.is_empty() {
        return None;
    }

    let mut stripped = value.clone();
    stripped["content"] = serde_json::Value::Array(kept);
    Some(stripped)
}
fn normalize_inputs(values: &[serde_json::Value]) -> Vec<serde_json::Value> {
values
.iter()
.filter(|value| {
.filter_map(|value| {
if value
.get("type")
.and_then(|ty| ty.as_str())
.is_some_and(|ty| ty == "function_call_output")
{
return false;
return None;
}
let text = value
@@ -781,11 +805,13 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() {
if role == Some("developer")
&& text.is_some_and(|text| text.contains("`sandbox_mode`"))
{
return false;
return None;
}
!text.is_some_and(|text| text.starts_with("# AGENTS.md instructions for "))
if role == Some("user") {
return strip_agents_parts_from_user_message(value);
}
Some(value.clone())
})
.cloned()
.collect()
}
@@ -3184,7 +3210,7 @@ async fn snapshot_request_shape_pre_turn_compaction_context_window_exceeded() {
]);
let mut responses = vec![first_turn];
responses.extend(
(0..6).map(|_| {
(0..5).map(|_| {
sse_failed(
"compact-failed",
"context_length_exceeded",