core: bundle settings diff updates into one dev/user envelope (#12417)

## Summary
- bundle contextual prompt injection into at most one developer message plus one contextual user message in both:
  - per-turn settings updates
  - initial context insertion
- preserve `<model_switch>` across compaction by rebuilding it through canonical initial-context injection, instead of relying on strip/reattach hacks
- centralize contextual user fragment detection in one shared definition table and reuse it for parsing/compaction logic
- keep `AGENTS.md` in its natural serialized format:
  - `# AGENTS.md instructions for {dirname}`
  - `<INSTRUCTIONS>...</INSTRUCTIONS>`
- simplify related tests/helpers and accept the expected snapshot/layout updates from bundled multi-part messages

## Why
The goal is to converge toward a simpler, more intentional prompt shape where contextual updates are consistently represented as one developer envelope plus one contextual user envelope, while keeping parsing and compaction behavior aligned with that representation.

## Notable details
- the temporary `SettingsUpdateEnvelope` wrapper was removed; these paths now return `Vec<ResponseItem>` directly
- local/remote compaction no longer rely on model-switch strip/restore helpers
- contextual user detection is now driven by shared fragment definitions instead of ad hoc matcher assembly
- AGENTS/user instructions are still the same logical context; only the synthetic `<user_instructions>` wrapper was replaced by the natural AGENTS text format

## Testing
- `just fmt`
- `cargo test -p codex-app-server codex_message_processor::tests::extract_conversation_summary_prefers_plain_user_messages -- --exact`
- `cargo test -p codex-core compact::tests::collect_user_messages_filters_session_prefix_entries --lib -- --exact`
- `cargo test -p codex-core --test all 'suite::compact::snapshot_request_shape_pre_turn_compaction_strips_incoming_model_switch' -- --exact`
- `cargo test -p codex-core --test all 'suite::compact_remote::snapshot_request_shape_remote_pre_turn_compaction_strips_incoming_model_switch' -- --exact`
- `cargo test -p codex-core --test all 'suite::client::includes_apps_guidance_as_developer_message_when_enabled' -- --exact`
- `cargo test -p codex-core --test all 'suite::client::includes_developer_instructions_message_in_request' -- --exact`
- `cargo test -p codex-core --test all 'suite::client::includes_user_instructions_message_in_request' -- --exact`
- `cargo test -p codex-core --test all 'suite::client::resume_includes_initial_messages_and_sends_prior_items' -- --exact`
- `cargo test -p codex-core --test all 'suite::review::review_input_isolated_from_parent_history' -- --exact`
- `cargo test -p codex-exec --test all 'suite::resume::exec_resume_last_respects_cwd_filter_and_all_flag' -- --exact`
- `cargo test -p core_test_support context_snapshot::tests::full_text_mode_preserves_unredacted_text -- --exact`

## Notes
- I also ran several targeted `compact`, `compact_remote`, `prompt_caching`, `model_visible_layout`, and `event_mapping` tests while iterating on prompt-shape changes.
- I have not claimed a clean full-workspace `cargo test` from this environment because local sandbox/resource conditions have previously produced unrelated failures in large workspace runs.
2026-04-29 17:06:51 +00:00 · 2026-02-26 00:12:08 -08:00
parent 28bfbb8f2b
commit 07aefffb1f
47 changed files with 966 additions and 813 deletions
--- a/codex-rs/core/tests/suite/compact.rs
+++ b/codex-rs/core/tests/suite/compact.rs
@@ -756,16 +756,40 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() {
    let body = requests_payloads[0].body_json();
    let input = body.get("input").and_then(|v| v.as_array()).unwrap();

+    /// Returns a copy of `value` whose `content` array no longer contains the
+    /// parts whose `text` starts with `# AGENTS.md instructions for `.
+    ///
+    /// Returns `None` when `value` has no `content` array, or when removing
+    /// those parts leaves the array empty.
+    fn strip_agents_parts_from_user_message(
+        value: &serde_json::Value,
+    ) -> Option<serde_json::Value> {
+        // A missing or non-array `content` short-circuits to `None` via `?`.
+        let content = value
+            .get("content")
+            .and_then(|content| content.as_array())?;
+        // Keep every part except those whose string `text` begins with the
+        // AGENTS header; parts without a string `text` field are retained.
+        let filtered_content = content
+            .iter()
+            .filter(|item| {
+                !item
+                    .get("text")
+                    .and_then(|text| text.as_str())
+                    .is_some_and(|text| text.starts_with("# AGENTS.md instructions for "))
+            })
+            .cloned()
+            .collect::<Vec<_>>();
+        // Nothing left after filtering: report `None` instead of returning a
+        // message with an empty `content` array.
+        if filtered_content.is_empty() {
+            return None;
+        }
+        // Clone the input so all other fields are carried over unchanged, then
+        // replace only `content`.
+        let mut normalized = value.clone();
+        normalized["content"] = serde_json::Value::Array(filtered_content);
+        Some(normalized)
+    }
+
    fn normalize_inputs(values: &[serde_json::Value]) -> Vec<serde_json::Value> {
        values
            .iter()
-            .filter(|value| {
+            .filter_map(|value| {
                if value
                    .get("type")
                    .and_then(|ty| ty.as_str())
                    .is_some_and(|ty| ty == "function_call_output")
                {
-                    return false;
+                    return None;
                }

                let text = value
@@ -781,11 +805,13 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() {
                if role == Some("developer")
                    && text.is_some_and(|text| text.contains("`sandbox_mode`"))
                {
-                    return false;
+                    return None;
                }
-                !text.is_some_and(|text| text.starts_with("# AGENTS.md instructions for "))
+                if role == Some("user") {
+                    return strip_agents_parts_from_user_message(value);
+                }
+                Some(value.clone())
            })
-            .cloned()
            .collect()
    }

@@ -3184,7 +3210,7 @@ async fn snapshot_request_shape_pre_turn_compaction_context_window_exceeded() {
    ]);
    let mut responses = vec![first_turn];
    responses.extend(
-        (0..6).map(|_| {
+        (0..5).map(|_| {
            sse_failed(
                "compact-failed",
                "context_length_exceeded",