core: bundle settings diff updates into one dev/user envelope (#12417)

## Summary

- bundle contextual prompt injection into at most one developer message plus one contextual user message in both:
  - per-turn settings updates
  - initial context insertion
- preserve `<model_switch>` across compaction by rebuilding it through canonical initial-context injection, instead of relying on strip/reattach hacks
- centralize contextual user fragment detection in one shared definition table and reuse it for parsing/compaction logic
- keep `AGENTS.md` in its natural serialized format:
  - `# AGENTS.md instructions for {dirname}`
  - `<INSTRUCTIONS>...</INSTRUCTIONS>`
- simplify related tests/helpers and accept the expected snapshot/layout updates from bundled multi-part messages

## Why

The goal is to converge toward a simpler, more intentional prompt shape where contextual updates are consistently represented as one developer envelope plus one contextual user envelope, while keeping parsing and compaction behavior aligned with that representation.

## Notable details

- the temporary `SettingsUpdateEnvelope` wrapper was removed; these paths now return `Vec<ResponseItem>` directly
- local/remote compaction no longer rely on model-switch strip/restore helpers
- contextual user detection is now driven by shared fragment definitions instead of ad hoc matcher assembly
- AGENTS/user instructions are still the same logical context; only the synthetic `<user_instructions>` wrapper was replaced by the natural AGENTS text format

## Testing

- `just fmt`
- `cargo test -p codex-app-server codex_message_processor::tests::extract_conversation_summary_prefers_plain_user_messages -- --exact`
- `cargo test -p codex-core compact::tests::collect_user_messages_filters_session_prefix_entries --lib -- --exact`
- `cargo test -p codex-core --test all 'suite::compact::snapshot_request_shape_pre_turn_compaction_strips_incoming_model_switch' -- --exact`
- `cargo test -p codex-core --test all 'suite::compact_remote::snapshot_request_shape_remote_pre_turn_compaction_strips_incoming_model_switch' -- --exact`
- `cargo test -p codex-core --test all 'suite::client::includes_apps_guidance_as_developer_message_when_enabled' -- --exact`
- `cargo test -p codex-core --test all 'suite::client::includes_developer_instructions_message_in_request' -- --exact`
- `cargo test -p codex-core --test all 'suite::client::includes_user_instructions_message_in_request' -- --exact`
- `cargo test -p codex-core --test all 'suite::client::resume_includes_initial_messages_and_sends_prior_items' -- --exact`
- `cargo test -p codex-core --test all 'suite::review::review_input_isolated_from_parent_history' -- --exact`
- `cargo test -p codex-exec --test all 'suite::resume::exec_resume_last_respects_cwd_filter_and_all_flag' -- --exact`
- `cargo test -p core_test_support context_snapshot::tests::full_text_mode_preserves_unredacted_text -- --exact`

## Notes

- I also ran several targeted `compact`, `compact_remote`, `prompt_caching`, `model_visible_layout`, and `event_mapping` tests while iterating on prompt-shape changes.
- I have not claimed a clean full-workspace `cargo test` from this environment because local sandbox/resource conditions have previously produced unrelated failures in large workspace runs.
2026-04-29 00:55:38 +00:00 · 2026-02-26 00:12:08 -08:00
parent 28bfbb8f2b
commit 07aefffb1f
47 changed files with 966 additions and 813 deletions
--- a/codex-rs/core/tests/suite/client.rs
+++ b/codex-rs/core/tests/suite/client.rs
@@ -74,40 +74,14 @@ fn assert_message_role(request_body: &serde_json::Value, role: &str) {
    assert_eq!(request_body["role"].as_str().unwrap(), role);
 }

-#[expect(clippy::expect_used)]
-fn assert_message_equals(request_body: &serde_json::Value, text: &str) {
-    let content = request_body["content"][0]["text"]
-        .as_str()
-        .expect("invalid message content");
-
-    assert_eq!(
-        content, text,
-        "expected message content '{content}' to equal '{text}'"
-    );
-}
-
-#[expect(clippy::expect_used)]
-fn assert_message_starts_with(request_body: &serde_json::Value, text: &str) {
-    let content = request_body["content"][0]["text"]
-        .as_str()
-        .expect("invalid message content");
-
-    assert!(
-        content.starts_with(text),
-        "expected message content '{content}' to start with '{text}'"
-    );
-}
-
-#[expect(clippy::expect_used)]
-fn assert_message_ends_with(request_body: &serde_json::Value, text: &str) {
-    let content = request_body["content"][0]["text"]
-        .as_str()
-        .expect("invalid message content");
-
-    assert!(
-        content.ends_with(text),
-        "expected message content '{content}' to end with '{text}'"
-    );
+#[expect(clippy::unwrap_used)]
+fn message_input_texts(item: &serde_json::Value) -> Vec<&str> {
+    item["content"]
+        .as_array()
+        .unwrap()
+        .iter()
+        .filter_map(|entry| entry.get("text").and_then(|text| text.as_str()))
+        .collect()
 }

 /// Writes an `auth.json` into the provided `codex_home` with the specified parameters.
@@ -305,19 +279,15 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
    let request = resp_mock.single_request();
    let request_body = request.body_json();
    let input = request_body["input"].as_array().expect("input array");
-    let messages: Vec<(String, String)> = input
-        .iter()
-        .filter_map(|item| {
-            let role = item.get("role")?.as_str()?;
-            let text = item
-                .get("content")?
-                .as_array()?
-                .first()?
-                .get("text")?
-                .as_str()?;
-            Some((role.to_string(), text.to_string()))
-        })
-        .collect();
+    let mut messages: Vec<(String, String)> = Vec::new();
+    for item in input {
+        let Some(role) = item.get("role").and_then(|role| role.as_str()) else {
+            continue;
+        };
+        for text in message_input_texts(item) {
+            messages.push((role.to_string(), text.to_string()));
+        }
+    }
    let pos_prior_user = messages
        .iter()
        .position(|(role, text)| role == "user" && text == "resumed user message")
@@ -354,8 +324,7 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
        .position(|(role, text)| {
            role == "user"
                && text.contains("be nice")
-                && (text.starts_with("# AGENTS.md instructions for ")
-                    || text.starts_with("<user_instructions>"))
+                && (text.starts_with("# AGENTS.md instructions for "))
        })
        .expect("user instructions");
    let pos_environment = messages
@@ -664,16 +633,27 @@ async fn includes_user_instructions_message_in_request() {
    );

    assert_message_role(&request_body["input"][1], "user");
-    assert_message_starts_with(&request_body["input"][1], "# AGENTS.md instructions for ");
-    assert_message_ends_with(&request_body["input"][1], "</INSTRUCTIONS>");
-    let ui_text = request_body["input"][1]["content"][0]["text"]
-        .as_str()
+    let user_context_texts = message_input_texts(&request_body["input"][1]);
+    assert!(
+        user_context_texts
+            .iter()
+            .any(|text| text.starts_with("# AGENTS.md instructions for ")),
+        "expected AGENTS text in contextual user message, got {user_context_texts:?}"
+    );
+    let ui_text = user_context_texts
+        .iter()
+        .copied()
+        .find(|text| text.contains("<INSTRUCTIONS>"))
        .expect("invalid message content");
    assert!(ui_text.contains("<INSTRUCTIONS>"));
    assert!(ui_text.contains("be nice"));
-    assert_message_role(&request_body["input"][2], "user");
-    assert_message_starts_with(&request_body["input"][2], "<environment_context>");
-    assert_message_ends_with(&request_body["input"][2], "</environment_context>");
+    assert!(
+        user_context_texts
+            .iter()
+            .any(|text| text.starts_with("<environment_context>")
+                && text.ends_with("</environment_context>")),
+        "expected environment context in contextual user message, got {user_context_texts:?}"
+    );
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
@@ -727,10 +707,14 @@ async fn includes_apps_guidance_as_developer_message_when_enabled() {
            && item
                .get("content")
                .and_then(|value| value.as_array())
-                .and_then(|value| value.first())
-                .and_then(|value| value.get("text"))
-                .and_then(|value| value.as_str())
-                .is_some_and(|text| text.contains(apps_snippet))
+                .is_some_and(|content| {
+                    content.iter().any(|entry| {
+                        entry
+                            .get("text")
+                            .and_then(|value| value.as_str())
+                            .is_some_and(|text| text.contains(apps_snippet))
+                    })
+                })
    });
    assert!(
        has_developer_apps_guidance,
@@ -742,10 +726,14 @@ async fn includes_apps_guidance_as_developer_message_when_enabled() {
            && item
                .get("content")
                .and_then(|value| value.as_array())
-                .and_then(|value| value.first())
-                .and_then(|value| value.get("text"))
-                .and_then(|value| value.as_str())
-                .is_some_and(|text| text.contains(apps_snippet))
+                .is_some_and(|content| {
+                    content.iter().any(|entry| {
+                        entry
+                            .get("text")
+                            .and_then(|value| value.as_str())
+                            .is_some_and(|text| text.contains(apps_snippet))
+                    })
+                })
    });
    assert!(
        !has_user_apps_guidance,
@@ -1283,19 +1271,42 @@ async fn includes_developer_instructions_message_in_request() {
        "expected permissions message to mention sandbox_mode, got {permissions_text:?}"
    );

-    assert_message_role(&request_body["input"][1], "developer");
-    assert_message_equals(&request_body["input"][1], "be useful");
-    assert_message_role(&request_body["input"][2], "user");
-    assert_message_starts_with(&request_body["input"][2], "# AGENTS.md instructions for ");
-    assert_message_ends_with(&request_body["input"][2], "</INSTRUCTIONS>");
-    let ui_text = request_body["input"][2]["content"][0]["text"]
-        .as_str()
+    let developer_messages: Vec<&serde_json::Value> = request_body["input"]
+        .as_array()
+        .expect("input array")
+        .iter()
+        .filter(|item| item.get("role").and_then(|role| role.as_str()) == Some("developer"))
+        .collect();
+    assert!(
+        developer_messages
+            .iter()
+            .any(|item| message_input_texts(item).contains(&"be useful")),
+        "expected developer instructions in a developer message, got {:?}",
+        request_body["input"]
+    );
+
+    assert_message_role(&request_body["input"][1], "user");
+    let user_context_texts = message_input_texts(&request_body["input"][1]);
+    assert!(
+        user_context_texts
+            .iter()
+            .any(|text| text.starts_with("# AGENTS.md instructions for ")),
+        "expected AGENTS text in contextual user message, got {user_context_texts:?}"
+    );
+    let ui_text = user_context_texts
+        .iter()
+        .copied()
+        .find(|text| text.contains("<INSTRUCTIONS>"))
        .expect("invalid message content");
    assert!(ui_text.contains("<INSTRUCTIONS>"));
    assert!(ui_text.contains("be nice"));
-    assert_message_role(&request_body["input"][3], "user");
-    assert_message_starts_with(&request_body["input"][3], "<environment_context>");
-    assert_message_ends_with(&request_body["input"][3], "</environment_context>");
+    assert!(
+        user_context_texts
+            .iter()
+            .any(|text| text.starts_with("<environment_context>")
+                && text.ends_with("</environment_context>")),
+        "expected environment context in contextual user message, got {user_context_texts:?}"
+    );
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]