move plugin/skill instructions into dev msg and reorder (#14609)

Move the general `Apps`, `Skills`, and `Plugins` instruction blocks out
of `user_instructions` and into the developer message, using a new
`Apps -> Skills -> Plugins` order for better clarity.

Also wrap those sections in stable XML-style instruction tags (like
other sections) and update the prompt-layout tests/snapshots. Because
the sections can now be parsed out, snapshot output is less brittle in
tests, and the capability instructions are consolidated in one place.

#### Tests
Updated snapshots, added tests.

`<AGENTS_MD>` disappearing in snapshots is expected: before this change,
the wrapped user-instructions message was kept alive by `Skills`
content. Now that `Skills` and `Plugins` are in the developer message,
that wrapper only appears when there is real
project-doc/user-instructions content.

---------

Co-authored-by: Charley Cunningham <ccunningham@openai.com>
This commit is contained in:
sayan-oai
2026-03-13 20:51:01 -07:00
committed by GitHub
parent 7f571396c8
commit d272f45058
44 changed files with 344 additions and 362 deletions

View File

@@ -1083,7 +1083,7 @@ async fn omits_apps_guidance_for_api_key_auth_even_when_feature_enabled() {
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn skills_append_to_instructions() {
async fn skills_append_to_developer_message() {
skip_if_no_network!();
let server = MockServer::start().await;
@@ -1129,27 +1129,21 @@ async fn skills_append_to_instructions() {
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
let request = resp_mock.single_request();
let request_body = request.body_json();
assert_message_role(&request_body["input"][0], "developer");
assert_message_role(&request_body["input"][1], "user");
let instructions_text = request_body["input"][1]["content"][0]["text"]
.as_str()
.expect("instructions text");
let developer_messages = request.message_input_texts("developer");
let developer_text = developer_messages.join("\n\n");
assert!(
instructions_text.contains("## Skills"),
"expected skills section present"
developer_text.contains("## Skills"),
"expected skills section present: {developer_messages:?}"
);
assert!(
instructions_text.contains("demo: build charts"),
"expected skill summary"
developer_text.contains("demo: build charts"),
"expected skill summary: {developer_messages:?}"
);
let expected_path = normalize_path(skill_dir.join("SKILL.md")).unwrap();
let expected_path_str = expected_path.to_string_lossy().replace('\\', "/");
assert!(
instructions_text.contains(&expected_path_str),
"expected path {expected_path_str} in instructions"
developer_text.contains(&expected_path_str),
"expected path {expected_path_str} in developer message: {developer_messages:?}"
);
let _codex_home_guard = codex_home;
}

View File

@@ -49,6 +49,13 @@ fn developer_texts(input: &[Value]) -> Vec<String> {
.collect()
}
/// Returns how many items in `input` have a `role` field whose string value
/// is exactly `"developer"`.
///
/// Items with no `role` key, or whose `role` is not a string, are not counted.
fn developer_message_count(input: &[Value]) -> usize {
    input
        .iter()
        .filter_map(|entry| entry.get("role"))
        .filter_map(Value::as_str)
        .filter(|role| *role == "developer")
        .count()
}
/// Surrounds `text` with the collaboration-mode open and close tags and
/// returns the result as a new `String`.
fn collab_xml(text: &str) -> String {
    let open_tag = COLLABORATION_MODE_OPEN_TAG;
    let close_tag = COLLABORATION_MODE_CLOSE_TAG;
    format!("{open_tag}{text}{close_tag}")
}
@@ -82,9 +89,18 @@ async fn no_collaboration_instructions_by_default() -> Result<()> {
wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
let input = req.single_request().input();
assert_eq!(developer_message_count(&input), 1);
let dev_texts = developer_texts(&input);
assert_eq!(dev_texts.len(), 1);
assert!(dev_texts[0].contains("<permissions instructions>"));
assert!(
dev_texts
.iter()
.any(|text| text.contains("<permissions instructions>")),
"expected permissions instructions in developer messages, got {dev_texts:?}"
);
assert_eq!(
count_messages_containing(&dev_texts, COLLABORATION_MODE_OPEN_TAG),
0
);
Ok(())
}
@@ -770,8 +786,8 @@ async fn empty_collaboration_instructions_are_ignored() -> Result<()> {
wait_for_event(&test.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
let input = req.single_request().input();
assert_eq!(developer_message_count(&input), 1);
let dev_texts = developer_texts(&input);
assert_eq!(dev_texts.len(), 1);
let collab_text = collab_xml("");
assert_eq!(count_messages_containing(&dev_texts, &collab_text), 0);

View File

@@ -181,6 +181,7 @@ async fn assert_compaction_uses_turn_lifecycle_id(codex: &std::sync::Arc<codex_c
}
/// Builds the snapshot options shared by this file's snapshot assertions:
/// the default options with `strip_capability_instructions()` applied and
/// the `KindWithTextPrefix` render mode configured with `max_chars: 64`.
fn context_snapshot_options() -> ContextSnapshotOptions {
    let render_mode = ContextSnapshotRenderMode::KindWithTextPrefix { max_chars: 64 };
    ContextSnapshotOptions::default()
        .strip_capability_instructions()
        .render_mode(render_mode)
}

View File

@@ -61,6 +61,7 @@ fn summary_with_prefix(summary: &str) -> String {
/// Returns the `ContextSnapshotOptions` used for snapshots in this file:
/// starts from `Default`, applies `strip_capability_instructions()`, and
/// selects `KindWithTextPrefix` rendering with `max_chars` set to 64.
fn context_snapshot_options() -> ContextSnapshotOptions {
    let prefix_mode = ContextSnapshotRenderMode::KindWithTextPrefix { max_chars: 64 };
    let options = ContextSnapshotOptions::default().strip_capability_instructions();
    options.render_mode(prefix_mode)
}

View File

@@ -494,6 +494,7 @@ async fn snapshot_rollback_past_compaction_replays_append_only_history() -> Resu
("after rollback", &requests[3]),
],
&ContextSnapshotOptions::default()
.strip_capability_instructions()
.render_mode(ContextSnapshotRenderMode::KindWithTextPrefix { max_chars: 64 }),
)
);

View File

@@ -45,7 +45,7 @@ fn format_labeled_requests_snapshot(
)
}
fn agents_message_count(request: &ResponsesRequest) -> usize {
fn user_instructions_wrapper_count(request: &ResponsesRequest) -> usize {
request
.message_input_texts("user")
.iter()
@@ -262,14 +262,14 @@ async fn snapshot_model_visible_layout_cwd_change_does_not_refresh_agents() -> R
let requests = responses.requests();
assert_eq!(requests.len(), 2, "expected two requests");
assert_eq!(
agents_message_count(&requests[0]),
1,
"expected exactly one AGENTS message in first request"
user_instructions_wrapper_count(&requests[0]),
0,
"expected first request to omit the serialized user-instructions wrapper when cwd-only project docs are introduced after session init"
);
assert_eq!(
agents_message_count(&requests[1]),
1,
"expected AGENTS to refresh after cwd change, but current behavior only keeps history AGENTS"
user_instructions_wrapper_count(&requests[1]),
0,
"expected second request to keep omitting the serialized user-instructions wrapper after cwd change with the current session-scoped project doc behavior"
);
insta::assert_snapshot!(
"model_visible_layout_cwd_change_does_not_refresh_agents",

View File

@@ -189,9 +189,10 @@ fn tool_description(body: &serde_json::Value, tool_name: &str) -> Option<String>
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn plugin_skills_append_to_instructions() -> Result<()> {
async fn capability_sections_render_in_developer_message_in_order() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = MockServer::start().await;
let server = start_mock_server().await;
let apps_server = AppsTestServer::mount_with_connector_name(&server, "Google Calendar").await?;
let resp_mock = mount_sse_once(
&server,
@@ -201,7 +202,13 @@ async fn plugin_skills_append_to_instructions() -> Result<()> {
let codex_home = Arc::new(TempDir::new()?);
write_plugin_skill_plugin(codex_home.as_ref());
let codex = build_plugin_test_codex(&server, Arc::clone(&codex_home)).await?;
write_plugin_app_plugin(codex_home.as_ref());
let codex = build_apps_enabled_plugin_test_codex(
&server,
Arc::clone(&codex_home),
apps_server.chatgpt_base_url,
)
.await?;
codex
.submit(Op::UserInput {
@@ -216,29 +223,36 @@ async fn plugin_skills_append_to_instructions() -> Result<()> {
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
let request = resp_mock.single_request();
let request_body = request.body_json();
let instructions_text = request_body["input"][1]["content"][0]["text"]
.as_str()
.expect("instructions text");
let developer_messages = request.message_input_texts("developer");
let developer_text = developer_messages.join("\n\n");
let apps_pos = developer_text
.find("## Apps")
.expect("expected apps section in developer message");
let skills_pos = developer_text
.find("## Skills")
.expect("expected skills section in developer message");
let plugins_pos = developer_text
.find("## Plugins")
.expect("expected plugins section in developer message");
assert!(
instructions_text.contains("## Plugins"),
"expected plugins section present"
apps_pos < skills_pos && skills_pos < plugins_pos,
"expected Apps -> Skills -> Plugins order: {developer_messages:?}"
);
assert!(
instructions_text.contains("`sample`"),
"expected enabled plugin name in instructions"
developer_text.contains("`sample`"),
"expected enabled plugin name in developer message: {developer_messages:?}"
);
assert!(
instructions_text.contains("`sample`: inspect sample data"),
"expected plugin description in instructions"
developer_text.contains("`sample`: inspect sample data"),
"expected plugin description in developer message: {developer_messages:?}"
);
assert!(
instructions_text.contains("skill entries are prefixed with `plugin_name:`"),
"expected plugin skill naming guidance"
developer_text.contains("skill entries are prefixed with `plugin_name:`"),
"expected plugin skill naming guidance in developer message: {developer_messages:?}"
);
assert!(
instructions_text.contains("sample:sample-search: inspect sample data"),
"expected namespaced plugin skill summary"
developer_text.contains("sample:sample-search: inspect sample data"),
"expected namespaced plugin skill summary in developer message: {developer_messages:?}"
);
Ok(())

View File

@@ -6,9 +6,7 @@ Scenario: Manual /compact with prior user history compacts existing history and
## Local Compaction Request
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:first manual turn
03:message/assistant:FIRST_REPLY
04:message/user:<SUMMARIZATION_PROMPT>
@@ -17,7 +15,5 @@ Scenario: Manual /compact with prior user history compacts existing history and
00:message/user:first manual turn
01:message/user:<COMPACTION_SUMMARY>\nFIRST_MANUAL_SUMMARY
02:message/developer:<PERMISSIONS_INSTRUCTIONS>
03:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
03:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
04:message/user:second manual turn

View File

@@ -1,6 +1,5 @@
---
source: core/tests/suite/compact.rs
assertion_line: 3343
expression: "format_labeled_requests_snapshot(\"Manual /compact with no prior user turn currently still issues a compaction request; follow-up turn carries canonical context and the new user message.\",\n&[(\"Local Compaction Request\", &requests[0]),\n(\"Local Post-Compaction History Layout\", &requests[1]),])"
---
Scenario: Manual /compact with no prior user turn currently still issues a compaction request; follow-up turn carries canonical context and the new user message.
@@ -11,7 +10,5 @@ Scenario: Manual /compact with no prior user turn currently still issues a compa
## Local Post-Compaction History Layout
00:message/user:<COMPACTION_SUMMARY>\nMANUAL_EMPTY_SUMMARY
01:message/developer:<PERMISSIONS_INSTRUCTIONS>
02:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
03:message/user:AFTER_MANUAL_EMPTY_COMPACT

View File

@@ -6,9 +6,7 @@ Scenario: True mid-turn continuation compaction after tool output: compact reque
## Local Compaction Request
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:function call limit push
03:function_call/test_tool
04:function_call_output:unsupported call: test_tool
@@ -16,8 +14,6 @@ Scenario: True mid-turn continuation compaction after tool output: compact reque
## Local Post-Compaction History Layout
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:function call limit push
03:message/user:<COMPACTION_SUMMARY>\nAUTO_SUMMARY

View File

@@ -1,22 +1,17 @@
---
source: core/tests/suite/compact.rs
assertion_line: 1799
expression: "format_labeled_requests_snapshot(\"Pre-sampling compaction on model switch to a smaller context window: current behavior compacts using prior-turn history only (incoming user message excluded), and the follow-up request carries compacted history plus the new user message.\",\n&[(\"Initial Request (Previous Model)\", &requests[0]),\n(\"Pre-sampling Compaction Request\", &requests[1]),\n(\"Post-Compaction Follow-up Request (Next Model)\", &requests[2]),])"
---
Scenario: Pre-sampling compaction on model switch to a smaller context window: current behavior compacts using prior-turn history only (incoming user message excluded), and the follow-up request carries compacted history plus the new user message.
## Initial Request (Previous Model)
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:before switch
## Pre-sampling Compaction Request
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:before switch
03:message/assistant:before switch
04:message/user:<SUMMARIZATION_PROMPT>
@@ -27,7 +22,5 @@ Scenario: Pre-sampling compaction on model switch to a smaller context window: c
02:message/developer[2]:
[01] <model_switch>\nThe user was previously using a different model....
[02] <PERMISSIONS_INSTRUCTIONS>
03:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
03:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
04:message/user:after switch

View File

@@ -6,9 +6,7 @@ Scenario: Pre-turn auto-compaction context-window failure: compaction request ex
## Local Compaction Request (Incoming User Excluded)
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:USER_ONE
03:message/assistant:FIRST_REPLY
04:message/user:<SUMMARIZATION_PROMPT>

View File

@@ -6,9 +6,7 @@ Scenario: Pre-turn auto-compaction with a context override emits the context dif
## Local Compaction Request
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:USER_ONE
03:message/assistant:FIRST_REPLY
04:message/user:USER_TWO
@@ -20,9 +18,7 @@ Scenario: Pre-turn auto-compaction with a context override emits the context dif
01:message/user:USER_TWO
02:message/user:<COMPACTION_SUMMARY>\nPRE_TURN_SUMMARY
03:message/developer:<PERMISSIONS_INSTRUCTIONS>
04:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=PRETURN_CONTEXT_DIFF_CWD>
04:message/user:<ENVIRONMENT_CONTEXT:cwd=PRETURN_CONTEXT_DIFF_CWD>
05:message/user[4]:
[01] <image>
[02] <input_image:image_url>

View File

@@ -1,22 +1,17 @@
---
source: core/tests/suite/compact.rs
assertion_line: 3195
expression: "format_labeled_requests_snapshot(\"Pre-turn compaction during model switch (without pre-sampling model-switch compaction): current behavior strips incoming <model_switch> from the compact request and restores it in the post-compaction follow-up request.\",\n&[(\"Initial Request (Previous Model)\", &requests[0]),\n(\"Local Compaction Request\", &requests[1]),\n(\"Local Post-Compaction History Layout\", &requests[2]),])"
---
Scenario: Pre-turn compaction during model switch (without pre-sampling model-switch compaction): current behavior strips incoming <model_switch> from the compact request and restores it in the post-compaction follow-up request.
## Initial Request (Previous Model)
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:BEFORE_SWITCH_USER
## Local Compaction Request
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:BEFORE_SWITCH_USER
03:message/assistant:BEFORE_SWITCH_REPLY
04:message/user:<SUMMARIZATION_PROMPT>
@@ -28,7 +23,5 @@ Scenario: Pre-turn compaction during model switch (without pre-sampling model-sw
[01] <model_switch>\nThe user was previously using a different model....
[02] <PERMISSIONS_INSTRUCTIONS>
[03] <personality_spec> The user has requested a new communication st...
03:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
03:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
04:message/user:AFTER_SWITCH_USER

View File

@@ -1,6 +1,5 @@
---
source: core/tests/suite/compact_remote.rs
assertion_line: 1950
expression: "format_labeled_requests_snapshot(\"After remote manual /compact and resume, the first resumed turn rebuilds history from the compaction item and restates realtime-end instructions from reconstructed previous-turn settings.\",\n&[(\"Remote Compaction Request\", &compact_request),\n(\"Remote Post-Resume History Layout\", after_resume_request),])"
---
Scenario: After remote manual /compact and resume, the first resumed turn rebuilds history from the compaction item and restates realtime-end instructions from reconstructed previous-turn settings.
@@ -9,9 +8,7 @@ Scenario: After remote manual /compact and resume, the first resumed turn rebuil
00:message/developer[2]:
[01] <PERMISSIONS_INSTRUCTIONS>
[02] <realtime_conversation>\nRealtime conversation started.\n\nYou a...
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:USER_ONE
03:message/assistant:REMOTE_FIRST_REPLY
@@ -20,7 +17,5 @@ Scenario: After remote manual /compact and resume, the first resumed turn rebuil
01:message/developer[2]:
[01] <PERMISSIONS_INSTRUCTIONS>
[02] <realtime_conversation>\nRealtime conversation ended.\n\nSubsequ...
02:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
03:message/user:USER_TWO

View File

@@ -1,6 +1,5 @@
---
source: core/tests/suite/compact_remote.rs
assertion_line: 1742
expression: "format_labeled_requests_snapshot(\"Remote manual /compact while realtime remains active: the next regular turn restates realtime-start instructions after compaction clears the baseline.\",\n&[(\"Remote Compaction Request\", &compact_request),\n(\"Remote Post-Compaction History Layout\", post_compact_request),])"
---
Scenario: Remote manual /compact while realtime remains active: the next regular turn restates realtime-start instructions after compaction clears the baseline.
@@ -9,9 +8,7 @@ Scenario: Remote manual /compact while realtime remains active: the next regular
00:message/developer[2]:
[01] <PERMISSIONS_INSTRUCTIONS>
[02] <realtime_conversation>\nRealtime conversation started.\n\nYou a...
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:USER_ONE
03:message/assistant:REMOTE_FIRST_REPLY
@@ -20,7 +17,5 @@ Scenario: Remote manual /compact while realtime remains active: the next regular
01:message/developer[2]:
[01] <PERMISSIONS_INSTRUCTIONS>
[02] <realtime_conversation>\nRealtime conversation started.\n\nYou a...
02:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
03:message/user:USER_TWO

View File

@@ -6,16 +6,12 @@ Scenario: Remote manual /compact where remote compact output is compaction-only:
## Remote Compaction Request
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:hello remote compact
03:message/assistant:FIRST_REMOTE_REPLY
## Remote Post-Compaction History Layout
00:compaction:encrypted=true
01:message/developer:<PERMISSIONS_INSTRUCTIONS>
02:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
03:message/user:after compact

View File

@@ -1,12 +1,10 @@
---
source: core/tests/suite/compact_remote.rs
expression: "format_labeled_requests_snapshot(\"Remote manual /compact with no prior user turn skips the remote compact request; the follow-up turn carries canonical context and new user message.\",\n&[(\"Remote Post-Compaction History Layout\", &follow_up_request),])"
expression: "format_labeled_requests_snapshot(\"Remote manual /compact with no prior user turn skips the remote compact request; the follow-up turn carries canonical context and new user message.\",\n&[(\"Remote Post-Compaction History Layout\", &follow_up_request)])"
---
Scenario: Remote manual /compact with no prior user turn skips the remote compact request; the follow-up turn carries canonical context and new user message.
## Remote Post-Compaction History Layout
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:USER_ONE

View File

@@ -1,6 +1,5 @@
---
source: core/tests/suite/compact_remote.rs
assertion_line: 1843
expression: "format_labeled_requests_snapshot(\"Remote mid-turn continuation compaction after realtime was closed before the turn: the initial second-turn request emits realtime-end instructions, but the continuation request does not restate them after compaction because the current turn already established the inactive baseline.\",\n&[(\"Second Turn Initial Request\", second_turn_request),\n(\"Remote Compaction Request\", &compact_request),\n(\"Remote Post-Compaction History Layout\", post_compact_request),])"
---
Scenario: Remote mid-turn continuation compaction after realtime was closed before the turn: the initial second-turn request emits realtime-end instructions, but the continuation request does not restate them after compaction because the current turn already established the inactive baseline.
@@ -9,9 +8,7 @@ Scenario: Remote mid-turn continuation compaction after realtime was closed befo
00:message/developer[2]:
[01] <PERMISSIONS_INSTRUCTIONS>
[02] <realtime_conversation>\nRealtime conversation started.\n\nYou a...
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:SETUP_USER
03:message/assistant:REMOTE_SETUP_REPLY
04:message/developer:<realtime_conversation>\nRealtime conversation ended.\n\nSubsequ...
@@ -21,9 +18,7 @@ Scenario: Remote mid-turn continuation compaction after realtime was closed befo
00:message/developer[2]:
[01] <PERMISSIONS_INSTRUCTIONS>
[02] <realtime_conversation>\nRealtime conversation started.\n\nYou a...
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:SETUP_USER
03:message/assistant:REMOTE_SETUP_REPLY
04:message/developer:<realtime_conversation>\nRealtime conversation ended.\n\nSubsequ...
@@ -33,7 +28,5 @@ Scenario: Remote mid-turn continuation compaction after realtime was closed befo
## Remote Post-Compaction History Layout
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:compaction:encrypted=true

View File

@@ -12,7 +12,5 @@ Scenario: After a prior manual /compact produced an older remote compaction item
00:message/user:USER_ONE
01:compaction:encrypted=true
02:message/developer:<PERMISSIONS_INSTRUCTIONS>
03:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
03:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
04:message/user:USER_TWO

View File

@@ -6,17 +6,13 @@ Scenario: Remote mid-turn continuation compaction after tool output: compact req
## Remote Compaction Request
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:USER_ONE
03:function_call/test_tool
04:function_call_output:unsupported call: test_tool
## Remote Post-Compaction History Layout
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:USER_ONE
03:compaction:encrypted=true

View File

@@ -6,16 +6,12 @@ Scenario: Remote mid-turn compaction where compact output has only a compaction
## Remote Compaction Request
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:USER_ONE
03:function_call/test_tool
04:function_call_output:unsupported call: test_tool
## Remote Post-Compaction History Layout
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:compaction:encrypted=true

View File

@@ -6,8 +6,6 @@ Scenario: Remote pre-turn auto-compaction context-window failure: compaction req
## Remote Compaction Request (Incoming User Excluded)
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:USER_ONE
03:message/assistant:REMOTE_FIRST_REPLY

View File

@@ -6,8 +6,6 @@ Scenario: Remote pre-turn auto-compaction parse failure: compaction request excl
## Remote Compaction Request (Incoming User Excluded)
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:turn that exceeds token threshold
03:message/assistant:initial turn complete

View File

@@ -6,9 +6,7 @@ Scenario: Remote pre-turn auto-compaction with a context override emits the cont
## Remote Compaction Request
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:USER_ONE
03:message/assistant:REMOTE_FIRST_REPLY
04:message/user:USER_TWO
@@ -19,7 +17,5 @@ Scenario: Remote pre-turn auto-compaction with a context override emits the cont
01:message/user:USER_TWO
02:compaction:encrypted=true
03:message/developer:<PERMISSIONS_INSTRUCTIONS>
04:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=PRETURN_CONTEXT_DIFF_CWD>
04:message/user:<ENVIRONMENT_CONTEXT:cwd=PRETURN_CONTEXT_DIFF_CWD>
05:message/user:USER_THREE

View File

@@ -1,6 +1,5 @@
---
source: core/tests/suite/compact_remote.rs
assertion_line: 1656
expression: "format_labeled_requests_snapshot(\"Remote pre-turn auto-compaction after realtime was closed between turns: the follow-up request emits realtime-end instructions from previous-turn settings even though compaction cleared the reference baseline.\",\n&[(\"Remote Compaction Request\", &compact_request),\n(\"Remote Post-Compaction History Layout\", post_compact_request),])"
---
Scenario: Remote pre-turn auto-compaction after realtime was closed between turns: the follow-up request emits realtime-end instructions from previous-turn settings even though compaction cleared the reference baseline.
@@ -9,9 +8,7 @@ Scenario: Remote pre-turn auto-compaction after realtime was closed between turn
00:message/developer[2]:
[01] <PERMISSIONS_INSTRUCTIONS>
[02] <realtime_conversation>\nRealtime conversation started.\n\nYou a...
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:USER_ONE
03:message/assistant:REMOTE_FIRST_REPLY
@@ -20,7 +17,5 @@ Scenario: Remote pre-turn auto-compaction after realtime was closed between turn
01:message/developer[2]:
[01] <PERMISSIONS_INSTRUCTIONS>
[02] <realtime_conversation>\nRealtime conversation ended.\n\nSubsequ...
02:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
03:message/user:USER_TWO

View File

@@ -1,6 +1,5 @@
---
source: core/tests/suite/compact_remote.rs
assertion_line: 1521
expression: "format_labeled_requests_snapshot(\"Remote pre-turn auto-compaction while realtime remains active: compaction clears the reference baseline, so the follow-up request restates realtime-start instructions.\",\n&[(\"Remote Compaction Request\", &compact_request),\n(\"Remote Post-Compaction History Layout\", post_compact_request),])"
---
Scenario: Remote pre-turn auto-compaction while realtime remains active: compaction clears the reference baseline, so the follow-up request restates realtime-start instructions.
@@ -9,9 +8,7 @@ Scenario: Remote pre-turn auto-compaction while realtime remains active: compact
00:message/developer[2]:
[01] <PERMISSIONS_INSTRUCTIONS>
[02] <realtime_conversation>\nRealtime conversation started.\n\nYou a...
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:USER_ONE
03:message/assistant:REMOTE_FIRST_REPLY
@@ -20,7 +17,5 @@ Scenario: Remote pre-turn auto-compaction while realtime remains active: compact
01:message/developer[2]:
[01] <PERMISSIONS_INSTRUCTIONS>
[02] <realtime_conversation>\nRealtime conversation started.\n\nYou a...
02:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
03:message/user:USER_TWO

View File

@@ -1,22 +1,17 @@
---
source: core/tests/suite/compact_remote.rs
assertion_line: 1514
expression: "format_labeled_requests_snapshot(\"Remote pre-turn compaction during model switch currently excludes incoming user input, strips incoming <model_switch> from the compact request payload, and restores it in the post-compaction follow-up request.\",\n&[(\"Initial Request (Previous Model)\", &initial_turn_request),\n(\"Remote Compaction Request\", &compact_request),\n(\"Remote Post-Compaction History Layout\", &post_compact_turn_request),])"
---
Scenario: Remote pre-turn compaction during model switch currently excludes incoming user input, strips incoming <model_switch> from the compact request payload, and restores it in the post-compaction follow-up request.
## Initial Request (Previous Model)
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:BEFORE_SWITCH_USER
## Remote Compaction Request
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:BEFORE_SWITCH_USER
03:message/assistant:BEFORE_SWITCH_REPLY
@@ -27,7 +22,5 @@ Scenario: Remote pre-turn compaction during model switch currently excludes inco
[01] <model_switch>\nThe user was previously using a different model....
[02] <PERMISSIONS_INSTRUCTIONS>
[03] <personality_spec> The user has requested a new communication st...
03:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
03:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
04:message/user:AFTER_SWITCH_USER

View File

@@ -1,14 +1,12 @@
---
source: core/tests/suite/compact_resume_fork.rs
expression: "context_snapshot::format_labeled_requests_snapshot(\"rollback past compaction replay after rollback\",\n&[(\"compaction request\", &requests[1]), (\"before rollback\", &requests[2]),\n(\"after rollback\", &requests[3]),],\n&ContextSnapshotOptions::default().render_mode(ContextSnapshotRenderMode::KindWithTextPrefix\n{ max_chars: 64 }),)"
expression: "context_snapshot::format_labeled_requests_snapshot(\"rollback past compaction replay after rollback\",\n&[(\"compaction request\", &requests[1]), (\"before rollback\", &requests[2]),\n(\"after rollback\", &requests[3]),],\n&ContextSnapshotOptions::default().strip_capability_instructions().render_mode(ContextSnapshotRenderMode::KindWithTextPrefix\n{ max_chars: 64 }),)"
---
Scenario: rollback past compaction replay after rollback
## compaction request
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:hello world
03:message/assistant:FIRST_REPLY
04:message/user:<SUMMARIZATION_PROMPT>
@@ -17,20 +15,14 @@ Scenario: rollback past compaction replay after rollback
00:message/user:hello world
01:message/user:<COMPACTION_SUMMARY>\nSUMMARY_ONLY_CONTEXT
02:message/developer:<PERMISSIONS_INSTRUCTIONS>
03:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
03:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
04:message/user:EDITED_AFTER_COMPACT
## after rollback
00:message/user:hello world
01:message/user:<COMPACTION_SUMMARY>\nSUMMARY_ONLY_CONTEXT
02:message/developer:<PERMISSIONS_INSTRUCTIONS>
03:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
03:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
04:message/developer:<PERMISSIONS_INSTRUCTIONS>
05:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
05:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
06:message/user:AFTER_ROLLBACK

View File

@@ -1,22 +1,21 @@
---
source: core/tests/suite/model_visible_layout.rs
assertion_line: 288
expression: "format_labeled_requests_snapshot(\"Second turn changes cwd to a directory with different AGENTS.md; current behavior does not emit refreshed AGENTS instructions.\",\n&[(\"First Request (agents_one)\", &requests[0]),\n(\"Second Request (agents_two cwd)\", &requests[1]),])"
---
Scenario: Second turn changes cwd to a directory with different AGENTS.md; current behavior does not emit refreshed AGENTS instructions.
## First Request (agents_one)
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
00:message/developer[2]:
[01] <PERMISSIONS_INSTRUCTIONS>
[02] <SKILLS_INSTRUCTIONS>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:first turn in agents_one
## Second Request (agents_two cwd)
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
00:message/developer[2]:
[01] <PERMISSIONS_INSTRUCTIONS>
[02] <SKILLS_INSTRUCTIONS>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:first turn in agents_one
03:message/assistant:turn one complete
04:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>

View File

@@ -5,17 +5,17 @@ expression: "format_labeled_requests_snapshot(\"First post-resume turn where pre
Scenario: First post-resume turn where pre-turn override sets model to rollout model; no model-switch update should appear.
## Last Request Before Resume
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
00:message/developer[2]:
[01] <PERMISSIONS_INSTRUCTIONS>
[02] <SKILLS_INSTRUCTIONS>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:seed resume history
## First Request After Resume + Override
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
00:message/developer[2]:
[01] <PERMISSIONS_INSTRUCTIONS>
[02] <SKILLS_INSTRUCTIONS>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:seed resume history
03:message/assistant:recorded before resume
04:message/user:<ENVIRONMENT_CONTEXT:cwd=PRETURN_CONTEXT_DIFF_CWD>

View File

@@ -5,17 +5,17 @@ expression: "format_labeled_requests_snapshot(\"First post-resume turn where res
Scenario: First post-resume turn where resumed config model differs from rollout and personality changes.
## Last Request Before Resume
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
00:message/developer[2]:
[01] <PERMISSIONS_INSTRUCTIONS>
[02] <SKILLS_INSTRUCTIONS>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:seed resume history
## First Request After Resume
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
00:message/developer[2]:
[01] <PERMISSIONS_INSTRUCTIONS>
[02] <SKILLS_INSTRUCTIONS>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:seed resume history
03:message/assistant:recorded before resume
04:message/developer[2]:

View File

@@ -1,22 +1,21 @@
---
source: core/tests/suite/model_visible_layout.rs
assertion_line: 177
expression: "format_labeled_requests_snapshot(\"Second turn changes cwd, approval policy, and personality while keeping model constant.\",\n&[(\"First Request (Baseline)\", &requests[0]),\n(\"Second Request (Turn Overrides)\", &requests[1]),])"
---
Scenario: Second turn changes cwd, approval policy, and personality while keeping model constant.
## First Request (Baseline)
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
00:message/developer[2]:
[01] <PERMISSIONS_INSTRUCTIONS>
[02] <SKILLS_INSTRUCTIONS>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:first turn
## Second Request (Turn Overrides)
00:message/developer:<PERMISSIONS_INSTRUCTIONS>
01:message/user[2]:
[01] <AGENTS_MD>
[02] <ENVIRONMENT_CONTEXT:cwd=<CWD>>
00:message/developer[2]:
[01] <PERMISSIONS_INSTRUCTIONS>
[02] <SKILLS_INSTRUCTIONS>
01:message/user:<ENVIRONMENT_CONTEXT:cwd=<CWD>>
02:message/user:first turn
03:message/assistant:turn one complete
04:message/developer[2]: