Support SYSTEM skills. (#8220)

1. Remove PUBLIC skills and introduce SYSTEM skills embedded in the
binary and installed into $CODEX_HOME/skills/.system at startup.
2. Skills are now always enabled (feature flag removed).
3. Update skills/list to accept forceReload and plumb it through (not
used by clients yet).
This commit is contained in:
xl-openai
2025-12-17 18:48:28 -08:00
committed by GitHub
parent 6f102e18c4
commit da3869eeb6
32 changed files with 1965 additions and 723 deletions

View File

@@ -15,7 +15,6 @@ use codex_core::WireApi;
use codex_core::auth::AuthCredentialsStoreMode;
use codex_core::built_in_model_providers;
use codex_core::error::CodexErr;
use codex_core::features::Feature;
use codex_core::openai_models::models_manager::ModelsManager;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
@@ -651,7 +650,7 @@ async fn includes_user_instructions_message_in_request() {
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn skills_append_to_instructions_when_feature_enabled() {
async fn skills_append_to_instructions() {
skip_if_no_network!();
let server = MockServer::start().await;
@@ -673,7 +672,6 @@ async fn skills_append_to_instructions_when_feature_enabled() {
let mut config = load_default_config_for_test(&codex_home);
config.model_provider = model_provider;
config.features.enable(Feature::Skills);
config.cwd = codex_home.path().to_path_buf();
let conversation_manager = ConversationManager::with_models_provider_and_home(

View File

@@ -589,7 +589,36 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() {
.body_json::<serde_json::Value>()
.unwrap();
let input = body.get("input").and_then(|v| v.as_array()).unwrap();
let environment_message = input[0]["content"][0]["text"].as_str().unwrap();
/// Drops request-input entries that are irrelevant to the compaction
/// assertions: `function_call_output` items (platform-dependent shell
/// output) and the cached UI prefix message (project docs + skills).
fn normalize_inputs(values: &[serde_json::Value]) -> Vec<serde_json::Value> {
    let is_tool_output = |item: &serde_json::Value| {
        item.get("type").and_then(|ty| ty.as_str()) == Some("function_call_output")
    };
    let is_ui_prefix = |item: &serde_json::Value| {
        item.get("content")
            .and_then(|content| content.as_array())
            .and_then(|content| content.first())
            .and_then(|entry| entry.get("text"))
            .and_then(|text| text.as_str())
            // Ignore the cached UI prefix (project docs + skills) since it is not relevant to
            // compaction behavior and can change as bundled skills evolve.
            .is_some_and(|text| text.starts_with("# AGENTS.md instructions for "))
    };
    values
        .iter()
        .filter(|item| !is_tool_output(item) && !is_ui_prefix(item))
        .cloned()
        .collect()
}
let initial_input = normalize_inputs(input);
let environment_message = initial_input[0]["content"][0]["text"].as_str().unwrap();
// test 1: after compaction, we should have one environment message, one user message, and one user message with summary prefix
let compaction_indices = [2, 4, 6];
@@ -603,6 +632,7 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() {
.body_json::<serde_json::Value>()
.unwrap();
let input = body.get("input").and_then(|v| v.as_array()).unwrap();
let input = normalize_inputs(input);
assert_eq!(input.len(), 3);
let environment_message = input[0]["content"][0]["text"].as_str().unwrap();
let user_message_received = input[1]["content"][0]["text"].as_str().unwrap();
@@ -962,20 +992,6 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() {
]
]);
// Ignore local shell call output because it differs from one OS to another and is out of the scope of this test.
/// Strips `function_call_output` items from the request input so that
/// platform-dependent shell output does not affect the assertions below.
fn normalize_inputs(values: &[serde_json::Value]) -> Vec<serde_json::Value> {
    let mut kept = Vec::with_capacity(values.len());
    for item in values {
        let ty = item.get("type").and_then(|ty| ty.as_str());
        if ty != Some("function_call_output") {
            kept.push(item.clone());
        }
    }
    kept
}
for (i, request) in requests_payloads.iter().enumerate() {
let body = request.body_json::<serde_json::Value>().unwrap();
let input = body.get("input").and_then(|v| v.as_array()).unwrap();

View File

@@ -251,49 +251,36 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests
.await?;
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
let body1 = req1.single_request().body_json();
let input1 = body1["input"].as_array().expect("input array");
assert_eq!(input1.len(), 3, "expected cached prefix + env + user msg");
let ui_text = input1[0]["content"][0]["text"]
.as_str()
.expect("ui message text");
assert!(
ui_text.contains("be consistent and helpful"),
"expected user instructions in UI message: {ui_text}"
);
let shell = default_user_shell();
let cwd_str = config.cwd.to_string_lossy();
let expected_env_text = default_env_context_str(&cwd_str, &shell);
let expected_ui_text = format!(
"# AGENTS.md instructions for {cwd_str}\n\n<INSTRUCTIONS>\nbe consistent and helpful\n</INSTRUCTIONS>"
);
let expected_env_msg = serde_json::json!({
"type": "message",
"role": "user",
"content": [ { "type": "input_text", "text": expected_env_text } ]
});
let expected_ui_msg = serde_json::json!({
"type": "message",
"role": "user",
"content": [ { "type": "input_text", "text": expected_ui_text } ]
});
let expected_user_message_1 = serde_json::json!({
"type": "message",
"role": "user",
"content": [ { "type": "input_text", "text": "hello 1" } ]
});
let body1 = req1.single_request().body_json();
assert_eq!(
body1["input"],
serde_json::json!([expected_ui_msg, expected_env_msg, expected_user_message_1])
input1[1],
text_user_input(expected_env_text),
"expected environment context after UI message"
);
assert_eq!(input1[2], text_user_input("hello 1".to_string()));
let expected_user_message_2 = serde_json::json!({
"type": "message",
"role": "user",
"content": [ { "type": "input_text", "text": "hello 2" } ]
});
let body2 = req2.single_request().body_json();
let expected_body2 = serde_json::json!(
[
body1["input"].as_array().unwrap().as_slice(),
[expected_user_message_2].as_slice(),
]
.concat()
let input2 = body2["input"].as_array().expect("input array");
assert_eq!(
&input2[..input1.len()],
input1.as_slice(),
"expected cached prefix to be reused"
);
assert_eq!(body2["input"], expected_body2);
assert_eq!(input2[input1.len()], text_user_input("hello 2".to_string()));
Ok(())
}
@@ -439,17 +426,21 @@ async fn override_before_first_turn_emits_environment_context() -> anyhow::Resul
"expected at least environment context and user message"
);
let env_msg = &input[1];
let env_text = env_msg["content"][0]["text"]
.as_str()
.expect("environment context text");
let env_texts: Vec<&str> = input
.iter()
.filter_map(|msg| {
msg["content"]
.as_array()
.and_then(|content| content.first())
.and_then(|item| item["text"].as_str())
})
.filter(|text| text.starts_with(ENVIRONMENT_CONTEXT_OPEN_TAG))
.collect();
assert!(
env_text.starts_with(ENVIRONMENT_CONTEXT_OPEN_TAG),
"second entry should be environment context, got: {env_text}"
);
assert!(
env_text.contains("<approval_policy>never</approval_policy>"),
"environment context should reflect overridden approval policy: {env_text}"
env_texts
.iter()
.any(|text| text.contains("<approval_policy>never</approval_policy>")),
"environment context should reflect overridden approval policy: {env_texts:?}"
);
let env_count = input
@@ -474,11 +465,19 @@ async fn override_before_first_turn_emits_environment_context() -> anyhow::Resul
"environment context should appear exactly twice, found {env_count}"
);
let user_msg = &input[2];
let user_text = user_msg["content"][0]["text"]
.as_str()
.expect("user message text");
assert_eq!(user_text, "first message");
let user_texts: Vec<&str> = input
.iter()
.filter_map(|msg| {
msg["content"]
.as_array()
.and_then(|content| content.first())
.and_then(|item| item["text"].as_str())
})
.collect();
assert!(
user_texts.contains(&"first message"),
"expected user message text, got {user_texts:?}"
);
Ok(())
}
@@ -646,12 +645,10 @@ async fn send_user_turn_with_no_changes_does_not_send_environment_context() -> a
let body1 = req1.single_request().body_json();
let body2 = req2.single_request().body_json();
let expected_ui_msg = body1["input"][0].clone();
let shell = default_user_shell();
let default_cwd_lossy = default_cwd.to_string_lossy();
let expected_ui_text = format!(
"# AGENTS.md instructions for {default_cwd_lossy}\n\n<INSTRUCTIONS>\nbe consistent and helpful\n</INSTRUCTIONS>"
);
let expected_ui_msg = text_user_input(expected_ui_text);
let expected_env_msg_1 = text_user_input(default_env_context_str(&default_cwd_lossy, &shell));
let expected_user_message_1 = text_user_input("hello 1".to_string());
@@ -738,16 +735,9 @@ async fn send_user_turn_with_changes_sends_environment_context() -> anyhow::Resu
let body1 = req1.single_request().body_json();
let body2 = req2.single_request().body_json();
let expected_ui_msg = body1["input"][0].clone();
let shell = default_user_shell();
let expected_ui_text = format!(
"# AGENTS.md instructions for {}\n\n<INSTRUCTIONS>\nbe consistent and helpful\n</INSTRUCTIONS>",
default_cwd.to_string_lossy()
);
let expected_ui_msg = serde_json::json!({
"type": "message",
"role": "user",
"content": [ { "type": "input_text", "text": expected_ui_text } ]
});
let expected_env_text_1 = default_env_context_str(&default_cwd.to_string_lossy(), &shell);
let expected_env_msg_1 = text_user_input(expected_env_text_1);
let expected_user_message_1 = text_user_input("hello 1".to_string());

View File

@@ -553,31 +553,28 @@ async fn review_input_isolated_from_parent_history() {
.expect("expected POST request to /responses");
let body = request.body_json::<serde_json::Value>().unwrap();
let input = body["input"].as_array().expect("input array");
assert_eq!(
input.len(),
2,
"expected environment context and review prompt"
assert!(
input.len() >= 2,
"expected at least environment context and review prompt"
);
let env_msg = &input[0];
assert_eq!(env_msg["type"].as_str().unwrap(), "message");
assert_eq!(env_msg["role"].as_str().unwrap(), "user");
let env_text = env_msg["content"][0]["text"].as_str().expect("env text");
assert!(
env_text.starts_with(ENVIRONMENT_CONTEXT_OPEN_TAG),
"environment context must be the first item"
);
let env_text = input
.iter()
.filter_map(|msg| msg["content"][0]["text"].as_str())
.find(|text| text.starts_with(ENVIRONMENT_CONTEXT_OPEN_TAG))
.expect("env text");
assert!(
env_text.contains("<cwd>"),
"environment context should include cwd"
);
let review_msg = &input[1];
assert_eq!(review_msg["type"].as_str().unwrap(), "message");
assert_eq!(review_msg["role"].as_str().unwrap(), "user");
let review_text = input
.iter()
.filter_map(|msg| msg["content"][0]["text"].as_str())
.find(|text| *text == review_prompt)
.expect("review prompt text");
assert_eq!(
review_msg["content"][0]["text"].as_str().unwrap(),
review_prompt,
review_text, review_prompt,
"user message should only contain the raw review prompt"
);

View File

@@ -311,10 +311,9 @@ async fn shell_command_snapshot_still_intercepts_apply_patch() -> Result<()> {
#[cfg_attr(target_os = "windows", ignore)]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_snapshot_deleted_after_shutdown_with_skills_enabled() -> Result<()> {
async fn shell_snapshot_deleted_after_shutdown_with_skills() -> Result<()> {
let builder = test_codex().with_config(|config| {
config.features.enable(Feature::ShellSnapshot);
config.features.enable(Feature::Skills);
});
let harness = TestCodexHarness::with_builder(builder).await?;
let home = harness.test().home.clone();

View File

@@ -2,7 +2,6 @@
#![allow(clippy::unwrap_used, clippy::expect_used)]
use anyhow::Result;
use codex_core::features::Feature;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::Op;
use codex_core::protocol::SandboxPolicy;
@@ -27,33 +26,15 @@ fn write_skill(home: &Path, name: &str, description: &str, body: &str) -> std::p
path
}
/// Creates a SKILL.md for `name` under the `.public` skills cache in
/// `home`, writing YAML frontmatter (name + description) followed by
/// `body`, and returns the path of the file that was written.
fn write_public_skill(
    home: &Path,
    name: &str,
    description: &str,
    body: &str,
) -> std::path::PathBuf {
    let skill_dir = home.join("skills").join(".public").join(name);
    fs::create_dir_all(&skill_dir).unwrap();
    let path = skill_dir.join("SKILL.md");
    let frontmatter = format!("---\nname: {name}\ndescription: {description}\n---\n\n{body}\n");
    fs::write(&path, frontmatter).unwrap();
    path
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn user_turn_includes_skill_instructions() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let skill_body = "skill body";
let mut builder = test_codex()
.with_config(|cfg| {
cfg.features.enable(Feature::Skills);
})
.with_pre_build_hook(|home| {
write_skill(home, "demo", "demo skill", skill_body);
});
let mut builder = test_codex().with_pre_build_hook(|home| {
write_skill(home, "demo", "demo skill", skill_body);
});
let test = builder.build(&server).await?;
let skill_path = test.codex_home_path().join("skills/demo/SKILL.md");
@@ -117,15 +98,11 @@ async fn skill_load_errors_surface_in_session_configured() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex()
.with_config(|cfg| {
cfg.features.enable(Feature::Skills);
})
.with_pre_build_hook(|home| {
let skill_dir = home.join("skills").join("broken");
fs::create_dir_all(&skill_dir).unwrap();
fs::write(skill_dir.join("SKILL.md"), "not yaml").unwrap();
});
let mut builder = test_codex().with_pre_build_hook(|home| {
let skill_dir = home.join("skills").join("broken");
fs::create_dir_all(&skill_dir).unwrap();
fs::write(skill_dir.join("SKILL.md"), "not yaml").unwrap();
});
let test = builder.build(&server).await?;
test.codex
@@ -169,19 +146,30 @@ async fn skill_load_errors_surface_in_session_configured() -> Result<()> {
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn list_skills_includes_public_cache_entries() -> Result<()> {
async fn list_skills_includes_system_cache_entries() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex()
.with_config(|cfg| {
cfg.features.enable(Feature::Skills);
})
.with_pre_build_hook(|home| {
write_public_skill(home, "public-demo", "public skill", "public body");
});
let mut builder = test_codex().with_pre_build_hook(|home| {
let system_skill_path = home.join("skills/.system/plan/SKILL.md");
assert!(
!system_skill_path.exists(),
"expected embedded system skills not yet installed, but {system_skill_path:?} exists"
);
});
let test = builder.build(&server).await?;
let system_skill_path = test.codex_home_path().join("skills/.system/plan/SKILL.md");
assert!(
system_skill_path.exists(),
"expected embedded system skills installed to {system_skill_path:?}"
);
let system_skill_contents = fs::read_to_string(&system_skill_path)?;
assert!(
system_skill_contents.contains("name: plan"),
"expected embedded system skill file, got:\n{system_skill_contents}"
);
test.codex
.submit(Op::ListSkills {
cwds: Vec::new(),
@@ -205,12 +193,12 @@ async fn list_skills_includes_public_cache_entries() -> Result<()> {
let skill = skills
.iter()
.find(|skill| skill.name == "public-demo")
.expect("expected public skill to be present");
assert_eq!(skill.scope, codex_protocol::protocol::SkillScope::Public);
.find(|skill| skill.name == "plan")
.expect("expected system skill to be present");
assert_eq!(skill.scope, codex_protocol::protocol::SkillScope::System);
let path_str = skill.path.to_string_lossy().replace('\\', "/");
assert!(
path_str.ends_with("/skills/.public/public-demo/SKILL.md"),
path_str.ends_with("/skills/.system/plan/SKILL.md"),
"unexpected skill path: {path_str}"
);