Support SYSTEM skills. (#8220)

1. Remove PUBLIC skills and introduce SYSTEM skills embedded in the
binary and installed into $CODEX_HOME/skills/.system at startup.
2. Skills are now always enabled (feature flag removed).
3. Update skills/list to accept forceReload and plumb it through (not
used by clients yet).
This commit is contained in:
xl-openai
2025-12-17 18:48:28 -08:00
committed by GitHub
parent 6f102e18c4
commit da3869eeb6
32 changed files with 1965 additions and 723 deletions

View File

@@ -15,7 +15,6 @@ use codex_core::WireApi;
use codex_core::auth::AuthCredentialsStoreMode;
use codex_core::built_in_model_providers;
use codex_core::error::CodexErr;
use codex_core::features::Feature;
use codex_core::openai_models::models_manager::ModelsManager;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
@@ -651,7 +650,7 @@ async fn includes_user_instructions_message_in_request() {
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn skills_append_to_instructions_when_feature_enabled() {
async fn skills_append_to_instructions() {
skip_if_no_network!();
let server = MockServer::start().await;
@@ -673,7 +672,6 @@ async fn skills_append_to_instructions_when_feature_enabled() {
let mut config = load_default_config_for_test(&codex_home);
config.model_provider = model_provider;
config.features.enable(Feature::Skills);
config.cwd = codex_home.path().to_path_buf();
let conversation_manager = ConversationManager::with_models_provider_and_home(

View File

@@ -589,7 +589,36 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() {
.body_json::<serde_json::Value>()
.unwrap();
let input = body.get("input").and_then(|v| v.as_array()).unwrap();
let environment_message = input[0]["content"][0]["text"].as_str().unwrap();
/// Drops request-input entries that are irrelevant to the compaction
/// assertions: `function_call_output` items (platform-dependent shell
/// output) and the cached UI prefix message (project docs + skills).
fn normalize_inputs(values: &[serde_json::Value]) -> Vec<serde_json::Value> {
    let is_tool_output = |item: &serde_json::Value| {
        item.get("type").and_then(|ty| ty.as_str()) == Some("function_call_output")
    };
    let is_ui_prefix = |item: &serde_json::Value| {
        item.get("content")
            .and_then(|content| content.as_array())
            .and_then(|content| content.first())
            .and_then(|entry| entry.get("text"))
            .and_then(|text| text.as_str())
            // Ignore the cached UI prefix (project docs + skills) since it is not relevant to
            // compaction behavior and can change as bundled skills evolve.
            .is_some_and(|text| text.starts_with("# AGENTS.md instructions for "))
    };
    values
        .iter()
        .filter(|item| !is_tool_output(item) && !is_ui_prefix(item))
        .cloned()
        .collect()
}
let initial_input = normalize_inputs(input);
let environment_message = initial_input[0]["content"][0]["text"].as_str().unwrap();
// test 1: after compaction, we should have one environment message, one user message, and one user message with summary prefix
let compaction_indices = [2, 4, 6];
@@ -603,6 +632,7 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() {
.body_json::<serde_json::Value>()
.unwrap();
let input = body.get("input").and_then(|v| v.as_array()).unwrap();
let input = normalize_inputs(input);
assert_eq!(input.len(), 3);
let environment_message = input[0]["content"][0]["text"].as_str().unwrap();
let user_message_received = input[1]["content"][0]["text"].as_str().unwrap();
@@ -962,20 +992,6 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() {
]
]);
// Ignore local shell call output because it differs from one OS to another and is out of the scope of this test.
/// Strips `function_call_output` items from the request input so that
/// platform-dependent shell output does not affect the assertions below.
fn normalize_inputs(values: &[serde_json::Value]) -> Vec<serde_json::Value> {
    let mut kept = Vec::with_capacity(values.len());
    for item in values {
        let ty = item.get("type").and_then(|ty| ty.as_str());
        if ty != Some("function_call_output") {
            kept.push(item.clone());
        }
    }
    kept
}
for (i, request) in requests_payloads.iter().enumerate() {
let body = request.body_json::<serde_json::Value>().unwrap();
let input = body.get("input").and_then(|v| v.as_array()).unwrap();

View File

@@ -251,49 +251,36 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests
.await?;
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
let body1 = req1.single_request().body_json();
let input1 = body1["input"].as_array().expect("input array");
assert_eq!(input1.len(), 3, "expected cached prefix + env + user msg");
let ui_text = input1[0]["content"][0]["text"]
.as_str()
.expect("ui message text");
assert!(
ui_text.contains("be consistent and helpful"),
"expected user instructions in UI message: {ui_text}"
);
let shell = default_user_shell();
let cwd_str = config.cwd.to_string_lossy();
let expected_env_text = default_env_context_str(&cwd_str, &shell);
let expected_ui_text = format!(
"# AGENTS.md instructions for {cwd_str}\n\n<INSTRUCTIONS>\nbe consistent and helpful\n</INSTRUCTIONS>"
);
let expected_env_msg = serde_json::json!({
"type": "message",
"role": "user",
"content": [ { "type": "input_text", "text": expected_env_text } ]
});
let expected_ui_msg = serde_json::json!({
"type": "message",
"role": "user",
"content": [ { "type": "input_text", "text": expected_ui_text } ]
});
let expected_user_message_1 = serde_json::json!({
"type": "message",
"role": "user",
"content": [ { "type": "input_text", "text": "hello 1" } ]
});
let body1 = req1.single_request().body_json();
assert_eq!(
body1["input"],
serde_json::json!([expected_ui_msg, expected_env_msg, expected_user_message_1])
input1[1],
text_user_input(expected_env_text),
"expected environment context after UI message"
);
assert_eq!(input1[2], text_user_input("hello 1".to_string()));
let expected_user_message_2 = serde_json::json!({
"type": "message",
"role": "user",
"content": [ { "type": "input_text", "text": "hello 2" } ]
});
let body2 = req2.single_request().body_json();
let expected_body2 = serde_json::json!(
[
body1["input"].as_array().unwrap().as_slice(),
[expected_user_message_2].as_slice(),
]
.concat()
let input2 = body2["input"].as_array().expect("input array");
assert_eq!(
&input2[..input1.len()],
input1.as_slice(),
"expected cached prefix to be reused"
);
assert_eq!(body2["input"], expected_body2);
assert_eq!(input2[input1.len()], text_user_input("hello 2".to_string()));
Ok(())
}
@@ -439,17 +426,21 @@ async fn override_before_first_turn_emits_environment_context() -> anyhow::Resul
"expected at least environment context and user message"
);
let env_msg = &input[1];
let env_text = env_msg["content"][0]["text"]
.as_str()
.expect("environment context text");
let env_texts: Vec<&str> = input
.iter()
.filter_map(|msg| {
msg["content"]
.as_array()
.and_then(|content| content.first())
.and_then(|item| item["text"].as_str())
})
.filter(|text| text.starts_with(ENVIRONMENT_CONTEXT_OPEN_TAG))
.collect();
assert!(
env_text.starts_with(ENVIRONMENT_CONTEXT_OPEN_TAG),
"second entry should be environment context, got: {env_text}"
);
assert!(
env_text.contains("<approval_policy>never</approval_policy>"),
"environment context should reflect overridden approval policy: {env_text}"
env_texts
.iter()
.any(|text| text.contains("<approval_policy>never</approval_policy>")),
"environment context should reflect overridden approval policy: {env_texts:?}"
);
let env_count = input
@@ -474,11 +465,19 @@ async fn override_before_first_turn_emits_environment_context() -> anyhow::Resul
"environment context should appear exactly twice, found {env_count}"
);
let user_msg = &input[2];
let user_text = user_msg["content"][0]["text"]
.as_str()
.expect("user message text");
assert_eq!(user_text, "first message");
let user_texts: Vec<&str> = input
.iter()
.filter_map(|msg| {
msg["content"]
.as_array()
.and_then(|content| content.first())
.and_then(|item| item["text"].as_str())
})
.collect();
assert!(
user_texts.contains(&"first message"),
"expected user message text, got {user_texts:?}"
);
Ok(())
}
@@ -646,12 +645,10 @@ async fn send_user_turn_with_no_changes_does_not_send_environment_context() -> a
let body1 = req1.single_request().body_json();
let body2 = req2.single_request().body_json();
let expected_ui_msg = body1["input"][0].clone();
let shell = default_user_shell();
let default_cwd_lossy = default_cwd.to_string_lossy();
let expected_ui_text = format!(
"# AGENTS.md instructions for {default_cwd_lossy}\n\n<INSTRUCTIONS>\nbe consistent and helpful\n</INSTRUCTIONS>"
);
let expected_ui_msg = text_user_input(expected_ui_text);
let expected_env_msg_1 = text_user_input(default_env_context_str(&default_cwd_lossy, &shell));
let expected_user_message_1 = text_user_input("hello 1".to_string());
@@ -738,16 +735,9 @@ async fn send_user_turn_with_changes_sends_environment_context() -> anyhow::Resu
let body1 = req1.single_request().body_json();
let body2 = req2.single_request().body_json();
let expected_ui_msg = body1["input"][0].clone();
let shell = default_user_shell();
let expected_ui_text = format!(
"# AGENTS.md instructions for {}\n\n<INSTRUCTIONS>\nbe consistent and helpful\n</INSTRUCTIONS>",
default_cwd.to_string_lossy()
);
let expected_ui_msg = serde_json::json!({
"type": "message",
"role": "user",
"content": [ { "type": "input_text", "text": expected_ui_text } ]
});
let expected_env_text_1 = default_env_context_str(&default_cwd.to_string_lossy(), &shell);
let expected_env_msg_1 = text_user_input(expected_env_text_1);
let expected_user_message_1 = text_user_input("hello 1".to_string());

View File

@@ -553,31 +553,28 @@ async fn review_input_isolated_from_parent_history() {
.expect("expected POST request to /responses");
let body = request.body_json::<serde_json::Value>().unwrap();
let input = body["input"].as_array().expect("input array");
assert_eq!(
input.len(),
2,
"expected environment context and review prompt"
assert!(
input.len() >= 2,
"expected at least environment context and review prompt"
);
let env_msg = &input[0];
assert_eq!(env_msg["type"].as_str().unwrap(), "message");
assert_eq!(env_msg["role"].as_str().unwrap(), "user");
let env_text = env_msg["content"][0]["text"].as_str().expect("env text");
assert!(
env_text.starts_with(ENVIRONMENT_CONTEXT_OPEN_TAG),
"environment context must be the first item"
);
let env_text = input
.iter()
.filter_map(|msg| msg["content"][0]["text"].as_str())
.find(|text| text.starts_with(ENVIRONMENT_CONTEXT_OPEN_TAG))
.expect("env text");
assert!(
env_text.contains("<cwd>"),
"environment context should include cwd"
);
let review_msg = &input[1];
assert_eq!(review_msg["type"].as_str().unwrap(), "message");
assert_eq!(review_msg["role"].as_str().unwrap(), "user");
let review_text = input
.iter()
.filter_map(|msg| msg["content"][0]["text"].as_str())
.find(|text| *text == review_prompt)
.expect("review prompt text");
assert_eq!(
review_msg["content"][0]["text"].as_str().unwrap(),
review_prompt,
review_text, review_prompt,
"user message should only contain the raw review prompt"
);

View File

@@ -311,10 +311,9 @@ async fn shell_command_snapshot_still_intercepts_apply_patch() -> Result<()> {
#[cfg_attr(target_os = "windows", ignore)]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_snapshot_deleted_after_shutdown_with_skills_enabled() -> Result<()> {
async fn shell_snapshot_deleted_after_shutdown_with_skills() -> Result<()> {
let builder = test_codex().with_config(|config| {
config.features.enable(Feature::ShellSnapshot);
config.features.enable(Feature::Skills);
});
let harness = TestCodexHarness::with_builder(builder).await?;
let home = harness.test().home.clone();

View File

@@ -2,7 +2,6 @@
#![allow(clippy::unwrap_used, clippy::expect_used)]
use anyhow::Result;
use codex_core::features::Feature;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::Op;
use codex_core::protocol::SandboxPolicy;
@@ -27,33 +26,15 @@ fn write_skill(home: &Path, name: &str, description: &str, body: &str) -> std::p
path
}
/// Creates a SKILL.md for `name` under the `.public` skills cache in
/// `home`, writing YAML frontmatter (name + description) followed by
/// `body`, and returns the path of the file that was written.
fn write_public_skill(
    home: &Path,
    name: &str,
    description: &str,
    body: &str,
) -> std::path::PathBuf {
    let skill_dir = home.join("skills").join(".public").join(name);
    fs::create_dir_all(&skill_dir).unwrap();
    let path = skill_dir.join("SKILL.md");
    let frontmatter = format!("---\nname: {name}\ndescription: {description}\n---\n\n{body}\n");
    fs::write(&path, frontmatter).unwrap();
    path
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn user_turn_includes_skill_instructions() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let skill_body = "skill body";
let mut builder = test_codex()
.with_config(|cfg| {
cfg.features.enable(Feature::Skills);
})
.with_pre_build_hook(|home| {
write_skill(home, "demo", "demo skill", skill_body);
});
let mut builder = test_codex().with_pre_build_hook(|home| {
write_skill(home, "demo", "demo skill", skill_body);
});
let test = builder.build(&server).await?;
let skill_path = test.codex_home_path().join("skills/demo/SKILL.md");
@@ -117,15 +98,11 @@ async fn skill_load_errors_surface_in_session_configured() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex()
.with_config(|cfg| {
cfg.features.enable(Feature::Skills);
})
.with_pre_build_hook(|home| {
let skill_dir = home.join("skills").join("broken");
fs::create_dir_all(&skill_dir).unwrap();
fs::write(skill_dir.join("SKILL.md"), "not yaml").unwrap();
});
let mut builder = test_codex().with_pre_build_hook(|home| {
let skill_dir = home.join("skills").join("broken");
fs::create_dir_all(&skill_dir).unwrap();
fs::write(skill_dir.join("SKILL.md"), "not yaml").unwrap();
});
let test = builder.build(&server).await?;
test.codex
@@ -169,19 +146,30 @@ async fn skill_load_errors_surface_in_session_configured() -> Result<()> {
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn list_skills_includes_public_cache_entries() -> Result<()> {
async fn list_skills_includes_system_cache_entries() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex()
.with_config(|cfg| {
cfg.features.enable(Feature::Skills);
})
.with_pre_build_hook(|home| {
write_public_skill(home, "public-demo", "public skill", "public body");
});
let mut builder = test_codex().with_pre_build_hook(|home| {
let system_skill_path = home.join("skills/.system/plan/SKILL.md");
assert!(
!system_skill_path.exists(),
"expected embedded system skills not yet installed, but {system_skill_path:?} exists"
);
});
let test = builder.build(&server).await?;
let system_skill_path = test.codex_home_path().join("skills/.system/plan/SKILL.md");
assert!(
system_skill_path.exists(),
"expected embedded system skills installed to {system_skill_path:?}"
);
let system_skill_contents = fs::read_to_string(&system_skill_path)?;
assert!(
system_skill_contents.contains("name: plan"),
"expected embedded system skill file, got:\n{system_skill_contents}"
);
test.codex
.submit(Op::ListSkills {
cwds: Vec::new(),
@@ -205,12 +193,12 @@ async fn list_skills_includes_public_cache_entries() -> Result<()> {
let skill = skills
.iter()
.find(|skill| skill.name == "public-demo")
.expect("expected public skill to be present");
assert_eq!(skill.scope, codex_protocol::protocol::SkillScope::Public);
.find(|skill| skill.name == "plan")
.expect("expected system skill to be present");
assert_eq!(skill.scope, codex_protocol::protocol::SkillScope::System);
let path_str = skill.path.to_string_lossy().replace('\\', "/");
assert!(
path_str.ends_with("/skills/.public/public-demo/SKILL.md"),
path_str.ends_with("/skills/.system/plan/SKILL.md"),
"unexpected skill path: {path_str}"
);