mirror of
https://github.com/openai/codex.git
synced 2026-04-30 17:36:40 +00:00
Support SYSTEM skills. (#8220)
1. Remove PUBLIC skills and introduce SYSTEM skills embedded in the binary and installed into $CODEX_HOME/skills/.system at startup. 2. Skills are now always enabled (feature flag removed). 3. Update skills/list to accept forceReload and plumb it through (not used by clients yet).
This commit is contained in:
@@ -15,7 +15,6 @@ use codex_core::WireApi;
|
||||
use codex_core::auth::AuthCredentialsStoreMode;
|
||||
use codex_core::built_in_model_providers;
|
||||
use codex_core::error::CodexErr;
|
||||
use codex_core::features::Feature;
|
||||
use codex_core::openai_models::models_manager::ModelsManager;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
@@ -651,7 +650,7 @@ async fn includes_user_instructions_message_in_request() {
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn skills_append_to_instructions_when_feature_enabled() {
|
||||
async fn skills_append_to_instructions() {
|
||||
skip_if_no_network!();
|
||||
let server = MockServer::start().await;
|
||||
|
||||
@@ -673,7 +672,6 @@ async fn skills_append_to_instructions_when_feature_enabled() {
|
||||
|
||||
let mut config = load_default_config_for_test(&codex_home);
|
||||
config.model_provider = model_provider;
|
||||
config.features.enable(Feature::Skills);
|
||||
config.cwd = codex_home.path().to_path_buf();
|
||||
|
||||
let conversation_manager = ConversationManager::with_models_provider_and_home(
|
||||
|
||||
@@ -589,7 +589,36 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() {
|
||||
.body_json::<serde_json::Value>()
|
||||
.unwrap();
|
||||
let input = body.get("input").and_then(|v| v.as_array()).unwrap();
|
||||
let environment_message = input[0]["content"][0]["text"].as_str().unwrap();
|
||||
|
||||
fn normalize_inputs(values: &[serde_json::Value]) -> Vec<serde_json::Value> {
|
||||
values
|
||||
.iter()
|
||||
.filter(|value| {
|
||||
if value
|
||||
.get("type")
|
||||
.and_then(|ty| ty.as_str())
|
||||
.is_some_and(|ty| ty == "function_call_output")
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
let text = value
|
||||
.get("content")
|
||||
.and_then(|content| content.as_array())
|
||||
.and_then(|content| content.first())
|
||||
.and_then(|item| item.get("text"))
|
||||
.and_then(|text| text.as_str());
|
||||
|
||||
// Ignore the cached UI prefix (project docs + skills) since it is not relevant to
|
||||
// compaction behavior and can change as bundled skills evolve.
|
||||
!text.is_some_and(|text| text.starts_with("# AGENTS.md instructions for "))
|
||||
})
|
||||
.cloned()
|
||||
.collect()
|
||||
}
|
||||
|
||||
let initial_input = normalize_inputs(input);
|
||||
let environment_message = initial_input[0]["content"][0]["text"].as_str().unwrap();
|
||||
|
||||
// test 1: after compaction, we should have one environment message, one user message, and one user message with summary prefix
|
||||
let compaction_indices = [2, 4, 6];
|
||||
@@ -603,6 +632,7 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() {
|
||||
.body_json::<serde_json::Value>()
|
||||
.unwrap();
|
||||
let input = body.get("input").and_then(|v| v.as_array()).unwrap();
|
||||
let input = normalize_inputs(input);
|
||||
assert_eq!(input.len(), 3);
|
||||
let environment_message = input[0]["content"][0]["text"].as_str().unwrap();
|
||||
let user_message_received = input[1]["content"][0]["text"].as_str().unwrap();
|
||||
@@ -962,20 +992,6 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() {
|
||||
]
|
||||
]);
|
||||
|
||||
// ignore local shell calls output because it differs from OS to another and it's out of the scope of this test.
|
||||
fn normalize_inputs(values: &[serde_json::Value]) -> Vec<serde_json::Value> {
|
||||
values
|
||||
.iter()
|
||||
.filter(|value| {
|
||||
value
|
||||
.get("type")
|
||||
.and_then(|ty| ty.as_str())
|
||||
.is_none_or(|ty| ty != "function_call_output")
|
||||
})
|
||||
.cloned()
|
||||
.collect()
|
||||
}
|
||||
|
||||
for (i, request) in requests_payloads.iter().enumerate() {
|
||||
let body = request.body_json::<serde_json::Value>().unwrap();
|
||||
let input = body.get("input").and_then(|v| v.as_array()).unwrap();
|
||||
|
||||
@@ -251,49 +251,36 @@ async fn prefixes_context_and_instructions_once_and_consistently_across_requests
|
||||
.await?;
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let body1 = req1.single_request().body_json();
|
||||
let input1 = body1["input"].as_array().expect("input array");
|
||||
assert_eq!(input1.len(), 3, "expected cached prefix + env + user msg");
|
||||
|
||||
let ui_text = input1[0]["content"][0]["text"]
|
||||
.as_str()
|
||||
.expect("ui message text");
|
||||
assert!(
|
||||
ui_text.contains("be consistent and helpful"),
|
||||
"expected user instructions in UI message: {ui_text}"
|
||||
);
|
||||
|
||||
let shell = default_user_shell();
|
||||
let cwd_str = config.cwd.to_string_lossy();
|
||||
let expected_env_text = default_env_context_str(&cwd_str, &shell);
|
||||
let expected_ui_text = format!(
|
||||
"# AGENTS.md instructions for {cwd_str}\n\n<INSTRUCTIONS>\nbe consistent and helpful\n</INSTRUCTIONS>"
|
||||
);
|
||||
|
||||
let expected_env_msg = serde_json::json!({
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [ { "type": "input_text", "text": expected_env_text } ]
|
||||
});
|
||||
let expected_ui_msg = serde_json::json!({
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [ { "type": "input_text", "text": expected_ui_text } ]
|
||||
});
|
||||
|
||||
let expected_user_message_1 = serde_json::json!({
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [ { "type": "input_text", "text": "hello 1" } ]
|
||||
});
|
||||
let body1 = req1.single_request().body_json();
|
||||
assert_eq!(
|
||||
body1["input"],
|
||||
serde_json::json!([expected_ui_msg, expected_env_msg, expected_user_message_1])
|
||||
input1[1],
|
||||
text_user_input(expected_env_text),
|
||||
"expected environment context after UI message"
|
||||
);
|
||||
assert_eq!(input1[2], text_user_input("hello 1".to_string()));
|
||||
|
||||
let expected_user_message_2 = serde_json::json!({
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [ { "type": "input_text", "text": "hello 2" } ]
|
||||
});
|
||||
let body2 = req2.single_request().body_json();
|
||||
let expected_body2 = serde_json::json!(
|
||||
[
|
||||
body1["input"].as_array().unwrap().as_slice(),
|
||||
[expected_user_message_2].as_slice(),
|
||||
]
|
||||
.concat()
|
||||
let input2 = body2["input"].as_array().expect("input array");
|
||||
assert_eq!(
|
||||
&input2[..input1.len()],
|
||||
input1.as_slice(),
|
||||
"expected cached prefix to be reused"
|
||||
);
|
||||
assert_eq!(body2["input"], expected_body2);
|
||||
assert_eq!(input2[input1.len()], text_user_input("hello 2".to_string()));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -439,17 +426,21 @@ async fn override_before_first_turn_emits_environment_context() -> anyhow::Resul
|
||||
"expected at least environment context and user message"
|
||||
);
|
||||
|
||||
let env_msg = &input[1];
|
||||
let env_text = env_msg["content"][0]["text"]
|
||||
.as_str()
|
||||
.expect("environment context text");
|
||||
let env_texts: Vec<&str> = input
|
||||
.iter()
|
||||
.filter_map(|msg| {
|
||||
msg["content"]
|
||||
.as_array()
|
||||
.and_then(|content| content.first())
|
||||
.and_then(|item| item["text"].as_str())
|
||||
})
|
||||
.filter(|text| text.starts_with(ENVIRONMENT_CONTEXT_OPEN_TAG))
|
||||
.collect();
|
||||
assert!(
|
||||
env_text.starts_with(ENVIRONMENT_CONTEXT_OPEN_TAG),
|
||||
"second entry should be environment context, got: {env_text}"
|
||||
);
|
||||
assert!(
|
||||
env_text.contains("<approval_policy>never</approval_policy>"),
|
||||
"environment context should reflect overridden approval policy: {env_text}"
|
||||
env_texts
|
||||
.iter()
|
||||
.any(|text| text.contains("<approval_policy>never</approval_policy>")),
|
||||
"environment context should reflect overridden approval policy: {env_texts:?}"
|
||||
);
|
||||
|
||||
let env_count = input
|
||||
@@ -474,11 +465,19 @@ async fn override_before_first_turn_emits_environment_context() -> anyhow::Resul
|
||||
"environment context should appear exactly twice, found {env_count}"
|
||||
);
|
||||
|
||||
let user_msg = &input[2];
|
||||
let user_text = user_msg["content"][0]["text"]
|
||||
.as_str()
|
||||
.expect("user message text");
|
||||
assert_eq!(user_text, "first message");
|
||||
let user_texts: Vec<&str> = input
|
||||
.iter()
|
||||
.filter_map(|msg| {
|
||||
msg["content"]
|
||||
.as_array()
|
||||
.and_then(|content| content.first())
|
||||
.and_then(|item| item["text"].as_str())
|
||||
})
|
||||
.collect();
|
||||
assert!(
|
||||
user_texts.contains(&"first message"),
|
||||
"expected user message text, got {user_texts:?}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -646,12 +645,10 @@ async fn send_user_turn_with_no_changes_does_not_send_environment_context() -> a
|
||||
let body1 = req1.single_request().body_json();
|
||||
let body2 = req2.single_request().body_json();
|
||||
|
||||
let expected_ui_msg = body1["input"][0].clone();
|
||||
|
||||
let shell = default_user_shell();
|
||||
let default_cwd_lossy = default_cwd.to_string_lossy();
|
||||
let expected_ui_text = format!(
|
||||
"# AGENTS.md instructions for {default_cwd_lossy}\n\n<INSTRUCTIONS>\nbe consistent and helpful\n</INSTRUCTIONS>"
|
||||
);
|
||||
let expected_ui_msg = text_user_input(expected_ui_text);
|
||||
|
||||
let expected_env_msg_1 = text_user_input(default_env_context_str(&default_cwd_lossy, &shell));
|
||||
let expected_user_message_1 = text_user_input("hello 1".to_string());
|
||||
@@ -738,16 +735,9 @@ async fn send_user_turn_with_changes_sends_environment_context() -> anyhow::Resu
|
||||
let body1 = req1.single_request().body_json();
|
||||
let body2 = req2.single_request().body_json();
|
||||
|
||||
let expected_ui_msg = body1["input"][0].clone();
|
||||
|
||||
let shell = default_user_shell();
|
||||
let expected_ui_text = format!(
|
||||
"# AGENTS.md instructions for {}\n\n<INSTRUCTIONS>\nbe consistent and helpful\n</INSTRUCTIONS>",
|
||||
default_cwd.to_string_lossy()
|
||||
);
|
||||
let expected_ui_msg = serde_json::json!({
|
||||
"type": "message",
|
||||
"role": "user",
|
||||
"content": [ { "type": "input_text", "text": expected_ui_text } ]
|
||||
});
|
||||
let expected_env_text_1 = default_env_context_str(&default_cwd.to_string_lossy(), &shell);
|
||||
let expected_env_msg_1 = text_user_input(expected_env_text_1);
|
||||
let expected_user_message_1 = text_user_input("hello 1".to_string());
|
||||
|
||||
@@ -553,31 +553,28 @@ async fn review_input_isolated_from_parent_history() {
|
||||
.expect("expected POST request to /responses");
|
||||
let body = request.body_json::<serde_json::Value>().unwrap();
|
||||
let input = body["input"].as_array().expect("input array");
|
||||
assert_eq!(
|
||||
input.len(),
|
||||
2,
|
||||
"expected environment context and review prompt"
|
||||
assert!(
|
||||
input.len() >= 2,
|
||||
"expected at least environment context and review prompt"
|
||||
);
|
||||
|
||||
let env_msg = &input[0];
|
||||
assert_eq!(env_msg["type"].as_str().unwrap(), "message");
|
||||
assert_eq!(env_msg["role"].as_str().unwrap(), "user");
|
||||
let env_text = env_msg["content"][0]["text"].as_str().expect("env text");
|
||||
assert!(
|
||||
env_text.starts_with(ENVIRONMENT_CONTEXT_OPEN_TAG),
|
||||
"environment context must be the first item"
|
||||
);
|
||||
let env_text = input
|
||||
.iter()
|
||||
.filter_map(|msg| msg["content"][0]["text"].as_str())
|
||||
.find(|text| text.starts_with(ENVIRONMENT_CONTEXT_OPEN_TAG))
|
||||
.expect("env text");
|
||||
assert!(
|
||||
env_text.contains("<cwd>"),
|
||||
"environment context should include cwd"
|
||||
);
|
||||
|
||||
let review_msg = &input[1];
|
||||
assert_eq!(review_msg["type"].as_str().unwrap(), "message");
|
||||
assert_eq!(review_msg["role"].as_str().unwrap(), "user");
|
||||
let review_text = input
|
||||
.iter()
|
||||
.filter_map(|msg| msg["content"][0]["text"].as_str())
|
||||
.find(|text| *text == review_prompt)
|
||||
.expect("review prompt text");
|
||||
assert_eq!(
|
||||
review_msg["content"][0]["text"].as_str().unwrap(),
|
||||
review_prompt,
|
||||
review_text, review_prompt,
|
||||
"user message should only contain the raw review prompt"
|
||||
);
|
||||
|
||||
|
||||
@@ -311,10 +311,9 @@ async fn shell_command_snapshot_still_intercepts_apply_patch() -> Result<()> {
|
||||
|
||||
#[cfg_attr(target_os = "windows", ignore)]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_snapshot_deleted_after_shutdown_with_skills_enabled() -> Result<()> {
|
||||
async fn shell_snapshot_deleted_after_shutdown_with_skills() -> Result<()> {
|
||||
let builder = test_codex().with_config(|config| {
|
||||
config.features.enable(Feature::ShellSnapshot);
|
||||
config.features.enable(Feature::Skills);
|
||||
});
|
||||
let harness = TestCodexHarness::with_builder(builder).await?;
|
||||
let home = harness.test().home.clone();
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
#![allow(clippy::unwrap_used, clippy::expect_used)]
|
||||
|
||||
use anyhow::Result;
|
||||
use codex_core::features::Feature;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
@@ -27,33 +26,15 @@ fn write_skill(home: &Path, name: &str, description: &str, body: &str) -> std::p
|
||||
path
|
||||
}
|
||||
|
||||
fn write_public_skill(
|
||||
home: &Path,
|
||||
name: &str,
|
||||
description: &str,
|
||||
body: &str,
|
||||
) -> std::path::PathBuf {
|
||||
let skill_dir = home.join("skills").join(".public").join(name);
|
||||
fs::create_dir_all(&skill_dir).unwrap();
|
||||
let contents = format!("---\nname: {name}\ndescription: {description}\n---\n\n{body}\n");
|
||||
let path = skill_dir.join("SKILL.md");
|
||||
fs::write(&path, contents).unwrap();
|
||||
path
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn user_turn_includes_skill_instructions() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let skill_body = "skill body";
|
||||
let mut builder = test_codex()
|
||||
.with_config(|cfg| {
|
||||
cfg.features.enable(Feature::Skills);
|
||||
})
|
||||
.with_pre_build_hook(|home| {
|
||||
write_skill(home, "demo", "demo skill", skill_body);
|
||||
});
|
||||
let mut builder = test_codex().with_pre_build_hook(|home| {
|
||||
write_skill(home, "demo", "demo skill", skill_body);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let skill_path = test.codex_home_path().join("skills/demo/SKILL.md");
|
||||
@@ -117,15 +98,11 @@ async fn skill_load_errors_surface_in_session_configured() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex()
|
||||
.with_config(|cfg| {
|
||||
cfg.features.enable(Feature::Skills);
|
||||
})
|
||||
.with_pre_build_hook(|home| {
|
||||
let skill_dir = home.join("skills").join("broken");
|
||||
fs::create_dir_all(&skill_dir).unwrap();
|
||||
fs::write(skill_dir.join("SKILL.md"), "not yaml").unwrap();
|
||||
});
|
||||
let mut builder = test_codex().with_pre_build_hook(|home| {
|
||||
let skill_dir = home.join("skills").join("broken");
|
||||
fs::create_dir_all(&skill_dir).unwrap();
|
||||
fs::write(skill_dir.join("SKILL.md"), "not yaml").unwrap();
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
test.codex
|
||||
@@ -169,19 +146,30 @@ async fn skill_load_errors_surface_in_session_configured() -> Result<()> {
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn list_skills_includes_public_cache_entries() -> Result<()> {
|
||||
async fn list_skills_includes_system_cache_entries() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex()
|
||||
.with_config(|cfg| {
|
||||
cfg.features.enable(Feature::Skills);
|
||||
})
|
||||
.with_pre_build_hook(|home| {
|
||||
write_public_skill(home, "public-demo", "public skill", "public body");
|
||||
});
|
||||
let mut builder = test_codex().with_pre_build_hook(|home| {
|
||||
let system_skill_path = home.join("skills/.system/plan/SKILL.md");
|
||||
assert!(
|
||||
!system_skill_path.exists(),
|
||||
"expected embedded system skills not yet installed, but {system_skill_path:?} exists"
|
||||
);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let system_skill_path = test.codex_home_path().join("skills/.system/plan/SKILL.md");
|
||||
assert!(
|
||||
system_skill_path.exists(),
|
||||
"expected embedded system skills installed to {system_skill_path:?}"
|
||||
);
|
||||
let system_skill_contents = fs::read_to_string(&system_skill_path)?;
|
||||
assert!(
|
||||
system_skill_contents.contains("name: plan"),
|
||||
"expected embedded system skill file, got:\n{system_skill_contents}"
|
||||
);
|
||||
|
||||
test.codex
|
||||
.submit(Op::ListSkills {
|
||||
cwds: Vec::new(),
|
||||
@@ -205,12 +193,12 @@ async fn list_skills_includes_public_cache_entries() -> Result<()> {
|
||||
|
||||
let skill = skills
|
||||
.iter()
|
||||
.find(|skill| skill.name == "public-demo")
|
||||
.expect("expected public skill to be present");
|
||||
assert_eq!(skill.scope, codex_protocol::protocol::SkillScope::Public);
|
||||
.find(|skill| skill.name == "plan")
|
||||
.expect("expected system skill to be present");
|
||||
assert_eq!(skill.scope, codex_protocol::protocol::SkillScope::System);
|
||||
let path_str = skill.path.to_string_lossy().replace('\\', "/");
|
||||
assert!(
|
||||
path_str.ends_with("/skills/.public/public-demo/SKILL.md"),
|
||||
path_str.ends_with("/skills/.system/plan/SKILL.md"),
|
||||
"unexpected skill path: {path_str}"
|
||||
);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user