Added support for live updates to skills (#10478)

Add a centralized FileWatcher in codex-core (using notify) that
recursively watches the skill roots from the config layer stack.

Send `SkillsChanged` events when relevant file system changes are
detected.

On `SkillsChanged`:
* Invalidate the skills cache immediately in ThreadManager
* Emit EventMsg::SkillsUpdateAvailable to active sessions
~~* Broadcast a new app-server notification:
SkillsListUpdatedNotification~~

This change does not inject new items into the event stream. That means
the agent will not know about new skills, so it won't be able to
implicitly invoke new skills. It also won't know about changes to
existing skills, so if it has already read the contents of a modified
skill, it will not honor the new behavior.

This change also does not detect modifications to AGENTS.md.

I plan to address these limitations in a follow-on PR modeled after
#9985. Injection of new skills and AGENTS was deemed too risky, hence the
need to split the feature into two stages. The changes in this PR were
designed to easily accommodate the second stage once we have some other
foundational changes in place.

Testing: In addition to automated tests, I did manual testing to confirm
that newly-created skills, deleted skills, and renamed skills are
reflected in the TUI skill picker menu. Also confirmed that
modifications to behaviors for explicitly-invoked skills are honored.

---------

Co-authored-by: Xin Lin <xl@openai.com>
This commit is contained in:
Eric Traut
2026-02-04 15:25:03 -08:00
committed by GitHub
parent d452bb3ae5
commit 7bcc552325
11 changed files with 684 additions and 28 deletions

View File

@@ -0,0 +1,151 @@
#![allow(clippy::expect_used, clippy::unwrap_used)]
use std::fs;
use std::path::Path;
use std::path::PathBuf;
use std::time::Duration;
use anyhow::Result;
use codex_core::config::ProjectConfig;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
use codex_core::protocol::SandboxPolicy;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::config_types::TrustLevel;
use codex_protocol::user_input::UserInput;
use core_test_support::responses;
use core_test_support::responses::ResponsesRequest;
use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::start_mock_server;
use core_test_support::test_codex::TestCodex;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use tokio::time::timeout;
/// Marks the active project in `config` as trusted by replacing
/// `config.active_project` with a `ProjectConfig` whose trust level is
/// `TrustLevel::Trusted`.
fn enable_trusted_project(config: &mut codex_core::config::Config) {
    let trusted_project = ProjectConfig {
        trust_level: Some(TrustLevel::Trusted),
    };
    config.active_project = trusted_project;
}
/// Writes (or overwrites) `<home>/skills/<name>/SKILL.md` and returns its path.
///
/// The file consists of a front-matter header carrying `name` and
/// `description`, a blank line, then `body` followed by a newline. Missing
/// parent directories are created.
///
/// # Panics
///
/// Panics if the directory cannot be created or the file cannot be written.
fn write_skill(home: &Path, name: &str, description: &str, body: &str) -> PathBuf {
    // Grow a single path buffer in place: first to the skill directory...
    let mut target = home.join("skills");
    target.push(name);
    fs::create_dir_all(&target).expect("create skill dir");

    // ...then on to the skill file inside that directory.
    target.push("SKILL.md");
    let rendered = format!("---\nname: {name}\ndescription: {description}\n---\n\n{body}\n");
    fs::write(&target, rendered).expect("write skill");
    target
}
/// Returns `true` when at least one `"user"` message text in `request`
/// contains both the literal `<skill>` marker and `skill_body`.
fn contains_skill_body(request: &ResponsesRequest, skill_body: &str) -> bool {
    let user_texts = request.message_input_texts("user");
    user_texts
        .iter()
        .any(|message| message.contains("<skill>") && message.contains(skill_body))
}
async fn submit_skill_turn(test: &TestCodex, skill_path: PathBuf, prompt: &str) -> Result<()> {
let session_model = test.session_configured.model.clone();
test.codex
.submit(Op::UserTurn {
items: vec![
UserInput::Text {
text: prompt.to_string(),
text_elements: Vec::new(),
},
UserInput::Skill {
name: "demo".to_string(),
path: skill_path,
},
],
final_output_json_schema: None,
cwd: test.cwd_path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: session_model,
effort: None,
summary: ReasoningSummary::Auto,
collaboration_mode: None,
personality: None,
})
.await?;
wait_for_event(test.codex.as_ref(), |event| {
matches!(event, EventMsg::TurnComplete(_))
})
.await;
Ok(())
}
/// Verifies that rewriting a skill file on disk changes the skill body that is
/// injected into the next turn's request.
///
/// Flow: seed a `demo` skill (v1) before the session is built, run a turn and
/// check the v1 body was sent, overwrite the skill with v2, wait up to five
/// seconds for `EventMsg::SkillsUpdateAvailable` (clearing the skills cache
/// manually if it never arrives), then run a second turn and check the v2 body
/// was sent.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn live_skills_reload_refreshes_skill_cache_after_skill_change() -> Result<()> {
    let original_body = "skill body v1";
    let updated_body = "skill body v2";

    let server = start_mock_server().await;
    // One canned completion per turn submitted below.
    let sse_turns = vec![
        responses::sse(vec![responses::ev_completed("resp-1")]),
        responses::sse(vec![responses::ev_completed("resp-2")]),
    ];
    let request_log = mount_sse_sequence(&server, sse_turns).await;

    let mut builder = test_codex()
        .with_pre_build_hook(move |home| {
            write_skill(home, "demo", "demo skill", original_body);
        })
        .with_config(|cfg| {
            enable_trusted_project(cfg);
        });
    let test = builder.build(&server).await?;

    let demo_skill_file = test.codex_home_path().join("skills/demo/SKILL.md");
    let skill_path = fs::canonicalize(demo_skill_file)?;

    // Turn 1: the request must carry the body that was on disk at startup.
    submit_skill_turn(&test, skill_path.clone(), "please use $demo").await?;
    let initial_request = request_log
        .requests()
        .first()
        .cloned()
        .expect("first request captured");
    assert!(
        contains_skill_body(&initial_request, original_body),
        "expected initial skill body in request"
    );

    // Overwrite the skill in place and give the session a bounded amount of
    // time to announce that skills changed.
    write_skill(test.codex_home_path(), "demo", "demo skill", updated_body);
    let watcher_notified = timeout(Duration::from_secs(5), async {
        loop {
            let event = test
                .codex
                .next_event()
                .await
                .unwrap_or_else(|err| panic!("event stream ended unexpectedly: {err}"));
            if matches!(event.msg, EventMsg::SkillsUpdateAvailable) {
                break;
            }
        }
    })
    .await
    .is_ok();

    if !watcher_notified {
        // Some environments do not reliably surface file watcher events for
        // skill changes. Clear the cache explicitly so we can still validate
        // that the updated skill body is injected on the next turn.
        test.thread_manager.skills_manager().clear_cache();
    }

    // Turn 2: the request must now carry the rewritten body.
    submit_skill_turn(&test, skill_path.clone(), "please use $demo again").await?;
    let latest_request = request_log
        .last_request()
        .expect("request captured after skill update");
    assert!(
        contains_skill_body(&latest_request, updated_body),
        "expected updated skill body after reload"
    );

    Ok(())
}