core-skills: disambiguate linked skill mentions

This commit is contained in:
starr-openai
2026-06-01 20:48:27 +00:00
parent 41c810bd3d
commit 574af01c13
4 changed files with 206 additions and 41 deletions

View File

@@ -1,16 +1,16 @@
use std::collections::HashMap;
use std::collections::HashSet;
use crate::SkillMetadata;
use crate::mention_counts::build_skill_name_counts_for_raw_paths;
use crate::build_skill_name_counts;
use codex_analytics::AnalyticsEventsClient;
use codex_analytics::InvocationType;
use codex_analytics::SkillInvocation;
use codex_analytics::TrackEventsContext;
use codex_exec_server::EnvironmentPathRef;
use codex_otel::SessionTelemetry;
use codex_protocol::user_input::UserInput;
use codex_utils_absolute_path::AbsolutePathBuf;
use codex_utils_plugins::mention_syntax::TOOL_MENTION_SIGIL;
use std::collections::HashMap;
use std::collections::HashSet;
#[derive(Debug, Default)]
pub struct SkillInjections {
@@ -96,7 +96,8 @@ fn emit_skill_injected_metric(
/// Structured `UserInput::Skill` selections are resolved first by path against
/// enabled skills. Text inputs are then scanned to extract `$skill-name` tokens, and we
/// iterate `skills` in their existing order to preserve prior ordering semantics.
/// Explicit links are resolved by path and plain names are only used when the match
/// Explicit links are resolved by path, using the linked name only when duplicate
/// filesystem-bound copies share the same raw path. Plain names are only used when the match
/// is unambiguous.
///
/// Complexity: `O(T + (N_s + N_t) * S)` time, `O(S + M)` space, where:
@@ -105,10 +106,10 @@ fn emit_skill_injected_metric(
pub fn collect_explicit_skill_mentions(
inputs: &[UserInput],
skills: &[SkillMetadata],
disabled_paths: &HashSet<AbsolutePathBuf>,
disabled_paths: &HashSet<EnvironmentPathRef>,
connector_slug_counts: &HashMap<String, usize>,
) -> Vec<SkillMetadata> {
let skill_name_counts = build_skill_name_counts_for_raw_paths(skills, disabled_paths).0;
let skill_name_counts = build_skill_name_counts(skills, disabled_paths).0;
let selection_context = SkillSelectionContext {
skills,
@@ -118,7 +119,7 @@ pub fn collect_explicit_skill_mentions(
};
let mut selected: Vec<SkillMetadata> = Vec::new();
let mut seen_names: HashSet<String> = HashSet::new();
let mut seen_paths: HashSet<AbsolutePathBuf> = HashSet::new();
let mut seen_paths: HashSet<EnvironmentPathRef> = HashSet::new();
let mut blocked_plain_names: HashSet<String> = HashSet::new();
for input in inputs {
@@ -128,18 +129,23 @@ pub fn collect_explicit_skill_mentions(
continue;
};
if selection_context.disabled_paths.contains(&path) || seen_paths.contains(&path) {
continue;
}
if let Some(skill) = selection_context
let matching_skills = selection_context
.skills
.iter()
.find(|skill| skill.path_to_skills_md == path)
.filter(|skill| {
skill.name == *name
&& skill.path_to_skills_md == path
&& !selection_context
.disabled_paths
.contains(&skill.source_path)
})
.collect::<Vec<_>>();
if let [skill] = matching_skills.as_slice()
&& !seen_paths.contains(&skill.source_path)
{
seen_paths.insert(skill.path_to_skills_md.clone());
seen_paths.insert(skill.source_path.clone());
seen_names.insert(skill.name.clone());
selected.push(skill.clone());
selected.push((*skill).clone());
}
}
}
@@ -163,7 +169,7 @@ pub fn collect_explicit_skill_mentions(
struct SkillSelectionContext<'a> {
skills: &'a [SkillMetadata],
disabled_paths: &'a HashSet<AbsolutePathBuf>,
disabled_paths: &'a HashSet<EnvironmentPathRef>,
skill_name_counts: &'a HashMap<String, usize>,
connector_slug_counts: &'a HashMap<String, usize>,
}
@@ -172,6 +178,7 @@ pub struct ToolMentions<'a> {
names: HashSet<&'a str>,
paths: HashSet<&'a str>,
plain_names: HashSet<&'a str>,
linked_mentions: HashSet<LinkedToolMention<'a>>,
}
impl<'a> ToolMentions<'a> {
@@ -186,6 +193,16 @@ impl<'a> ToolMentions<'a> {
pub fn paths(&self) -> impl Iterator<Item = &'a str> + '_ {
self.paths.iter().copied()
}
fn linked_mentions(&self) -> impl Iterator<Item = LinkedToolMention<'a>> + '_ {
self.linked_mentions.iter().copied()
}
}
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
struct LinkedToolMention<'a> {
name: &'a str,
path: &'a str,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
@@ -250,6 +267,7 @@ pub fn extract_tool_mentions_with_sigil(text: &str, sigil: char) -> ToolMentions
let mut mentioned_names: HashSet<&str> = HashSet::new();
let mut mentioned_paths: HashSet<&str> = HashSet::new();
let mut plain_names: HashSet<&str> = HashSet::new();
let mut linked_mentions: HashSet<LinkedToolMention<'_>> = HashSet::new();
let mut index = 0;
while index < text_bytes.len() {
@@ -266,6 +284,7 @@ pub fn extract_tool_mentions_with_sigil(text: &str, sigil: char) -> ToolMentions
mentioned_names.insert(name);
}
mentioned_paths.insert(path);
linked_mentions.insert(LinkedToolMention { name, path });
}
index = end_index;
continue;
@@ -305,6 +324,7 @@ pub fn extract_tool_mentions_with_sigil(text: &str, sigil: char) -> ToolMentions
names: mentioned_names,
paths: mentioned_paths,
plain_names,
linked_mentions,
}
}
@@ -314,7 +334,7 @@ fn select_skills_from_mentions(
blocked_plain_names: &HashSet<String>,
mentions: &ToolMentions<'_>,
seen_names: &mut HashSet<String>,
seen_paths: &mut HashSet<AbsolutePathBuf>,
seen_paths: &mut HashSet<EnvironmentPathRef>,
selected: &mut Vec<SkillMetadata>,
) {
if mentions.is_empty() {
@@ -331,19 +351,82 @@ fn select_skills_from_mentions(
})
.map(normalize_skill_path)
.collect();
let mention_skill_links: HashSet<LinkedToolMention<'_>> = mentions
.linked_mentions()
.filter(|mention| {
!matches!(
tool_kind_for_path(mention.path),
ToolMentionKind::App | ToolMentionKind::Mcp | ToolMentionKind::Plugin
)
})
.map(|mention| LinkedToolMention {
name: mention.name,
path: normalize_skill_path(mention.path),
})
.collect();
let mention_skill_path_counts = selection_context
.skills
.iter()
.filter(|skill| {
!selection_context
.disabled_paths
.contains(&skill.source_path)
})
.filter_map(|skill| {
let path = skill.path_to_skills_md.to_string_lossy();
mention_skill_paths
.contains(path.as_ref())
.then_some(path.into_owned())
})
.fold(HashMap::new(), |mut counts, path| {
*counts.entry(path).or_insert(0usize) += 1;
counts
});
let mention_skill_link_counts = selection_context
.skills
.iter()
.filter(|skill| {
!selection_context
.disabled_paths
.contains(&skill.source_path)
})
.filter_map(|skill| {
let path = skill.path_to_skills_md.to_string_lossy();
mention_skill_links
.contains(&LinkedToolMention {
name: skill.name.as_str(),
path: path.as_ref(),
})
.then_some((skill.name.clone(), path.into_owned()))
})
.fold(HashMap::new(), |mut counts, link| {
*counts.entry(link).or_insert(0usize) += 1;
counts
});
for skill in selection_context.skills {
if selection_context
.disabled_paths
.contains(&skill.path_to_skills_md)
|| seen_paths.contains(&skill.path_to_skills_md)
.contains(&skill.source_path)
|| seen_paths.contains(&skill.source_path)
{
continue;
}
let path_str = skill.path_to_skills_md.to_string_lossy();
if mention_skill_paths.contains(path_str.as_ref()) {
seen_paths.insert(skill.path_to_skills_md.clone());
let path = path_str.as_ref();
let linked_mention = LinkedToolMention {
name: skill.name.as_str(),
path,
};
let linked_mention_key = (skill.name.clone(), path.to_string());
if mention_skill_paths.contains(path)
&& (mention_skill_path_counts.get(path) == Some(&1)
|| (mention_skill_links.contains(&linked_mention)
&& mention_skill_link_counts.get(&linked_mention_key) == Some(&1)))
{
seen_paths.insert(skill.source_path.clone());
seen_names.insert(skill.name.clone());
selected.push(skill.clone());
}
@@ -352,8 +435,8 @@ fn select_skills_from_mentions(
for skill in selection_context.skills {
if selection_context
.disabled_paths
.contains(&skill.path_to_skills_md)
|| seen_paths.contains(&skill.path_to_skills_md)
.contains(&skill.source_path)
|| seen_paths.contains(&skill.source_path)
{
continue;
}
@@ -380,7 +463,7 @@ fn select_skills_from_mentions(
}
if seen_names.insert(skill.name.clone()) {
seen_paths.insert(skill.path_to_skills_md.clone());
seen_paths.insert(skill.source_path.clone());
selected.push(skill.clone());
}
}

View File

@@ -1,6 +1,6 @@
use super::*;
use codex_exec_server::EnvironmentPathRef;
use codex_utils_absolute_path::AbsolutePathBuf;
use codex_exec_server::LocalFileSystem;
use codex_utils_absolute_path::test_support::PathBufExt;
use codex_utils_absolute_path::test_support::test_path_buf;
use pretty_assertions::assert_eq;
@@ -39,7 +39,7 @@ fn linked_skill_mention(name: &str, unix_path: &str) -> String {
fn collect_mentions(
inputs: &[UserInput],
skills: &[SkillMetadata],
disabled_paths: &HashSet<AbsolutePathBuf>,
disabled_paths: &HashSet<EnvironmentPathRef>,
connector_slug_counts: &HashMap<String, usize>,
) -> Vec<SkillMetadata> {
collect_explicit_skill_mentions(inputs, skills, disabled_paths, connector_slug_counts)
@@ -195,6 +195,7 @@ fn collect_explicit_skill_mentions_skips_invalid_structured_and_blocks_plain_fal
#[test]
fn collect_explicit_skill_mentions_skips_disabled_structured_and_blocks_plain_fallback() {
let alpha = make_skill("alpha-skill", "/tmp/alpha");
let disabled = HashSet::from([alpha.source_path.clone()]);
let skills = vec![alpha];
let inputs = vec![
UserInput::Text {
@@ -206,7 +207,6 @@ fn collect_explicit_skill_mentions_skips_disabled_structured_and_blocks_plain_fa
path: test_path_buf("/tmp/alpha"),
},
];
let disabled = HashSet::from([test_path_buf("/tmp/alpha").abs()]);
let connector_counts = HashMap::new();
let selected = collect_mentions(&inputs, &skills, &disabled, &connector_counts);
@@ -214,6 +214,76 @@ fn collect_explicit_skill_mentions_skips_disabled_structured_and_blocks_plain_fa
assert_eq!(selected, Vec::new());
}
#[test]
fn collect_explicit_skill_mentions_counts_only_enabled_duplicate_names() {
let alpha = make_skill("demo-skill", "/tmp/alpha");
let beta = SkillMetadata {
source_path: EnvironmentPathRef::new(
std::sync::Arc::new(LocalFileSystem::unsandboxed()),
test_path_buf("/tmp/beta").abs(),
),
path_to_skills_md: test_path_buf("/tmp/beta").abs(),
..alpha.clone()
};
let disabled = HashSet::from([beta.source_path.clone()]);
let skills = vec![alpha.clone(), beta];
let inputs = vec![UserInput::Text {
text: "please run $demo-skill".to_string(),
text_elements: Vec::new(),
}];
let connector_counts = HashMap::new();
let selected = collect_mentions(&inputs, &skills, &disabled, &connector_counts);
assert_eq!(selected, vec![alpha]);
}
#[test]
fn collect_explicit_skill_mentions_uses_structured_name_to_disambiguate_same_path() {
let first = make_skill("alpha-skill", "/tmp/shared");
let second = SkillMetadata {
name: "beta-skill".to_string(),
source_path: EnvironmentPathRef::new(
std::sync::Arc::new(LocalFileSystem::unsandboxed()),
first.path_to_skills_md.clone(),
),
..first.clone()
};
let skills = vec![first, second.clone()];
let inputs = vec![UserInput::Skill {
name: "beta-skill".to_string(),
path: test_path_buf("/tmp/shared"),
}];
let connector_counts = HashMap::new();
let selected = collect_mentions(&inputs, &skills, &HashSet::new(), &connector_counts);
assert_eq!(selected, vec![second]);
}
#[test]
fn collect_explicit_skill_mentions_uses_linked_name_to_disambiguate_same_path() {
let first = make_skill("alpha-skill", "/tmp/shared");
let second = SkillMetadata {
name: "beta-skill".to_string(),
source_path: EnvironmentPathRef::new(
std::sync::Arc::new(LocalFileSystem::unsandboxed()),
first.path_to_skills_md.clone(),
),
..first.clone()
};
let skills = vec![first, second.clone()];
let inputs = vec![UserInput::Text {
text: linked_skill_mention("beta-skill", "/tmp/shared"),
text_elements: Vec::new(),
}];
let connector_counts = HashMap::new();
let selected = collect_mentions(&inputs, &skills, &HashSet::new(), &connector_counts);
assert_eq!(selected, vec![second]);
}
#[test]
fn collect_explicit_skill_mentions_dedupes_by_path() {
let alpha = make_skill("alpha-skill", "/tmp/alpha");
@@ -230,6 +300,29 @@ fn collect_explicit_skill_mentions_dedupes_by_path() {
assert_eq!(selected, vec![alpha]);
}
#[test]
fn collect_explicit_skill_mentions_skips_ambiguous_linked_path() {
let alpha = make_skill("demo-skill", "/tmp/shared");
let beta = SkillMetadata {
name: "demo-skill".to_string(),
source_path: EnvironmentPathRef::new(
std::sync::Arc::new(LocalFileSystem::unsandboxed()),
alpha.path_to_skills_md.clone(),
),
..alpha.clone()
};
let skills = vec![alpha, beta];
let inputs = vec![UserInput::Text {
text: linked_skill_mention("demo-skill", "/tmp/shared"),
text_elements: Vec::new(),
}];
let connector_counts = HashMap::new();
let selected = collect_mentions(&inputs, &skills, &HashSet::new(), &connector_counts);
assert_eq!(selected, Vec::new());
}
#[test]
fn collect_explicit_skill_mentions_skips_ambiguous_name() {
let alpha = make_skill("demo-skill", "/tmp/alpha");
@@ -299,12 +392,12 @@ fn collect_explicit_skill_mentions_allows_explicit_path_with_connector_conflict(
fn collect_explicit_skill_mentions_skips_when_linked_path_disabled() {
let alpha = make_skill("demo-skill", "/tmp/alpha");
let beta = make_skill("demo-skill", "/tmp/beta");
let disabled = HashSet::from([alpha.source_path.clone()]);
let skills = vec![alpha, beta];
let inputs = vec![UserInput::Text {
text: format!("use {}", linked_skill_mention("demo-skill", "/tmp/alpha")),
text_elements: Vec::new(),
}];
let disabled = HashSet::from([test_path_buf("/tmp/alpha").abs()]);
let connector_counts = HashMap::new();
let selected = collect_mentions(&inputs, &skills, &disabled, &connector_counts);

View File

@@ -3,7 +3,6 @@ use std::collections::HashSet;
use super::SkillMetadata;
use codex_exec_server::EnvironmentPathRef;
use codex_utils_absolute_path::AbsolutePathBuf;
/// Counts how often each skill name appears (exact and ASCII-lowercase), excluding disabled paths.
pub fn build_skill_name_counts(
@@ -15,15 +14,6 @@ pub fn build_skill_name_counts(
})
}
pub(crate) fn build_skill_name_counts_for_raw_paths(
skills: &[SkillMetadata],
disabled_paths: &HashSet<AbsolutePathBuf>,
) -> (HashMap<String, usize>, HashMap<String, usize>) {
build_skill_name_counts_with_disabled(skills, |skill| {
disabled_paths.contains(&skill.path_to_skills_md)
})
}
fn build_skill_name_counts_with_disabled(
skills: &[SkillMetadata],
mut is_disabled: impl FnMut(&SkillMetadata) -> bool,

View File

@@ -513,11 +513,10 @@ async fn build_skills_and_plugins(
let connector_slug_counts = build_connector_slug_counts(&available_connectors);
let skill_name_counts_lower =
build_skill_name_counts(&skills_outcome.skills, &skills_outcome.disabled_paths).1;
let disabled_skill_paths = skills_outcome.disabled_path_values();
let mentioned_skills = collect_explicit_skill_mentions(
&user_input,
&skills_outcome.skills,
&disabled_skill_paths,
&skills_outcome.disabled_paths,
&connector_slug_counts,
);
maybe_prompt_and_install_mcp_dependencies(