This commit is contained in:
celia-oai
2026-02-24 21:44:09 -08:00
parent 1151972fb2
commit 9814ea1488
7 changed files with 263 additions and 17 deletions

View File

@@ -135,6 +135,8 @@ pub enum Feature {
SkillEnvVarDependencyPrompt,
/// Emit skill approval test prompts/events.
SkillApproval,
/// Run matching shell_command skill executables under the skill sandbox.
SkillShellCommandSandbox,
/// Steer feature flag - when enabled, Enter submits immediately instead of queuing.
Steer,
/// Enable collaboration modes (Plan, Default).
@@ -629,6 +631,12 @@ pub const FEATURES: &[FeatureSpec] = &[
stage: Stage::UnderDevelopment,
default_enabled: false,
},
FeatureSpec {
id: Feature::SkillShellCommandSandbox,
key: "skill_shell_command_sandbox",
stage: Stage::UnderDevelopment,
default_enabled: false,
},
FeatureSpec {
id: Feature::Steer,
key: "steer",

View File

@@ -87,6 +87,21 @@ pub(crate) fn detect_implicit_skill_script_invocation_for_tokens(
detect_skill_script_run(outcome, command, workdir)
}
pub(crate) fn detect_implicit_skill_executable_invocation_for_tokens(
outcome: &SkillLoadOutcome,
command: &[String],
) -> Option<(SkillMetadata, PathBuf)> {
let executable = Path::new(command.first()?);
if !executable.is_absolute() {
return None;
}
let executable = normalize_path(executable);
let parent = executable.parent()?;
let skill = detect_skill_for_path_under_scripts(outcome, parent)?;
Some((skill, executable))
}
fn tokenize_command(command: &str) -> Vec<String> {
shlex::split(command).unwrap_or_else(|| {
command
@@ -255,8 +270,11 @@ fn detect_skill_script_run(
workdir.join(script_path)
};
let script_path = normalize_path(script_path.as_path());
detect_skill_for_path_under_scripts(outcome, script_path.as_path())
}
for ancestor in script_path.ancestors() {
fn detect_skill_for_path_under_scripts(outcome: &SkillLoadOutcome, path: &Path) -> Option<SkillMetadata> {
for ancestor in path.ancestors() {
if let Some(candidate) = outcome.implicit_skills_by_scripts_dir.get(ancestor) {
return Some(candidate.clone());
}
@@ -317,6 +335,7 @@ fn normalize_path(path: &Path) -> PathBuf {
mod tests {
use super::SkillLoadOutcome;
use super::SkillMetadata;
use super::detect_implicit_skill_executable_invocation_for_tokens;
use super::detect_implicit_skill_script_invocation_for_command;
use super::detect_skill_doc_read;
use super::detect_skill_script_run;
@@ -483,4 +502,44 @@ mod tests {
assert_eq!(found, None);
}
#[test]
fn direct_skill_executable_detection_matches_absolute_path() {
let skill_doc_path = PathBuf::from("/tmp/skill-test/SKILL.md");
let scripts_dir = normalize_path(Path::new("/tmp/skill-test/scripts"));
let skill = test_skill_metadata(skill_doc_path);
let outcome = SkillLoadOutcome {
implicit_skills_by_scripts_dir: Arc::new(HashMap::from([(scripts_dir, skill)])),
implicit_skills_by_doc_path: Arc::new(HashMap::new()),
..Default::default()
};
let command = vec!["/tmp/skill-test/scripts/run".to_string()];
let found = detect_implicit_skill_executable_invocation_for_tokens(&outcome, &command);
assert_eq!(
found.map(|(skill, path)| (skill.name, path)),
Some((
"test-skill".to_string(),
PathBuf::from("/tmp/skill-test/scripts/run")
))
);
}
#[test]
fn direct_skill_executable_detection_ignores_relative_path() {
let skill_doc_path = PathBuf::from("/tmp/skill-test/SKILL.md");
let scripts_dir = normalize_path(Path::new("/tmp/skill-test/scripts"));
let skill = test_skill_metadata(skill_doc_path);
let outcome = SkillLoadOutcome {
implicit_skills_by_scripts_dir: Arc::new(HashMap::from([(scripts_dir, skill)])),
implicit_skills_by_doc_path: Arc::new(HashMap::new()),
..Default::default()
};
let command = vec!["scripts/run".to_string()];
let found = detect_implicit_skill_executable_invocation_for_tokens(&outcome, &command);
assert_eq!(found, None);
}
}

View File

@@ -16,6 +16,7 @@ pub(crate) use injection::build_skill_injections;
pub(crate) use injection::collect_explicit_skill_mentions;
pub(crate) use invocation_utils::SKILL_APPROVAL_DECLINED_MESSAGE;
pub(crate) use invocation_utils::build_implicit_skill_path_indexes;
pub(crate) use invocation_utils::detect_implicit_skill_executable_invocation_for_tokens;
pub(crate) use invocation_utils::ensure_skill_approval_for_command;
pub(crate) use invocation_utils::maybe_emit_implicit_skill_invocation;
pub use loader::load_skills;

View File

@@ -14,7 +14,11 @@ use crate::function_tool::FunctionCallError;
use crate::is_safe_command::is_known_safe_command;
use crate::protocol::ExecCommandSource;
use crate::shell::Shell;
use crate::shell::ShellType;
use crate::skills::SKILL_APPROVAL_DECLINED_MESSAGE;
use crate::skills::SkillLoadOutcome;
use crate::skills::SkillMetadata;
use crate::skills::detect_implicit_skill_executable_invocation_for_tokens;
use crate::skills::ensure_skill_approval_for_command;
use crate::skills::maybe_emit_implicit_skill_invocation;
use crate::tools::context::ToolInvocation;
@@ -31,6 +35,7 @@ use crate::tools::registry::ToolKind;
use crate::tools::runtimes::shell::ShellRequest;
use crate::tools::runtimes::shell::ShellRuntime;
use crate::tools::runtimes::shell::ShellRuntimeBackend;
use crate::tools::sandboxing::ExecApprovalRequirement;
use crate::tools::sandboxing::ToolCtx;
use crate::tools::spec::ShellCommandBackendConfig;
use codex_protocol::models::PermissionProfile;
@@ -47,6 +52,9 @@ pub struct ShellCommandHandler {
backend: ShellCommandBackend,
}
const SKILL_SHELL_COMMAND_APPROVAL_REASON: &str =
"This command runs a skill script and requires approval.";
struct RunExecLikeArgs {
tool_name: String,
exec_params: ExecParams,
@@ -297,6 +305,29 @@ impl ToolHandler for ShellCommandHandler {
}
impl ShellHandler {
fn detect_skill_shell_command(
outcome: &SkillLoadOutcome,
shell: &Shell,
command: &[String],
shell_runtime_backend: ShellRuntimeBackend,
skill_shell_command_enabled: bool,
) -> Option<SkillMetadata> {
if !skill_shell_command_enabled
|| shell_runtime_backend != ShellRuntimeBackend::ShellCommandClassic
|| shell.shell_type != ShellType::Zsh
{
return None;
}
let commands = crate::bash::parse_shell_lc_plain_commands(command)?;
let [inner_command] = commands.as_slice() else {
return None;
};
let (skill, _path) =
detect_implicit_skill_executable_invocation_for_tokens(outcome, inner_command)?;
Some(skill)
}
async fn run_exec_like(args: RunExecLikeArgs) -> Result<ToolOutput, FunctionCallError> {
let RunExecLikeArgs {
tool_name,
@@ -397,17 +428,37 @@ impl ShellHandler {
let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None);
emitter.begin(event_ctx).await;
let exec_approval_requirement = session
.services
.exec_policy
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
command: &exec_params.command,
approval_policy: turn.approval_policy.value(),
sandbox_policy: turn.sandbox_policy.get(),
sandbox_permissions: exec_params.sandbox_permissions,
prefix_rule,
})
.await;
let user_shell = session.user_shell();
let matched_skill = Self::detect_skill_shell_command(
turn.turn_skills.outcome.as_ref(),
user_shell.as_ref(),
&exec_params.command,
shell_runtime_backend,
turn.features.enabled(Feature::SkillShellCommandSandbox),
);
let effective_sandbox_policy = matched_skill
.as_ref()
.and_then(|skill| skill.permissions.as_ref())
.map(|permissions| permissions.sandbox_policy.get().clone())
.unwrap_or_else(|| turn.sandbox_policy.get().clone());
let exec_approval_requirement = if matched_skill.is_some() {
ExecApprovalRequirement::NeedsApproval {
reason: Some(SKILL_SHELL_COMMAND_APPROVAL_REASON.to_string()),
proposed_execpolicy_amendment: None,
}
} else {
session
.services
.exec_policy
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
command: &exec_params.command,
approval_policy: turn.approval_policy.value(),
sandbox_policy: &effective_sandbox_policy,
sandbox_permissions: exec_params.sandbox_permissions,
prefix_rule,
})
.await
};
let req = ShellRequest {
command: exec_params.command.clone(),
@@ -420,6 +471,7 @@ impl ShellHandler {
additional_permissions: normalized_additional_permissions,
justification: exec_params.justification.clone(),
exec_approval_requirement,
effective_sandbox_policy,
};
let mut orchestrator = ToolOrchestrator::new();
let mut runtime = {
@@ -458,13 +510,17 @@ impl ShellHandler {
#[cfg(test)]
mod tests {
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use codex_protocol::models::ShellCommandToolCallParams;
use codex_protocol::protocol::SkillScope;
use pretty_assertions::assert_eq;
use crate::codex::make_session_and_context;
use crate::skills::SkillLoadOutcome;
use crate::skills::SkillMetadata;
use crate::exec_env::create_env;
use crate::is_safe_command::is_known_safe_command;
use crate::powershell::try_find_powershell_executable_blocking;
@@ -474,8 +530,24 @@ mod tests {
use crate::shell::ShellType;
use crate::shell_snapshot::ShellSnapshot;
use crate::tools::handlers::ShellCommandHandler;
use crate::tools::handlers::ShellHandler;
use crate::tools::runtimes::shell::ShellRuntimeBackend;
use tokio::sync::watch;
fn test_skill_metadata(path_to_skills_md: PathBuf) -> SkillMetadata {
SkillMetadata {
name: "test-skill".to_string(),
description: "test".to_string(),
short_description: None,
interface: None,
dependencies: None,
policy: None,
permissions: None,
path_to_skills_md,
scope: SkillScope::User,
}
}
/// The logic for is_known_safe_command() has heuristics for known shells,
/// so we must ensure the commands generated by [ShellCommandHandler] can be
/// recognized as safe if the `command` is safe.
@@ -638,4 +710,91 @@ mod tests {
"unexpected error: {err}"
);
}
#[test]
fn detect_skill_shell_command_matches_direct_absolute_executable() {
let shell = Shell {
shell_type: ShellType::Zsh,
shell_path: PathBuf::from("/bin/zsh"),
shell_snapshot: crate::shell::empty_shell_snapshot_receiver(),
};
let skill = test_skill_metadata(PathBuf::from("/tmp/skill-test/SKILL.md"));
let outcome = SkillLoadOutcome {
implicit_skills_by_scripts_dir: Arc::new(HashMap::from([(
PathBuf::from("/tmp/skill-test/scripts"),
skill,
)])),
..Default::default()
};
let command = shell.derive_exec_args("/tmp/skill-test/scripts/run-tool", true);
let found = ShellHandler::detect_skill_shell_command(
&outcome,
&shell,
&command,
ShellRuntimeBackend::ShellCommandClassic,
true,
);
assert_eq!(found.map(|skill| skill.name), Some("test-skill".to_string()));
}
#[test]
fn detect_skill_shell_command_ignores_multi_command_shell_script() {
let shell = Shell {
shell_type: ShellType::Zsh,
shell_path: PathBuf::from("/bin/zsh"),
shell_snapshot: crate::shell::empty_shell_snapshot_receiver(),
};
let skill = test_skill_metadata(PathBuf::from("/tmp/skill-test/SKILL.md"));
let outcome = SkillLoadOutcome {
implicit_skills_by_scripts_dir: Arc::new(HashMap::from([(
PathBuf::from("/tmp/skill-test/scripts"),
skill,
)])),
..Default::default()
};
let command = shell.derive_exec_args(
"/tmp/skill-test/scripts/run-tool && echo done",
true,
);
let found = ShellHandler::detect_skill_shell_command(
&outcome,
&shell,
&command,
ShellRuntimeBackend::ShellCommandClassic,
true,
);
assert_eq!(found, None);
}
#[test]
fn detect_skill_shell_command_requires_classic_backend() {
let shell = Shell {
shell_type: ShellType::Zsh,
shell_path: PathBuf::from("/bin/zsh"),
shell_snapshot: crate::shell::empty_shell_snapshot_receiver(),
};
let skill = test_skill_metadata(PathBuf::from("/tmp/skill-test/SKILL.md"));
let outcome = SkillLoadOutcome {
implicit_skills_by_scripts_dir: Arc::new(HashMap::from([(
PathBuf::from("/tmp/skill-test/scripts"),
skill,
)])),
..Default::default()
};
let command = shell.derive_exec_args("/tmp/skill-test/scripts/run-tool", true);
let found = ShellHandler::detect_skill_shell_command(
&outcome,
&shell,
&command,
ShellRuntimeBackend::ShellCommandZshFork,
true,
);
assert_eq!(found, None);
}
}

View File

@@ -113,12 +113,13 @@ impl ToolOrchestrator {
let otel_ci = &tool_ctx.call_id;
let otel_user = ToolDecisionSource::User;
let otel_cfg = ToolDecisionSource::Config;
let effective_sandbox_policy = tool.sandbox_policy(req, turn_ctx);
// 1) Approval
let mut already_approved = false;
let requirement = tool.exec_approval_requirement(req).unwrap_or_else(|| {
default_exec_approval_requirement(approval_policy, &turn_ctx.sandbox_policy)
default_exec_approval_requirement(approval_policy, effective_sandbox_policy)
});
match requirement {
ExecApprovalRequirement::Skip { .. } => {
@@ -169,7 +170,7 @@ impl ToolOrchestrator {
let initial_sandbox = match tool.sandbox_mode_for_first_attempt(req) {
SandboxOverride::BypassSandboxFirstAttempt => crate::exec::SandboxType::None,
SandboxOverride::NoOverride => self.sandbox.select_initial(
&turn_ctx.sandbox_policy,
effective_sandbox_policy,
tool.sandbox_preference(),
turn_ctx.windows_sandbox_level,
has_managed_network_requirements,
@@ -181,7 +182,7 @@ impl ToolOrchestrator {
let use_linux_sandbox_bwrap = turn_ctx.features.enabled(Feature::UseLinuxSandboxBwrap);
let initial_attempt = SandboxAttempt {
sandbox: initial_sandbox,
policy: &turn_ctx.sandbox_policy,
policy: effective_sandbox_policy,
enforce_managed_network: has_managed_network_requirements,
manager: &self.sandbox,
sandbox_cwd: &turn_ctx.cwd,
@@ -238,7 +239,7 @@ impl ToolOrchestrator {
&& matches!(
default_exec_approval_requirement(
approval_policy,
&turn_ctx.sandbox_policy
effective_sandbox_policy
),
ExecApprovalRequirement::NeedsApproval { .. }
);
@@ -295,7 +296,7 @@ impl ToolOrchestrator {
let escalated_attempt = SandboxAttempt {
sandbox: crate::exec::SandboxType::None,
policy: &turn_ctx.sandbox_policy,
policy: effective_sandbox_policy,
enforce_managed_network: has_managed_network_requirements,
manager: &self.sandbox,
sandbox_cwd: &turn_ctx.cwd,

View File

@@ -14,6 +14,7 @@ use crate::powershell::prefix_powershell_script_with_utf8;
use crate::sandboxing::SandboxPermissions;
use crate::sandboxing::execute_env;
use crate::shell::ShellType;
use crate::protocol::SandboxPolicy;
use crate::tools::network_approval::NetworkApprovalMode;
use crate::tools::network_approval::NetworkApprovalSpec;
use crate::tools::runtimes::build_command_spec;
@@ -49,6 +50,7 @@ pub struct ShellRequest {
pub additional_permissions: Option<PermissionProfile>,
pub justification: Option<String>,
pub exec_approval_requirement: ExecApprovalRequirement,
pub effective_sandbox_policy: SandboxPolicy,
}
/// Selects `ShellRuntime` behavior for different callers.
@@ -180,6 +182,14 @@ impl Approvable<ShellRequest> for ShellRuntime {
}
impl ToolRuntime<ShellRequest, ExecToolCallOutput> for ShellRuntime {
fn sandbox_policy<'a>(
&self,
req: &'a ShellRequest,
_turn_ctx: &'a crate::codex::TurnContext,
) -> &'a SandboxPolicy {
&req.effective_sandbox_policy
}
fn network_approval_spec(
&self,
req: &ShellRequest,

View File

@@ -303,6 +303,14 @@ pub(crate) enum ToolError {
}
pub(crate) trait ToolRuntime<Req, Out>: Approvable<Req> + Sandboxable {
fn sandbox_policy<'a>(
&self,
_req: &'a Req,
turn_ctx: &'a TurnContext,
) -> &'a SandboxPolicy {
turn_ctx.sandbox_policy.get()
}
fn network_approval_spec(&self, _req: &Req, _ctx: &ToolCtx) -> Option<NetworkApprovalSpec> {
None
}