chore: rename codex-command to codex-shell-command (#11378)

This addresses some post-merge feedback on
https://github.com/openai/codex/pull/11361:

- crate rename
- reuse `detect_shell_type()` utility
This commit is contained in:
Michael Bolin
2026-02-10 17:03:46 -08:00
committed by GitHub
parent 87bbfc50a1
commit d44f4205fb
19 changed files with 58 additions and 53 deletions

View File

@@ -0,0 +1,382 @@
use crate::bash::parse_shell_lc_plain_commands;
#[cfg(windows)]
#[path = "windows_dangerous_commands.rs"]
mod windows_dangerous_commands;
pub fn command_might_be_dangerous(command: &[String]) -> bool {
#[cfg(windows)]
{
if windows_dangerous_commands::is_dangerous_command_windows(command) {
return true;
}
}
if is_dangerous_to_call_with_exec(command) {
return true;
}
// Support `bash -lc "<script>"` where the any part of the script might contain a dangerous command.
if let Some(all_commands) = parse_shell_lc_plain_commands(command)
&& all_commands
.iter()
.any(|cmd| is_dangerous_to_call_with_exec(cmd))
{
return true;
}
false
}
/// Reports whether `arg` is a git global option whose value is passed as the
/// *next* argument (for example `-C <path>` or `--git-dir <dir>`).
fn is_git_global_option_with_value(arg: &str) -> bool {
    const OPTIONS_TAKING_NEXT_ARG: [&str; 8] = [
        "-C",
        "-c",
        "--config-env",
        "--exec-path",
        "--git-dir",
        "--namespace",
        "--super-prefix",
        "--work-tree",
    ];
    OPTIONS_TAKING_NEXT_ARG.contains(&arg)
}
/// Reports whether `arg` is a git global option that carries its value in the
/// same token: either `--name=value` for the long options listed below, or
/// `-C<value>` / `-c<value>` with at least one character after the flag.
fn is_git_global_option_with_inline_value(arg: &str) -> bool {
    const LONG_PREFIXES: [&str; 6] = [
        "--config-env=",
        "--exec-path=",
        "--git-dir=",
        "--namespace=",
        "--super-prefix=",
        "--work-tree=",
    ];

    let short_with_attached_value =
        arg.len() > 2 && (arg.starts_with("-C") || arg.starts_with("-c"));

    short_with_attached_value || LONG_PREFIXES.iter().any(|prefix| arg.starts_with(*prefix))
}
/// Locate the git subcommand in `command`, provided it is one of
/// `subcommands`.
///
/// Known global options that may precede the subcommand (e.g. `-C`, `-c`,
/// `--git-dir`) are skipped together with their values, as is any other token
/// that starts with `-`. Returns the subcommand's index in `command` and its
/// text, or `None` when `command[0]` does not end in `git`, when no
/// subcommand is present, or when the subcommand is not in `subcommands`.
///
/// Shared with `is_safe_command` to avoid git-global-option bypasses.
pub(crate) fn find_git_subcommand<'a>(
    command: &'a [String],
    subcommands: &[&str],
) -> Option<(usize, &'a str)> {
    let program = command.first()?;
    if !program.ends_with("git") {
        return None;
    }

    let mut remaining = command.iter().enumerate().skip(1);
    while let Some((position, token)) = remaining.next() {
        let token = token.as_str();

        if is_git_global_option_with_inline_value(token) {
            continue;
        }
        if is_git_global_option_with_value(token) {
            // The option's value is the following argument; consume it so it
            // is never mistaken for the subcommand.
            remaining.next();
            continue;
        }
        if token.starts_with('-') {
            // Any other option, including a bare `--`.
            continue;
        }

        // In git, the first non-option token is the subcommand. If it isn't
        // one of the subcommands we're looking for, stop scanning so later
        // positional args (e.g., branch names) are not misclassified.
        return subcommands.contains(&token).then_some((position, token));
    }

    None
}
fn is_dangerous_to_call_with_exec(command: &[String]) -> bool {
let cmd0 = command.first().map(String::as_str);
match cmd0 {
Some(cmd) if cmd.ends_with("git") => {
let Some((subcommand_idx, subcommand)) =
find_git_subcommand(command, &["reset", "rm", "branch", "push", "clean"])
else {
return false;
};
match subcommand {
"reset" | "rm" => true,
"branch" => git_branch_is_delete(&command[subcommand_idx + 1..]),
"push" => git_push_is_dangerous(&command[subcommand_idx + 1..]),
"clean" => git_clean_is_force(&command[subcommand_idx + 1..]),
other => {
debug_assert!(false, "unexpected git subcommand from matcher: {other}");
false
}
}
}
Some("rm") => matches!(command.get(1).map(String::as_str), Some("-f" | "-rf")),
// for sudo <cmd> simply do the check for <cmd>
Some("sudo") => is_dangerous_to_call_with_exec(&command[1..]),
// ── anything else ─────────────────────────────────────────────────
_ => false,
}
}
fn git_branch_is_delete(branch_args: &[String]) -> bool {
// Git allows stacking short flags (for example, `-dv` or `-vd`). Treat any
// short-flag group containing `d`/`D` as a delete flag.
branch_args.iter().map(String::as_str).any(|arg| {
matches!(arg, "-d" | "-D" | "--delete")
|| arg.starts_with("--delete=")
|| short_flag_group_contains(arg, 'd')
|| short_flag_group_contains(arg, 'D')
})
}
/// Reports whether `arg` is a single-dash flag group (such as `-vd`) that
/// contains the character `target` after the leading dash. Double-dash
/// options never match.
fn short_flag_group_contains(arg: &str, target: char) -> bool {
    match arg.strip_prefix('-') {
        Some(flags) if !flags.starts_with('-') => flags.contains(target),
        _ => false,
    }
}
/// Reports whether the arguments following `git push` force an update or
/// delete a remote ref: a force/delete long option, a short-flag group
/// containing `f` or `d`, or a `+<refspec>` / `:<dst>` refspec.
fn git_push_is_dangerous(push_args: &[String]) -> bool {
    const LONG_FLAGS: [&str; 4] = [
        "--force",
        "--force-with-lease",
        "--force-if-includes",
        "--delete",
    ];
    const LONG_FLAG_PREFIXES: [&str; 3] = [
        "--force-with-lease=",
        "--force-if-includes=",
        "--delete=",
    ];

    push_args.iter().any(|arg| {
        let arg = arg.as_str();
        LONG_FLAGS.contains(&arg)
            || LONG_FLAG_PREFIXES
                .iter()
                .any(|prefix| arg.starts_with(*prefix))
            || short_flag_group_contains(arg, 'f')
            || short_flag_group_contains(arg, 'd')
            || git_push_refspec_is_dangerous(arg)
    })
}
/// Reports whether `arg` looks like a destructive push refspec.
///
/// `+<refspec>` forces updates and `:<dst>` deletes remote refs. A lone `+`
/// or `:` with nothing after it does not count.
fn git_push_refspec_is_dangerous(arg: &str) -> bool {
    let mut chars = arg.chars();
    matches!(chars.next(), Some('+' | ':')) && chars.next().is_some()
}
/// Reports whether the arguments following `git clean` include a force flag:
/// `--force`, `--force=<value>`, or a short-flag group containing `f`.
fn git_clean_is_force(clean_args: &[String]) -> bool {
    clean_args
        .iter()
        .map(String::as_str)
        .any(|arg| match arg.strip_prefix("--") {
            // Long option: only the `force` spellings count.
            Some(long) => long == "force" || long.starts_with("force="),
            // Everything else: look for `f` inside a single-dash group.
            None => short_flag_group_contains(arg, 'f'),
        })
}
// Unit tests for `command_might_be_dangerous`. Each test feeds an argv
// (optionally wrapped in `bash -lc` / `zsh -lc`) through the public entry
// point and asserts the classification.
#[cfg(test)]
mod tests {
use super::*;
// Builds an owned argv from string literals.
fn vec_str(items: &[&str]) -> Vec<String> {
items.iter().map(std::string::ToString::to_string).collect()
}
// `git reset` is flagged even without `--hard`.
#[test]
fn git_reset_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&["git", "reset"])));
}
// The same command is flagged when it appears inside a `bash -lc` script.
#[test]
fn bash_git_reset_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"bash",
"-lc",
"git reset --hard",
])));
}
// ... and inside a `zsh -lc` script.
#[test]
fn zsh_git_reset_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"zsh",
"-lc",
"git reset --hard",
])));
}
#[test]
fn git_status_is_not_dangerous() {
assert!(!command_might_be_dangerous(&vec_str(&["git", "status"])));
}
#[test]
fn bash_git_status_is_not_dangerous() {
assert!(!command_might_be_dangerous(&vec_str(&[
"bash",
"-lc",
"git status",
])));
}
// `sudo <cmd>` is classified by checking `<cmd>`.
#[test]
fn sudo_git_reset_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"sudo", "git", "reset", "--hard",
])));
}
// A full path whose last component is `git` is still recognised as git.
#[test]
fn usr_bin_git_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"/usr/bin/git",
"reset",
"--hard",
])));
}
// `-d`, `-D`, and `--delete` all count as branch deletion.
#[test]
fn git_branch_delete_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"git", "branch", "-d", "feature",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git", "branch", "-D", "feature",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"bash",
"-lc",
"git branch --delete feature",
])));
}
// Delete flags hidden inside a stacked short-flag group are detected in any
// position within the group.
#[test]
fn git_branch_delete_with_stacked_short_flags_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"git", "branch", "-dv", "feature",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git", "branch", "-vd", "feature",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git", "branch", "-vD", "feature",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git", "branch", "-Dvv", "feature",
])));
}
// Global options placed before the subcommand (`-C <path>`, `-c <cfg>`) must
// not hide the subcommand from the detector.
#[test]
fn git_branch_delete_with_global_options_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"git", "-C", ".", "branch", "-d", "feature",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git",
"-c",
"color.ui=false",
"branch",
"-D",
"feature",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"bash",
"-lc",
"git -C . branch -d feature",
])));
}
#[test]
fn git_checkout_reset_is_not_dangerous() {
// The first non-option token is "checkout", so later positional args
// like branch names must not be treated as subcommands.
assert!(!command_might_be_dangerous(&vec_str(&[
"git", "checkout", "reset",
])));
}
// Long and short force flags, including `--force-with-lease` after a global
// option.
#[test]
fn git_push_force_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"git", "push", "--force", "origin", "main",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git", "push", "-f", "origin", "main",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git",
"-C",
".",
"push",
"--force-with-lease",
"origin",
"main",
])));
}
// A refspec starting with `+` is treated as a forced update.
#[test]
fn git_push_plus_refspec_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"git", "push", "origin", "+main",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git",
"push",
"origin",
"+refs/heads/main:refs/heads/main",
])));
}
#[test]
fn git_push_delete_flag_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"git", "push", "--delete", "origin", "feature",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git", "push", "-d", "origin", "feature",
])));
}
// A refspec of the form `:<dst>` is treated as a remote-ref deletion.
#[test]
fn git_push_delete_refspec_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&[
"git", "push", "origin", ":feature",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"bash",
"-lc",
"git push origin :feature",
])));
}
#[test]
fn git_push_without_force_is_not_dangerous() {
assert!(!command_might_be_dangerous(&vec_str(&[
"git", "push", "origin", "main",
])));
}
#[test]
fn git_clean_force_is_dangerous_even_when_f_is_not_first_flag() {
assert!(command_might_be_dangerous(&vec_str(&[
"git", "clean", "-fdx",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git", "clean", "-xdf",
])));
assert!(command_might_be_dangerous(&vec_str(&[
"git", "clean", "--force",
])));
}
#[test]
fn rm_rf_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&["rm", "-rf", "/"])));
}
#[test]
fn rm_f_is_dangerous() {
assert!(command_might_be_dangerous(&vec_str(&["rm", "-f", "/"])));
}
}

View File

@@ -0,0 +1,592 @@
use crate::bash::parse_shell_lc_plain_commands;
// Find the first matching git subcommand, skipping known global options that
// may appear before it (e.g., `-C`, `-c`, `--git-dir`).
// Implemented in `is_dangerous_command` and shared here.
use crate::command_safety::is_dangerous_command::find_git_subcommand;
use crate::command_safety::windows_safe_commands::is_safe_command_windows;
pub fn is_known_safe_command(command: &[String]) -> bool {
let command: Vec<String> = command
.iter()
.map(|s| {
if s == "zsh" {
"bash".to_string()
} else {
s.clone()
}
})
.collect();
if is_safe_command_windows(&command) {
return true;
}
if is_safe_to_call_with_exec(&command) {
return true;
}
// Support `bash -lc "..."` where the script consists solely of one or
// more "plain" commands (only bare words / quoted strings) combined with
// a conservative allowlist of shell operators that themselves do not
// introduce side effects ( "&&", "||", ";", and "|" ). If every
// individual command in the script is itself a knownsafe command, then
// the composite expression is considered safe.
if let Some(all_commands) = parse_shell_lc_plain_commands(&command)
&& !all_commands.is_empty()
&& all_commands
.iter()
.all(|cmd| is_safe_to_call_with_exec(cmd))
{
return true;
}
false
}
/// Decides whether a single argv (no shell wrapper) is on the read-only
/// allowlist.
///
/// The program is identified by the final path component of `command[0]`.
/// Some programs are always accepted; `base64`, `find`, `rg`, `git`, and
/// `sed` are accepted only when none of their arguments can write files or
/// run other programs. Anything unrecognised is rejected.
fn is_safe_to_call_with_exec(command: &[String]) -> bool {
    let Some(cmd0) = command.first().map(String::as_str) else {
        return false;
    };

    match std::path::Path::new(cmd0)
        .file_name()
        .and_then(|osstr| osstr.to_str())
    {
        // Only allowlisted when building for Linux.
        Some(cmd) if cfg!(target_os = "linux") && matches!(cmd, "numfmt" | "tac") => true,

        #[rustfmt::skip]
        Some(
            "cat" |
            "cd" |
            "cut" |
            "echo" |
            "expr" |
            "false" |
            "grep" |
            "head" |
            "id" |
            "ls" |
            "nl" |
            "paste" |
            "pwd" |
            "rev" |
            "seq" |
            "stat" |
            "tail" |
            "tr" |
            "true" |
            "uname" |
            "uniq" |
            "wc" |
            "which" |
            "whoami") => {
            true
        },

        Some("base64") => {
            // `-o` / `--output` write the result to a file, in split,
            // `--output=<file>`, or attached `-o<file>` form.
            const UNSAFE_BASE64_OPTIONS: &[&str] = &["-o", "--output"];

            !command.iter().skip(1).any(|arg| {
                UNSAFE_BASE64_OPTIONS.contains(&arg.as_str())
                    || arg.starts_with("--output=")
                    || (arg.starts_with("-o") && arg != "-o")
            })
        }

        Some("find") => {
            // Certain options to `find` can delete files, write to files, or
            // execute arbitrary commands, so we cannot auto-approve the
            // invocation of `find` in such cases.
            #[rustfmt::skip]
            const UNSAFE_FIND_OPTIONS: &[&str] = &[
                // Options that can execute arbitrary commands.
                "-exec", "-execdir", "-ok", "-okdir",
                // Option that deletes matching files.
                "-delete",
                // Options that write pathnames to a file.
                "-fls", "-fprint", "-fprint0", "-fprintf",
            ];

            !command
                .iter()
                .any(|arg| UNSAFE_FIND_OPTIONS.contains(&arg.as_str()))
        }

        // Ripgrep
        Some("rg") => {
            const UNSAFE_RIPGREP_OPTIONS_WITH_ARGS: &[&str] = &[
                // Takes an arbitrary command that is executed for each match.
                "--pre",
                // Takes a command that can be used to obtain the local hostname.
                "--hostname-bin",
            ];
            const UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS: &[&str] = &[
                // Calls out to other decompression tools, so do not auto-approve
                // out of an abundance of caution.
                "--search-zip",
                "-z",
            ];

            !command.iter().map(String::as_str).any(|arg| {
                // Short flags can be stacked (`-nz`), so `-z` may hide inside
                // a single-dash group. Any such group containing `z` is
                // rejected; this is deliberately conservative and also
                // rejects groups where `z` is part of an attached value.
                let stacked_search_zip =
                    arg.starts_with('-') && !arg.starts_with("--") && arg.contains('z');

                stacked_search_zip
                    || UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS.contains(&arg)
                    || UNSAFE_RIPGREP_OPTIONS_WITH_ARGS.iter().any(|&opt| {
                        // Matches `opt` exactly or in `opt=value` form.
                        arg.strip_prefix(opt)
                            .is_some_and(|rest| rest.is_empty() || rest.starts_with('='))
                    })
            })
        }

        // Git
        Some("git") => {
            // Global config overrides like `-c core.pager=...` can force git
            // to execute arbitrary external commands. With no sandboxing, we
            // should always prompt in those cases.
            if git_has_config_override_global_option(command) {
                return false;
            }

            let Some((subcommand_idx, subcommand)) =
                find_git_subcommand(command, &["status", "log", "diff", "show", "branch"])
            else {
                return false;
            };

            let subcommand_args = &command[subcommand_idx + 1..];

            match subcommand {
                "status" | "log" | "diff" | "show" => {
                    git_subcommand_args_are_read_only(subcommand_args)
                }
                "branch" => {
                    git_subcommand_args_are_read_only(subcommand_args)
                        && git_branch_is_read_only(subcommand_args)
                }
                other => {
                    debug_assert!(false, "unexpected git subcommand from matcher: {other}");
                    false
                }
            }
        }

        // Special-case `sed -n {N|M,N}p`
        Some("sed")
            if command.len() <= 4
                && command.get(1).map(String::as_str) == Some("-n")
                && is_valid_sed_n_arg(command.get(2).map(String::as_str)) =>
        {
            true
        }

        // ── anything else ─────────────────────────────────────────────────
        _ => false,
    }
}
/// Treat `git branch` as safe only when the arguments clearly indicate a
/// read-only query, not a branch mutation (create/rename/delete).
///
/// `git branch` with no additional arguments lists branches and is accepted.
/// Otherwise every argument must be one of the listing/verbosity flags below
/// or a `--format=<fmt>` option; any other flag or positional argument may
/// create, rename, or delete branches and causes rejection.
fn git_branch_is_read_only(branch_args: &[String]) -> bool {
    const READ_ONLY_FLAGS: [&str; 10] = [
        "--list",
        "-l",
        "--show-current",
        "-a",
        "--all",
        "-r",
        "--remotes",
        "-v",
        "-vv",
        "--verbose",
    ];

    branch_args
        .iter()
        .map(String::as_str)
        .all(|arg| READ_ONLY_FLAGS.contains(&arg) || arg.starts_with("--format="))
}
/// Reports whether any argument is a git config override: `-c` (alone or with
/// an attached value), `--config-env`, or `--config-env=<value>`.
///
/// Every argument is inspected, not only those before the subcommand.
fn git_has_config_override_global_option(command: &[String]) -> bool {
    for arg in command {
        // `starts_with("-c")` covers both the bare `-c` and `-c<value>`.
        let short_override = arg.starts_with("-c");
        let long_override = arg == "--config-env" || arg.starts_with("--config-env=");
        if short_override || long_override {
            return true;
        }
    }
    false
}
/// Reports whether none of the git subcommand arguments is a flag that can
/// write to disk or execute external tools. Such flags should never be
/// auto-approved on an unsandboxed machine.
fn git_subcommand_args_are_read_only(args: &[String]) -> bool {
    args.iter().map(String::as_str).all(|arg| {
        let is_unsafe_flag = matches!(
            arg,
            "--output" | "--ext-diff" | "--textconv" | "--exec" | "--paginate"
        );
        let has_unsafe_inline_value = arg.starts_with("--output=") || arg.starts_with("--exec=");
        !is_unsafe_flag && !has_unsafe_inline_value
    })
}
// (bash parsing helpers implemented in crate::bash)
/* ----------------------------------------------------------
Example
---------------------------------------------------------- */
/// Returns true if `arg` matches /^(\d+,)?\d+p$/
///
/// That is: a line number or a `start,end` pair of line numbers (ASCII
/// digits only), followed by the `p` command. `None` never matches.
fn is_valid_sed_n_arg(arg: Option<&str>) -> bool {
    // The script must end in `p`; what precedes it is the address part.
    let Some(address) = arg.and_then(|script| script.strip_suffix('p')) else {
        return false;
    };

    let is_number = |text: &str| !text.is_empty() && text.bytes().all(|b| b.is_ascii_digit());

    match address.split_once(',') {
        // single number, e.g. "10"
        None => is_number(address),
        // two numbers, e.g. "1,5"; a second comma ends up inside `end` and
        // makes it non-numeric, so "1,2,3" is rejected
        Some((start, end)) => is_number(start) && is_number(end),
    }
}
// Unit tests for the safe-command allowlist. Tests call either
// `is_safe_to_call_with_exec` (a single argv) or `is_known_safe_command`
// (which also understands `bash -lc` / `zsh -lc` wrappers).
#[cfg(test)]
mod tests {
use super::*;
use std::string::ToString;
// Builds an owned argv from string literals.
fn vec_str(args: &[&str]) -> Vec<String> {
args.iter().map(ToString::to_string).collect()
}
// Representative argv values that must be auto-approved.
#[test]
fn known_safe_examples() {
assert!(is_safe_to_call_with_exec(&vec_str(&["ls"])));
assert!(is_safe_to_call_with_exec(&vec_str(&["git", "status"])));
assert!(is_safe_to_call_with_exec(&vec_str(&["git", "branch"])));
assert!(is_safe_to_call_with_exec(&vec_str(&[
"git",
"branch",
"--show-current"
])));
assert!(is_safe_to_call_with_exec(&vec_str(&["base64"])));
assert!(is_safe_to_call_with_exec(&vec_str(&[
"sed", "-n", "1,5p", "file.txt"
])));
assert!(is_safe_to_call_with_exec(&vec_str(&[
"nl",
"-nrz",
"Cargo.toml"
])));
// Safe `find` command (no unsafe options).
assert!(is_safe_to_call_with_exec(&vec_str(&[
"find", ".", "-name", "file.txt"
])));
// `numfmt` and `tac` are allowlisted only on Linux builds.
if cfg!(target_os = "linux") {
assert!(is_safe_to_call_with_exec(&vec_str(&["numfmt", "1000"])));
assert!(is_safe_to_call_with_exec(&vec_str(&["tac", "Cargo.toml"])));
} else {
assert!(!is_safe_to_call_with_exec(&vec_str(&["numfmt", "1000"])));
assert!(!is_safe_to_call_with_exec(&vec_str(&["tac", "Cargo.toml"])));
}
}
// Deleting a branch or creating one (positional name) is not read-only.
#[test]
fn git_branch_mutating_flags_are_not_safe() {
assert!(!is_known_safe_command(&vec_str(&[
"git", "branch", "-d", "feature"
])));
assert!(!is_known_safe_command(&vec_str(&[
"git",
"branch",
"new-branch"
])));
}
// A `-C <path>` global option must neither block a read-only query nor hide
// a mutating one, with or without a `bash -lc` wrapper.
#[test]
fn git_branch_global_options_respect_safety_rules() {
use pretty_assertions::assert_eq;
assert_eq!(
is_known_safe_command(&vec_str(&["git", "-C", ".", "branch", "--show-current"])),
true
);
assert_eq!(
is_known_safe_command(&vec_str(&["git", "-C", ".", "branch", "-d", "feature"])),
false
);
assert_eq!(
is_known_safe_command(&vec_str(&["bash", "-lc", "git -C . branch -d feature",])),
false
);
}
#[test]
fn git_first_positional_is_the_subcommand() {
// In git, the first non-option token is the subcommand. Later positional
// args (like branch names) must not be treated as subcommands.
assert!(!is_known_safe_command(&vec_str(&[
"git", "checkout", "status",
])));
}
// `--output` (split and `=` forms) and `-c` config overrides (split and
// attached forms) must never be auto-approved.
#[test]
fn git_output_and_config_override_flags_are_not_safe() {
assert!(!is_known_safe_command(&vec_str(&[
"git",
"log",
"--output=/tmp/git-log-out-test",
"-n",
"1",
])));
assert!(!is_known_safe_command(&vec_str(&[
"git",
"diff",
"--output",
"/tmp/git-diff-out-test",
])));
assert!(!is_known_safe_command(&vec_str(&[
"git",
"show",
"--output=/tmp/git-show-out-test",
"HEAD",
])));
assert!(!is_known_safe_command(&vec_str(&[
"git",
"-c",
"core.pager=cat",
"log",
"-n",
"1",
])));
assert!(!is_known_safe_command(&vec_str(&[
"git",
"-ccore.pager=cat",
"status",
])));
}
#[test]
fn cargo_check_is_not_safe() {
assert!(!is_known_safe_command(&vec_str(&["cargo", "check"])));
}
// `zsh -lc` is handled the same way as `bash -lc`.
#[test]
fn zsh_lc_safe_command_sequence() {
assert!(is_known_safe_command(&vec_str(&["zsh", "-lc", "ls"])));
}
// Unknown programs, non-allowlisted git subcommands, malformed `sed -n`
// scripts, and `find` with executing/deleting/writing options are rejected.
#[test]
fn unknown_or_partial() {
assert!(!is_safe_to_call_with_exec(&vec_str(&["foo"])));
assert!(!is_safe_to_call_with_exec(&vec_str(&["git", "fetch"])));
assert!(!is_safe_to_call_with_exec(&vec_str(&[
"sed", "-n", "xp", "file.txt"
])));
// Unsafe `find` commands.
for args in [
vec_str(&["find", ".", "-name", "file.txt", "-exec", "rm", "{}", ";"]),
vec_str(&[
"find", ".", "-name", "*.py", "-execdir", "python3", "{}", ";",
]),
vec_str(&["find", ".", "-name", "file.txt", "-ok", "rm", "{}", ";"]),
vec_str(&["find", ".", "-name", "*.py", "-okdir", "python3", "{}", ";"]),
vec_str(&["find", ".", "-delete", "-name", "file.txt"]),
vec_str(&["find", ".", "-fls", "/etc/passwd"]),
vec_str(&["find", ".", "-fprint", "/etc/passwd"]),
vec_str(&["find", ".", "-fprint0", "/etc/passwd"]),
vec_str(&["find", ".", "-fprintf", "/root/suid.txt", "%#m %u %p\n"]),
] {
assert!(
!is_safe_to_call_with_exec(&args),
"expected {args:?} to be unsafe"
);
}
}
// Every spelling of base64's output option is rejected: split, `=`, and
// attached short form.
#[test]
fn base64_output_options_are_unsafe() {
for args in [
vec_str(&["base64", "-o", "out.bin"]),
vec_str(&["base64", "--output", "out.bin"]),
vec_str(&["base64", "--output=out.bin"]),
vec_str(&["base64", "-ob64.txt"]),
] {
assert!(
!is_safe_to_call_with_exec(&args),
"expected {args:?} to be considered unsafe due to output option"
);
}
}
#[test]
fn ripgrep_rules() {
// Safe ripgrep invocations none of the unsafe flags are present.
assert!(is_safe_to_call_with_exec(&vec_str(&[
"rg",
"Cargo.toml",
"-n"
])));
// Unsafe flags that do not take an argument (present verbatim).
for args in [
vec_str(&["rg", "--search-zip", "files"]),
vec_str(&["rg", "-z", "files"]),
] {
assert!(
!is_safe_to_call_with_exec(&args),
"expected {args:?} to be considered unsafe due to zip-search flag",
);
}
// Unsafe flags that expect a value, provided in both split and = forms.
for args in [
vec_str(&["rg", "--pre", "pwned", "files"]),
vec_str(&["rg", "--pre=pwned", "files"]),
vec_str(&["rg", "--hostname-bin", "pwned", "files"]),
vec_str(&["rg", "--hostname-bin=pwned", "files"]),
] {
assert!(
!is_safe_to_call_with_exec(&args),
"expected {args:?} to be considered unsafe due to external-command flag",
);
}
}
#[test]
fn windows_powershell_full_path_is_safe() {
if !cfg!(windows) {
// Windows only because on Linux path splitting doesn't handle `/` separators properly
return;
}
assert!(is_known_safe_command(&vec_str(&[
r"C:\Program Files\PowerShell\7\pwsh.exe",
"-Command",
"Get-Location",
])));
}
// Single plain commands inside a `bash -lc` script, with and without quoting.
#[test]
fn bash_lc_safe_examples() {
assert!(is_known_safe_command(&vec_str(&["bash", "-lc", "ls"])));
assert!(is_known_safe_command(&vec_str(&["bash", "-lc", "ls -1"])));
assert!(is_known_safe_command(&vec_str(&[
"bash",
"-lc",
"git status"
])));
assert!(is_known_safe_command(&vec_str(&[
"bash",
"-lc",
"grep -R \"Cargo.toml\" -n"
])));
assert!(is_known_safe_command(&vec_str(&[
"bash",
"-lc",
"sed -n 1,5p file.txt"
])));
assert!(is_known_safe_command(&vec_str(&[
"bash",
"-lc",
"sed -n '1,5p' file.txt"
])));
assert!(is_known_safe_command(&vec_str(&[
"bash",
"-lc",
"find . -name file.txt"
])));
}
// Scripts joining safe commands with `||`, `&&`, `;`, and `|` stay safe.
#[test]
fn bash_lc_safe_examples_with_operators() {
assert!(is_known_safe_command(&vec_str(&[
"bash",
"-lc",
"grep -R \"Cargo.toml\" -n || true"
])));
assert!(is_known_safe_command(&vec_str(&[
"bash",
"-lc",
"ls && pwd"
])));
assert!(is_known_safe_command(&vec_str(&[
"bash",
"-lc",
"echo 'hi' ; ls"
])));
assert!(is_known_safe_command(&vec_str(&[
"bash",
"-lc",
"ls | wc -l"
])));
}
#[test]
fn bash_lc_unsafe_examples() {
assert!(
!is_known_safe_command(&vec_str(&["bash", "-lc", "git", "status"])),
"Four arg version is not known to be safe."
);
assert!(
!is_known_safe_command(&vec_str(&["bash", "-lc", "'git status'"])),
"The extra quoting around 'git status' makes it a program named 'git status' and is therefore unsafe."
);
assert!(
!is_known_safe_command(&vec_str(&["bash", "-lc", "find . -name file.txt -delete"])),
"Unsafe find option should not be auto-approved."
);
// Disallowed because of unsafe command in sequence.
assert!(
!is_known_safe_command(&vec_str(&["bash", "-lc", "ls && rm -rf /"])),
"Sequence containing unsafe command must be rejected"
);
// Disallowed because of parentheses / subshell.
assert!(
!is_known_safe_command(&vec_str(&["bash", "-lc", "(ls)"])),
"Parentheses (subshell) are not provably safe with the current parser"
);
assert!(
!is_known_safe_command(&vec_str(&["bash", "-lc", "ls || (pwd && echo hi)"])),
"Nested parentheses are not provably safe with the current parser"
);
// Disallowed redirection.
assert!(
!is_known_safe_command(&vec_str(&["bash", "-lc", "ls > out.txt"])),
"> redirection should be rejected"
);
}
}

View File

@@ -0,0 +1,3 @@
pub mod is_dangerous_command;
pub mod is_safe_command;
pub mod windows_safe_commands;

View File

@@ -0,0 +1,201 @@
# Script prologue: decode the PowerShell source handed over in the
# CODEX_POWERSHELL_PAYLOAD environment variable and parse it into an AST.
# Every failure path prints a one-line JSON status object and exits with
# code 0, so the outcome is reported through the JSON rather than the exit code.
$ErrorActionPreference = 'Stop'
$payload = $env:CODEX_POWERSHELL_PAYLOAD
# No payload supplied: nothing to parse.
if ([string]::IsNullOrEmpty($payload)) {
Write-Output '{"status":"parse_failed"}'
exit 0
}
# The payload is Base64 text whose decoded bytes are read with
# [System.Text.Encoding]::Unicode (.NET's UTF-16 little-endian encoding).
try {
$source =
[System.Text.Encoding]::Unicode.GetString(
[System.Convert]::FromBase64String($payload)
)
} catch {
Write-Output '{"status":"parse_failed"}'
exit 0
}
# Output slots for Parser.ParseInput, which fills $tokens and $errors by reference.
$tokens = $null
$errors = $null
$ast = $null
try {
$ast = [System.Management.Automation.Language.Parser]::ParseInput(
$source,
[ref]$tokens,
[ref]$errors
)
} catch {
Write-Output '{"status":"parse_failed"}'
exit 0
}
# The parser reports syntax errors through $errors instead of throwing;
# these get their own status value.
if ($errors.Count -gt 0) {
Write-Output '{"status":"parse_errors"}'
exit 0
}
# Converts one element of a command (name, argument, or parameter) into the
# literal word(s) it stands for.
#
# Returns an array of one or two strings, or $null when the element is not a
# plain literal (for example an expandable string with nested expressions, or
# a parameter whose attached argument is not a constant).
function Convert-CommandElement {
    param($element)

    # Bare words and quoted strings without interpolation.
    if ($element -is [System.Management.Automation.Language.StringConstantExpressionAst]) {
        return @($element.Value)
    }

    # Double-quoted strings are accepted only when nothing is interpolated.
    if ($element -is [System.Management.Automation.Language.ExpandableStringExpressionAst]) {
        if ($element.NestedExpressions.Count -gt 0) {
            return $null
        }
        return @($element.Value)
    }

    # Other constants (e.g. numbers) are rendered with ToString().
    if ($element -is [System.Management.Automation.Language.ConstantExpressionAst]) {
        return @($element.Value.ToString())
    }

    # `-Name` or `-Name:<constant>`.
    if ($element -is [System.Management.Automation.Language.CommandParameterAst]) {
        # $null goes on the left so the comparison is always a scalar null
        # test, never a collection filter.
        if ($null -eq $element.Argument) {
            return @('-' + $element.ParameterName)
        }

        if ($element.Argument -is [System.Management.Automation.Language.StringConstantExpressionAst]) {
            return @('-' + $element.ParameterName, $element.Argument.Value)
        }

        if ($element.Argument -is [System.Management.Automation.Language.ConstantExpressionAst]) {
            return @('-' + $element.ParameterName, $element.Argument.Value.ToString())
        }

        return $null
    }

    return $null
}
# Converts one pipeline element into the list of words that make up the
# command, or $null when the element cannot be represented as plain words.
#
# Rejected: anything with redirections, commands whose InvocationOperator is
# set to something other than Unknown, commands containing an element that
# Convert-CommandElement rejects, and expressions other than a parenthesised
# single-element pipeline.
function Convert-PipelineElement {
    param($element)

    if ($element -is [System.Management.Automation.Language.CommandAst]) {
        if ($element.Redirections.Count -gt 0) {
            return $null
        }

        if (
            $null -ne $element.InvocationOperator -and
            $element.InvocationOperator -ne [System.Management.Automation.Language.TokenKind]::Unknown
        ) {
            return $null
        }

        $parts = @()
        foreach ($commandElement in $element.CommandElements) {
            $converted = Convert-CommandElement $commandElement
            # $converted may be a multi-element array. With the array on the
            # left, `-eq $null` would filter its elements instead of testing
            # the value itself, so $null goes on the left.
            if ($null -eq $converted) {
                return $null
            }
            $parts += $converted
        }
        return $parts
    }

    if ($element -is [System.Management.Automation.Language.CommandExpressionAst]) {
        if ($element.Redirections.Count -gt 0) {
            return $null
        }

        # `(command ...)`: unwrap the parentheses when they hold exactly one
        # pipeline element and convert that element instead.
        if ($element.Expression -is [System.Management.Automation.Language.ParenExpressionAst]) {
            $innerPipeline = $element.Expression.Pipeline
            if ($innerPipeline -and $innerPipeline.PipelineElements.Count -eq 1) {
                return Convert-PipelineElement $innerPipeline.PipelineElements[0]
            }
        }

        return $null
    }

    return $null
}
# Appends the word list of every element of $pipeline to $commands.
#
# Returns $true on success. Returns $false when the pipeline has no elements
# or when any element cannot be converted; in that case $commands may already
# hold the elements converted before the failure.
function Add-CommandsFromPipelineAst {
    param($pipeline, $commands)

    if ($pipeline.PipelineElements.Count -eq 0) {
        return $false
    }

    foreach ($element in $pipeline.PipelineElements) {
        $words = Convert-PipelineElement $element
        # $words is usually an array; keep $null on the left so this is a
        # null test rather than an element filter.
        if ($null -eq $words -or $words.Count -eq 0) {
            return $false
        }
        # ArrayList.Add returns the new index; discard it so it does not
        # become part of this function's output.
        $null = $commands.Add($words)
    }

    return $true
}
# Collects the commands of a pipeline chain: first the left-hand side (which
# may itself be a chain), then the right-hand pipeline. Returns $true only
# when both sides were collected; the right-hand side is not visited if the
# left-hand side fails.
function Add-CommandsFromPipelineChain {
    param($chain, $commands)

    $lhsCollected = Add-CommandsFromPipelineBase $chain.LhsPipelineChain $commands
    if (-not $lhsCollected) {
        return $false
    }

    $rhsCollected = Add-CommandsFromPipelineAst $chain.RhsPipeline $commands
    if ($rhsCollected) {
        return $true
    }

    return $false
}
# Dispatches on the statement's AST type: plain pipelines and pipeline chains
# are collected into $commands; every other statement kind yields $false.
function Add-CommandsFromPipelineBase {
    param($pipeline, $commands)

    if ($pipeline -is [System.Management.Automation.Language.PipelineAst]) {
        return Add-CommandsFromPipelineAst $pipeline $commands
    }
    elseif ($pipeline -is [System.Management.Automation.Language.PipelineChainAst]) {
        return Add-CommandsFromPipelineChain $pipeline $commands
    }
    else {
        return $false
    }
}
# Script epilogue: walk every top-level statement, collect its commands, and
# print the result as JSON. $commands is set to $null as soon as any statement
# cannot be represented, which produces the 'unsupported' status.
$commands = [System.Collections.ArrayList]::new()

foreach ($statement in $ast.EndBlock.Statements) {
    if (-not (Add-CommandsFromPipelineBase $statement $commands)) {
        $commands = $null
        break
    }
}

# $commands is a collection here. With the collection on the left, `-ne $null`
# and `-eq $null` filter its elements instead of testing the variable, so
# $null is kept on the left-hand side of every comparison below.
if ($null -ne $commands) {
    # Make every entry an array so each command serialises as a JSON list,
    # including entries that were stored as a bare string.
    $normalized = [System.Collections.ArrayList]::new()
    foreach ($cmd in $commands) {
        if ($cmd -is [string]) {
            $null = $normalized.Add(@($cmd))
            continue
        }

        if ($cmd -is [System.Array] -or $cmd -is [System.Collections.IEnumerable]) {
            $null = $normalized.Add(@($cmd))
            continue
        }

        # Unexpected entry type: give up on the whole script.
        $normalized = $null
        break
    }

    $commands = $normalized
}

$result = if ($null -eq $commands) {
    @{ status = 'unsupported' }
} else {
    @{ status = 'ok'; commands = $commands }
}

# The leading comma wraps $result in a one-element array so the hashtable
# reaches ConvertTo-Json as a single pipeline object.
,$result | ConvertTo-Json -Depth 3

View File

@@ -0,0 +1,755 @@
use std::path::Path;
use once_cell::sync::Lazy;
use regex::Regex;
use shlex::split as shlex_split;
use url::Url;
/// Windows-specific danger check for `command`.
///
/// Prefers structured parsing for PowerShell/CMD so URL-bearing invocations
/// of ShellExecute-style entry points are spotted first, then falls back to
/// simple argv heuristics for programs launched directly.
pub fn is_dangerous_command_windows(command: &[String]) -> bool {
    is_dangerous_powershell(command)
        || is_dangerous_cmd(command)
        || is_direct_gui_launch(command)
}
/// Reports whether `command` is a PowerShell invocation whose script either
/// hands a URL to a ShellExecute-style launcher or force-deletes files.
///
/// Returns `false` when `command[0]` is not a PowerShell executable or when
/// the invocation cannot be parsed.
fn is_dangerous_powershell(command: &[String]) -> bool {
    let Some((exe, rest)) = command.split_first() else {
        return false;
    };
    if !is_powershell_executable(exe) {
        return false;
    }

    // Parse the PowerShell invocation to get a flat token list we can scan for
    // dangerous cmdlets/COM calls plus any URL-looking arguments. This is a
    // best-effort shlex split of the script text, not a full PS parser.
    let Some(parsed) = parse_powershell_invocation(rest) else {
        return false;
    };

    // Lower-cased tokens with surrounding quotes removed, for matching.
    let tokens_lc: Vec<String> = parsed
        .tokens
        .iter()
        .map(|t| t.trim_matches('\'').trim_matches('"').to_ascii_lowercase())
        .collect();
    let has_url = args_have_url(&parsed.tokens);

    if has_url {
        // Cmdlets, aliases, and COM entry points that open their argument
        // with the associated application.
        let uses_shell_launcher = tokens_lc.iter().any(|t| {
            matches!(
                t.as_str(),
                "start-process" | "start" | "saps" | "invoke-item" | "ii"
            ) || t.contains("start-process")
                || t.contains("invoke-item")
                || t.contains("shellexecute")
                || t.contains("shell.application")
        });
        if uses_shell_launcher {
            return true;
        }

        if let Some(first) = tokens_lc.first() {
            // Accept the launcher names with and without `.exe`, matching
            // what `is_direct_gui_launch` does for the same programs.
            let launcher = first.as_str();

            // Legacy ShellExecute path via url.dll
            let rundll32_url_handler = matches!(launcher, "rundll32" | "rundll32.exe")
                && tokens_lc
                    .iter()
                    .any(|t| t.contains("url.dll,fileprotocolhandler"));

            if rundll32_url_handler
                || matches!(
                    launcher,
                    "mshta" | "mshta.exe" | "explorer" | "explorer.exe"
                )
                || is_browser_executable(first)
            {
                return true;
            }
        }
    }

    // Check for force delete operations (e.g., Remove-Item -Force)
    has_force_delete_cmdlet(&tokens_lc)
}
/// Reports whether `command` is a `cmd` / `cmd.exe` invocation whose command
/// body opens a URL via `start`, force-deletes files (`del` / `erase` with a
/// force flag), or removes a directory tree quietly (`rd` / `rmdir` with both
/// the recursive and quiet flags).
fn is_dangerous_cmd(command: &[String]) -> bool {
    const CMD_SEPARATORS: &[&str] = &["&", "&&", "|", "||"];

    let Some((exe, rest)) = command.split_first() else {
        return false;
    };
    match executable_basename(exe) {
        Some(base) if base == "cmd" || base == "cmd.exe" => {}
        _ => return false,
    }

    // Skip leading `/x` switches until the one that introduces the command
    // body. Running out of arguments, or meeting any other token first, means
    // there is nothing to classify.
    let mut args = rest.iter();
    loop {
        let Some(arg) = args.next() else {
            return false;
        };
        let lowered = arg.to_ascii_lowercase();
        if matches!(lowered.as_str(), "/c" | "/r" | "-c") {
            break;
        }
        if !lowered.starts_with('/') {
            return false;
        }
    }

    let body: Vec<String> = args.cloned().collect();
    if body.is_empty() {
        return false;
    }

    // A body passed as one string is split into words; if that split fails
    // the string is kept as a single word.
    let words: Vec<String> = match body.as_slice() {
        [script] => shlex_split(script).unwrap_or_else(|| vec![script.clone()]),
        _ => body,
    };

    // Refine tokens by splitting concatenated CMD operators (e.g. "echo hi&del")
    let tokens: Vec<String> = words
        .iter()
        .flat_map(|word| split_embedded_cmd_operators(word))
        .collect();

    tokens
        .split(|token| CMD_SEPARATORS.contains(&token.as_str()))
        .any(|segment| {
            let Some(head) = segment.first() else {
                return false;
            };
            match head.to_ascii_lowercase().as_str() {
                // Classic `cmd /c ... start https://...` ShellExecute path.
                "start" => args_have_url(segment),
                // Force delete: del /f, erase /f
                "del" | "erase" => has_force_flag_cmd(segment),
                // Recursive directory removal: rd /s /q, rmdir /s /q
                "rd" | "rmdir" => {
                    has_recursive_flag_cmd(segment) && has_quiet_flag_cmd(segment)
                }
                _ => false,
            }
        })
}
/// Reports whether `command` directly launches Explorer, mshta, rundll32's
/// `url.dll,FileProtocolHandler`, or a browser executable with a URL
/// anywhere in its arguments.
fn is_direct_gui_launch(command: &[String]) -> bool {
    let Some((exe, rest)) = command.split_first() else {
        return false;
    };
    let Some(base) = executable_basename(exe) else {
        return false;
    };

    let is_url_opening_program = matches!(
        base.as_str(),
        "explorer" | "explorer.exe" | "mshta" | "mshta.exe"
    ) || ((base == "rundll32" || base == "rundll32.exe")
        && rest.iter().any(|arg| {
            arg.to_ascii_lowercase()
                .contains("url.dll,fileprotocolhandler")
        }))
        || is_browser_executable(&base);

    // Every launcher above is only a concern when it is actually given a URL.
    is_url_opening_program && args_have_url(rest)
}
/// Split concatenated CMD operators out of `token`, so `echo hi&del` becomes
/// `["echo hi", "&", "del"]`.
///
/// Handles `&`, `&&`, `|`, `||`; a doubled operator is kept as one piece.
/// Pieces that are empty or whitespace-only are dropped. Best-effort (CMD
/// escaping is weird by nature).
fn split_embedded_cmd_operators(token: &str) -> Vec<String> {
    // `&` and `|` are ASCII, so scanning bytes finds them without ever
    // landing inside a multi-byte character.
    let bytes = token.as_bytes();
    let mut pieces: Vec<String> = Vec::new();
    let mut literal_start = 0;
    let mut pos = 0;

    while pos < bytes.len() {
        let byte = bytes[pos];
        if byte != b'&' && byte != b'|' {
            pos += 1;
            continue;
        }

        // Text accumulated since the previous operator.
        if pos > literal_start {
            pieces.push(token[literal_start..pos].to_string());
        }

        // Detect doubled operator: && or ||
        let width = if bytes.get(pos + 1) == Some(&byte) { 2 } else { 1 };
        pieces.push(token[pos..pos + width].to_string());

        pos += width;
        literal_start = pos;
    }

    if literal_start < token.len() {
        pieces.push(token[literal_start..].to_string());
    }

    pieces.retain(|piece| !piece.trim().is_empty());
    pieces
}
/// Reports whether any command segment of a PowerShell token stream contains
/// both a delete cmdlet/alias and a `-Force` switch.
///
/// Tokens are first grouped into segments at hard separators (`;`, `|`, `&`,
/// newline, carriage return, tab), which may also appear inside a token. Within
/// a segment, tokens are further split on block/grouping punctuation so that
/// `{Remove-Item` or `-Force}` still match. Matching is ASCII case-insensitive;
/// `-Force:<value>` counts as a force switch too.
fn has_force_delete_cmdlet(tokens: &[String]) -> bool {
    const DELETE_CMDLETS: &[&str] = &["remove-item", "ri", "rm", "del", "erase", "rd", "rmdir"];
    // Hard separators end a command segment, so -Force must share a segment with the cmdlet.
    const SEG_SEPS: &[char] = &[';', '|', '&', '\n', '\r', '\t'];
    // Soft separators are punctuation that can stick to a token.
    const SOFT_SEPS: &[char] = &['{', '}', '(', ')', '[', ']', ',', ';'];

    // Pass 1: group words into rough command segments.
    let mut segments: Vec<Vec<&str>> = vec![Vec::new()];
    for token in tokens {
        let mut pieces = token.split(SEG_SEPS).peekable();
        while let Some(piece) = pieces.next() {
            let word = piece.trim();
            match segments.last_mut() {
                Some(current) if !word.is_empty() => current.push(word),
                _ => {}
            }
            // Another piece after this one means a hard separator sat between them;
            // open a new segment unless the current one is still empty.
            let separator_follows = pieces.peek().is_some();
            if separator_follows && segments.last().is_some_and(|current| !current.is_empty()) {
                segments.push(Vec::new());
            }
        }
    }

    // Pass 2: look for a segment holding both a delete command and a force switch.
    segments.iter().any(|segment| {
        let atoms: Vec<&str> = segment
            .iter()
            .copied()
            .flat_map(|word| word.split(SOFT_SEPS))
            .map(str::trim)
            .filter(|atom| !atom.is_empty())
            .collect();
        let deletes = atoms.iter().any(|atom| {
            DELETE_CMDLETS
                .iter()
                .any(|name| atom.eq_ignore_ascii_case(name))
        });
        let forces = atoms.iter().any(|atom| {
            atom.eq_ignore_ascii_case("-force")
                || atom
                    .get(..7)
                    .is_some_and(|prefix| prefix.eq_ignore_ascii_case("-force:"))
        });
        deletes && forces
    })
}
/// Returns `true` when some element of `args` is exactly the CMD force switch
/// (`/f` or `/F`), as used by `del`/`erase`.
fn has_force_flag_cmd(args: &[String]) -> bool {
    args.iter()
        .map(String::as_str)
        .any(|arg| matches!(arg, "/f" | "/F"))
}
/// Returns `true` when some element of `args` is exactly the CMD recursive
/// switch (`/s` or `/S`), as used by `rd`/`rmdir`.
fn has_recursive_flag_cmd(args: &[String]) -> bool {
    args.iter()
        .map(String::as_str)
        .any(|arg| matches!(arg, "/s" | "/S"))
}
/// Returns `true` when some element of `args` is exactly the CMD quiet switch
/// (`/q` or `/Q`), as used by `rd`/`rmdir`.
fn has_quiet_flag_cmd(args: &[String]) -> bool {
    args.iter()
        .map(String::as_str)
        .any(|arg| matches!(arg, "/q" | "/Q"))
}
fn args_have_url(args: &[String]) -> bool {
args.iter().any(|arg| looks_like_url(arg))
}
/// Returns `true` when `token` is, or embeds, an `http`/`https` URL.
///
/// The token may carry surrounding PowerShell punctuation (quotes, parentheses,
/// trailing semicolons) or have the URL glued to other text, e.g.
/// `Start-Process('https://...')` arriving as a single token.
fn looks_like_url(token: &str) -> bool {
    // Strip common PowerShell punctuation around inline URLs (quotes, parens, trailing semicolons).
    // Capture the middle token after trimming leading quotes/parens/whitespace and trailing
    // semicolons/closing parens.
    static RE: Lazy<Option<Regex>> =
        Lazy::new(|| Regex::new(r#"^[ "'\(\s]*([^\s"'\);]+)[\s;\)]*$"#).ok());
    // If the token embeds a URL alongside other text, grab the substring starting at the
    // first URL prefix. URI schemes are case-insensitive, so the search runs on an
    // ASCII-lowercased copy; otherwise `Start-Process('HTTPS://...')` would slip through.
    // ASCII lowercasing preserves byte offsets, so the index is valid for `token` as well.
    let lowered = token.to_ascii_lowercase();
    let urlish = lowered
        .find("https://")
        .or_else(|| lowered.find("http://"))
        .and_then(|idx| token.get(idx..))
        .unwrap_or(token);
    let candidate = RE
        .as_ref()
        .and_then(|re| re.captures(urlish))
        .and_then(|caps| caps.get(1))
        .map(|m| m.as_str())
        .unwrap_or(urlish);
    let Ok(url) = Url::parse(candidate) else {
        return false;
    };
    matches!(url.scheme(), "http" | "https")
}
/// Returns the ASCII-lowercased final path component of `exe`.
///
/// Yields `None` when the path has no file name (for example it is empty or
/// ends in `..`) or when the name is not valid UTF-8.
fn executable_basename(exe: &str) -> Option<String> {
    let file_name = Path::new(exe).file_name()?;
    let file_name = file_name.to_str()?;
    Some(file_name.to_ascii_lowercase())
}
/// Returns `true` when the basename of `exe` (case-insensitively) names Windows
/// PowerShell or PowerShell Core, with or without the `.exe` suffix.
fn is_powershell_executable(exe: &str) -> bool {
    const POWERSHELL_NAMES: [&str; 4] = ["powershell", "powershell.exe", "pwsh", "pwsh.exe"];
    executable_basename(exe).is_some_and(|base| POWERSHELL_NAMES.contains(&base.as_str()))
}
/// Returns `true` when `name` is one of the known browser executables
/// (`chrome`, `msedge`, `firefox`, `iexplore`), optionally with an `.exe` suffix.
///
/// The comparison is case-sensitive; callers pass an already-lowercased basename.
fn is_browser_executable(name: &str) -> bool {
    // Drop at most one trailing ".exe" so both spellings share a single list.
    let stem = name.strip_suffix(".exe").unwrap_or(name);
    matches!(stem, "chrome" | "msedge" | "firefox" | "iexplore")
}
/// Result of `parse_powershell_invocation`: the tokens of the script or command
/// that a PowerShell invocation was asked to run.
struct ParsedPowershell {
// Either the shlex-split `-Command` script, or the trailing argv entries when
// no `-Command` flag was used.
tokens: Vec<String>,
}
/// Extracts the script tokens from the arguments that follow a PowerShell
/// executable.
///
/// * `-Command <script>` / `/Command <script>` / `-c <script>`: the script must
///   be the final argument; it is split with `shlex_split`.
/// * `-Command:<script>` / `/Command:<script>`: must itself be the final
///   argument; the text after the first `:` is split with `shlex_split`.
/// * Any other argument starting with `-` is skipped.
/// * The first argument that is not a flag starts the command, and it plus all
///   remaining arguments are returned unchanged.
///
/// Returns `None` when no script is found, when extra arguments trail a
/// `-Command` script, or when `shlex_split` fails.
fn parse_powershell_invocation(args: &[String]) -> Option<ParsedPowershell> {
    for (position, raw) in args.iter().enumerate() {
        let flag = raw.to_ascii_lowercase();

        if matches!(flag.as_str(), "-command" | "/command" | "-c") {
            let script = args.get(position + 1)?;
            if position + 2 != args.len() {
                return None;
            }
            return shlex_split(script).map(|tokens| ParsedPowershell { tokens });
        }

        if flag.starts_with("-command:") || flag.starts_with("/command:") {
            if position + 1 != args.len() {
                return None;
            }
            let (_, script) = raw.split_once(':')?;
            return shlex_split(script).map(|tokens| ParsedPowershell { tokens });
        }

        // All remaining switches (including -NoLogo, -NoProfile, ...) are skipped.
        if flag.starts_with('-') {
            continue;
        }

        // First non-flag argument: treat it and everything after it as the command.
        return Some(ParsedPowershell {
            tokens: args[position..].to_vec(),
        });
    }
    None
}
#[cfg(test)]
mod tests {
    use super::is_dangerous_command_windows;

    /// Builds an owned argv from string literals.
    fn vec_str(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| (*item).to_owned()).collect()
    }

    /// Asserts that `argv` is classified as dangerous.
    #[track_caller]
    fn assert_dangerous(argv: &[&str]) {
        assert!(is_dangerous_command_windows(&vec_str(argv)));
    }

    /// Asserts that `argv` is NOT classified as dangerous.
    #[track_caller]
    fn assert_not_flagged(argv: &[&str]) {
        assert!(!is_dangerous_command_windows(&vec_str(argv)));
    }

    #[test]
    fn powershell_start_process_url_is_dangerous() {
        assert_dangerous(&[
            "powershell",
            "-NoLogo",
            "-Command",
            "Start-Process 'https://example.com'",
        ]);
    }

    #[test]
    fn powershell_start_process_url_with_trailing_semicolon_is_dangerous() {
        assert_dangerous(&["powershell", "-Command", "Start-Process('https://example.com');"]);
    }

    #[test]
    fn powershell_start_process_local_is_not_flagged() {
        assert_not_flagged(&["powershell", "-Command", "Start-Process notepad.exe"]);
    }

    #[test]
    fn cmd_start_with_url_is_dangerous() {
        assert_dangerous(&["cmd", "/c", "start", "https://example.com"]);
    }

    #[test]
    fn msedge_with_url_is_dangerous() {
        assert_dangerous(&["msedge.exe", "https://example.com"]);
    }

    #[test]
    fn explorer_with_directory_is_not_flagged() {
        assert_not_flagged(&["explorer.exe", "."]);
    }

    // Force delete tests for PowerShell

    #[test]
    fn powershell_remove_item_force_is_dangerous() {
        assert_dangerous(&["powershell", "-Command", "Remove-Item test -Force"]);
    }

    #[test]
    fn powershell_remove_item_recurse_force_is_dangerous() {
        assert_dangerous(&["powershell", "-Command", "Remove-Item test -Recurse -Force"]);
    }

    #[test]
    fn powershell_ri_alias_force_is_dangerous() {
        assert_dangerous(&["pwsh", "-Command", "ri test -Force"]);
    }

    #[test]
    fn powershell_remove_item_without_force_is_not_flagged() {
        assert_not_flagged(&["powershell", "-Command", "Remove-Item test"]);
    }

    // Force delete tests for CMD

    #[test]
    fn cmd_del_force_is_dangerous() {
        assert_dangerous(&["cmd", "/c", "del", "/f", "test.txt"]);
    }

    #[test]
    fn cmd_erase_force_is_dangerous() {
        assert_dangerous(&["cmd", "/c", "erase", "/f", "test.txt"]);
    }

    #[test]
    fn cmd_del_without_force_is_not_flagged() {
        assert_not_flagged(&["cmd", "/c", "del", "test.txt"]);
    }

    #[test]
    fn cmd_rd_recursive_is_dangerous() {
        assert_dangerous(&["cmd", "/c", "rd", "/s", "/q", "test"]);
    }

    #[test]
    fn cmd_rd_without_quiet_is_not_flagged() {
        assert_not_flagged(&["cmd", "/c", "rd", "/s", "test"]);
    }

    #[test]
    fn cmd_rmdir_recursive_is_dangerous() {
        assert_dangerous(&["cmd", "/c", "rmdir", "/s", "/q", "test"]);
    }

    // Test exact scenario from issue #8567
    #[test]
    fn powershell_remove_item_path_recurse_force_is_dangerous() {
        assert_dangerous(&[
            "powershell",
            "-Command",
            "Remove-Item -Path 'test' -Recurse -Force",
        ]);
    }

    #[test]
    fn powershell_remove_item_force_with_semicolon_is_dangerous() {
        assert_dangerous(&[
            "powershell",
            "-Command",
            "Remove-Item test -Force; Write-Host done",
        ]);
    }

    #[test]
    fn powershell_remove_item_force_inside_block_is_dangerous() {
        assert_dangerous(&[
            "powershell",
            "-Command",
            "if ($true) { Remove-Item test -Force}",
        ]);
    }

    #[test]
    fn powershell_remove_item_force_inside_brackets_is_dangerous() {
        assert_dangerous(&["powershell", "-Command", "[void]( Remove-Item test -Force)]"]);
    }

    #[test]
    fn cmd_del_path_containing_f_is_not_flagged() {
        assert_not_flagged(&["cmd", "/c", "del", "C:/foo/bar.txt"]);
    }

    #[test]
    fn cmd_rd_path_containing_s_is_not_flagged() {
        assert_not_flagged(&["cmd", "/c", "rd", "C:/source"]);
    }

    #[test]
    fn cmd_bypass_chained_del_is_dangerous() {
        assert_dangerous(&["cmd", "/c", "echo", "hello", "&", "del", "/f", "file.txt"]);
    }

    #[test]
    fn powershell_chained_no_space_is_dangerous() {
        assert_dangerous(&[
            "powershell",
            "-Command",
            "Write-Host hi;Remove-Item -Force C:\\tmp",
        ]);
    }

    #[test]
    fn powershell_comma_separated_is_dangerous() {
        assert_dangerous(&["powershell", "-Command", "del,-Force,C:\\foo"]);
    }

    #[test]
    fn cmd_echo_del_is_not_dangerous() {
        assert_not_flagged(&["cmd", "/c", "echo", "del", "/f"]);
    }

    #[test]
    fn cmd_del_single_string_argument_is_dangerous() {
        assert_dangerous(&["cmd", "/c", "del /f file.txt"]);
    }

    #[test]
    fn cmd_del_chained_single_string_argument_is_dangerous() {
        assert_dangerous(&["cmd", "/c", "echo hello & del /f file.txt"]);
    }

    #[test]
    fn cmd_chained_no_space_del_is_dangerous() {
        assert_dangerous(&["cmd", "/c", "echo hi&del /f file.txt"]);
    }

    #[test]
    fn cmd_chained_andand_no_space_del_is_dangerous() {
        assert_dangerous(&["cmd", "/c", "echo hi&&del /f file.txt"]);
    }

    #[test]
    fn cmd_chained_oror_no_space_del_is_dangerous() {
        assert_dangerous(&["cmd", "/c", "echo hi||del /f file.txt"]);
    }

    #[test]
    fn cmd_start_url_single_string_is_dangerous() {
        assert_dangerous(&["cmd", "/c", "start https://example.com"]);
    }

    #[test]
    fn cmd_chained_no_space_rmdir_is_dangerous() {
        assert_dangerous(&["cmd", "/c", "echo hi&rmdir /s /q testdir"]);
    }

    #[test]
    fn cmd_del_force_uppercase_flag_is_dangerous() {
        assert_dangerous(&["cmd", "/c", "DEL", "/F", "file.txt"]);
    }

    #[test]
    fn cmdexe_r_del_force_is_dangerous() {
        assert_dangerous(&["cmd.exe", "/r", "del", "/f", "file.txt"]);
    }

    #[test]
    fn cmd_start_quoted_url_single_string_is_dangerous() {
        assert_dangerous(&["cmd", "/c", r#"start "https://example.com""#]);
    }

    #[test]
    fn cmd_start_title_then_url_is_dangerous() {
        assert_dangerous(&["cmd", "/c", r#"start "" https://example.com"#]);
    }

    #[test]
    fn powershell_rm_alias_force_is_dangerous() {
        assert_dangerous(&["powershell", "-Command", "rm test -Force"]);
    }

    #[test]
    fn powershell_benign_force_separate_command_is_not_dangerous() {
        assert_not_flagged(&[
            "powershell",
            "-Command",
            "Get-ChildItem -Force; Remove-Item test",
        ]);
    }
}

View File

@@ -0,0 +1,623 @@
use base64::Engine;
use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
use serde::Deserialize;
use std::path::Path;
use std::process::Command;
use std::sync::LazyLock;
/// Source text of `powershell_parser.ps1`, embedded at compile time. It is
/// base64-encoded by `encoded_parser_script` and handed to PowerShell via
/// `-EncodedCommand` to parse candidate scripts.
const POWERSHELL_PARSER_SCRIPT: &str = include_str!("powershell_parser.ps1");
/// Decides whether `command` may be treated as safe on Windows.
///
/// Only PowerShell invocations can qualify: the command must parse into a
/// sequence of PowerShell commands and every one of them must pass
/// `is_safe_powershell_command`. Anything else, including direct CMD commands,
/// is reported as unsafe.
pub fn is_safe_command_windows(command: &[String]) -> bool {
    // Anything that is not a parseable PowerShell invocation is unsafe for now.
    let Some(commands) = try_parse_powershell_command_sequence(command) else {
        return false;
    };
    commands
        .iter()
        .all(|words| is_safe_powershell_command(words))
}
/// Parses `command` into its individual PowerShell commands when the first
/// element is a PowerShell binary; returns `None` otherwise (or when `command`
/// is empty, or the invocation cannot be parsed).
///
/// For example, the tokens of `pwsh Get-ChildItem | Measure-Object` yield two
/// command sequences.
fn try_parse_powershell_command_sequence(command: &[String]) -> Option<Vec<Vec<String>>> {
    match command.split_first() {
        Some((exe, rest)) if is_powershell_executable(exe) => {
            parse_powershell_invocation(exe, rest)
        }
        _ => None,
    }
}
/// Turns the arguments following a PowerShell executable into discrete command
/// vectors, returning `None` for anything that looks unsafe or ambiguous.
///
/// `executable` is forwarded to `parse_powershell_script`, which parses the
/// script text.
fn parse_powershell_invocation(executable: &str, args: &[String]) -> Option<Vec<Vec<String>>> {
    for (position, raw) in args.iter().enumerate() {
        let lowered = raw.to_ascii_lowercase();
        let flag = lowered.as_str();

        if matches!(flag, "-command" | "/command" | "-c") {
            let script = args.get(position + 1)?;
            // The script must be the single, final token.
            // Rejected: "pwsh -Command foo bar", "powershell -c ls extra".
            if position + 2 != args.len() {
                return None;
            }
            return parse_powershell_script(executable, script);
        }

        if flag.starts_with("-command:") || flag.starts_with("/command:") {
            // Nothing may follow the inline script.
            // Rejected: "pwsh -Command:dir C:\\" with trailing args.
            if position + 1 != args.len() {
                return None;
            }
            let (_, script) = raw.split_once(':')?;
            return parse_powershell_script(executable, script);
        }

        // Benign switches that take no argument are tolerated.
        if matches!(
            flag,
            "-nologo" | "-noprofile" | "-noninteractive" | "-mta" | "-sta"
        ) {
            continue;
        }

        // Opaque or unnecessary switches are refused outright, e.g.
        // "pwsh -EncodedCommand ..." or "powershell -File script.ps1".
        // `/file` is listed explicitly because it does not start with `-`.
        let explicitly_forbidden = matches!(
            flag,
            "-encodedcommand"
                | "-ec"
                | "-file"
                | "/file"
                | "-windowstyle"
                | "-executionpolicy"
                | "-workingdirectory"
        );
        // Any other unknown switch is refused conservatively ("pwsh -UnknownFlag").
        if explicitly_forbidden || flag.starts_with('-') {
            return None;
        }

        // First non-flag token: PowerShell was invoked without -Command, e.g.
        // ["pwsh", "-NoLogo", "git", "-c", "core.pager=cat", "status"].
        // Treat the remainder as the command.
        let script = join_arguments_as_script(&args[position..]);
        return parse_powershell_script(executable, &script);
    }
    // No script at all: "pwsh", or "powershell.exe -NoLogo" with nothing after it.
    None
}
/// Parses an inline PowerShell script (as passed via `-Command '<script>'` or
/// `-Command:<script>`) into its commands.
///
/// Returns `Some` only when the parser produced a command list; both the
/// "unsupported" and the "failed" outcomes collapse to `None`.
fn parse_powershell_script(executable: &str, script: &str) -> Option<Vec<Vec<String>>> {
    match parse_with_powershell_ast(executable, script) {
        PowershellParseOutcome::Commands(commands) => Some(commands),
        PowershellParseOutcome::Unsupported | PowershellParseOutcome::Failed => None,
    }
}
/// Reports whether `exe` names one of the supported PowerShell binaries
/// (`powershell`, `powershell.exe`, `pwsh`, `pwsh.exe`), ignoring ASCII case.
///
/// Only the final path component is compared; if it cannot be extracted, the
/// whole string is compared instead.
fn is_powershell_executable(exe: &str) -> bool {
    const POWERSHELL_BINARIES: [&str; 4] = ["powershell", "powershell.exe", "pwsh", "pwsh.exe"];
    let file_name = Path::new(exe)
        .file_name()
        .and_then(|name| name.to_str())
        .unwrap_or(exe);
    POWERSHELL_BINARIES
        .iter()
        .any(|known| file_name.eq_ignore_ascii_case(known))
}
/// Parses `script` with the real PowerShell parser by running `executable` on
/// the embedded parser script.
///
/// The parser script is passed through `-EncodedCommand`, and the script under
/// inspection is handed over base64-encoded in the `CODEX_POWERSHELL_PAYLOAD`
/// environment variable. The child's stdout is decoded as JSON into a
/// `PowershellParserOutput`.
///
/// Returns `Failed` when the process cannot be run, exits unsuccessfully, or
/// prints something that does not deserialize; otherwise the outcome reported
/// by the parser. There is no manual splitting fallback.
fn parse_with_powershell_ast(executable: &str, script: &str) -> PowershellParseOutcome {
    let payload = encode_powershell_base64(script);
    let run = Command::new(executable)
        .args([
            "-NoLogo",
            "-NoProfile",
            "-NonInteractive",
            "-EncodedCommand",
            encoded_parser_script(),
        ])
        .env("CODEX_POWERSHELL_PAYLOAD", &payload)
        .output();

    let Ok(output) = run else {
        return PowershellParseOutcome::Failed;
    };
    if !output.status.success() {
        return PowershellParseOutcome::Failed;
    }
    match serde_json::from_slice::<PowershellParserOutput>(&output.stdout) {
        Ok(parsed) => parsed.into_outcome(),
        Err(_) => PowershellParseOutcome::Failed,
    }
}
/// Encodes `script` as standard base64 over its UTF-16 little-endian bytes.
fn encode_powershell_base64(script: &str) -> String {
    let utf16_le: Vec<u8> = script
        .encode_utf16()
        .flat_map(u16::to_le_bytes)
        .collect();
    BASE64_STANDARD.encode(utf16_le)
}
/// Returns the base64 form of the embedded parser script, computed on first use
/// and cached for the lifetime of the process.
fn encoded_parser_script() -> &'static str {
    static ENCODED: LazyLock<String> =
        LazyLock::new(|| encode_powershell_base64(POWERSHELL_PARSER_SCRIPT));
    ENCODED.as_str()
}
/// JSON document deserialized from the parser process's stdout
/// (see `parse_with_powershell_ast`). Unknown fields make deserialization fail.
#[derive(Deserialize)]
#[serde(deny_unknown_fields)]
struct PowershellParserOutput {
// "ok", "unsupported", or anything else (treated as a failure) - see `into_outcome`.
status: String,
// Parsed commands, each a list of words; may be absent.
commands: Option<Vec<Vec<String>>>,
}
impl PowershellParserOutput {
    /// Converts the raw parser report into a `PowershellParseOutcome`.
    ///
    /// * status `"ok"` with a non-empty command list in which every command and
    ///   every word is non-empty -> `Commands`
    /// * status `"ok"` otherwise, or status `"unsupported"` -> `Unsupported`
    /// * any other status -> `Failed`
    fn into_outcome(self) -> PowershellParseOutcome {
        let Self { status, commands } = self;
        match status.as_str() {
            "ok" => match commands {
                Some(commands) if Self::is_well_formed(&commands) => {
                    PowershellParseOutcome::Commands(commands)
                }
                _ => PowershellParseOutcome::Unsupported,
            },
            "unsupported" => PowershellParseOutcome::Unsupported,
            _ => PowershellParseOutcome::Failed,
        }
    }

    /// True when there is at least one command and no command or word is empty.
    fn is_well_formed(commands: &[Vec<String>]) -> bool {
        if commands.is_empty() {
            return false;
        }
        commands
            .iter()
            .all(|words| !words.is_empty() && !words.iter().any(String::is_empty))
    }
}
/// Outcome of asking PowerShell to parse a script. `parse_powershell_script`
/// treats everything except `Commands` as "no result".
enum PowershellParseOutcome {
/// One word list per parsed command; never empty, and no word is empty
/// (enforced by `PowershellParserOutput::into_outcome`).
Commands(Vec<Vec<String>>),
/// The parser reported "unsupported", or reported "ok" without a usable
/// command list.
Unsupported,
/// The parser process could not be run, exited unsuccessfully, printed
/// undecodable output, or reported an unrecognized status.
Failed,
}
/// Joins argv-style arguments into a single script string separated by spaces.
///
/// The first argument (the command name) is used verbatim; every later argument
/// goes through `quote_argument`. An empty slice yields an empty string.
fn join_arguments_as_script(args: &[String]) -> String {
    args.iter()
        .enumerate()
        .map(|(index, arg)| {
            if index == 0 {
                arg.clone()
            } else {
                quote_argument(arg)
            }
        })
        .collect::<Vec<String>>()
        .join(" ")
}
/// Quotes a single argument for inclusion in a PowerShell script string.
///
/// * empty argument -> `''`
/// * argument containing whitespace -> wrapped in single quotes, with embedded
///   single quotes doubled
/// * anything else -> returned unchanged
fn quote_argument(arg: &str) -> String {
    if arg.is_empty() {
        return "''".to_string();
    }
    if arg.chars().any(char::is_whitespace) {
        format!("'{}'", arg.replace('\'', "''"))
    } else {
        arg.to_string()
    }
}
/// Checks a single parsed PowerShell command against the read-only safelist.
///
/// Each word is normalized by stripping surrounding parentheses and leading
/// dashes and lowercasing. The command is rejected if ANY word normalizes to a
/// known side-effecting cmdlet; otherwise the first word must be a safelisted
/// read-only command, with `git` and `rg` subject to their own argument checks.
fn is_safe_powershell_command(words: &[String]) -> bool {
    const SIDE_EFFECTING_CMDLETS: &[&str] = &[
        "set-content",
        "add-content",
        "out-file",
        "new-item",
        "remove-item",
        "move-item",
        "copy-item",
        "rename-item",
        "start-process",
        "stop-process",
    ];

    fn normalize(word: &str) -> String {
        word.trim_matches(|c| c == '(' || c == ')')
            .trim_start_matches('-')
            .to_ascii_lowercase()
    }

    // Rejected here: "pwsh -Command ''" and "pwsh -Command \"\"".
    let Some(first) = words.first() else {
        return false;
    };

    // Reject side-effecting cmdlets anywhere in the command, including nested
    // uses such as "Write-Output (Set-Content foo6.txt 'abc')" or
    // "Get-Content (New-Item bar.txt)". This also covers the command word itself.
    let mentions_side_effect = words
        .iter()
        .any(|word| SIDE_EFFECTING_CMDLETS.contains(&normalize(word).as_str()));
    if mentions_side_effect {
        return false;
    }

    match normalize(first).as_str() {
        // Output (no redirection allowed).
        "echo" | "write-output" | "write-host" => true,
        // Listing and reading.
        "dir" | "ls" | "get-childitem" | "gci" => true,
        "cat" | "type" | "gc" | "get-content" => true,
        "select-string" | "sls" | "findstr" => true,
        "measure-object" | "measure" => true,
        "get-location" | "gl" | "pwd" => true,
        "test-path" | "tp" => true,
        "resolve-path" | "rvpa" => true,
        "select-object" | "select" => true,
        "get-item" => true,
        // External tools with their own argument vetting.
        "git" => is_safe_git_command(words),
        "rg" => is_safe_ripgrep(words),
        // Everything else is refused, e.g. "Invoke-WebRequest https://example.com"
        // or "Start-Service Spooler".
        _ => false,
    }
}
/// Verifies that an `rg` invocation uses none of the options that can make
/// ripgrep spawn other executables.
///
/// `words[0]` (the command name) is ignored. Options are compared
/// case-insensitively; value-taking options are matched both bare (`--pre`) and
/// in `--pre=<value>` form.
fn is_safe_ripgrep(words: &[String]) -> bool {
    const EXEC_OPTIONS_TAKING_VALUE: [&str; 2] = ["--pre", "--hostname-bin"];
    const EXEC_OPTIONS_BARE: [&str; 2] = ["--search-zip", "-z"];

    // Rejected: "rg --pre cat pattern", "rg --search-zip pattern".
    let is_unsafe = |arg: &String| {
        let lowered = arg.to_ascii_lowercase();
        EXEC_OPTIONS_BARE.contains(&lowered.as_str())
            || EXEC_OPTIONS_TAKING_VALUE.iter().any(|opt| {
                lowered
                    .strip_prefix(*opt)
                    .is_some_and(|tail| tail.is_empty() || tail.starts_with('='))
            })
    };

    words.iter().skip(1).all(|arg| !is_unsafe(arg))
}
/// Ensures a Git command sticks to whitelisted read-only subcommands.
///
/// `words[0]` (the `git` word) is ignored. Leading global options are skipped;
/// options that take their value as a separate argument also consume that
/// value, so the value can never be mistaken for the subcommand. The first
/// non-option word must be one of the safe subcommands. Arguments after the
/// subcommand are not inspected here.
///
/// Returns `false` when no subcommand is present or when a value-taking option
/// is missing its value.
fn is_safe_git_command(words: &[String]) -> bool {
    const SAFE_SUBCOMMANDS: &[&str] = &["status", "log", "show", "diff", "cat-file"];
    // Global options whose value is the NEXT argument. Skipping only the option
    // itself would let `git --namespace status push` pass, because the value
    // `status` would be read as the subcommand.
    // NOTE(review): the values given to `-c`/`--config-env` are not inspected;
    // confirm that config keys able to launch programs are acceptable here.
    const OPTIONS_WITH_SEPARATE_VALUE: &[&str] = &[
        "-c",
        "--config",
        "--config-env",
        "--exec-path",
        "--git-dir",
        "--namespace",
        "--super-prefix",
        "--work-tree",
    ];

    let mut iter = words.iter().skip(1);
    while let Some(arg) = iter.next() {
        if !arg.starts_with('-') {
            // First non-option word is the subcommand.
            return SAFE_SUBCOMMANDS.contains(&arg.to_ascii_lowercase().as_str());
        }
        let takes_separate_value = OPTIONS_WITH_SEPARATE_VALUE
            .iter()
            .any(|opt| arg.eq_ignore_ascii_case(opt));
        // Rejected: "git -c", "git --git-dir", "git --namespace" with nothing after.
        if takes_separate_value && iter.next().is_none() {
            return false;
        }
        // Inline `--opt=value` forms and other flags carry no separate value; skip them.
    }
    // Rejected: "git" alone, or only options with no subcommand.
    false
}
#[cfg(all(test, windows))]
mod tests {
    use super::*;
    use crate::powershell::try_find_pwsh_executable_blocking;
    use std::string::ToString;

    /// Converts a slice of string literals into owned `String`s for the tests.
    fn vec_str(args: &[&str]) -> Vec<String> {
        args.iter().map(ToString::to_string).collect()
    }

    /// Runs the safety check on an argv written as string literals.
    fn is_safe(argv: &[&str]) -> bool {
        is_safe_command_windows(&vec_str(argv))
    }

    /// Runs the safety check on `exe` followed by `rest`.
    fn is_safe_with_exe(exe: &str, rest: &[&str]) -> bool {
        let mut argv = vec![exe.to_string()];
        argv.extend(rest.iter().map(ToString::to_string));
        is_safe_command_windows(&argv)
    }

    /// Path of the discovered `pwsh` executable as a `String`, if one is found.
    fn pwsh_path() -> Option<String> {
        try_find_pwsh_executable_blocking().map(|pwsh| pwsh.as_path().to_str().unwrap().into())
    }

    #[test]
    fn recognizes_safe_powershell_wrappers() {
        assert!(is_safe(&[
            "powershell.exe",
            "-NoLogo",
            "-Command",
            "Get-ChildItem -Path .",
        ]));
        assert!(is_safe(&["powershell.exe", "-NoProfile", "-Command", "git status"]));
        assert!(is_safe(&["powershell.exe", "Get-Content", "Cargo.toml"]));

        // pwsh parity
        if let Some(pwsh) = pwsh_path() {
            assert!(is_safe_with_exe(&pwsh, &["-NoProfile", "-Command", "Get-ChildItem"]));
        }
    }

    #[test]
    fn accepts_full_path_powershell_invocations() {
        if !cfg!(windows) {
            // Windows only because on Linux path splitting doesn't handle `/` separators properly
            return;
        }

        if let Some(pwsh) = pwsh_path() {
            assert!(is_safe_with_exe(
                &pwsh,
                &["-NoProfile", "-Command", "Get-ChildItem -Path ."]
            ));
        }

        assert!(is_safe(&[
            r"C:\Windows\System32\WindowsPowerShell\v1.0\powershell.exe",
            "-Command",
            "Get-Content Cargo.toml",
        ]));
    }

    #[test]
    fn allows_read_only_pipelines_and_git_usage() {
        let Some(pwsh) = pwsh_path() else {
            return;
        };

        assert!(is_safe_with_exe(
            &pwsh,
            &[
                "-NoLogo",
                "-NoProfile",
                "-Command",
                "rg --files-with-matches foo | Measure-Object | Select-Object -ExpandProperty Count",
            ]
        ));
        assert!(is_safe_with_exe(
            &pwsh,
            &[
                "-NoLogo",
                "-NoProfile",
                "-Command",
                "Get-Content foo.rs | Select-Object -Skip 200",
            ]
        ));
        assert!(is_safe_with_exe(
            &pwsh,
            &[
                "-NoLogo",
                "-NoProfile",
                "-Command",
                "git -c core.pager=cat show HEAD:foo.rs",
            ]
        ));
        assert!(is_safe_with_exe(
            &pwsh,
            &["-Command", "-git cat-file -p HEAD:foo.rs"]
        ));
        assert!(is_safe_with_exe(
            &pwsh,
            &["-Command", "(Get-Content foo.rs -Raw)"]
        ));
        assert!(is_safe_with_exe(
            &pwsh,
            &["-Command", "Get-Item foo.rs | Select-Object Length"]
        ));
    }

    #[test]
    fn rejects_powershell_commands_with_side_effects() {
        assert!(!is_safe(&[
            "powershell.exe",
            "-NoLogo",
            "-Command",
            "Remove-Item foo.txt",
        ]));
        assert!(!is_safe(&["powershell.exe", "-NoProfile", "-Command", "rg --pre cat"]));
        assert!(!is_safe(&["powershell.exe", "-Command", "Set-Content foo.txt 'hello'"]));

        // Redirections are blocked
        assert!(!is_safe(&["powershell.exe", "-Command", "echo hi > out.txt"]));
        assert!(!is_safe(&["powershell.exe", "-Command", "Get-Content x | Out-File y"]));
        assert!(!is_safe(&["powershell.exe", "-Command", "Write-Output foo 2> err.txt"]));

        // Call operator is blocked
        assert!(!is_safe(&["powershell.exe", "-Command", "& Remove-Item foo"]));

        // Chained safe + unsafe must fail
        assert!(!is_safe(&["powershell.exe", "-Command", "Get-ChildItem; Remove-Item foo"]));

        // Nested unsafe cmdlet inside safe command must fail
        assert!(!is_safe(&[
            "powershell.exe",
            "-Command",
            "Write-Output (Set-Content foo6.txt 'abc')",
        ]));

        // Additional nested unsafe cmdlet examples must fail
        assert!(!is_safe(&["powershell.exe", "-Command", "Write-Host (Remove-Item foo.txt)"]));
        assert!(!is_safe(&["powershell.exe", "-Command", "Get-Content (New-Item bar.txt)"]));

        // Unsafe @ expansion.
        assert!(!is_safe(&["powershell.exe", "-Command", "ls @(calc.exe)"]));

        // Unsupported constructs that the AST parser refuses (no fallback to manual splitting).
        assert!(!is_safe(&["powershell.exe", "-Command", "ls && pwd"]));

        // Sub-expressions are rejected even if they contain otherwise safe commands.
        assert!(!is_safe(&["powershell.exe", "-Command", "Write-Output $(Get-Content foo)"]));

        // Empty words from the parser (e.g. '') are rejected.
        assert!(!is_safe(&["powershell.exe", "-Command", "''"]));
    }

    #[test]
    fn accepts_constant_expression_arguments() {
        assert!(is_safe(&["powershell.exe", "-Command", "Get-Content 'foo bar'"]));
        assert!(is_safe(&["powershell.exe", "-Command", "Get-Content \"foo bar\""]));
    }

    #[test]
    fn rejects_dynamic_arguments() {
        assert!(!is_safe(&["powershell.exe", "-Command", "Get-Content $foo"]));
        assert!(!is_safe(&["powershell.exe", "-Command", "Write-Output \"foo $bar\""]));
    }

    #[test]
    fn uses_invoked_powershell_variant_for_parsing() {
        if !cfg!(windows) {
            return;
        }

        let chain = "pwd && ls";
        assert!(
            !is_safe(&["powershell.exe", "-NoProfile", "-Command", chain]),
            "`{chain}` is not recognized by powershell.exe"
        );

        if let Some(pwsh) = pwsh_path() {
            assert!(
                is_safe_with_exe(&pwsh, &["-NoProfile", "-Command", chain]),
                "`{chain}` should be considered safe to pwsh.exe"
            );
        }
    }
}