fix: share ripgrep safety helper

This commit is contained in:
Adrian Bravo
2026-05-06 15:03:09 -07:00
parent b4a65625ab
commit 7cfec53421
4 changed files with 130 additions and 71 deletions

View File

@@ -4,6 +4,8 @@ use crate::command_safety::is_dangerous_command::executable_name_lookup_key;
// may appear before it (e.g., `-C`, `-c`, `--git-dir`).
// Implemented in `is_dangerous_command` and shared here.
use crate::command_safety::is_dangerous_command::find_git_subcommand;
use crate::command_safety::ripgrep::RipgrepArgCase;
use crate::command_safety::ripgrep::is_safe_ripgrep_command;
use crate::command_safety::windows_safe_commands::is_safe_command_windows;
#[cfg(windows)]
use crate::command_safety::windows_safe_commands::is_safe_powershell_words as is_safe_powershell_words_windows;
@@ -127,7 +129,7 @@ fn is_safe_to_call_with_exec(command: &[String]) -> bool {
}
// Ripgrep
Some("rg") => is_safe_ripgrep_command(command),
Some("rg") => is_safe_ripgrep_command(command, RipgrepArgCase::Sensitive),
// Git
Some("git") => is_safe_git_command(command),
@@ -148,60 +150,6 @@ fn is_safe_to_call_with_exec(command: &[String]) -> bool {
}
}
fn is_safe_ripgrep_command(command: &[String]) -> bool {
!command
.iter()
.skip(1)
.map(String::as_str)
.any(is_unsafe_ripgrep_arg)
}
fn is_unsafe_ripgrep_arg(arg: &str) -> bool {
match arg {
// Takes an arbitrary command that is executed for each match.
"--pre"
// Takes a command that can be used to obtain the local hostname.
| "--hostname-bin"
// Calls out to other decompression tools, so do not auto-approve
// out of an abundance of caution.
| "--search-zip"
| "-z" => true,
_ => {
arg.starts_with("--pre=")
|| arg.starts_with("--hostname-bin=")
|| arg.starts_with("--search-zip=")
|| ripgrep_short_options_contain_search_zip(arg)
}
}
}
fn ripgrep_short_options_contain_search_zip(arg: &str) -> bool {
let Some(short_options) = arg.strip_prefix('-') else {
return false;
};
if short_options.is_empty() || short_options.starts_with('-') {
return false;
}
for option in short_options.chars() {
if option == 'z' {
return true;
}
if ripgrep_short_option_takes_value(option) {
return false;
}
}
false
}
fn ripgrep_short_option_takes_value(option: char) -> bool {
matches!(
option,
'A' | 'B' | 'C' | 'E' | 'M' | 'T' | 'd' | 'e' | 'f' | 'g' | 'j' | 'm' | 'r' | 't'
)
}
pub(crate) fn is_safe_git_command(command: &[String]) -> bool {
let Some((subcommand_idx, subcommand)) =
find_git_subcommand(command, &["status", "log", "diff", "show", "branch"])

View File

@@ -1,4 +1,5 @@
mod powershell_parser;
mod ripgrep;
pub mod is_dangerous_command;
pub mod is_safe_command;

View File

@@ -0,0 +1,123 @@
use std::borrow::Cow;
#[derive(Clone, Copy, Eq, PartialEq)]
pub(crate) enum RipgrepArgCase {
Sensitive,
AsciiInsensitive,
}
pub(crate) fn is_safe_ripgrep_command(command: &[String], arg_case: RipgrepArgCase) -> bool {
!command
.iter()
.skip(1)
.map(String::as_str)
.any(|arg| is_unsafe_ripgrep_arg(arg, arg_case))
}
fn is_unsafe_ripgrep_arg(arg: &str, arg_case: RipgrepArgCase) -> bool {
let normalized = normalized_long_arg(arg, arg_case);
match normalized.as_ref() {
// Takes an arbitrary command that is executed for each match.
"--pre"
// Takes a command that can be used to obtain the local hostname.
| "--hostname-bin"
// Calls out to other decompression tools, so do not auto-approve
// out of an abundance of caution.
| "--search-zip" => true,
_ => {
normalized.starts_with("--pre=")
|| normalized.starts_with("--hostname-bin=")
|| normalized.starts_with("--search-zip=")
|| ripgrep_short_options_contain_search_zip(arg, arg_case)
}
}
}
fn normalized_long_arg(arg: &str, arg_case: RipgrepArgCase) -> Cow<'_, str> {
if !arg.starts_with("--") {
return Cow::Borrowed(arg);
}
match arg_case {
RipgrepArgCase::Sensitive => Cow::Borrowed(arg),
RipgrepArgCase::AsciiInsensitive => Cow::Owned(arg.to_ascii_lowercase()),
}
}
fn ripgrep_short_options_contain_search_zip(arg: &str, arg_case: RipgrepArgCase) -> bool {
let Some(short_options) = arg.strip_prefix('-') else {
return false;
};
if short_options.is_empty() || short_options.starts_with('-') {
return false;
}
for option in short_options.chars() {
if option == 'z' || (arg_case == RipgrepArgCase::AsciiInsensitive && option == 'Z') {
return true;
}
if ripgrep_short_option_takes_value(option) {
return false;
}
}
false
}
fn ripgrep_short_option_takes_value(option: char) -> bool {
matches!(
option,
'A' | 'B' | 'C' | 'E' | 'M' | 'T' | 'd' | 'e' | 'f' | 'g' | 'j' | 'm' | 'r' | 't'
)
}
#[cfg(test)]
mod tests {
use super::*;
fn vec_str(args: &[&str]) -> Vec<String> {
args.iter().map(ToString::to_string).collect()
}
#[test]
fn rejects_ripgrep_options_that_can_spawn_processes() {
for args in [
vec_str(&["rg", "--pre", "pwned", "files"]),
vec_str(&["rg", "--pre=pwned", "files"]),
vec_str(&["rg", "--hostname-bin", "pwned", "files"]),
vec_str(&["rg", "--hostname-bin=pwned", "files"]),
vec_str(&["rg", "--search-zip", "files"]),
vec_str(&["rg", "--search-zip=true", "files"]),
vec_str(&["rg", "-z", "files"]),
vec_str(&["rg", "-zn", "files"]),
vec_str(&["rg", "-nz", "files"]),
] {
assert!(
!is_safe_ripgrep_command(&args, RipgrepArgCase::Sensitive),
"expected {args:?} to be unsafe",
);
}
}
#[test]
fn case_insensitive_matching_preserves_short_option_value_shape() {
for args in [
vec_str(&["rg", "--PRE", "pwned", "files"]),
vec_str(&["rg", "--HOSTNAME-BIN=pwned", "files"]),
vec_str(&["rg", "--SEARCH-ZIP", "files"]),
vec_str(&["rg", "-Z", "files"]),
vec_str(&["rg", "-Fz", "needle", "."]),
] {
assert!(
!is_safe_ripgrep_command(&args, RipgrepArgCase::AsciiInsensitive),
"expected {args:?} to be unsafe with case-insensitive matching",
);
}
let args = vec_str(&["rg", "-fz", "needle", "."]);
assert!(
is_safe_ripgrep_command(&args, RipgrepArgCase::AsciiInsensitive),
"expected lowercase -f to consume z as its pattern-file value",
);
}
}

View File

@@ -1,6 +1,8 @@
use crate::command_safety::is_safe_command::is_safe_git_command;
use crate::command_safety::powershell_parser::PowershellParseOutcome;
use crate::command_safety::powershell_parser::parse_with_powershell_ast;
use crate::command_safety::ripgrep::RipgrepArgCase;
use crate::command_safety::ripgrep::is_safe_ripgrep_command;
use std::path::Path;
/// On Windows, we conservatively allow only clearly read-only PowerShell invocations
@@ -190,7 +192,7 @@ pub(crate) fn is_safe_powershell_words(words: &[String]) -> bool {
"git" => is_safe_git_command(words),
"rg" => is_safe_ripgrep(words),
"rg" => is_safe_ripgrep_command(words, RipgrepArgCase::AsciiInsensitive),
// Extra safety: explicitly prohibit common side-effecting cmdlets regardless of args.
"set-content" | "add-content" | "out-file" | "new-item" | "remove-item" | "move-item"
@@ -206,21 +208,6 @@ pub(crate) fn is_safe_powershell_words(words: &[String]) -> bool {
}
}
/// Checks that an `rg` invocation avoids options that can spawn arbitrary executables.
fn is_safe_ripgrep(words: &[String]) -> bool {
const UNSAFE_RIPGREP_OPTIONS_WITH_ARGS: &[&str] = &["--pre", "--hostname-bin"];
const UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS: &[&str] = &["--search-zip", "-z"];
!words.iter().skip(1).any(|arg| {
let arg_lc = arg.to_ascii_lowercase();
// Examples rejected here: "pwsh -Command 'rg --pre cat pattern'" and "pwsh -Command 'rg --search-zip pattern'".
UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS.contains(&arg_lc.as_str())
|| UNSAFE_RIPGREP_OPTIONS_WITH_ARGS
.iter()
.any(|opt| arg_lc == *opt || arg_lc.starts_with(&format!("{opt}=")))
})
}
#[cfg(all(test, windows))]
mod tests {
use super::*;