Fix escaped shell args in safety checks

This commit is contained in:
Adrian Bravo
2026-05-06 12:56:14 -07:00
parent e3bf0cfc63
commit b4a65625ab
2 changed files with 208 additions and 29 deletions

View File

@@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::path::PathBuf;
use tree_sitter::Node;
@@ -115,8 +116,55 @@ pub fn extract_bash_command(command: &[String]) -> Option<(&str, &str)> {
pub fn parse_shell_lc_plain_commands(command: &[String]) -> Option<Vec<Vec<String>>> {
let (_, script) = extract_bash_command(command)?;
let tree = try_parse_shell(script)?;
try_parse_word_only_commands_sequence(&tree, script)
let script = strip_line_continuations(script);
let tree = try_parse_shell(&script)?;
try_parse_word_only_commands_sequence(&tree, &script)
}
fn strip_line_continuations(script: &str) -> Cow<'_, str> {
if !script.contains("\\\n") {
return Cow::Borrowed(script);
}
let mut stripped = String::with_capacity(script.len());
let mut chars = script.chars().peekable();
let mut escaped = false;
let mut in_double_quote = false;
let mut in_single_quote = false;
let mut stripped_any = false;
while let Some(ch) = chars.next() {
if escaped {
stripped.push(ch);
escaped = false;
} else if ch == '\\' && !in_single_quote {
if chars.peek() == Some(&'\n') {
chars.next();
stripped_any = true;
continue;
}
stripped.push(ch);
escaped = true;
} else {
match ch {
'\'' if !in_double_quote => {
in_single_quote = !in_single_quote;
}
'"' if !in_single_quote => {
in_double_quote = !in_double_quote;
}
_ => {}
}
stripped.push(ch);
}
}
if stripped_any {
Cow::Owned(stripped)
} else {
Cow::Borrowed(script)
}
}
/// Returns the parsed argv for a single shell command in a here-doc style
@@ -152,10 +200,10 @@ fn parse_plain_command_from_node(cmd: tree_sitter::Node, src: &str) -> Option<Ve
if word_node.kind() != "word" {
return None;
}
words.push(word_node.utf8_text(src.as_bytes()).ok()?.to_owned());
words.push(parse_word_or_number(word_node, src)?);
}
"word" | "number" => {
words.push(child.utf8_text(src.as_bytes()).ok()?.to_owned());
words.push(parse_word_or_number(child, src)?);
}
"string" => {
let parsed = parse_double_quoted_string(child, src)?;
@@ -172,8 +220,7 @@ fn parse_plain_command_from_node(cmd: tree_sitter::Node, src: &str) -> Option<Ve
for part in child.named_children(&mut concat_cursor) {
match part.kind() {
"word" | "number" => {
concatenated
.push_str(part.utf8_text(src.as_bytes()).ok()?.to_owned().as_str());
concatenated.push_str(&parse_word_or_number(part, src)?);
}
"string" => {
let parsed = parse_double_quoted_string(part, src)?;
@@ -213,13 +260,13 @@ fn parse_heredoc_command_words(cmd: Node<'_>, src: &str) -> Option<Vec<String>>
{
return None;
}
words.push(word_node.utf8_text(src.as_bytes()).ok()?.to_owned());
words.push(parse_word_or_number(word_node, src)?);
}
"word" | "number" => {
if !is_literal_word_or_number(child) {
return None;
}
words.push(child.utf8_text(src.as_bytes()).ok()?.to_owned());
words.push(parse_word_or_number(child, src)?);
}
// Allow heredoc constructs that attach stdin to a single command
// without changing argv matching semantics for the executable
@@ -253,6 +300,35 @@ fn is_allowed_heredoc_attachment_kind(kind: &str) -> bool {
)
}
fn parse_word_or_number(node: Node<'_>, src: &str) -> Option<String> {
if !matches!(node.kind(), "word" | "number") {
return None;
}
let raw = node.utf8_text(src.as_bytes()).ok()?;
unescape_unquoted_word(raw)
}
fn unescape_unquoted_word(raw: &str) -> Option<String> {
let mut unescaped = String::with_capacity(raw.len());
let mut chars = raw.chars();
while let Some(ch) = chars.next() {
if ch != '\\' {
unescaped.push(ch);
continue;
}
match chars.next() {
Some('\n') => {}
Some(escaped) => unescaped.push(escaped),
None => return None,
}
}
Some(unescaped)
}
fn find_single_command_node(root: Node<'_>) -> Option<Node<'_>> {
let mut stack = vec![root];
let mut single_command = None;
@@ -360,6 +436,54 @@ mod tests {
);
}
#[test]
fn unescapes_backslashes_in_unquoted_words() {
assert_eq!(
parse_seq(r"rg --pre\=./pre.sh pattern").unwrap(),
vec![vec![
"rg".to_string(),
"--pre=./pre.sh".to_string(),
"pattern".to_string(),
]]
);
assert_eq!(
parse_seq(r"rg --\pre=./pre.sh pattern").unwrap(),
vec![vec![
"rg".to_string(),
"--pre=./pre.sh".to_string(),
"pattern".to_string(),
]]
);
}
#[test]
fn shell_lc_parser_removes_line_continuations_outside_single_quotes() {
assert_eq!(
parse_shell_lc_plain_commands(&[
"bash".to_string(),
"-lc".to_string(),
"echo foo\\\nbar".to_string(),
]),
Some(vec![vec!["echo".to_string(), "foobar".to_string()]])
);
assert_eq!(
parse_shell_lc_plain_commands(&[
"bash".to_string(),
"-lc".to_string(),
"echo \"foo\\\nbar\"".to_string(),
]),
Some(vec![vec!["echo".to_string(), "foobar".to_string()]])
);
assert_eq!(
parse_shell_lc_plain_commands(&[
"bash".to_string(),
"-lc".to_string(),
"echo 'foo\\\nbar'".to_string(),
]),
Some(vec![vec!["echo".to_string(), "foo\\\nbar".to_string()]])
);
}
#[test]
fn accepts_double_quoted_strings_with_newlines() {
let cmds = parse_seq("git commit -m \"line1\nline2\"").unwrap();

View File

@@ -127,27 +127,7 @@ fn is_safe_to_call_with_exec(command: &[String]) -> bool {
}
// Ripgrep
Some("rg") => {
const UNSAFE_RIPGREP_OPTIONS_WITH_ARGS: &[&str] = &[
// Takes an arbitrary command that is executed for each match.
"--pre",
// Takes a command that can be used to obtain the local hostname.
"--hostname-bin",
];
const UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS: &[&str] = &[
// Calls out to other decompression tools, so do not auto-approve
// out of an abundance of caution.
"--search-zip",
"-z",
];
!command.iter().any(|arg| {
UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS.contains(&arg.as_str())
|| UNSAFE_RIPGREP_OPTIONS_WITH_ARGS
.iter()
.any(|&opt| arg == opt || arg.starts_with(&format!("{opt}=")))
})
}
Some("rg") => is_safe_ripgrep_command(command),
// Git
Some("git") => is_safe_git_command(command),
@@ -168,6 +148,60 @@ fn is_safe_to_call_with_exec(command: &[String]) -> bool {
}
}
fn is_safe_ripgrep_command(command: &[String]) -> bool {
!command
.iter()
.skip(1)
.map(String::as_str)
.any(is_unsafe_ripgrep_arg)
}
fn is_unsafe_ripgrep_arg(arg: &str) -> bool {
match arg {
// Takes an arbitrary command that is executed for each match.
"--pre"
// Takes a command that can be used to obtain the local hostname.
| "--hostname-bin"
// Calls out to other decompression tools, so do not auto-approve
// out of an abundance of caution.
| "--search-zip"
| "-z" => true,
_ => {
arg.starts_with("--pre=")
|| arg.starts_with("--hostname-bin=")
|| arg.starts_with("--search-zip=")
|| ripgrep_short_options_contain_search_zip(arg)
}
}
}
fn ripgrep_short_options_contain_search_zip(arg: &str) -> bool {
let Some(short_options) = arg.strip_prefix('-') else {
return false;
};
if short_options.is_empty() || short_options.starts_with('-') {
return false;
}
for option in short_options.chars() {
if option == 'z' {
return true;
}
if ripgrep_short_option_takes_value(option) {
return false;
}
}
false
}
fn ripgrep_short_option_takes_value(option: char) -> bool {
matches!(
option,
'A' | 'B' | 'C' | 'E' | 'M' | 'T' | 'd' | 'e' | 'f' | 'g' | 'j' | 'm' | 'r' | 't'
)
}
pub(crate) fn is_safe_git_command(command: &[String]) -> bool {
let Some((subcommand_idx, subcommand)) =
find_git_subcommand(command, &["status", "log", "diff", "show", "branch"])
@@ -588,7 +622,10 @@ mod tests {
// Unsafe flags that do not take an argument (present verbatim).
for args in [
vec_str(&["rg", "--search-zip", "files"]),
vec_str(&["rg", "--search-zip=true", "files"]),
vec_str(&["rg", "-z", "files"]),
vec_str(&["rg", "-zn", "files"]),
vec_str(&["rg", "-nz", "files"]),
] {
assert!(
!is_safe_to_call_with_exec(&args),
@@ -610,6 +647,24 @@ mod tests {
}
}
#[test]
fn bash_lc_escaped_ripgrep_options_are_unsafe() {
for script in [
r"rg --pre\=./pre.sh files",
r"rg --\pre=./pre.sh files",
r"rg --hostname\-bin=hostname files",
r"rg -\z files",
r"rg -\zn needle .",
"rg --pr\\\ne=./pre.sh files",
"rg \"--pr\\\ne=./pre.sh\" files",
] {
assert!(
!is_known_safe_command(&vec_str(&["bash", "-lc", script])),
"expected {script:?} to require approval after shell unescaping",
);
}
}
#[test]
fn windows_powershell_full_path_is_safe() {
if !cfg!(windows) {