From b4a65625aba686a50c0e9996034a7cc48bb855c6 Mon Sep 17 00:00:00 2001 From: Adrian Bravo Date: Wed, 6 May 2026 12:56:14 -0700 Subject: [PATCH] Fix escaped shell args in safety checks --- codex-rs/shell-command/src/bash.rs | 140 +++++++++++++++++- .../src/command_safety/is_safe_command.rs | 97 +++++++++--- 2 files changed, 208 insertions(+), 29 deletions(-) diff --git a/codex-rs/shell-command/src/bash.rs b/codex-rs/shell-command/src/bash.rs index 60ee5c420c..60b63f0cbf 100644 --- a/codex-rs/shell-command/src/bash.rs +++ b/codex-rs/shell-command/src/bash.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::path::PathBuf; use tree_sitter::Node; @@ -115,8 +116,55 @@ pub fn extract_bash_command(command: &[String]) -> Option<(&str, &str)> { pub fn parse_shell_lc_plain_commands(command: &[String]) -> Option>> { let (_, script) = extract_bash_command(command)?; - let tree = try_parse_shell(script)?; - try_parse_word_only_commands_sequence(&tree, script) + let script = strip_line_continuations(script); + let tree = try_parse_shell(&script)?; + try_parse_word_only_commands_sequence(&tree, &script) +} + +fn strip_line_continuations(script: &str) -> Cow<'_, str> { + if !script.contains("\\\n") { + return Cow::Borrowed(script); + } + + let mut stripped = String::with_capacity(script.len()); + let mut chars = script.chars().peekable(); + let mut escaped = false; + let mut in_double_quote = false; + let mut in_single_quote = false; + let mut stripped_any = false; + + while let Some(ch) = chars.next() { + if escaped { + stripped.push(ch); + escaped = false; + } else if ch == '\\' && !in_single_quote { + if chars.peek() == Some(&'\n') { + chars.next(); + stripped_any = true; + continue; + } + + stripped.push(ch); + escaped = true; + } else { + match ch { + '\'' if !in_double_quote => { + in_single_quote = !in_single_quote; + } + '"' if !in_single_quote => { + in_double_quote = !in_double_quote; + } + _ => {} + } + stripped.push(ch); + } + } + + if stripped_any { + Cow::Owned(stripped) + } else { + Cow::Borrowed(script) + } } /// Returns the parsed argv for a single shell command in a here-doc style @@ -152,10 +200,10 @@ fn parse_plain_command_from_node(cmd: tree_sitter::Node, src: &str) -> Option { - words.push(child.utf8_text(src.as_bytes()).ok()?.to_owned()); + words.push(parse_word_or_number(child, src)?); } "string" => { let parsed = parse_double_quoted_string(child, src)?; @@ -172,8 +220,7 @@ fn parse_plain_command_from_node(cmd: tree_sitter::Node, src: &str) -> Option { - concatenated - .push_str(part.utf8_text(src.as_bytes()).ok()?.to_owned().as_str()); + concatenated.push_str(&parse_word_or_number(part, src)?); } "string" => { let parsed = parse_double_quoted_string(part, src)?; @@ -213,13 +260,13 @@ fn parse_heredoc_command_words(cmd: Node<'_>, src: &str) -> Option> { return None; } - words.push(word_node.utf8_text(src.as_bytes()).ok()?.to_owned()); + words.push(parse_word_or_number(word_node, src)?); } "word" | "number" => { if !is_literal_word_or_number(child) { return None; } - words.push(child.utf8_text(src.as_bytes()).ok()?.to_owned()); + words.push(parse_word_or_number(child, src)?); } // Allow heredoc constructs that attach stdin to a single command // without changing argv matching semantics for the executable @@ -253,6 +300,35 @@ fn is_allowed_heredoc_attachment_kind(kind: &str) -> bool { ) } +fn parse_word_or_number(node: Node<'_>, src: &str) -> Option { + if !matches!(node.kind(), "word" | "number") { + return None; + } + + let raw = node.utf8_text(src.as_bytes()).ok()?; + unescape_unquoted_word(raw) +} + +fn unescape_unquoted_word(raw: &str) -> Option { + let mut unescaped = String::with_capacity(raw.len()); + let mut chars = raw.chars(); + + while let Some(ch) = chars.next() { + if ch != '\\' { + unescaped.push(ch); + continue; + } + + match chars.next() { + Some('\n') => {} + Some(escaped) => unescaped.push(escaped), + None => return None, + } + } + + Some(unescaped) +} + fn find_single_command_node(root: Node<'_>) -> Option> { let mut stack = vec![root]; let mut single_command = None; @@ -360,6 +436,54 @@ mod tests { ); } + #[test] + fn unescapes_backslashes_in_unquoted_words() { + assert_eq!( + parse_seq(r"rg --pre\=./pre.sh pattern").unwrap(), + vec![vec![ + "rg".to_string(), + "--pre=./pre.sh".to_string(), + "pattern".to_string(), + ]] + ); + assert_eq!( + parse_seq(r"rg --\pre=./pre.sh pattern").unwrap(), + vec![vec![ + "rg".to_string(), + "--pre=./pre.sh".to_string(), + "pattern".to_string(), + ]] + ); + } + + #[test] + fn shell_lc_parser_removes_line_continuations_outside_single_quotes() { + assert_eq!( + parse_shell_lc_plain_commands(&[ + "bash".to_string(), + "-lc".to_string(), + "echo foo\\\nbar".to_string(), + ]), + Some(vec![vec!["echo".to_string(), "foobar".to_string()]]) + ); + assert_eq!( + parse_shell_lc_plain_commands(&[ + "bash".to_string(), + "-lc".to_string(), + "echo \"foo\\\nbar\"".to_string(), + ]), + Some(vec![vec!["echo".to_string(), "foobar".to_string()]]) + ); + assert_eq!( + parse_shell_lc_plain_commands(&[ + "bash".to_string(), + "-lc".to_string(), + "echo 'foo\\\nbar'".to_string(), + ]), + Some(vec![vec!["echo".to_string(), "foo\\\nbar".to_string()]]) + ); + } + #[test] fn accepts_double_quoted_strings_with_newlines() { let cmds = parse_seq("git commit -m \"line1\nline2\"").unwrap(); diff --git a/codex-rs/shell-command/src/command_safety/is_safe_command.rs b/codex-rs/shell-command/src/command_safety/is_safe_command.rs index b35144a12b..2c098249ef 100644 --- a/codex-rs/shell-command/src/command_safety/is_safe_command.rs +++ b/codex-rs/shell-command/src/command_safety/is_safe_command.rs @@ -127,27 +127,7 @@ fn is_safe_to_call_with_exec(command: &[String]) -> bool { } // Ripgrep - Some("rg") => { - const UNSAFE_RIPGREP_OPTIONS_WITH_ARGS: &[&str] = &[ - // Takes an arbitrary command that is executed for each match. - "--pre", - // Takes a command that can be used to obtain the local hostname. - "--hostname-bin", - ]; - const UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS: &[&str] = &[ - // Calls out to other decompression tools, so do not auto-approve - // out of an abundance of caution. - "--search-zip", - "-z", - ]; - - !command.iter().any(|arg| { - UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS.contains(&arg.as_str()) - || UNSAFE_RIPGREP_OPTIONS_WITH_ARGS - .iter() - .any(|&opt| arg == opt || arg.starts_with(&format!("{opt}="))) - }) - } + Some("rg") => is_safe_ripgrep_command(command), // Git Some("git") => is_safe_git_command(command), @@ -168,6 +148,60 @@ fn is_safe_to_call_with_exec(command: &[String]) -> bool { } } +fn is_safe_ripgrep_command(command: &[String]) -> bool { + !command + .iter() + .skip(1) + .map(String::as_str) + .any(is_unsafe_ripgrep_arg) +} + +fn is_unsafe_ripgrep_arg(arg: &str) -> bool { + match arg { + // Takes an arbitrary command that is executed for each match. + "--pre" + // Takes a command that can be used to obtain the local hostname. + | "--hostname-bin" + // Calls out to other decompression tools, so do not auto-approve + // out of an abundance of caution. + | "--search-zip" + | "-z" => true, + _ => { + arg.starts_with("--pre=") + || arg.starts_with("--hostname-bin=") + || arg.starts_with("--search-zip=") + || ripgrep_short_options_contain_search_zip(arg) + } + } +} + +fn ripgrep_short_options_contain_search_zip(arg: &str) -> bool { + let Some(short_options) = arg.strip_prefix('-') else { + return false; + }; + if short_options.is_empty() || short_options.starts_with('-') { + return false; + } + + for option in short_options.chars() { + if option == 'z' { + return true; + } + if ripgrep_short_option_takes_value(option) { + return false; + } + } + + false +} + +fn ripgrep_short_option_takes_value(option: char) -> bool { + matches!( + option, + 'A' | 'B' | 'C' | 'E' | 'M' | 'T' | 'd' | 'e' | 'f' | 'g' | 'j' | 'm' | 'r' | 't' + ) +} + pub(crate) fn is_safe_git_command(command: &[String]) -> bool { let Some((subcommand_idx, subcommand)) = find_git_subcommand(command, &["status", "log", "diff", "show", "branch"]) @@ -588,7 +622,10 @@ mod tests { // Unsafe flags that do not take an argument (present verbatim). for args in [ vec_str(&["rg", "--search-zip", "files"]), + vec_str(&["rg", "--search-zip=true", "files"]), vec_str(&["rg", "-z", "files"]), + vec_str(&["rg", "-zn", "files"]), + vec_str(&["rg", "-nz", "files"]), ] { assert!( !is_safe_to_call_with_exec(&args), @@ -610,6 +647,24 @@ mod tests { } } + #[test] + fn bash_lc_escaped_ripgrep_options_are_unsafe() { + for script in [ + r"rg --pre\=./pre.sh files", + r"rg --\pre=./pre.sh files", + r"rg --hostname\-bin=hostname files", + r"rg -\z files", + r"rg -\zn needle .", + "rg --pr\\\ne=./pre.sh files", + "rg \"--pr\\\ne=./pre.sh\" files", + ] { + assert!( + !is_known_safe_command(&vec_str(&["bash", "-lc", script])), + "expected {script:?} to require approval after shell unescaping", + ); + } + } + #[test] fn windows_powershell_full_path_is_safe() { if !cfg!(windows) {