diff --git a/codex-rs/shell-command/src/command_safety/is_safe_command.rs b/codex-rs/shell-command/src/command_safety/is_safe_command.rs index 2c098249ef..cd0c84baa9 100644 --- a/codex-rs/shell-command/src/command_safety/is_safe_command.rs +++ b/codex-rs/shell-command/src/command_safety/is_safe_command.rs @@ -4,6 +4,8 @@ use crate::command_safety::is_dangerous_command::executable_name_lookup_key; // may appear before it (e.g., `-C`, `-c`, `--git-dir`). // Implemented in `is_dangerous_command` and shared here. use crate::command_safety::is_dangerous_command::find_git_subcommand; +use crate::command_safety::ripgrep::RipgrepArgCase; +use crate::command_safety::ripgrep::is_safe_ripgrep_command; use crate::command_safety::windows_safe_commands::is_safe_command_windows; #[cfg(windows)] use crate::command_safety::windows_safe_commands::is_safe_powershell_words as is_safe_powershell_words_windows; @@ -127,7 +129,7 @@ fn is_safe_to_call_with_exec(command: &[String]) -> bool { } // Ripgrep - Some("rg") => is_safe_ripgrep_command(command), + Some("rg") => is_safe_ripgrep_command(command, RipgrepArgCase::Sensitive), // Git Some("git") => is_safe_git_command(command), @@ -148,60 +150,6 @@ fn is_safe_to_call_with_exec(command: &[String]) -> bool { } } -fn is_safe_ripgrep_command(command: &[String]) -> bool { - !command - .iter() - .skip(1) - .map(String::as_str) - .any(is_unsafe_ripgrep_arg) -} - -fn is_unsafe_ripgrep_arg(arg: &str) -> bool { - match arg { - // Takes an arbitrary command that is executed for each match. - "--pre" - // Takes a command that can be used to obtain the local hostname. - | "--hostname-bin" - // Calls out to other decompression tools, so do not auto-approve - // out of an abundance of caution. - | "--search-zip" - | "-z" => true, - _ => { - arg.starts_with("--pre=") - || arg.starts_with("--hostname-bin=") - || arg.starts_with("--search-zip=") - || ripgrep_short_options_contain_search_zip(arg) - } - } -} - -fn ripgrep_short_options_contain_search_zip(arg: &str) -> bool { - let Some(short_options) = arg.strip_prefix('-') else { - return false; - }; - if short_options.is_empty() || short_options.starts_with('-') { - return false; - } - - for option in short_options.chars() { - if option == 'z' { - return true; - } - if ripgrep_short_option_takes_value(option) { - return false; - } - } - - false -} - -fn ripgrep_short_option_takes_value(option: char) -> bool { - matches!( - option, - 'A' | 'B' | 'C' | 'E' | 'M' | 'T' | 'd' | 'e' | 'f' | 'g' | 'j' | 'm' | 'r' | 't' - ) -} - pub(crate) fn is_safe_git_command(command: &[String]) -> bool { let Some((subcommand_idx, subcommand)) = find_git_subcommand(command, &["status", "log", "diff", "show", "branch"]) diff --git a/codex-rs/shell-command/src/command_safety/mod.rs b/codex-rs/shell-command/src/command_safety/mod.rs index 2040364e5b..4630fb4e29 100644 --- a/codex-rs/shell-command/src/command_safety/mod.rs +++ b/codex-rs/shell-command/src/command_safety/mod.rs @@ -1,4 +1,5 @@ mod powershell_parser; +mod ripgrep; pub mod is_dangerous_command; pub mod is_safe_command; diff --git a/codex-rs/shell-command/src/command_safety/ripgrep.rs b/codex-rs/shell-command/src/command_safety/ripgrep.rs new file mode 100644 index 0000000000..0a2bb2fab1 --- /dev/null +++ b/codex-rs/shell-command/src/command_safety/ripgrep.rs @@ -0,0 +1,123 @@ +use std::borrow::Cow; + +#[derive(Clone, Copy, Eq, PartialEq)] +pub(crate) enum RipgrepArgCase { + Sensitive, + AsciiInsensitive, +} + +pub(crate) fn is_safe_ripgrep_command(command: &[String], arg_case: RipgrepArgCase) -> bool { + !command + .iter() + .skip(1) + .map(String::as_str) + .any(|arg| is_unsafe_ripgrep_arg(arg, arg_case)) +} + +fn is_unsafe_ripgrep_arg(arg: &str, arg_case: RipgrepArgCase) -> bool { + let normalized = normalized_long_arg(arg, arg_case); + match normalized.as_ref() { + // Takes an arbitrary command that is executed for each match. + "--pre" + // Takes a command that can be used to obtain the local hostname. + | "--hostname-bin" + // Calls out to other decompression tools, so do not auto-approve + // out of an abundance of caution. + | "--search-zip" => true, + _ => { + normalized.starts_with("--pre=") + || normalized.starts_with("--hostname-bin=") + || normalized.starts_with("--search-zip=") + || ripgrep_short_options_contain_search_zip(arg, arg_case) + } + } +} + +fn normalized_long_arg(arg: &str, arg_case: RipgrepArgCase) -> Cow<'_, str> { + if !arg.starts_with("--") { + return Cow::Borrowed(arg); + } + + match arg_case { + RipgrepArgCase::Sensitive => Cow::Borrowed(arg), + RipgrepArgCase::AsciiInsensitive => Cow::Owned(arg.to_ascii_lowercase()), + } +} + +fn ripgrep_short_options_contain_search_zip(arg: &str, arg_case: RipgrepArgCase) -> bool { + let Some(short_options) = arg.strip_prefix('-') else { + return false; + }; + if short_options.is_empty() || short_options.starts_with('-') { + return false; + } + + for option in short_options.chars() { + if option == 'z' || (arg_case == RipgrepArgCase::AsciiInsensitive && option == 'Z') { + return true; + } + if ripgrep_short_option_takes_value(option) { + return false; + } + } + + false +} + +fn ripgrep_short_option_takes_value(option: char) -> bool { + matches!( + option, + 'A' | 'B' | 'C' | 'E' | 'M' | 'T' | 'd' | 'e' | 'f' | 'g' | 'j' | 'm' | 'r' | 't' + ) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn vec_str(args: &[&str]) -> Vec { + args.iter().map(ToString::to_string).collect() + } + + #[test] + fn rejects_ripgrep_options_that_can_spawn_processes() { + for args in [ + vec_str(&["rg", "--pre", "pwned", "files"]), + vec_str(&["rg", "--pre=pwned", "files"]), + vec_str(&["rg", "--hostname-bin", "pwned", "files"]), + vec_str(&["rg", "--hostname-bin=pwned", "files"]), + vec_str(&["rg", "--search-zip", "files"]), + vec_str(&["rg", "--search-zip=true", "files"]), + vec_str(&["rg", "-z", "files"]), + vec_str(&["rg", "-zn", "files"]), + vec_str(&["rg", "-nz", "files"]), + ] { + assert!( + !is_safe_ripgrep_command(&args, RipgrepArgCase::Sensitive), + "expected {args:?} to be unsafe", + ); + } + } + + #[test] + fn case_insensitive_matching_preserves_short_option_value_shape() { + for args in [ + vec_str(&["rg", "--PRE", "pwned", "files"]), + vec_str(&["rg", "--HOSTNAME-BIN=pwned", "files"]), + vec_str(&["rg", "--SEARCH-ZIP", "files"]), + vec_str(&["rg", "-Z", "files"]), + vec_str(&["rg", "-Fz", "needle", "."]), + ] { + assert!( + !is_safe_ripgrep_command(&args, RipgrepArgCase::AsciiInsensitive), + "expected {args:?} to be unsafe with case-insensitive matching", + ); + } + + let args = vec_str(&["rg", "-fz", "needle", "."]); + assert!( + is_safe_ripgrep_command(&args, RipgrepArgCase::AsciiInsensitive), + "expected lowercase -f to consume z as its pattern-file value", + ); + } +} diff --git a/codex-rs/shell-command/src/command_safety/windows_safe_commands.rs b/codex-rs/shell-command/src/command_safety/windows_safe_commands.rs index 8ef3f8e8f9..4e0fa1d310 100644 --- a/codex-rs/shell-command/src/command_safety/windows_safe_commands.rs +++ b/codex-rs/shell-command/src/command_safety/windows_safe_commands.rs @@ -1,6 +1,8 @@ use crate::command_safety::is_safe_command::is_safe_git_command; use crate::command_safety::powershell_parser::PowershellParseOutcome; use crate::command_safety::powershell_parser::parse_with_powershell_ast; +use crate::command_safety::ripgrep::RipgrepArgCase; +use crate::command_safety::ripgrep::is_safe_ripgrep_command; use std::path::Path; /// On Windows, we conservatively allow only clearly read-only PowerShell invocations @@ -190,7 +192,7 @@ pub(crate) fn is_safe_powershell_words(words: &[String]) -> bool { "git" => is_safe_git_command(words), - "rg" => is_safe_ripgrep(words), + "rg" => is_safe_ripgrep_command(words, RipgrepArgCase::AsciiInsensitive), // Extra safety: explicitly prohibit common side-effecting cmdlets regardless of args. "set-content" | "add-content" | "out-file" | "new-item" | "remove-item" | "move-item" @@ -206,21 +208,6 @@ pub(crate) fn is_safe_powershell_words(words: &[String]) -> bool { } } -/// Checks that an `rg` invocation avoids options that can spawn arbitrary executables. -fn is_safe_ripgrep(words: &[String]) -> bool { - const UNSAFE_RIPGREP_OPTIONS_WITH_ARGS: &[&str] = &["--pre", "--hostname-bin"]; - const UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS: &[&str] = &["--search-zip", "-z"]; - - !words.iter().skip(1).any(|arg| { - let arg_lc = arg.to_ascii_lowercase(); - // Examples rejected here: "pwsh -Command 'rg --pre cat pattern'" and "pwsh -Command 'rg --search-zip pattern'". - UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS.contains(&arg_lc.as_str()) - || UNSAFE_RIPGREP_OPTIONS_WITH_ARGS - .iter() - .any(|opt| arg_lc == *opt || arg_lc.starts_with(&format!("{opt}="))) - }) -} - #[cfg(all(test, windows))] mod tests { use super::*;