mirror of
https://github.com/openai/codex.git
synced 2026-05-17 09:43:19 +00:00
fix: share ripgrep safety helper
This commit is contained in:
@@ -4,6 +4,8 @@ use crate::command_safety::is_dangerous_command::executable_name_lookup_key;
|
||||
// may appear before it (e.g., `-C`, `-c`, `--git-dir`).
|
||||
// Implemented in `is_dangerous_command` and shared here.
|
||||
use crate::command_safety::is_dangerous_command::find_git_subcommand;
|
||||
use crate::command_safety::ripgrep::RipgrepArgCase;
|
||||
use crate::command_safety::ripgrep::is_safe_ripgrep_command;
|
||||
use crate::command_safety::windows_safe_commands::is_safe_command_windows;
|
||||
#[cfg(windows)]
|
||||
use crate::command_safety::windows_safe_commands::is_safe_powershell_words as is_safe_powershell_words_windows;
|
||||
@@ -127,7 +129,7 @@ fn is_safe_to_call_with_exec(command: &[String]) -> bool {
|
||||
}
|
||||
|
||||
// Ripgrep
|
||||
Some("rg") => is_safe_ripgrep_command(command),
|
||||
Some("rg") => is_safe_ripgrep_command(command, RipgrepArgCase::Sensitive),
|
||||
|
||||
// Git
|
||||
Some("git") => is_safe_git_command(command),
|
||||
@@ -148,60 +150,6 @@ fn is_safe_to_call_with_exec(command: &[String]) -> bool {
|
||||
}
|
||||
}
|
||||
|
||||
fn is_safe_ripgrep_command(command: &[String]) -> bool {
|
||||
!command
|
||||
.iter()
|
||||
.skip(1)
|
||||
.map(String::as_str)
|
||||
.any(is_unsafe_ripgrep_arg)
|
||||
}
|
||||
|
||||
fn is_unsafe_ripgrep_arg(arg: &str) -> bool {
|
||||
match arg {
|
||||
// Takes an arbitrary command that is executed for each match.
|
||||
"--pre"
|
||||
// Takes a command that can be used to obtain the local hostname.
|
||||
| "--hostname-bin"
|
||||
// Calls out to other decompression tools, so do not auto-approve
|
||||
// out of an abundance of caution.
|
||||
| "--search-zip"
|
||||
| "-z" => true,
|
||||
_ => {
|
||||
arg.starts_with("--pre=")
|
||||
|| arg.starts_with("--hostname-bin=")
|
||||
|| arg.starts_with("--search-zip=")
|
||||
|| ripgrep_short_options_contain_search_zip(arg)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn ripgrep_short_options_contain_search_zip(arg: &str) -> bool {
|
||||
let Some(short_options) = arg.strip_prefix('-') else {
|
||||
return false;
|
||||
};
|
||||
if short_options.is_empty() || short_options.starts_with('-') {
|
||||
return false;
|
||||
}
|
||||
|
||||
for option in short_options.chars() {
|
||||
if option == 'z' {
|
||||
return true;
|
||||
}
|
||||
if ripgrep_short_option_takes_value(option) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
fn ripgrep_short_option_takes_value(option: char) -> bool {
|
||||
matches!(
|
||||
option,
|
||||
'A' | 'B' | 'C' | 'E' | 'M' | 'T' | 'd' | 'e' | 'f' | 'g' | 'j' | 'm' | 'r' | 't'
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) fn is_safe_git_command(command: &[String]) -> bool {
|
||||
let Some((subcommand_idx, subcommand)) =
|
||||
find_git_subcommand(command, &["status", "log", "diff", "show", "branch"])
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
mod powershell_parser;
|
||||
mod ripgrep;
|
||||
|
||||
pub mod is_dangerous_command;
|
||||
pub mod is_safe_command;
|
||||
|
||||
123
codex-rs/shell-command/src/command_safety/ripgrep.rs
Normal file
123
codex-rs/shell-command/src/command_safety/ripgrep.rs
Normal file
@@ -0,0 +1,123 @@
|
||||
use std::borrow::Cow;
|
||||
|
||||
#[derive(Clone, Copy, Eq, PartialEq)]
|
||||
pub(crate) enum RipgrepArgCase {
|
||||
Sensitive,
|
||||
AsciiInsensitive,
|
||||
}
|
||||
|
||||
pub(crate) fn is_safe_ripgrep_command(command: &[String], arg_case: RipgrepArgCase) -> bool {
|
||||
!command
|
||||
.iter()
|
||||
.skip(1)
|
||||
.map(String::as_str)
|
||||
.any(|arg| is_unsafe_ripgrep_arg(arg, arg_case))
|
||||
}
|
||||
|
||||
fn is_unsafe_ripgrep_arg(arg: &str, arg_case: RipgrepArgCase) -> bool {
|
||||
let normalized = normalized_long_arg(arg, arg_case);
|
||||
match normalized.as_ref() {
|
||||
// Takes an arbitrary command that is executed for each match.
|
||||
"--pre"
|
||||
// Takes a command that can be used to obtain the local hostname.
|
||||
| "--hostname-bin"
|
||||
// Calls out to other decompression tools, so do not auto-approve
|
||||
// out of an abundance of caution.
|
||||
| "--search-zip" => true,
|
||||
_ => {
|
||||
normalized.starts_with("--pre=")
|
||||
|| normalized.starts_with("--hostname-bin=")
|
||||
|| normalized.starts_with("--search-zip=")
|
||||
|| ripgrep_short_options_contain_search_zip(arg, arg_case)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn normalized_long_arg(arg: &str, arg_case: RipgrepArgCase) -> Cow<'_, str> {
|
||||
if !arg.starts_with("--") {
|
||||
return Cow::Borrowed(arg);
|
||||
}
|
||||
|
||||
match arg_case {
|
||||
RipgrepArgCase::Sensitive => Cow::Borrowed(arg),
|
||||
RipgrepArgCase::AsciiInsensitive => Cow::Owned(arg.to_ascii_lowercase()),
|
||||
}
|
||||
}
|
||||
|
||||
fn ripgrep_short_options_contain_search_zip(arg: &str, arg_case: RipgrepArgCase) -> bool {
|
||||
let Some(short_options) = arg.strip_prefix('-') else {
|
||||
return false;
|
||||
};
|
||||
if short_options.is_empty() || short_options.starts_with('-') {
|
||||
return false;
|
||||
}
|
||||
|
||||
for option in short_options.chars() {
|
||||
if option == 'z' || (arg_case == RipgrepArgCase::AsciiInsensitive && option == 'Z') {
|
||||
return true;
|
||||
}
|
||||
if ripgrep_short_option_takes_value(option) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
fn ripgrep_short_option_takes_value(option: char) -> bool {
|
||||
matches!(
|
||||
option,
|
||||
'A' | 'B' | 'C' | 'E' | 'M' | 'T' | 'd' | 'e' | 'f' | 'g' | 'j' | 'm' | 'r' | 't'
|
||||
)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
fn vec_str(args: &[&str]) -> Vec<String> {
|
||||
args.iter().map(ToString::to_string).collect()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rejects_ripgrep_options_that_can_spawn_processes() {
|
||||
for args in [
|
||||
vec_str(&["rg", "--pre", "pwned", "files"]),
|
||||
vec_str(&["rg", "--pre=pwned", "files"]),
|
||||
vec_str(&["rg", "--hostname-bin", "pwned", "files"]),
|
||||
vec_str(&["rg", "--hostname-bin=pwned", "files"]),
|
||||
vec_str(&["rg", "--search-zip", "files"]),
|
||||
vec_str(&["rg", "--search-zip=true", "files"]),
|
||||
vec_str(&["rg", "-z", "files"]),
|
||||
vec_str(&["rg", "-zn", "files"]),
|
||||
vec_str(&["rg", "-nz", "files"]),
|
||||
] {
|
||||
assert!(
|
||||
!is_safe_ripgrep_command(&args, RipgrepArgCase::Sensitive),
|
||||
"expected {args:?} to be unsafe",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn case_insensitive_matching_preserves_short_option_value_shape() {
|
||||
for args in [
|
||||
vec_str(&["rg", "--PRE", "pwned", "files"]),
|
||||
vec_str(&["rg", "--HOSTNAME-BIN=pwned", "files"]),
|
||||
vec_str(&["rg", "--SEARCH-ZIP", "files"]),
|
||||
vec_str(&["rg", "-Z", "files"]),
|
||||
vec_str(&["rg", "-Fz", "needle", "."]),
|
||||
] {
|
||||
assert!(
|
||||
!is_safe_ripgrep_command(&args, RipgrepArgCase::AsciiInsensitive),
|
||||
"expected {args:?} to be unsafe with case-insensitive matching",
|
||||
);
|
||||
}
|
||||
|
||||
let args = vec_str(&["rg", "-fz", "needle", "."]);
|
||||
assert!(
|
||||
is_safe_ripgrep_command(&args, RipgrepArgCase::AsciiInsensitive),
|
||||
"expected lowercase -f to consume z as its pattern-file value",
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,8 @@
|
||||
use crate::command_safety::is_safe_command::is_safe_git_command;
|
||||
use crate::command_safety::powershell_parser::PowershellParseOutcome;
|
||||
use crate::command_safety::powershell_parser::parse_with_powershell_ast;
|
||||
use crate::command_safety::ripgrep::RipgrepArgCase;
|
||||
use crate::command_safety::ripgrep::is_safe_ripgrep_command;
|
||||
use std::path::Path;
|
||||
|
||||
/// On Windows, we conservatively allow only clearly read-only PowerShell invocations
|
||||
@@ -190,7 +192,7 @@ pub(crate) fn is_safe_powershell_words(words: &[String]) -> bool {
|
||||
|
||||
"git" => is_safe_git_command(words),
|
||||
|
||||
"rg" => is_safe_ripgrep(words),
|
||||
"rg" => is_safe_ripgrep_command(words, RipgrepArgCase::AsciiInsensitive),
|
||||
|
||||
// Extra safety: explicitly prohibit common side-effecting cmdlets regardless of args.
|
||||
"set-content" | "add-content" | "out-file" | "new-item" | "remove-item" | "move-item"
|
||||
@@ -206,21 +208,6 @@ pub(crate) fn is_safe_powershell_words(words: &[String]) -> bool {
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks that an `rg` invocation avoids options that can spawn arbitrary executables.
|
||||
fn is_safe_ripgrep(words: &[String]) -> bool {
|
||||
const UNSAFE_RIPGREP_OPTIONS_WITH_ARGS: &[&str] = &["--pre", "--hostname-bin"];
|
||||
const UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS: &[&str] = &["--search-zip", "-z"];
|
||||
|
||||
!words.iter().skip(1).any(|arg| {
|
||||
let arg_lc = arg.to_ascii_lowercase();
|
||||
// Examples rejected here: "pwsh -Command 'rg --pre cat pattern'" and "pwsh -Command 'rg --search-zip pattern'".
|
||||
UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS.contains(&arg_lc.as_str())
|
||||
|| UNSAFE_RIPGREP_OPTIONS_WITH_ARGS
|
||||
.iter()
|
||||
.any(|opt| arg_lc == *opt || arg_lc.starts_with(&format!("{opt}=")))
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(all(test, windows))]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
Reference in New Issue
Block a user