mirror of
https://github.com/openai/codex.git
synced 2026-05-29 23:40:29 +00:00
Fix escaped shell args in safety checks
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
use std::borrow::Cow;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use tree_sitter::Node;
|
||||
@@ -115,8 +116,55 @@ pub fn extract_bash_command(command: &[String]) -> Option<(&str, &str)> {
|
||||
pub fn parse_shell_lc_plain_commands(command: &[String]) -> Option<Vec<Vec<String>>> {
|
||||
let (_, script) = extract_bash_command(command)?;
|
||||
|
||||
let tree = try_parse_shell(script)?;
|
||||
try_parse_word_only_commands_sequence(&tree, script)
|
||||
let script = strip_line_continuations(script);
|
||||
let tree = try_parse_shell(&script)?;
|
||||
try_parse_word_only_commands_sequence(&tree, &script)
|
||||
}
|
||||
|
||||
fn strip_line_continuations(script: &str) -> Cow<'_, str> {
|
||||
if !script.contains("\\\n") {
|
||||
return Cow::Borrowed(script);
|
||||
}
|
||||
|
||||
let mut stripped = String::with_capacity(script.len());
|
||||
let mut chars = script.chars().peekable();
|
||||
let mut escaped = false;
|
||||
let mut in_double_quote = false;
|
||||
let mut in_single_quote = false;
|
||||
let mut stripped_any = false;
|
||||
|
||||
while let Some(ch) = chars.next() {
|
||||
if escaped {
|
||||
stripped.push(ch);
|
||||
escaped = false;
|
||||
} else if ch == '\\' && !in_single_quote {
|
||||
if chars.peek() == Some(&'\n') {
|
||||
chars.next();
|
||||
stripped_any = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
stripped.push(ch);
|
||||
escaped = true;
|
||||
} else {
|
||||
match ch {
|
||||
'\'' if !in_double_quote => {
|
||||
in_single_quote = !in_single_quote;
|
||||
}
|
||||
'"' if !in_single_quote => {
|
||||
in_double_quote = !in_double_quote;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
stripped.push(ch);
|
||||
}
|
||||
}
|
||||
|
||||
if stripped_any {
|
||||
Cow::Owned(stripped)
|
||||
} else {
|
||||
Cow::Borrowed(script)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the parsed argv for a single shell command in a here-doc style
|
||||
@@ -152,10 +200,10 @@ fn parse_plain_command_from_node(cmd: tree_sitter::Node, src: &str) -> Option<Ve
|
||||
if word_node.kind() != "word" {
|
||||
return None;
|
||||
}
|
||||
words.push(word_node.utf8_text(src.as_bytes()).ok()?.to_owned());
|
||||
words.push(parse_word_or_number(word_node, src)?);
|
||||
}
|
||||
"word" | "number" => {
|
||||
words.push(child.utf8_text(src.as_bytes()).ok()?.to_owned());
|
||||
words.push(parse_word_or_number(child, src)?);
|
||||
}
|
||||
"string" => {
|
||||
let parsed = parse_double_quoted_string(child, src)?;
|
||||
@@ -172,8 +220,7 @@ fn parse_plain_command_from_node(cmd: tree_sitter::Node, src: &str) -> Option<Ve
|
||||
for part in child.named_children(&mut concat_cursor) {
|
||||
match part.kind() {
|
||||
"word" | "number" => {
|
||||
concatenated
|
||||
.push_str(part.utf8_text(src.as_bytes()).ok()?.to_owned().as_str());
|
||||
concatenated.push_str(&parse_word_or_number(part, src)?);
|
||||
}
|
||||
"string" => {
|
||||
let parsed = parse_double_quoted_string(part, src)?;
|
||||
@@ -213,13 +260,13 @@ fn parse_heredoc_command_words(cmd: Node<'_>, src: &str) -> Option<Vec<String>>
|
||||
{
|
||||
return None;
|
||||
}
|
||||
words.push(word_node.utf8_text(src.as_bytes()).ok()?.to_owned());
|
||||
words.push(parse_word_or_number(word_node, src)?);
|
||||
}
|
||||
"word" | "number" => {
|
||||
if !is_literal_word_or_number(child) {
|
||||
return None;
|
||||
}
|
||||
words.push(child.utf8_text(src.as_bytes()).ok()?.to_owned());
|
||||
words.push(parse_word_or_number(child, src)?);
|
||||
}
|
||||
// Allow heredoc constructs that attach stdin to a single command
|
||||
// without changing argv matching semantics for the executable
|
||||
@@ -253,6 +300,35 @@ fn is_allowed_heredoc_attachment_kind(kind: &str) -> bool {
|
||||
)
|
||||
}
|
||||
|
||||
fn parse_word_or_number(node: Node<'_>, src: &str) -> Option<String> {
|
||||
if !matches!(node.kind(), "word" | "number") {
|
||||
return None;
|
||||
}
|
||||
|
||||
let raw = node.utf8_text(src.as_bytes()).ok()?;
|
||||
unescape_unquoted_word(raw)
|
||||
}
|
||||
|
||||
fn unescape_unquoted_word(raw: &str) -> Option<String> {
|
||||
let mut unescaped = String::with_capacity(raw.len());
|
||||
let mut chars = raw.chars();
|
||||
|
||||
while let Some(ch) = chars.next() {
|
||||
if ch != '\\' {
|
||||
unescaped.push(ch);
|
||||
continue;
|
||||
}
|
||||
|
||||
match chars.next() {
|
||||
Some('\n') => {}
|
||||
Some(escaped) => unescaped.push(escaped),
|
||||
None => return None,
|
||||
}
|
||||
}
|
||||
|
||||
Some(unescaped)
|
||||
}
|
||||
|
||||
fn find_single_command_node(root: Node<'_>) -> Option<Node<'_>> {
|
||||
let mut stack = vec![root];
|
||||
let mut single_command = None;
|
||||
@@ -360,6 +436,54 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unescapes_backslashes_in_unquoted_words() {
|
||||
assert_eq!(
|
||||
parse_seq(r"rg --pre\=./pre.sh pattern").unwrap(),
|
||||
vec![vec![
|
||||
"rg".to_string(),
|
||||
"--pre=./pre.sh".to_string(),
|
||||
"pattern".to_string(),
|
||||
]]
|
||||
);
|
||||
assert_eq!(
|
||||
parse_seq(r"rg --\pre=./pre.sh pattern").unwrap(),
|
||||
vec![vec![
|
||||
"rg".to_string(),
|
||||
"--pre=./pre.sh".to_string(),
|
||||
"pattern".to_string(),
|
||||
]]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn shell_lc_parser_removes_line_continuations_outside_single_quotes() {
|
||||
assert_eq!(
|
||||
parse_shell_lc_plain_commands(&[
|
||||
"bash".to_string(),
|
||||
"-lc".to_string(),
|
||||
"echo foo\\\nbar".to_string(),
|
||||
]),
|
||||
Some(vec![vec!["echo".to_string(), "foobar".to_string()]])
|
||||
);
|
||||
assert_eq!(
|
||||
parse_shell_lc_plain_commands(&[
|
||||
"bash".to_string(),
|
||||
"-lc".to_string(),
|
||||
"echo \"foo\\\nbar\"".to_string(),
|
||||
]),
|
||||
Some(vec![vec!["echo".to_string(), "foobar".to_string()]])
|
||||
);
|
||||
assert_eq!(
|
||||
parse_shell_lc_plain_commands(&[
|
||||
"bash".to_string(),
|
||||
"-lc".to_string(),
|
||||
"echo 'foo\\\nbar'".to_string(),
|
||||
]),
|
||||
Some(vec![vec!["echo".to_string(), "foo\\\nbar".to_string()]])
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn accepts_double_quoted_strings_with_newlines() {
|
||||
let cmds = parse_seq("git commit -m \"line1\nline2\"").unwrap();
|
||||
|
||||
@@ -127,27 +127,7 @@ fn is_safe_to_call_with_exec(command: &[String]) -> bool {
|
||||
}
|
||||
|
||||
// Ripgrep
|
||||
Some("rg") => {
|
||||
const UNSAFE_RIPGREP_OPTIONS_WITH_ARGS: &[&str] = &[
|
||||
// Takes an arbitrary command that is executed for each match.
|
||||
"--pre",
|
||||
// Takes a command that can be used to obtain the local hostname.
|
||||
"--hostname-bin",
|
||||
];
|
||||
const UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS: &[&str] = &[
|
||||
// Calls out to other decompression tools, so do not auto-approve
|
||||
// out of an abundance of caution.
|
||||
"--search-zip",
|
||||
"-z",
|
||||
];
|
||||
|
||||
!command.iter().any(|arg| {
|
||||
UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS.contains(&arg.as_str())
|
||||
|| UNSAFE_RIPGREP_OPTIONS_WITH_ARGS
|
||||
.iter()
|
||||
.any(|&opt| arg == opt || arg.starts_with(&format!("{opt}=")))
|
||||
})
|
||||
}
|
||||
Some("rg") => is_safe_ripgrep_command(command),
|
||||
|
||||
// Git
|
||||
Some("git") => is_safe_git_command(command),
|
||||
@@ -168,6 +148,60 @@ fn is_safe_to_call_with_exec(command: &[String]) -> bool {
|
||||
}
|
||||
}
|
||||
|
||||
fn is_safe_ripgrep_command(command: &[String]) -> bool {
|
||||
!command
|
||||
.iter()
|
||||
.skip(1)
|
||||
.map(String::as_str)
|
||||
.any(is_unsafe_ripgrep_arg)
|
||||
}
|
||||
|
||||
fn is_unsafe_ripgrep_arg(arg: &str) -> bool {
|
||||
match arg {
|
||||
// Takes an arbitrary command that is executed for each match.
|
||||
"--pre"
|
||||
// Takes a command that can be used to obtain the local hostname.
|
||||
| "--hostname-bin"
|
||||
// Calls out to other decompression tools, so do not auto-approve
|
||||
// out of an abundance of caution.
|
||||
| "--search-zip"
|
||||
| "-z" => true,
|
||||
_ => {
|
||||
arg.starts_with("--pre=")
|
||||
|| arg.starts_with("--hostname-bin=")
|
||||
|| arg.starts_with("--search-zip=")
|
||||
|| ripgrep_short_options_contain_search_zip(arg)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn ripgrep_short_options_contain_search_zip(arg: &str) -> bool {
|
||||
let Some(short_options) = arg.strip_prefix('-') else {
|
||||
return false;
|
||||
};
|
||||
if short_options.is_empty() || short_options.starts_with('-') {
|
||||
return false;
|
||||
}
|
||||
|
||||
for option in short_options.chars() {
|
||||
if option == 'z' {
|
||||
return true;
|
||||
}
|
||||
if ripgrep_short_option_takes_value(option) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
fn ripgrep_short_option_takes_value(option: char) -> bool {
|
||||
matches!(
|
||||
option,
|
||||
'A' | 'B' | 'C' | 'E' | 'M' | 'T' | 'd' | 'e' | 'f' | 'g' | 'j' | 'm' | 'r' | 't'
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) fn is_safe_git_command(command: &[String]) -> bool {
|
||||
let Some((subcommand_idx, subcommand)) =
|
||||
find_git_subcommand(command, &["status", "log", "diff", "show", "branch"])
|
||||
@@ -588,7 +622,10 @@ mod tests {
|
||||
// Unsafe flags that do not take an argument (present verbatim).
|
||||
for args in [
|
||||
vec_str(&["rg", "--search-zip", "files"]),
|
||||
vec_str(&["rg", "--search-zip=true", "files"]),
|
||||
vec_str(&["rg", "-z", "files"]),
|
||||
vec_str(&["rg", "-zn", "files"]),
|
||||
vec_str(&["rg", "-nz", "files"]),
|
||||
] {
|
||||
assert!(
|
||||
!is_safe_to_call_with_exec(&args),
|
||||
@@ -610,6 +647,24 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bash_lc_escaped_ripgrep_options_are_unsafe() {
|
||||
for script in [
|
||||
r"rg --pre\=./pre.sh files",
|
||||
r"rg --\pre=./pre.sh files",
|
||||
r"rg --hostname\-bin=hostname files",
|
||||
r"rg -\z files",
|
||||
r"rg -\zn needle .",
|
||||
"rg --pr\\\ne=./pre.sh files",
|
||||
"rg \"--pr\\\ne=./pre.sh\" files",
|
||||
] {
|
||||
assert!(
|
||||
!is_known_safe_command(&vec_str(&["bash", "-lc", script])),
|
||||
"expected {script:?} to require approval after shell unescaping",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn windows_powershell_full_path_is_safe() {
|
||||
if !cfg!(windows) {
|
||||
|
||||
Reference in New Issue
Block a user