test: cover adjacent ripgrep unsafe spellings

refactor: reuse ripgrep safety helper on windows
chore: narrow ripgrep safety fix scope
2026-05-13 07:42:40 +00:00 · 2026-05-07 16:20:59 -07:00 · 2026-05-07 16:19:12 -07:00 · 2026-05-07 16:17:54 -07:00 · 2026-05-07 13:06:56 -07:00 · 2026-05-06 22:15:33 -07:00
14 changed files with 792 additions and 53 deletions
--- a/codex-rs/cli/src/main.rs
+++ b/codex-rs/cli/src/main.rs
@@ -789,9 +789,7 @@ async fn cli_main(arg0_paths: Arg0DispatchPaths) -> anyhow::Result<()> {
                root_remote_auth_token_env.as_deref(),
                "exec",
            )?;
-            exec_cli
-                .shared
-                .inherit_exec_root_options(&interactive.shared);
+            apply_exec_root_options(&mut exec_cli, &interactive)?;
            prepend_config_flags(
                &mut exec_cli.config_overrides,
                root_config_overrides.clone(),
@@ -1513,6 +1511,21 @@ fn prepend_config_flags(
        .splice(0..0, cli_config_overrides.raw_overrides);
 }

+fn apply_exec_root_options(exec_cli: &mut ExecCli, interactive: &TuiCli) -> anyhow::Result<()> {
+    exec_cli
+        .shared
+        .inherit_exec_root_options(&interactive.shared);
+    if exec_cli.approval_policy.is_none() {
+        exec_cli.approval_policy = interactive.approval_policy;
+    }
+    if exec_cli.dangerously_bypass_approvals_and_sandbox && exec_cli.approval_policy.is_some() {
+        anyhow::bail!(
+            "--dangerously-bypass-approvals-and-sandbox cannot be used with --ask-for-approval"
+        );
+    }
+    Ok(())
+}
+
 fn reject_remote_mode_for_subcommand(
    remote: Option<&str>,
    remote_auth_token_env: Option<&str>,
@@ -1847,6 +1860,61 @@ mod tests {
        assert_eq!(err.kind(), clap::error::ErrorKind::ArgumentConflict);
    }

+    #[test]
+    fn exec_accepts_approval_policy_after_subcommand() {
+        let cli = MultitoolCli::try_parse_from(["codex", "exec", "-a", "untrusted", "hi"])
+            .expect("parse should succeed");
+
+        let Some(Subcommand::Exec(exec)) = cli.subcommand else {
+            panic!("expected exec subcommand");
+        };
+
+        assert_matches!(
+            exec.approval_policy,
+            Some(codex_utils_cli::ApprovalModeCliArg::Untrusted)
+        );
+    }
+
+    #[test]
+    fn exec_accepts_root_approval_policy() {
+        let cli = MultitoolCli::try_parse_from(["codex", "-a", "untrusted", "exec", "hi"])
+            .expect("parse should succeed");
+        let MultitoolCli {
+            interactive,
+            subcommand,
+            ..
+        } = cli;
+        let Some(Subcommand::Exec(mut exec)) = subcommand else {
+            panic!("expected exec subcommand");
+        };
+
+        assert_matches!(
+            exec.approval_policy,
+            Some(codex_utils_cli::ApprovalModeCliArg::Untrusted)
+        );
+        apply_exec_root_options(&mut exec, &interactive).expect("root options apply");
+
+        assert_matches!(
+            exec.approval_policy,
+            Some(codex_utils_cli::ApprovalModeCliArg::Untrusted)
+        );
+    }
+
+    #[test]
+    fn exec_rejects_subcommand_approval_policy_with_bypass() {
+        let err = MultitoolCli::try_parse_from([
+            "codex",
+            "exec",
+            "--dangerously-bypass-approvals-and-sandbox",
+            "-a",
+            "untrusted",
+            "hi",
+        ])
+        .expect_err("conflicting permission flags should be rejected");
+
+        assert_eq!(err.kind(), clap::error::ErrorKind::ArgumentConflict);
+    }
+
    fn app_server_from_args(args: &[&str]) -> AppServerCommand {
        let cli = MultitoolCli::try_parse_from(args).expect("parse");
        let Subcommand::AppServer(app_server) = cli.subcommand.expect("app-server present") else {
--- a/codex-rs/core/src/exec_policy.rs
+++ b/codex-rs/core/src/exec_policy.rs
@@ -24,6 +24,7 @@ use codex_protocol::models::PermissionProfile;
 use codex_protocol::permissions::FileSystemSandboxKind;
 use codex_protocol::permissions::FileSystemSandboxPolicy;
 use codex_protocol::protocol::AskForApproval;
+use codex_shell_command::is_dangerous_command::command_can_execute_arbitrary_command;
 use codex_shell_command::is_dangerous_command::command_might_be_dangerous;
 use codex_shell_command::is_safe_command::is_known_safe_command;
 use thiserror::Error;
@@ -309,11 +310,45 @@ impl ExecPolicyManager {
        let match_options = MatchOptions {
            resolve_host_executables: true,
        };
-        let evaluation = exec_policy.check_multiple_with_options(
+        let mut evaluation = exec_policy.check_multiple_with_options(
            commands.iter(),
            &exec_policy_fallback,
            &match_options,
        );
+        if evaluation.decision == Decision::Allow {
+            let mut arbitrary_command_matches = commands
+                .iter()
+                .filter_map(|command| {
+                    if !command_can_execute_arbitrary_command(command)
+                        || arbitrary_command_execution_is_explicitly_allowed(
+                            command,
+                            &evaluation.matched_rules,
+                        )
+                    {
+                        return None;
+                    }
+
+                    let decision = exec_policy_fallback(command);
+                    (decision != Decision::Allow).then(|| RuleMatch::HeuristicsRuleMatch {
+                        command: command.clone(),
+                        decision,
+                    })
+                })
+                .collect::<Vec<_>>();
+            if !arbitrary_command_matches.is_empty() {
+                evaluation.decision = if arbitrary_command_matches
+                    .iter()
+                    .any(|rule_match| rule_match.decision() == Decision::Forbidden)
+                {
+                    Decision::Forbidden
+                } else {
+                    Decision::Prompt
+                };
+                evaluation
+                    .matched_rules
+                    .append(&mut arbitrary_command_matches);
+            }
+        }

        let requested_amendment = if auto_amendment_allowed {
            derive_requested_execpolicy_amendment_from_prefix_rule(
@@ -475,6 +510,24 @@ impl ExecPolicyManager {
    }
 }

+fn arbitrary_command_execution_is_explicitly_allowed(
+    command: &[String],
+    matched_rules: &[RuleMatch],
+) -> bool {
+    matched_rules.iter().any(|rule_match| {
+        let RuleMatch::PrefixRuleMatch {
+            matched_prefix,
+            decision: Decision::Allow,
+            ..
+        } = rule_match
+        else {
+            return false;
+        };
+
+        command.starts_with(matched_prefix) && command_can_execute_arbitrary_command(matched_prefix)
+    })
+}
+
 impl Default for ExecPolicyManager {
    fn default() -> Self {
        Self::new(Arc::new(Policy::empty()))
--- a/codex-rs/core/src/exec_policy_tests.rs
+++ b/codex-rs/core/src/exec_policy_tests.rs
@@ -1954,6 +1954,116 @@ fn vec_str(items: &[&str]) -> Vec<String> {
    items.iter().map(std::string::ToString::to_string).collect()
 }

+#[tokio::test]
+async fn ripgrep_pre_processor_requires_approval_in_sandboxed_exec() {
+    for command in [
+        vec_str(&["rg", "--pre=./pre.sh", "needle", "input.txt"]),
+        vec_str(&["/bin/zsh", "-lc", r"rg --pre\=./pre.sh needle input.txt"]),
+    ] {
+        assert_exec_approval_requirement_for_command(
+            ExecApprovalRequirementScenario {
+                policy_src: None,
+                command,
+                approval_policy: AskForApproval::OnRequest,
+                sandbox_policy: SandboxPolicy::new_workspace_write_policy(),
+                file_system_sandbox_policy: workspace_write_file_system_sandbox_policy(),
+                sandbox_permissions: SandboxPermissions::UseDefault,
+                prefix_rule: None,
+            },
+            ExecApprovalRequirement::NeedsApproval {
+                reason: None,
+                proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec_str(&[
+                    "rg",
+                    "--pre=./pre.sh",
+                    "needle",
+                    "input.txt",
+                ]))),
+            },
+        )
+        .await;
+    }
+}
+
+#[tokio::test]
+async fn ripgrep_pre_processor_is_forbidden_when_exec_cannot_ask() {
+    for (command, reason) in [
+        (
+            vec_str(&["rg", "--pre=./pre.sh", "needle", "input.txt"]),
+            "`rg '--pre=./pre.sh' needle input.txt` rejected: blocked by policy",
+        ),
+        (
+            vec_str(&["/bin/zsh", "-c", r"rg --pre\=./pre.sh needle input.txt"]),
+            "`/bin/zsh -c \"rg --pre\\\\=./pre.sh needle input.txt\"` rejected: blocked by policy",
+        ),
+    ] {
+        assert_exec_approval_requirement_for_command(
+            ExecApprovalRequirementScenario {
+                policy_src: None,
+                command,
+                approval_policy: AskForApproval::Never,
+                sandbox_policy: SandboxPolicy::new_workspace_write_policy(),
+                file_system_sandbox_policy: workspace_write_file_system_sandbox_policy(),
+                sandbox_permissions: SandboxPermissions::UseDefault,
+                prefix_rule: None,
+            },
+            ExecApprovalRequirement::Forbidden {
+                reason: reason.to_string(),
+            },
+        )
+        .await;
+    }
+}
+
+#[tokio::test]
+async fn ripgrep_pre_processor_requires_approval_despite_broad_allow_rule() {
+    assert_exec_approval_requirement_for_command(
+        ExecApprovalRequirementScenario {
+            policy_src: Some(
+                r#"prefix_rule(pattern=["rg"], decision="allow", justification="read-only search")"#
+                    .to_string(),
+            ),
+            command: vec_str(&["/bin/zsh", "-c", r"rg --pre\=./pre.sh needle input.txt"]),
+            approval_policy: AskForApproval::OnRequest,
+            sandbox_policy: SandboxPolicy::new_workspace_write_policy(),
+            file_system_sandbox_policy: workspace_write_file_system_sandbox_policy(),
+            sandbox_permissions: SandboxPermissions::UseDefault,
+            prefix_rule: None,
+        },
+        ExecApprovalRequirement::NeedsApproval {
+            reason: None,
+            proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec_str(&[
+                "rg",
+                "--pre=./pre.sh",
+                "needle",
+                "input.txt",
+            ]))),
+        },
+    )
+    .await;
+}
+
+#[tokio::test]
+async fn ripgrep_pre_processor_honors_exact_allow_rule() {
+    assert_exec_approval_requirement_for_command(
+        ExecApprovalRequirementScenario {
+            policy_src: Some(
+                r#"prefix_rule(pattern=["rg", "--pre=./pre.sh"], decision="allow")"#.to_string(),
+            ),
+            command: vec_str(&["/bin/zsh", "-c", r"rg --pre\=./pre.sh needle input.txt"]),
+            approval_policy: AskForApproval::OnRequest,
+            sandbox_policy: SandboxPolicy::new_workspace_write_policy(),
+            file_system_sandbox_policy: workspace_write_file_system_sandbox_policy(),
+            sandbox_permissions: SandboxPermissions::UseDefault,
+            prefix_rule: None,
+        },
+        ExecApprovalRequirement::Skip {
+            bypass_sandbox: true,
+            proposed_execpolicy_amendment: None,
+        },
+    )
+    .await;
+}
+
 /// Note this test behaves differently on Windows because it exercises an
 /// `if cfg!(windows)` code path in render_decision_for_unmatched_command().
 #[tokio::test]
--- a/codex-rs/exec/src/cli.rs
+++ b/codex-rs/exec/src/cli.rs
@@ -2,6 +2,7 @@ use clap::Args;
 use clap::FromArgMatches;
 use clap::Parser;
 use clap::ValueEnum;
+use codex_utils_cli::ApprovalModeCliArg;
 use codex_utils_cli::CliConfigOverrides;
 use codex_utils_cli::SharedCliOptions;
 use std::path::PathBuf;
@@ -19,6 +20,15 @@ pub struct Cli {
    #[clap(flatten)]
    pub shared: ExecSharedCliOptions,

+    /// Configure when the model requires human approval before executing a command.
+    #[arg(
+        long = "ask-for-approval",
+        short = 'a',
+        global = true,
+        conflicts_with = "dangerously_bypass_approvals_and_sandbox"
+    )]
+    pub approval_policy: Option<ApprovalModeCliArg>,
+
    /// Allow running Codex outside a Git repository.
    #[arg(long = "skip-git-repo-check", global = true, default_value_t = false)]
    pub skip_git_repo_check: bool,
--- a/codex-rs/exec/src/lib.rs
+++ b/codex-rs/exec/src/lib.rs
@@ -94,6 +94,7 @@ use codex_protocol::protocol::SessionSource;
 use codex_protocol::user_input::UserInput;
 use codex_utils_absolute_path::AbsolutePathBuf;
 use codex_utils_absolute_path::canonicalize_existing_preserving_symlinks;
+use codex_utils_cli::ApprovalModeCliArg;
 use codex_utils_cli::SharedCliOptions;
 use codex_utils_oss::ensure_oss_provider_ready;
 use codex_utils_oss::get_default_model_for_oss_provider;
@@ -222,6 +223,28 @@ fn exec_root_span() -> tracing::Span {
    )
 }

+fn cli_overrides_include_approval_policy<T>(cli_kv_overrides: &[(String, T)]) -> bool {
+    cli_kv_overrides
+        .iter()
+        .any(|(key, _)| key == "approval_policy")
+}
+
+fn exec_approval_policy_override<T>(
+    dangerously_bypass_approvals_and_sandbox: bool,
+    approval_policy: Option<ApprovalModeCliArg>,
+    cli_kv_overrides: &[(String, T)],
+) -> Option<AskForApproval> {
+    if dangerously_bypass_approvals_and_sandbox {
+        Some(AskForApproval::Never)
+    } else if let Some(approval_policy) = approval_policy {
+        Some(approval_policy.into())
+    } else if cli_overrides_include_approval_policy(cli_kv_overrides) {
+        None
+    } else {
+        Some(AskForApproval::Never)
+    }
+}
+
 pub async fn run_main(cli: Cli, arg0_paths: Arg0DispatchPaths) -> anyhow::Result<()> {
    #[allow(clippy::print_stderr)]
    if let Some(message) = cli.removed_full_auto_warning() {
@@ -235,6 +258,7 @@ pub async fn run_main(cli: Cli, arg0_paths: Arg0DispatchPaths) -> anyhow::Result
    let Cli {
        command,
        shared,
+        approval_policy,
        skip_git_repo_check,
        ephemeral,
        ignore_user_config,
@@ -401,8 +425,13 @@ pub async fn run_main(cli: Cli, arg0_paths: Arg0DispatchPaths) -> anyhow::Result
        model,
        review_model: None,
        config_profile,
-        // Default to never ask for approvals in headless mode. Feature flags can override.
-        approval_policy: Some(AskForApproval::Never),
+        // Default to never ask for approvals in headless mode unless the caller
+        // explicitly selected an approval policy.
+        approval_policy: exec_approval_policy_override(
+            dangerously_bypass_approvals_and_sandbox,
+            approval_policy,
+            &cli_kv_overrides,
+        ),
        approvals_reviewer: None,
        sandbox_mode,
        permission_profile: None,
--- a/codex-rs/exec/src/lib_tests.rs
+++ b/codex-rs/exec/src/lib_tests.rs
@@ -22,6 +22,50 @@ fn exec_defaults_analytics_to_enabled() {
    assert_eq!(DEFAULT_ANALYTICS_ENABLED, true);
 }

+#[test]
+fn exec_approval_policy_override_defaults_to_never() {
+    let cli_kv_overrides: Vec<(String, ())> = Vec::new();
+
+    assert_eq!(
+        exec_approval_policy_override(false, None, &cli_kv_overrides),
+        Some(AskForApproval::Never)
+    );
+}
+
+#[test]
+fn exec_approval_policy_override_uses_cli_approval_policy() {
+    let cli_kv_overrides: Vec<(String, ())> = Vec::new();
+
+    assert_eq!(
+        exec_approval_policy_override(
+            false,
+            Some(ApprovalModeCliArg::Untrusted),
+            &cli_kv_overrides
+        ),
+        Some(AskForApproval::UnlessTrusted)
+    );
+}
+
+#[test]
+fn exec_approval_policy_override_allows_config_approval_policy() {
+    let cli_kv_overrides = vec![("approval_policy".to_string(), ())];
+
+    assert_eq!(
+        exec_approval_policy_override(false, None, &cli_kv_overrides),
+        None
+    );
+}
+
+#[test]
+fn exec_approval_policy_override_yolo_forces_never() {
+    let cli_kv_overrides = vec![("approval_policy".to_string(), ())];
+
+    assert_eq!(
+        exec_approval_policy_override(true, Some(ApprovalModeCliArg::Untrusted), &cli_kv_overrides),
+        Some(AskForApproval::Never)
+    );
+}
+
 #[test]
 fn exec_root_span_can_be_parented_from_trace_context() {
    let subscriber = test_tracing_subscriber();
--- a/codex-rs/exec/tests/suite/mod.rs
+++ b/codex-rs/exec/tests/suite/mod.rs
@@ -8,5 +8,6 @@ mod originator;
 mod output_schema;
 mod prompt_stdin;
 mod resume;
+mod ripgrep_pre;
 mod sandbox;
 mod server_error_exit;
--- a/codex-rs/exec/tests/suite/ripgrep_pre.rs
+++ b/codex-rs/exec/tests/suite/ripgrep_pre.rs
@@ -0,0 +1,103 @@
+#![cfg(not(target_os = "windows"))]
+#![allow(clippy::expect_used, clippy::unwrap_used)]
+
+use core_test_support::responses;
+use core_test_support::skip_if_no_network;
+use core_test_support::test_codex_exec::test_codex_exec;
+use serde_json::json;
+use std::os::unix::fs::PermissionsExt;
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn blocks_escaped_ripgrep_preprocessor() -> anyhow::Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let test = test_codex_exec();
+    let marker = test.cwd_path().join("preprocessor-ran");
+    std::fs::write(test.cwd_path().join("input.txt"), "needle\n")?;
+
+    let preprocessor = test.cwd_path().join("pre.sh");
+    std::fs::write(
+        &preprocessor,
+        format!(
+            "#!/bin/sh\nprintf ran > {:?}\ncat\n",
+            marker.to_string_lossy()
+        ),
+    )?;
+    let mut permissions = std::fs::metadata(&preprocessor)?.permissions();
+    permissions.set_mode(0o755);
+    std::fs::set_permissions(&preprocessor, permissions)?;
+
+    let call_id = "rg-pre-poc";
+    let args = json!({
+        "command": r"rg --pre\=./pre.sh needle input.txt",
+        "login": false,
+        "timeout_ms": 1_000,
+    });
+    let server = responses::start_mock_server().await;
+    let provider_override = format!(
+        "model_providers.mock={{ name = \"mock\", base_url = \"{}/v1\", env_key = \"PATH\", wire_api = \"responses\" }}",
+        server.uri()
+    );
+    let resp_mock = responses::mount_sse_sequence(
+        &server,
+        vec![
+            responses::sse(vec![
+                responses::ev_response_created("resp-1"),
+                responses::ev_function_call(
+                    call_id,
+                    "shell_command",
+                    &serde_json::to_string(&args)?,
+                ),
+                responses::ev_completed("resp-1"),
+            ]),
+            responses::sse(vec![
+                responses::ev_response_created("resp-2"),
+                responses::ev_assistant_message("msg-1", "done"),
+                responses::ev_completed("resp-2"),
+            ]),
+        ],
+    )
+    .await;
+
+    let mut cmd = test.cmd();
+    let output = cmd
+        .env_remove("CODEX_API_KEY")
+        .env_remove("OPENAI_API_KEY")
+        .arg("-c")
+        .arg(&provider_override)
+        .arg("-c")
+        .arg("model_provider=\"mock\"")
+        .arg("--skip-git-repo-check")
+        .arg("-s")
+        .arg("workspace-write")
+        .arg("run the ripgrep preprocessor proof of concept")
+        .output()?;
+
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    let stderr = String::from_utf8_lossy(&output.stderr);
+    assert!(
+        !output.status.success(),
+        "escaped rg --pre should be blocked, but codex-exec succeeded\nStatus: {}\nStdout:\n{}\nStderr:\n{}",
+        output.status,
+        stdout,
+        stderr
+    );
+    assert!(
+        stderr.contains("rejected")
+            || stderr.contains("approval")
+            || stderr.contains("unacceptable risk"),
+        "blocked run should report approval rejection\nStatus: {}\nStdout:\n{}\nStderr:\n{}",
+        output.status,
+        stdout,
+        stderr
+    );
+    if let Some(tool_output) = resp_mock.function_call_output_text(call_id) {
+        assert!(
+            !tool_output.contains("Exit code: 0"),
+            "blocked command should not return a successful shell result: {tool_output}"
+        );
+    }
+    assert!(!marker.exists(), "ripgrep --pre helper should not execute");
+
+    Ok(())
+}
--- a/codex-rs/shell-command/src/bash.rs
+++ b/codex-rs/shell-command/src/bash.rs
@@ -1,3 +1,4 @@
+use std::borrow::Cow;
 use std::path::PathBuf;

 use tree_sitter::Node;
@@ -115,8 +116,55 @@ pub fn extract_bash_command(command: &[String]) -> Option<(&str, &str)> {
 pub fn parse_shell_lc_plain_commands(command: &[String]) -> Option<Vec<Vec<String>>> {
    let (_, script) = extract_bash_command(command)?;

-    let tree = try_parse_shell(script)?;
-    try_parse_word_only_commands_sequence(&tree, script)
+    let script = strip_line_continuations(script);
+    let tree = try_parse_shell(&script)?;
+    try_parse_word_only_commands_sequence(&tree, &script)
+}
+
+fn strip_line_continuations(script: &str) -> Cow<'_, str> {
+    if !script.contains("\\\n") {
+        return Cow::Borrowed(script);
+    }
+
+    let mut stripped = String::with_capacity(script.len());
+    let mut chars = script.chars().peekable();
+    let mut escaped = false;
+    let mut in_double_quote = false;
+    let mut in_single_quote = false;
+    let mut stripped_any = false;
+
+    while let Some(ch) = chars.next() {
+        if escaped {
+            stripped.push(ch);
+            escaped = false;
+        } else if ch == '\\' && !in_single_quote {
+            if chars.peek() == Some(&'\n') {
+                chars.next();
+                stripped_any = true;
+                continue;
+            }
+
+            stripped.push(ch);
+            escaped = true;
+        } else {
+            match ch {
+                '\'' if !in_double_quote => {
+                    in_single_quote = !in_single_quote;
+                }
+                '"' if !in_single_quote => {
+                    in_double_quote = !in_double_quote;
+                }
+                _ => {}
+            }
+            stripped.push(ch);
+        }
+    }
+
+    if stripped_any {
+        Cow::Owned(stripped)
+    } else {
+        Cow::Borrowed(script)
+    }
 }

 /// Returns the parsed argv for a single shell command in a here-doc style
@@ -152,10 +200,10 @@ fn parse_plain_command_from_node(cmd: tree_sitter::Node, src: &str) -> Option<Ve
                if word_node.kind() != "word" {
                    return None;
                }
-                words.push(word_node.utf8_text(src.as_bytes()).ok()?.to_owned());
+                words.push(parse_word_or_number(word_node, src)?);
            }
            "word" | "number" => {
-                words.push(child.utf8_text(src.as_bytes()).ok()?.to_owned());
+                words.push(parse_word_or_number(child, src)?);
            }
            "string" => {
                let parsed = parse_double_quoted_string(child, src)?;
@@ -172,8 +220,7 @@ fn parse_plain_command_from_node(cmd: tree_sitter::Node, src: &str) -> Option<Ve
                for part in child.named_children(&mut concat_cursor) {
                    match part.kind() {
                        "word" | "number" => {
-                            concatenated
-                                .push_str(part.utf8_text(src.as_bytes()).ok()?.to_owned().as_str());
+                            concatenated.push_str(&parse_word_or_number(part, src)?);
                        }
                        "string" => {
                            let parsed = parse_double_quoted_string(part, src)?;
@@ -213,13 +260,13 @@ fn parse_heredoc_command_words(cmd: Node<'_>, src: &str) -> Option<Vec<String>>
                {
                    return None;
                }
-                words.push(word_node.utf8_text(src.as_bytes()).ok()?.to_owned());
+                words.push(parse_word_or_number(word_node, src)?);
            }
            "word" | "number" => {
                if !is_literal_word_or_number(child) {
                    return None;
                }
-                words.push(child.utf8_text(src.as_bytes()).ok()?.to_owned());
+                words.push(parse_word_or_number(child, src)?);
            }
            // Allow heredoc constructs that attach stdin to a single command
            // without changing argv matching semantics for the executable
@@ -253,6 +300,35 @@ fn is_allowed_heredoc_attachment_kind(kind: &str) -> bool {
    )
 }

+fn parse_word_or_number(node: Node<'_>, src: &str) -> Option<String> {
+    if !matches!(node.kind(), "word" | "number") {
+        return None;
+    }
+
+    let raw = node.utf8_text(src.as_bytes()).ok()?;
+    unescape_unquoted_word(raw)
+}
+
+fn unescape_unquoted_word(raw: &str) -> Option<String> {
+    let mut unescaped = String::with_capacity(raw.len());
+    let mut chars = raw.chars();
+
+    while let Some(ch) = chars.next() {
+        if ch != '\\' {
+            unescaped.push(ch);
+            continue;
+        }
+
+        match chars.next() {
+            Some('\n') => {}
+            Some(escaped) => unescaped.push(escaped),
+            None => return None,
+        }
+    }
+
+    Some(unescaped)
+}
+
 fn find_single_command_node(root: Node<'_>) -> Option<Node<'_>> {
    let mut stack = vec![root];
    let mut single_command = None;
@@ -360,6 +436,54 @@ mod tests {
        );
    }

+    #[test]
+    fn unescapes_backslashes_in_unquoted_words() {
+        assert_eq!(
+            parse_seq(r"rg --pre\=./pre.sh pattern").unwrap(),
+            vec![vec![
+                "rg".to_string(),
+                "--pre=./pre.sh".to_string(),
+                "pattern".to_string(),
+            ]]
+        );
+        assert_eq!(
+            parse_seq(r"rg --\pre=./pre.sh pattern").unwrap(),
+            vec![vec![
+                "rg".to_string(),
+                "--pre=./pre.sh".to_string(),
+                "pattern".to_string(),
+            ]]
+        );
+    }
+
+    #[test]
+    fn shell_lc_parser_removes_line_continuations_outside_single_quotes() {
+        assert_eq!(
+            parse_shell_lc_plain_commands(&[
+                "bash".to_string(),
+                "-lc".to_string(),
+                "echo foo\\\nbar".to_string(),
+            ]),
+            Some(vec![vec!["echo".to_string(), "foobar".to_string()]])
+        );
+        assert_eq!(
+            parse_shell_lc_plain_commands(&[
+                "bash".to_string(),
+                "-lc".to_string(),
+                "echo \"foo\\\nbar\"".to_string(),
+            ]),
+            Some(vec![vec!["echo".to_string(), "foobar".to_string()]])
+        );
+        assert_eq!(
+            parse_shell_lc_plain_commands(&[
+                "bash".to_string(),
+                "-lc".to_string(),
+                "echo 'foo\\\nbar'".to_string(),
+            ]),
+            Some(vec![vec!["echo".to_string(), "foo\\\nbar".to_string()]])
+        );
+    }
+
    #[test]
    fn accepts_double_quoted_strings_with_newlines() {
        let cmds = parse_seq("git commit -m \"line1\nline2\"").unwrap();
--- a/codex-rs/shell-command/src/command_safety/is_dangerous_command.rs
+++ b/codex-rs/shell-command/src/command_safety/is_dangerous_command.rs
@@ -1,4 +1,6 @@
 use crate::bash::parse_shell_lc_plain_commands;
+use crate::command_safety::ripgrep::RipgrepArgCase;
+use crate::command_safety::ripgrep::ripgrep_command_can_execute_arbitrary_command;
 use std::path::Path;
 #[cfg(windows)]
 #[path = "windows_dangerous_commands.rs"]
@@ -28,6 +30,22 @@ pub fn command_might_be_dangerous(command: &[String]) -> bool {
    false
 }

+pub fn command_can_execute_arbitrary_command(command: &[String]) -> bool {
+    if direct_command_can_execute_arbitrary_command(command) {
+        return true;
+    }
+
+    if let Some(all_commands) = parse_shell_lc_plain_commands(command)
+        && all_commands
+            .iter()
+            .any(|cmd| direct_command_can_execute_arbitrary_command(cmd))
+    {
+        return true;
+    }
+
+    false
+}
+
 /// Returns whether already-tokenized PowerShell words should be treated as
 /// dangerous by the Windows unmatched-command heuristics.
 pub fn is_dangerous_powershell_words(command: &[String]) -> bool {
@@ -143,19 +161,37 @@ pub(crate) fn find_git_subcommand<'a>(
 }

 fn is_dangerous_to_call_with_exec(command: &[String]) -> bool {
-    let cmd0 = command.first().map(String::as_str);
+    let cmd0 = command
+        .first()
+        .and_then(|command| executable_name_lookup_key(command));

-    match cmd0 {
+    match cmd0.as_deref() {
        Some("rm") => matches!(command.get(1).map(String::as_str), Some("-f" | "-rf")),

        // for sudo <cmd> simply do the check for <cmd>
        Some("sudo") => is_dangerous_to_call_with_exec(&command[1..]),

+        Some("rg") => direct_command_can_execute_arbitrary_command(command),
+
        // ── anything else ─────────────────────────────────────────────────
        _ => false,
    }
 }

+fn direct_command_can_execute_arbitrary_command(command: &[String]) -> bool {
+    let cmd0 = command
+        .first()
+        .and_then(|command| executable_name_lookup_key(command));
+
+    match cmd0.as_deref() {
+        Some("sudo") => direct_command_can_execute_arbitrary_command(&command[1..]),
+        Some("rg") => {
+            ripgrep_command_can_execute_arbitrary_command(command, RipgrepArgCase::Sensitive)
+        }
+        _ => false,
+    }
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
@@ -174,6 +210,35 @@ mod tests {
        assert!(command_might_be_dangerous(&vec_str(&["rm", "-f", "/"])));
    }

+    #[test]
+    fn ripgrep_pre_processor_is_dangerous() {
+        for command in [
+            vec_str(&["rg", "--pre", "./pre.sh", "needle", "input.txt"]),
+            vec_str(&["rg", "--pre=./pre.sh", "needle", "input.txt"]),
+            vec_str(&["/usr/bin/rg", "--hostname-bin=./hostname.sh", "needle"]),
+            vec_str(&["zsh", "-c", r"rg --pre\=./pre.sh needle input.txt"]),
+            vec_str(&["zsh", "-lc", r"rg --pre\=./pre.sh needle input.txt"]),
+            vec_str(&["/bin/zsh", "-lc", "rg --pre=./pre.sh needle input.txt"]),
+        ] {
+            assert!(
+                command_might_be_dangerous(&command),
+                "expected {command:?} to be dangerous",
+            );
+        }
+    }
+
+    #[test]
+    fn ripgrep_search_zip_is_not_dangerous() {
+        assert!(!command_might_be_dangerous(&vec_str(&[
+            "rg",
+            "--search-zip",
+            "needle",
+        ])));
+        assert!(!command_might_be_dangerous(&vec_str(&[
+            "rg", "-z", "needle",
+        ])));
+    }
+
    #[test]
    fn direct_powershell_words_reuse_windows_dangerous_detection() {
        let command = vec_str(&["Remove-Item", "test", "-Force"]);
--- a/codex-rs/shell-command/src/command_safety/is_safe_command.rs
+++ b/codex-rs/shell-command/src/command_safety/is_safe_command.rs
@@ -4,6 +4,8 @@ use crate::command_safety::is_dangerous_command::executable_name_lookup_key;
 // may appear before it (e.g., `-C`, `-c`, `--git-dir`).
 // Implemented in `is_dangerous_command` and shared here.
 use crate::command_safety::is_dangerous_command::find_git_subcommand;
+use crate::command_safety::ripgrep::RipgrepArgCase;
+use crate::command_safety::ripgrep::is_safe_ripgrep_command;
 use crate::command_safety::windows_safe_commands::is_safe_command_windows;
 #[cfg(windows)]
 use crate::command_safety::windows_safe_commands::is_safe_powershell_words as is_safe_powershell_words_windows;
@@ -127,27 +129,7 @@ fn is_safe_to_call_with_exec(command: &[String]) -> bool {
        }

        // Ripgrep
-        Some("rg") => {
-            const UNSAFE_RIPGREP_OPTIONS_WITH_ARGS: &[&str] = &[
-                // Takes an arbitrary command that is executed for each match.
-                "--pre",
-                // Takes a command that can be used to obtain the local hostname.
-                "--hostname-bin",
-            ];
-            const UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS: &[&str] = &[
-                // Calls out to other decompression tools, so do not auto-approve
-                // out of an abundance of caution.
-                "--search-zip",
-                "-z",
-            ];
-
-            !command.iter().any(|arg| {
-                UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS.contains(&arg.as_str())
-                    || UNSAFE_RIPGREP_OPTIONS_WITH_ARGS
-                        .iter()
-                        .any(|&opt| arg == opt || arg.starts_with(&format!("{opt}=")))
-            })
-        }
+        Some("rg") => is_safe_ripgrep_command(command, RipgrepArgCase::Sensitive),

        // Git
        Some("git") => is_safe_git_command(command),
@@ -588,7 +570,10 @@ mod tests {
        // Unsafe flags that do not take an argument (present verbatim).
        for args in [
            vec_str(&["rg", "--search-zip", "files"]),
+            vec_str(&["rg", "--search-zip=true", "files"]),
            vec_str(&["rg", "-z", "files"]),
+            vec_str(&["rg", "-zn", "files"]),
+            vec_str(&["rg", "-nz", "files"]),
        ] {
            assert!(
                !is_safe_to_call_with_exec(&args),
@@ -610,6 +595,24 @@ mod tests {
        }
    }

+    #[test]
+    fn bash_lc_escaped_ripgrep_options_are_unsafe() {
+        for script in [
+            r"rg --pre\=./pre.sh files",
+            r"rg --\pre=./pre.sh files",
+            r"rg --hostname\-bin=hostname files",
+            r"rg -\z files",
+            r"rg -\zn needle .",
+            "rg --pr\\\ne=./pre.sh files",
+            "rg \"--pr\\\ne=./pre.sh\" files",
+        ] {
+            assert!(
+                !is_known_safe_command(&vec_str(&["bash", "-lc", script])),
+                "expected {script:?} to require approval after shell unescaping",
+            );
+        }
+    }
+
    #[test]
    fn windows_powershell_full_path_is_safe() {
        if !cfg!(windows) {
--- a/codex-rs/shell-command/src/command_safety/mod.rs
+++ b/codex-rs/shell-command/src/command_safety/mod.rs
@@ -1,4 +1,5 @@
 mod powershell_parser;
+mod ripgrep;

 pub mod is_dangerous_command;
 pub mod is_safe_command;
--- a/codex-rs/shell-command/src/command_safety/ripgrep.rs
+++ b/codex-rs/shell-command/src/command_safety/ripgrep.rs
@@ -0,0 +1,141 @@
+use std::borrow::Cow;
+
+#[derive(Clone, Copy, Eq, PartialEq)]
+pub(crate) enum RipgrepArgCase {
+    Sensitive,
+    AsciiInsensitive,
+}
+
+pub(crate) fn is_safe_ripgrep_command(command: &[String], arg_case: RipgrepArgCase) -> bool {
+    !command
+        .iter()
+        .skip(1)
+        .map(String::as_str)
+        .any(|arg| is_unsafe_ripgrep_arg(arg, arg_case))
+}
+
+pub(crate) fn ripgrep_command_can_execute_arbitrary_command(
+    command: &[String],
+    arg_case: RipgrepArgCase,
+) -> bool {
+    command.iter().skip(1).map(String::as_str).any(|arg| {
+        let normalized = normalize_arg(arg, arg_case);
+        ripgrep_arg_can_execute_arbitrary_command(normalized.as_ref())
+    })
+}
+
+fn is_unsafe_ripgrep_arg(arg: &str, arg_case: RipgrepArgCase) -> bool {
+    let normalized = normalize_arg(arg, arg_case);
+    if ripgrep_arg_can_execute_arbitrary_command(normalized.as_ref()) {
+        return true;
+    }
+
+    match normalized.as_ref() {
+        // Calls out to other decompression tools, so do not auto-approve
+        // out of an abundance of caution.
+        "--search-zip" => true,
+        "-z" => true,
+        _ => {
+            normalized.starts_with("--search-zip=")
+                || ripgrep_short_options_contain_search_zip(arg, arg_case)
+        }
+    }
+}
+
+fn ripgrep_arg_can_execute_arbitrary_command(normalized_arg: &str) -> bool {
+    matches!(
+        normalized_arg,
+        // Takes an arbitrary command that is executed for each match.
+        "--pre"
+        // Takes a command that can be used to obtain the local hostname.
+        | "--hostname-bin"
+    ) || normalized_arg.starts_with("--pre=")
+        || normalized_arg.starts_with("--hostname-bin=")
+}
+
+fn normalize_arg(arg: &str, arg_case: RipgrepArgCase) -> Cow<'_, str> {
+    match arg_case {
+        RipgrepArgCase::Sensitive => Cow::Borrowed(arg),
+        RipgrepArgCase::AsciiInsensitive => Cow::Owned(arg.to_ascii_lowercase()),
+    }
+}
+
+fn ripgrep_short_options_contain_search_zip(arg: &str, arg_case: RipgrepArgCase) -> bool {
+    let Some(short_options) = arg.strip_prefix('-') else {
+        return false;
+    };
+    if short_options.is_empty() || short_options.starts_with('-') {
+        return false;
+    }
+
+    for option in short_options.chars() {
+        if option == 'z' || (arg_case == RipgrepArgCase::AsciiInsensitive && option == 'Z') {
+            return true;
+        }
+        if ripgrep_short_option_takes_value(option) {
+            return false;
+        }
+    }
+
+    false
+}
+
+fn ripgrep_short_option_takes_value(option: char) -> bool {
+    matches!(
+        option,
+        'A' | 'B' | 'C' | 'E' | 'M' | 'T' | 'd' | 'e' | 'f' | 'g' | 'j' | 'm' | 'r' | 't'
+    )
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn vec_str(args: &[&str]) -> Vec<String> {
+        args.iter().map(ToString::to_string).collect()
+    }
+
+    #[test]
+    fn rejects_ripgrep_options_that_can_spawn_processes() {
+        for args in [
+            vec_str(&["rg", "--pre", "pwned", "files"]),
+            vec_str(&["rg", "--pre=pwned", "files"]),
+            vec_str(&["rg", "--hostname-bin", "pwned", "files"]),
+            vec_str(&["rg", "--hostname-bin=pwned", "files"]),
+            vec_str(&["rg", "--search-zip", "files"]),
+            vec_str(&["rg", "--search-zip=true", "files"]),
+            vec_str(&["rg", "-z", "files"]),
+            vec_str(&["rg", "-zn", "files"]),
+            vec_str(&["rg", "-nz", "files"]),
+        ] {
+            assert!(
+                !is_safe_ripgrep_command(&args, RipgrepArgCase::Sensitive),
+                "expected {args:?} to be unsafe",
+            );
+        }
+    }
+
+    #[test]
+    fn rejects_case_insensitive_ripgrep_options() {
+        for args in [
+            vec_str(&["rg", "--PRE", "pwned", "files"]),
+            vec_str(&["rg", "--HOSTNAME-BIN=pwned", "files"]),
+            vec_str(&["rg", "--SEARCH-ZIP", "files"]),
+            vec_str(&["rg", "--SEARCH-ZIP=true", "files"]),
+            vec_str(&["rg", "-Z", "files"]),
+            vec_str(&["rg", "-ZN", "files"]),
+            vec_str(&["rg", "-Fz", "needle", "."]),
+        ] {
+            assert!(
+                !is_safe_ripgrep_command(&args, RipgrepArgCase::AsciiInsensitive),
+                "expected {args:?} to be unsafe with case insensitive matching",
+            );
+        }
+
+        let args = vec_str(&["rg", "-fz", "needle", "."]);
+        assert!(
+            is_safe_ripgrep_command(&args, RipgrepArgCase::AsciiInsensitive),
+            "expected lowercase -f to consume z as its pattern-file value",
+        );
+    }
+}
--- a/codex-rs/shell-command/src/command_safety/windows_safe_commands.rs
+++ b/codex-rs/shell-command/src/command_safety/windows_safe_commands.rs
@@ -1,6 +1,8 @@
 use crate::command_safety::is_safe_command::is_safe_git_command;
 use crate::command_safety::powershell_parser::PowershellParseOutcome;
 use crate::command_safety::powershell_parser::parse_with_powershell_ast;
+use crate::command_safety::ripgrep::RipgrepArgCase;
+use crate::command_safety::ripgrep::is_safe_ripgrep_command;
 use std::path::Path;

 /// On Windows, we conservatively allow only clearly read-only PowerShell invocations
@@ -190,7 +192,7 @@ pub(crate) fn is_safe_powershell_words(words: &[String]) -> bool {

        "git" => is_safe_git_command(words),

-        "rg" => is_safe_ripgrep(words),
+        "rg" => is_safe_ripgrep_command(words, RipgrepArgCase::AsciiInsensitive),

        // Extra safety: explicitly prohibit common side-effecting cmdlets regardless of args.
        "set-content" | "add-content" | "out-file" | "new-item" | "remove-item" | "move-item"
@@ -206,21 +208,6 @@ pub(crate) fn is_safe_powershell_words(words: &[String]) -> bool {
    }
 }

-/// Checks that an `rg` invocation avoids options that can spawn arbitrary executables.
-fn is_safe_ripgrep(words: &[String]) -> bool {
-    const UNSAFE_RIPGREP_OPTIONS_WITH_ARGS: &[&str] = &["--pre", "--hostname-bin"];
-    const UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS: &[&str] = &["--search-zip", "-z"];
-
-    !words.iter().skip(1).any(|arg| {
-        let arg_lc = arg.to_ascii_lowercase();
-        // Examples rejected here: "pwsh -Command 'rg --pre cat pattern'" and "pwsh -Command 'rg --search-zip pattern'".
-        UNSAFE_RIPGREP_OPTIONS_WITHOUT_ARGS.contains(&arg_lc.as_str())
-            || UNSAFE_RIPGREP_OPTIONS_WITH_ARGS
-                .iter()
-                .any(|opt| arg_lc == *opt || arg_lc.starts_with(&format!("{opt}=")))
-    })
-}
-
 #[cfg(all(test, windows))]
 mod tests {
    use super::*;
Author	SHA1	Message	Date
Eva Wong	e0c83c1384	test: cover adjacent ripgrep unsafe spellings	2026-05-07 16:20:59 -07:00
Eva Wong	71ccb7bb9f	refactor: reuse ripgrep safety helper on windows	2026-05-07 16:19:12 -07:00
Eva Wong	4d9e90aba8	chore: narrow ripgrep safety fix scope	2026-05-07 16:17:54 -07:00
Eva Wong	c7a3ce8388	fix: honor exec approval policy overrides	2026-05-07 13:06:56 -07:00
Adrian Bravo	d97ffdfad3	fix: require approval for ripgrep subprocess options	2026-05-06 22:15:33 -07:00
Adrian Bravo	e144a7cea0	fix: enforce ripgrep pre approval in exec policy	2026-05-06 21:47:37 -07:00
Adrian Bravo	4c932ff998	fix: share ripgrep safety helper	2026-05-06 15:03:09 -07:00
Adrian Bravo	b8f8219f61	Fix escaped shell args in safety checks	2026-05-06 12:56:14 -07:00