Compare commits

...

1 Commits

Author SHA1 Message Date
Michael Bolin
f8b5119504 feat: introduce prettify_command_for_display() 2025-09-14 23:12:50 -07:00
4 changed files with 547 additions and 6 deletions

View File

@@ -1,3 +1,4 @@
use tree_sitter::Node;
use tree_sitter::Parser;
use tree_sitter::Tree;
use tree_sitter_bash::LANGUAGE as BASH;
@@ -59,9 +60,6 @@ pub fn try_parse_word_only_commands_sequence(tree: &Tree, src: &str) -> Option<V
}
} else {
// Reject any punctuation / operator tokens that are not explicitly allowed.
if kind.chars().any(|c| "&;|".contains(c)) && !ALLOWED_PUNCT_TOKENS.contains(&kind) {
return None;
}
if !(ALLOWED_PUNCT_TOKENS.contains(&kind) || kind.trim().is_empty()) {
// If it's a quote token or operator it's allowed above; we also allow whitespace tokens.
// Any other punctuation like parentheses, braces, redirects, backticks, etc are rejected.
@@ -75,7 +73,7 @@ pub fn try_parse_word_only_commands_sequence(tree: &Tree, src: &str) -> Option<V
let mut commands = Vec::new();
for node in command_nodes {
if let Some(words) = parse_plain_command_from_node(node, src) {
if let Some(words) = extract_words_from_command_node(node, src) {
commands.push(words);
} else {
return None;
@@ -84,7 +82,10 @@ pub fn try_parse_word_only_commands_sequence(tree: &Tree, src: &str) -> Option<V
Some(commands)
}
fn parse_plain_command_from_node(cmd: tree_sitter::Node, src: &str) -> Option<Vec<String>> {
/// Extract the plain words of a simple command node, normalizing quoted
/// strings into their contents. Returns None if the node contains unsupported
/// constructs for a word-only command.
pub(crate) fn extract_words_from_command_node(cmd: Node, src: &str) -> Option<Vec<String>> {
if cmd.kind() != "command" {
return None;
}
@@ -130,6 +131,48 @@ fn parse_plain_command_from_node(cmd: tree_sitter::Node, src: &str) -> Option<Ve
Some(words)
}
/// Find the earliest `command` node in source order within the parse tree.
pub(crate) fn find_first_command_node(tree: &Tree) -> Option<Node<'_>> {
let root = tree.root_node();
let mut cursor = root.walk();
let mut stack = vec![root];
let mut best: Option<Node> = None;
while let Some(node) = stack.pop() {
if node.is_named()
&& node.kind() == "command"
&& best.is_none_or(|b| node.start_byte() < b.start_byte())
{
best = Some(node);
}
for child in node.children(&mut cursor) {
stack.push(child);
}
}
best
}
/// Given the first command node, return the byte index in `src` at which the
/// remainder script starts, only if the next non-whitespace token is an allowed
/// sequencing operator for dropping the leading `cd`.
///
/// Allowed operators: `&&` (conditional on success) and `;` (unconditional).
/// Disallowed: `||`, `|` — removing `cd` would change semantics.
pub(crate) fn remainder_start_after_wrapper_operator(first_cmd: Node, src: &str) -> Option<usize> {
let mut sib = first_cmd.next_sibling()?;
while !sib.is_named() && sib.kind().trim().is_empty() {
sib = sib.next_sibling()?;
}
if sib.is_named() || (sib.kind() != "&&" && sib.kind() != ";") {
return None;
}
let mut idx = sib.end_byte();
let bytes = src.as_bytes();
while idx < bytes.len() && bytes[idx].is_ascii_whitespace() {
idx += 1;
}
if idx >= bytes.len() { None } else { Some(idx) }
}
#[cfg(test)]
mod tests {
use super::*;
@@ -215,4 +258,45 @@ mod tests {
fn rejects_trailing_operator_parse_error() {
assert!(parse_seq("ls &&").is_none());
}
#[test]
fn find_first_command_node_finds_cd() {
let src = "cd foo && ls; git status";
let tree = try_parse_bash(src).unwrap();
let first = find_first_command_node(&tree).unwrap();
let words = extract_words_from_command_node(first, src).unwrap();
assert_eq!(words, vec!["cd".to_string(), "foo".to_string()]);
}
#[test]
fn remainder_after_wrapper_operator_allows_and_and_semicolon() {
// Allows &&
let src = "cd foo && ls; git status";
let tree = try_parse_bash(src).unwrap();
let first = find_first_command_node(&tree).unwrap();
let idx = remainder_start_after_wrapper_operator(first, src).unwrap();
assert_eq!(&src[idx..], "ls; git status");
// Allows ;
let src2 = "cd foo; ls";
let tree2 = try_parse_bash(src2).unwrap();
let first2 = find_first_command_node(&tree2).unwrap();
let idx2 = remainder_start_after_wrapper_operator(first2, src2).unwrap();
assert_eq!(&src2[idx2..], "ls");
}
#[test]
fn remainder_after_wrapper_operator_rejects_or_and_pipe() {
// Rejects ||
let src = "cd foo || echo hi";
let tree = try_parse_bash(src).unwrap();
let first = find_first_command_node(&tree).unwrap();
assert!(remainder_start_after_wrapper_operator(first, src).is_none());
// Rejects |
let src2 = "cd foo | rg bar";
let tree2 = try_parse_bash(src2).unwrap();
let first2 = find_first_command_node(&tree2).unwrap();
assert!(remainder_start_after_wrapper_operator(first2, src2).is_none());
}
}

View File

@@ -11,6 +11,7 @@ use std::time::Duration;
use crate::AuthManager;
use crate::client_common::REVIEW_PROMPT;
use crate::event_mapping::map_response_item_to_event_messages;
use crate::normalize_command::try_normalize_command;
use async_channel::Receiver;
use async_channel::Sender;
use codex_apply_patch::ApplyPatchAction;
@@ -2774,6 +2775,24 @@ async fn handle_container_exec_with_params(
)
}
None => {
// Normalize a classic bash -lc "cd ... && ..." wrapper into updated
// cwd and a cleaner script for display, if applicable.
let command_for_display = match try_normalize_command(&params.command, &params.cwd) {
Some(norm) => {
// Once we have confidence in this approach, we will use
// the normalized command (and cwd) for execution as
// well.
//
// Note that behavior changes slightly in that the
// command is executed without considering whether
// `cd` succeeded, but this is probably OK because the
// argument to `cd` is used as the cwd for the exec,
// so it will fail if the directory does not exist.
norm.command_for_display
}
None => params.command.clone(),
};
let safety = {
let state = sess.state.lock_unchecked();
assess_command_safety(
@@ -2784,7 +2803,6 @@ async fn handle_container_exec_with_params(
params.with_escalated_permissions.unwrap_or(false),
)
};
let command_for_display = params.command.clone();
(params, safety, command_for_display)
}
};

View File

@@ -35,6 +35,7 @@ mod mcp_connection_manager;
mod mcp_tool_call;
mod message_history;
mod model_provider_info;
mod normalize_command;
pub mod parse_command;
mod truncate;
mod unified_exec;

View File

@@ -0,0 +1,438 @@
use crate::bash::extract_words_from_command_node;
use crate::bash::find_first_command_node;
use crate::bash::remainder_start_after_wrapper_operator;
use crate::bash::try_parse_bash;
use crate::bash::try_parse_word_only_commands_sequence;
use std::path::Path;
use std::path::PathBuf;
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NormalizedCommand {
pub command: Vec<String>,
pub cwd: PathBuf,
pub command_for_display: Vec<String>,
}
/// Normalize a shell command for execution and display.
///
/// For classic wrappers like `bash -lc "cd repo && git status"` returns:
/// - command: ["bash", "-lc", "git status"]
/// - cwd: original cwd joined with "repo"
/// - command_for_display:
/// - If the remainder is a single exec-able command, return it tokenized
/// (e.g., ["rg", "--files"]).
/// - Otherwise, wrap the script as ["bash", "-lc", "<script>"] so that
/// [`crate::parse_command::parse_command`] (which currently checks for
/// bash -lc) can handle it. Even if the original used zsh, we standardize
/// to bash here to match [`crate::parse_command::parse_command`]s
/// expectations.
///
/// Returns None when no normalization is needed.
pub(crate) fn try_normalize_command(command: &[String], cwd: &Path) -> Option<NormalizedCommand> {
let invocation = parse_shell_script_from_shell_invocation(command)?;
let shell_script = invocation.command.clone();
let tree = try_parse_bash(&shell_script)?;
let first_cmd = find_first_command_node(&tree)?;
let words = extract_words_from_command_node(first_cmd, &shell_script)?;
if !is_command_cd_to_directory(&words) {
return None;
}
let idx = remainder_start_after_wrapper_operator(first_cmd, &shell_script)?;
let remainder = shell_script[idx..].to_string();
// For parse_command consumption: if the remainder is a single exec-able
// command, return it as argv tokens; otherwise, wrap it in ["bash","-lc",..].
let command_for_display = try_parse_bash(&remainder)
.and_then(|tree| try_parse_word_only_commands_sequence(&tree, &remainder))
.and_then(|mut cmds| match cmds.len() {
1 => cmds.pop(),
_ => None,
})
.unwrap_or_else(|| vec!["bash".to_string(), "-lc".to_string(), remainder.clone()]);
// Compute new cwd only after confirming the wrapper/operator shape
let dir = &words[1];
let new_cwd = if Path::new(dir).is_absolute() {
PathBuf::from(dir)
} else {
// TODO(mbolin): Is there anything to worry about if `dir` is a
// symlink or contains `..` components?
cwd.join(dir)
};
Some(NormalizedCommand {
command: vec![invocation.executable, invocation.flag, remainder],
cwd: new_cwd,
command_for_display,
})
}
/// This is similar to [`crate::shell::strip_bash_lc`] and should potentially
/// be unified with it.
fn parse_shell_script_from_shell_invocation(command: &[String]) -> Option<ShellScriptInvocation> {
// Allowed shells
fn is_allowed_exe(s: &str) -> bool {
matches!(s, "bash" | "/bin/bash" | "zsh" | "/bin/zsh")
}
match command {
// exactly three items: <exe> <flag> <command>
[exe, flag, cmd] if is_allowed_exe(exe) && (flag == "-lc" || flag == "-c") => {
Some(ShellScriptInvocation {
executable: exe.clone(),
flag: flag.clone(),
command: cmd.clone(),
})
}
// exactly four items: <exe> -l -c <command>
[exe, flag1, flag2, cmd] if is_allowed_exe(exe) && flag1 == "-l" && flag2 == "-c" => {
Some(ShellScriptInvocation {
executable: exe.clone(),
flag: "-lc".to_string(),
command: cmd.clone(),
})
}
_ => None,
}
}
fn is_command_cd_to_directory(command: &[String]) -> bool {
matches!(command, [first, _dir] if first == "cd")
}
#[derive(Debug, Clone, PartialEq, Eq)]
struct ShellScriptInvocation {
executable: String,
/// Single arg, so `-l -c` must be collapsed to `-lc`.
/// Must be written so `command` follows `flag`, so must
/// be `-lc` and not `-cl`.
flag: String,
command: String,
}
#[cfg(test)]
mod tests {
use super::NormalizedCommand;
use super::try_normalize_command;
use pretty_assertions::assert_eq;
use std::path::PathBuf;
#[test]
fn cd_prefix_in_bash_script_is_hidden() {
let command = vec![
"bash".to_string(),
"-lc".to_string(),
"cd foo && echo hi".to_string(),
];
let cwd = PathBuf::from("/baz");
let norm = try_normalize_command(&command, &cwd);
assert_eq!(
norm,
Some(NormalizedCommand {
command: vec!["bash".into(), "-lc".into(), "echo hi".into()],
cwd: PathBuf::from("/baz/foo"),
command_for_display: vec!["echo".into(), "hi".into()],
})
);
}
#[test]
fn cd_shell_var_not_matched() {
let command = vec![
"bash".to_string(),
"-lc".to_string(),
"cd $SOME_DIR && ls".to_string(),
];
let cwd = PathBuf::from("/baz");
let norm = try_normalize_command(&command, &cwd);
assert_eq!(
norm, None,
"Not a classic wrapper (cd arg is a variable), so no normalization."
);
}
#[test]
fn cd_prefix_with_quoted_path_is_hidden() {
let command = vec![
"bash".to_string(),
"-lc".to_string(),
" cd 'foo bar' && ls".to_string(),
];
let cwd = PathBuf::from("/baz");
let norm = try_normalize_command(&command, &cwd);
assert_eq!(
norm,
Some(NormalizedCommand {
command: vec!["bash".into(), "-lc".into(), "ls".into()],
cwd: PathBuf::from("/baz/foo bar"),
command_for_display: vec!["ls".into()],
})
);
}
#[test]
fn cd_prefix_with_additional_cd_is_preserved() {
let command = vec![
"bash".to_string(),
"-lc".to_string(),
"cd foo && cd bar && ls".to_string(),
];
let cwd = PathBuf::from("/baz");
let norm = try_normalize_command(&command, &cwd);
assert_eq!(
norm,
Some(NormalizedCommand {
command: vec!["bash".into(), "-lc".into(), "cd bar && ls".into()],
cwd: PathBuf::from("/baz/foo"),
command_for_display: vec!["bash".into(), "-lc".into(), "cd bar && ls".into(),],
})
);
}
#[test]
fn cd_prefix_with_or_connector_is_preserved() {
let command = vec![
"bash".to_string(),
"-lc".to_string(),
"cd and_and || echo \"couldn't find the dir for &&\"".to_string(),
];
let cwd = PathBuf::from("/baz");
let norm = try_normalize_command(&command, &cwd);
// Not a classic wrapper (uses ||), so no normalization.
assert_eq!(norm, None);
}
#[test]
fn cd_prefix_preserves_operators_between_remaining_commands() {
let command = vec![
"bash".to_string(),
"-lc".to_string(),
"cd foo && ls && git status".to_string(),
];
let cwd = PathBuf::from("/baz");
let norm = try_normalize_command(&command, &cwd);
assert_eq!(
norm,
Some(NormalizedCommand {
command: vec!["bash".into(), "-lc".into(), "ls && git status".into()],
cwd: PathBuf::from("/baz/foo"),
command_for_display: vec!["bash".into(), "-lc".into(), "ls && git status".into(),],
})
);
}
#[test]
fn cd_prefix_preserves_pipelines() {
let command = vec![
"bash".to_string(),
"-lc".to_string(),
"cd foo && ls | rg foo".to_string(),
];
let cwd = PathBuf::from("/baz");
let norm = try_normalize_command(&command, &cwd);
assert_eq!(
norm,
Some(NormalizedCommand {
command: vec!["bash".into(), "-lc".into(), "ls | rg foo".into()],
cwd: PathBuf::from("/baz/foo"),
command_for_display: vec!["bash".into(), "-lc".into(), "ls | rg foo".into(),],
})
);
}
#[test]
fn cd_prefix_preserves_sequence_operator() {
let command = vec![
"bash".to_string(),
"-lc".to_string(),
"cd foo && ls; git status".to_string(),
];
let cwd = PathBuf::from("/baz");
let norm = try_normalize_command(&command, &cwd);
assert_eq!(
norm,
Some(NormalizedCommand {
command: vec!["bash".into(), "-lc".into(), "ls; git status".into()],
cwd: PathBuf::from("/baz/foo"),
command_for_display: vec!["bash".into(), "-lc".into(), "ls; git status".into(),],
})
);
}
#[test]
fn cd_prefix_with_semicolon_is_hidden() {
let command = vec![
"bash".to_string(),
"-lc".to_string(),
"cd foo; ls".to_string(),
];
let cwd = PathBuf::from("/baz");
let norm = try_normalize_command(&command, &cwd);
assert_eq!(
norm,
Some(NormalizedCommand {
command: vec!["bash".into(), "-lc".into(), "ls".into()],
cwd: PathBuf::from("/baz/foo"),
command_for_display: vec!["ls".into()],
})
);
}
#[test]
fn pushd_prefix_is_not_normalized() {
let command = vec![
"bash".to_string(),
"-lc".to_string(),
"pushd foo && ls".to_string(),
];
let cwd = PathBuf::from("/baz");
// We do not normalize pushd because omitting pushd from the execution
// would have different semantics than the original command.
let norm = try_normalize_command(&command, &cwd);
assert_eq!(norm, None);
}
#[test]
fn supports_zsh_and_bin_bash_and_split_flags() {
// zsh -lc
let zsh = vec!["zsh".into(), "-lc".into(), "cd foo && ls".into()];
let cwd = PathBuf::from("/baz");
let norm = try_normalize_command(&zsh, &cwd).unwrap();
assert_eq!(
norm.command,
["zsh", "-lc", "ls"]
.iter()
.map(|s| s.to_string())
.collect::<Vec<_>>()
);
assert_eq!(norm.cwd, PathBuf::from("/baz/foo"));
assert_eq!(norm.command_for_display, vec!["ls".to_string()]);
// /bin/bash -l -c <cmd>
let bash_split = vec![
"/bin/bash".into(),
"-l".into(),
"-c".into(),
"cd foo && ls".into(),
];
let norm2 = try_normalize_command(&bash_split, &cwd).unwrap();
assert_eq!(
norm2.command,
["/bin/bash", "-lc", "ls"]
.iter()
.map(|s| s.to_string())
.collect::<Vec<_>>()
);
assert_eq!(norm2.cwd, PathBuf::from("/baz/foo"));
assert_eq!(norm2.command_for_display, vec!["ls".to_string()]);
}
#[test]
fn supports_dash_c_flag() {
let command = vec!["bash".into(), "-c".into(), "cd foo && ls".into()];
let cwd = PathBuf::from("/baz");
let norm = try_normalize_command(&command, &cwd).unwrap();
assert_eq!(
norm.command,
["bash", "-c", "ls"]
.iter()
.map(|s| s.to_string())
.collect::<Vec<_>>()
);
assert_eq!(norm.cwd, PathBuf::from("/baz/foo"));
assert_eq!(norm.command_for_display, vec!["ls".to_string()]);
}
#[test]
fn rejects_pipe_operator_immediately_after_cd() {
let command = vec!["bash".into(), "-lc".into(), "cd foo | rg bar".into()];
let cwd = PathBuf::from("/baz");
let norm = try_normalize_command(&command, &cwd);
assert_eq!(norm, None);
}
#[test]
fn rejects_unknown_shell_and_bad_flag_order() {
// Unknown shell
let unknown = vec!["sh".into(), "-lc".into(), "cd foo && ls".into()];
let cwd = PathBuf::from("/baz");
assert_eq!(try_normalize_command(&unknown, &cwd), None);
// Bad flag order -cl (unsupported)
let bad_flag = vec!["bash".into(), "-cl".into(), "cd foo && ls".into()];
assert_eq!(try_normalize_command(&bad_flag, &cwd), None);
}
#[test]
fn absolute_directory_sets_absolute_cwd() {
let command = vec!["bash".into(), "-lc".into(), "cd /tmp && ls".into()];
let cwd = PathBuf::from("/baz");
let norm = try_normalize_command(&command, &cwd).unwrap();
assert_eq!(norm.cwd, PathBuf::from("/tmp"));
assert_eq!(
norm.command,
["bash", "-lc", "ls"]
.iter()
.map(|s| s.to_string())
.collect::<Vec<_>>()
);
assert_eq!(norm.command_for_display, vec!["ls".to_string()]);
}
#[test]
fn non_shell_command_is_returned_unmodified() {
let command = vec!["rg".to_string(), "--files".to_string()];
let cwd = PathBuf::from("/baz");
let norm = try_normalize_command(&command, &cwd);
// Not a shell wrapper, so no normalization.
assert_eq!(norm, None);
}
#[test]
fn cd_prefix_with_or_operator_is_preserved() {
let command = vec![
"bash".to_string(),
"-lc".to_string(),
"cd missing && ls || echo 'fallback'".to_string(),
];
let cwd = PathBuf::from("/baz");
let norm = try_normalize_command(&command, &cwd);
assert_eq!(
norm,
Some(NormalizedCommand {
command: vec!["bash".into(), "-lc".into(), "ls || echo 'fallback'".into()],
cwd: PathBuf::from("/baz/missing"),
command_for_display: vec![
"bash".into(),
"-lc".into(),
"ls || echo 'fallback'".into(),
],
})
);
}
#[test]
fn cd_prefix_with_ampersands_in_string_is_hidden() {
let command = vec![
"bash".to_string(),
"-lc".to_string(),
"cd foo && echo \"checking && markers\"".to_string(),
];
let cwd = PathBuf::from("/baz");
let norm = try_normalize_command(&command, &cwd);
assert_eq!(
norm,
Some(NormalizedCommand {
command: vec![
"bash".into(),
"-lc".into(),
"echo \"checking && markers\"".into(),
],
cwd: PathBuf::from("/baz/foo"),
command_for_display: vec!["echo".into(), "checking && markers".into(),],
})
);
}
}