parse rg | head a search (#7797)

This commit is contained in:
pakrym-oai
2025-12-09 18:30:16 -08:00
committed by GitHub
parent 893f5261eb
commit 967d063f4b

View File

@@ -117,9 +117,6 @@ mod tests {
query: None,
path: None,
},
ParsedCommand::Unknown {
cmd: "head -n 40".to_string(),
},
],
);
}
@@ -143,16 +140,11 @@ mod tests {
let inner = "rg -n \"BUG|FIXME|TODO|XXX|HACK\" -S | head -n 200";
assert_parsed(
&vec_str(&["bash", "-lc", inner]),
vec![
ParsedCommand::Search {
cmd: "rg -n 'BUG|FIXME|TODO|XXX|HACK' -S".to_string(),
query: Some("BUG|FIXME|TODO|XXX|HACK".to_string()),
path: None,
},
ParsedCommand::Unknown {
cmd: "head -n 200".to_string(),
},
],
vec![ParsedCommand::Search {
cmd: "rg -n 'BUG|FIXME|TODO|XXX|HACK' -S".to_string(),
query: Some("BUG|FIXME|TODO|XXX|HACK".to_string()),
path: None,
}],
);
}
@@ -174,16 +166,11 @@ mod tests {
let inner = "rg --files | head -n 50";
assert_parsed(
&vec_str(&["bash", "-lc", inner]),
vec![
ParsedCommand::Search {
cmd: "rg --files".to_string(),
query: None,
path: None,
},
ParsedCommand::Unknown {
cmd: "head -n 50".to_string(),
},
],
vec![ParsedCommand::Search {
cmd: "rg --files".to_string(),
query: None,
path: None,
}],
);
}
@@ -273,6 +260,19 @@ mod tests {
);
}
#[test]
fn supports_head_file_only() {
let inner = "head Cargo.toml";
assert_parsed(
&vec_str(&["bash", "-lc", inner]),
vec![ParsedCommand::Read {
cmd: inner.to_string(),
name: "Cargo.toml".to_string(),
path: PathBuf::from("Cargo.toml"),
}],
);
}
#[test]
fn supports_cat_sed_n() {
let inner = "cat tui/Cargo.toml | sed -n '1,200p'";
@@ -313,6 +313,19 @@ mod tests {
);
}
#[test]
fn supports_tail_file_only() {
let inner = "tail README.md";
assert_parsed(
&vec_str(&["bash", "-lc", inner]),
vec![ParsedCommand::Read {
cmd: inner.to_string(),
name: "README.md".to_string(),
path: PathBuf::from("README.md"),
}],
);
}
#[test]
fn supports_npm_run_build_is_unknown() {
assert_parsed(
@@ -391,6 +404,19 @@ mod tests {
);
}
#[test]
fn supports_single_string_script_with_cd_and_pipe() {
let inner = r#"cd /Users/pakrym/code/codex && rg -n "codex_api" codex-rs -S | head -n 50"#;
assert_parsed(
&vec_str(&["bash", "-lc", inner]),
vec![ParsedCommand::Search {
cmd: "rg -n codex_api codex-rs -S".to_string(),
query: Some("codex_api".to_string()),
path: Some("codex-rs".to_string()),
}],
);
}
// ---- is_small_formatting_command unit tests ----
#[test]
fn small_formatting_always_true_commands() {
@@ -408,38 +434,43 @@ mod tests {
fn head_behavior() {
// No args -> small formatting
assert!(is_small_formatting_command(&vec_str(&["head"])));
// Numeric count only -> not considered small formatting by implementation
assert!(!is_small_formatting_command(&shlex_split_safe(
"head -n 40"
)));
// Numeric count only -> formatting
assert!(is_small_formatting_command(&shlex_split_safe("head -n 40")));
// With explicit file -> not small formatting
assert!(!is_small_formatting_command(&shlex_split_safe(
"head -n 40 file.txt"
)));
// File only (no count) -> treated as small formatting by implementation
assert!(is_small_formatting_command(&vec_str(&["head", "file.txt"])));
// File only (no count) -> not formatting
assert!(!is_small_formatting_command(&vec_str(&[
"head", "file.txt"
])));
}
#[test]
fn tail_behavior() {
// No args -> small formatting
assert!(is_small_formatting_command(&vec_str(&["tail"])));
// Numeric with plus offset -> not small formatting
assert!(!is_small_formatting_command(&shlex_split_safe(
// Numeric with plus offset -> formatting
assert!(is_small_formatting_command(&shlex_split_safe(
"tail -n +10"
)));
assert!(!is_small_formatting_command(&shlex_split_safe(
"tail -n +10 file.txt"
)));
// Numeric count
assert!(!is_small_formatting_command(&shlex_split_safe(
"tail -n 30"
)));
// Numeric count -> formatting
assert!(is_small_formatting_command(&shlex_split_safe("tail -n 30")));
assert!(!is_small_formatting_command(&shlex_split_safe(
"tail -n 30 file.txt"
)));
// File only -> small formatting by implementation
assert!(is_small_formatting_command(&vec_str(&["tail", "file.txt"])));
// Byte count -> formatting
assert!(is_small_formatting_command(&shlex_split_safe("tail -c 30")));
assert!(is_small_formatting_command(&shlex_split_safe(
"tail -c +10"
)));
// File only (no count) -> not formatting
assert!(!is_small_formatting_command(&vec_str(&[
"tail", "file.txt"
])));
}
#[test]
@@ -714,20 +745,15 @@ mod tests {
#[test]
fn bash_dash_c_pipeline_parsing() {
// Ensure -c is handled similarly to -lc by normalization
// Ensure -c is handled similarly to -lc by shell parsing
let inner = "rg --files | head -n 1";
assert_parsed(
&shlex_split_safe(inner),
vec![
ParsedCommand::Search {
cmd: "rg --files".to_string(),
query: None,
path: None,
},
ParsedCommand::Unknown {
cmd: "head -n 1".to_string(),
},
],
&vec_str(&["bash", "-c", inner]),
vec![ParsedCommand::Search {
cmd: "rg --files".to_string(),
query: None,
path: None,
}],
);
}
@@ -1384,13 +1410,50 @@ fn is_small_formatting_command(tokens: &[String]) -> bool {
// Treat as formatting when no explicit file operand is present.
// Common forms: `head -n 40`, `head -c 100`.
// Keep cases like `head -n 40 file`.
tokens.len() < 3
match tokens {
// `head`
[_] => true,
// `head <file>` or `head -n50`/`head -c100`
[_, arg] => arg.starts_with('-'),
// `head -n 40` / `head -c 100` (no file operand)
[_, flag, count]
if (flag == "-n" || flag == "-c")
&& count.chars().all(|c| c.is_ascii_digit()) =>
{
true
}
_ => false,
}
}
"tail" => {
// Treat as formatting when no explicit file operand is present.
// Common forms: `tail -n +10`, `tail -n 30`.
// Common forms: `tail -n +10`, `tail -n 30`, `tail -c 100`.
// Keep cases like `tail -n 30 file`.
tokens.len() < 3
match tokens {
// `tail`
[_] => true,
// `tail <file>` or `tail -n30`/`tail -n+10`
[_, arg] => arg.starts_with('-'),
// `tail -n 30` / `tail -n +10` (no file operand)
[_, flag, count]
if flag == "-n"
&& (count.chars().all(|c| c.is_ascii_digit())
|| (count.starts_with('+')
&& count[1..].chars().all(|c| c.is_ascii_digit()))) =>
{
true
}
// `tail -c 100` / `tail -c +10` (no file operand)
[_, flag, count]
if flag == "-c"
&& (count.chars().all(|c| c.is_ascii_digit())
|| (count.starts_with('+')
&& count[1..].chars().all(|c| c.is_ascii_digit()))) =>
{
true
}
_ => false,
}
}
"sed" => {
// Keep `sed -n <range> file` (treated as a file read elsewhere);
@@ -1543,6 +1606,16 @@ fn summarize_main_tokens(main_cmd: &[String]) -> ParsedCommand {
};
}
}
if let [path] = tail
&& !path.starts_with('-')
{
let name = short_display_path(path);
return ParsedCommand::Read {
cmd: shlex_join(main_cmd),
name,
path: PathBuf::from(path),
};
}
ParsedCommand::Unknown {
cmd: shlex_join(main_cmd),
}
@@ -1587,6 +1660,16 @@ fn summarize_main_tokens(main_cmd: &[String]) -> ParsedCommand {
};
}
}
if let [path] = tail
&& !path.starts_with('-')
{
let name = short_display_path(path);
return ParsedCommand::Read {
cmd: shlex_join(main_cmd),
name,
path: PathBuf::from(path),
};
}
ParsedCommand::Unknown {
cmd: shlex_join(main_cmd),
}