From d59685f6d4f882223f29c6b565363b1363c2f4f1 Mon Sep 17 00:00:00 2001 From: Jeremy Rose <172423086+nornagon-openai@users.noreply.github.com> Date: Fri, 30 Jan 2026 14:20:23 -0800 Subject: [PATCH] file-search: multi-root walk (#10240) Instead of a separate walker for each root in a multi-root walk, use a single walker. --- codex-rs/app-server/src/fuzzy_file_search.rs | 93 ++++---- codex-rs/core/src/rollout/list.rs | 33 +-- codex-rs/file-search/src/lib.rs | 210 ++++++++++-------- codex-rs/file-search/src/main.rs | 4 +- codex-rs/tui/src/bottom_pane/chat_composer.rs | 2 +- .../tui/src/bottom_pane/file_search_popup.rs | 8 +- codex-rs/tui/src/file_search.rs | 11 +- 7 files changed, 172 insertions(+), 189 deletions(-) diff --git a/codex-rs/app-server/src/fuzzy_file_search.rs b/codex-rs/app-server/src/fuzzy_file_search.rs index eb3dfe00bf..fde3031813 100644 --- a/codex-rs/app-server/src/fuzzy_file_search.rs +++ b/codex-rs/app-server/src/fuzzy_file_search.rs @@ -1,17 +1,14 @@ use std::num::NonZero; -use std::num::NonZeroUsize; use std::path::PathBuf; use std::sync::Arc; use std::sync::atomic::AtomicBool; use codex_app_server_protocol::FuzzyFileSearchResult; use codex_file_search as file_search; -use tokio::task::JoinSet; use tracing::warn; -const LIMIT_PER_ROOT: usize = 50; +const MATCH_LIMIT: usize = 50; const MAX_THREADS: usize = 12; -const COMPUTE_INDICES: bool = true; pub(crate) async fn run_fuzzy_file_search( query: String, @@ -23,64 +20,54 @@ pub(crate) async fn run_fuzzy_file_search( } #[expect(clippy::expect_used)] - let limit_per_root = - NonZero::new(LIMIT_PER_ROOT).expect("LIMIT_PER_ROOT should be a valid non-zero usize"); + let limit = NonZero::new(MATCH_LIMIT).expect("MATCH_LIMIT should be a valid non-zero usize"); let cores = std::thread::available_parallelism() .map(std::num::NonZero::get) .unwrap_or(1); let threads = cores.min(MAX_THREADS); - let threads_per_root = (threads / roots.len()).max(1); - let threads = NonZero::new(threads_per_root).unwrap_or(NonZeroUsize::MIN); + #[expect(clippy::expect_used)] + let threads = NonZero::new(threads.max(1)).expect("threads should be non-zero"); + let search_dirs: Vec = roots.iter().map(PathBuf::from).collect(); - let mut files: Vec = Vec::new(); - let mut join_set = JoinSet::new(); - - for root in roots { - let search_dir = PathBuf::from(&root); - let query = query.clone(); - let cancel_flag = cancellation_flag.clone(); - join_set.spawn_blocking(move || { - match file_search::run( - query.as_str(), - limit_per_root, - &search_dir, - Vec::new(), + let mut files = match tokio::task::spawn_blocking(move || { + file_search::run( + query.as_str(), + search_dirs, + file_search::FileSearchOptions { + limit, threads, - cancel_flag, - COMPUTE_INDICES, - true, - ) { - Ok(res) => Ok((root, res)), - Err(err) => Err((root, err)), - } - }); - } - - while let Some(res) = join_set.join_next().await { - match res { - Ok(Ok((root, res))) => { - for m in res.matches { - let path = m.path; - let file_name = file_search::file_name_from_path(&path); - let result = FuzzyFileSearchResult { - root: root.clone(), - path, - file_name, - score: m.score, - indices: m.indices, - }; - files.push(result); + compute_indices: true, + ..Default::default() + }, + Some(cancellation_flag), + ) + }) + .await + { + Ok(Ok(res)) => res + .matches + .into_iter() + .map(|m| { + let file_name = m.path.file_name().unwrap_or_default(); + FuzzyFileSearchResult { + root: m.root.to_string_lossy().to_string(), + path: m.path.to_string_lossy().to_string(), + file_name: file_name.to_string_lossy().to_string(), + score: m.score, + indices: m.indices, } - } - Ok(Err((root, err))) => { - warn!("fuzzy-file-search in dir '{root}' failed: {err}"); - } - Err(err) => { - warn!("fuzzy-file-search join_next failed: {err}"); - } + }) + .collect::>(), + Ok(Err(err)) => { + warn!("fuzzy-file-search failed: {err}"); + Vec::new() } - } + Err(err) => { + warn!("fuzzy-file-search join failed: {err}"); + Vec::new() + } + }; files.sort_by(file_search::cmp_by_score_desc_then_path_asc::< FuzzyFileSearchResult, diff --git a/codex-rs/core/src/rollout/list.rs b/codex-rs/core/src/rollout/list.rs index edff86606b..18e862c7f0 100644 --- a/codex-rs/core/src/rollout/list.rs +++ b/codex-rs/core/src/rollout/list.rs @@ -6,8 +6,6 @@ use std::num::NonZero; use std::ops::ControlFlow; use std::path::Path; use std::path::PathBuf; -use std::sync::Arc; -use std::sync::atomic::AtomicBool; use time::OffsetDateTime; use time::PrimitiveDateTime; use time::format_description::FormatItem; @@ -1075,30 +1073,17 @@ async fn find_thread_path_by_id_str_in_subdir( // This is safe because we know the values are valid. #[allow(clippy::unwrap_used)] let limit = NonZero::new(1).unwrap(); - // This is safe because we know the values are valid. - #[allow(clippy::unwrap_used)] - let threads = NonZero::new(2).unwrap(); - let cancel = Arc::new(AtomicBool::new(false)); - let exclude: Vec = Vec::new(); - let compute_indices = false; - - let results = file_search::run( - id_str, + let options = file_search::FileSearchOptions { limit, - &root, - exclude, - threads, - cancel, - compute_indices, - false, - ) - .map_err(|e| io::Error::other(format!("file search failed: {e}")))?; + compute_indices: false, + respect_gitignore: false, + ..Default::default() + }; - let found = results - .matches - .into_iter() - .next() - .map(|m| root.join(m.path)); + let results = file_search::run(id_str, vec![root], options, None) + .map_err(|e| io::Error::other(format!("file search failed: {e}")))?; + + let found = results.matches.into_iter().next().map(|m| m.full_path()); // Checking if DB is at parity. // TODO(jif): sqlite migration phase 1 diff --git a/codex-rs/file-search/src/lib.rs b/codex-rs/file-search/src/lib.rs index 39255fa36e..70cb583ebe 100644 --- a/codex-rs/file-search/src/lib.rs +++ b/codex-rs/file-search/src/lib.rs @@ -44,18 +44,25 @@ pub use cli::Cli; /// * `path` – Path to the matched file (relative to the search directory). /// * `indices` – Optional list of character indices that matched the query. /// These are only filled when the caller of [`run`] sets -/// `compute_indices` to `true`. The indices vector follows the +/// `options.compute_indices` to `true`. The indices vector follows the /// guidance from `nucleo::pattern::Pattern::indices`: they are /// unique and sorted in ascending order so that callers can use /// them directly for highlighting. #[derive(Debug, Clone, Serialize, PartialEq, Eq)] pub struct FileMatch { pub score: u32, - pub path: String, + pub path: PathBuf, + pub root: PathBuf, #[serde(skip_serializing_if = "Option::is_none")] pub indices: Option>, // Sorted & deduplicated when present } +impl FileMatch { + pub fn full_path(&self) -> PathBuf { + self.root.join(&self.path) + } +} + /// Returns the final path component for a matched path, falling back to the full path. pub fn file_name_from_path(path: &str) -> String { Path::new(path) @@ -80,7 +87,7 @@ pub struct FileSearchSnapshot { } #[derive(Debug, Clone)] -pub struct SessionOptions { +pub struct FileSearchOptions { pub limit: NonZero, pub exclude: Vec, pub threads: NonZero, @@ -88,7 +95,7 @@ pub struct SessionOptions { pub respect_gitignore: bool, } -impl Default for SessionOptions { +impl Default for FileSearchOptions { fn default() -> Self { Self { #[expect(clippy::unwrap_used)] @@ -133,19 +140,24 @@ impl Drop for FileSearchSession { pub fn create_session( search_directory: &Path, - options: SessionOptions, + options: FileSearchOptions, reporter: Arc, ) -> anyhow::Result { - create_session_inner(search_directory, options, reporter, None) + create_session_inner( + vec![search_directory.to_path_buf()], + options, + reporter, + None, + ) } fn create_session_inner( - search_directory: &Path, - options: SessionOptions, + search_directories: Vec, + options: FileSearchOptions, reporter: Arc, cancel_flag: Option>, ) -> anyhow::Result { - let SessionOptions { + let FileSearchOptions { limit, exclude, threads, @@ -153,7 +165,10 @@ fn create_session_inner( respect_gitignore, } = options; - let override_matcher = build_override_matcher(search_directory, &exclude)?; + let Some(primary_search_directory) = search_directories.first() else { + anyhow::bail!("at least one search directory is required"); + }; + let override_matcher = build_override_matcher(primary_search_directory, &exclude)?; let (work_tx, work_rx) = unbounded(); let notify_tx = work_tx.clone(); @@ -171,7 +186,7 @@ fn create_session_inner( let cancelled = cancel_flag.unwrap_or_else(|| Arc::new(AtomicBool::new(false))); let inner = Arc::new(SessionInner { - search_directory: search_directory.to_path_buf(), + search_directories, limit: limit.get(), threads: threads.get(), compute_indices, @@ -239,19 +254,20 @@ pub async fn run_main( } }; - let cancel_flag = Arc::new(AtomicBool::new(false)); let FileSearchResults { total_match_count, matches, } = run( &pattern_text, - limit, - &search_directory, - exclude, - threads, - cancel_flag, - compute_indices, - true, + vec![search_directory.to_path_buf()], + FileSearchOptions { + limit, + exclude, + threads, + compute_indices, + respect_gitignore: true, + }, + None, )?; let match_count = matches.len(); let matches_truncated = total_match_count > match_count; @@ -268,30 +284,14 @@ pub async fn run_main( /// The worker threads will periodically check `cancel_flag` to see if they /// should stop processing files. -#[allow(clippy::too_many_arguments)] pub fn run( pattern_text: &str, - limit: NonZero, - search_directory: &Path, - exclude: Vec, - threads: NonZero, - cancel_flag: Arc, - compute_indices: bool, - respect_gitignore: bool, + roots: Vec, + options: FileSearchOptions, + cancel_flag: Option>, ) -> anyhow::Result { let reporter = Arc::new(RunReporter::default()); - let session = create_session_inner( - search_directory, - SessionOptions { - limit, - exclude, - threads, - compute_indices, - respect_gitignore, - }, - reporter.clone(), - Some(cancel_flag), - )?; + let session = create_session_inner(roots, options, reporter.clone(), cancel_flag)?; session.update_query(pattern_text); @@ -339,7 +339,7 @@ fn create_pattern(pattern: &str) -> Pattern { } struct SessionInner { - search_directory: PathBuf, + search_directories: Vec, limit: usize, threads: usize, compute_indices: bool, @@ -373,12 +373,39 @@ fn build_override_matcher( Ok(Some(matcher)) } +fn get_file_path<'a>(path: &'a Path, search_directories: &[PathBuf]) -> Option<(usize, &'a str)> { + let mut best_match: Option<(usize, &Path)> = None; + for (idx, root) in search_directories.iter().enumerate() { + if let Ok(rel_path) = path.strip_prefix(root) { + let root_depth = root.components().count(); + match best_match { + Some((best_idx, _)) + if search_directories[best_idx].components().count() >= root_depth => {} + _ => { + best_match = Some((idx, rel_path)); + } + } + } + } + + let (root_idx, rel_path) = best_match?; + rel_path.to_str().map(|p| (root_idx, p)) +} + fn walker_worker( inner: Arc, override_matcher: Option, injector: Injector>, ) { - let mut walk_builder = WalkBuilder::new(&inner.search_directory); + let Some(first_root) = inner.search_directories.first() else { + let _ = inner.work_tx.send(WorkSignal::WalkComplete); + return; + }; + + let mut walk_builder = WalkBuilder::new(first_root); + for root in inner.search_directories.iter().skip(1) { + walk_builder.add(root); + } walk_builder .threads(inner.threads) // Allow hidden entries. @@ -401,36 +428,29 @@ fn walker_worker( let walker = walk_builder.build_parallel(); - fn get_file_path<'a>( - entry_result: &'a Result, - search_directory: &Path, - ) -> Option<&'a str> { - let entry = match entry_result { - Ok(entry) => entry, - Err(_) => return None, - }; - if entry.file_type().is_some_and(|ft| ft.is_dir()) { - return None; - } - let path = entry.path(); - match path.strip_prefix(search_directory) { - Ok(rel_path) => rel_path.to_str(), - Err(_) => None, - } - } - walker.run(|| { const CHECK_INTERVAL: usize = 1024; let mut n = 0; - let search_directory = inner.search_directory.clone(); + let search_directories = inner.search_directories.clone(); let injector = injector.clone(); let cancelled = inner.cancelled.clone(); let shutdown = inner.shutdown.clone(); Box::new(move |entry| { - if let Some(path) = get_file_path(&entry, &search_directory) { - injector.push(Arc::from(path), |path, cols| { - cols[0] = Utf32String::from(path.as_ref()); + let entry = match entry { + Ok(entry) => entry, + Err(_) => return ignore::WalkState::Continue, + }; + if entry.file_type().is_some_and(|ft| ft.is_dir()) { + return ignore::WalkState::Continue; + } + let path = entry.path(); + let Some(full_path) = path.to_str() else { + return ignore::WalkState::Continue; + }; + if let Some((_, relative_path)) = get_file_path(path, &search_directories) { + injector.push(Arc::from(full_path), |_, cols| { + cols[0] = Utf32String::from(relative_path); }); } n += 1; @@ -513,6 +533,8 @@ fn matcher_worker( .take(limit) .filter_map(|match_| { let item = snapshot.get_item(match_.idx)?; + let full_path = item.data.as_ref(); + let (root_idx, relative_path) = get_file_path(Path::new(full_path), &inner.search_directories)?; let indices = if let Some(indices_matcher) = indices_matcher.as_mut() { let mut idx_vec = Vec::::new(); let haystack = item.matcher_columns[0].slice(..); @@ -525,7 +547,8 @@ fn matcher_worker( }; Some(FileMatch { score: match_.score, - path: item.data.as_ref().to_string(), + path: PathBuf::from(relative_path), + root: inner.search_directories[root_idx].clone(), indices, }) }) @@ -723,7 +746,7 @@ mod tests { fn session_scanned_file_count_is_monotonic_across_queries() { let dir = create_temp_tree(200); let reporter = Arc::new(RecordingReporter::default()); - let session = create_session(dir.path(), SessionOptions::default(), reporter.clone()) + let session = create_session(dir.path(), FileSearchOptions::default(), reporter.clone()) .expect("session"); session.update_query("file-00"); @@ -743,7 +766,7 @@ mod tests { fn session_streams_updates_before_walk_complete() { let dir = create_temp_tree(600); let reporter = Arc::new(RecordingReporter::default()); - let session = create_session(dir.path(), SessionOptions::default(), reporter.clone()) + let session = create_session(dir.path(), FileSearchOptions::default(), reporter.clone()) .expect("session"); session.update_query("file-0"); @@ -760,7 +783,7 @@ mod tests { fs::write(dir.path().join("alpha.txt"), "alpha").unwrap(); fs::write(dir.path().join("beta.txt"), "beta").unwrap(); let reporter = Arc::new(RecordingReporter::default()); - let session = create_session(dir.path(), SessionOptions::default(), reporter.clone()) + let session = create_session(dir.path(), FileSearchOptions::default(), reporter.clone()) .expect("session"); session.update_query("alpha"); @@ -776,7 +799,7 @@ mod tests { last_update .matches .iter() - .any(|file_match| file_match.path.contains("beta.txt")) + .any(|file_match| file_match.path.to_string_lossy().contains("beta.txt")) ); } @@ -787,8 +810,8 @@ mod tests { fs::write(dir.path().join("beta.txt"), "beta").unwrap(); let reporter = Arc::new(RecordingReporter::default()); let session = create_session_inner( - dir.path(), - SessionOptions::default(), + vec![dir.path().to_path_buf()], + FileSearchOptions::default(), reporter.clone(), None, ) @@ -816,8 +839,8 @@ mod tests { let reporter_a = Arc::new(RecordingReporter::default()); let session_a = create_session_inner( - root_a.path(), - SessionOptions::default(), + vec![root_a.path().to_path_buf()], + FileSearchOptions::default(), reporter_a, Some(cancel_flag.clone()), ) @@ -825,8 +848,8 @@ mod tests { let reporter_b = Arc::new(RecordingReporter::default()); let session_b = create_session_inner( - root_b.path(), - SessionOptions::default(), + vec![root_b.path().to_path_buf()], + FileSearchOptions::default(), reporter_b.clone(), Some(cancel_flag), ) @@ -846,7 +869,7 @@ mod tests { fn session_emits_updates_when_query_changes() { let dir = create_temp_tree(200); let reporter = Arc::new(RecordingReporter::default()); - let session = create_session(dir.path(), SessionOptions::default(), reporter.clone()) + let session = create_session(dir.path(), FileSearchOptions::default(), reporter.clone()) .expect("session"); session.update_query("zzzzzzzz"); @@ -866,17 +889,15 @@ mod tests { #[test] fn run_returns_matches_for_query() { let dir = create_temp_tree(40); - let results = run( - "file-000", - NonZero::new(20).unwrap(), - dir.path(), - Vec::new(), - NonZero::new(2).unwrap(), - Arc::new(AtomicBool::new(false)), - false, - true, - ) - .expect("run ok"); + let options = FileSearchOptions { + limit: NonZero::new(20).unwrap(), + exclude: Vec::new(), + threads: NonZero::new(2).unwrap(), + compute_indices: false, + respect_gitignore: true, + }; + let results = + run("file-000", vec![dir.path().to_path_buf()], options, None).expect("run ok"); assert!(!results.matches.is_empty()); assert!(results.total_match_count >= results.matches.len()); @@ -884,7 +905,7 @@ mod tests { results .matches .iter() - .any(|m| m.path.contains("file-0000.txt")) + .any(|m| m.path.to_string_lossy().contains("file-0000.txt")) ); } @@ -893,19 +914,14 @@ mod tests { let dir = create_temp_tree(200); let cancel_flag = Arc::new(AtomicBool::new(true)); let search_dir = dir.path().to_path_buf(); + let options = FileSearchOptions { + compute_indices: false, + ..Default::default() + }; let (tx, rx) = std::sync::mpsc::channel(); let handle = thread::spawn(move || { - let result = run( - "file-", - NonZero::new(20).unwrap(), - &search_dir, - Vec::new(), - NonZero::new(2).unwrap(), - cancel_flag, - false, - true, - ); + let result = run("file-", vec![search_dir], options, Some(cancel_flag)); let _ = tx.send(result); }); diff --git a/codex-rs/file-search/src/main.rs b/codex-rs/file-search/src/main.rs index ef39174df5..4715d1bd62 100644 --- a/codex-rs/file-search/src/main.rs +++ b/codex-rs/file-search/src/main.rs @@ -39,7 +39,7 @@ impl Reporter for StdioReporter { // iterating over the characters. let mut indices_iter = indices.iter().peekable(); - for (i, c) in file_match.path.chars().enumerate() { + for (i, c) in file_match.path.to_string_lossy().chars().enumerate() { match indices_iter.peek() { Some(next) if **next == i as u32 => { // ANSI escape code for bold: \x1b[1m ... \x1b[0m @@ -54,7 +54,7 @@ impl Reporter for StdioReporter { } println!(); } else { - println!("{}", file_match.path); + println!("{}", file_match.path.to_string_lossy()); } } diff --git a/codex-rs/tui/src/bottom_pane/chat_composer.rs b/codex-rs/tui/src/bottom_pane/chat_composer.rs index 1d3cf942fb..979213112f 100644 --- a/codex-rs/tui/src/bottom_pane/chat_composer.rs +++ b/codex-rs/tui/src/bottom_pane/chat_composer.rs @@ -1306,7 +1306,7 @@ impl ChatComposer { return (InputResult::None, true); }; - let sel_path = sel.to_string(); + let sel_path = sel.to_string_lossy().to_string(); // If selected path looks like an image (png/jpeg), attach as image instead of inserting text. let is_image = Self::is_image_path(&sel_path); if is_image { diff --git a/codex-rs/tui/src/bottom_pane/file_search_popup.rs b/codex-rs/tui/src/bottom_pane/file_search_popup.rs index e018c7ff8a..3e6ef814a2 100644 --- a/codex-rs/tui/src/bottom_pane/file_search_popup.rs +++ b/codex-rs/tui/src/bottom_pane/file_search_popup.rs @@ -1,3 +1,5 @@ +use std::path::PathBuf; + use codex_file_search::FileMatch; use ratatui::buffer::Buffer; use ratatui::layout::Rect; @@ -89,11 +91,11 @@ impl FileSearchPopup { self.state.ensure_visible(len, len.min(MAX_POPUP_ROWS)); } - pub(crate) fn selected_match(&self) -> Option<&str> { + pub(crate) fn selected_match(&self) -> Option<&PathBuf> { self.state .selected_idx .and_then(|idx| self.matches.get(idx)) - .map(|file_match| file_match.path.as_str()) + .map(|file_match| &file_match.path) } pub(crate) fn calculate_required_height(&self) -> u16 { @@ -116,7 +118,7 @@ impl WidgetRef for &FileSearchPopup { self.matches .iter() .map(|m| GenericDisplayRow { - name: m.path.clone(), + name: m.path.to_string_lossy().to_string(), match_indices: m .indices .as_ref() diff --git a/codex-rs/tui/src/file_search.rs b/codex-rs/tui/src/file_search.rs index 88c56f2316..ff55ac1274 100644 --- a/codex-rs/tui/src/file_search.rs +++ b/codex-rs/tui/src/file_search.rs @@ -6,7 +6,6 @@ //! on every keystroke, and drops the session when the query becomes empty. use codex_file_search as file_search; -use std::num::NonZeroUsize; use std::path::PathBuf; use std::sync::Arc; use std::sync::Mutex; @@ -14,9 +13,6 @@ use std::sync::Mutex; use crate::app_event::AppEvent; use crate::app_event_sender::AppEventSender; -const MAX_FILE_SEARCH_RESULTS: NonZeroUsize = NonZeroUsize::new(20).unwrap(); -const NUM_FILE_SEARCH_THREADS: NonZeroUsize = NonZeroUsize::new(2).unwrap(); - pub(crate) struct FileSearchManager { state: Arc>, search_dir: PathBuf, @@ -75,12 +71,9 @@ impl FileSearchManager { }); let session = file_search::create_session( &self.search_dir, - file_search::SessionOptions { - limit: MAX_FILE_SEARCH_RESULTS, - exclude: Vec::new(), - threads: NUM_FILE_SEARCH_THREADS, + file_search::FileSearchOptions { compute_indices: true, - respect_gitignore: true, + ..Default::default() }, reporter, );