From 05cf2fc4ce82b4f894031522a7c42698e6e6addd Mon Sep 17 00:00:00 2001 From: Francis Chalissery Date: Thu, 21 May 2026 14:14:01 -0700 Subject: [PATCH] [codex] Make thread search case-insensitive (#23921) ## Summary - make rollout content search prefilter rollout files case-insensitively - keep the no-ripgrep fallback scan and visible snippet matcher aligned with that behavior - cover a lowercase `thread/search` query matching mixed-case conversation content ## Why The rollout-backed `thread/search` path used exact string matching in both its `rg` prefilter and semantic snippet generation. A content result could be missed solely because the query casing did not match the stored conversation text. ## Validation - `just fmt` - `cargo test -p codex-app-server thread_search_returns_content_matches` - `cargo test -p codex-rollout` - `just bazel-lock-update` - `just bazel-lock-check` - `cargo build -p codex-cli` - launched a local Electron dev instance with the rebuilt CLI binary --- codex-rs/Cargo.lock | 1 + .../app-server/tests/suite/v2/thread_list.rs | 4 +-- codex-rs/rollout/Cargo.toml | 1 + codex-rs/rollout/src/search.rs | 33 +++++++++++++------ 4 files changed, 27 insertions(+), 12 deletions(-) diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 3fa4d8315b..7c9955323d 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -3558,6 +3558,7 @@ dependencies = [ "codex-utils-path", "codex-utils-string", "pretty_assertions", + "regex", "serde", "serde_json", "tempfile", diff --git a/codex-rs/app-server/tests/suite/v2/thread_list.rs b/codex-rs/app-server/tests/suite/v2/thread_list.rs index bfb1d4f5e0..e064ff6e25 100644 --- a/codex-rs/app-server/tests/suite/v2/thread_list.rs +++ b/codex-rs/app-server/tests/suite/v2/thread_list.rs @@ -686,7 +686,7 @@ async fn thread_search_returns_content_matches() -> Result<()> { codex_home.path(), "2025-01-02T12-00-00", "2025-01-02T12:00:00Z", - "needle suffix", + "mixed NEEDLE suffix", Some("mock_provider"), /*git_info*/ None, )?; @@ -718,7 +718,7 @@ async fn thread_search_returns_content_matches() -> Result<()> { .map(|result| result.thread.id.as_str()) .collect(); assert_eq!(ids, vec![newer_match, older_match]); - assert_eq!(data[0].snippet, "needle suffix"); + assert_eq!(data[0].snippet, "mixed NEEDLE suffix"); Ok(()) } diff --git a/codex-rs/rollout/Cargo.toml b/codex-rs/rollout/Cargo.toml index ef5a8dc22a..50e5a8594a 100644 --- a/codex-rs/rollout/Cargo.toml +++ b/codex-rs/rollout/Cargo.toml @@ -24,6 +24,7 @@ codex-protocol = { workspace = true } codex-state = { workspace = true } codex-utils-path = { workspace = true } codex-utils-string = { workspace = true } +regex = { workspace = true } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } time = { workspace = true, features = [ diff --git a/codex-rs/rollout/src/search.rs b/codex-rs/rollout/src/search.rs index 1773f5afb3..911e80552a 100644 --- a/codex-rs/rollout/src/search.rs +++ b/codex-rs/rollout/src/search.rs @@ -9,6 +9,8 @@ use codex_protocol::protocol::EventMsg; use codex_protocol::protocol::RolloutItem; use codex_protocol::protocol::RolloutLine; use codex_protocol::protocol::USER_MESSAGE_BEGIN; +use regex::Regex; +use regex::RegexBuilder; use tokio::io::AsyncBufReadExt; use tokio::process::Command; @@ -45,6 +47,7 @@ async fn ripgrep_rollout_paths( let output = match Command::new(rg_command) .arg("-l") .arg("--fixed-strings") + .arg("--ignore-case") .arg("--no-ignore") .arg("--glob") .arg("*.jsonl") @@ -88,6 +91,7 @@ async fn ripgrep_rollout_paths( async fn scan_rollout_paths(root: &Path, search_term: &str) -> io::Result> { let mut matches = HashSet::new(); let mut dirs = vec![root.to_path_buf()]; + let search_term = case_insensitive_literal_regex(search_term)?; while let Some(dir) = dirs.pop() { let mut entries = match tokio::fs::read_dir(dir).await { @@ -107,7 +111,7 @@ async fn scan_rollout_paths(root: &Path, search_term: &str) -> io::Result io::Result io::Result { +async fn rollout_contains(path: &Path, search_term: &Regex) -> io::Result { let file = tokio::fs::File::open(path).await?; let mut lines = tokio::io::BufReader::new(file).lines(); while let Some(line) = lines.next_line().await? { - if line.contains(search_term) { + if search_term.is_match(line.as_str()) { return Ok(true); } } @@ -133,10 +137,11 @@ pub async fn first_rollout_content_match_snippet( ) -> io::Result> { let file = tokio::fs::File::open(path).await?; let mut lines = tokio::io::BufReader::new(file).lines(); - let json_search_term = json_escaped_search_term(search_term)?; + let json_search_term = case_insensitive_literal_regex(json_escaped_search_term(search_term)?)?; + let search_term = case_insensitive_literal_regex(search_term)?; while let Some(line) = lines.next_line().await? { - if line.contains(json_search_term.as_str()) - && let Some(snippet) = content_match_snippet(line.as_str(), search_term) + if json_search_term.is_match(line.as_str()) + && let Some(snippet) = content_match_snippet(line.as_str(), &search_term) { return Ok(Some(snippet)); } @@ -149,7 +154,14 @@ fn json_escaped_search_term(search_term: &str) -> io::Result { Ok(serialized[1..serialized.len() - 1].to_string()) } -fn content_match_snippet(jsonl_line: &str, search_term: &str) -> Option { +fn case_insensitive_literal_regex(search_term: impl AsRef) -> io::Result { + RegexBuilder::new(regex::escape(search_term.as_ref()).as_str()) + .case_insensitive(true) + .build() + .map_err(io::Error::other) +} + +fn content_match_snippet(jsonl_line: &str, search_term: &Regex) -> Option { let rollout_line = serde_json::from_str::(jsonl_line.trim()).ok()?; let text = conversation_text_from_item(&rollout_line.item)?; excerpt_around_match(text.as_str(), search_term) @@ -206,10 +218,11 @@ fn strip_user_message_prefix(text: &str) -> &str { } } -fn excerpt_around_match(text: &str, search_term: &str) -> Option { +fn excerpt_around_match(text: &str, search_term: &Regex) -> Option { let normalized = normalize_preview_text(text); - let match_start = normalized.find(search_term)?; - let match_end = match_start.saturating_add(search_term.len()); + let matched = search_term.find(normalized.as_str())?; + let match_start = matched.start(); + let match_end = matched.end(); let excerpt_start = char_start_before(normalized.as_str(), match_start, MATCH_CONTEXT_BEFORE_CHARS); let excerpt_end = char_end_after(normalized.as_str(), match_end, MATCH_CONTEXT_AFTER_CHARS);