From 05cf2fc4ce82b4f894031522a7c42698e6e6addd Mon Sep 17 00:00:00 2001
From: Francis Chalissery <fc@openai.com>
Date: Thu, 21 May 2026 14:14:01 -0700
Subject: [PATCH] [codex] Make thread search case-insensitive (#23921)

## Summary
- make the rollout content search's `rg` prefilter match rollout files case-insensitively
- keep the no-ripgrep fallback scan and visible snippet matcher aligned
with that behavior
- add test coverage for a lowercase `thread/search` query matching
mixed-case conversation content

## Why
The rollout-backed `thread/search` path used exact string matching in
both its `rg` prefilter and semantic snippet generation. A content
result could be missed solely because the query casing did not match the
stored conversation text.

## Validation
- `just fmt`
- `cargo test -p codex-app-server thread_search_returns_content_matches`
- `cargo test -p codex-rollout`
- `just bazel-lock-update`
- `just bazel-lock-check`
- `cargo build -p codex-cli`
- launched a local Electron dev instance with the rebuilt CLI binary
---
 codex-rs/Cargo.lock                           |  1 +
 .../app-server/tests/suite/v2/thread_list.rs  |  4 +--
 codex-rs/rollout/Cargo.toml                   |  1 +
 codex-rs/rollout/src/search.rs                | 33 +++++++++++++------
 4 files changed, 27 insertions(+), 12 deletions(-)
diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock
index 3fa4d8315b..7c9955323d 100644
--- a/codex-rs/Cargo.lock
+++ b/codex-rs/Cargo.lock
@@ -3558,6 +3558,7 @@ dependencies = [
  "codex-utils-path",
  "codex-utils-string",
  "pretty_assertions",
+ "regex",
  "serde",
  "serde_json",
  "tempfile",
diff --git a/codex-rs/app-server/tests/suite/v2/thread_list.rs b/codex-rs/app-server/tests/suite/v2/thread_list.rs
index bfb1d4f5e0..e064ff6e25 100644
--- a/codex-rs/app-server/tests/suite/v2/thread_list.rs
+++ b/codex-rs/app-server/tests/suite/v2/thread_list.rs
@@ -686,7 +686,7 @@ async fn thread_search_returns_content_matches() -> Result<()> {
         codex_home.path(),
         "2025-01-02T12-00-00",
         "2025-01-02T12:00:00Z",
-        "needle suffix",
+        "mixed NEEDLE suffix",
         Some("mock_provider"),
         /*git_info*/ None,
     )?;
@@ -718,7 +718,7 @@ async fn thread_search_returns_content_matches() -> Result<()> {
         .map(|result| result.thread.id.as_str())
         .collect();
     assert_eq!(ids, vec![newer_match, older_match]);
-    assert_eq!(data[0].snippet, "needle suffix");
+    assert_eq!(data[0].snippet, "mixed NEEDLE suffix");
 
     Ok(())
 }
diff --git a/codex-rs/rollout/Cargo.toml b/codex-rs/rollout/Cargo.toml
index ef5a8dc22a..50e5a8594a 100644
--- a/codex-rs/rollout/Cargo.toml
+++ b/codex-rs/rollout/Cargo.toml
@@ -24,6 +24,7 @@ codex-protocol = { workspace = true }
 codex-state = { workspace = true }
 codex-utils-path = { workspace = true }
 codex-utils-string = { workspace = true }
+regex = { workspace = true }
 serde = { workspace = true, features = ["derive"] }
 serde_json = { workspace = true }
 time = { workspace = true, features = [
diff --git a/codex-rs/rollout/src/search.rs b/codex-rs/rollout/src/search.rs
index 1773f5afb3..911e80552a 100644
--- a/codex-rs/rollout/src/search.rs
+++ b/codex-rs/rollout/src/search.rs
@@ -9,6 +9,8 @@ use codex_protocol::protocol::EventMsg;
 use codex_protocol::protocol::RolloutItem;
 use codex_protocol::protocol::RolloutLine;
 use codex_protocol::protocol::USER_MESSAGE_BEGIN;
+use regex::Regex;
+use regex::RegexBuilder;
 use tokio::io::AsyncBufReadExt;
 use tokio::process::Command;
 
@@ -45,6 +47,7 @@ async fn ripgrep_rollout_paths(
     let output = match Command::new(rg_command)
         .arg("-l")
         .arg("--fixed-strings")
+        .arg("--ignore-case")
         .arg("--no-ignore")
         .arg("--glob")
         .arg("*.jsonl")
@@ -88,6 +91,7 @@ async fn ripgrep_rollout_paths(
 async fn scan_rollout_paths(root: &Path, search_term: &str) -> io::Result<HashSet<PathBuf>> {
     let mut matches = HashSet::new();
     let mut dirs = vec![root.to_path_buf()];
+    let search_term = case_insensitive_literal_regex(search_term)?;
 
     while let Some(dir) = dirs.pop() {
         let mut entries = match tokio::fs::read_dir(dir).await {
@@ -107,7 +111,7 @@ async fn scan_rollout_paths(root: &Path, search_term: &str) -> io::Result<HashSe
             {
                 continue;
             }
-            if rollout_contains(path.as_path(), search_term).await? {
+            if rollout_contains(path.as_path(), &search_term).await? {
                 matches.insert(path);
             }
         }
@@ -116,11 +120,11 @@ async fn scan_rollout_paths(root: &Path, search_term: &str) -> io::Result<HashSe
     Ok(matches)
 }
 
-async fn rollout_contains(path: &Path, search_term: &str) -> io::Result<bool> {
+async fn rollout_contains(path: &Path, search_term: &Regex) -> io::Result<bool> {
     let file = tokio::fs::File::open(path).await?;
     let mut lines = tokio::io::BufReader::new(file).lines();
     while let Some(line) = lines.next_line().await? {
-        if line.contains(search_term) {
+        if search_term.is_match(line.as_str()) {
             return Ok(true);
         }
     }
@@ -133,10 +137,11 @@ pub async fn first_rollout_content_match_snippet(
 ) -> io::Result<Option<String>> {
     let file = tokio::fs::File::open(path).await?;
     let mut lines = tokio::io::BufReader::new(file).lines();
-    let json_search_term = json_escaped_search_term(search_term)?;
+    let json_search_term = case_insensitive_literal_regex(json_escaped_search_term(search_term)?)?;
+    let search_term = case_insensitive_literal_regex(search_term)?;
     while let Some(line) = lines.next_line().await? {
-        if line.contains(json_search_term.as_str())
-            && let Some(snippet) = content_match_snippet(line.as_str(), search_term)
+        if json_search_term.is_match(line.as_str())
+            && let Some(snippet) = content_match_snippet(line.as_str(), &search_term)
         {
             return Ok(Some(snippet));
         }
@@ -149,7 +154,14 @@ fn json_escaped_search_term(search_term: &str) -> io::Result<String> {
     Ok(serialized[1..serialized.len() - 1].to_string())
 }
 
-fn content_match_snippet(jsonl_line: &str, search_term: &str) -> Option<String> {
+fn case_insensitive_literal_regex(search_term: impl AsRef<str>) -> io::Result<Regex> {
+    RegexBuilder::new(regex::escape(search_term.as_ref()).as_str())
+        .case_insensitive(true)
+        .build()
+        .map_err(io::Error::other)
+}
+
+fn content_match_snippet(jsonl_line: &str, search_term: &Regex) -> Option<String> {
     let rollout_line = serde_json::from_str::<RolloutLine>(jsonl_line.trim()).ok()?;
     let text = conversation_text_from_item(&rollout_line.item)?;
     excerpt_around_match(text.as_str(), search_term)
@@ -206,10 +218,11 @@ fn strip_user_message_prefix(text: &str) -> &str {
     }
 }
 
-fn excerpt_around_match(text: &str, search_term: &str) -> Option<String> {
+fn excerpt_around_match(text: &str, search_term: &Regex) -> Option<String> {
     let normalized = normalize_preview_text(text);
-    let match_start = normalized.find(search_term)?;
-    let match_end = match_start.saturating_add(search_term.len());
+    let matched = search_term.find(normalized.as_str())?;
+    let match_start = matched.start();
+    let match_end = matched.end();
     let excerpt_start =
         char_start_before(normalized.as_str(), match_start, MATCH_CONTEXT_BEFORE_CHARS);
     let excerpt_end = char_end_after(normalized.as_str(), match_end, MATCH_CONTEXT_AFTER_CHARS);