mirror of
https://github.com/openai/codex.git
synced 2026-05-24 04:54:52 +00:00
[codex] Make thread search case-insensitive (#23921)
## Summary

- make rollout content search prefilter rollout files case-insensitively
- keep the no-ripgrep fallback scan and visible snippet matcher aligned with that behavior
- cover a lowercase `thread/search` query matching mixed-case conversation content

## Why

The rollout-backed `thread/search` path used exact string matching in both its `rg` prefilter and semantic snippet generation. A content result could be missed solely because the query casing did not match the stored conversation text.

## Validation

- `just fmt`
- `cargo test -p codex-app-server thread_search_returns_content_matches`
- `cargo test -p codex-rollout`
- `just bazel-lock-update`
- `just bazel-lock-check`
- `cargo build -p codex-cli`
- launched a local Electron dev instance with the rebuilt CLI binary
This commit is contained in:
committed by
GitHub
parent
b20e969f23
commit
05cf2fc4ce
1
codex-rs/Cargo.lock
generated
1
codex-rs/Cargo.lock
generated
@@ -3558,6 +3558,7 @@ dependencies = [
|
||||
"codex-utils-path",
|
||||
"codex-utils-string",
|
||||
"pretty_assertions",
|
||||
"regex",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tempfile",
|
||||
|
||||
@@ -686,7 +686,7 @@ async fn thread_search_returns_content_matches() -> Result<()> {
|
||||
codex_home.path(),
|
||||
"2025-01-02T12-00-00",
|
||||
"2025-01-02T12:00:00Z",
|
||||
"needle suffix",
|
||||
"mixed NEEDLE suffix",
|
||||
Some("mock_provider"),
|
||||
/*git_info*/ None,
|
||||
)?;
|
||||
@@ -718,7 +718,7 @@ async fn thread_search_returns_content_matches() -> Result<()> {
|
||||
.map(|result| result.thread.id.as_str())
|
||||
.collect();
|
||||
assert_eq!(ids, vec![newer_match, older_match]);
|
||||
assert_eq!(data[0].snippet, "needle suffix");
|
||||
assert_eq!(data[0].snippet, "mixed NEEDLE suffix");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -24,6 +24,7 @@ codex-protocol = { workspace = true }
|
||||
codex-state = { workspace = true }
|
||||
codex-utils-path = { workspace = true }
|
||||
codex-utils-string = { workspace = true }
|
||||
regex = { workspace = true }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
serde_json = { workspace = true }
|
||||
time = { workspace = true, features = [
|
||||
|
||||
@@ -9,6 +9,8 @@ use codex_protocol::protocol::EventMsg;
|
||||
use codex_protocol::protocol::RolloutItem;
|
||||
use codex_protocol::protocol::RolloutLine;
|
||||
use codex_protocol::protocol::USER_MESSAGE_BEGIN;
|
||||
use regex::Regex;
|
||||
use regex::RegexBuilder;
|
||||
use tokio::io::AsyncBufReadExt;
|
||||
use tokio::process::Command;
|
||||
|
||||
@@ -45,6 +47,7 @@ async fn ripgrep_rollout_paths(
|
||||
let output = match Command::new(rg_command)
|
||||
.arg("-l")
|
||||
.arg("--fixed-strings")
|
||||
.arg("--ignore-case")
|
||||
.arg("--no-ignore")
|
||||
.arg("--glob")
|
||||
.arg("*.jsonl")
|
||||
@@ -88,6 +91,7 @@ async fn ripgrep_rollout_paths(
|
||||
async fn scan_rollout_paths(root: &Path, search_term: &str) -> io::Result<HashSet<PathBuf>> {
|
||||
let mut matches = HashSet::new();
|
||||
let mut dirs = vec![root.to_path_buf()];
|
||||
let search_term = case_insensitive_literal_regex(search_term)?;
|
||||
|
||||
while let Some(dir) = dirs.pop() {
|
||||
let mut entries = match tokio::fs::read_dir(dir).await {
|
||||
@@ -107,7 +111,7 @@ async fn scan_rollout_paths(root: &Path, search_term: &str) -> io::Result<HashSe
|
||||
{
|
||||
continue;
|
||||
}
|
||||
if rollout_contains(path.as_path(), search_term).await? {
|
||||
if rollout_contains(path.as_path(), &search_term).await? {
|
||||
matches.insert(path);
|
||||
}
|
||||
}
|
||||
@@ -116,11 +120,11 @@ async fn scan_rollout_paths(root: &Path, search_term: &str) -> io::Result<HashSe
|
||||
Ok(matches)
|
||||
}
|
||||
|
||||
async fn rollout_contains(path: &Path, search_term: &str) -> io::Result<bool> {
|
||||
async fn rollout_contains(path: &Path, search_term: &Regex) -> io::Result<bool> {
|
||||
let file = tokio::fs::File::open(path).await?;
|
||||
let mut lines = tokio::io::BufReader::new(file).lines();
|
||||
while let Some(line) = lines.next_line().await? {
|
||||
if line.contains(search_term) {
|
||||
if search_term.is_match(line.as_str()) {
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
@@ -133,10 +137,11 @@ pub async fn first_rollout_content_match_snippet(
|
||||
) -> io::Result<Option<String>> {
|
||||
let file = tokio::fs::File::open(path).await?;
|
||||
let mut lines = tokio::io::BufReader::new(file).lines();
|
||||
let json_search_term = json_escaped_search_term(search_term)?;
|
||||
let json_search_term = case_insensitive_literal_regex(json_escaped_search_term(search_term)?)?;
|
||||
let search_term = case_insensitive_literal_regex(search_term)?;
|
||||
while let Some(line) = lines.next_line().await? {
|
||||
if line.contains(json_search_term.as_str())
|
||||
&& let Some(snippet) = content_match_snippet(line.as_str(), search_term)
|
||||
if json_search_term.is_match(line.as_str())
|
||||
&& let Some(snippet) = content_match_snippet(line.as_str(), &search_term)
|
||||
{
|
||||
return Ok(Some(snippet));
|
||||
}
|
||||
@@ -149,7 +154,14 @@ fn json_escaped_search_term(search_term: &str) -> io::Result<String> {
|
||||
Ok(serialized[1..serialized.len() - 1].to_string())
|
||||
}
|
||||
|
||||
fn content_match_snippet(jsonl_line: &str, search_term: &str) -> Option<String> {
|
||||
fn case_insensitive_literal_regex(search_term: impl AsRef<str>) -> io::Result<Regex> {
|
||||
RegexBuilder::new(regex::escape(search_term.as_ref()).as_str())
|
||||
.case_insensitive(true)
|
||||
.build()
|
||||
.map_err(io::Error::other)
|
||||
}
|
||||
|
||||
fn content_match_snippet(jsonl_line: &str, search_term: &Regex) -> Option<String> {
|
||||
let rollout_line = serde_json::from_str::<RolloutLine>(jsonl_line.trim()).ok()?;
|
||||
let text = conversation_text_from_item(&rollout_line.item)?;
|
||||
excerpt_around_match(text.as_str(), search_term)
|
||||
@@ -206,10 +218,11 @@ fn strip_user_message_prefix(text: &str) -> &str {
|
||||
}
|
||||
}
|
||||
|
||||
fn excerpt_around_match(text: &str, search_term: &str) -> Option<String> {
|
||||
fn excerpt_around_match(text: &str, search_term: &Regex) -> Option<String> {
|
||||
let normalized = normalize_preview_text(text);
|
||||
let match_start = normalized.find(search_term)?;
|
||||
let match_end = match_start.saturating_add(search_term.len());
|
||||
let matched = search_term.find(normalized.as_str())?;
|
||||
let match_start = matched.start();
|
||||
let match_end = matched.end();
|
||||
let excerpt_start =
|
||||
char_start_before(normalized.as_str(), match_start, MATCH_CONTEXT_BEFORE_CHARS);
|
||||
let excerpt_end = char_end_after(normalized.as_str(), match_end, MATCH_CONTEXT_AFTER_CHARS);
|
||||
|
||||
Reference in New Issue
Block a user