Compare commits

...

2 Commits

Author SHA1 Message Date
zhao-oai
27018edc50 Merge branch 'main' into patch-guard 2025-10-15 09:38:48 -07:00
kevin zhao
0144fb4fab initial commit 2025-10-14 14:45:15 -07:00
7 changed files with 538 additions and 0 deletions

2
codex-rs/Cargo.lock generated
View File

@@ -1051,6 +1051,7 @@ dependencies = [
"escargot",
"eventsource-stream",
"futures",
"ignore",
"indexmap 2.10.0",
"landlock",
"libc",
@@ -1069,6 +1070,7 @@ dependencies = [
"serde_json",
"serial_test",
"sha1",
"sha2",
"shlex",
"similar",
"strum_macros 0.27.2",

View File

@@ -33,6 +33,7 @@ env-flags = { workspace = true }
eventsource-stream = { workspace = true }
futures = { workspace = true }
indexmap = { workspace = true }
ignore = { workspace = true }
libc = { workspace = true }
mcp-types = { workspace = true }
os_info = { workspace = true }
@@ -43,6 +44,7 @@ reqwest = { workspace = true, features = ["json", "stream"] }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
sha1 = { workspace = true }
sha2 = { workspace = true }
shlex = { workspace = true }
similar = { workspace = true }
strum_macros = { workspace = true }

View File

@@ -0,0 +1,168 @@
use std::fmt::Write;
use codex_protocol::models::ContentItem;
use codex_protocol::models::ResponseItem;
use crate::codebase_snapshot::SnapshotDiff;
pub(crate) const CODEBASE_CHANGE_NOTICE_MAX_PATHS: usize = 40;
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct CodebaseChangeNotice {
added: Vec<String>,
removed: Vec<String>,
modified: Vec<String>,
truncated: bool,
}
impl CodebaseChangeNotice {
pub(crate) fn new(diff: SnapshotDiff, limit: usize) -> Self {
let mut remaining = limit;
let mut truncated = false;
let added = take_paths(diff.added, &mut remaining, &mut truncated);
let removed = take_paths(diff.removed, &mut remaining, &mut truncated);
let modified = take_paths(diff.modified, &mut remaining, &mut truncated);
Self {
added,
removed,
modified,
truncated,
}
}
pub(crate) fn is_empty(&self) -> bool {
self.added.is_empty() && self.removed.is_empty() && self.modified.is_empty()
}
pub(crate) fn serialize_to_xml(&self) -> String {
let mut output = String::new();
if self.truncated {
let _ = writeln!(output, "<codebase_changes truncated=\"true\">");
} else {
let _ = writeln!(output, "<codebase_changes>");
}
let mut summary_parts = Vec::new();
if !self.added.is_empty() {
summary_parts.push(format!("added {}", self.added.len()));
}
if !self.removed.is_empty() {
summary_parts.push(format!("removed {}", self.removed.len()));
}
if !self.modified.is_empty() {
summary_parts.push(format!("modified {}", self.modified.len()));
}
if summary_parts.is_empty() {
let _ = writeln!(output, " <summary>no changes</summary>");
} else {
let summary = summary_parts.join(", ");
let _ = writeln!(output, " <summary>{summary}</summary>");
}
serialize_section(&mut output, "added", &self.added);
serialize_section(&mut output, "removed", &self.removed);
serialize_section(&mut output, "modified", &self.modified);
if self.truncated {
let _ = writeln!(output, " <note>additional paths omitted</note>");
}
let _ = writeln!(output, "</codebase_changes>");
output
}
}
fn take_paths(mut paths: Vec<String>, remaining: &mut usize, truncated: &mut bool) -> Vec<String> {
if *remaining == 0 {
if !paths.is_empty() {
*truncated = true;
}
return Vec::new();
}
if paths.len() > *remaining {
paths.truncate(*remaining);
*truncated = true;
}
*remaining -= paths.len();
paths
}
fn serialize_section(output: &mut String, tag: &str, paths: &[String]) {
if paths.is_empty() {
return;
}
let _ = writeln!(output, " <{tag}>");
for path in paths {
let _ = writeln!(output, " <path>{}</path>", escape_xml(path));
}
let _ = writeln!(output, " </{tag}>");
}
fn escape_xml(value: &str) -> String {
let mut escaped = String::with_capacity(value.len());
for ch in value.chars() {
match ch {
'&' => escaped.push_str("&amp;"),
'<' => escaped.push_str("&lt;"),
'>' => escaped.push_str("&gt;"),
'"' => escaped.push_str("&quot;"),
'\'' => escaped.push_str("&apos;"),
other => escaped.push(other),
}
}
escaped
}
impl From<CodebaseChangeNotice> for ResponseItem {
fn from(notice: CodebaseChangeNotice) -> Self {
ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText {
text: notice.serialize_to_xml(),
}],
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
#[test]
fn constructs_notice_with_limit() {
let diff = SnapshotDiff {
added: vec!["a.rs".to_string(), "b.rs".to_string()],
removed: vec!["c.rs".to_string()],
modified: vec!["d.rs".to_string(), "e.rs".to_string()],
};
let notice = CodebaseChangeNotice::new(diff, 3);
assert!(notice.truncated);
assert_eq!(
notice.added.len() + notice.removed.len() + notice.modified.len(),
3
);
}
#[test]
fn serializes_notice() {
let diff = SnapshotDiff {
added: vec!["src/lib.rs".to_string()],
removed: Vec::new(),
modified: vec!["src/main.rs".to_string()],
};
let notice = CodebaseChangeNotice::new(diff, CODEBASE_CHANGE_NOTICE_MAX_PATHS);
let xml = notice.serialize_to_xml();
assert!(xml.contains("<added>"));
assert!(xml.contains("<modified>"));
assert!(xml.contains("src/lib.rs"));
assert!(xml.contains("src/main.rs"));
}
}

View File

@@ -0,0 +1,278 @@
use std::borrow::Cow;
use std::collections::BTreeMap;
use std::fs::File;
use std::io::Read;
use std::path::Path;
use std::path::PathBuf;
use std::time::SystemTime;
use anyhow::Context;
use anyhow::Result;
use ignore::WalkBuilder;
use sha2::Digest;
use sha2::Sha256;
use tokio::task;
use tracing::warn;
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct CodebaseSnapshot {
root: PathBuf,
entries: BTreeMap<String, EntryFingerprint>,
root_digest: DigestBytes,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct EntryFingerprint {
pub kind: EntryKind,
pub digest: DigestBytes,
pub size: u64,
pub modified_millis: Option<u128>,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(u8)]
pub(crate) enum EntryKind {
File,
Symlink,
}
#[derive(Clone, Debug, PartialEq, Eq, Default)]
pub(crate) struct SnapshotDiff {
pub added: Vec<String>,
pub removed: Vec<String>,
pub modified: Vec<String>,
}
impl SnapshotDiff {
pub fn is_empty(&self) -> bool {
self.added.is_empty() && self.removed.is_empty() && self.modified.is_empty()
}
}
pub(crate) type DigestBytes = [u8; 32];
impl CodebaseSnapshot {
pub(crate) async fn capture(root: PathBuf) -> Result<Self> {
task::spawn_blocking(move || Self::from_disk(&root))
.await
.map_err(|e| anyhow::anyhow!("codebase snapshot task failed: {e}"))?
}
pub(crate) fn from_disk(root: &Path) -> Result<Self> {
if !root.exists() {
return Ok(Self::empty(root));
}
let mut entries: BTreeMap<String, EntryFingerprint> = BTreeMap::new();
let mut walker = WalkBuilder::new(root);
walker
.hidden(false)
.git_ignore(true)
.git_exclude(true)
.parents(true)
.ignore(true)
.follow_links(false);
for result in walker.build() {
let entry = match result {
Ok(entry) => entry,
Err(err) => {
warn!("codebase snapshot failed to read entry: {err}");
continue;
}
};
let path = entry.path();
if entry.depth() == 0 {
continue;
}
let relative = match path.strip_prefix(root) {
Ok(rel) => rel,
Err(_) => continue,
};
if relative.as_os_str().is_empty() {
continue;
}
let rel_string = normalize_rel_path(relative);
let file_type = match entry.file_type() {
Some(file_type) => file_type,
None => continue,
};
if file_type.is_dir() {
continue;
}
if file_type.is_file() {
match fingerprint_file(path) {
Ok(fp) => {
entries.insert(rel_string, fp);
}
Err(err) => {
warn!(
"codebase snapshot failed to hash file {}: {err}",
path.display()
);
}
}
continue;
}
if file_type.is_symlink() {
match fingerprint_symlink(path) {
Ok(fp) => {
entries.insert(rel_string, fp);
}
Err(err) => {
warn!(
"codebase snapshot failed to hash symlink {}: {err}",
path.display()
);
}
}
continue;
}
}
let root_digest = compute_root_digest(&entries);
Ok(Self {
root: root.to_path_buf(),
entries,
root_digest,
})
}
pub(crate) fn diff(&self, newer: &CodebaseSnapshot) -> SnapshotDiff {
let mut diff = SnapshotDiff::default();
for (path, fingerprint) in &newer.entries {
match self.entries.get(path) {
None => diff.added.push(path.clone()),
Some(existing) if existing != fingerprint => diff.modified.push(path.clone()),
_ => {}
}
}
for path in self.entries.keys() {
if !newer.entries.contains_key(path) {
diff.removed.push(path.clone());
}
}
diff
}
pub(crate) fn root(&self) -> &Path {
&self.root
}
fn empty(root: &Path) -> Self {
Self {
root: root.to_path_buf(),
entries: BTreeMap::new(),
root_digest: Sha256::digest(b"").into(),
}
}
}
fn fingerprint_file(path: &Path) -> Result<EntryFingerprint> {
let metadata = path
.metadata()
.with_context(|| format!("metadata {}", path.display()))?;
let mut file = File::open(path).with_context(|| format!("open {}", path.display()))?;
let mut hasher = Sha256::new();
let mut buf = [0u8; 64 * 1024];
loop {
let read = file.read(&mut buf)?;
if read == 0 {
break;
}
hasher.update(&buf[..read]);
}
Ok(EntryFingerprint {
kind: EntryKind::File,
digest: hasher.finalize().into(),
size: metadata.len(),
modified_millis: metadata.modified().ok().and_then(system_time_to_millis),
})
}
fn fingerprint_symlink(path: &Path) -> Result<EntryFingerprint> {
let target =
std::fs::read_link(path).with_context(|| format!("read_link {}", path.display()))?;
let mut hasher = Sha256::new();
let target_str = normalize_rel_path(&target);
hasher.update(target_str.as_bytes());
Ok(EntryFingerprint {
kind: EntryKind::Symlink,
digest: hasher.finalize().into(),
size: 0,
modified_millis: None,
})
}
fn compute_root_digest(entries: &BTreeMap<String, EntryFingerprint>) -> DigestBytes {
let mut hasher = Sha256::new();
for (path, fingerprint) in entries {
hasher.update(path.as_bytes());
hasher.update(fingerprint.digest);
hasher.update([fingerprint.kind as u8]);
hasher.update(fingerprint.size.to_le_bytes());
if let Some(modified) = fingerprint.modified_millis {
hasher.update(modified.to_le_bytes());
}
}
hasher.finalize().into()
}
fn normalize_rel_path(path: &Path) -> String {
let s = path_to_cow(path);
if s.is_empty() {
String::new()
} else {
s.replace('\\', "/")
}
}
fn path_to_cow(path: &Path) -> Cow<'_, str> {
path.to_string_lossy()
}
fn system_time_to_millis(ts: SystemTime) -> Option<u128> {
ts.duration_since(SystemTime::UNIX_EPOCH)
.map(|duration| duration.as_millis())
.ok()
}
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
use tempfile::tempdir;
#[test]
fn diff_tracks_added_modified_removed() {
let dir = tempdir().unwrap();
let root = dir.path();
std::fs::write(root.join("file_a.txt"), "alpha").unwrap();
std::fs::write(root.join("file_b.txt"), "bravo").unwrap();
let snapshot_one = CodebaseSnapshot::from_disk(root).unwrap();
std::fs::write(root.join("file_a.txt"), "alpha-updated").unwrap();
std::fs::remove_file(root.join("file_b.txt")).unwrap();
std::fs::write(root.join("file_c.txt"), "charlie").unwrap();
let snapshot_two = CodebaseSnapshot::from_disk(root).unwrap();
let diff = snapshot_one.diff(&snapshot_two);
assert_eq!(diff.added, vec!["file_c.txt".to_string()]);
assert_eq!(diff.modified, vec!["file_a.txt".to_string()]);
assert_eq!(diff.removed, vec!["file_b.txt".to_string()]);
}
}

View File

@@ -1,5 +1,6 @@
use std::borrow::Cow;
use std::fmt::Debug;
use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::atomic::AtomicU64;
@@ -43,6 +44,9 @@ use crate::apply_patch::convert_apply_patch_to_protocol;
use crate::client::ModelClient;
use crate::client_common::Prompt;
use crate::client_common::ResponseEvent;
use crate::codebase_change_notice::CODEBASE_CHANGE_NOTICE_MAX_PATHS;
use crate::codebase_change_notice::CodebaseChangeNotice;
use crate::codebase_snapshot::CodebaseSnapshot;
use crate::config::Config;
use crate::config_types::ShellEnvironmentPolicy;
use crate::conversation_history::ConversationHistory;
@@ -746,6 +750,73 @@ impl Session {
self.persist_rollout_items(&rollout_items).await;
}
async fn stored_snapshot_for_root(&self, root: &Path) -> Option<CodebaseSnapshot> {
let state = self.state.lock().await;
state
.codebase_snapshot
.as_ref()
.filter(|snapshot| snapshot.root() == root)
.cloned()
}
async fn set_codebase_snapshot(&self, snapshot: CodebaseSnapshot) {
let mut state = self.state.lock().await;
state.codebase_snapshot = Some(snapshot);
}
pub(crate) async fn emit_codebase_delta_if_changed(
&self,
turn_context: &TurnContext,
sub_id: &str,
) -> anyhow::Result<()> {
let cwd = turn_context.cwd.clone();
let previous = self.stored_snapshot_for_root(&cwd).await;
let latest = CodebaseSnapshot::capture(cwd.clone()).await?;
if let Some(previous_snapshot) = previous {
let diff = previous_snapshot.diff(&latest);
if diff.is_empty() {
self.set_codebase_snapshot(latest).await;
return Ok(());
}
let notice = CodebaseChangeNotice::new(diff, CODEBASE_CHANGE_NOTICE_MAX_PATHS);
if notice.is_empty() {
self.set_codebase_snapshot(latest).await;
return Ok(());
}
let response_item: ResponseItem = notice.into();
self.record_conversation_items(std::slice::from_ref(&response_item))
.await;
for msg in
map_response_item_to_event_messages(&response_item, self.show_raw_agent_reasoning())
{
let event = Event {
id: sub_id.to_string(),
msg,
};
self.send_event(event).await;
}
self.set_codebase_snapshot(latest).await;
return Ok(());
}
self.set_codebase_snapshot(latest).await;
Ok(())
}
pub(crate) async fn refresh_codebase_snapshot(
&self,
turn_context: &TurnContext,
) -> anyhow::Result<()> {
let snapshot = CodebaseSnapshot::capture(turn_context.cwd.clone()).await?;
self.set_codebase_snapshot(snapshot).await;
Ok(())
}
pub(crate) fn build_initial_context(&self, turn_context: &TurnContext) -> Vec<ResponseItem> {
let mut items = Vec::<ResponseItem>::with_capacity(2);
if let Some(user_instructions) = turn_context.user_instructions.as_deref() {
@@ -1678,6 +1749,14 @@ pub(crate) async fn run_task(
.await;
}
if !is_review_mode
&& let Err(err) = sess
.emit_codebase_delta_if_changed(turn_context.as_ref(), &sub_id)
.await
{
warn!(error = ?err, "failed to compute codebase changes");
}
let mut last_agent_message: Option<String> = None;
// Although from the perspective of codex.rs, TurnDiffTracker has the lifecycle of a Task which contains
// many turns, from the perspective of the user, it is a single turn.
@@ -1911,6 +1990,11 @@ pub(crate) async fn run_task(
}
}
if !is_review_mode && let Err(err) = sess.refresh_codebase_snapshot(turn_context.as_ref()).await
{
warn!(error = ?err, "failed to refresh codebase snapshot");
}
// If this was a review thread and we have a final assistant message,
// try to parse it as a ReviewOutput.
//

View File

@@ -11,6 +11,8 @@ pub mod bash;
mod chat_completions;
mod client;
mod client_common;
mod codebase_change_notice;
mod codebase_snapshot;
pub mod codex;
mod codex_conversation;
pub mod token_data;

View File

@@ -2,6 +2,7 @@
use codex_protocol::models::ResponseItem;
use crate::codebase_snapshot::CodebaseSnapshot;
use crate::conversation_history::ConversationHistory;
use crate::protocol::RateLimitSnapshot;
use crate::protocol::TokenUsage;
@@ -13,6 +14,7 @@ pub(crate) struct SessionState {
pub(crate) history: ConversationHistory,
pub(crate) token_info: Option<TokenUsageInfo>,
pub(crate) latest_rate_limits: Option<RateLimitSnapshot>,
pub(crate) codebase_snapshot: Option<CodebaseSnapshot>,
}
impl SessionState {