Files
codex/prs/bolinfest/PR-1770.md
2025-09-02 15:17:45 -07:00

9402 lines
372 KiB
Markdown

# PR #1770: Add a TurnDiffTracker to create a unified diff for an entire turn
- URL: https://github.com/openai/codex/pull/1770
- Author: gpeal
- Created: 2025-07-31 23:42:44 UTC
- Updated: 2025-08-04 16:55:01 UTC
- Changes: +998/-18, Files changed: 9, Commits: 25
## Description
This lets us show an accumulating diff across all patches in a turn. Refer to the docs for TurnDiffTracker for implementation details.
There are multiple ways this could have been done and this felt like the right tradeoff between reliability and completeness:
*Pros*
* It will pick up all changes to files that the model touched, including changes made by Prettier or another command that subsequently updates them.
* It will not pick up changes made by the user or other agents to files it didn't modify.
*Cons*
* It will pick up changes that the user made to a file that the model also touched
* It will not pick up changes to generated (codegen) files or any other files that were not modified with apply_patch.
## Full Diff
```diff
diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock
index 7d4e41d0b1..eb4eccd897 100644
--- a/codex-rs/Cargo.lock
+++ b/codex-rs/Cargo.lock
@@ -699,6 +699,7 @@ dependencies = [
"serde_json",
"sha1",
"shlex",
+ "similar",
"strum_macros 0.27.2",
"tempfile",
"thiserror 2.0.12",
diff --git a/codex-rs/core/Cargo.toml b/codex-rs/core/Cargo.toml
index db3fd4f834..466e9adf02 100644
--- a/codex-rs/core/Cargo.toml
+++ b/codex-rs/core/Cargo.toml
@@ -34,6 +34,7 @@ serde_json = "1"
serde_bytes = "0.11"
sha1 = "0.10.6"
shlex = "1.3.0"
+similar = "2.7.0"
strum_macros = "0.27.2"
thiserror = "2.0.12"
time = { version = "0.3", features = ["formatting", "local-offset", "macros"] }
diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index 7004dcfcb7..568d87c4a8 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -85,11 +85,13 @@ use crate::protocol::SandboxPolicy;
use crate::protocol::SessionConfiguredEvent;
use crate::protocol::Submission;
use crate::protocol::TaskCompleteEvent;
+use crate::protocol::TurnDiffEvent;
use crate::rollout::RolloutRecorder;
use crate::safety::SafetyCheck;
use crate::safety::assess_command_safety;
use crate::safety::assess_safety_for_untrusted_command;
use crate::shell;
+use crate::turn_diff_tracker::TurnDiffTracker;
use crate::user_notification::UserNotification;
use crate::util::backoff;
@@ -362,7 +364,11 @@ impl Session {
}
}
- async fn notify_exec_command_begin(&self, exec_command_context: ExecCommandContext) {
+ async fn on_exec_command_begin(
+ &self,
+ turn_diff_tracker: &mut TurnDiffTracker,
+ exec_command_context: ExecCommandContext,
+ ) {
let ExecCommandContext {
sub_id,
call_id,
@@ -374,11 +380,15 @@ impl Session {
Some(ApplyPatchCommandContext {
user_explicitly_approved_this_action,
changes,
- }) => EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
- call_id,
- auto_approved: !user_explicitly_approved_this_action,
- changes,
- }),
+ }) => {
+ turn_diff_tracker.on_patch_begin(&changes);
+
+ EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
+ call_id,
+ auto_approved: !user_explicitly_approved_this_action,
+ changes,
+ })
+ }
None => EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
call_id,
command: command_for_display.clone(),
@@ -392,8 +402,10 @@ impl Session {
let _ = self.tx_event.send(event).await;
}
- async fn notify_exec_command_end(
+ #[allow(clippy::too_many_arguments)]
+ async fn on_exec_command_end(
&self,
+ turn_diff_tracker: &mut TurnDiffTracker,
sub_id: &str,
call_id: &str,
output: &ExecToolCallOutput,
@@ -433,6 +445,20 @@ impl Session {
msg,
};
let _ = self.tx_event.send(event).await;
+
+ // If this is an apply_patch, after we emit the end patch, emit a second event
+ // with the full turn diff if there is one.
+ if is_apply_patch {
+ let unified_diff = turn_diff_tracker.get_unified_diff();
+ if let Ok(Some(unified_diff)) = unified_diff {
+ let msg = EventMsg::TurnDiff(TurnDiffEvent { unified_diff });
+ let event = Event {
+ id: sub_id.into(),
+ msg,
+ };
+ let _ = self.tx_event.send(event).await;
+ }
+ }
}
/// Helper that emits a BackgroundEvent with the given message. This keeps
@@ -1006,6 +1032,10 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
.await;
let last_agent_message: Option<String>;
+ // Although from the perspective of codex.rs, TurnDiffTracker has the lifecycle of a Task which contains
+ // many turns, from the perspective of the user, it is a single turn.
+ let mut turn_diff_tracker = TurnDiffTracker::new();
+
loop {
// Note that pending_input would be something like a message the user
// submitted through the UI while the model was running. Though the UI
@@ -1037,7 +1067,7 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
})
})
.collect();
- match run_turn(&sess, sub_id.clone(), turn_input).await {
+ match run_turn(&sess, &mut turn_diff_tracker, sub_id.clone(), turn_input).await {
Ok(turn_output) => {
let mut items_to_record_in_conversation_history = Vec::<ResponseItem>::new();
let mut responses = Vec::<ResponseInputItem>::new();
@@ -1163,6 +1193,7 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
async fn run_turn(
sess: &Session,
+ turn_diff_tracker: &mut TurnDiffTracker,
sub_id: String,
input: Vec<ResponseItem>,
) -> CodexResult<Vec<ProcessedResponseItem>> {
@@ -1177,7 +1208,7 @@ async fn run_turn(
let mut retries = 0;
loop {
- match try_run_turn(sess, &sub_id, &prompt).await {
+ match try_run_turn(sess, turn_diff_tracker, &sub_id, &prompt).await {
Ok(output) => return Ok(output),
Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted),
Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)),
@@ -1223,6 +1254,7 @@ struct ProcessedResponseItem {
async fn try_run_turn(
sess: &Session,
+ turn_diff_tracker: &mut TurnDiffTracker,
sub_id: &str,
prompt: &Prompt,
) -> CodexResult<Vec<ProcessedResponseItem>> {
@@ -1310,7 +1342,8 @@ async fn try_run_turn(
match event {
ResponseEvent::Created => {}
ResponseEvent::OutputItemDone(item) => {
- let response = handle_response_item(sess, sub_id, item.clone()).await?;
+ let response =
+ handle_response_item(sess, turn_diff_tracker, sub_id, item.clone()).await?;
output.push(ProcessedResponseItem { item, response });
}
@@ -1328,6 +1361,16 @@ async fn try_run_turn(
.ok();
}
+ let unified_diff = turn_diff_tracker.get_unified_diff();
+ if let Ok(Some(unified_diff)) = unified_diff {
+ let msg = EventMsg::TurnDiff(TurnDiffEvent { unified_diff });
+ let event = Event {
+ id: sub_id.to_string(),
+ msg,
+ };
+ let _ = sess.tx_event.send(event).await;
+ }
+
return Ok(output);
}
ResponseEvent::OutputTextDelta(delta) => {
@@ -1432,6 +1475,7 @@ async fn run_compact_task(
async fn handle_response_item(
sess: &Session,
+ turn_diff_tracker: &mut TurnDiffTracker,
sub_id: &str,
item: ResponseItem,
) -> CodexResult<Option<ResponseInputItem>> {
@@ -1469,7 +1513,17 @@ async fn handle_response_item(
..
} => {
info!("FunctionCall: {arguments}");
- Some(handle_function_call(sess, sub_id.to_string(), name, arguments, call_id).await)
+ Some(
+ handle_function_call(
+ sess,
+ turn_diff_tracker,
+ sub_id.to_string(),
+ name,
+ arguments,
+ call_id,
+ )
+ .await,
+ )
}
ResponseItem::LocalShellCall {
id,
@@ -1504,6 +1558,7 @@ async fn handle_response_item(
handle_container_exec_with_params(
exec_params,
sess,
+ turn_diff_tracker,
sub_id.to_string(),
effective_call_id,
)
@@ -1521,6 +1576,7 @@ async fn handle_response_item(
async fn handle_function_call(
sess: &Session,
+ turn_diff_tracker: &mut TurnDiffTracker,
sub_id: String,
name: String,
arguments: String,
@@ -1534,7 +1590,8 @@ async fn handle_function_call(
return *output;
}
};
- handle_container_exec_with_params(params, sess, sub_id, call_id).await
+ handle_container_exec_with_params(params, sess, turn_diff_tracker, sub_id, call_id)
+ .await
}
"update_plan" => handle_update_plan(sess, arguments, sub_id, call_id).await,
_ => {
@@ -1608,6 +1665,7 @@ fn maybe_run_with_user_profile(params: ExecParams, sess: &Session) -> ExecParams
async fn handle_container_exec_with_params(
params: ExecParams,
sess: &Session,
+ turn_diff_tracker: &mut TurnDiffTracker,
sub_id: String,
call_id: String,
) -> ResponseInputItem {
@@ -1755,7 +1813,7 @@ async fn handle_container_exec_with_params(
},
),
};
- sess.notify_exec_command_begin(exec_command_context.clone())
+ sess.on_exec_command_begin(turn_diff_tracker, exec_command_context.clone())
.await;
let params = maybe_run_with_user_profile(params, sess);
@@ -1782,7 +1840,8 @@ async fn handle_container_exec_with_params(
duration,
} = &output;
- sess.notify_exec_command_end(
+ sess.on_exec_command_end(
+ turn_diff_tracker,
&sub_id,
&call_id,
&output,
@@ -1806,7 +1865,15 @@ async fn handle_container_exec_with_params(
}
}
Err(CodexErr::Sandbox(error)) => {
- handle_sandbox_error(params, exec_command_context, error, sandbox_type, sess).await
+ handle_sandbox_error(
+ turn_diff_tracker,
+ params,
+ exec_command_context,
+ error,
+ sandbox_type,
+ sess,
+ )
+ .await
}
Err(e) => {
// Handle non-sandbox errors
@@ -1822,6 +1889,7 @@ async fn handle_container_exec_with_params(
}
async fn handle_sandbox_error(
+ turn_diff_tracker: &mut TurnDiffTracker,
params: ExecParams,
exec_command_context: ExecCommandContext,
error: SandboxErr,
@@ -1878,7 +1946,8 @@ async fn handle_sandbox_error(
sess.notify_background_event(&sub_id, "retrying command without sandbox")
.await;
- sess.notify_exec_command_begin(exec_command_context).await;
+ sess.on_exec_command_begin(turn_diff_tracker, exec_command_context)
+ .await;
// This is an escalated retry; the policy will not be
// examined and the sandbox has been set to `None`.
@@ -1905,8 +1974,14 @@ async fn handle_sandbox_error(
duration,
} = &retry_output;
- sess.notify_exec_command_end(&sub_id, &call_id, &retry_output, is_apply_patch)
- .await;
+ sess.on_exec_command_end(
+ turn_diff_tracker,
+ &sub_id,
+ &call_id,
+ &retry_output,
+ is_apply_patch,
+ )
+ .await;
let is_success = *exit_code == 0;
let content = format_exec_output(
diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs
index 80f9014954..4f083d9e56 100644
--- a/codex-rs/core/src/lib.rs
+++ b/codex-rs/core/src/lib.rs
@@ -42,6 +42,7 @@ pub(crate) mod safety;
pub mod seatbelt;
pub mod shell;
pub mod spawn;
+pub mod turn_diff_tracker;
mod user_notification;
pub mod util;
diff --git a/codex-rs/core/src/protocol.rs b/codex-rs/core/src/protocol.rs
index cbb211d955..82591a2c78 100644
--- a/codex-rs/core/src/protocol.rs
+++ b/codex-rs/core/src/protocol.rs
@@ -387,6 +387,8 @@ pub enum EventMsg {
/// Notification that a patch application has finished.
PatchApplyEnd(PatchApplyEndEvent),
+ TurnDiff(TurnDiffEvent),
+
/// Response to GetHistoryEntryRequest.
GetHistoryEntryResponse(GetHistoryEntryResponseEvent),
@@ -598,6 +600,11 @@ pub struct PatchApplyEndEvent {
pub success: bool,
}
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct TurnDiffEvent {
+ pub unified_diff: String,
+}
+
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct GetHistoryEntryResponseEvent {
pub offset: usize,
diff --git a/codex-rs/core/src/turn_diff_tracker.rs b/codex-rs/core/src/turn_diff_tracker.rs
new file mode 100644
index 0000000000..7026d7bb32
--- /dev/null
+++ b/codex-rs/core/src/turn_diff_tracker.rs
@@ -0,0 +1,887 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use anyhow::anyhow;
+use sha1::digest::Output;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+const ZERO_OID: &str = "0000000000000000000000000000000000000000";
+const DEV_NULL: &str = "/dev/null";
+
+struct BaselineFileInfo {
+ path: PathBuf,
+ content: Vec<u8>,
+ mode: FileMode,
+ oid: String,
+}
+
+/// Tracks sets of changes to files and exposes the overall unified diff.
+/// Internally, the way this works is now:
+/// 1. Maintain an in-memory baseline snapshot of files when they are first seen.
+/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null).
+/// 2. Keep a stable internal filename (uuid) per external path for rename tracking.
+/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory
+/// using the `similar` crate and emit unified diffs with rewritten external paths.
+#[derive(Default)]
+pub struct TurnDiffTracker {
+ /// Map external path -> internal filename (uuid).
+ external_to_temp_name: HashMap<PathBuf, String>,
+ /// Internal filename -> baseline file info.
+ baseline_file_info: HashMap<String, BaselineFileInfo>,
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
+ /// This is where renames are tracked.
+ temp_name_to_current_path: HashMap<String, PathBuf>,
+ /// Cache of known git worktree roots to avoid repeated filesystem walks.
+ git_root_cache: Vec<PathBuf>,
+}
+
+impl TurnDiffTracker {
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Front-run apply patch calls to track the starting contents of any modified files.
+ /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen.
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
+ /// - Also updates internal mappings for move/rename events.
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) {
+ for (path, change) in changes.iter() {
+ // Ensure a stable internal filename exists for this external path.
+ if !self.external_to_temp_name.contains_key(path) {
+ let internal = Uuid::new_v4().to_string();
+ self.external_to_temp_name
+ .insert(path.clone(), internal.clone());
+ self.temp_name_to_current_path
+ .insert(internal.clone(), path.clone());
+
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
+ let baseline_file_info = if path.exists() {
+ let mode = file_mode_for_path(path);
+ let mode_val = mode.unwrap_or(FileMode::Regular);
+ let content = blob_bytes(path, &mode_val).unwrap_or_default();
+ let oid = if mode == Some(FileMode::Symlink) {
+ format!("{:x}", git_blob_sha1_hex_bytes(&content))
+ } else {
+ self.git_blob_oid_for_path(path)
+ .unwrap_or_else(|| format!("{:x}", git_blob_sha1_hex_bytes(&content)))
+ };
+ Some(BaselineFileInfo {
+ path: path.clone(),
+ content,
+ mode: mode_val,
+ oid,
+ })
+ } else {
+ Some(BaselineFileInfo {
+ path: path.clone(),
+ content: vec![],
+ mode: FileMode::Regular,
+ oid: ZERO_OID.to_string(),
+ })
+ };
+
+ if let Some(baseline_file_info) = baseline_file_info {
+ self.baseline_file_info
+ .insert(internal.clone(), baseline_file_info);
+ }
+ }
+
+ // Track rename/move in current mapping if provided in an Update.
+ if let FileChange::Update {
+ move_path: Some(dest),
+ ..
+ } = change
+ {
+ let uuid_filename = match self.external_to_temp_name.get(path) {
+ Some(i) => i.clone(),
+ None => {
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
+ let i = Uuid::new_v4().to_string();
+ self.baseline_file_info.insert(
+ i.clone(),
+ BaselineFileInfo {
+ path: path.clone(),
+ content: vec![],
+ mode: FileMode::Regular,
+ oid: ZERO_OID.to_string(),
+ },
+ );
+ i
+ }
+ };
+ // Update current external mapping for temp file name.
+ self.temp_name_to_current_path
+ .insert(uuid_filename.clone(), dest.clone());
+ // Update forward file_mapping: external current -> internal name.
+ self.external_to_temp_name.remove(path);
+ self.external_to_temp_name
+ .insert(dest.clone(), uuid_filename);
+ };
+ }
+ }
+
+ fn get_path_for_internal(&self, internal: &str) -> Option<PathBuf> {
+ self.temp_name_to_current_path
+ .get(internal)
+ .cloned()
+ .or_else(|| {
+ self.baseline_file_info
+ .get(internal)
+ .map(|info| info.path.clone())
+ })
+ }
+
+ /// Find the git worktree root for a file/directory by walking up to the first ancestor containing a `.git` entry.
+ /// Uses a simple cache of known roots and avoids negative-result caching for simplicity.
+ fn find_git_root_cached(&mut self, start: &Path) -> Option<PathBuf> {
+ let dir = if start.is_dir() {
+ start
+ } else {
+ start.parent()?
+ };
+
+ // Fast path: if any cached root is an ancestor of this path, use it.
+ if let Some(root) = self
+ .git_root_cache
+ .iter()
+ .find(|r| dir.starts_with(r))
+ .cloned()
+ {
+ return Some(root);
+ }
+
+ // Walk up to find a `.git` marker.
+ let mut cur = dir.to_path_buf();
+ loop {
+ let git_marker = cur.join(".git");
+ if git_marker.is_dir() || git_marker.is_file() {
+ if !self.git_root_cache.iter().any(|r| r == &cur) {
+ self.git_root_cache.push(cur.clone());
+ }
+ return Some(cur);
+ }
+
+ // On Windows, avoid walking above the drive or UNC share root.
+ #[cfg(windows)]
+ {
+ if is_windows_drive_or_unc_root(&cur) {
+ return None;
+ }
+ }
+
+ if let Some(parent) = cur.parent() {
+ cur = parent.to_path_buf();
+ } else {
+ return None;
+ }
+ }
+ }
+
+ /// Return a display string for `path` relative to its git root if found, else absolute.
+ fn relative_to_git_root_str(&mut self, path: &Path) -> String {
+ let s = if let Some(root) = self.find_git_root_cached(path) {
+ if let Ok(rel) = path.strip_prefix(&root) {
+ rel.display().to_string()
+ } else {
+ path.display().to_string()
+ }
+ } else {
+ path.display().to_string()
+ };
+ s.replace('\\', "/")
+ }
+
+ /// Ask git to compute the blob SHA-1 for the file at `path` within its repository.
+ /// Returns None if no repository is found or git invocation fails.
+ fn git_blob_oid_for_path(&mut self, path: &Path) -> Option<String> {
+ let root = self.find_git_root_cached(path)?;
+ // Compute a path relative to the repo root for better portability across platforms.
+ let rel = path.strip_prefix(&root).unwrap_or(path);
+ let output = Command::new("git")
+ .arg("-C")
+ .arg(&root)
+ .arg("hash-object")
+ .arg("--")
+ .arg(rel)
+ .output()
+ .ok()?;
+ if !output.status.success() {
+ return None;
+ }
+ let s = String::from_utf8_lossy(&output.stdout).trim().to_string();
+ if s.len() == 40 { Some(s) } else { None }
+ }
+
+ /// Recompute the aggregated unified diff by comparing all of the in-memory snapshots that were
+ /// collected before the first time they were touched by apply_patch during this turn with
+ /// the current repo state.
+ pub fn get_unified_diff(&mut self) -> Result<Option<String>> {
+ let mut aggregated = String::new();
+
+ // Compute diffs per tracked internal file in a stable order by external path.
+ let mut baseline_file_names: Vec<String> =
+ self.baseline_file_info.keys().cloned().collect();
+ // Sort lexicographically by full repo-relative path to match git behavior.
+ baseline_file_names.sort_by_key(|internal| {
+ self.get_path_for_internal(internal)
+ .map(|p| self.relative_to_git_root_str(&p))
+ .unwrap_or_default()
+ });
+
+ for internal in baseline_file_names {
+ aggregated.push_str(self.get_file_diff(&internal).as_str());
+ if !aggregated.ends_with('\n') {
+ aggregated.push('\n');
+ }
+ }
+
+ if aggregated.trim().is_empty() {
+ Ok(None)
+ } else {
+ Ok(Some(aggregated))
+ }
+ }
+
+ fn get_file_diff(&mut self, internal_file_name: &str) -> String {
+ let mut aggregated = String::new();
+
+ // Snapshot lightweight fields only.
+ let (baseline_external_path, baseline_mode, left_oid) = {
+ if let Some(info) = self.baseline_file_info.get(internal_file_name) {
+ (info.path.clone(), info.mode, info.oid.clone())
+ } else {
+ (PathBuf::new(), FileMode::Regular, ZERO_OID.to_string())
+ }
+ };
+ let current_external_path = match self.get_path_for_internal(internal_file_name) {
+ Some(p) => p,
+ None => return aggregated,
+ };
+
+ let current_mode = file_mode_for_path(&current_external_path).unwrap_or(FileMode::Regular);
+ let right_bytes = blob_bytes(&current_external_path, &current_mode);
+
+ // Compute displays with &mut self before borrowing any baseline content.
+ let left_display = self.relative_to_git_root_str(&baseline_external_path);
+ let right_display = self.relative_to_git_root_str(&current_external_path);
+
+ // Compute right oid before borrowing baseline content.
+ let right_oid = if let Some(b) = right_bytes.as_ref() {
+ if current_mode == FileMode::Symlink {
+ format!("{:x}", git_blob_sha1_hex_bytes(b))
+ } else {
+ self.git_blob_oid_for_path(&current_external_path)
+ .unwrap_or_else(|| format!("{:x}", git_blob_sha1_hex_bytes(b)))
+ }
+ } else {
+ ZERO_OID.to_string()
+ };
+
+ // Borrow baseline content only after all &mut self uses are done.
+ let left_present = left_oid.as_str() != ZERO_OID;
+ let left_bytes: Option<&[u8]> = if left_present {
+ self.baseline_file_info
+ .get(internal_file_name)
+ .map(|i| i.content.as_slice())
+ } else {
+ None
+ };
+
+ // Fast path: identical bytes or both missing.
+ if left_bytes == right_bytes.as_deref() {
+ return aggregated;
+ }
+
+ aggregated.push_str(&format!("diff --git a/{left_display} b/{right_display}\n"));
+
+ let is_add = !left_present && right_bytes.is_some();
+ let is_delete = left_present && right_bytes.is_none();
+
+ if is_add {
+ aggregated.push_str(&format!("new file mode {current_mode}\n"));
+ } else if is_delete {
+ aggregated.push_str(&format!("deleted file mode {baseline_mode}\n"));
+ } else if baseline_mode != current_mode {
+ aggregated.push_str(&format!("old mode {baseline_mode}\n"));
+ aggregated.push_str(&format!("new mode {current_mode}\n"));
+ }
+
+ let left_text = left_bytes.and_then(|b| std::str::from_utf8(b).ok());
+ let right_text = right_bytes
+ .as_deref()
+ .and_then(|b| std::str::from_utf8(b).ok());
+
+ let can_text_diff = matches!(
+ (left_text, right_text, is_add, is_delete),
+ (Some(_), Some(_), _, _) | (_, Some(_), true, _) | (Some(_), _, _, true)
+ );
+
+ if can_text_diff {
+ let l = left_text.unwrap_or("");
+ let r = right_text.unwrap_or("");
+
+ aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
+
+ let old_header = if left_present {
+ format!("a/{left_display}")
+ } else {
+ DEV_NULL.to_string()
+ };
+ let new_header = if right_bytes.is_some() {
+ format!("b/{right_display}")
+ } else {
+ DEV_NULL.to_string()
+ };
+
+ let diff = similar::TextDiff::from_lines(l, r);
+ let unified = diff
+ .unified_diff()
+ .context_radius(3)
+ .header(&old_header, &new_header)
+ .to_string();
+
+ aggregated.push_str(&unified);
+ } else {
+ aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
+ let old_header = if left_present {
+ format!("a/{left_display}")
+ } else {
+ DEV_NULL.to_string()
+ };
+ let new_header = if right_bytes.is_some() {
+ format!("b/{right_display}")
+ } else {
+ DEV_NULL.to_string()
+ };
+ aggregated.push_str(&format!("--- {old_header}\n"));
+ aggregated.push_str(&format!("+++ {new_header}\n"));
+ aggregated.push_str("Binary files differ\n");
+ }
+ aggregated
+ }
+}
+
+/// Compute the Git SHA-1 blob object ID for the given content (bytes).
+fn git_blob_sha1_hex_bytes(data: &[u8]) -> Output<sha1::Sha1> {
+ // Git blob hash is sha1 of: "blob <len>\0<data>"
+ let header = format!("blob {}\0", data.len());
+ use sha1::Digest;
+ let mut hasher = sha1::Sha1::new();
+ hasher.update(header.as_bytes());
+ hasher.update(data);
+ hasher.finalize()
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+enum FileMode {
+ Regular,
+ #[cfg(unix)]
+ Executable,
+ Symlink,
+}
+
+impl FileMode {
+ fn as_str(&self) -> &'static str {
+ match self {
+ FileMode::Regular => "100644",
+ #[cfg(unix)]
+ FileMode::Executable => "100755",
+ FileMode::Symlink => "120000",
+ }
+ }
+}
+
+impl std::fmt::Display for FileMode {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ f.write_str(self.as_str())
+ }
+}
+
+#[cfg(unix)]
+fn file_mode_for_path(path: &Path) -> Option<FileMode> {
+ use std::os::unix::fs::PermissionsExt;
+ let meta = fs::symlink_metadata(path).ok()?;
+ let ft = meta.file_type();
+ if ft.is_symlink() {
+ return Some(FileMode::Symlink);
+ }
+ let mode = meta.permissions().mode();
+ let is_exec = (mode & 0o111) != 0;
+ Some(if is_exec {
+ FileMode::Executable
+ } else {
+ FileMode::Regular
+ })
+}
+
+#[cfg(not(unix))]
+fn file_mode_for_path(_path: &Path) -> Option<FileMode> {
+ // Default to non-executable on non-unix.
+ Some(FileMode::Regular)
+}
+
+fn blob_bytes(path: &Path, mode: &FileMode) -> Option<Vec<u8>> {
+ if path.exists() {
+ let contents = if *mode == FileMode::Symlink {
+ symlink_blob_bytes(path)
+ .ok_or_else(|| anyhow!("failed to read symlink target for {}", path.display()))
+ } else {
+ fs::read(path)
+ .with_context(|| format!("failed to read current file for diff {}", path.display()))
+ };
+ contents.ok()
+ } else {
+ None
+ }
+}
+
+#[cfg(unix)]
+fn symlink_blob_bytes(path: &Path) -> Option<Vec<u8>> {
+ use std::os::unix::ffi::OsStrExt;
+ let target = std::fs::read_link(path).ok()?;
+ Some(target.as_os_str().as_bytes().to_vec())
+}
+
+#[cfg(not(unix))]
+fn symlink_blob_bytes(_path: &Path) -> Option<Vec<u8>> {
+ None
+}
+
+#[cfg(windows)]
+fn is_windows_drive_or_unc_root(p: &std::path::Path) -> bool {
+ use std::path::Component;
+ let mut comps = p.components();
+ matches!(
+ (comps.next(), comps.next(), comps.next()),
+ (Some(Component::Prefix(_)), Some(Component::RootDir), None)
+ )
+}
+
+#[cfg(test)]
+mod tests {
+ #![allow(clippy::unwrap_used)]
+ use super::*;
+ use pretty_assertions::assert_eq;
+ use tempfile::tempdir;
+
+ /// Compute the Git SHA-1 blob object ID for the given content (string).
+ /// This delegates to the bytes version to avoid UTF-8 lossy conversions here.
+ fn git_blob_sha1_hex(data: &str) -> String {
+ format!("{:x}", git_blob_sha1_hex_bytes(data.as_bytes()))
+ }
+
+ fn normalize_diff_for_test(input: &str, root: &Path) -> String {
+ let root_str = root.display().to_string().replace('\\', "/");
+ let replaced = input.replace(&root_str, "<TMP>");
+ // Split into blocks on lines starting with "diff --git ", sort blocks for determinism, and rejoin
+ let mut blocks: Vec<String> = Vec::new();
+ let mut current = String::new();
+ for line in replaced.lines() {
+ if line.starts_with("diff --git ") && !current.is_empty() {
+ blocks.push(current);
+ current = String::new();
+ }
+ if !current.is_empty() {
+ current.push('\n');
+ }
+ current.push_str(line);
+ }
+ if !current.is_empty() {
+ blocks.push(current);
+ }
+ blocks.sort();
+ let mut out = blocks.join("\n");
+ if !out.ends_with('\n') {
+ out.push('\n');
+ }
+ out
+ }
+
+ #[test]
+ fn accumulates_add_and_update() {
+ let mut acc = TurnDiffTracker::new();
+
+ let dir = tempdir().unwrap();
+ let file = dir.path().join("a.txt");
+
+ // First patch: add file (baseline should be /dev/null).
+ let add_changes = HashMap::from([(
+ file.clone(),
+ FileChange::Add {
+ content: "foo\n".to_string(),
+ },
+ )]);
+ acc.on_patch_begin(&add_changes);
+
+ // Simulate apply: create the file on disk.
+ fs::write(&file, "foo\n").unwrap();
+ let first = acc.get_unified_diff().unwrap().unwrap();
+ let first = normalize_diff_for_test(&first, dir.path());
+ let expected_first = {
+ let mode = file_mode_for_path(&file).unwrap_or(FileMode::Regular);
+ let right_oid = git_blob_sha1_hex("foo\n");
+ format!(
+ r#"diff --git a/<TMP>/a.txt b/<TMP>/a.txt
+new file mode {mode}
+index {ZERO_OID}..{right_oid}
+--- {DEV_NULL}
++++ b/<TMP>/a.txt
+@@ -0,0 +1 @@
++foo
+"#,
+ )
+ };
+ assert_eq!(first, expected_first);
+
+ // Second patch: update the file on disk.
+ let update_changes = HashMap::from([(
+ file.clone(),
+ FileChange::Update {
+ unified_diff: "".to_owned(),
+ move_path: None,
+ },
+ )]);
+ acc.on_patch_begin(&update_changes);
+
+ // Simulate apply: append a new line.
+ fs::write(&file, "foo\nbar\n").unwrap();
+ let combined = acc.get_unified_diff().unwrap().unwrap();
+ let combined = normalize_diff_for_test(&combined, dir.path());
+ let expected_combined = {
+ let mode = file_mode_for_path(&file).unwrap_or(FileMode::Regular);
+ let right_oid = git_blob_sha1_hex("foo\nbar\n");
+ format!(
+ r#"diff --git a/<TMP>/a.txt b/<TMP>/a.txt
+new file mode {mode}
+index {ZERO_OID}..{right_oid}
+--- {DEV_NULL}
++++ b/<TMP>/a.txt
+@@ -0,0 +1,2 @@
++foo
++bar
+"#,
+ )
+ };
+ assert_eq!(combined, expected_combined);
+ }
+
+ #[test]
+ fn accumulates_delete() {
+ let dir = tempdir().unwrap();
+ let file = dir.path().join("b.txt");
+ fs::write(&file, "x\n").unwrap();
+
+ let mut acc = TurnDiffTracker::new();
+ let del_changes = HashMap::from([(file.clone(), FileChange::Delete)]);
+ acc.on_patch_begin(&del_changes);
+
+ // Simulate apply: delete the file from disk.
+ let baseline_mode = file_mode_for_path(&file).unwrap_or(FileMode::Regular);
+ fs::remove_file(&file).unwrap();
+ let diff = acc.get_unified_diff().unwrap().unwrap();
+ let diff = normalize_diff_for_test(&diff, dir.path());
+ let expected = {
+ let left_oid = git_blob_sha1_hex("x\n");
+ format!(
+ r#"diff --git a/<TMP>/b.txt b/<TMP>/b.txt
+deleted file mode {baseline_mode}
+index {left_oid}..{ZERO_OID}
+--- a/<TMP>/b.txt
++++ {DEV_NULL}
+@@ -1 +0,0 @@
+-x
+"#,
+ )
+ };
+ assert_eq!(diff, expected);
+ }
+
+ #[test]
+ fn accumulates_move_and_update() {
+ let dir = tempdir().unwrap();
+ let src = dir.path().join("src.txt");
+ let dest = dir.path().join("dst.txt");
+ fs::write(&src, "line\n").unwrap();
+
+ let mut acc = TurnDiffTracker::new();
+ let mv_changes = HashMap::from([(
+ src.clone(),
+ FileChange::Update {
+ unified_diff: "".to_owned(),
+ move_path: Some(dest.clone()),
+ },
+ )]);
+ acc.on_patch_begin(&mv_changes);
+
+ // Simulate apply: move and update content.
+ fs::rename(&src, &dest).unwrap();
+ fs::write(&dest, "line2\n").unwrap();
+
+ let out = acc.get_unified_diff().unwrap().unwrap();
+ let out = normalize_diff_for_test(&out, dir.path());
+ let expected = {
+ let left_oid = git_blob_sha1_hex("line\n");
+ let right_oid = git_blob_sha1_hex("line2\n");
+ format!(
+ r#"diff --git a/<TMP>/src.txt b/<TMP>/dst.txt
+index {left_oid}..{right_oid}
+--- a/<TMP>/src.txt
++++ b/<TMP>/dst.txt
+@@ -1 +1 @@
+-line
++line2
+"#
+ )
+ };
+ assert_eq!(out, expected);
+ }
+
+ #[test]
+ fn move_without_1change_yields_no_diff() {
+ let dir = tempdir().unwrap();
+ let src = dir.path().join("moved.txt");
+ let dest = dir.path().join("renamed.txt");
+ fs::write(&src, "same\n").unwrap();
+
+ let mut acc = TurnDiffTracker::new();
+ let mv_changes = HashMap::from([(
+ src.clone(),
+ FileChange::Update {
+ unified_diff: "".to_owned(),
+ move_path: Some(dest.clone()),
+ },
+ )]);
+ acc.on_patch_begin(&mv_changes);
+
+ // Simulate apply: move only, no content change.
+ fs::rename(&src, &dest).unwrap();
+
+ let diff = acc.get_unified_diff().unwrap();
+ assert_eq!(diff, None);
+ }
+
+ #[test]
+ fn move_declared_but_file_only_appears_at_dest_is_add() {
+ let dir = tempdir().unwrap();
+ let src = dir.path().join("src.txt");
+ let dest = dir.path().join("dest.txt");
+ let mut acc = TurnDiffTracker::new();
+ let mv = HashMap::from([(
+ src.clone(),
+ FileChange::Update {
+ unified_diff: "".into(),
+ move_path: Some(dest.clone()),
+ },
+ )]);
+ acc.on_patch_begin(&mv);
+ // No file existed initially; create only dest
+ fs::write(&dest, "hello\n").unwrap();
+ let diff = acc.get_unified_diff().unwrap().unwrap();
+ let diff = normalize_diff_for_test(&diff, dir.path());
+ let expected = {
+ let mode = file_mode_for_path(&dest).unwrap_or(FileMode::Regular);
+ let right_oid = git_blob_sha1_hex("hello\n");
+ format!(
+ r#"diff --git a/<TMP>/src.txt b/<TMP>/dest.txt
+new file mode {mode}
+index {ZERO_OID}..{right_oid}
+--- {DEV_NULL}
++++ b/<TMP>/dest.txt
+@@ -0,0 +1 @@
++hello
+"#,
+ )
+ };
+ assert_eq!(diff, expected);
+ }
+
+ #[test]
+ fn update_persists_across_new_baseline_for_new_file() {
+ let dir = tempdir().unwrap();
+ let a = dir.path().join("a.txt");
+ let b = dir.path().join("b.txt");
+ fs::write(&a, "foo\n").unwrap();
+ fs::write(&b, "z\n").unwrap();
+
+ let mut acc = TurnDiffTracker::new();
+
+ // First: update existing a.txt (baseline snapshot is created for a).
+ let update_a = HashMap::from([(
+ a.clone(),
+ FileChange::Update {
+ unified_diff: "".to_owned(),
+ move_path: None,
+ },
+ )]);
+ acc.on_patch_begin(&update_a);
+ // Simulate apply: modify a.txt on disk.
+ fs::write(&a, "foo\nbar\n").unwrap();
+ let first = acc.get_unified_diff().unwrap().unwrap();
+ let first = normalize_diff_for_test(&first, dir.path());
+ let expected_first = {
+ let left_oid = git_blob_sha1_hex("foo\n");
+ let right_oid = git_blob_sha1_hex("foo\nbar\n");
+ format!(
+ r#"diff --git a/<TMP>/a.txt b/<TMP>/a.txt
+index {left_oid}..{right_oid}
+--- a/<TMP>/a.txt
++++ b/<TMP>/a.txt
+@@ -1 +1,2 @@
+ foo
++bar
+"#
+ )
+ };
+ assert_eq!(first, expected_first);
+
+ // Next: introduce a brand-new path b.txt into baseline snapshots via a delete change.
+ let del_b = HashMap::from([(b.clone(), FileChange::Delete)]);
+ acc.on_patch_begin(&del_b);
+ // Simulate apply: delete b.txt.
+ let baseline_mode = file_mode_for_path(&b).unwrap_or(FileMode::Regular);
+ fs::remove_file(&b).unwrap();
+
+ let combined = acc.get_unified_diff().unwrap().unwrap();
+ let combined = normalize_diff_for_test(&combined, dir.path());
+ let expected = {
+ let left_oid_a = git_blob_sha1_hex("foo\n");
+ let right_oid_a = git_blob_sha1_hex("foo\nbar\n");
+ let left_oid_b = git_blob_sha1_hex("z\n");
+ format!(
+ r#"diff --git a/<TMP>/a.txt b/<TMP>/a.txt
+index {left_oid_a}..{right_oid_a}
+--- a/<TMP>/a.txt
++++ b/<TMP>/a.txt
+@@ -1 +1,2 @@
+ foo
++bar
+diff --git a/<TMP>/b.txt b/<TMP>/b.txt
+deleted file mode {baseline_mode}
+index {left_oid_b}..{ZERO_OID}
+--- a/<TMP>/b.txt
++++ {DEV_NULL}
+@@ -1 +0,0 @@
+-z
+"#,
+ )
+ };
+ assert_eq!(combined, expected);
+ }
+
+ #[test]
+ fn binary_files_differ_update() {
+ let dir = tempdir().unwrap();
+ let file = dir.path().join("bin.dat");
+
+ // Initial non-UTF8 bytes
+ let left_bytes: Vec<u8> = vec![0xff, 0xfe, 0xfd, 0x00];
+ // Updated non-UTF8 bytes
+ let right_bytes: Vec<u8> = vec![0x01, 0x02, 0x03, 0x00];
+
+ fs::write(&file, &left_bytes).unwrap();
+
+ let mut acc = TurnDiffTracker::new();
+ let update_changes = HashMap::from([(
+ file.clone(),
+ FileChange::Update {
+ unified_diff: "".to_owned(),
+ move_path: None,
+ },
+ )]);
+ acc.on_patch_begin(&update_changes);
+
+ // Apply update on disk
+ fs::write(&file, &right_bytes).unwrap();
+
+ let diff = acc.get_unified_diff().unwrap().unwrap();
+ let diff = normalize_diff_for_test(&diff, dir.path());
+ let expected = {
+ let left_oid = format!("{:x}", git_blob_sha1_hex_bytes(&left_bytes));
+ let right_oid = format!("{:x}", git_blob_sha1_hex_bytes(&right_bytes));
+ format!(
+ r#"diff --git a/<TMP>/bin.dat b/<TMP>/bin.dat
+index {left_oid}..{right_oid}
+--- a/<TMP>/bin.dat
++++ b/<TMP>/bin.dat
+Binary files differ
+"#
+ )
+ };
+ assert_eq!(diff, expected);
+ }
+
+ #[test]
+ fn filenames_with_spaces_add_and_update() {
+ let mut acc = TurnDiffTracker::new();
+
+ let dir = tempdir().unwrap();
+ let file = dir.path().join("name with spaces.txt");
+
+ // First patch: add file (baseline should be /dev/null).
+ let add_changes = HashMap::from([(
+ file.clone(),
+ FileChange::Add {
+ content: "foo\n".to_string(),
+ },
+ )]);
+ acc.on_patch_begin(&add_changes);
+
+ // Simulate apply: create the file on disk.
+ fs::write(&file, "foo\n").unwrap();
+ let first = acc.get_unified_diff().unwrap().unwrap();
+ let first = normalize_diff_for_test(&first, dir.path());
+ let expected_first = {
+ let mode = file_mode_for_path(&file).unwrap_or(FileMode::Regular);
+ let right_oid = git_blob_sha1_hex("foo\n");
+ format!(
+ r#"diff --git a/<TMP>/name with spaces.txt b/<TMP>/name with spaces.txt
+new file mode {mode}
+index {ZERO_OID}..{right_oid}
+--- {DEV_NULL}
++++ b/<TMP>/name with spaces.txt
+@@ -0,0 +1 @@
++foo
+"#,
+ )
+ };
+ assert_eq!(first, expected_first);
+
+ // Second patch: update the file on disk.
+ let update_changes = HashMap::from([(
+ file.clone(),
+ FileChange::Update {
+ unified_diff: "".to_owned(),
+ move_path: None,
+ },
+ )]);
+ acc.on_patch_begin(&update_changes);
+
+ // Simulate apply: append a new line with a space.
+ fs::write(&file, "foo\nbar baz\n").unwrap();
+ let combined = acc.get_unified_diff().unwrap().unwrap();
+ let combined = normalize_diff_for_test(&combined, dir.path());
+ let expected_combined = {
+ let mode = file_mode_for_path(&file).unwrap_or(FileMode::Regular);
+ let right_oid = git_blob_sha1_hex("foo\nbar baz\n");
+ format!(
+ r#"diff --git a/<TMP>/name with spaces.txt b/<TMP>/name with spaces.txt
+new file mode {mode}
+index {ZERO_OID}..{right_oid}
+--- {DEV_NULL}
++++ b/<TMP>/name with spaces.txt
+@@ -0,0 +1,2 @@
++foo
++bar baz
+"#,
+ )
+ };
+ assert_eq!(combined, expected_combined);
+ }
+}
diff --git a/codex-rs/exec/src/event_processor_with_human_output.rs b/codex-rs/exec/src/event_processor_with_human_output.rs
index 72e2f9298f..c290d9336b 100644
--- a/codex-rs/exec/src/event_processor_with_human_output.rs
+++ b/codex-rs/exec/src/event_processor_with_human_output.rs
@@ -20,6 +20,7 @@ use codex_core::protocol::PatchApplyEndEvent;
use codex_core::protocol::SessionConfiguredEvent;
use codex_core::protocol::TaskCompleteEvent;
use codex_core::protocol::TokenUsage;
+use codex_core::protocol::TurnDiffEvent;
use owo_colors::OwoColorize;
use owo_colors::Style;
use shlex::try_join;
@@ -399,6 +400,7 @@ impl EventProcessor for EventProcessorWithHumanOutput {
stdout,
stderr,
success,
+ ..
}) => {
let patch_begin = self.call_id_to_patch.remove(&call_id);
@@ -428,6 +430,10 @@ impl EventProcessor for EventProcessorWithHumanOutput {
println!("{}", line.style(self.dimmed));
}
}
+ EventMsg::TurnDiff(TurnDiffEvent { unified_diff }) => {
+ ts_println!(self, "{}", "turn diff:".style(self.magenta));
+ println!("{unified_diff}");
+ }
EventMsg::ExecApprovalRequest(_) => {
// Should we exit?
}
diff --git a/codex-rs/mcp-server/src/codex_tool_runner.rs b/codex-rs/mcp-server/src/codex_tool_runner.rs
index d489ffe076..205dfa4631 100644
--- a/codex-rs/mcp-server/src/codex_tool_runner.rs
+++ b/codex-rs/mcp-server/src/codex_tool_runner.rs
@@ -263,6 +263,7 @@ async fn run_codex_tool_session_inner(
| EventMsg::BackgroundEvent(_)
| EventMsg::PatchApplyBegin(_)
| EventMsg::PatchApplyEnd(_)
+ | EventMsg::TurnDiff(_)
| EventMsg::GetHistoryEntryResponse(_)
| EventMsg::PlanUpdate(_)
| EventMsg::ShutdownComplete => {
diff --git a/codex-rs/mcp-server/src/conversation_loop.rs b/codex-rs/mcp-server/src/conversation_loop.rs
index 534275181a..1db39a2306 100644
--- a/codex-rs/mcp-server/src/conversation_loop.rs
+++ b/codex-rs/mcp-server/src/conversation_loop.rs
@@ -97,6 +97,7 @@ pub async fn run_conversation_loop(
| EventMsg::McpToolCallEnd(_)
| EventMsg::ExecCommandBegin(_)
| EventMsg::ExecCommandEnd(_)
+ | EventMsg::TurnDiff(_)
| EventMsg::BackgroundEvent(_)
| EventMsg::ExecCommandOutputDelta(_)
| EventMsg::PatchApplyBegin(_)
```
## Review Comments
### codex-rs/core/Cargo.toml
- Created: 2025-08-01 16:35:11 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248403984
```diff
@@ -51,6 +51,7 @@ tree-sitter-bash = "0.25.0"
uuid = { version = "1", features = ["serde", "v4"] }
whoami = "1.6.0"
wildmatch = "2.4.0"
+tempfile = "3"
```
> @pakrym-oai alpha sort is from saving in the editor, right?
- Created: 2025-08-04 02:42:24 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250289252
```diff
@@ -34,6 +34,7 @@ serde_json = "1"
serde_bytes = "0.11"
sha1 = "0.10.6"
shlex = "1.3.0"
+similar = "2"
```
> Maybe we should match https://github.com/openai/codex/blob/e3565a3f438c30c9d36412d2817346c7accd487c/codex-rs/apply-patch/Cargo.toml#L15 (or change that one to be `"2"`?)
### codex-rs/core/src/codex.rs
- Created: 2025-08-01 16:48:22 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248425776
```diff
@@ -1471,6 +1528,7 @@ fn maybe_run_with_user_profile(params: ExecParams, sess: &Session) -> ExecParams
async fn handle_container_exec_with_params(
params: ExecParams,
sess: &Session,
+ turn_diff_tracker: &mut TurnDiffTracker,
```
> Hmm, what would happen if we wanted to support parallel tool calls at one point. This would be a problem, no?
- Created: 2025-08-01 17:25:22 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248497145
```diff
@@ -1471,6 +1528,7 @@ fn maybe_run_with_user_profile(params: ExecParams, sess: &Session) -> ExecParams
async fn handle_container_exec_with_params(
params: ExecParams,
sess: &Session,
+ turn_diff_tracker: &mut TurnDiffTracker,
```
> Because only one tool call could take ownership of TurnDiffTracker.
- Created: 2025-08-04 02:46:36 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250292735
```diff
@@ -374,11 +380,15 @@ impl Session {
Some(ApplyPatchCommandContext {
user_explicitly_approved_this_action,
changes,
- }) => EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
- call_id,
- auto_approved: !user_explicitly_approved_this_action,
- changes,
- }),
+ }) => {
+ let _ = turn_diff_tracker.on_patch_begin(&changes);
```
> If this doesn't have to return `Result`, then `let _` can go away, of course, but depending on what sort of `Err` we expect, perhaps we should at least `warn!()` or `error!()`?
- Created: 2025-08-04 02:47:22 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250293562
```diff
@@ -392,8 +402,10 @@ impl Session {
let _ = self.tx_event.send(event).await;
}
- async fn notify_exec_command_end(
+ #[allow(clippy::too_many_arguments)]
```
> We should maybe introduce a struct in a follow-up PR.
- Created: 2025-08-04 02:49:08 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250294909
```diff
@@ -1163,6 +1193,7 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
async fn run_turn(
sess: &Session,
+ turn_diff_tracker: &mut TurnDiffTracker,
```
> We'll probably want a `struct TurnContext` or somesuch in the near future.
- Created: 2025-08-04 02:51:47 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250296938
```diff
@@ -1471,6 +1528,7 @@ fn maybe_run_with_user_profile(params: ExecParams, sess: &Session) -> ExecParams
async fn handle_container_exec_with_params(
params: ExecParams,
sess: &Session,
+ turn_diff_tracker: &mut TurnDiffTracker,
```
> Yes, though also, if we introduce a `struct TurnContext` as mentioned above, that may also force the move to `Mutex`. But yes, does not have to be done in this PR.
- Created: 2025-08-04 02:55:26 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250299871
```diff
@@ -1328,6 +1361,16 @@ async fn try_run_turn(
.ok();
}
+ let unified_diff = turn_diff_tracker.get_unified_diff();
+ if let Ok(Some(unified_diff)) = unified_diff {
+ let msg = EventMsg::TurnDiff(TurnDiffEvent { unified_diff });
+ let event = Event {
+ id: sub_id.to_string(),
+ msg,
+ };
+ let _ = sess.tx_event.send(event).await;
+ }
+
```
> I'm starting to think that we should do `break token_usage;` to get out of the loop and then do all of this post-loop stuff below just in case there ever ends up being another way to break out.
>
> It would also eliminate this `return` statement buried in here (though admittedly it would bury the `break` statement instead).
### codex-rs/core/src/protocol.rs
- Created: 2025-08-01 16:41:09 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248413365
```diff
@@ -525,6 +527,11 @@ pub struct PatchApplyEndEvent {
pub success: bool,
}
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct TurnDiffEvent {
+ pub unified_diff: String,
```
> I feel like this would be easier to work with programmatically if this were keyed by path, more like `changes` in `PatchApplyBeginEvent`. Maybe for a full add or a full delete for an individual file, we still want the unified diff, but it's nice to have added/modified/removed metadata for each path so it's easy to build a compact summary for the diff (maybe with +/- line counts)?
- Created: 2025-08-01 16:49:48 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248429044
```diff
@@ -525,6 +527,11 @@ pub struct PatchApplyEndEvent {
pub success: bool,
}
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct TurnDiffEvent {
+ pub unified_diff: String,
```
> What guarantees, if any, can we make about the paths in the `unified_diff`: will they all be absolute paths?
### codex-rs/core/src/turn_diff_tracker.rs
- Created: 2025-08-01 16:54:37 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248437475
```diff
@@ -0,0 +1,476 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use tempfile::TempDir;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+/// Tracks sets of changes to files and exposes the overall unified diff.
+/// Internally, the way this works is now:
+/// 1. Create a temp directory to store baseline snapshots of files when they are first seen.
+/// 2. When a path is first observed, copy its current contents into the baseline dir if it exists on disk.
+/// For new additions, do not create a baseline file so that diffs are shown as proper additions (using /dev/null).
+/// 3. Keep a stable internal filename (uuid + same extension) per external path for path rewrite in diffs.
+/// 4. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk using
+/// `git diff --no-index` and rewrite paths to external paths.
+#[derive(Default)]
+pub struct TurnDiffTracker {
+ /// Temp directory holding baseline snapshots of files as first seen.
+ baseline_files_dir: Option<TempDir>,
+ /// Map external path -> internal filename (uuid + same extension).
+ external_to_temp_name: HashMap<PathBuf, String>,
+ /// Internal filename -> external path as of baseline snapshot.
+ temp_name_to_baseline_external: HashMap<String, PathBuf>,
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
+ /// This is where renames are tracked.
+ temp_name_to_current_external: HashMap<String, PathBuf>,
+ /// Aggregated unified diff for all accumulated changes across files.
+ pub unified_diff: Option<String>,
```
> I'm surprised to see this as a field as opposed to always derived?
- Created: 2025-08-01 16:57:12 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248442328
```diff
@@ -0,0 +1,476 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use tempfile::TempDir;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+/// Tracks sets of changes to files and exposes the overall unified diff.
+/// Internally, the way this works is now:
+/// 1. Create a temp directory to store baseline snapshots of files when they are first seen.
+/// 2. When a path is first observed, copy its current contents into the baseline dir if it exists on disk.
+/// For new additions, do not create a baseline file so that diffs are shown as proper additions (using /dev/null).
+/// 3. Keep a stable internal filename (uuid + same extension) per external path for path rewrite in diffs.
+/// 4. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk using
+/// `git diff --no-index` and rewrite paths to external paths.
+#[derive(Default)]
+pub struct TurnDiffTracker {
+ /// Temp directory holding baseline snapshots of files as first seen.
+ baseline_files_dir: Option<TempDir>,
+ /// Map external path -> internal filename (uuid + same extension).
+ external_to_temp_name: HashMap<PathBuf, String>,
+ /// Internal filename -> external path as of baseline snapshot.
+ temp_name_to_baseline_external: HashMap<String, PathBuf>,
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
+ /// This is where renames are tracked.
+ temp_name_to_current_external: HashMap<String, PathBuf>,
+ /// Aggregated unified diff for all accumulated changes across files.
+ pub unified_diff: Option<String>,
+}
+
+impl TurnDiffTracker {
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Front-run apply patch calls to track the starting contents of any modified files.
+ /// - Creates a baseline snapshot for files that already exist on disk when first seen.
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
+ /// - Also updates internal mappings for move/rename events.
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
+ self.ensure_baseline_dir()?;
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
+
+ for (path, change) in changes.iter() {
+ // Ensure a stable internal filename exists for this external path.
+ if !self.external_to_temp_name.contains_key(path) {
+ let internal = uuid_filename_for(path);
+ self.external_to_temp_name
+ .insert(path.clone(), internal.clone());
+ self.temp_name_to_baseline_external
+ .insert(internal.clone(), path.clone());
+ self.temp_name_to_current_external
+ .insert(internal.clone(), path.clone());
+
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
+ if path.exists() {
+ let contents = fs::read(path)
+ .with_context(|| format!("failed to read original {}", path.display()))?;
+ let internal_path = baseline_dir.join(&internal);
+ fs::write(&internal_path, contents).with_context(|| {
+ format!("failed to write baseline file {}", internal_path.display())
+ })?;
+ }
+ }
+
+ // Track rename/move in current mapping if provided in an Update.
+ let move_path = match change {
+ FileChange::Update {
+ move_path: Some(dest),
+ ..
+ } => Some(dest),
+ _ => None,
+ };
+ if let Some(dest) = move_path {
+ let uuid_filename = match self.external_to_temp_name.get(path) {
+ Some(i) => i.clone(),
+ None => {
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
+ let i = uuid_filename_for(path);
+ self.external_to_temp_name.insert(path.clone(), i.clone());
+ self.temp_name_to_baseline_external
+ .insert(i.clone(), path.clone());
+ i
+ }
+ };
+ // Update current external mapping for temp file name.
+ self.temp_name_to_current_external
+ .insert(uuid_filename.clone(), dest.clone());
+ // Update forward file_mapping: external current -> internal name.
+ self.external_to_temp_name.remove(path);
+ self.external_to_temp_name
+ .insert(dest.clone(), uuid_filename);
+ }
+ }
+
+ Ok(())
+ }
+
+ /// Recompute the aggregated unified diff by comparing all baseline snapshots against
+ /// current files on disk using `git diff --no-index` and rewriting paths to external paths.
+ pub fn update_and_get_unified_diff(&mut self) -> Result<Option<String>> {
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
+ let current_dir = baseline_dir.join("current");
+ if current_dir.exists() {
+ // Best-effort cleanup of previous run's mirror.
+ let _ = fs::remove_dir_all(&current_dir);
+ }
+ fs::create_dir_all(&current_dir).with_context(|| {
+ format!(
+ "failed to create current mirror dir {}",
+ current_dir.display()
+ )
+ })?;
+
+ let mut aggregated = String::new();
+
+ // Compute diffs per tracked internal file.
+ for (internal, baseline_external) in &self.temp_name_to_baseline_external {
+ let baseline_path = baseline_dir.join(internal);
+ let current_external = self
+ .temp_name_to_current_external
+ .get(internal)
+ .cloned()
+ .unwrap_or_else(|| baseline_external.clone());
+
+ let left_is_dev_null = !baseline_path.exists();
+ let right_exists = current_external.exists();
+
+ // Prepare right side mirror file if exists; otherwise use /dev/null for deletions.
+ let right_arg = if right_exists {
+ let mirror_path = current_dir.join(internal);
+ let contents = fs::read(&current_external).with_context(|| {
+ format!(
+ "failed to read current file for diff {}",
+ current_external.display()
+ )
+ })?;
+ fs::write(&mirror_path, contents).with_context(|| {
+ format!(
+ "failed to write current mirror file {}",
+ mirror_path.display()
+ )
+ })?;
+ // Use relative path from baseline_dir (so headers say a/<uuid> b/current/<uuid>).
+ format!("current/{internal}")
+ } else {
+ // Deletion: right side is /dev/null to show proper deleted file diff.
+ "/dev/null".to_string()
+ };
+
+ // Prepare left arg: baseline file path or /dev/null for additions.
+ let left_arg = if left_is_dev_null {
+ "/dev/null".to_string()
+ } else {
+ internal.clone()
+ };
+
+ // Run git diff --no-index from baseline_dir to keep paths predictable.
+ let raw = run_git_allow_exit_codes(
+ &baseline_dir,
+ &[
+ "-c",
+ "color.ui=false",
+ "diff",
+ "--no-color",
+ "--no-index",
+ "--",
+ &left_arg,
+ &right_arg,
+ ],
+ &[0, 1], // 0: no changes, 1: differences
+ )?;
+
+ if raw.trim().is_empty() {
+ continue;
+ }
+ let rewritten = self.rewrite_diff_paths(&raw);
+ if !rewritten.trim().is_empty() {
+ if !aggregated.is_empty() && !aggregated.ends_with('\n') {
+ aggregated.push('\n');
+ }
+ aggregated.push_str(&rewritten);
+ }
+ }
+
+ self.unified_diff = if aggregated.trim().is_empty() {
+ None
+ } else {
+ Some(aggregated)
+ };
+
+ // Clean up the current dir.
+ let _ = fs::remove_dir_all(&current_dir);
+
+ Ok(self.unified_diff.clone())
+ }
+
+ fn baseline_dir(&self) -> Result<&Path> {
+ self.baseline_files_dir
+ .as_ref()
+ .map(|d| d.path())
+ .ok_or_else(|| anyhow::anyhow!("baseline temp dir not initialized"))
+ }
+
+ fn ensure_baseline_dir(&mut self) -> Result<()> {
+ if self.baseline_files_dir.is_some() {
+ return Ok(());
+ }
+ let tmp = TempDir::new().context("create baseline temp dir")?;
+ self.baseline_files_dir = Some(tmp);
+ Ok(())
+ }
+
+ /// Rewrites the internal filenames to external paths in diff headers.
+ /// Handles inputs like:
+ /// diff --git a/<uuid> b/current/<uuid>
+ /// --- a/<uuid> | /dev/null
+ /// +++ b/current/<uuid> | /dev/null
+ /// and replaces uuid with the external paths tracking baseline/current.
+ fn rewrite_diff_paths(&self, diff: &str) -> String {
+ let mut out = String::new();
+ for line in diff.lines() {
+ if let Some(rest) = line.strip_prefix("diff --git ") {
+ // Format: diff --git a/<f> b/<f>
+ let parts: Vec<&str> = rest.split_whitespace().collect();
+ if parts.len() == 2 {
+ let a = parts[0].strip_prefix("a/").unwrap_or(parts[0]);
+ let b = parts[1].strip_prefix("b/").unwrap_or(parts[1]);
+
+ let a_ext_display = if a == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let a_base = Path::new(a)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(a);
+ let mapped = self
+ .temp_name_to_baseline_external
+ .get(a_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(a));
+ mapped.display().to_string()
+ };
+
+ let b_ext_display = if b == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let b_base = Path::new(b)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(b);
+ let mapped = self
+ .temp_name_to_current_external
+ .get(b_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(b));
+ mapped.display().to_string()
+ };
+
+ out.push_str(&format!("diff --git a/{a_ext_display} b/{b_ext_display}\n"));
+ continue;
+ }
+ }
+ if let Some(rest) = line.strip_prefix("--- ") {
+ if let Some(path) = rest.strip_prefix("a/") {
+ let external_display = if path == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let p_base = Path::new(path)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(path);
+ self.temp_name_to_baseline_external
+ .get(p_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(path))
+ .display()
+ .to_string()
+ };
+ out.push_str(&format!("--- {external_display}\n"));
+ continue;
+ }
+ }
+ if let Some(rest) = line.strip_prefix("+++ ") {
+ if let Some(path) = rest.strip_prefix("b/") {
+ let external_display = if path == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let p_base = Path::new(path)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(path);
+ self.temp_name_to_current_external
+ .get(p_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(path))
+ .display()
+ .to_string()
+ };
+ out.push_str(&format!("+++ {external_display}\n"));
+ continue;
+ }
+ }
+ out.push_str(line);
+ out.push('\n');
+ }
+ out
+ }
+}
+
+fn uuid_filename_for(path: &Path) -> String {
+ let id = Uuid::new_v4().to_string();
+ match path.extension().and_then(|e| e.to_str()) {
+ Some(ext) if !ext.is_empty() => format!("{id}.{ext}"),
+ _ => id,
```
> ```suggestion
> None => id,
> ```
- Created: 2025-08-01 16:59:40 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248446717
```diff
@@ -0,0 +1,476 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use tempfile::TempDir;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+/// Tracks sets of changes to files and exposes the overall unified diff.
+/// Internally, the way this works is now:
+/// 1. Create a temp directory to store baseline snapshots of files when they are first seen.
+/// 2. When a path is first observed, copy its current contents into the baseline dir if it exists on disk.
+/// For new additions, do not create a baseline file so that diffs are shown as proper additions (using /dev/null).
+/// 3. Keep a stable internal filename (uuid + same extension) per external path for path rewrite in diffs.
+/// 4. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk using
+/// `git diff --no-index` and rewrite paths to external paths.
+#[derive(Default)]
+pub struct TurnDiffTracker {
+ /// Temp directory holding baseline snapshots of files as first seen.
+ baseline_files_dir: Option<TempDir>,
+ /// Map external path -> internal filename (uuid + same extension).
+ external_to_temp_name: HashMap<PathBuf, String>,
+ /// Internal filename -> external path as of baseline snapshot.
+ temp_name_to_baseline_external: HashMap<String, PathBuf>,
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
+ /// This is where renames are tracked.
+ temp_name_to_current_external: HashMap<String, PathBuf>,
+ /// Aggregated unified diff for all accumulated changes across files.
+ pub unified_diff: Option<String>,
+}
+
+impl TurnDiffTracker {
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Front-run apply patch calls to track the starting contents of any modified files.
+ /// - Creates a baseline snapshot for files that already exist on disk when first seen.
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
+ /// - Also updates internal mappings for move/rename events.
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
+ self.ensure_baseline_dir()?;
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
+
+ for (path, change) in changes.iter() {
+ // Ensure a stable internal filename exists for this external path.
+ if !self.external_to_temp_name.contains_key(path) {
+ let internal = uuid_filename_for(path);
+ self.external_to_temp_name
+ .insert(path.clone(), internal.clone());
+ self.temp_name_to_baseline_external
+ .insert(internal.clone(), path.clone());
+ self.temp_name_to_current_external
+ .insert(internal.clone(), path.clone());
+
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
+ if path.exists() {
+ let contents = fs::read(path)
+ .with_context(|| format!("failed to read original {}", path.display()))?;
+ let internal_path = baseline_dir.join(&internal);
+ fs::write(&internal_path, contents).with_context(|| {
+ format!("failed to write baseline file {}", internal_path.display())
+ })?;
+ }
+ }
+
+ // Track rename/move in current mapping if provided in an Update.
+ let move_path = match change {
+ FileChange::Update {
+ move_path: Some(dest),
+ ..
+ } => Some(dest),
+ _ => None,
+ };
+ if let Some(dest) = move_path {
+ let uuid_filename = match self.external_to_temp_name.get(path) {
+ Some(i) => i.clone(),
+ None => {
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
+ let i = uuid_filename_for(path);
+ self.external_to_temp_name.insert(path.clone(), i.clone());
+ self.temp_name_to_baseline_external
+ .insert(i.clone(), path.clone());
+ i
+ }
+ };
+ // Update current external mapping for temp file name.
+ self.temp_name_to_current_external
+ .insert(uuid_filename.clone(), dest.clone());
+ // Update forward file_mapping: external current -> internal name.
+ self.external_to_temp_name.remove(path);
+ self.external_to_temp_name
+ .insert(dest.clone(), uuid_filename);
+ }
+ }
+
+ Ok(())
+ }
+
+ /// Recompute the aggregated unified diff by comparing all baseline snapshots against
+ /// current files on disk using `git diff --no-index` and rewriting paths to external paths.
+ pub fn update_and_get_unified_diff(&mut self) -> Result<Option<String>> {
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
+ let current_dir = baseline_dir.join("current");
+ if current_dir.exists() {
+ // Best-effort cleanup of previous run's mirror.
+ let _ = fs::remove_dir_all(&current_dir);
+ }
+ fs::create_dir_all(&current_dir).with_context(|| {
+ format!(
+ "failed to create current mirror dir {}",
+ current_dir.display()
+ )
+ })?;
+
+ let mut aggregated = String::new();
+
+ // Compute diffs per tracked internal file.
+ for (internal, baseline_external) in &self.temp_name_to_baseline_external {
+ let baseline_path = baseline_dir.join(internal);
+ let current_external = self
+ .temp_name_to_current_external
+ .get(internal)
+ .cloned()
+ .unwrap_or_else(|| baseline_external.clone());
+
+ let left_is_dev_null = !baseline_path.exists();
+ let right_exists = current_external.exists();
+
+ // Prepare right side mirror file if exists; otherwise use /dev/null for deletions.
+ let right_arg = if right_exists {
+ let mirror_path = current_dir.join(internal);
+ let contents = fs::read(&current_external).with_context(|| {
+ format!(
+ "failed to read current file for diff {}",
+ current_external.display()
+ )
+ })?;
+ fs::write(&mirror_path, contents).with_context(|| {
+ format!(
+ "failed to write current mirror file {}",
+ mirror_path.display()
+ )
+ })?;
+ // Use relative path from baseline_dir (so headers say a/<uuid> b/current/<uuid>).
+ format!("current/{internal}")
+ } else {
+ // Deletion: right side is /dev/null to show proper deleted file diff.
+ "/dev/null".to_string()
+ };
+
+ // Prepare left arg: baseline file path or /dev/null for additions.
+ let left_arg = if left_is_dev_null {
+ "/dev/null".to_string()
+ } else {
+ internal.clone()
+ };
+
+ // Run git diff --no-index from baseline_dir to keep paths predictable.
+ let raw = run_git_allow_exit_codes(
+ &baseline_dir,
+ &[
+ "-c",
+ "color.ui=false",
+ "diff",
+ "--no-color",
+ "--no-index",
+ "--",
+ &left_arg,
+ &right_arg,
+ ],
+ &[0, 1], // 0: no changes, 1: differences
+ )?;
+
+ if raw.trim().is_empty() {
+ continue;
+ }
+ let rewritten = self.rewrite_diff_paths(&raw);
+ if !rewritten.trim().is_empty() {
+ if !aggregated.is_empty() && !aggregated.ends_with('\n') {
+ aggregated.push('\n');
+ }
+ aggregated.push_str(&rewritten);
+ }
+ }
+
+ self.unified_diff = if aggregated.trim().is_empty() {
+ None
+ } else {
+ Some(aggregated)
+ };
+
+ // Clean up the current dir.
+ let _ = fs::remove_dir_all(&current_dir);
+
+ Ok(self.unified_diff.clone())
+ }
+
+ fn baseline_dir(&self) -> Result<&Path> {
+ self.baseline_files_dir
+ .as_ref()
+ .map(|d| d.path())
+ .ok_or_else(|| anyhow::anyhow!("baseline temp dir not initialized"))
+ }
+
+ fn ensure_baseline_dir(&mut self) -> Result<()> {
+ if self.baseline_files_dir.is_some() {
+ return Ok(());
+ }
+ let tmp = TempDir::new().context("create baseline temp dir")?;
+ self.baseline_files_dir = Some(tmp);
+ Ok(())
+ }
+
+ /// Rewrites the internal filenames to external paths in diff headers.
+ /// Handles inputs like:
+ /// diff --git a/<uuid> b/current/<uuid>
+ /// --- a/<uuid> | /dev/null
+ /// +++ b/current/<uuid> | /dev/null
+ /// and replaces uuid with the external paths tracking baseline/current.
+ fn rewrite_diff_paths(&self, diff: &str) -> String {
+ let mut out = String::new();
+ for line in diff.lines() {
+ if let Some(rest) = line.strip_prefix("diff --git ") {
+ // Format: diff --git a/<f> b/<f>
+ let parts: Vec<&str> = rest.split_whitespace().collect();
+ if parts.len() == 2 {
+ let a = parts[0].strip_prefix("a/").unwrap_or(parts[0]);
+ let b = parts[1].strip_prefix("b/").unwrap_or(parts[1]);
+
+ let a_ext_display = if a == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let a_base = Path::new(a)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(a);
+ let mapped = self
+ .temp_name_to_baseline_external
+ .get(a_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(a));
+ mapped.display().to_string()
+ };
+
+ let b_ext_display = if b == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let b_base = Path::new(b)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(b);
+ let mapped = self
+ .temp_name_to_current_external
+ .get(b_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(b));
+ mapped.display().to_string()
+ };
+
+ out.push_str(&format!("diff --git a/{a_ext_display} b/{b_ext_display}\n"));
+ continue;
+ }
+ }
+ if let Some(rest) = line.strip_prefix("--- ") {
+ if let Some(path) = rest.strip_prefix("a/") {
+ let external_display = if path == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let p_base = Path::new(path)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(path);
+ self.temp_name_to_baseline_external
+ .get(p_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(path))
+ .display()
+ .to_string()
+ };
+ out.push_str(&format!("--- {external_display}\n"));
+ continue;
+ }
+ }
+ if let Some(rest) = line.strip_prefix("+++ ") {
+ if let Some(path) = rest.strip_prefix("b/") {
+ let external_display = if path == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let p_base = Path::new(path)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(path);
+ self.temp_name_to_current_external
+ .get(p_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(path))
+ .display()
+ .to_string()
+ };
+ out.push_str(&format!("+++ {external_display}\n"));
+ continue;
+ }
+ }
+ out.push_str(line);
+ out.push('\n');
+ }
+ out
+ }
+}
+
+fn uuid_filename_for(path: &Path) -> String {
+ let id = Uuid::new_v4().to_string();
+ match path.extension().and_then(|e| e.to_str()) {
+ Some(ext) if !ext.is_empty() => format!("{id}.{ext}"),
+ _ => id,
+ }
+}
+
+fn run_git_allow_exit_codes(
+ repo: &Path,
+ args: &[&str],
+ allowed_exit_codes: &[i32],
+) -> Result<String> {
+ let output = Command::new("git")
+ .current_dir(repo)
+ .args(args)
+ .output()
+ .with_context(|| format!("failed to run git {:?} in {}", args, repo.display()))?;
```
> ```suggestion
> .with_context(|| format!("failed to run `git {args:?}` in {repo}"))?;
> ```
- Created: 2025-08-01 17:08:30 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248464099
```diff
@@ -0,0 +1,476 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use tempfile::TempDir;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+/// Tracks sets of changes to files and exposes the overall unified diff.
+/// Internally, the way this works is now:
+/// 1. Create a temp directory to store baseline snapshots of files when they are first seen.
+/// 2. When a path is first observed, copy its current contents into the baseline dir if it exists on disk.
+/// For new additions, do not create a baseline file so that diffs are shown as proper additions (using /dev/null).
+/// 3. Keep a stable internal filename (uuid + same extension) per external path for path rewrite in diffs.
+/// 4. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk using
+/// `git diff --no-index` and rewrite paths to external paths.
+#[derive(Default)]
+pub struct TurnDiffTracker {
+ /// Temp directory holding baseline snapshots of files as first seen.
+ baseline_files_dir: Option<TempDir>,
+ /// Map external path -> internal filename (uuid + same extension).
+ external_to_temp_name: HashMap<PathBuf, String>,
+ /// Internal filename -> external path as of baseline snapshot.
+ temp_name_to_baseline_external: HashMap<String, PathBuf>,
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
+ /// This is where renames are tracked.
+ temp_name_to_current_external: HashMap<String, PathBuf>,
+ /// Aggregated unified diff for all accumulated changes across files.
+ pub unified_diff: Option<String>,
+}
+
+impl TurnDiffTracker {
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Front-run apply patch calls to track the starting contents of any modified files.
+ /// - Creates a baseline snapshot for files that already exist on disk when first seen.
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
+ /// - Also updates internal mappings for move/rename events.
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
+ self.ensure_baseline_dir()?;
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
+
+ for (path, change) in changes.iter() {
+ // Ensure a stable internal filename exists for this external path.
+ if !self.external_to_temp_name.contains_key(path) {
+ let internal = uuid_filename_for(path);
+ self.external_to_temp_name
+ .insert(path.clone(), internal.clone());
+ self.temp_name_to_baseline_external
+ .insert(internal.clone(), path.clone());
+ self.temp_name_to_current_external
+ .insert(internal.clone(), path.clone());
+
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
+ if path.exists() {
+ let contents = fs::read(path)
+ .with_context(|| format!("failed to read original {}", path.display()))?;
+ let internal_path = baseline_dir.join(&internal);
+ fs::write(&internal_path, contents).with_context(|| {
+ format!("failed to write baseline file {}", internal_path.display())
+ })?;
+ }
+ }
+
+ // Track rename/move in current mapping if provided in an Update.
+ let move_path = match change {
+ FileChange::Update {
+ move_path: Some(dest),
+ ..
+ } => Some(dest),
+ _ => None,
+ };
+ if let Some(dest) = move_path {
+ let uuid_filename = match self.external_to_temp_name.get(path) {
+ Some(i) => i.clone(),
+ None => {
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
+ let i = uuid_filename_for(path);
+ self.external_to_temp_name.insert(path.clone(), i.clone());
+ self.temp_name_to_baseline_external
+ .insert(i.clone(), path.clone());
+ i
+ }
+ };
+ // Update current external mapping for temp file name.
+ self.temp_name_to_current_external
+ .insert(uuid_filename.clone(), dest.clone());
+ // Update forward file_mapping: external current -> internal name.
+ self.external_to_temp_name.remove(path);
+ self.external_to_temp_name
+ .insert(dest.clone(), uuid_filename);
+ }
+ }
+
+ Ok(())
+ }
+
+ /// Recompute the aggregated unified diff by comparing all baseline snapshots against
+ /// current files on disk using `git diff --no-index` and rewriting paths to external paths.
+ pub fn update_and_get_unified_diff(&mut self) -> Result<Option<String>> {
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
+ let current_dir = baseline_dir.join("current");
+ if current_dir.exists() {
+ // Best-effort cleanup of previous run's mirror.
+ let _ = fs::remove_dir_all(&current_dir);
+ }
+ fs::create_dir_all(&current_dir).with_context(|| {
+ format!(
+ "failed to create current mirror dir {}",
+ current_dir.display()
+ )
+ })?;
+
+ let mut aggregated = String::new();
+
+ // Compute diffs per tracked internal file.
+ for (internal, baseline_external) in &self.temp_name_to_baseline_external {
+ let baseline_path = baseline_dir.join(internal);
+ let current_external = self
+ .temp_name_to_current_external
+ .get(internal)
+ .cloned()
+ .unwrap_or_else(|| baseline_external.clone());
+
+ let left_is_dev_null = !baseline_path.exists();
+ let right_exists = current_external.exists();
+
+ // Prepare right side mirror file if exists; otherwise use /dev/null for deletions.
+ let right_arg = if right_exists {
+ let mirror_path = current_dir.join(internal);
+ let contents = fs::read(&current_external).with_context(|| {
+ format!(
+ "failed to read current file for diff {}",
+ current_external.display()
+ )
+ })?;
+ fs::write(&mirror_path, contents).with_context(|| {
+ format!(
+ "failed to write current mirror file {}",
+ mirror_path.display()
+ )
+ })?;
+ // Use relative path from baseline_dir (so headers say a/<uuid> b/current/<uuid>).
+ format!("current/{internal}")
+ } else {
+ // Deletion: right side is /dev/null to show proper deleted file diff.
+ "/dev/null".to_string()
+ };
+
+ // Prepare left arg: baseline file path or /dev/null for additions.
+ let left_arg = if left_is_dev_null {
+ "/dev/null".to_string()
+ } else {
+ internal.clone()
+ };
+
+ // Run git diff --no-index from baseline_dir to keep paths predictable.
+ let raw = run_git_allow_exit_codes(
+ &baseline_dir,
+ &[
+ "-c",
+ "color.ui=false",
+ "diff",
+ "--no-color",
+ "--no-index",
+ "--",
+ &left_arg,
+ &right_arg,
+ ],
+ &[0, 1], // 0: no changes, 1: differences
+ )?;
+
+ if raw.trim().is_empty() {
+ continue;
+ }
+ let rewritten = self.rewrite_diff_paths(&raw);
+ if !rewritten.trim().is_empty() {
+ if !aggregated.is_empty() && !aggregated.ends_with('\n') {
+ aggregated.push('\n');
+ }
+ aggregated.push_str(&rewritten);
+ }
+ }
+
+ self.unified_diff = if aggregated.trim().is_empty() {
+ None
+ } else {
+ Some(aggregated)
+ };
+
+ // Clean up the current dir.
+ let _ = fs::remove_dir_all(&current_dir);
+
+ Ok(self.unified_diff.clone())
+ }
+
+ fn baseline_dir(&self) -> Result<&Path> {
+ self.baseline_files_dir
+ .as_ref()
+ .map(|d| d.path())
+ .ok_or_else(|| anyhow::anyhow!("baseline temp dir not initialized"))
+ }
+
+ fn ensure_baseline_dir(&mut self) -> Result<()> {
+ if self.baseline_files_dir.is_some() {
+ return Ok(());
+ }
+ let tmp = TempDir::new().context("create baseline temp dir")?;
+ self.baseline_files_dir = Some(tmp);
+ Ok(())
+ }
+
+ /// Rewrites the internal filenames to external paths in diff headers.
+ /// Handles inputs like:
+ /// diff --git a/<uuid> b/current/<uuid>
+ /// --- a/<uuid> | /dev/null
+ /// +++ b/current/<uuid> | /dev/null
+ /// and replaces uuid with the external paths tracking baseline/current.
+ fn rewrite_diff_paths(&self, diff: &str) -> String {
+ let mut out = String::new();
+ for line in diff.lines() {
+ if let Some(rest) = line.strip_prefix("diff --git ") {
```
> This gets indented pretty far, so maybe it's worth moving to a helper function that takes `(&mut String, temp_name_to_current_external, temp_name_to_baseline_external)`
- Created: 2025-08-01 17:09:15 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248466128
```diff
@@ -0,0 +1,476 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use tempfile::TempDir;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+/// Tracks sets of changes to files and exposes the overall unified diff.
+/// Internally, the way this works is now:
+/// 1. Create a temp directory to store baseline snapshots of files when they are first seen.
+/// 2. When a path is first observed, copy its current contents into the baseline dir if it exists on disk.
+/// For new additions, do not create a baseline file so that diffs are shown as proper additions (using /dev/null).
+/// 3. Keep a stable internal filename (uuid + same extension) per external path for path rewrite in diffs.
+/// 4. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk using
+/// `git diff --no-index` and rewrite paths to external paths.
+#[derive(Default)]
+pub struct TurnDiffTracker {
+ /// Temp directory holding baseline snapshots of files as first seen.
+ baseline_files_dir: Option<TempDir>,
+ /// Map external path -> internal filename (uuid + same extension).
+ external_to_temp_name: HashMap<PathBuf, String>,
+ /// Internal filename -> external path as of baseline snapshot.
+ temp_name_to_baseline_external: HashMap<String, PathBuf>,
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
+ /// This is where renames are tracked.
+ temp_name_to_current_external: HashMap<String, PathBuf>,
+ /// Aggregated unified diff for all accumulated changes across files.
+ pub unified_diff: Option<String>,
+}
+
+impl TurnDiffTracker {
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Front-run apply patch calls to track the starting contents of any modified files.
+ /// - Creates a baseline snapshot for files that already exist on disk when first seen.
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
+ /// - Also updates internal mappings for move/rename events.
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
+ self.ensure_baseline_dir()?;
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
+
+ for (path, change) in changes.iter() {
+ // Ensure a stable internal filename exists for this external path.
+ if !self.external_to_temp_name.contains_key(path) {
+ let internal = uuid_filename_for(path);
+ self.external_to_temp_name
+ .insert(path.clone(), internal.clone());
+ self.temp_name_to_baseline_external
+ .insert(internal.clone(), path.clone());
+ self.temp_name_to_current_external
+ .insert(internal.clone(), path.clone());
+
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
+ if path.exists() {
+ let contents = fs::read(path)
+ .with_context(|| format!("failed to read original {}", path.display()))?;
+ let internal_path = baseline_dir.join(&internal);
+ fs::write(&internal_path, contents).with_context(|| {
+ format!("failed to write baseline file {}", internal_path.display())
+ })?;
+ }
+ }
+
+ // Track rename/move in current mapping if provided in an Update.
+ let move_path = match change {
+ FileChange::Update {
+ move_path: Some(dest),
+ ..
+ } => Some(dest),
+ _ => None,
+ };
+ if let Some(dest) = move_path {
+ let uuid_filename = match self.external_to_temp_name.get(path) {
+ Some(i) => i.clone(),
+ None => {
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
+ let i = uuid_filename_for(path);
+ self.external_to_temp_name.insert(path.clone(), i.clone());
+ self.temp_name_to_baseline_external
+ .insert(i.clone(), path.clone());
+ i
+ }
+ };
+ // Update current external mapping for temp file name.
+ self.temp_name_to_current_external
+ .insert(uuid_filename.clone(), dest.clone());
+ // Update forward file_mapping: external current -> internal name.
+ self.external_to_temp_name.remove(path);
+ self.external_to_temp_name
+ .insert(dest.clone(), uuid_filename);
+ }
+ }
+
+ Ok(())
+ }
+
+ /// Recompute the aggregated unified diff by comparing all baseline snapshots against
+ /// current files on disk using `git diff --no-index` and rewriting paths to external paths.
+ pub fn update_and_get_unified_diff(&mut self) -> Result<Option<String>> {
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
+ let current_dir = baseline_dir.join("current");
+ if current_dir.exists() {
+ // Best-effort cleanup of previous run's mirror.
+ let _ = fs::remove_dir_all(&current_dir);
+ }
+ fs::create_dir_all(&current_dir).with_context(|| {
+ format!(
+ "failed to create current mirror dir {}",
+ current_dir.display()
+ )
+ })?;
+
+ let mut aggregated = String::new();
+
+ // Compute diffs per tracked internal file.
+ for (internal, baseline_external) in &self.temp_name_to_baseline_external {
+ let baseline_path = baseline_dir.join(internal);
+ let current_external = self
+ .temp_name_to_current_external
+ .get(internal)
+ .cloned()
+ .unwrap_or_else(|| baseline_external.clone());
+
+ let left_is_dev_null = !baseline_path.exists();
+ let right_exists = current_external.exists();
+
+ // Prepare right side mirror file if exists; otherwise use /dev/null for deletions.
+ let right_arg = if right_exists {
+ let mirror_path = current_dir.join(internal);
+ let contents = fs::read(&current_external).with_context(|| {
+ format!(
+ "failed to read current file for diff {}",
+ current_external.display()
+ )
+ })?;
+ fs::write(&mirror_path, contents).with_context(|| {
+ format!(
+ "failed to write current mirror file {}",
+ mirror_path.display()
+ )
+ })?;
+ // Use relative path from baseline_dir (so headers say a/<uuid> b/current/<uuid>).
+ format!("current/{internal}")
+ } else {
+ // Deletion: right side is /dev/null to show proper deleted file diff.
+ "/dev/null".to_string()
+ };
+
+ // Prepare left arg: baseline file path or /dev/null for additions.
+ let left_arg = if left_is_dev_null {
+ "/dev/null".to_string()
+ } else {
+ internal.clone()
+ };
+
+ // Run git diff --no-index from baseline_dir to keep paths predictable.
+ let raw = run_git_allow_exit_codes(
+ &baseline_dir,
+ &[
+ "-c",
+ "color.ui=false",
+ "diff",
+ "--no-color",
+ "--no-index",
+ "--",
+ &left_arg,
+ &right_arg,
+ ],
+ &[0, 1], // 0: no changes, 1: differences
+ )?;
+
+ if raw.trim().is_empty() {
+ continue;
+ }
+ let rewritten = self.rewrite_diff_paths(&raw);
+ if !rewritten.trim().is_empty() {
+ if !aggregated.is_empty() && !aggregated.ends_with('\n') {
+ aggregated.push('\n');
+ }
+ aggregated.push_str(&rewritten);
+ }
+ }
+
+ self.unified_diff = if aggregated.trim().is_empty() {
+ None
+ } else {
+ Some(aggregated)
+ };
+
+ // Clean up the current dir.
+ let _ = fs::remove_dir_all(&current_dir);
+
+ Ok(self.unified_diff.clone())
+ }
+
+ fn baseline_dir(&self) -> Result<&Path> {
+ self.baseline_files_dir
+ .as_ref()
+ .map(|d| d.path())
+ .ok_or_else(|| anyhow::anyhow!("baseline temp dir not initialized"))
+ }
+
+ fn ensure_baseline_dir(&mut self) -> Result<()> {
+ if self.baseline_files_dir.is_some() {
+ return Ok(());
+ }
+ let tmp = TempDir::new().context("create baseline temp dir")?;
+ self.baseline_files_dir = Some(tmp);
+ Ok(())
+ }
+
+ /// Rewrites the internal filenames to external paths in diff headers.
+ /// Handles inputs like:
+ /// diff --git a/<uuid> b/current/<uuid>
+ /// --- a/<uuid> | /dev/null
+ /// +++ b/current/<uuid> | /dev/null
+ /// and replaces uuid with the external paths tracking baseline/current.
+ fn rewrite_diff_paths(&self, diff: &str) -> String {
+ let mut out = String::new();
+ for line in diff.lines() {
+ if let Some(rest) = line.strip_prefix("diff --git ") {
+ // Format: diff --git a/<f> b/<f>
+ let parts: Vec<&str> = rest.split_whitespace().collect();
+ if parts.len() == 2 {
```
> We don't have to worry about paths with spaces because they're all UUIDs?
- Created: 2025-08-01 17:10:28 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248468317
```diff
@@ -0,0 +1,476 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use tempfile::TempDir;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+/// Tracks sets of changes to files and exposes the overall unified diff.
+/// Internally, the way this works is now:
+/// 1. Create a temp directory to store baseline snapshots of files when they are first seen.
+/// 2. When a path is first observed, copy its current contents into the baseline dir if it exists on disk.
+/// For new additions, do not create a baseline file so that diffs are shown as proper additions (using /dev/null).
+/// 3. Keep a stable internal filename (uuid + same extension) per external path for path rewrite in diffs.
+/// 4. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk using
+/// `git diff --no-index` and rewrite paths to external paths.
+#[derive(Default)]
+pub struct TurnDiffTracker {
+ /// Temp directory holding baseline snapshots of files as first seen.
+ baseline_files_dir: Option<TempDir>,
+ /// Map external path -> internal filename (uuid + same extension).
+ external_to_temp_name: HashMap<PathBuf, String>,
+ /// Internal filename -> external path as of baseline snapshot.
+ temp_name_to_baseline_external: HashMap<String, PathBuf>,
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
+ /// This is where renames are tracked.
+ temp_name_to_current_external: HashMap<String, PathBuf>,
+ /// Aggregated unified diff for all accumulated changes across files.
+ pub unified_diff: Option<String>,
+}
+
+impl TurnDiffTracker {
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Front-run apply patch calls to track the starting contents of any modified files.
+ /// - Creates a baseline snapshot for files that already exist on disk when first seen.
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
+ /// - Also updates internal mappings for move/rename events.
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
+ self.ensure_baseline_dir()?;
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
+
+ for (path, change) in changes.iter() {
+ // Ensure a stable internal filename exists for this external path.
+ if !self.external_to_temp_name.contains_key(path) {
+ let internal = uuid_filename_for(path);
+ self.external_to_temp_name
+ .insert(path.clone(), internal.clone());
+ self.temp_name_to_baseline_external
+ .insert(internal.clone(), path.clone());
+ self.temp_name_to_current_external
+ .insert(internal.clone(), path.clone());
+
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
+ if path.exists() {
+ let contents = fs::read(path)
+ .with_context(|| format!("failed to read original {}", path.display()))?;
+ let internal_path = baseline_dir.join(&internal);
+ fs::write(&internal_path, contents).with_context(|| {
+ format!("failed to write baseline file {}", internal_path.display())
+ })?;
+ }
+ }
+
+ // Track rename/move in current mapping if provided in an Update.
+ let move_path = match change {
+ FileChange::Update {
+ move_path: Some(dest),
+ ..
+ } => Some(dest),
+ _ => None,
+ };
+ if let Some(dest) = move_path {
+ let uuid_filename = match self.external_to_temp_name.get(path) {
+ Some(i) => i.clone(),
+ None => {
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
+ let i = uuid_filename_for(path);
+ self.external_to_temp_name.insert(path.clone(), i.clone());
+ self.temp_name_to_baseline_external
+ .insert(i.clone(), path.clone());
+ i
+ }
+ };
+ // Update current external mapping for temp file name.
+ self.temp_name_to_current_external
+ .insert(uuid_filename.clone(), dest.clone());
+ // Update forward file_mapping: external current -> internal name.
+ self.external_to_temp_name.remove(path);
+ self.external_to_temp_name
+ .insert(dest.clone(), uuid_filename);
+ }
+ }
+
+ Ok(())
+ }
+
+ /// Recompute the aggregated unified diff by comparing all baseline snapshots against
+ /// current files on disk using `git diff --no-index` and rewriting paths to external paths.
+ pub fn update_and_get_unified_diff(&mut self) -> Result<Option<String>> {
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
+ let current_dir = baseline_dir.join("current");
+ if current_dir.exists() {
+ // Best-effort cleanup of previous run's mirror.
+ let _ = fs::remove_dir_all(&current_dir);
+ }
+ fs::create_dir_all(&current_dir).with_context(|| {
+ format!(
+ "failed to create current mirror dir {}",
+ current_dir.display()
+ )
+ })?;
+
+ let mut aggregated = String::new();
+
+ // Compute diffs per tracked internal file.
+ for (internal, baseline_external) in &self.temp_name_to_baseline_external {
+ let baseline_path = baseline_dir.join(internal);
+ let current_external = self
+ .temp_name_to_current_external
+ .get(internal)
+ .cloned()
+ .unwrap_or_else(|| baseline_external.clone());
+
+ let left_is_dev_null = !baseline_path.exists();
+ let right_exists = current_external.exists();
+
+ // Prepare right side mirror file if exists; otherwise use /dev/null for deletions.
+ let right_arg = if right_exists {
+ let mirror_path = current_dir.join(internal);
+ let contents = fs::read(&current_external).with_context(|| {
+ format!(
+ "failed to read current file for diff {}",
+ current_external.display()
+ )
+ })?;
+ fs::write(&mirror_path, contents).with_context(|| {
+ format!(
+ "failed to write current mirror file {}",
+ mirror_path.display()
+ )
+ })?;
+ // Use relative path from baseline_dir (so headers say a/<uuid> b/current/<uuid>).
+ format!("current/{internal}")
+ } else {
+ // Deletion: right side is /dev/null to show proper deleted file diff.
+ "/dev/null".to_string()
+ };
+
+ // Prepare left arg: baseline file path or /dev/null for additions.
+ let left_arg = if left_is_dev_null {
+ "/dev/null".to_string()
+ } else {
+ internal.clone()
+ };
+
+ // Run git diff --no-index from baseline_dir to keep paths predictable.
+ let raw = run_git_allow_exit_codes(
+ &baseline_dir,
+ &[
+ "-c",
+ "color.ui=false",
+ "diff",
+ "--no-color",
+ "--no-index",
+ "--",
+ &left_arg,
+ &right_arg,
+ ],
+ &[0, 1], // 0: no changes, 1: differences
+ )?;
+
+ if raw.trim().is_empty() {
+ continue;
+ }
+ let rewritten = self.rewrite_diff_paths(&raw);
+ if !rewritten.trim().is_empty() {
+ if !aggregated.is_empty() && !aggregated.ends_with('\n') {
+ aggregated.push('\n');
+ }
+ aggregated.push_str(&rewritten);
+ }
+ }
+
+ self.unified_diff = if aggregated.trim().is_empty() {
+ None
+ } else {
+ Some(aggregated)
+ };
+
+ // Clean up the current dir.
+ let _ = fs::remove_dir_all(&current_dir);
+
+ Ok(self.unified_diff.clone())
+ }
+
+ fn baseline_dir(&self) -> Result<&Path> {
+ self.baseline_files_dir
+ .as_ref()
+ .map(|d| d.path())
+ .ok_or_else(|| anyhow::anyhow!("baseline temp dir not initialized"))
+ }
+
+ fn ensure_baseline_dir(&mut self) -> Result<()> {
+ if self.baseline_files_dir.is_some() {
+ return Ok(());
+ }
+ let tmp = TempDir::new().context("create baseline temp dir")?;
+ self.baseline_files_dir = Some(tmp);
+ Ok(())
+ }
+
+ /// Rewrites the internal filenames to external paths in diff headers.
+ /// Handles inputs like:
+ /// diff --git a/<uuid> b/current/<uuid>
+ /// --- a/<uuid> | /dev/null
+ /// +++ b/current/<uuid> | /dev/null
+ /// and replaces uuid with the external paths tracking baseline/current.
+ fn rewrite_diff_paths(&self, diff: &str) -> String {
+ let mut out = String::new();
+ for line in diff.lines() {
+ if let Some(rest) = line.strip_prefix("diff --git ") {
+ // Format: diff --git a/<f> b/<f>
+ let parts: Vec<&str> = rest.split_whitespace().collect();
+ if parts.len() == 2 {
+ let a = parts[0].strip_prefix("a/").unwrap_or(parts[0]);
+ let b = parts[1].strip_prefix("b/").unwrap_or(parts[1]);
+
+ let a_ext_display = if a == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let a_base = Path::new(a)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(a);
+ let mapped = self
+ .temp_name_to_baseline_external
+ .get(a_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(a));
+ mapped.display().to_string()
+ };
+
+ let b_ext_display = if b == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let b_base = Path::new(b)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(b);
+ let mapped = self
+ .temp_name_to_current_external
+ .get(b_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(b));
+ mapped.display().to_string()
+ };
+
+ out.push_str(&format!("diff --git a/{a_ext_display} b/{b_ext_display}\n"));
+ continue;
+ }
+ }
+ if let Some(rest) = line.strip_prefix("--- ") {
+ if let Some(path) = rest.strip_prefix("a/") {
+ let external_display = if path == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let p_base = Path::new(path)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(path);
+ self.temp_name_to_baseline_external
+ .get(p_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(path))
+ .display()
+ .to_string()
+ };
+ out.push_str(&format!("--- {external_display}\n"));
+ continue;
+ }
+ }
+ if let Some(rest) = line.strip_prefix("+++ ") {
+ if let Some(path) = rest.strip_prefix("b/") {
+ let external_display = if path == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let p_base = Path::new(path)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(path);
+ self.temp_name_to_current_external
+ .get(p_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(path))
+ .display()
+ .to_string()
+ };
+ out.push_str(&format!("+++ {external_display}\n"));
+ continue;
+ }
+ }
+ out.push_str(line);
+ out.push('\n');
+ }
+ out
+ }
+}
+
+fn uuid_filename_for(path: &Path) -> String {
+ let id = Uuid::new_v4().to_string();
+ match path.extension().and_then(|e| e.to_str()) {
```
> Why preserve the `ext`, btw? It could, in theory, contain a space, right?
- Created: 2025-08-01 17:10:59 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248469411
```diff
@@ -0,0 +1,476 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use tempfile::TempDir;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+/// Tracks sets of changes to files and exposes the overall unified diff.
+/// Internally, the way this works is now:
+/// 1. Create a temp directory to store baseline snapshots of files when they are first seen.
+/// 2. When a path is first observed, copy its current contents into the baseline dir if it exists on disk.
+/// For new additions, do not create a baseline file so that diffs are shown as proper additions (using /dev/null).
+/// 3. Keep a stable internal filename (uuid + same extension) per external path for path rewrite in diffs.
+/// 4. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk using
+/// `git diff --no-index` and rewrite paths to external paths.
+#[derive(Default)]
+pub struct TurnDiffTracker {
+ /// Temp directory holding baseline snapshots of files as first seen.
+ baseline_files_dir: Option<TempDir>,
+ /// Map external path -> internal filename (uuid + same extension).
+ external_to_temp_name: HashMap<PathBuf, String>,
+ /// Internal filename -> external path as of baseline snapshot.
+ temp_name_to_baseline_external: HashMap<String, PathBuf>,
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
+ /// This is where renames are tracked.
+ temp_name_to_current_external: HashMap<String, PathBuf>,
+ /// Aggregated unified diff for all accumulated changes across files.
+ pub unified_diff: Option<String>,
+}
+
+impl TurnDiffTracker {
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Front-run apply patch calls to track the starting contents of any modified files.
+ /// - Creates a baseline snapshot for files that already exist on disk when first seen.
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
+ /// - Also updates internal mappings for move/rename events.
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
+ self.ensure_baseline_dir()?;
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
+
+ for (path, change) in changes.iter() {
+ // Ensure a stable internal filename exists for this external path.
+ if !self.external_to_temp_name.contains_key(path) {
+ let internal = uuid_filename_for(path);
+ self.external_to_temp_name
+ .insert(path.clone(), internal.clone());
+ self.temp_name_to_baseline_external
+ .insert(internal.clone(), path.clone());
+ self.temp_name_to_current_external
+ .insert(internal.clone(), path.clone());
+
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
+ if path.exists() {
+ let contents = fs::read(path)
+ .with_context(|| format!("failed to read original {}", path.display()))?;
+ let internal_path = baseline_dir.join(&internal);
+ fs::write(&internal_path, contents).with_context(|| {
+ format!("failed to write baseline file {}", internal_path.display())
+ })?;
+ }
+ }
+
+ // Track rename/move in current mapping if provided in an Update.
+ let move_path = match change {
+ FileChange::Update {
+ move_path: Some(dest),
+ ..
+ } => Some(dest),
+ _ => None,
+ };
+ if let Some(dest) = move_path {
+ let uuid_filename = match self.external_to_temp_name.get(path) {
+ Some(i) => i.clone(),
+ None => {
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
+ let i = uuid_filename_for(path);
+ self.external_to_temp_name.insert(path.clone(), i.clone());
+ self.temp_name_to_baseline_external
+ .insert(i.clone(), path.clone());
+ i
+ }
+ };
+ // Update current external mapping for temp file name.
+ self.temp_name_to_current_external
+ .insert(uuid_filename.clone(), dest.clone());
+ // Update forward file_mapping: external current -> internal name.
+ self.external_to_temp_name.remove(path);
+ self.external_to_temp_name
+ .insert(dest.clone(), uuid_filename);
+ }
+ }
+
+ Ok(())
+ }
+
+ /// Recompute the aggregated unified diff by comparing all baseline snapshots against
+ /// current files on disk using `git diff --no-index` and rewriting paths to external paths.
+ pub fn update_and_get_unified_diff(&mut self) -> Result<Option<String>> {
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
+ let current_dir = baseline_dir.join("current");
+ if current_dir.exists() {
+ // Best-effort cleanup of previous run's mirror.
+ let _ = fs::remove_dir_all(&current_dir);
+ }
+ fs::create_dir_all(&current_dir).with_context(|| {
+ format!(
+ "failed to create current mirror dir {}",
+ current_dir.display()
+ )
+ })?;
+
+ let mut aggregated = String::new();
+
+ // Compute diffs per tracked internal file.
+ for (internal, baseline_external) in &self.temp_name_to_baseline_external {
+ let baseline_path = baseline_dir.join(internal);
+ let current_external = self
+ .temp_name_to_current_external
+ .get(internal)
+ .cloned()
+ .unwrap_or_else(|| baseline_external.clone());
+
+ let left_is_dev_null = !baseline_path.exists();
+ let right_exists = current_external.exists();
+
+ // Prepare right side mirror file if exists; otherwise use /dev/null for deletions.
+ let right_arg = if right_exists {
+ let mirror_path = current_dir.join(internal);
+ let contents = fs::read(&current_external).with_context(|| {
+ format!(
+ "failed to read current file for diff {}",
+ current_external.display()
+ )
+ })?;
+ fs::write(&mirror_path, contents).with_context(|| {
+ format!(
+ "failed to write current mirror file {}",
+ mirror_path.display()
+ )
+ })?;
+ // Use relative path from baseline_dir (so headers say a/<uuid> b/current/<uuid>).
+ format!("current/{internal}")
+ } else {
+ // Deletion: right side is /dev/null to show proper deleted file diff.
+ "/dev/null".to_string()
+ };
+
+ // Prepare left arg: baseline file path or /dev/null for additions.
+ let left_arg = if left_is_dev_null {
+ "/dev/null".to_string()
+ } else {
+ internal.clone()
+ };
+
+ // Run git diff --no-index from baseline_dir to keep paths predictable.
+ let raw = run_git_allow_exit_codes(
+ &baseline_dir,
+ &[
+ "-c",
+ "color.ui=false",
+ "diff",
+ "--no-color",
+ "--no-index",
+ "--",
+ &left_arg,
+ &right_arg,
+ ],
+ &[0, 1], // 0: no changes, 1: differences
+ )?;
+
+ if raw.trim().is_empty() {
+ continue;
+ }
+ let rewritten = self.rewrite_diff_paths(&raw);
+ if !rewritten.trim().is_empty() {
+ if !aggregated.is_empty() && !aggregated.ends_with('\n') {
+ aggregated.push('\n');
+ }
+ aggregated.push_str(&rewritten);
+ }
+ }
+
+ self.unified_diff = if aggregated.trim().is_empty() {
+ None
+ } else {
+ Some(aggregated)
+ };
+
+ // Clean up the current dir.
+ let _ = fs::remove_dir_all(&current_dir);
+
+ Ok(self.unified_diff.clone())
+ }
+
+ fn baseline_dir(&self) -> Result<&Path> {
+ self.baseline_files_dir
+ .as_ref()
+ .map(|d| d.path())
+ .ok_or_else(|| anyhow::anyhow!("baseline temp dir not initialized"))
+ }
+
+ fn ensure_baseline_dir(&mut self) -> Result<()> {
+ if self.baseline_files_dir.is_some() {
+ return Ok(());
+ }
+ let tmp = TempDir::new().context("create baseline temp dir")?;
+ self.baseline_files_dir = Some(tmp);
+ Ok(())
+ }
+
+ /// Rewrites the internal filenames to external paths in diff headers.
+ /// Handles inputs like:
+ /// diff --git a/<uuid> b/current/<uuid>
+ /// --- a/<uuid> | /dev/null
+ /// +++ b/current/<uuid> | /dev/null
+ /// and replaces uuid with the external paths tracking baseline/current.
+ fn rewrite_diff_paths(&self, diff: &str) -> String {
+ let mut out = String::new();
+ for line in diff.lines() {
+ if let Some(rest) = line.strip_prefix("diff --git ") {
+ // Format: diff --git a/<f> b/<f>
+ let parts: Vec<&str> = rest.split_whitespace().collect();
+ if parts.len() == 2 {
+ let a = parts[0].strip_prefix("a/").unwrap_or(parts[0]);
+ let b = parts[1].strip_prefix("b/").unwrap_or(parts[1]);
+
+ let a_ext_display = if a == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let a_base = Path::new(a)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(a);
+ let mapped = self
+ .temp_name_to_baseline_external
+ .get(a_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(a));
+ mapped.display().to_string()
+ };
+
+ let b_ext_display = if b == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let b_base = Path::new(b)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(b);
+ let mapped = self
+ .temp_name_to_current_external
+ .get(b_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(b));
+ mapped.display().to_string()
+ };
+
+ out.push_str(&format!("diff --git a/{a_ext_display} b/{b_ext_display}\n"));
+ continue;
+ }
+ }
+ if let Some(rest) = line.strip_prefix("--- ") {
+ if let Some(path) = rest.strip_prefix("a/") {
+ let external_display = if path == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let p_base = Path::new(path)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(path);
+ self.temp_name_to_baseline_external
+ .get(p_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(path))
+ .display()
+ .to_string()
+ };
+ out.push_str(&format!("--- {external_display}\n"));
+ continue;
+ }
+ }
+ if let Some(rest) = line.strip_prefix("+++ ") {
+ if let Some(path) = rest.strip_prefix("b/") {
+ let external_display = if path == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let p_base = Path::new(path)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(path);
+ self.temp_name_to_current_external
+ .get(p_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(path))
+ .display()
+ .to_string()
+ };
+ out.push_str(&format!("+++ {external_display}\n"));
+ continue;
+ }
+ }
+ out.push_str(line);
+ out.push('\n');
+ }
+ out
+ }
+}
+
+fn uuid_filename_for(path: &Path) -> String {
+ let id = Uuid::new_v4().to_string();
+ match path.extension().and_then(|e| e.to_str()) {
+ Some(ext) if !ext.is_empty() => format!("{id}.{ext}"),
+ _ => id,
+ }
+}
+
+fn run_git_allow_exit_codes(
+ repo: &Path,
+ args: &[&str],
+ allowed_exit_codes: &[i32],
+) -> Result<String> {
+ let output = Command::new("git")
```
> Should we make this async and use `tokio::Command`?
- Created: 2025-08-01 17:12:45 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248474046
```diff
@@ -0,0 +1,476 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use tempfile::TempDir;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+/// Tracks sets of changes to files and exposes the overall unified diff.
+/// Internally, the way this works is now:
+/// 1. Create a temp directory to store baseline snapshots of files when they are first seen.
+/// 2. When a path is first observed, copy its current contents into the baseline dir if it exists on disk.
+/// For new additions, do not create a baseline file so that diffs are shown as proper additions (using /dev/null).
+/// 3. Keep a stable internal filename (uuid + same extension) per external path for path rewrite in diffs.
+/// 4. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk using
+/// `git diff --no-index` and rewrite paths to external paths.
+#[derive(Default)]
+pub struct TurnDiffTracker {
+ /// Temp directory holding baseline snapshots of files as first seen.
+ baseline_files_dir: Option<TempDir>,
+ /// Map external path -> internal filename (uuid + same extension).
+ external_to_temp_name: HashMap<PathBuf, String>,
+ /// Internal filename -> external path as of baseline snapshot.
+ temp_name_to_baseline_external: HashMap<String, PathBuf>,
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
+ /// This is where renames are tracked.
+ temp_name_to_current_external: HashMap<String, PathBuf>,
+ /// Aggregated unified diff for all accumulated changes across files.
+ pub unified_diff: Option<String>,
+}
+
+impl TurnDiffTracker {
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Front-run apply patch calls to track the starting contents of any modified files.
+ /// - Creates a baseline snapshot for files that already exist on disk when first seen.
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
+ /// - Also updates internal mappings for move/rename events.
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
+ self.ensure_baseline_dir()?;
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
+
+ for (path, change) in changes.iter() {
+ // Ensure a stable internal filename exists for this external path.
+ if !self.external_to_temp_name.contains_key(path) {
+ let internal = uuid_filename_for(path);
+ self.external_to_temp_name
+ .insert(path.clone(), internal.clone());
+ self.temp_name_to_baseline_external
+ .insert(internal.clone(), path.clone());
+ self.temp_name_to_current_external
+ .insert(internal.clone(), path.clone());
+
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
+ if path.exists() {
+ let contents = fs::read(path)
+ .with_context(|| format!("failed to read original {}", path.display()))?;
+ let internal_path = baseline_dir.join(&internal);
+ fs::write(&internal_path, contents).with_context(|| {
+ format!("failed to write baseline file {}", internal_path.display())
+ })?;
+ }
+ }
+
+ // Track rename/move in current mapping if provided in an Update.
+ let move_path = match change {
+ FileChange::Update {
+ move_path: Some(dest),
+ ..
+ } => Some(dest),
+ _ => None,
+ };
+ if let Some(dest) = move_path {
+ let uuid_filename = match self.external_to_temp_name.get(path) {
+ Some(i) => i.clone(),
+ None => {
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
+ let i = uuid_filename_for(path);
+ self.external_to_temp_name.insert(path.clone(), i.clone());
+ self.temp_name_to_baseline_external
+ .insert(i.clone(), path.clone());
+ i
+ }
+ };
+ // Update current external mapping for temp file name.
+ self.temp_name_to_current_external
+ .insert(uuid_filename.clone(), dest.clone());
+ // Update forward file_mapping: external current -> internal name.
+ self.external_to_temp_name.remove(path);
+ self.external_to_temp_name
+ .insert(dest.clone(), uuid_filename);
+ }
+ }
+
+ Ok(())
+ }
+
+ /// Recompute the aggregated unified diff by comparing all baseline snapshots against
+ /// current files on disk using `git diff --no-index` and rewriting paths to external paths.
+ pub fn update_and_get_unified_diff(&mut self) -> Result<Option<String>> {
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
+ let current_dir = baseline_dir.join("current");
+ if current_dir.exists() {
+ // Best-effort cleanup of previous run's mirror.
+ let _ = fs::remove_dir_all(&current_dir);
+ }
+ fs::create_dir_all(&current_dir).with_context(|| {
+ format!(
+ "failed to create current mirror dir {}",
+ current_dir.display()
+ )
+ })?;
+
+ let mut aggregated = String::new();
+
+ // Compute diffs per tracked internal file.
+ for (internal, baseline_external) in &self.temp_name_to_baseline_external {
+ let baseline_path = baseline_dir.join(internal);
+ let current_external = self
+ .temp_name_to_current_external
+ .get(internal)
+ .cloned()
+ .unwrap_or_else(|| baseline_external.clone());
+
+ let left_is_dev_null = !baseline_path.exists();
+ let right_exists = current_external.exists();
+
+ // Prepare right side mirror file if exists; otherwise use /dev/null for deletions.
+ let right_arg = if right_exists {
+ let mirror_path = current_dir.join(internal);
+ let contents = fs::read(&current_external).with_context(|| {
+ format!(
+ "failed to read current file for diff {}",
+ current_external.display()
+ )
+ })?;
+ fs::write(&mirror_path, contents).with_context(|| {
+ format!(
+ "failed to write current mirror file {}",
+ mirror_path.display()
+ )
+ })?;
+ // Use relative path from baseline_dir (so headers say a/<uuid> b/current/<uuid>).
+ format!("current/{internal}")
+ } else {
+ // Deletion: right side is /dev/null to show proper deleted file diff.
+ "/dev/null".to_string()
+ };
+
+ // Prepare left arg: baseline file path or /dev/null for additions.
+ let left_arg = if left_is_dev_null {
+ "/dev/null".to_string()
+ } else {
+ internal.clone()
+ };
+
+ // Run git diff --no-index from baseline_dir to keep paths predictable.
+ let raw = run_git_allow_exit_codes(
+ &baseline_dir,
+ &[
+ "-c",
+ "color.ui=false",
+ "diff",
+ "--no-color",
+ "--no-index",
+ "--",
+ &left_arg,
+ &right_arg,
+ ],
+ &[0, 1], // 0: no changes, 1: differences
+ )?;
+
+ if raw.trim().is_empty() {
+ continue;
+ }
+ let rewritten = self.rewrite_diff_paths(&raw);
+ if !rewritten.trim().is_empty() {
+ if !aggregated.is_empty() && !aggregated.ends_with('\n') {
+ aggregated.push('\n');
+ }
+ aggregated.push_str(&rewritten);
+ }
+ }
+
+ self.unified_diff = if aggregated.trim().is_empty() {
+ None
+ } else {
+ Some(aggregated)
+ };
+
+ // Clean up the current dir.
+ let _ = fs::remove_dir_all(&current_dir);
+
+ Ok(self.unified_diff.clone())
+ }
+
+ fn baseline_dir(&self) -> Result<&Path> {
+ self.baseline_files_dir
+ .as_ref()
+ .map(|d| d.path())
+ .ok_or_else(|| anyhow::anyhow!("baseline temp dir not initialized"))
+ }
+
+ fn ensure_baseline_dir(&mut self) -> Result<()> {
+ if self.baseline_files_dir.is_some() {
+ return Ok(());
+ }
+ let tmp = TempDir::new().context("create baseline temp dir")?;
+ self.baseline_files_dir = Some(tmp);
+ Ok(())
+ }
+
+ /// Rewrites the internal filenames to external paths in diff headers.
+ /// Handles inputs like:
+ /// diff --git a/<uuid> b/current/<uuid>
+ /// --- a/<uuid> | /dev/null
+ /// +++ b/current/<uuid> | /dev/null
+ /// and replaces uuid with the external paths tracking baseline/current.
+ fn rewrite_diff_paths(&self, diff: &str) -> String {
+ let mut out = String::new();
+ for line in diff.lines() {
+ if let Some(rest) = line.strip_prefix("diff --git ") {
+ // Format: diff --git a/<f> b/<f>
+ let parts: Vec<&str> = rest.split_whitespace().collect();
+ if parts.len() == 2 {
+ let a = parts[0].strip_prefix("a/").unwrap_or(parts[0]);
+ let b = parts[1].strip_prefix("b/").unwrap_or(parts[1]);
+
+ let a_ext_display = if a == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let a_base = Path::new(a)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(a);
+ let mapped = self
+ .temp_name_to_baseline_external
+ .get(a_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(a));
+ mapped.display().to_string()
+ };
+
+ let b_ext_display = if b == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let b_base = Path::new(b)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(b);
+ let mapped = self
+ .temp_name_to_current_external
+ .get(b_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(b));
+ mapped.display().to_string()
+ };
+
+ out.push_str(&format!("diff --git a/{a_ext_display} b/{b_ext_display}\n"));
+ continue;
+ }
+ }
+ if let Some(rest) = line.strip_prefix("--- ") {
+ if let Some(path) = rest.strip_prefix("a/") {
+ let external_display = if path == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let p_base = Path::new(path)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(path);
+ self.temp_name_to_baseline_external
+ .get(p_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(path))
+ .display()
+ .to_string()
+ };
+ out.push_str(&format!("--- {external_display}\n"));
+ continue;
+ }
+ }
+ if let Some(rest) = line.strip_prefix("+++ ") {
+ if let Some(path) = rest.strip_prefix("b/") {
+ let external_display = if path == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let p_base = Path::new(path)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(path);
+ self.temp_name_to_current_external
+ .get(p_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(path))
+ .display()
+ .to_string()
+ };
+ out.push_str(&format!("+++ {external_display}\n"));
+ continue;
+ }
+ }
+ out.push_str(line);
+ out.push('\n');
+ }
+ out
+ }
+}
+
+fn uuid_filename_for(path: &Path) -> String {
+ let id = Uuid::new_v4().to_string();
+ match path.extension().and_then(|e| e.to_str()) {
+ Some(ext) if !ext.is_empty() => format!("{id}.{ext}"),
+ _ => id,
+ }
+}
+
+fn run_git_allow_exit_codes(
+ repo: &Path,
+ args: &[&str],
+ allowed_exit_codes: &[i32],
+) -> Result<String> {
+ let output = Command::new("git")
+ .current_dir(repo)
+ .args(args)
+ .output()
+ .with_context(|| format!("failed to run git {:?} in {}", args, repo.display()))?;
+ let code = output.status.code().unwrap_or(-1);
+ if !allowed_exit_codes.contains(&code) {
+ anyhow::bail!(
+ "git {:?} failed with status {:?}: {}",
+ args,
+ output.status,
+ String::from_utf8_lossy(&output.stderr)
+ );
+ }
+ Ok(String::from_utf8_lossy(&output.stdout).into_owned())
+}
+
+#[cfg(test)]
+mod tests {
+ #![allow(clippy::unwrap_used)]
+ use super::*;
+ use tempfile::tempdir;
+
+ #[test]
+ fn accumulates_add_and_update() {
+ let mut acc = TurnDiffTracker::new();
+
+ let dir = tempdir().unwrap();
+ let file = dir.path().join("a.txt");
+
+ // First patch: add file (baseline should be /dev/null).
+ let add_changes = HashMap::from([(
+ file.clone(),
+ FileChange::Add {
+ content: "foo\n".to_string(),
+ },
+ )]);
+ acc.on_patch_begin(&add_changes).unwrap();
+
+ // Simulate apply: create the file on disk.
+ // This must happen after on_patch_begin.
+ fs::write(&file, "foo\n").unwrap();
+ acc.update_and_get_unified_diff().unwrap();
+ let first = acc.unified_diff.clone().unwrap();
+ assert!(first.contains("+foo"));
```
> Instead of `contains()` checks, can these all be full `assert_eq!()` checks?
- Created: 2025-08-01 17:13:43 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248476408
```diff
@@ -0,0 +1,476 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use tempfile::TempDir;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+/// Tracks sets of changes to files and exposes the overall unified diff.
+/// Internally, the way this works is now:
+/// 1. Create a temp directory to store baseline snapshots of files when they are first seen.
+/// 2. When a path is first observed, copy its current contents into the baseline dir if it exists on disk.
+/// For new additions, do not create a baseline file so that diffs are shown as proper additions (using /dev/null).
+/// 3. Keep a stable internal filename (uuid + same extension) per external path for path rewrite in diffs.
+/// 4. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk using
+/// `git diff --no-index` and rewrite paths to external paths.
+#[derive(Default)]
+pub struct TurnDiffTracker {
+ /// Temp directory holding baseline snapshots of files as first seen.
+ baseline_files_dir: Option<TempDir>,
+ /// Map external path -> internal filename (uuid + same extension).
+ external_to_temp_name: HashMap<PathBuf, String>,
+ /// Internal filename -> external path as of baseline snapshot.
+ temp_name_to_baseline_external: HashMap<String, PathBuf>,
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
+ /// This is where renames are tracked.
+ temp_name_to_current_external: HashMap<String, PathBuf>,
+ /// Aggregated unified diff for all accumulated changes across files.
+ pub unified_diff: Option<String>,
+}
+
+impl TurnDiffTracker {
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Front-run apply patch calls to track the starting contents of any modified files.
+ /// - Creates a baseline snapshot for files that already exist on disk when first seen.
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
+ /// - Also updates internal mappings for move/rename events.
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
+ self.ensure_baseline_dir()?;
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
+
+ for (path, change) in changes.iter() {
+ // Ensure a stable internal filename exists for this external path.
+ if !self.external_to_temp_name.contains_key(path) {
+ let internal = uuid_filename_for(path);
+ self.external_to_temp_name
+ .insert(path.clone(), internal.clone());
+ self.temp_name_to_baseline_external
+ .insert(internal.clone(), path.clone());
+ self.temp_name_to_current_external
+ .insert(internal.clone(), path.clone());
+
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
+ if path.exists() {
+ let contents = fs::read(path)
+ .with_context(|| format!("failed to read original {}", path.display()))?;
+ let internal_path = baseline_dir.join(&internal);
+ fs::write(&internal_path, contents).with_context(|| {
+ format!("failed to write baseline file {}", internal_path.display())
+ })?;
+ }
+ }
+
+ // Track rename/move in current mapping if provided in an Update.
+ let move_path = match change {
+ FileChange::Update {
+ move_path: Some(dest),
+ ..
+ } => Some(dest),
+ _ => None,
+ };
+ if let Some(dest) = move_path {
+ let uuid_filename = match self.external_to_temp_name.get(path) {
+ Some(i) => i.clone(),
+ None => {
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
+ let i = uuid_filename_for(path);
+ self.external_to_temp_name.insert(path.clone(), i.clone());
+ self.temp_name_to_baseline_external
+ .insert(i.clone(), path.clone());
+ i
+ }
+ };
+ // Update current external mapping for temp file name.
+ self.temp_name_to_current_external
+ .insert(uuid_filename.clone(), dest.clone());
+ // Update forward file_mapping: external current -> internal name.
+ self.external_to_temp_name.remove(path);
+ self.external_to_temp_name
+ .insert(dest.clone(), uuid_filename);
+ }
+ }
+
+ Ok(())
+ }
+
+ /// Recompute the aggregated unified diff by comparing all baseline snapshots against
+ /// current files on disk using `git diff --no-index` and rewriting paths to external paths.
+ pub fn update_and_get_unified_diff(&mut self) -> Result<Option<String>> {
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
+ let current_dir = baseline_dir.join("current");
+ if current_dir.exists() {
+ // Best-effort cleanup of previous run's mirror.
+ let _ = fs::remove_dir_all(&current_dir);
+ }
+ fs::create_dir_all(&current_dir).with_context(|| {
+ format!(
+ "failed to create current mirror dir {}",
+ current_dir.display()
+ )
+ })?;
+
+ let mut aggregated = String::new();
+
+ // Compute diffs per tracked internal file.
+ for (internal, baseline_external) in &self.temp_name_to_baseline_external {
+ let baseline_path = baseline_dir.join(internal);
+ let current_external = self
+ .temp_name_to_current_external
+ .get(internal)
+ .cloned()
+ .unwrap_or_else(|| baseline_external.clone());
+
+ let left_is_dev_null = !baseline_path.exists();
+ let right_exists = current_external.exists();
+
+ // Prepare right side mirror file if exists; otherwise use /dev/null for deletions.
+ let right_arg = if right_exists {
+ let mirror_path = current_dir.join(internal);
+ let contents = fs::read(&current_external).with_context(|| {
+ format!(
+ "failed to read current file for diff {}",
+ current_external.display()
+ )
+ })?;
+ fs::write(&mirror_path, contents).with_context(|| {
+ format!(
+ "failed to write current mirror file {}",
+ mirror_path.display()
+ )
+ })?;
+ // Use relative path from baseline_dir (so headers say a/<uuid> b/current/<uuid>).
+ format!("current/{internal}")
+ } else {
+ // Deletion: right side is /dev/null to show proper deleted file diff.
+ "/dev/null".to_string()
+ };
+
+ // Prepare left arg: baseline file path or /dev/null for additions.
+ let left_arg = if left_is_dev_null {
+ "/dev/null".to_string()
+ } else {
+ internal.clone()
+ };
+
+ // Run git diff --no-index from baseline_dir to keep paths predictable.
+ let raw = run_git_allow_exit_codes(
+ &baseline_dir,
+ &[
+ "-c",
+ "color.ui=false",
+ "diff",
+ "--no-color",
+ "--no-index",
+ "--",
+ &left_arg,
+ &right_arg,
+ ],
+ &[0, 1], // 0: no changes, 1: differences
+ )?;
+
+ if raw.trim().is_empty() {
+ continue;
+ }
+ let rewritten = self.rewrite_diff_paths(&raw);
+ if !rewritten.trim().is_empty() {
+ if !aggregated.is_empty() && !aggregated.ends_with('\n') {
+ aggregated.push('\n');
+ }
+ aggregated.push_str(&rewritten);
+ }
+ }
+
+ self.unified_diff = if aggregated.trim().is_empty() {
+ None
+ } else {
+ Some(aggregated)
+ };
+
+ // Clean up the current dir.
+ let _ = fs::remove_dir_all(&current_dir);
+
+ Ok(self.unified_diff.clone())
+ }
+
+ fn baseline_dir(&self) -> Result<&Path> {
+ self.baseline_files_dir
+ .as_ref()
+ .map(|d| d.path())
+ .ok_or_else(|| anyhow::anyhow!("baseline temp dir not initialized"))
+ }
+
+ fn ensure_baseline_dir(&mut self) -> Result<()> {
+ if self.baseline_files_dir.is_some() {
+ return Ok(());
+ }
+ let tmp = TempDir::new().context("create baseline temp dir")?;
+ self.baseline_files_dir = Some(tmp);
+ Ok(())
+ }
+
+ /// Rewrites the internal filenames to external paths in diff headers.
+ /// Handles inputs like:
+ /// diff --git a/<uuid> b/current/<uuid>
+ /// --- a/<uuid> | /dev/null
+ /// +++ b/current/<uuid> | /dev/null
+ /// and replaces uuid with the external paths tracking baseline/current.
+ fn rewrite_diff_paths(&self, diff: &str) -> String {
+ let mut out = String::new();
+ for line in diff.lines() {
+ if let Some(rest) = line.strip_prefix("diff --git ") {
+ // Format: diff --git a/<f> b/<f>
+ let parts: Vec<&str> = rest.split_whitespace().collect();
+ if parts.len() == 2 {
+ let a = parts[0].strip_prefix("a/").unwrap_or(parts[0]);
+ let b = parts[1].strip_prefix("b/").unwrap_or(parts[1]);
+
+ let a_ext_display = if a == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let a_base = Path::new(a)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(a);
+ let mapped = self
+ .temp_name_to_baseline_external
+ .get(a_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(a));
+ mapped.display().to_string()
+ };
+
+ let b_ext_display = if b == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let b_base = Path::new(b)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(b);
+ let mapped = self
+ .temp_name_to_current_external
+ .get(b_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(b));
+ mapped.display().to_string()
+ };
+
+ out.push_str(&format!("diff --git a/{a_ext_display} b/{b_ext_display}\n"));
+ continue;
+ }
+ }
+ if let Some(rest) = line.strip_prefix("--- ") {
+ if let Some(path) = rest.strip_prefix("a/") {
+ let external_display = if path == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let p_base = Path::new(path)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(path);
+ self.temp_name_to_baseline_external
+ .get(p_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(path))
+ .display()
+ .to_string()
+ };
+ out.push_str(&format!("--- {external_display}\n"));
+ continue;
+ }
+ }
+ if let Some(rest) = line.strip_prefix("+++ ") {
+ if let Some(path) = rest.strip_prefix("b/") {
+ let external_display = if path == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let p_base = Path::new(path)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(path);
+ self.temp_name_to_current_external
+ .get(p_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(path))
+ .display()
+ .to_string()
+ };
+ out.push_str(&format!("+++ {external_display}\n"));
+ continue;
+ }
+ }
+ out.push_str(line);
+ out.push('\n');
+ }
+ out
+ }
+}
+
+fn uuid_filename_for(path: &Path) -> String {
+ let id = Uuid::new_v4().to_string();
+ match path.extension().and_then(|e| e.to_str()) {
+ Some(ext) if !ext.is_empty() => format!("{id}.{ext}"),
+ _ => id,
+ }
+}
+
+fn run_git_allow_exit_codes(
+ repo: &Path,
+ args: &[&str],
+ allowed_exit_codes: &[i32],
+) -> Result<String> {
+ let output = Command::new("git")
+ .current_dir(repo)
+ .args(args)
+ .output()
+ .with_context(|| format!("failed to run git {:?} in {}", args, repo.display()))?;
+ let code = output.status.code().unwrap_or(-1);
+ if !allowed_exit_codes.contains(&code) {
+ anyhow::bail!(
+ "git {:?} failed with status {:?}: {}",
+ args,
+ output.status,
+ String::from_utf8_lossy(&output.stderr)
+ );
+ }
+ Ok(String::from_utf8_lossy(&output.stdout).into_owned())
+}
+
+#[cfg(test)]
+mod tests {
+ #![allow(clippy::unwrap_used)]
```
> Can you also test that paths with spaces work as intended?
- Created: 2025-08-01 17:15:26 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248479943
```diff
@@ -0,0 +1,476 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use tempfile::TempDir;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+/// Tracks sets of changes to files and exposes the overall unified diff.
+/// Internally, the way this works is now:
+/// 1. Create a temp directory to store baseline snapshots of files when they are first seen.
+/// 2. When a path is first observed, copy its current contents into the baseline dir if it exists on disk.
+/// For new additions, do not create a baseline file so that diffs are shown as proper additions (using /dev/null).
+/// 3. Keep a stable internal filename (uuid + same extension) per external path for path rewrite in diffs.
+/// 4. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk using
+/// `git diff --no-index` and rewrite paths to external paths.
+#[derive(Default)]
+pub struct TurnDiffTracker {
+ /// Temp directory holding baseline snapshots of files as first seen.
+ baseline_files_dir: Option<TempDir>,
+ /// Map external path -> internal filename (uuid + same extension).
+ external_to_temp_name: HashMap<PathBuf, String>,
+ /// Internal filename -> external path as of baseline snapshot.
+ temp_name_to_baseline_external: HashMap<String, PathBuf>,
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
+ /// This is where renames are tracked.
+ temp_name_to_current_external: HashMap<String, PathBuf>,
+ /// Aggregated unified diff for all accumulated changes across files.
+ pub unified_diff: Option<String>,
+}
+
+impl TurnDiffTracker {
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Front-run apply patch calls to track the starting contents of any modified files.
+ /// - Creates a baseline snapshot for files that already exist on disk when first seen.
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
+ /// - Also updates internal mappings for move/rename events.
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
+ self.ensure_baseline_dir()?;
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
+
+ for (path, change) in changes.iter() {
+ // Ensure a stable internal filename exists for this external path.
+ if !self.external_to_temp_name.contains_key(path) {
+ let internal = uuid_filename_for(path);
+ self.external_to_temp_name
+ .insert(path.clone(), internal.clone());
+ self.temp_name_to_baseline_external
+ .insert(internal.clone(), path.clone());
+ self.temp_name_to_current_external
+ .insert(internal.clone(), path.clone());
+
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
+ if path.exists() {
+ let contents = fs::read(path)
+ .with_context(|| format!("failed to read original {}", path.display()))?;
+ let internal_path = baseline_dir.join(&internal);
+ fs::write(&internal_path, contents).with_context(|| {
+ format!("failed to write baseline file {}", internal_path.display())
+ })?;
+ }
+ }
+
+ // Track rename/move in current mapping if provided in an Update.
+ let move_path = match change {
+ FileChange::Update {
+ move_path: Some(dest),
+ ..
+ } => Some(dest),
+ _ => None,
+ };
+ if let Some(dest) = move_path {
```
> This could be folded into the above `match` statement?
- Created: 2025-08-01 17:17:12 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248482907
```diff
@@ -0,0 +1,476 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use tempfile::TempDir;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+/// Tracks sets of changes to files and exposes the overall unified diff.
+/// Internally, the way this works is now:
+/// 1. Create a temp directory to store baseline snapshots of files when they are first seen.
+/// 2. When a path is first observed, copy its current contents into the baseline dir if it exists on disk.
+/// For new additions, do not create a baseline file so that diffs are shown as proper additions (using /dev/null).
+/// 3. Keep a stable internal filename (uuid + same extension) per external path for path rewrite in diffs.
+/// 4. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk using
+/// `git diff --no-index` and rewrite paths to external paths.
+#[derive(Default)]
+pub struct TurnDiffTracker {
+ /// Temp directory holding baseline snapshots of files as first seen.
+ baseline_files_dir: Option<TempDir>,
+ /// Map external path -> internal filename (uuid + same extension).
+ external_to_temp_name: HashMap<PathBuf, String>,
+ /// Internal filename -> external path as of baseline snapshot.
+ temp_name_to_baseline_external: HashMap<String, PathBuf>,
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
+ /// This is where renames are tracked.
+ temp_name_to_current_external: HashMap<String, PathBuf>,
+ /// Aggregated unified diff for all accumulated changes across files.
+ pub unified_diff: Option<String>,
+}
+
+impl TurnDiffTracker {
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Front-run apply patch calls to track the starting contents of any modified files.
+ /// - Creates a baseline snapshot for files that already exist on disk when first seen.
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
+ /// - Also updates internal mappings for move/rename events.
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
+ self.ensure_baseline_dir()?;
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
+
+ for (path, change) in changes.iter() {
+ // Ensure a stable internal filename exists for this external path.
+ if !self.external_to_temp_name.contains_key(path) {
+ let internal = uuid_filename_for(path);
+ self.external_to_temp_name
+ .insert(path.clone(), internal.clone());
+ self.temp_name_to_baseline_external
+ .insert(internal.clone(), path.clone());
+ self.temp_name_to_current_external
+ .insert(internal.clone(), path.clone());
+
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
+ if path.exists() {
+ let contents = fs::read(path)
+ .with_context(|| format!("failed to read original {}", path.display()))?;
+ let internal_path = baseline_dir.join(&internal);
+ fs::write(&internal_path, contents).with_context(|| {
+ format!("failed to write baseline file {}", internal_path.display())
+ })?;
+ }
+ }
+
+ // Track rename/move in current mapping if provided in an Update.
+ let move_path = match change {
+ FileChange::Update {
+ move_path: Some(dest),
+ ..
+ } => Some(dest),
+ _ => None,
+ };
+ if let Some(dest) = move_path {
+ let uuid_filename = match self.external_to_temp_name.get(path) {
+ Some(i) => i.clone(),
+ None => {
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
+ let i = uuid_filename_for(path);
+ self.external_to_temp_name.insert(path.clone(), i.clone());
+ self.temp_name_to_baseline_external
+ .insert(i.clone(), path.clone());
+ i
+ }
+ };
+ // Update current external mapping for temp file name.
+ self.temp_name_to_current_external
+ .insert(uuid_filename.clone(), dest.clone());
+ // Update forward file_mapping: external current -> internal name.
+ self.external_to_temp_name.remove(path);
+ self.external_to_temp_name
+ .insert(dest.clone(), uuid_filename);
+ }
+ }
+
+ Ok(())
+ }
+
+ /// Recompute the aggregated unified diff by comparing all baseline snapshots against
+ /// current files on disk using `git diff --no-index` and rewriting paths to external paths.
+ pub fn update_and_get_unified_diff(&mut self) -> Result<Option<String>> {
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
+ let current_dir = baseline_dir.join("current");
+ if current_dir.exists() {
+ // Best-effort cleanup of previous run's mirror.
+ let _ = fs::remove_dir_all(&current_dir);
+ }
+ fs::create_dir_all(&current_dir).with_context(|| {
+ format!(
+ "failed to create current mirror dir {}",
+ current_dir.display()
+ )
+ })?;
+
+ let mut aggregated = String::new();
+
+ // Compute diffs per tracked internal file.
+ for (internal, baseline_external) in &self.temp_name_to_baseline_external {
+ let baseline_path = baseline_dir.join(internal);
+ let current_external = self
+ .temp_name_to_current_external
+ .get(internal)
+ .cloned()
+ .unwrap_or_else(|| baseline_external.clone());
+
+ let left_is_dev_null = !baseline_path.exists();
+ let right_exists = current_external.exists();
+
+ // Prepare right side mirror file if exists; otherwise use /dev/null for deletions.
+ let right_arg = if right_exists {
+ let mirror_path = current_dir.join(internal);
+ let contents = fs::read(&current_external).with_context(|| {
+ format!(
+ "failed to read current file for diff {}",
+ current_external.display()
+ )
+ })?;
+ fs::write(&mirror_path, contents).with_context(|| {
+ format!(
+ "failed to write current mirror file {}",
+ mirror_path.display()
+ )
+ })?;
+ // Use relative path from baseline_dir (so headers say a/<uuid> b/current/<uuid>).
+ format!("current/{internal}")
+ } else {
+ // Deletion: right side is /dev/null to show proper deleted file diff.
+ "/dev/null".to_string()
+ };
+
+ // Prepare left arg: baseline file path or /dev/null for additions.
+ let left_arg = if left_is_dev_null {
+ "/dev/null".to_string()
+ } else {
+ internal.clone()
+ };
+
+ // Run git diff --no-index from baseline_dir to keep paths predictable.
+ let raw = run_git_allow_exit_codes(
+ &baseline_dir,
+ &[
+ "-c",
+ "color.ui=false",
+ "diff",
+ "--no-color",
+ "--no-index",
+ "--",
+ &left_arg,
+ &right_arg,
+ ],
+ &[0, 1], // 0: no changes, 1: differences
+ )?;
+
+ if raw.trim().is_empty() {
+ continue;
+ }
+ let rewritten = self.rewrite_diff_paths(&raw);
+ if !rewritten.trim().is_empty() {
+ if !aggregated.is_empty() && !aggregated.ends_with('\n') {
+ aggregated.push('\n');
+ }
+ aggregated.push_str(&rewritten);
+ }
+ }
+
+ self.unified_diff = if aggregated.trim().is_empty() {
+ None
+ } else {
+ Some(aggregated)
+ };
+
+ // Clean up the current dir.
+ let _ = fs::remove_dir_all(&current_dir);
+
+ Ok(self.unified_diff.clone())
+ }
+
+ fn baseline_dir(&self) -> Result<&Path> {
+ self.baseline_files_dir
+ .as_ref()
+ .map(|d| d.path())
+ .ok_or_else(|| anyhow::anyhow!("baseline temp dir not initialized"))
+ }
+
+ fn ensure_baseline_dir(&mut self) -> Result<()> {
+ if self.baseline_files_dir.is_some() {
+ return Ok(());
+ }
+ let tmp = TempDir::new().context("create baseline temp dir")?;
+ self.baseline_files_dir = Some(tmp);
+ Ok(())
+ }
+
+ /// Rewrites the internal filenames to external paths in diff headers.
+ /// Handles inputs like:
+ /// diff --git a/<uuid> b/current/<uuid>
+ /// --- a/<uuid> | /dev/null
+ /// +++ b/current/<uuid> | /dev/null
+ /// and replaces uuid with the external paths tracking baseline/current.
+ fn rewrite_diff_paths(&self, diff: &str) -> String {
+ let mut out = String::new();
+ for line in diff.lines() {
+ if let Some(rest) = line.strip_prefix("diff --git ") {
+ // Format: diff --git a/<f> b/<f>
+ let parts: Vec<&str> = rest.split_whitespace().collect();
+ if parts.len() == 2 {
+ let a = parts[0].strip_prefix("a/").unwrap_or(parts[0]);
+ let b = parts[1].strip_prefix("b/").unwrap_or(parts[1]);
+
+ let a_ext_display = if a == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let a_base = Path::new(a)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(a);
+ let mapped = self
+ .temp_name_to_baseline_external
+ .get(a_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(a));
+ mapped.display().to_string()
+ };
+
+ let b_ext_display = if b == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let b_base = Path::new(b)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(b);
+ let mapped = self
+ .temp_name_to_current_external
+ .get(b_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(b));
+ mapped.display().to_string()
+ };
+
+ out.push_str(&format!("diff --git a/{a_ext_display} b/{b_ext_display}\n"));
+ continue;
+ }
+ }
+ if let Some(rest) = line.strip_prefix("--- ") {
+ if let Some(path) = rest.strip_prefix("a/") {
+ let external_display = if path == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let p_base = Path::new(path)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(path);
+ self.temp_name_to_baseline_external
+ .get(p_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(path))
+ .display()
+ .to_string()
+ };
+ out.push_str(&format!("--- {external_display}\n"));
+ continue;
+ }
+ }
+ if let Some(rest) = line.strip_prefix("+++ ") {
+ if let Some(path) = rest.strip_prefix("b/") {
+ let external_display = if path == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let p_base = Path::new(path)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(path);
+ self.temp_name_to_current_external
+ .get(p_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(path))
+ .display()
+ .to_string()
+ };
+ out.push_str(&format!("+++ {external_display}\n"));
+ continue;
+ }
+ }
+ out.push_str(line);
+ out.push('\n');
+ }
+ out
+ }
+}
+
+fn uuid_filename_for(path: &Path) -> String {
+ let id = Uuid::new_v4().to_string();
+ match path.extension().and_then(|e| e.to_str()) {
+ Some(ext) if !ext.is_empty() => format!("{id}.{ext}"),
+ _ => id,
+ }
+}
+
+fn run_git_allow_exit_codes(
```
> From https://github.com/openai/codex/pull/1747 I would include:
>
> ```rust
> let envs = vec![
> ("GIT_CONFIG_GLOBAL", "/dev/null"),
> ("GIT_CONFIG_NOSYSTEM", "1"),
> ];
> ```
- Created: 2025-08-01 17:20:55 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248489326
```diff
@@ -0,0 +1,476 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use tempfile::TempDir;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+/// Tracks sets of changes to files and exposes the overall unified diff.
+/// Internally, the way this works is now:
+/// 1. Create a temp directory to store baseline snapshots of files when they are first seen.
+/// 2. When a path is first observed, copy its current contents into the baseline dir if it exists on disk.
+/// For new additions, do not create a baseline file so that diffs are shown as proper additions (using /dev/null).
+/// 3. Keep a stable internal filename (uuid + same extension) per external path for path rewrite in diffs.
+/// 4. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk using
+/// `git diff --no-index` and rewrite paths to external paths.
+#[derive(Default)]
+pub struct TurnDiffTracker {
+ /// Temp directory holding baseline snapshots of files as first seen.
+ baseline_files_dir: Option<TempDir>,
+ /// Map external path -> internal filename (uuid + same extension).
+ external_to_temp_name: HashMap<PathBuf, String>,
+ /// Internal filename -> external path as of baseline snapshot.
+ temp_name_to_baseline_external: HashMap<String, PathBuf>,
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
+ /// This is where renames are tracked.
+ temp_name_to_current_external: HashMap<String, PathBuf>,
+ /// Aggregated unified diff for all accumulated changes across files.
+ pub unified_diff: Option<String>,
+}
+
+impl TurnDiffTracker {
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Front-run apply patch calls to track the starting contents of any modified files.
+ /// - Creates a baseline snapshot for files that already exist on disk when first seen.
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
+ /// - Also updates internal mappings for move/rename events.
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
+ self.ensure_baseline_dir()?;
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
+
+ for (path, change) in changes.iter() {
+ // Ensure a stable internal filename exists for this external path.
+ if !self.external_to_temp_name.contains_key(path) {
+ let internal = uuid_filename_for(path);
+ self.external_to_temp_name
+ .insert(path.clone(), internal.clone());
+ self.temp_name_to_baseline_external
+ .insert(internal.clone(), path.clone());
+ self.temp_name_to_current_external
+ .insert(internal.clone(), path.clone());
+
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
+ if path.exists() {
+ let contents = fs::read(path)
+ .with_context(|| format!("failed to read original {}", path.display()))?;
+ let internal_path = baseline_dir.join(&internal);
+ fs::write(&internal_path, contents).with_context(|| {
+ format!("failed to write baseline file {}", internal_path.display())
+ })?;
+ }
+ }
+
+ // Track rename/move in current mapping if provided in an Update.
+ let move_path = match change {
+ FileChange::Update {
+ move_path: Some(dest),
+ ..
+ } => Some(dest),
+ _ => None,
+ };
+ if let Some(dest) = move_path {
+ let uuid_filename = match self.external_to_temp_name.get(path) {
+ Some(i) => i.clone(),
+ None => {
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
+ let i = uuid_filename_for(path);
+ self.external_to_temp_name.insert(path.clone(), i.clone());
+ self.temp_name_to_baseline_external
+ .insert(i.clone(), path.clone());
+ i
+ }
+ };
+ // Update current external mapping for temp file name.
+ self.temp_name_to_current_external
+ .insert(uuid_filename.clone(), dest.clone());
+ // Update forward file_mapping: external current -> internal name.
+ self.external_to_temp_name.remove(path);
+ self.external_to_temp_name
+ .insert(dest.clone(), uuid_filename);
+ }
+ }
+
+ Ok(())
+ }
+
+ /// Recompute the aggregated unified diff by comparing all baseline snapshots against
```
> Can you expand this comment? I don't have a great mental model of the structure you're trying to set up for the ultimate `git diff` call.
>
> I want to understand why this isn't something simpler like `diff -u backed-up-file current-file`.
- Created: 2025-08-04 02:45:51 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250292032
```diff
@@ -0,0 +1,766 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use anyhow::anyhow;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+struct BaselineFileInfo {
+ path: Option<PathBuf>,
+ contents_bytes: Option<Vec<u8>>,
+ mode: Option<String>,
+ oid: Option<String>,
+}
+
+/// Tracks sets of changes to files and exposes the overall unified diff.
+/// Internally, the way this works is now:
+/// 1. Maintain an in-memory baseline snapshot of files when they are first seen.
+/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null).
+/// 2. Keep a stable internal filename (uuid + same extension) per external path for rename tracking.
+/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory
+/// using the `similar` crate and emit unified diffs with rewritten external paths.
+#[derive(Default)]
+pub struct TurnDiffTracker {
+ /// Map external path -> internal filename (uuid + same extension).
+ external_to_temp_name: HashMap<PathBuf, String>,
+ /// Internal filename -> baseline file info.
+ baseline_file_info: HashMap<String, BaselineFileInfo>,
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
+ /// This is where renames are tracked.
+ temp_name_to_current_path: HashMap<String, PathBuf>,
+ /// Cache of known git worktree roots to avoid repeated filesystem walks.
+ git_root_cache: Vec<PathBuf>,
+}
+
+impl TurnDiffTracker {
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Front-run apply patch calls to track the starting contents of any modified files.
+ /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen.
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
+ /// - Also updates internal mappings for move/rename events.
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
+ for (path, change) in changes.iter() {
+ // Ensure a stable internal filename exists for this external path.
+ if !self.external_to_temp_name.contains_key(path) {
+ let internal = uuid_filename_for(path);
+ self.external_to_temp_name
+ .insert(path.clone(), internal.clone());
+ self.temp_name_to_current_path
+ .insert(internal.clone(), path.clone());
+
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
+ let (contents_bytes, mode, oid) = if path.exists() {
+ let mode = file_mode_for_path(path);
+ let mode_str = mode.as_deref().unwrap_or(REGULAR_MODE);
+ let contents_bytes = blob_bytes(path, mode_str)
+ .unwrap_or_default()
+ .unwrap_or_default();
+ let oid = if mode.as_deref() == Some(SYMLINK_MODE) {
+ git_blob_sha1_hex_bytes(&contents_bytes)
+ } else {
+ self.git_blob_oid_for_path(path)
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(&contents_bytes))
+ };
+ (Some(contents_bytes), mode, Some(oid))
+ } else {
+ (None, None, Some(ZERO_OID.to_string()))
+ };
+
+ self.baseline_file_info.insert(
+ internal.clone(),
+ BaselineFileInfo {
+ path: Some(path.clone()),
+ contents_bytes,
+ mode,
+ oid,
+ },
+ );
+ }
+
+ // Track rename/move in current mapping if provided in an Update.
+ if let FileChange::Update {
+ move_path: Some(dest),
+ ..
+ } = change
+ {
+ let uuid_filename = match self.external_to_temp_name.get(path) {
+ Some(i) => i.clone(),
+ None => {
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
+ let i = uuid_filename_for(path);
+ self.baseline_file_info.insert(
+ i.clone(),
+ BaselineFileInfo {
+ path: Some(path.clone()),
+ contents_bytes: None,
+ mode: None,
+ oid: Some(ZERO_OID.to_string()),
+ },
+ );
+ i
+ }
+ };
+ // Update current external mapping for temp file name.
+ self.temp_name_to_current_path
+ .insert(uuid_filename.clone(), dest.clone());
+ // Update forward file_mapping: external current -> internal name.
+ self.external_to_temp_name.remove(path);
+ self.external_to_temp_name
+ .insert(dest.clone(), uuid_filename);
+ };
+ }
+
+ Ok(())
```
> I don't see a `?` or a place where the `Err` variant is constructed, so does this need to return `Result`?
- Created: 2025-08-04 02:57:10 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250301151
```diff
@@ -0,0 +1,766 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use anyhow::anyhow;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+struct BaselineFileInfo {
+ path: Option<PathBuf>,
+ contents_bytes: Option<Vec<u8>>,
```
> I think the field name `file_contents` implies bytes given the type.
- Created: 2025-08-04 02:57:49 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250301670
```diff
@@ -0,0 +1,766 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use anyhow::anyhow;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+struct BaselineFileInfo {
+ path: Option<PathBuf>,
+ contents_bytes: Option<Vec<u8>>,
+ mode: Option<String>,
```
> I think an `enum` instead of a `String` would be clearer here.
- Created: 2025-08-04 02:58:20 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250302031
```diff
@@ -0,0 +1,766 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use anyhow::anyhow;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+struct BaselineFileInfo {
+ path: Option<PathBuf>,
```
> Add a comment since it is surprising that `path` could be `None`?
- Created: 2025-08-04 02:59:46 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250303137
```diff
@@ -0,0 +1,766 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use anyhow::anyhow;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+struct BaselineFileInfo {
+ path: Option<PathBuf>,
+ contents_bytes: Option<Vec<u8>>,
+ mode: Option<String>,
+ oid: Option<String>,
+}
+
+/// Tracks sets of changes to files and exposes the overall unified diff.
+/// Internally, the way this works is now:
+/// 1. Maintain an in-memory baseline snapshot of files when they are first seen.
+/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null).
+/// 2. Keep a stable internal filename (uuid + same extension) per external path for rename tracking.
+/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory
+/// using the `similar` crate and emit unified diffs with rewritten external paths.
+#[derive(Default)]
+pub struct TurnDiffTracker {
+ /// Map external path -> internal filename (uuid + same extension).
+ external_to_temp_name: HashMap<PathBuf, String>,
+ /// Internal filename -> baseline file info.
+ baseline_file_info: HashMap<String, BaselineFileInfo>,
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
+ /// This is where renames are tracked.
+ temp_name_to_current_path: HashMap<String, PathBuf>,
+ /// Cache of known git worktree roots to avoid repeated filesystem walks.
+ git_root_cache: Vec<PathBuf>,
+}
+
+impl TurnDiffTracker {
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Front-run apply patch calls to track the starting contents of any modified files.
+ /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen.
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
+ /// - Also updates internal mappings for move/rename events.
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
+ for (path, change) in changes.iter() {
+ // Ensure a stable internal filename exists for this external path.
+ if !self.external_to_temp_name.contains_key(path) {
+ let internal = uuid_filename_for(path);
+ self.external_to_temp_name
+ .insert(path.clone(), internal.clone());
+ self.temp_name_to_current_path
+ .insert(internal.clone(), path.clone());
+
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
+ let (contents_bytes, mode, oid) = if path.exists() {
+ let mode = file_mode_for_path(path);
+ let mode_str = mode.as_deref().unwrap_or(REGULAR_MODE);
```
> If `mode` becomes an `enum`, using `match` would be cleaner here.
- Created: 2025-08-04 03:00:33 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250303694
```diff
@@ -0,0 +1,766 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use anyhow::anyhow;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+struct BaselineFileInfo {
+ path: Option<PathBuf>,
```
> I don't see a case where it is `None` in the code, but maybe I'm missing something?
- Created: 2025-08-04 03:03:14 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250305839
```diff
@@ -0,0 +1,766 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use anyhow::anyhow;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+struct BaselineFileInfo {
+ path: Option<PathBuf>,
+ contents_bytes: Option<Vec<u8>>,
+ mode: Option<String>,
+ oid: Option<String>,
+}
+
+/// Tracks sets of changes to files and exposes the overall unified diff.
+/// Internally, the way this works is now:
+/// 1. Maintain an in-memory baseline snapshot of files when they are first seen.
+/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null).
+/// 2. Keep a stable internal filename (uuid + same extension) per external path for rename tracking.
+/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory
+/// using the `similar` crate and emit unified diffs with rewritten external paths.
+#[derive(Default)]
+pub struct TurnDiffTracker {
+ /// Map external path -> internal filename (uuid + same extension).
+ external_to_temp_name: HashMap<PathBuf, String>,
+ /// Internal filename -> baseline file info.
+ baseline_file_info: HashMap<String, BaselineFileInfo>,
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
+ /// This is where renames are tracked.
+ temp_name_to_current_path: HashMap<String, PathBuf>,
+ /// Cache of known git worktree roots to avoid repeated filesystem walks.
+ git_root_cache: Vec<PathBuf>,
+}
+
+impl TurnDiffTracker {
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Front-run apply patch calls to track the starting contents of any modified files.
+ /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen.
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
+ /// - Also updates internal mappings for move/rename events.
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
+ for (path, change) in changes.iter() {
+ // Ensure a stable internal filename exists for this external path.
+ if !self.external_to_temp_name.contains_key(path) {
+ let internal = uuid_filename_for(path);
+ self.external_to_temp_name
+ .insert(path.clone(), internal.clone());
+ self.temp_name_to_current_path
+ .insert(internal.clone(), path.clone());
+
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
+ let (contents_bytes, mode, oid) = if path.exists() {
+ let mode = file_mode_for_path(path);
+ let mode_str = mode.as_deref().unwrap_or(REGULAR_MODE);
+ let contents_bytes = blob_bytes(path, mode_str)
+ .unwrap_or_default()
+ .unwrap_or_default();
+ let oid = if mode.as_deref() == Some(SYMLINK_MODE) {
+ git_blob_sha1_hex_bytes(&contents_bytes)
+ } else {
+ self.git_blob_oid_for_path(path)
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(&contents_bytes))
+ };
+ (Some(contents_bytes), mode, Some(oid))
+ } else {
+ (None, None, Some(ZERO_OID.to_string()))
+ };
+
+ self.baseline_file_info.insert(
+ internal.clone(),
+ BaselineFileInfo {
+ path: Some(path.clone()),
+ contents_bytes,
+ mode,
+ oid,
+ },
+ );
+ }
+
+ // Track rename/move in current mapping if provided in an Update.
+ if let FileChange::Update {
+ move_path: Some(dest),
+ ..
+ } = change
+ {
+ let uuid_filename = match self.external_to_temp_name.get(path) {
+ Some(i) => i.clone(),
+ None => {
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
+ let i = uuid_filename_for(path);
+ self.baseline_file_info.insert(
+ i.clone(),
+ BaselineFileInfo {
+ path: Some(path.clone()),
+ contents_bytes: None,
+ mode: None,
+ oid: Some(ZERO_OID.to_string()),
+ },
+ );
+ i
+ }
+ };
+ // Update current external mapping for temp file name.
+ self.temp_name_to_current_path
+ .insert(uuid_filename.clone(), dest.clone());
+ // Update forward file_mapping: external current -> internal name.
+ self.external_to_temp_name.remove(path);
+ self.external_to_temp_name
+ .insert(dest.clone(), uuid_filename);
+ };
+ }
+
+ Ok(())
+ }
+
+ fn get_path_for_internal(&self, internal: &str) -> Option<PathBuf> {
+ self.temp_name_to_current_path
+ .get(internal)
+ .cloned()
+ .or_else(|| {
+ self.baseline_file_info
+ .get(internal)
+ .and_then(|info| info.path.clone())
+ })
+ }
+
+ /// Find the git worktree root for a file/directory by walking up to the first ancestor containing a `.git` entry.
+ /// Uses a simple cache of known roots and avoids negative-result caching for simplicity.
+ fn find_git_root_cached(&mut self, start: &Path) -> Option<PathBuf> {
+ let dir = if start.is_dir() {
+ start
+ } else {
+ start.parent()?
+ };
+
+ // Fast path: if any cached root is an ancestor of this path, use it.
+ if let Some(root) = self
+ .git_root_cache
+ .iter()
+ .find(|r| dir.starts_with(r))
+ .cloned()
+ {
+ return Some(root);
+ }
+
+ // Walk up to find a `.git` marker.
+ let mut cur = dir.to_path_buf();
+ loop {
+ let git_marker = cur.join(".git");
+ if git_marker.is_dir() || git_marker.is_file() {
+ if !self.git_root_cache.iter().any(|r| r == &cur) {
+ self.git_root_cache.push(cur.clone());
+ }
+ return Some(cur);
+ }
+
+ // On Windows, avoid walking above the drive or UNC share root.
+ #[cfg(windows)]
+ {
+ if is_windows_drive_or_unc_root(&cur) {
+ return None;
+ }
+ }
+
+ if let Some(parent) = cur.parent() {
+ cur = parent.to_path_buf();
+ } else {
+ return None;
+ }
+ }
+ }
+
+ /// Return a display string for `path` relative to its git root if found, else absolute.
+ fn relative_to_git_root_str(&mut self, path: &Path) -> String {
+ let s = if let Some(root) = self.find_git_root_cached(path) {
+ if let Ok(rel) = path.strip_prefix(&root) {
+ rel.display().to_string()
+ } else {
+ path.display().to_string()
+ }
+ } else {
+ path.display().to_string()
+ };
+ s.replace('\\', "/")
+ }
+
+ /// Ask git to compute the blob SHA-1 for the file at `path` within its repository.
+ /// Returns None if no repository is found or git invocation fails.
+ fn git_blob_oid_for_path(&mut self, path: &Path) -> Option<String> {
+ let root = self.find_git_root_cached(path)?;
+ // Compute a path relative to the repo root for better portability across platforms.
+ let rel = path.strip_prefix(&root).unwrap_or(path);
+ let output = Command::new("git")
+ .arg("-C")
+ .arg(&root)
+ .arg("hash-object")
+ .arg("--")
+ .arg(rel)
+ .output()
+ .ok()?;
+ if !output.status.success() {
+ return None;
+ }
+ let s = String::from_utf8_lossy(&output.stdout).trim().to_string();
+ if s.len() == 40 { Some(s) } else { None }
```
> If it is not 40, is this an error / unexpected situation?
- Created: 2025-08-04 03:03:31 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250306088
```diff
@@ -0,0 +1,766 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use anyhow::anyhow;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+struct BaselineFileInfo {
+ path: Option<PathBuf>,
+ contents_bytes: Option<Vec<u8>>,
+ mode: Option<String>,
+ oid: Option<String>,
+}
+
+/// Tracks sets of changes to files and exposes the overall unified diff.
+/// Internally, the way this works is now:
+/// 1. Maintain an in-memory baseline snapshot of files when they are first seen.
+/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null).
+/// 2. Keep a stable internal filename (uuid + same extension) per external path for rename tracking.
+/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory
+/// using the `similar` crate and emit unified diffs with rewritten external paths.
+#[derive(Default)]
+pub struct TurnDiffTracker {
+ /// Map external path -> internal filename (uuid + same extension).
+ external_to_temp_name: HashMap<PathBuf, String>,
+ /// Internal filename -> baseline file info.
+ baseline_file_info: HashMap<String, BaselineFileInfo>,
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
+ /// This is where renames are tracked.
+ temp_name_to_current_path: HashMap<String, PathBuf>,
+ /// Cache of known git worktree roots to avoid repeated filesystem walks.
+ git_root_cache: Vec<PathBuf>,
+}
+
+impl TurnDiffTracker {
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Front-run apply patch calls to track the starting contents of any modified files.
+ /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen.
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
+ /// - Also updates internal mappings for move/rename events.
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
+ for (path, change) in changes.iter() {
+ // Ensure a stable internal filename exists for this external path.
+ if !self.external_to_temp_name.contains_key(path) {
+ let internal = uuid_filename_for(path);
+ self.external_to_temp_name
+ .insert(path.clone(), internal.clone());
+ self.temp_name_to_current_path
+ .insert(internal.clone(), path.clone());
+
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
+ let (contents_bytes, mode, oid) = if path.exists() {
+ let mode = file_mode_for_path(path);
+ let mode_str = mode.as_deref().unwrap_or(REGULAR_MODE);
+ let contents_bytes = blob_bytes(path, mode_str)
+ .unwrap_or_default()
+ .unwrap_or_default();
+ let oid = if mode.as_deref() == Some(SYMLINK_MODE) {
+ git_blob_sha1_hex_bytes(&contents_bytes)
+ } else {
+ self.git_blob_oid_for_path(path)
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(&contents_bytes))
+ };
+ (Some(contents_bytes), mode, Some(oid))
+ } else {
+ (None, None, Some(ZERO_OID.to_string()))
+ };
+
+ self.baseline_file_info.insert(
+ internal.clone(),
+ BaselineFileInfo {
+ path: Some(path.clone()),
+ contents_bytes,
+ mode,
+ oid,
+ },
+ );
+ }
+
+ // Track rename/move in current mapping if provided in an Update.
+ if let FileChange::Update {
+ move_path: Some(dest),
+ ..
+ } = change
+ {
+ let uuid_filename = match self.external_to_temp_name.get(path) {
+ Some(i) => i.clone(),
+ None => {
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
+ let i = uuid_filename_for(path);
+ self.baseline_file_info.insert(
+ i.clone(),
+ BaselineFileInfo {
+ path: Some(path.clone()),
+ contents_bytes: None,
+ mode: None,
+ oid: Some(ZERO_OID.to_string()),
+ },
+ );
+ i
+ }
+ };
+ // Update current external mapping for temp file name.
+ self.temp_name_to_current_path
+ .insert(uuid_filename.clone(), dest.clone());
+ // Update forward file_mapping: external current -> internal name.
+ self.external_to_temp_name.remove(path);
+ self.external_to_temp_name
+ .insert(dest.clone(), uuid_filename);
+ };
+ }
+
+ Ok(())
+ }
+
+ fn get_path_for_internal(&self, internal: &str) -> Option<PathBuf> {
+ self.temp_name_to_current_path
+ .get(internal)
+ .cloned()
+ .or_else(|| {
+ self.baseline_file_info
+ .get(internal)
+ .and_then(|info| info.path.clone())
+ })
+ }
+
+ /// Find the git worktree root for a file/directory by walking up to the first ancestor containing a `.git` entry.
+ /// Uses a simple cache of known roots and avoids negative-result caching for simplicity.
+ fn find_git_root_cached(&mut self, start: &Path) -> Option<PathBuf> {
+ let dir = if start.is_dir() {
+ start
+ } else {
+ start.parent()?
+ };
+
+ // Fast path: if any cached root is an ancestor of this path, use it.
+ if let Some(root) = self
+ .git_root_cache
+ .iter()
+ .find(|r| dir.starts_with(r))
+ .cloned()
+ {
+ return Some(root);
+ }
+
+ // Walk up to find a `.git` marker.
+ let mut cur = dir.to_path_buf();
+ loop {
+ let git_marker = cur.join(".git");
+ if git_marker.is_dir() || git_marker.is_file() {
+ if !self.git_root_cache.iter().any(|r| r == &cur) {
+ self.git_root_cache.push(cur.clone());
+ }
+ return Some(cur);
+ }
+
+ // On Windows, avoid walking above the drive or UNC share root.
+ #[cfg(windows)]
+ {
+ if is_windows_drive_or_unc_root(&cur) {
+ return None;
+ }
+ }
+
+ if let Some(parent) = cur.parent() {
+ cur = parent.to_path_buf();
+ } else {
+ return None;
+ }
+ }
+ }
+
+ /// Return a display string for `path` relative to its git root if found, else absolute.
+ fn relative_to_git_root_str(&mut self, path: &Path) -> String {
+ let s = if let Some(root) = self.find_git_root_cached(path) {
+ if let Ok(rel) = path.strip_prefix(&root) {
+ rel.display().to_string()
+ } else {
+ path.display().to_string()
+ }
+ } else {
+ path.display().to_string()
+ };
+ s.replace('\\', "/")
+ }
+
+ /// Ask git to compute the blob SHA-1 for the file at `path` within its repository.
+ /// Returns None if no repository is found or git invocation fails.
+ fn git_blob_oid_for_path(&mut self, path: &Path) -> Option<String> {
+ let root = self.find_git_root_cached(path)?;
+ // Compute a path relative to the repo root for better portability across platforms.
+ let rel = path.strip_prefix(&root).unwrap_or(path);
+ let output = Command::new("git")
+ .arg("-C")
+ .arg(&root)
+ .arg("hash-object")
+ .arg("--")
+ .arg(rel)
+ .output()
+ .ok()?;
+ if !output.status.success() {
```
> Is this something expected or exceptional?
- Created: 2025-08-04 03:03:57 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250306484
```diff
@@ -0,0 +1,766 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use anyhow::anyhow;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+struct BaselineFileInfo {
+ path: Option<PathBuf>,
+ contents_bytes: Option<Vec<u8>>,
+ mode: Option<String>,
+ oid: Option<String>,
+}
+
+/// Tracks sets of changes to files and exposes the overall unified diff.
+/// Internally, the way this works is now:
+/// 1. Maintain an in-memory baseline snapshot of files when they are first seen.
+/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null).
+/// 2. Keep a stable internal filename (uuid + same extension) per external path for rename tracking.
+/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory
+/// using the `similar` crate and emit unified diffs with rewritten external paths.
+#[derive(Default)]
+pub struct TurnDiffTracker {
+ /// Map external path -> internal filename (uuid + same extension).
+ external_to_temp_name: HashMap<PathBuf, String>,
+ /// Internal filename -> baseline file info.
+ baseline_file_info: HashMap<String, BaselineFileInfo>,
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
+ /// This is where renames are tracked.
+ temp_name_to_current_path: HashMap<String, PathBuf>,
+ /// Cache of known git worktree roots to avoid repeated filesystem walks.
+ git_root_cache: Vec<PathBuf>,
+}
+
+impl TurnDiffTracker {
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Front-run apply patch calls to track the starting contents of any modified files.
+ /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen.
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
+ /// - Also updates internal mappings for move/rename events.
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
+ for (path, change) in changes.iter() {
+ // Ensure a stable internal filename exists for this external path.
+ if !self.external_to_temp_name.contains_key(path) {
+ let internal = uuid_filename_for(path);
+ self.external_to_temp_name
+ .insert(path.clone(), internal.clone());
+ self.temp_name_to_current_path
+ .insert(internal.clone(), path.clone());
+
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
+ let (contents_bytes, mode, oid) = if path.exists() {
+ let mode = file_mode_for_path(path);
+ let mode_str = mode.as_deref().unwrap_or(REGULAR_MODE);
+ let contents_bytes = blob_bytes(path, mode_str)
+ .unwrap_or_default()
+ .unwrap_or_default();
+ let oid = if mode.as_deref() == Some(SYMLINK_MODE) {
+ git_blob_sha1_hex_bytes(&contents_bytes)
+ } else {
+ self.git_blob_oid_for_path(path)
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(&contents_bytes))
+ };
+ (Some(contents_bytes), mode, Some(oid))
+ } else {
+ (None, None, Some(ZERO_OID.to_string()))
+ };
+
+ self.baseline_file_info.insert(
+ internal.clone(),
+ BaselineFileInfo {
+ path: Some(path.clone()),
+ contents_bytes,
+ mode,
+ oid,
+ },
+ );
+ }
+
+ // Track rename/move in current mapping if provided in an Update.
+ if let FileChange::Update {
+ move_path: Some(dest),
+ ..
+ } = change
+ {
+ let uuid_filename = match self.external_to_temp_name.get(path) {
+ Some(i) => i.clone(),
+ None => {
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
+ let i = uuid_filename_for(path);
+ self.baseline_file_info.insert(
+ i.clone(),
+ BaselineFileInfo {
+ path: Some(path.clone()),
+ contents_bytes: None,
+ mode: None,
+ oid: Some(ZERO_OID.to_string()),
+ },
+ );
+ i
+ }
+ };
+ // Update current external mapping for temp file name.
+ self.temp_name_to_current_path
+ .insert(uuid_filename.clone(), dest.clone());
+ // Update forward file_mapping: external current -> internal name.
+ self.external_to_temp_name.remove(path);
+ self.external_to_temp_name
+ .insert(dest.clone(), uuid_filename);
+ };
+ }
+
+ Ok(())
+ }
+
+ fn get_path_for_internal(&self, internal: &str) -> Option<PathBuf> {
+ self.temp_name_to_current_path
+ .get(internal)
+ .cloned()
+ .or_else(|| {
+ self.baseline_file_info
+ .get(internal)
+ .and_then(|info| info.path.clone())
+ })
+ }
+
+ /// Find the git worktree root for a file/directory by walking up to the first ancestor containing a `.git` entry.
+ /// Uses a simple cache of known roots and avoids negative-result caching for simplicity.
+ fn find_git_root_cached(&mut self, start: &Path) -> Option<PathBuf> {
+ let dir = if start.is_dir() {
+ start
+ } else {
+ start.parent()?
+ };
+
+ // Fast path: if any cached root is an ancestor of this path, use it.
+ if let Some(root) = self
+ .git_root_cache
+ .iter()
+ .find(|r| dir.starts_with(r))
+ .cloned()
+ {
+ return Some(root);
+ }
+
+ // Walk up to find a `.git` marker.
+ let mut cur = dir.to_path_buf();
+ loop {
+ let git_marker = cur.join(".git");
+ if git_marker.is_dir() || git_marker.is_file() {
+ if !self.git_root_cache.iter().any(|r| r == &cur) {
+ self.git_root_cache.push(cur.clone());
+ }
+ return Some(cur);
+ }
+
+ // On Windows, avoid walking above the drive or UNC share root.
+ #[cfg(windows)]
+ {
+ if is_windows_drive_or_unc_root(&cur) {
+ return None;
+ }
+ }
+
+ if let Some(parent) = cur.parent() {
+ cur = parent.to_path_buf();
+ } else {
+ return None;
+ }
+ }
+ }
+
+ /// Return a display string for `path` relative to its git root if found, else absolute.
+ fn relative_to_git_root_str(&mut self, path: &Path) -> String {
+ let s = if let Some(root) = self.find_git_root_cached(path) {
+ if let Ok(rel) = path.strip_prefix(&root) {
+ rel.display().to_string()
+ } else {
+ path.display().to_string()
+ }
+ } else {
+ path.display().to_string()
+ };
+ s.replace('\\', "/")
+ }
+
+ /// Ask git to compute the blob SHA-1 for the file at `path` within its repository.
+ /// Returns None if no repository is found or git invocation fails.
+ fn git_blob_oid_for_path(&mut self, path: &Path) -> Option<String> {
+ let root = self.find_git_root_cached(path)?;
+ // Compute a path relative to the repo root for better portability across platforms.
+ let rel = path.strip_prefix(&root).unwrap_or(path);
+ let output = Command::new("git")
+ .arg("-C")
+ .arg(&root)
+ .arg("hash-object")
+ .arg("--")
+ .arg(rel)
+ .output()
+ .ok()?;
+ if !output.status.success() {
+ return None;
+ }
+ let s = String::from_utf8_lossy(&output.stdout).trim().to_string();
+ if s.len() == 40 { Some(s) } else { None }
+ }
+
+ /// Recompute the aggregated unified diff by comparing all of the in-memory snapshots that were
+ /// collected before the first time they were touched by apply_patch during this turn with
+ /// the current repo state.
+ pub fn get_unified_diff(&mut self) -> Result<Option<String>> {
+ let mut aggregated = String::new();
+
+ // Compute diffs per tracked internal file in a stable order by external path.
+ let mut baseline_file_names: Vec<String> =
+ self.baseline_file_info.keys().cloned().collect();
+ // Sort lexicographically by full repo-relative path to match git behavior.
+ baseline_file_names.sort_by_key(|internal| {
```
> 👍
- Created: 2025-08-04 03:05:24 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250307567
```diff
@@ -0,0 +1,766 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use anyhow::anyhow;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+struct BaselineFileInfo {
+ path: Option<PathBuf>,
+ contents_bytes: Option<Vec<u8>>,
+ mode: Option<String>,
+ oid: Option<String>,
+}
+
+/// Tracks sets of changes to files and exposes the overall unified diff.
+/// Internally, the way this works is now:
+/// 1. Maintain an in-memory baseline snapshot of files when they are first seen.
+/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null).
+/// 2. Keep a stable internal filename (uuid + same extension) per external path for rename tracking.
+/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory
+/// using the `similar` crate and emit unified diffs with rewritten external paths.
+#[derive(Default)]
+pub struct TurnDiffTracker {
+ /// Map external path -> internal filename (uuid + same extension).
+ external_to_temp_name: HashMap<PathBuf, String>,
+ /// Internal filename -> baseline file info.
+ baseline_file_info: HashMap<String, BaselineFileInfo>,
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
+ /// This is where renames are tracked.
+ temp_name_to_current_path: HashMap<String, PathBuf>,
+ /// Cache of known git worktree roots to avoid repeated filesystem walks.
+ git_root_cache: Vec<PathBuf>,
+}
+
+impl TurnDiffTracker {
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Front-run apply patch calls to track the starting contents of any modified files.
+ /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen.
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
+ /// - Also updates internal mappings for move/rename events.
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
+ for (path, change) in changes.iter() {
+ // Ensure a stable internal filename exists for this external path.
+ if !self.external_to_temp_name.contains_key(path) {
+ let internal = uuid_filename_for(path);
+ self.external_to_temp_name
+ .insert(path.clone(), internal.clone());
+ self.temp_name_to_current_path
+ .insert(internal.clone(), path.clone());
+
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
+ let (contents_bytes, mode, oid) = if path.exists() {
+ let mode = file_mode_for_path(path);
+ let mode_str = mode.as_deref().unwrap_or(REGULAR_MODE);
+ let contents_bytes = blob_bytes(path, mode_str)
+ .unwrap_or_default()
+ .unwrap_or_default();
+ let oid = if mode.as_deref() == Some(SYMLINK_MODE) {
+ git_blob_sha1_hex_bytes(&contents_bytes)
+ } else {
+ self.git_blob_oid_for_path(path)
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(&contents_bytes))
+ };
+ (Some(contents_bytes), mode, Some(oid))
+ } else {
+ (None, None, Some(ZERO_OID.to_string()))
+ };
+
+ self.baseline_file_info.insert(
+ internal.clone(),
+ BaselineFileInfo {
+ path: Some(path.clone()),
+ contents_bytes,
+ mode,
+ oid,
+ },
+ );
+ }
+
+ // Track rename/move in current mapping if provided in an Update.
+ if let FileChange::Update {
+ move_path: Some(dest),
+ ..
+ } = change
+ {
+ let uuid_filename = match self.external_to_temp_name.get(path) {
+ Some(i) => i.clone(),
+ None => {
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
+ let i = uuid_filename_for(path);
+ self.baseline_file_info.insert(
+ i.clone(),
+ BaselineFileInfo {
+ path: Some(path.clone()),
+ contents_bytes: None,
+ mode: None,
+ oid: Some(ZERO_OID.to_string()),
+ },
+ );
+ i
+ }
+ };
+ // Update current external mapping for temp file name.
+ self.temp_name_to_current_path
+ .insert(uuid_filename.clone(), dest.clone());
+ // Update forward file_mapping: external current -> internal name.
+ self.external_to_temp_name.remove(path);
+ self.external_to_temp_name
+ .insert(dest.clone(), uuid_filename);
+ };
+ }
+
+ Ok(())
+ }
+
+ fn get_path_for_internal(&self, internal: &str) -> Option<PathBuf> {
+ self.temp_name_to_current_path
+ .get(internal)
+ .cloned()
+ .or_else(|| {
+ self.baseline_file_info
+ .get(internal)
+ .and_then(|info| info.path.clone())
+ })
+ }
+
+ /// Find the git worktree root for a file/directory by walking up to the first ancestor containing a `.git` entry.
+ /// Uses a simple cache of known roots and avoids negative-result caching for simplicity.
+ fn find_git_root_cached(&mut self, start: &Path) -> Option<PathBuf> {
+ let dir = if start.is_dir() {
+ start
+ } else {
+ start.parent()?
+ };
+
+ // Fast path: if any cached root is an ancestor of this path, use it.
+ if let Some(root) = self
+ .git_root_cache
+ .iter()
+ .find(|r| dir.starts_with(r))
+ .cloned()
+ {
+ return Some(root);
+ }
+
+ // Walk up to find a `.git` marker.
+ let mut cur = dir.to_path_buf();
+ loop {
+ let git_marker = cur.join(".git");
+ if git_marker.is_dir() || git_marker.is_file() {
+ if !self.git_root_cache.iter().any(|r| r == &cur) {
+ self.git_root_cache.push(cur.clone());
+ }
+ return Some(cur);
+ }
+
+ // On Windows, avoid walking above the drive or UNC share root.
+ #[cfg(windows)]
+ {
+ if is_windows_drive_or_unc_root(&cur) {
+ return None;
+ }
+ }
+
+ if let Some(parent) = cur.parent() {
+ cur = parent.to_path_buf();
+ } else {
+ return None;
+ }
+ }
+ }
+
+ /// Return a display string for `path` relative to its git root if found, else absolute.
+ fn relative_to_git_root_str(&mut self, path: &Path) -> String {
+ let s = if let Some(root) = self.find_git_root_cached(path) {
+ if let Ok(rel) = path.strip_prefix(&root) {
+ rel.display().to_string()
+ } else {
+ path.display().to_string()
+ }
+ } else {
+ path.display().to_string()
+ };
+ s.replace('\\', "/")
+ }
+
+ /// Ask git to compute the blob SHA-1 for the file at `path` within its repository.
+ /// Returns None if no repository is found or git invocation fails.
+ fn git_blob_oid_for_path(&mut self, path: &Path) -> Option<String> {
+ let root = self.find_git_root_cached(path)?;
+ // Compute a path relative to the repo root for better portability across platforms.
+ let rel = path.strip_prefix(&root).unwrap_or(path);
+ let output = Command::new("git")
+ .arg("-C")
+ .arg(&root)
+ .arg("hash-object")
+ .arg("--")
+ .arg(rel)
+ .output()
+ .ok()?;
+ if !output.status.success() {
+ return None;
+ }
+ let s = String::from_utf8_lossy(&output.stdout).trim().to_string();
+ if s.len() == 40 { Some(s) } else { None }
+ }
+
+ /// Recompute the aggregated unified diff by comparing all of the in-memory snapshots that were
+ /// collected before the first time they were touched by apply_patch during this turn with
+ /// the current repo state.
+ pub fn get_unified_diff(&mut self) -> Result<Option<String>> {
+ let mut aggregated = String::new();
+
+ // Compute diffs per tracked internal file in a stable order by external path.
+ let mut baseline_file_names: Vec<String> =
+ self.baseline_file_info.keys().cloned().collect();
+ // Sort lexicographically by full repo-relative path to match git behavior.
+ baseline_file_names.sort_by_key(|internal| {
+ self.get_path_for_internal(internal)
+ .map(|p| self.relative_to_git_root_str(&p))
+ .unwrap_or_default()
+ });
+
+ for internal in baseline_file_names {
+ // Baseline external must exist for any tracked internal.
+ let baseline_external = match self
+ .baseline_file_info
+ .get(&internal)
+ .and_then(|i| i.path.clone())
+ {
+ Some(p) => p,
+ None => continue,
+ };
+ let current_external = match self.get_path_for_internal(&internal) {
+ Some(p) => p,
+ None => continue,
+ };
+
+ // Determine modes early; needed to read symlink content correctly.
+ let baseline_mode = self
+ .baseline_file_info
+ .get(&internal)
+ .and_then(|i| i.mode.clone())
+ .unwrap_or_else(|| REGULAR_MODE.to_string());
+ let current_mode =
+ file_mode_for_path(&current_external).unwrap_or_else(|| REGULAR_MODE.to_string());
+
+ let left_bytes = self
+ .baseline_file_info
+ .get(&internal)
+ .and_then(|i| i.contents_bytes.clone());
+
+ let right_bytes = blob_bytes(&current_external, &current_mode)?;
+
+ // Fast path: identical bytes or both missing.
+ if left_bytes.as_deref() == right_bytes.as_deref() {
+ continue;
+ }
+
+ let left_display = self.relative_to_git_root_str(&baseline_external);
+ let right_display = self.relative_to_git_root_str(&current_external);
+
+ // Emit a git-style header for better readability and parity with previous behavior.
+ aggregated.push_str(&format!("diff --git a/{left_display} b/{right_display}\n"));
+
+ let is_add = left_bytes.is_none() && right_bytes.is_some();
```
> Could do `match (left_bytes, right_bytes)` to ensure all cases are covered.
- Created: 2025-08-04 03:06:34 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250308415
```diff
@@ -0,0 +1,766 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use anyhow::anyhow;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+struct BaselineFileInfo {
+ path: Option<PathBuf>,
+ contents_bytes: Option<Vec<u8>>,
+ mode: Option<String>,
+ oid: Option<String>,
+}
+
+/// Tracks sets of changes to files and exposes the overall unified diff.
+/// Internally, the way this works is now:
+/// 1. Maintain an in-memory baseline snapshot of files when they are first seen.
+/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null).
+/// 2. Keep a stable internal filename (uuid + same extension) per external path for rename tracking.
+/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory
+/// using the `similar` crate and emit unified diffs with rewritten external paths.
+#[derive(Default)]
+pub struct TurnDiffTracker {
+ /// Map external path -> internal filename (uuid + same extension).
+ external_to_temp_name: HashMap<PathBuf, String>,
+ /// Internal filename -> baseline file info.
+ baseline_file_info: HashMap<String, BaselineFileInfo>,
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
+ /// This is where renames are tracked.
+ temp_name_to_current_path: HashMap<String, PathBuf>,
+ /// Cache of known git worktree roots to avoid repeated filesystem walks.
+ git_root_cache: Vec<PathBuf>,
+}
+
+impl TurnDiffTracker {
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Front-run apply patch calls to track the starting contents of any modified files.
+ /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen.
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
+ /// - Also updates internal mappings for move/rename events.
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
+ for (path, change) in changes.iter() {
+ // Ensure a stable internal filename exists for this external path.
+ if !self.external_to_temp_name.contains_key(path) {
+ let internal = uuid_filename_for(path);
+ self.external_to_temp_name
+ .insert(path.clone(), internal.clone());
+ self.temp_name_to_current_path
+ .insert(internal.clone(), path.clone());
+
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
+ let (contents_bytes, mode, oid) = if path.exists() {
+ let mode = file_mode_for_path(path);
+ let mode_str = mode.as_deref().unwrap_or(REGULAR_MODE);
+ let contents_bytes = blob_bytes(path, mode_str)
+ .unwrap_or_default()
+ .unwrap_or_default();
+ let oid = if mode.as_deref() == Some(SYMLINK_MODE) {
+ git_blob_sha1_hex_bytes(&contents_bytes)
+ } else {
+ self.git_blob_oid_for_path(path)
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(&contents_bytes))
+ };
+ (Some(contents_bytes), mode, Some(oid))
+ } else {
+ (None, None, Some(ZERO_OID.to_string()))
+ };
+
+ self.baseline_file_info.insert(
+ internal.clone(),
+ BaselineFileInfo {
+ path: Some(path.clone()),
+ contents_bytes,
+ mode,
+ oid,
+ },
+ );
+ }
+
+ // Track rename/move in current mapping if provided in an Update.
+ if let FileChange::Update {
+ move_path: Some(dest),
+ ..
+ } = change
+ {
+ let uuid_filename = match self.external_to_temp_name.get(path) {
+ Some(i) => i.clone(),
+ None => {
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
+ let i = uuid_filename_for(path);
+ self.baseline_file_info.insert(
+ i.clone(),
+ BaselineFileInfo {
+ path: Some(path.clone()),
+ contents_bytes: None,
+ mode: None,
+ oid: Some(ZERO_OID.to_string()),
+ },
+ );
+ i
+ }
+ };
+ // Update current external mapping for temp file name.
+ self.temp_name_to_current_path
+ .insert(uuid_filename.clone(), dest.clone());
+ // Update forward file_mapping: external current -> internal name.
+ self.external_to_temp_name.remove(path);
+ self.external_to_temp_name
+ .insert(dest.clone(), uuid_filename);
+ };
+ }
+
+ Ok(())
+ }
+
+ fn get_path_for_internal(&self, internal: &str) -> Option<PathBuf> {
+ self.temp_name_to_current_path
+ .get(internal)
+ .cloned()
+ .or_else(|| {
+ self.baseline_file_info
+ .get(internal)
+ .and_then(|info| info.path.clone())
+ })
+ }
+
+ /// Find the git worktree root for a file/directory by walking up to the first ancestor containing a `.git` entry.
+ /// Uses a simple cache of known roots and avoids negative-result caching for simplicity.
+ fn find_git_root_cached(&mut self, start: &Path) -> Option<PathBuf> {
+ let dir = if start.is_dir() {
+ start
+ } else {
+ start.parent()?
+ };
+
+ // Fast path: if any cached root is an ancestor of this path, use it.
+ if let Some(root) = self
+ .git_root_cache
+ .iter()
+ .find(|r| dir.starts_with(r))
+ .cloned()
+ {
+ return Some(root);
+ }
+
+ // Walk up to find a `.git` marker.
+ let mut cur = dir.to_path_buf();
+ loop {
+ let git_marker = cur.join(".git");
+ if git_marker.is_dir() || git_marker.is_file() {
+ if !self.git_root_cache.iter().any(|r| r == &cur) {
+ self.git_root_cache.push(cur.clone());
+ }
+ return Some(cur);
+ }
+
+ // On Windows, avoid walking above the drive or UNC share root.
+ #[cfg(windows)]
+ {
+ if is_windows_drive_or_unc_root(&cur) {
+ return None;
+ }
+ }
+
+ if let Some(parent) = cur.parent() {
+ cur = parent.to_path_buf();
+ } else {
+ return None;
+ }
+ }
+ }
+
+ /// Return a display string for `path` relative to its git root if found, else absolute.
+ fn relative_to_git_root_str(&mut self, path: &Path) -> String {
+ let s = if let Some(root) = self.find_git_root_cached(path) {
+ if let Ok(rel) = path.strip_prefix(&root) {
+ rel.display().to_string()
+ } else {
+ path.display().to_string()
+ }
+ } else {
+ path.display().to_string()
+ };
+ s.replace('\\', "/")
+ }
+
+ /// Ask git to compute the blob SHA-1 for the file at `path` within its repository.
+ /// Returns None if no repository is found or git invocation fails.
+ fn git_blob_oid_for_path(&mut self, path: &Path) -> Option<String> {
+ let root = self.find_git_root_cached(path)?;
+ // Compute a path relative to the repo root for better portability across platforms.
+ let rel = path.strip_prefix(&root).unwrap_or(path);
+ let output = Command::new("git")
+ .arg("-C")
+ .arg(&root)
+ .arg("hash-object")
+ .arg("--")
+ .arg(rel)
+ .output()
+ .ok()?;
+ if !output.status.success() {
+ return None;
+ }
+ let s = String::from_utf8_lossy(&output.stdout).trim().to_string();
+ if s.len() == 40 { Some(s) } else { None }
+ }
+
+ /// Recompute the aggregated unified diff by comparing all of the in-memory snapshots that were
+ /// collected before the first time they were touched by apply_patch during this turn with
+ /// the current repo state.
+ pub fn get_unified_diff(&mut self) -> Result<Option<String>> {
```
> This function is quite long and feels like it would benefit from being broken up.
- Created: 2025-08-04 03:07:44 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250309286
```diff
@@ -0,0 +1,476 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use tempfile::TempDir;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+/// Tracks sets of changes to files and exposes the overall unified diff.
+/// Internally, the way this works is now:
+/// 1. Create a temp directory to store baseline snapshots of files when they are first seen.
+/// 2. When a path is first observed, copy its current contents into the baseline dir if it exists on disk.
+/// For new additions, do not create a baseline file so that diffs are shown as proper additions (using /dev/null).
+/// 3. Keep a stable internal filename (uuid + same extension) per external path for path rewrite in diffs.
+/// 4. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk using
+/// `git diff --no-index` and rewrite paths to external paths.
+#[derive(Default)]
+pub struct TurnDiffTracker {
+ /// Temp directory holding baseline snapshots of files as first seen.
+ baseline_files_dir: Option<TempDir>,
+ /// Map external path -> internal filename (uuid + same extension).
+ external_to_temp_name: HashMap<PathBuf, String>,
+ /// Internal filename -> external path as of baseline snapshot.
+ temp_name_to_baseline_external: HashMap<String, PathBuf>,
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
+ /// This is where renames are tracked.
+ temp_name_to_current_external: HashMap<String, PathBuf>,
+ /// Aggregated unified diff for all accumulated changes across files.
+ pub unified_diff: Option<String>,
+}
+
+impl TurnDiffTracker {
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Front-run apply patch calls to track the starting contents of any modified files.
+ /// - Creates a baseline snapshot for files that already exist on disk when first seen.
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
+ /// - Also updates internal mappings for move/rename events.
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
+ self.ensure_baseline_dir()?;
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
+
+ for (path, change) in changes.iter() {
+ // Ensure a stable internal filename exists for this external path.
+ if !self.external_to_temp_name.contains_key(path) {
+ let internal = uuid_filename_for(path);
+ self.external_to_temp_name
+ .insert(path.clone(), internal.clone());
+ self.temp_name_to_baseline_external
+ .insert(internal.clone(), path.clone());
+ self.temp_name_to_current_external
+ .insert(internal.clone(), path.clone());
+
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
+ if path.exists() {
+ let contents = fs::read(path)
+ .with_context(|| format!("failed to read original {}", path.display()))?;
+ let internal_path = baseline_dir.join(&internal);
+ fs::write(&internal_path, contents).with_context(|| {
+ format!("failed to write baseline file {}", internal_path.display())
+ })?;
+ }
+ }
+
+ // Track rename/move in current mapping if provided in an Update.
+ let move_path = match change {
+ FileChange::Update {
+ move_path: Some(dest),
+ ..
+ } => Some(dest),
+ _ => None,
+ };
+ if let Some(dest) = move_path {
+ let uuid_filename = match self.external_to_temp_name.get(path) {
+ Some(i) => i.clone(),
+ None => {
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
+ let i = uuid_filename_for(path);
+ self.external_to_temp_name.insert(path.clone(), i.clone());
+ self.temp_name_to_baseline_external
+ .insert(i.clone(), path.clone());
+ i
+ }
+ };
+ // Update current external mapping for temp file name.
+ self.temp_name_to_current_external
+ .insert(uuid_filename.clone(), dest.clone());
+ // Update forward file_mapping: external current -> internal name.
+ self.external_to_temp_name.remove(path);
+ self.external_to_temp_name
+ .insert(dest.clone(), uuid_filename);
+ }
+ }
+
+ Ok(())
+ }
+
+ /// Recompute the aggregated unified diff by comparing all baseline snapshots against
+ /// current files on disk using `git diff --no-index` and rewriting paths to external paths.
+ pub fn update_and_get_unified_diff(&mut self) -> Result<Option<String>> {
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
+ let current_dir = baseline_dir.join("current");
+ if current_dir.exists() {
+ // Best-effort cleanup of previous run's mirror.
+ let _ = fs::remove_dir_all(&current_dir);
+ }
+ fs::create_dir_all(&current_dir).with_context(|| {
+ format!(
+ "failed to create current mirror dir {}",
+ current_dir.display()
+ )
+ })?;
+
+ let mut aggregated = String::new();
+
+ // Compute diffs per tracked internal file.
+ for (internal, baseline_external) in &self.temp_name_to_baseline_external {
+ let baseline_path = baseline_dir.join(internal);
+ let current_external = self
+ .temp_name_to_current_external
+ .get(internal)
+ .cloned()
+ .unwrap_or_else(|| baseline_external.clone());
+
+ let left_is_dev_null = !baseline_path.exists();
+ let right_exists = current_external.exists();
+
+ // Prepare right side mirror file if exists; otherwise use /dev/null for deletions.
+ let right_arg = if right_exists {
+ let mirror_path = current_dir.join(internal);
+ let contents = fs::read(&current_external).with_context(|| {
+ format!(
+ "failed to read current file for diff {}",
+ current_external.display()
+ )
+ })?;
+ fs::write(&mirror_path, contents).with_context(|| {
+ format!(
+ "failed to write current mirror file {}",
+ mirror_path.display()
+ )
+ })?;
+ // Use relative path from baseline_dir (so headers say a/<uuid> b/current/<uuid>).
+ format!("current/{internal}")
+ } else {
+ // Deletion: right side is /dev/null to show proper deleted file diff.
+ "/dev/null".to_string()
+ };
+
+ // Prepare left arg: baseline file path or /dev/null for additions.
+ let left_arg = if left_is_dev_null {
+ "/dev/null".to_string()
+ } else {
+ internal.clone()
+ };
+
+ // Run git diff --no-index from baseline_dir to keep paths predictable.
+ let raw = run_git_allow_exit_codes(
+ &baseline_dir,
+ &[
+ "-c",
+ "color.ui=false",
+ "diff",
+ "--no-color",
+ "--no-index",
+ "--",
+ &left_arg,
+ &right_arg,
+ ],
+ &[0, 1], // 0: no changes, 1: differences
+ )?;
+
+ if raw.trim().is_empty() {
+ continue;
+ }
+ let rewritten = self.rewrite_diff_paths(&raw);
+ if !rewritten.trim().is_empty() {
+ if !aggregated.is_empty() && !aggregated.ends_with('\n') {
+ aggregated.push('\n');
+ }
+ aggregated.push_str(&rewritten);
+ }
+ }
+
+ self.unified_diff = if aggregated.trim().is_empty() {
+ None
+ } else {
+ Some(aggregated)
+ };
+
+ // Clean up the current dir.
+ let _ = fs::remove_dir_all(&current_dir);
+
+ Ok(self.unified_diff.clone())
+ }
+
+ fn baseline_dir(&self) -> Result<&Path> {
+ self.baseline_files_dir
+ .as_ref()
+ .map(|d| d.path())
+ .ok_or_else(|| anyhow::anyhow!("baseline temp dir not initialized"))
+ }
+
+ fn ensure_baseline_dir(&mut self) -> Result<()> {
+ if self.baseline_files_dir.is_some() {
+ return Ok(());
+ }
+ let tmp = TempDir::new().context("create baseline temp dir")?;
+ self.baseline_files_dir = Some(tmp);
+ Ok(())
+ }
+
+ /// Rewrites the internal filenames to external paths in diff headers.
+ /// Handles inputs like:
+ /// diff --git a/<uuid> b/current/<uuid>
+ /// --- a/<uuid> | /dev/null
+ /// +++ b/current/<uuid> | /dev/null
+ /// and replaces uuid with the external paths tracking baseline/current.
+ fn rewrite_diff_paths(&self, diff: &str) -> String {
+ let mut out = String::new();
+ for line in diff.lines() {
+ if let Some(rest) = line.strip_prefix("diff --git ") {
+ // Format: diff --git a/<f> b/<f>
+ let parts: Vec<&str> = rest.split_whitespace().collect();
+ if parts.len() == 2 {
+ let a = parts[0].strip_prefix("a/").unwrap_or(parts[0]);
+ let b = parts[1].strip_prefix("b/").unwrap_or(parts[1]);
+
+ let a_ext_display = if a == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let a_base = Path::new(a)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(a);
+ let mapped = self
+ .temp_name_to_baseline_external
+ .get(a_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(a));
+ mapped.display().to_string()
+ };
+
+ let b_ext_display = if b == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let b_base = Path::new(b)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(b);
+ let mapped = self
+ .temp_name_to_current_external
+ .get(b_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(b));
+ mapped.display().to_string()
+ };
+
+ out.push_str(&format!("diff --git a/{a_ext_display} b/{b_ext_display}\n"));
+ continue;
+ }
+ }
+ if let Some(rest) = line.strip_prefix("--- ") {
+ if let Some(path) = rest.strip_prefix("a/") {
+ let external_display = if path == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let p_base = Path::new(path)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(path);
+ self.temp_name_to_baseline_external
+ .get(p_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(path))
+ .display()
+ .to_string()
+ };
+ out.push_str(&format!("--- {external_display}\n"));
+ continue;
+ }
+ }
+ if let Some(rest) = line.strip_prefix("+++ ") {
+ if let Some(path) = rest.strip_prefix("b/") {
+ let external_display = if path == "/dev/null" {
+ "/dev/null".to_string()
+ } else {
+ let p_base = Path::new(path)
+ .file_name()
+ .and_then(|s| s.to_str())
+ .unwrap_or(path);
+ self.temp_name_to_current_external
+ .get(p_base)
+ .cloned()
+ .unwrap_or_else(|| PathBuf::from(path))
+ .display()
+ .to_string()
+ };
+ out.push_str(&format!("+++ {external_display}\n"));
+ continue;
+ }
+ }
+ out.push_str(line);
+ out.push('\n');
+ }
+ out
+ }
+}
+
+fn uuid_filename_for(path: &Path) -> String {
+ let id = Uuid::new_v4().to_string();
+ match path.extension().and_then(|e| e.to_str()) {
+ Some(ext) if !ext.is_empty() => format!("{id}.{ext}"),
+ _ => id,
```
> I'm still unclear why `ext` is added.
- Created: 2025-08-04 03:08:45 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250310045
```diff
@@ -0,0 +1,766 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use anyhow::anyhow;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+struct BaselineFileInfo {
+ path: Option<PathBuf>,
+ contents_bytes: Option<Vec<u8>>,
+ mode: Option<String>,
+ oid: Option<String>,
+}
+
+/// Tracks sets of changes to files and exposes the overall unified diff.
+/// Internally, the way this works is now:
+/// 1. Maintain an in-memory baseline snapshot of files when they are first seen.
+/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null).
+/// 2. Keep a stable internal filename (uuid + same extension) per external path for rename tracking.
+/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory
+/// using the `similar` crate and emit unified diffs with rewritten external paths.
+#[derive(Default)]
+pub struct TurnDiffTracker {
+ /// Map external path -> internal filename (uuid + same extension).
+ external_to_temp_name: HashMap<PathBuf, String>,
+ /// Internal filename -> baseline file info.
+ baseline_file_info: HashMap<String, BaselineFileInfo>,
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
+ /// This is where renames are tracked.
+ temp_name_to_current_path: HashMap<String, PathBuf>,
+ /// Cache of known git worktree roots to avoid repeated filesystem walks.
+ git_root_cache: Vec<PathBuf>,
+}
+
+impl TurnDiffTracker {
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Front-run apply patch calls to track the starting contents of any modified files.
+ /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen.
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
+ /// - Also updates internal mappings for move/rename events.
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
+ for (path, change) in changes.iter() {
+ // Ensure a stable internal filename exists for this external path.
+ if !self.external_to_temp_name.contains_key(path) {
+ let internal = uuid_filename_for(path);
+ self.external_to_temp_name
+ .insert(path.clone(), internal.clone());
+ self.temp_name_to_current_path
+ .insert(internal.clone(), path.clone());
+
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
+ let (contents_bytes, mode, oid) = if path.exists() {
+ let mode = file_mode_for_path(path);
+ let mode_str = mode.as_deref().unwrap_or(REGULAR_MODE);
+ let contents_bytes = blob_bytes(path, mode_str)
+ .unwrap_or_default()
+ .unwrap_or_default();
+ let oid = if mode.as_deref() == Some(SYMLINK_MODE) {
+ git_blob_sha1_hex_bytes(&contents_bytes)
+ } else {
+ self.git_blob_oid_for_path(path)
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(&contents_bytes))
+ };
+ (Some(contents_bytes), mode, Some(oid))
+ } else {
+ (None, None, Some(ZERO_OID.to_string()))
+ };
+
+ self.baseline_file_info.insert(
+ internal.clone(),
+ BaselineFileInfo {
+ path: Some(path.clone()),
+ contents_bytes,
+ mode,
+ oid,
+ },
+ );
+ }
+
+ // Track rename/move in current mapping if provided in an Update.
+ if let FileChange::Update {
+ move_path: Some(dest),
+ ..
+ } = change
+ {
+ let uuid_filename = match self.external_to_temp_name.get(path) {
+ Some(i) => i.clone(),
+ None => {
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
+ let i = uuid_filename_for(path);
+ self.baseline_file_info.insert(
+ i.clone(),
+ BaselineFileInfo {
+ path: Some(path.clone()),
+ contents_bytes: None,
+ mode: None,
+ oid: Some(ZERO_OID.to_string()),
+ },
+ );
+ i
+ }
+ };
+ // Update current external mapping for temp file name.
+ self.temp_name_to_current_path
+ .insert(uuid_filename.clone(), dest.clone());
+ // Update forward file_mapping: external current -> internal name.
+ self.external_to_temp_name.remove(path);
+ self.external_to_temp_name
+ .insert(dest.clone(), uuid_filename);
+ };
+ }
+
+ Ok(())
+ }
+
+ fn get_path_for_internal(&self, internal: &str) -> Option<PathBuf> {
+ self.temp_name_to_current_path
+ .get(internal)
+ .cloned()
+ .or_else(|| {
+ self.baseline_file_info
+ .get(internal)
+ .and_then(|info| info.path.clone())
+ })
+ }
+
+ /// Find the git worktree root for a file/directory by walking up to the first ancestor containing a `.git` entry.
+ /// Uses a simple cache of known roots and avoids negative-result caching for simplicity.
+ fn find_git_root_cached(&mut self, start: &Path) -> Option<PathBuf> {
+ let dir = if start.is_dir() {
+ start
+ } else {
+ start.parent()?
+ };
+
+ // Fast path: if any cached root is an ancestor of this path, use it.
+ if let Some(root) = self
+ .git_root_cache
+ .iter()
+ .find(|r| dir.starts_with(r))
+ .cloned()
+ {
+ return Some(root);
+ }
+
+ // Walk up to find a `.git` marker.
+ let mut cur = dir.to_path_buf();
+ loop {
+ let git_marker = cur.join(".git");
+ if git_marker.is_dir() || git_marker.is_file() {
+ if !self.git_root_cache.iter().any(|r| r == &cur) {
+ self.git_root_cache.push(cur.clone());
+ }
+ return Some(cur);
+ }
+
+ // On Windows, avoid walking above the drive or UNC share root.
+ #[cfg(windows)]
+ {
+ if is_windows_drive_or_unc_root(&cur) {
+ return None;
+ }
+ }
+
+ if let Some(parent) = cur.parent() {
+ cur = parent.to_path_buf();
+ } else {
+ return None;
+ }
+ }
+ }
+
+ /// Return a display string for `path` relative to its git root if found, else absolute.
+ fn relative_to_git_root_str(&mut self, path: &Path) -> String {
+ let s = if let Some(root) = self.find_git_root_cached(path) {
+ if let Ok(rel) = path.strip_prefix(&root) {
+ rel.display().to_string()
+ } else {
+ path.display().to_string()
+ }
+ } else {
+ path.display().to_string()
+ };
+ s.replace('\\', "/")
+ }
+
+ /// Ask git to compute the blob SHA-1 for the file at `path` within its repository.
+ /// Returns None if no repository is found or git invocation fails.
+ fn git_blob_oid_for_path(&mut self, path: &Path) -> Option<String> {
+ let root = self.find_git_root_cached(path)?;
+ // Compute a path relative to the repo root for better portability across platforms.
+ let rel = path.strip_prefix(&root).unwrap_or(path);
+ let output = Command::new("git")
+ .arg("-C")
+ .arg(&root)
+ .arg("hash-object")
+ .arg("--")
+ .arg(rel)
+ .output()
+ .ok()?;
+ if !output.status.success() {
+ return None;
+ }
+ let s = String::from_utf8_lossy(&output.stdout).trim().to_string();
+ if s.len() == 40 { Some(s) } else { None }
+ }
+
+ /// Recompute the aggregated unified diff by comparing all of the in-memory snapshots that were
+ /// collected before the first time they were touched by apply_patch during this turn with
+ /// the current repo state.
+ pub fn get_unified_diff(&mut self) -> Result<Option<String>> {
+ let mut aggregated = String::new();
+
+ // Compute diffs per tracked internal file in a stable order by external path.
+ let mut baseline_file_names: Vec<String> =
+ self.baseline_file_info.keys().cloned().collect();
+ // Sort lexicographically by full repo-relative path to match git behavior.
+ baseline_file_names.sort_by_key(|internal| {
+ self.get_path_for_internal(internal)
+ .map(|p| self.relative_to_git_root_str(&p))
+ .unwrap_or_default()
+ });
+
+ for internal in baseline_file_names {
+ // Baseline external must exist for any tracked internal.
+ let baseline_external = match self
+ .baseline_file_info
+ .get(&internal)
+ .and_then(|i| i.path.clone())
+ {
+ Some(p) => p,
+ None => continue,
+ };
+ let current_external = match self.get_path_for_internal(&internal) {
+ Some(p) => p,
+ None => continue,
+ };
+
+ // Determine modes early; needed to read symlink content correctly.
+ let baseline_mode = self
+ .baseline_file_info
+ .get(&internal)
+ .and_then(|i| i.mode.clone())
+ .unwrap_or_else(|| REGULAR_MODE.to_string());
+ let current_mode =
+ file_mode_for_path(&current_external).unwrap_or_else(|| REGULAR_MODE.to_string());
+
+ let left_bytes = self
+ .baseline_file_info
+ .get(&internal)
+ .and_then(|i| i.contents_bytes.clone());
+
+ let right_bytes = blob_bytes(&current_external, &current_mode)?;
+
+ // Fast path: identical bytes or both missing.
+ if left_bytes.as_deref() == right_bytes.as_deref() {
+ continue;
+ }
+
+ let left_display = self.relative_to_git_root_str(&baseline_external);
+ let right_display = self.relative_to_git_root_str(&current_external);
+
+ // Emit a git-style header for better readability and parity with previous behavior.
+ aggregated.push_str(&format!("diff --git a/{left_display} b/{right_display}\n"));
+
+ let is_add = left_bytes.is_none() && right_bytes.is_some();
+ let is_delete = left_bytes.is_some() && right_bytes.is_none();
+
+ if is_add {
+ aggregated.push_str(&format!("new file mode {current_mode}\n"));
+ } else if is_delete {
+ aggregated.push_str(&format!("deleted file mode {baseline_mode}\n"));
+ } else if baseline_mode != current_mode {
+ aggregated.push_str(&format!("old mode {baseline_mode}\n"));
+ aggregated.push_str(&format!("new mode {current_mode}\n"));
+ }
+
+ // Determine blob object IDs for left and right contents. Prefer stored OIDs
+ // captured from the original repo state when the change was first seen.
+ let left_oid = self
+ .baseline_file_info
+ .get(&internal)
+ .and_then(|i| i.oid.clone())
+ .or_else(|| {
+ left_bytes
+ .as_ref()
+ .map(|b| git_blob_sha1_hex_bytes(b))
+ .or(Some(ZERO_OID.to_string()))
+ })
+ .unwrap_or_else(|| ZERO_OID.to_string());
+ let right_oid = if let Some(b) = right_bytes.as_ref() {
+ if current_mode == SYMLINK_MODE {
+ git_blob_sha1_hex_bytes(b)
+ } else {
+ self.git_blob_oid_for_path(&current_external)
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(b))
+ }
+ } else {
+ ZERO_OID.to_string()
+ };
+
+ // If either side isn't valid UTF-8, emit a binary diff header and continue.
+ let left_text = left_bytes
+ .as_deref()
+ .and_then(|b| std::str::from_utf8(b).ok());
+ let right_text = right_bytes
+ .as_deref()
+ .and_then(|b| std::str::from_utf8(b).ok());
+
+ // Prefer text diffs when possible:
+ // - both sides are valid UTF-8
+ // - OR one side is missing (add/delete) and the present side is valid UTF-8
+ let can_text_diff = match (left_text, right_text, is_add, is_delete) {
+ (Some(_), Some(_), _, _) => true,
+ (_, Some(_), true, _) => true, // add: left missing, right text
+ (Some(_), _, _, true) => true, // delete: left text, right missing
+ _ => false,
+ };
+
+ if can_text_diff {
+ // Diff the contents as text, treating missing side as empty string.
+ let l = left_text.unwrap_or("");
+ let r = right_text.unwrap_or("");
+
+ // Emit index line without mode suffix to preserve current test expectations.
+ aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
+
+ let old_header = if left_bytes.is_some() {
+ format!("a/{left_display}")
+ } else {
+ "/dev/null".to_string()
+ };
+ let new_header = if right_bytes.is_some() {
+ format!("b/{right_display}")
+ } else {
+ "/dev/null".to_string()
+ };
+
+ let diff = similar::TextDiff::from_lines(l, r);
+ let unified = diff
+ .unified_diff()
+ .context_radius(3)
+ .header(&old_header, &new_header)
+ .to_string();
+
+ aggregated.push_str(&unified);
+ if !aggregated.ends_with('\n') {
+ aggregated.push('\n');
+ }
+ } else {
+ // Binary or invalid UTF-8: emit header only.
+ aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
+ let old_header = if left_bytes.is_some() {
+ format!("a/{left_display}")
+ } else {
+ "/dev/null".to_string()
+ };
+ let new_header = if right_bytes.is_some() {
+ format!("b/{right_display}")
+ } else {
+ "/dev/null".to_string()
+ };
+ aggregated.push_str(&format!("--- {old_header}\n"));
+ aggregated.push_str(&format!("+++ {new_header}\n"));
+ aggregated.push_str("Binary files differ\n");
+ if !aggregated.ends_with('\n') {
+ aggregated.push('\n');
+ }
+ }
+ }
+
+ if aggregated.trim().is_empty() {
+ Ok(None)
+ } else {
+ Ok(Some(aggregated))
+ }
+ }
+}
+
+fn uuid_filename_for(path: &Path) -> String {
+ let id = Uuid::new_v4().to_string();
+ match path.extension().and_then(|e| e.to_str()) {
+ Some(ext) if !ext.is_empty() => format!("{id}.{ext}"),
+ _ => id,
+ }
+}
+
+/// Compute the Git SHA-1 blob object ID for the given content (bytes).
+fn git_blob_sha1_hex_bytes(data: &[u8]) -> String {
+ // Git blob hash is sha1 of: "blob <len>\0<data>"
+ let header = format!("blob {}\0", data.len());
+ use sha1::Digest;
+ let mut hasher = sha1::Sha1::new();
+ hasher.update(header.as_bytes());
+ hasher.update(data);
+ let digest = hasher.finalize();
+ let mut out = String::with_capacity(40);
+ for b in digest {
+ use std::fmt::Write;
+ let _ = write!(&mut out, "{b:02x}");
+ }
+ out
+}
+
+const ZERO_OID: &str = "0000000000000000000000000000000000000000";
+const REGULAR_MODE: &str = "100644";
+#[cfg(unix)]
```
> Though even on Windows, this has to be readable (and preserved?) in a Git tree object, no?
- Created: 2025-08-04 03:11:58 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250312120
```diff
@@ -0,0 +1,766 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use anyhow::anyhow;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+struct BaselineFileInfo {
+ path: Option<PathBuf>,
+ contents_bytes: Option<Vec<u8>>,
+ mode: Option<String>,
+ oid: Option<String>,
+}
+
+/// Tracks sets of changes to files and exposes the overall unified diff.
+/// Internally, the way this works is now:
+/// 1. Maintain an in-memory baseline snapshot of files when they are first seen.
+/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null).
+/// 2. Keep a stable internal filename (uuid + same extension) per external path for rename tracking.
+/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory
+/// using the `similar` crate and emit unified diffs with rewritten external paths.
+#[derive(Default)]
+pub struct TurnDiffTracker {
+ /// Map external path -> internal filename (uuid + same extension).
+ external_to_temp_name: HashMap<PathBuf, String>,
+ /// Internal filename -> baseline file info.
+ baseline_file_info: HashMap<String, BaselineFileInfo>,
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
+ /// This is where renames are tracked.
+ temp_name_to_current_path: HashMap<String, PathBuf>,
+ /// Cache of known git worktree roots to avoid repeated filesystem walks.
+ git_root_cache: Vec<PathBuf>,
+}
+
+impl TurnDiffTracker {
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Front-run apply patch calls to track the starting contents of any modified files.
+ /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen.
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
+ /// - Also updates internal mappings for move/rename events.
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
+ for (path, change) in changes.iter() {
+ // Ensure a stable internal filename exists for this external path.
+ if !self.external_to_temp_name.contains_key(path) {
+ let internal = uuid_filename_for(path);
+ self.external_to_temp_name
+ .insert(path.clone(), internal.clone());
+ self.temp_name_to_current_path
+ .insert(internal.clone(), path.clone());
+
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
+ let (contents_bytes, mode, oid) = if path.exists() {
+ let mode = file_mode_for_path(path);
+ let mode_str = mode.as_deref().unwrap_or(REGULAR_MODE);
+ let contents_bytes = blob_bytes(path, mode_str)
+ .unwrap_or_default()
+ .unwrap_or_default();
+ let oid = if mode.as_deref() == Some(SYMLINK_MODE) {
+ git_blob_sha1_hex_bytes(&contents_bytes)
+ } else {
+ self.git_blob_oid_for_path(path)
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(&contents_bytes))
+ };
+ (Some(contents_bytes), mode, Some(oid))
+ } else {
+ (None, None, Some(ZERO_OID.to_string()))
+ };
+
+ self.baseline_file_info.insert(
+ internal.clone(),
+ BaselineFileInfo {
+ path: Some(path.clone()),
+ contents_bytes,
+ mode,
+ oid,
+ },
+ );
+ }
+
+ // Track rename/move in current mapping if provided in an Update.
+ if let FileChange::Update {
+ move_path: Some(dest),
+ ..
+ } = change
+ {
+ let uuid_filename = match self.external_to_temp_name.get(path) {
+ Some(i) => i.clone(),
+ None => {
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
+ let i = uuid_filename_for(path);
+ self.baseline_file_info.insert(
+ i.clone(),
+ BaselineFileInfo {
+ path: Some(path.clone()),
+ contents_bytes: None,
+ mode: None,
+ oid: Some(ZERO_OID.to_string()),
+ },
+ );
+ i
+ }
+ };
+ // Update current external mapping for temp file name.
+ self.temp_name_to_current_path
+ .insert(uuid_filename.clone(), dest.clone());
+ // Update forward file_mapping: external current -> internal name.
+ self.external_to_temp_name.remove(path);
+ self.external_to_temp_name
+ .insert(dest.clone(), uuid_filename);
+ };
+ }
+
+ Ok(())
+ }
+
+ fn get_path_for_internal(&self, internal: &str) -> Option<PathBuf> {
+ self.temp_name_to_current_path
+ .get(internal)
+ .cloned()
+ .or_else(|| {
+ self.baseline_file_info
+ .get(internal)
+ .and_then(|info| info.path.clone())
+ })
+ }
+
+ /// Find the git worktree root for a file/directory by walking up to the first ancestor containing a `.git` entry.
+ /// Uses a simple cache of known roots and avoids negative-result caching for simplicity.
+ fn find_git_root_cached(&mut self, start: &Path) -> Option<PathBuf> {
+ let dir = if start.is_dir() {
+ start
+ } else {
+ start.parent()?
+ };
+
+ // Fast path: if any cached root is an ancestor of this path, use it.
+ if let Some(root) = self
+ .git_root_cache
+ .iter()
+ .find(|r| dir.starts_with(r))
+ .cloned()
+ {
+ return Some(root);
+ }
+
+ // Walk up to find a `.git` marker.
+ let mut cur = dir.to_path_buf();
+ loop {
+ let git_marker = cur.join(".git");
+ if git_marker.is_dir() || git_marker.is_file() {
+ if !self.git_root_cache.iter().any(|r| r == &cur) {
+ self.git_root_cache.push(cur.clone());
+ }
+ return Some(cur);
+ }
+
+ // On Windows, avoid walking above the drive or UNC share root.
+ #[cfg(windows)]
+ {
+ if is_windows_drive_or_unc_root(&cur) {
+ return None;
+ }
+ }
+
+ if let Some(parent) = cur.parent() {
+ cur = parent.to_path_buf();
+ } else {
+ return None;
+ }
+ }
+ }
+
+ /// Return a display string for `path` relative to its git root if found, else absolute.
+ fn relative_to_git_root_str(&mut self, path: &Path) -> String {
+ let s = if let Some(root) = self.find_git_root_cached(path) {
+ if let Ok(rel) = path.strip_prefix(&root) {
+ rel.display().to_string()
+ } else {
+ path.display().to_string()
+ }
+ } else {
+ path.display().to_string()
+ };
+ s.replace('\\', "/")
+ }
+
+ /// Ask git to compute the blob SHA-1 for the file at `path` within its repository.
+ /// Returns None if no repository is found or git invocation fails.
+ fn git_blob_oid_for_path(&mut self, path: &Path) -> Option<String> {
+ let root = self.find_git_root_cached(path)?;
+ // Compute a path relative to the repo root for better portability across platforms.
+ let rel = path.strip_prefix(&root).unwrap_or(path);
+ let output = Command::new("git")
+ .arg("-C")
+ .arg(&root)
+ .arg("hash-object")
+ .arg("--")
+ .arg(rel)
+ .output()
+ .ok()?;
+ if !output.status.success() {
+ return None;
+ }
+ let s = String::from_utf8_lossy(&output.stdout).trim().to_string();
+ if s.len() == 40 { Some(s) } else { None }
+ }
+
+ /// Recompute the aggregated unified diff by comparing all of the in-memory snapshots that were
+ /// collected before the first time they were touched by apply_patch during this turn with
+ /// the current repo state.
+ pub fn get_unified_diff(&mut self) -> Result<Option<String>> {
+ let mut aggregated = String::new();
+
+ // Compute diffs per tracked internal file in a stable order by external path.
+ let mut baseline_file_names: Vec<String> =
+ self.baseline_file_info.keys().cloned().collect();
+ // Sort lexicographically by full repo-relative path to match git behavior.
+ baseline_file_names.sort_by_key(|internal| {
+ self.get_path_for_internal(internal)
+ .map(|p| self.relative_to_git_root_str(&p))
+ .unwrap_or_default()
+ });
+
+ for internal in baseline_file_names {
+ // Baseline external must exist for any tracked internal.
+ let baseline_external = match self
+ .baseline_file_info
+ .get(&internal)
+ .and_then(|i| i.path.clone())
+ {
+ Some(p) => p,
+ None => continue,
+ };
+ let current_external = match self.get_path_for_internal(&internal) {
+ Some(p) => p,
+ None => continue,
+ };
+
+ // Determine modes early; needed to read symlink content correctly.
+ let baseline_mode = self
+ .baseline_file_info
+ .get(&internal)
+ .and_then(|i| i.mode.clone())
+ .unwrap_or_else(|| REGULAR_MODE.to_string());
+ let current_mode =
+ file_mode_for_path(&current_external).unwrap_or_else(|| REGULAR_MODE.to_string());
+
+ let left_bytes = self
+ .baseline_file_info
+ .get(&internal)
+ .and_then(|i| i.contents_bytes.clone());
+
+ let right_bytes = blob_bytes(&current_external, &current_mode)?;
+
+ // Fast path: identical bytes or both missing.
+ if left_bytes.as_deref() == right_bytes.as_deref() {
+ continue;
+ }
+
+ let left_display = self.relative_to_git_root_str(&baseline_external);
+ let right_display = self.relative_to_git_root_str(&current_external);
+
+ // Emit a git-style header for better readability and parity with previous behavior.
+ aggregated.push_str(&format!("diff --git a/{left_display} b/{right_display}\n"));
+
+ let is_add = left_bytes.is_none() && right_bytes.is_some();
+ let is_delete = left_bytes.is_some() && right_bytes.is_none();
+
+ if is_add {
+ aggregated.push_str(&format!("new file mode {current_mode}\n"));
+ } else if is_delete {
+ aggregated.push_str(&format!("deleted file mode {baseline_mode}\n"));
+ } else if baseline_mode != current_mode {
+ aggregated.push_str(&format!("old mode {baseline_mode}\n"));
+ aggregated.push_str(&format!("new mode {current_mode}\n"));
+ }
+
+ // Determine blob object IDs for left and right contents. Prefer stored OIDs
+ // captured from the original repo state when the change was first seen.
+ let left_oid = self
+ .baseline_file_info
+ .get(&internal)
+ .and_then(|i| i.oid.clone())
+ .or_else(|| {
+ left_bytes
+ .as_ref()
+ .map(|b| git_blob_sha1_hex_bytes(b))
+ .or(Some(ZERO_OID.to_string()))
+ })
+ .unwrap_or_else(|| ZERO_OID.to_string());
+ let right_oid = if let Some(b) = right_bytes.as_ref() {
+ if current_mode == SYMLINK_MODE {
+ git_blob_sha1_hex_bytes(b)
+ } else {
+ self.git_blob_oid_for_path(&current_external)
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(b))
+ }
+ } else {
+ ZERO_OID.to_string()
+ };
+
+ // If either side isn't valid UTF-8, emit a binary diff header and continue.
+ let left_text = left_bytes
+ .as_deref()
+ .and_then(|b| std::str::from_utf8(b).ok());
+ let right_text = right_bytes
+ .as_deref()
+ .and_then(|b| std::str::from_utf8(b).ok());
+
+ // Prefer text diffs when possible:
+ // - both sides are valid UTF-8
+ // - OR one side is missing (add/delete) and the present side is valid UTF-8
+ let can_text_diff = match (left_text, right_text, is_add, is_delete) {
+ (Some(_), Some(_), _, _) => true,
+ (_, Some(_), true, _) => true, // add: left missing, right text
+ (Some(_), _, _, true) => true, // delete: left text, right missing
+ _ => false,
+ };
+
+ if can_text_diff {
+ // Diff the contents as text, treating missing side as empty string.
+ let l = left_text.unwrap_or("");
+ let r = right_text.unwrap_or("");
+
+ // Emit index line without mode suffix to preserve current test expectations.
+ aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
+
+ let old_header = if left_bytes.is_some() {
+ format!("a/{left_display}")
+ } else {
+ "/dev/null".to_string()
+ };
+ let new_header = if right_bytes.is_some() {
+ format!("b/{right_display}")
+ } else {
+ "/dev/null".to_string()
+ };
+
+ let diff = similar::TextDiff::from_lines(l, r);
+ let unified = diff
+ .unified_diff()
+ .context_radius(3)
+ .header(&old_header, &new_header)
+ .to_string();
+
+ aggregated.push_str(&unified);
+ if !aggregated.ends_with('\n') {
+ aggregated.push('\n');
+ }
+ } else {
+ // Binary or invalid UTF-8: emit header only.
+ aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
+ let old_header = if left_bytes.is_some() {
+ format!("a/{left_display}")
+ } else {
+ "/dev/null".to_string()
+ };
+ let new_header = if right_bytes.is_some() {
+ format!("b/{right_display}")
+ } else {
+ "/dev/null".to_string()
+ };
+ aggregated.push_str(&format!("--- {old_header}\n"));
+ aggregated.push_str(&format!("+++ {new_header}\n"));
+ aggregated.push_str("Binary files differ\n");
+ if !aggregated.ends_with('\n') {
+ aggregated.push('\n');
+ }
+ }
+ }
+
+ if aggregated.trim().is_empty() {
+ Ok(None)
+ } else {
+ Ok(Some(aggregated))
+ }
+ }
+}
+
+fn uuid_filename_for(path: &Path) -> String {
+ let id = Uuid::new_v4().to_string();
+ match path.extension().and_then(|e| e.to_str()) {
+ Some(ext) if !ext.is_empty() => format!("{id}.{ext}"),
+ _ => id,
+ }
+}
+
+/// Compute the Git SHA-1 blob object ID for the given content (bytes).
+fn git_blob_sha1_hex_bytes(data: &[u8]) -> String {
+ // Git blob hash is sha1 of: "blob <len>\0<data>"
+ let header = format!("blob {}\0", data.len());
+ use sha1::Digest;
+ let mut hasher = sha1::Sha1::new();
+ hasher.update(header.as_bytes());
+ hasher.update(data);
+ let digest = hasher.finalize();
+ let mut out = String::with_capacity(40);
+ for b in digest {
+ use std::fmt::Write;
+ let _ = write!(&mut out, "{b:02x}");
+ }
+ out
+}
+
+const ZERO_OID: &str = "0000000000000000000000000000000000000000";
+const REGULAR_MODE: &str = "100644";
+#[cfg(unix)]
+const EXECUTABLE_MODE: &str = "100755";
+const SYMLINK_MODE: &str = "120000";
+
+#[cfg(unix)]
+fn file_mode_for_path(path: &Path) -> Option<String> {
+ use std::os::unix::fs::PermissionsExt;
+ let meta = fs::symlink_metadata(path).ok()?;
+ let ft = meta.file_type();
+ if ft.is_symlink() {
+ return Some(SYMLINK_MODE.to_string());
+ }
+ let mode = meta.permissions().mode();
+ let is_exec = (mode & 0o111) != 0;
+ Some(if is_exec {
+ EXECUTABLE_MODE.into()
+ } else {
+ REGULAR_MODE.into()
+ })
+}
+
+#[cfg(not(unix))]
+fn file_mode_for_path(_path: &Path) -> Option<String> {
+ // Default to non-executable on non-unix.
+ Some(REGULAR_MODE.to_string())
+}
+
+fn blob_bytes(path: &Path, mode: &str) -> Result<Option<Vec<u8>>> {
+ if path.exists() {
+ let contents = if mode == SYMLINK_MODE {
+ symlink_blob_bytes(path)
+ .ok_or_else(|| anyhow!("failed to read symlink target for {}", path.display()))?
+ } else {
+ fs::read(path).with_context(|| {
+ format!("failed to read current file for diff {}", path.display())
+ })?
+ };
+ Ok(Some(contents))
+ } else {
+ Ok(None)
+ }
+}
+
+#[cfg(unix)]
+fn symlink_blob_bytes(path: &Path) -> Option<Vec<u8>> {
+ use std::os::unix::ffi::OsStrExt;
+ let target = std::fs::read_link(path).ok()?;
+ Some(target.as_os_str().as_bytes().to_vec())
```
> One interesting operating system fact is that the contents of a symlink do not have to be a path to a file: you can just use it for arbitrary data storage. (As such, I think the max number of bytes you can store in a symlink is `PATH_MAX`, though.)
>
> I knew of one project that did this to save a system call because `readlink()` is one system call but `open()` plus `read()` for a regular file is two?
- Created: 2025-08-04 03:12:56 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250312732
```diff
@@ -0,0 +1,766 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use anyhow::anyhow;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+struct BaselineFileInfo {
+ path: Option<PathBuf>,
+ contents_bytes: Option<Vec<u8>>,
+ mode: Option<String>,
+ oid: Option<String>,
+}
+
+/// Tracks sets of changes to files and exposes the overall unified diff.
+/// Internally, the way this works is now:
+/// 1. Maintain an in-memory baseline snapshot of files when they are first seen.
+/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null).
+/// 2. Keep a stable internal filename (uuid + same extension) per external path for rename tracking.
+/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory
+/// using the `similar` crate and emit unified diffs with rewritten external paths.
+#[derive(Default)]
+pub struct TurnDiffTracker {
+ /// Map external path -> internal filename (uuid + same extension).
+ external_to_temp_name: HashMap<PathBuf, String>,
+ /// Internal filename -> baseline file info.
+ baseline_file_info: HashMap<String, BaselineFileInfo>,
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
+ /// This is where renames are tracked.
+ temp_name_to_current_path: HashMap<String, PathBuf>,
+ /// Cache of known git worktree roots to avoid repeated filesystem walks.
+ git_root_cache: Vec<PathBuf>,
+}
+
+impl TurnDiffTracker {
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Front-run apply patch calls to track the starting contents of any modified files.
+ /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen.
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
+ /// - Also updates internal mappings for move/rename events.
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
+ for (path, change) in changes.iter() {
+ // Ensure a stable internal filename exists for this external path.
+ if !self.external_to_temp_name.contains_key(path) {
+ let internal = uuid_filename_for(path);
+ self.external_to_temp_name
+ .insert(path.clone(), internal.clone());
+ self.temp_name_to_current_path
+ .insert(internal.clone(), path.clone());
+
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
+ let (contents_bytes, mode, oid) = if path.exists() {
+ let mode = file_mode_for_path(path);
+ let mode_str = mode.as_deref().unwrap_or(REGULAR_MODE);
+ let contents_bytes = blob_bytes(path, mode_str)
+ .unwrap_or_default()
+ .unwrap_or_default();
+ let oid = if mode.as_deref() == Some(SYMLINK_MODE) {
+ git_blob_sha1_hex_bytes(&contents_bytes)
+ } else {
+ self.git_blob_oid_for_path(path)
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(&contents_bytes))
+ };
+ (Some(contents_bytes), mode, Some(oid))
+ } else {
+ (None, None, Some(ZERO_OID.to_string()))
+ };
+
+ self.baseline_file_info.insert(
+ internal.clone(),
+ BaselineFileInfo {
+ path: Some(path.clone()),
+ contents_bytes,
+ mode,
+ oid,
+ },
+ );
+ }
+
+ // Track rename/move in current mapping if provided in an Update.
+ if let FileChange::Update {
+ move_path: Some(dest),
+ ..
+ } = change
+ {
+ let uuid_filename = match self.external_to_temp_name.get(path) {
+ Some(i) => i.clone(),
+ None => {
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
+ let i = uuid_filename_for(path);
+ self.baseline_file_info.insert(
+ i.clone(),
+ BaselineFileInfo {
+ path: Some(path.clone()),
+ contents_bytes: None,
+ mode: None,
+ oid: Some(ZERO_OID.to_string()),
+ },
+ );
+ i
+ }
+ };
+ // Update current external mapping for temp file name.
+ self.temp_name_to_current_path
+ .insert(uuid_filename.clone(), dest.clone());
+ // Update forward file_mapping: external current -> internal name.
+ self.external_to_temp_name.remove(path);
+ self.external_to_temp_name
+ .insert(dest.clone(), uuid_filename);
+ };
+ }
+
+ Ok(())
+ }
+
+ fn get_path_for_internal(&self, internal: &str) -> Option<PathBuf> {
+ self.temp_name_to_current_path
+ .get(internal)
+ .cloned()
+ .or_else(|| {
+ self.baseline_file_info
+ .get(internal)
+ .and_then(|info| info.path.clone())
+ })
+ }
+
+ /// Find the git worktree root for a file/directory by walking up to the first ancestor containing a `.git` entry.
+ /// Uses a simple cache of known roots and avoids negative-result caching for simplicity.
+ fn find_git_root_cached(&mut self, start: &Path) -> Option<PathBuf> {
+ let dir = if start.is_dir() {
+ start
+ } else {
+ start.parent()?
+ };
+
+ // Fast path: if any cached root is an ancestor of this path, use it.
+ if let Some(root) = self
+ .git_root_cache
+ .iter()
+ .find(|r| dir.starts_with(r))
+ .cloned()
+ {
+ return Some(root);
+ }
+
+ // Walk up to find a `.git` marker.
+ let mut cur = dir.to_path_buf();
+ loop {
+ let git_marker = cur.join(".git");
+ if git_marker.is_dir() || git_marker.is_file() {
+ if !self.git_root_cache.iter().any(|r| r == &cur) {
+ self.git_root_cache.push(cur.clone());
+ }
+ return Some(cur);
+ }
+
+ // On Windows, avoid walking above the drive or UNC share root.
+ #[cfg(windows)]
+ {
+ if is_windows_drive_or_unc_root(&cur) {
+ return None;
+ }
+ }
+
+ if let Some(parent) = cur.parent() {
+ cur = parent.to_path_buf();
+ } else {
+ return None;
+ }
+ }
+ }
+
+ /// Return a display string for `path` relative to its git root if found, else absolute.
+ fn relative_to_git_root_str(&mut self, path: &Path) -> String {
+ let s = if let Some(root) = self.find_git_root_cached(path) {
+ if let Ok(rel) = path.strip_prefix(&root) {
+ rel.display().to_string()
+ } else {
+ path.display().to_string()
+ }
+ } else {
+ path.display().to_string()
+ };
+ s.replace('\\', "/")
+ }
+
+ /// Ask git to compute the blob SHA-1 for the file at `path` within its repository.
+ /// Returns None if no repository is found or git invocation fails.
+ fn git_blob_oid_for_path(&mut self, path: &Path) -> Option<String> {
+ let root = self.find_git_root_cached(path)?;
+ // Compute a path relative to the repo root for better portability across platforms.
+ let rel = path.strip_prefix(&root).unwrap_or(path);
+ let output = Command::new("git")
+ .arg("-C")
+ .arg(&root)
+ .arg("hash-object")
+ .arg("--")
+ .arg(rel)
+ .output()
+ .ok()?;
+ if !output.status.success() {
+ return None;
+ }
+ let s = String::from_utf8_lossy(&output.stdout).trim().to_string();
+ if s.len() == 40 { Some(s) } else { None }
+ }
+
+ /// Recompute the aggregated unified diff by comparing all of the in-memory snapshots that were
+ /// collected before the first time they were touched by apply_patch during this turn with
+ /// the current repo state.
+ pub fn get_unified_diff(&mut self) -> Result<Option<String>> {
+ let mut aggregated = String::new();
+
+ // Compute diffs per tracked internal file in a stable order by external path.
+ let mut baseline_file_names: Vec<String> =
+ self.baseline_file_info.keys().cloned().collect();
+ // Sort lexicographically by full repo-relative path to match git behavior.
+ baseline_file_names.sort_by_key(|internal| {
+ self.get_path_for_internal(internal)
+ .map(|p| self.relative_to_git_root_str(&p))
+ .unwrap_or_default()
+ });
+
+ for internal in baseline_file_names {
+ // Baseline external must exist for any tracked internal.
+ let baseline_external = match self
+ .baseline_file_info
+ .get(&internal)
+ .and_then(|i| i.path.clone())
+ {
+ Some(p) => p,
+ None => continue,
+ };
+ let current_external = match self.get_path_for_internal(&internal) {
+ Some(p) => p,
+ None => continue,
+ };
+
+ // Determine modes early; needed to read symlink content correctly.
+ let baseline_mode = self
+ .baseline_file_info
+ .get(&internal)
+ .and_then(|i| i.mode.clone())
+ .unwrap_or_else(|| REGULAR_MODE.to_string());
+ let current_mode =
+ file_mode_for_path(&current_external).unwrap_or_else(|| REGULAR_MODE.to_string());
+
+ let left_bytes = self
+ .baseline_file_info
+ .get(&internal)
+ .and_then(|i| i.contents_bytes.clone());
+
+ let right_bytes = blob_bytes(&current_external, &current_mode)?;
+
+ // Fast path: identical bytes or both missing.
+ if left_bytes.as_deref() == right_bytes.as_deref() {
+ continue;
+ }
+
+ let left_display = self.relative_to_git_root_str(&baseline_external);
+ let right_display = self.relative_to_git_root_str(&current_external);
+
+ // Emit a git-style header for better readability and parity with previous behavior.
+ aggregated.push_str(&format!("diff --git a/{left_display} b/{right_display}\n"));
+
+ let is_add = left_bytes.is_none() && right_bytes.is_some();
+ let is_delete = left_bytes.is_some() && right_bytes.is_none();
+
+ if is_add {
+ aggregated.push_str(&format!("new file mode {current_mode}\n"));
+ } else if is_delete {
+ aggregated.push_str(&format!("deleted file mode {baseline_mode}\n"));
+ } else if baseline_mode != current_mode {
+ aggregated.push_str(&format!("old mode {baseline_mode}\n"));
+ aggregated.push_str(&format!("new mode {current_mode}\n"));
+ }
+
+ // Determine blob object IDs for left and right contents. Prefer stored OIDs
+ // captured from the original repo state when the change was first seen.
+ let left_oid = self
+ .baseline_file_info
+ .get(&internal)
+ .and_then(|i| i.oid.clone())
+ .or_else(|| {
+ left_bytes
+ .as_ref()
+ .map(|b| git_blob_sha1_hex_bytes(b))
+ .or(Some(ZERO_OID.to_string()))
+ })
+ .unwrap_or_else(|| ZERO_OID.to_string());
+ let right_oid = if let Some(b) = right_bytes.as_ref() {
+ if current_mode == SYMLINK_MODE {
+ git_blob_sha1_hex_bytes(b)
+ } else {
+ self.git_blob_oid_for_path(&current_external)
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(b))
+ }
+ } else {
+ ZERO_OID.to_string()
+ };
+
+ // If either side isn't valid UTF-8, emit a binary diff header and continue.
+ let left_text = left_bytes
+ .as_deref()
+ .and_then(|b| std::str::from_utf8(b).ok());
+ let right_text = right_bytes
+ .as_deref()
+ .and_then(|b| std::str::from_utf8(b).ok());
+
+ // Prefer text diffs when possible:
+ // - both sides are valid UTF-8
+ // - OR one side is missing (add/delete) and the present side is valid UTF-8
+ let can_text_diff = match (left_text, right_text, is_add, is_delete) {
+ (Some(_), Some(_), _, _) => true,
+ (_, Some(_), true, _) => true, // add: left missing, right text
+ (Some(_), _, _, true) => true, // delete: left text, right missing
+ _ => false,
+ };
+
+ if can_text_diff {
+ // Diff the contents as text, treating missing side as empty string.
+ let l = left_text.unwrap_or("");
+ let r = right_text.unwrap_or("");
+
+ // Emit index line without mode suffix to preserve current test expectations.
+ aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
+
+ let old_header = if left_bytes.is_some() {
+ format!("a/{left_display}")
+ } else {
+ "/dev/null".to_string()
+ };
+ let new_header = if right_bytes.is_some() {
+ format!("b/{right_display}")
+ } else {
+ "/dev/null".to_string()
+ };
+
+ let diff = similar::TextDiff::from_lines(l, r);
+ let unified = diff
+ .unified_diff()
+ .context_radius(3)
+ .header(&old_header, &new_header)
+ .to_string();
+
+ aggregated.push_str(&unified);
+ if !aggregated.ends_with('\n') {
+ aggregated.push('\n');
+ }
+ } else {
+ // Binary or invalid UTF-8: emit header only.
+ aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
+ let old_header = if left_bytes.is_some() {
+ format!("a/{left_display}")
+ } else {
+ "/dev/null".to_string()
+ };
+ let new_header = if right_bytes.is_some() {
+ format!("b/{right_display}")
+ } else {
+ "/dev/null".to_string()
+ };
+ aggregated.push_str(&format!("--- {old_header}\n"));
+ aggregated.push_str(&format!("+++ {new_header}\n"));
+ aggregated.push_str("Binary files differ\n");
+ if !aggregated.ends_with('\n') {
+ aggregated.push('\n');
+ }
+ }
+ }
+
+ if aggregated.trim().is_empty() {
+ Ok(None)
+ } else {
+ Ok(Some(aggregated))
+ }
+ }
+}
+
+fn uuid_filename_for(path: &Path) -> String {
+ let id = Uuid::new_v4().to_string();
+ match path.extension().and_then(|e| e.to_str()) {
+ Some(ext) if !ext.is_empty() => format!("{id}.{ext}"),
+ _ => id,
+ }
+}
+
+/// Compute the Git SHA-1 blob object ID for the given content (bytes).
+fn git_blob_sha1_hex_bytes(data: &[u8]) -> String {
+ // Git blob hash is sha1 of: "blob <len>\0<data>"
+ let header = format!("blob {}\0", data.len());
+ use sha1::Digest;
```
> Returning this type is slightly stronger since you don't have to verify the integrity of the `String` contents elsewhere.
- Created: 2025-08-04 03:13:59 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250313339
```diff
@@ -0,0 +1,766 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use anyhow::anyhow;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+struct BaselineFileInfo {
+ path: Option<PathBuf>,
+ contents_bytes: Option<Vec<u8>>,
+ mode: Option<String>,
+ oid: Option<String>,
+}
+
+/// Tracks sets of changes to files and exposes the overall unified diff.
+/// Internally, the way this works is now:
+/// 1. Maintain an in-memory baseline snapshot of files when they are first seen.
+/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null).
+/// 2. Keep a stable internal filename (uuid + same extension) per external path for rename tracking.
+/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory
+/// using the `similar` crate and emit unified diffs with rewritten external paths.
+#[derive(Default)]
+pub struct TurnDiffTracker {
+ /// Map external path -> internal filename (uuid + same extension).
+ external_to_temp_name: HashMap<PathBuf, String>,
+ /// Internal filename -> baseline file info.
+ baseline_file_info: HashMap<String, BaselineFileInfo>,
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
+ /// This is where renames are tracked.
+ temp_name_to_current_path: HashMap<String, PathBuf>,
+ /// Cache of known git worktree roots to avoid repeated filesystem walks.
+ git_root_cache: Vec<PathBuf>,
+}
+
+impl TurnDiffTracker {
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Front-run apply patch calls to track the starting contents of any modified files.
+ /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen.
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
+ /// - Also updates internal mappings for move/rename events.
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
+ for (path, change) in changes.iter() {
+ // Ensure a stable internal filename exists for this external path.
+ if !self.external_to_temp_name.contains_key(path) {
+ let internal = uuid_filename_for(path);
+ self.external_to_temp_name
+ .insert(path.clone(), internal.clone());
+ self.temp_name_to_current_path
+ .insert(internal.clone(), path.clone());
+
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
+ let (contents_bytes, mode, oid) = if path.exists() {
+ let mode = file_mode_for_path(path);
+ let mode_str = mode.as_deref().unwrap_or(REGULAR_MODE);
+ let contents_bytes = blob_bytes(path, mode_str)
+ .unwrap_or_default()
+ .unwrap_or_default();
+ let oid = if mode.as_deref() == Some(SYMLINK_MODE) {
+ git_blob_sha1_hex_bytes(&contents_bytes)
+ } else {
+ self.git_blob_oid_for_path(path)
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(&contents_bytes))
+ };
+ (Some(contents_bytes), mode, Some(oid))
+ } else {
+ (None, None, Some(ZERO_OID.to_string()))
+ };
+
+ self.baseline_file_info.insert(
+ internal.clone(),
+ BaselineFileInfo {
+ path: Some(path.clone()),
+ contents_bytes,
+ mode,
+ oid,
+ },
+ );
+ }
+
+ // Track rename/move in current mapping if provided in an Update.
+ if let FileChange::Update {
+ move_path: Some(dest),
+ ..
+ } = change
+ {
+ let uuid_filename = match self.external_to_temp_name.get(path) {
+ Some(i) => i.clone(),
+ None => {
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
+ let i = uuid_filename_for(path);
+ self.baseline_file_info.insert(
+ i.clone(),
+ BaselineFileInfo {
+ path: Some(path.clone()),
+ contents_bytes: None,
+ mode: None,
+ oid: Some(ZERO_OID.to_string()),
+ },
+ );
+ i
+ }
+ };
+ // Update current external mapping for temp file name.
+ self.temp_name_to_current_path
+ .insert(uuid_filename.clone(), dest.clone());
+ // Update forward file_mapping: external current -> internal name.
+ self.external_to_temp_name.remove(path);
+ self.external_to_temp_name
+ .insert(dest.clone(), uuid_filename);
+ };
+ }
+
+ Ok(())
+ }
+
+ fn get_path_for_internal(&self, internal: &str) -> Option<PathBuf> {
+ self.temp_name_to_current_path
+ .get(internal)
+ .cloned()
+ .or_else(|| {
+ self.baseline_file_info
+ .get(internal)
+ .and_then(|info| info.path.clone())
+ })
+ }
+
+ /// Find the git worktree root for a file/directory by walking up to the first ancestor containing a `.git` entry.
+ /// Uses a simple cache of known roots and avoids negative-result caching for simplicity.
+ fn find_git_root_cached(&mut self, start: &Path) -> Option<PathBuf> {
+ let dir = if start.is_dir() {
+ start
+ } else {
+ start.parent()?
+ };
+
+ // Fast path: if any cached root is an ancestor of this path, use it.
+ if let Some(root) = self
+ .git_root_cache
+ .iter()
+ .find(|r| dir.starts_with(r))
+ .cloned()
+ {
+ return Some(root);
+ }
+
+ // Walk up to find a `.git` marker.
+ let mut cur = dir.to_path_buf();
+ loop {
+ let git_marker = cur.join(".git");
+ if git_marker.is_dir() || git_marker.is_file() {
+ if !self.git_root_cache.iter().any(|r| r == &cur) {
+ self.git_root_cache.push(cur.clone());
+ }
+ return Some(cur);
+ }
+
+ // On Windows, avoid walking above the drive or UNC share root.
+ #[cfg(windows)]
+ {
+ if is_windows_drive_or_unc_root(&cur) {
+ return None;
+ }
+ }
+
+ if let Some(parent) = cur.parent() {
+ cur = parent.to_path_buf();
+ } else {
+ return None;
+ }
+ }
+ }
+
+ /// Return a display string for `path` relative to its git root if found, else absolute.
+ fn relative_to_git_root_str(&mut self, path: &Path) -> String {
+ let s = if let Some(root) = self.find_git_root_cached(path) {
+ if let Ok(rel) = path.strip_prefix(&root) {
+ rel.display().to_string()
+ } else {
+ path.display().to_string()
+ }
+ } else {
+ path.display().to_string()
+ };
+ s.replace('\\', "/")
+ }
+
+ /// Ask git to compute the blob SHA-1 for the file at `path` within its repository.
+ /// Returns None if no repository is found or git invocation fails.
+ fn git_blob_oid_for_path(&mut self, path: &Path) -> Option<String> {
+ let root = self.find_git_root_cached(path)?;
+ // Compute a path relative to the repo root for better portability across platforms.
+ let rel = path.strip_prefix(&root).unwrap_or(path);
+ let output = Command::new("git")
+ .arg("-C")
+ .arg(&root)
+ .arg("hash-object")
+ .arg("--")
+ .arg(rel)
+ .output()
+ .ok()?;
+ if !output.status.success() {
+ return None;
+ }
+ let s = String::from_utf8_lossy(&output.stdout).trim().to_string();
+ if s.len() == 40 { Some(s) } else { None }
+ }
+
+ /// Recompute the aggregated unified diff by comparing all of the in-memory snapshots that were
+ /// collected before the first time they were touched by apply_patch during this turn with
+ /// the current repo state.
+ pub fn get_unified_diff(&mut self) -> Result<Option<String>> {
+ let mut aggregated = String::new();
+
+ // Compute diffs per tracked internal file in a stable order by external path.
+ let mut baseline_file_names: Vec<String> =
+ self.baseline_file_info.keys().cloned().collect();
+ // Sort lexicographically by full repo-relative path to match git behavior.
+ baseline_file_names.sort_by_key(|internal| {
+ self.get_path_for_internal(internal)
+ .map(|p| self.relative_to_git_root_str(&p))
+ .unwrap_or_default()
+ });
+
+ for internal in baseline_file_names {
+ // Baseline external must exist for any tracked internal.
+ let baseline_external = match self
+ .baseline_file_info
+ .get(&internal)
+ .and_then(|i| i.path.clone())
+ {
+ Some(p) => p,
+ None => continue,
+ };
+ let current_external = match self.get_path_for_internal(&internal) {
+ Some(p) => p,
+ None => continue,
+ };
+
+ // Determine modes early; needed to read symlink content correctly.
+ let baseline_mode = self
+ .baseline_file_info
+ .get(&internal)
+ .and_then(|i| i.mode.clone())
+ .unwrap_or_else(|| REGULAR_MODE.to_string());
+ let current_mode =
+ file_mode_for_path(&current_external).unwrap_or_else(|| REGULAR_MODE.to_string());
+
+ let left_bytes = self
+ .baseline_file_info
+ .get(&internal)
+ .and_then(|i| i.contents_bytes.clone());
+
+ let right_bytes = blob_bytes(&current_external, &current_mode)?;
+
+ // Fast path: identical bytes or both missing.
+ if left_bytes.as_deref() == right_bytes.as_deref() {
+ continue;
+ }
+
+ let left_display = self.relative_to_git_root_str(&baseline_external);
+ let right_display = self.relative_to_git_root_str(&current_external);
+
+ // Emit a git-style header for better readability and parity with previous behavior.
+ aggregated.push_str(&format!("diff --git a/{left_display} b/{right_display}\n"));
+
+ let is_add = left_bytes.is_none() && right_bytes.is_some();
+ let is_delete = left_bytes.is_some() && right_bytes.is_none();
+
+ if is_add {
+ aggregated.push_str(&format!("new file mode {current_mode}\n"));
+ } else if is_delete {
+ aggregated.push_str(&format!("deleted file mode {baseline_mode}\n"));
+ } else if baseline_mode != current_mode {
+ aggregated.push_str(&format!("old mode {baseline_mode}\n"));
+ aggregated.push_str(&format!("new mode {current_mode}\n"));
+ }
+
+ // Determine blob object IDs for left and right contents. Prefer stored OIDs
+ // captured from the original repo state when the change was first seen.
+ let left_oid = self
+ .baseline_file_info
+ .get(&internal)
+ .and_then(|i| i.oid.clone())
+ .or_else(|| {
+ left_bytes
+ .as_ref()
+ .map(|b| git_blob_sha1_hex_bytes(b))
+ .or(Some(ZERO_OID.to_string()))
+ })
+ .unwrap_or_else(|| ZERO_OID.to_string());
+ let right_oid = if let Some(b) = right_bytes.as_ref() {
+ if current_mode == SYMLINK_MODE {
+ git_blob_sha1_hex_bytes(b)
+ } else {
+ self.git_blob_oid_for_path(&current_external)
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(b))
+ }
+ } else {
+ ZERO_OID.to_string()
+ };
+
+ // If either side isn't valid UTF-8, emit a binary diff header and continue.
+ let left_text = left_bytes
+ .as_deref()
+ .and_then(|b| std::str::from_utf8(b).ok());
+ let right_text = right_bytes
+ .as_deref()
+ .and_then(|b| std::str::from_utf8(b).ok());
+
+ // Prefer text diffs when possible:
+ // - both sides are valid UTF-8
+ // - OR one side is missing (add/delete) and the present side is valid UTF-8
+ let can_text_diff = match (left_text, right_text, is_add, is_delete) {
+ (Some(_), Some(_), _, _) => true,
+ (_, Some(_), true, _) => true, // add: left missing, right text
+ (Some(_), _, _, true) => true, // delete: left text, right missing
+ _ => false,
+ };
+
+ if can_text_diff {
+ // Diff the contents as text, treating missing side as empty string.
+ let l = left_text.unwrap_or("");
+ let r = right_text.unwrap_or("");
+
+ // Emit index line without mode suffix to preserve current test expectations.
+ aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
+
+ let old_header = if left_bytes.is_some() {
+ format!("a/{left_display}")
+ } else {
+ "/dev/null".to_string()
+ };
+ let new_header = if right_bytes.is_some() {
+ format!("b/{right_display}")
+ } else {
+ "/dev/null".to_string()
+ };
+
+ let diff = similar::TextDiff::from_lines(l, r);
+ let unified = diff
+ .unified_diff()
+ .context_radius(3)
+ .header(&old_header, &new_header)
+ .to_string();
+
+ aggregated.push_str(&unified);
+ if !aggregated.ends_with('\n') {
+ aggregated.push('\n');
+ }
+ } else {
+ // Binary or invalid UTF-8: emit header only.
+ aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
+ let old_header = if left_bytes.is_some() {
+ format!("a/{left_display}")
+ } else {
+ "/dev/null".to_string()
+ };
+ let new_header = if right_bytes.is_some() {
+ format!("b/{right_display}")
+ } else {
+ "/dev/null".to_string()
+ };
+ aggregated.push_str(&format!("--- {old_header}\n"));
+ aggregated.push_str(&format!("+++ {new_header}\n"));
+ aggregated.push_str("Binary files differ\n");
+ if !aggregated.ends_with('\n') {
+ aggregated.push('\n');
+ }
+ }
+ }
+
+ if aggregated.trim().is_empty() {
+ Ok(None)
+ } else {
+ Ok(Some(aggregated))
+ }
+ }
+}
+
+fn uuid_filename_for(path: &Path) -> String {
+ let id = Uuid::new_v4().to_string();
+ match path.extension().and_then(|e| e.to_str()) {
+ Some(ext) if !ext.is_empty() => format!("{id}.{ext}"),
+ _ => id,
+ }
+}
+
+/// Compute the Git SHA-1 blob object ID for the given content (bytes).
+fn git_blob_sha1_hex_bytes(data: &[u8]) -> String {
+ // Git blob hash is sha1 of: "blob <len>\0<data>"
+ let header = format!("blob {}\0", data.len());
+ use sha1::Digest;
+ let mut hasher = sha1::Sha1::new();
+ hasher.update(header.as_bytes());
+ hasher.update(data);
+ let digest = hasher.finalize();
+ let mut out = String::with_capacity(40);
+ for b in digest {
+ use std::fmt::Write;
+ let _ = write!(&mut out, "{b:02x}");
+ }
+ out
+}
+
+const ZERO_OID: &str = "0000000000000000000000000000000000000000";
+const REGULAR_MODE: &str = "100644";
+#[cfg(unix)]
+const EXECUTABLE_MODE: &str = "100755";
+const SYMLINK_MODE: &str = "120000";
+
+#[cfg(unix)]
+fn file_mode_for_path(path: &Path) -> Option<String> {
+ use std::os::unix::fs::PermissionsExt;
+ let meta = fs::symlink_metadata(path).ok()?;
+ let ft = meta.file_type();
+ if ft.is_symlink() {
+ return Some(SYMLINK_MODE.to_string());
+ }
+ let mode = meta.permissions().mode();
+ let is_exec = (mode & 0o111) != 0;
+ Some(if is_exec {
+ EXECUTABLE_MODE.into()
+ } else {
+ REGULAR_MODE.into()
+ })
+}
+
+#[cfg(not(unix))]
+fn file_mode_for_path(_path: &Path) -> Option<String> {
+ // Default to non-executable on non-unix.
+ Some(REGULAR_MODE.to_string())
+}
+
+fn blob_bytes(path: &Path, mode: &str) -> Result<Option<Vec<u8>>> {
+ if path.exists() {
+ let contents = if mode == SYMLINK_MODE {
+ symlink_blob_bytes(path)
+ .ok_or_else(|| anyhow!("failed to read symlink target for {}", path.display()))?
+ } else {
+ fs::read(path).with_context(|| {
+ format!("failed to read current file for diff {}", path.display())
+ })?
+ };
+ Ok(Some(contents))
+ } else {
+ Ok(None)
+ }
+}
+
+#[cfg(unix)]
+fn symlink_blob_bytes(path: &Path) -> Option<Vec<u8>> {
+ use std::os::unix::ffi::OsStrExt;
+ let target = std::fs::read_link(path).ok()?;
+ Some(target.as_os_str().as_bytes().to_vec())
+}
+
+#[cfg(not(unix))]
+fn symlink_blob_bytes(_path: &Path) -> Option<Vec<u8>> {
+ None
+}
+
+#[cfg(windows)]
+fn is_windows_drive_or_unc_root(p: &std::path::Path) -> bool {
+ use std::path::Component;
+ let mut comps = p.components();
+ matches!(
+ (comps.next(), comps.next(), comps.next()),
+ (Some(Component::Prefix(_)), Some(Component::RootDir), None)
+ )
+}
+
+#[cfg(test)]
+mod tests {
+ #![allow(clippy::unwrap_used)]
+ use super::*;
+ use pretty_assertions::assert_eq;
+ use tempfile::tempdir;
+
+ /// Compute the Git SHA-1 blob object ID for the given content (string).
+ /// This delegates to the bytes version to avoid UTF-8 lossy conversions here.
+ fn git_blob_sha1_hex(data: &str) -> String {
+ git_blob_sha1_hex_bytes(data.as_bytes())
+ }
+
+ fn normalize_diff_for_test(input: &str, root: &Path) -> String {
+ let root_str = root.display().to_string().replace('\\', "/");
+ let replaced = input.replace(&root_str, "<TMP>");
+ // Split into blocks on lines starting with "diff --git ", sort blocks for determinism, and rejoin
+ let mut blocks: Vec<String> = Vec::new();
+ let mut current = String::new();
+ for line in replaced.lines() {
+ if line.starts_with("diff --git ") && !current.is_empty() {
+ blocks.push(current);
+ current = String::new();
+ }
+ if !current.is_empty() {
+ current.push('\n');
+ }
+ current.push_str(line);
+ }
+ if !current.is_empty() {
+ blocks.push(current);
+ }
+ blocks.sort();
+ let mut out = blocks.join("\n");
+ if !out.ends_with('\n') {
+ out.push('\n');
+ }
+ out
+ }
+
+ #[test]
+ fn accumulates_add_and_update() {
+ let mut acc = TurnDiffTracker::new();
+
+ let dir = tempdir().unwrap();
+ let file = dir.path().join("a.txt");
+
+ // First patch: add file (baseline should be /dev/null).
+ let add_changes = HashMap::from([(
+ file.clone(),
+ FileChange::Add {
+ content: "foo\n".to_string(),
+ },
+ )]);
+ acc.on_patch_begin(&add_changes).unwrap();
+
+ // Simulate apply: create the file on disk.
+ fs::write(&file, "foo\n").unwrap();
+ let first = acc.get_unified_diff().unwrap().unwrap();
+ let first = normalize_diff_for_test(&first, dir.path());
+ let expected_first = {
+ let mode = file_mode_for_path(&file).unwrap_or_else(|| REGULAR_MODE.to_string());
+ let right_oid = git_blob_sha1_hex("foo\n");
+ format!(
+ "diff --git a/<TMP>/a.txt b/<TMP>/a.txt\nnew file mode {mode}\nindex {ZERO_OID}..{right_oid}\n--- /dev/null\n+++ b/<TMP>/a.txt\n@@ -0,0 +1 @@\n+foo\n",
```
> `r#` for better readability?
- Created: 2025-08-04 03:14:59 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250313964
```diff
@@ -0,0 +1,766 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use anyhow::anyhow;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+struct BaselineFileInfo {
+ path: Option<PathBuf>,
+ contents_bytes: Option<Vec<u8>>,
+ mode: Option<String>,
+ oid: Option<String>,
+}
+
+/// Tracks sets of changes to files and exposes the overall unified diff.
+/// Internally, the way this works is now:
+/// 1. Maintain an in-memory baseline snapshot of files when they are first seen.
+/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null).
+/// 2. Keep a stable internal filename (uuid + same extension) per external path for rename tracking.
+/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory
+/// using the `similar` crate and emit unified diffs with rewritten external paths.
+#[derive(Default)]
+pub struct TurnDiffTracker {
+ /// Map external path -> internal filename (uuid + same extension).
+ external_to_temp_name: HashMap<PathBuf, String>,
+ /// Internal filename -> baseline file info.
+ baseline_file_info: HashMap<String, BaselineFileInfo>,
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
+ /// This is where renames are tracked.
+ temp_name_to_current_path: HashMap<String, PathBuf>,
+ /// Cache of known git worktree roots to avoid repeated filesystem walks.
+ git_root_cache: Vec<PathBuf>,
+}
+
+impl TurnDiffTracker {
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Front-run apply patch calls to track the starting contents of any modified files.
+ /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen.
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
+ /// - Also updates internal mappings for move/rename events.
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
+ for (path, change) in changes.iter() {
+ // Ensure a stable internal filename exists for this external path.
+ if !self.external_to_temp_name.contains_key(path) {
+ let internal = uuid_filename_for(path);
+ self.external_to_temp_name
+ .insert(path.clone(), internal.clone());
+ self.temp_name_to_current_path
+ .insert(internal.clone(), path.clone());
+
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
+ let (contents_bytes, mode, oid) = if path.exists() {
+ let mode = file_mode_for_path(path);
+ let mode_str = mode.as_deref().unwrap_or(REGULAR_MODE);
+ let contents_bytes = blob_bytes(path, mode_str)
+ .unwrap_or_default()
+ .unwrap_or_default();
+ let oid = if mode.as_deref() == Some(SYMLINK_MODE) {
+ git_blob_sha1_hex_bytes(&contents_bytes)
+ } else {
+ self.git_blob_oid_for_path(path)
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(&contents_bytes))
+ };
+ (Some(contents_bytes), mode, Some(oid))
+ } else {
+ (None, None, Some(ZERO_OID.to_string()))
+ };
+
+ self.baseline_file_info.insert(
+ internal.clone(),
+ BaselineFileInfo {
+ path: Some(path.clone()),
+ contents_bytes,
+ mode,
+ oid,
+ },
+ );
+ }
+
+ // Track rename/move in current mapping if provided in an Update.
+ if let FileChange::Update {
+ move_path: Some(dest),
+ ..
+ } = change
+ {
+ let uuid_filename = match self.external_to_temp_name.get(path) {
+ Some(i) => i.clone(),
+ None => {
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
+ let i = uuid_filename_for(path);
+ self.baseline_file_info.insert(
+ i.clone(),
+ BaselineFileInfo {
+ path: Some(path.clone()),
+ contents_bytes: None,
+ mode: None,
+ oid: Some(ZERO_OID.to_string()),
+ },
+ );
+ i
+ }
+ };
+ // Update current external mapping for temp file name.
+ self.temp_name_to_current_path
+ .insert(uuid_filename.clone(), dest.clone());
+ // Update forward file_mapping: external current -> internal name.
+ self.external_to_temp_name.remove(path);
+ self.external_to_temp_name
+ .insert(dest.clone(), uuid_filename);
+ };
+ }
+
+ Ok(())
+ }
+
+ fn get_path_for_internal(&self, internal: &str) -> Option<PathBuf> {
+ self.temp_name_to_current_path
+ .get(internal)
+ .cloned()
+ .or_else(|| {
+ self.baseline_file_info
+ .get(internal)
+ .and_then(|info| info.path.clone())
+ })
+ }
+
+ /// Find the git worktree root for a file/directory by walking up to the first ancestor containing a `.git` entry.
+ /// Uses a simple cache of known roots and avoids negative-result caching for simplicity.
+ fn find_git_root_cached(&mut self, start: &Path) -> Option<PathBuf> {
+ let dir = if start.is_dir() {
+ start
+ } else {
+ start.parent()?
+ };
+
+ // Fast path: if any cached root is an ancestor of this path, use it.
+ if let Some(root) = self
+ .git_root_cache
+ .iter()
+ .find(|r| dir.starts_with(r))
+ .cloned()
+ {
+ return Some(root);
+ }
+
+ // Walk up to find a `.git` marker.
+ let mut cur = dir.to_path_buf();
+ loop {
+ let git_marker = cur.join(".git");
+ if git_marker.is_dir() || git_marker.is_file() {
+ if !self.git_root_cache.iter().any(|r| r == &cur) {
+ self.git_root_cache.push(cur.clone());
+ }
+ return Some(cur);
+ }
+
+ // On Windows, avoid walking above the drive or UNC share root.
+ #[cfg(windows)]
+ {
+ if is_windows_drive_or_unc_root(&cur) {
+ return None;
+ }
+ }
+
+ if let Some(parent) = cur.parent() {
+ cur = parent.to_path_buf();
+ } else {
+ return None;
+ }
+ }
+ }
+
+ /// Return a display string for `path` relative to its git root if found, else absolute.
+ fn relative_to_git_root_str(&mut self, path: &Path) -> String {
+ let s = if let Some(root) = self.find_git_root_cached(path) {
+ if let Ok(rel) = path.strip_prefix(&root) {
+ rel.display().to_string()
+ } else {
+ path.display().to_string()
+ }
+ } else {
+ path.display().to_string()
+ };
+ s.replace('\\', "/")
+ }
+
+ /// Ask git to compute the blob SHA-1 for the file at `path` within its repository.
+ /// Returns None if no repository is found or git invocation fails.
+ fn git_blob_oid_for_path(&mut self, path: &Path) -> Option<String> {
+ let root = self.find_git_root_cached(path)?;
+ // Compute a path relative to the repo root for better portability across platforms.
+ let rel = path.strip_prefix(&root).unwrap_or(path);
+ let output = Command::new("git")
+ .arg("-C")
+ .arg(&root)
+ .arg("hash-object")
+ .arg("--")
+ .arg(rel)
+ .output()
+ .ok()?;
+ if !output.status.success() {
+ return None;
+ }
+ let s = String::from_utf8_lossy(&output.stdout).trim().to_string();
+ if s.len() == 40 { Some(s) } else { None }
+ }
+
+ /// Recompute the aggregated unified diff by comparing all of the in-memory snapshots that were
+ /// collected before the first time they were touched by apply_patch during this turn with
+ /// the current repo state.
+ pub fn get_unified_diff(&mut self) -> Result<Option<String>> {
+ let mut aggregated = String::new();
+
+ // Compute diffs per tracked internal file in a stable order by external path.
+ let mut baseline_file_names: Vec<String> =
+ self.baseline_file_info.keys().cloned().collect();
+ // Sort lexicographically by full repo-relative path to match git behavior.
+ baseline_file_names.sort_by_key(|internal| {
+ self.get_path_for_internal(internal)
+ .map(|p| self.relative_to_git_root_str(&p))
+ .unwrap_or_default()
+ });
+
+ for internal in baseline_file_names {
+ // Baseline external must exist for any tracked internal.
+ let baseline_external = match self
+ .baseline_file_info
+ .get(&internal)
+ .and_then(|i| i.path.clone())
+ {
+ Some(p) => p,
+ None => continue,
+ };
+ let current_external = match self.get_path_for_internal(&internal) {
+ Some(p) => p,
+ None => continue,
+ };
+
+ // Determine modes early; needed to read symlink content correctly.
+ let baseline_mode = self
+ .baseline_file_info
+ .get(&internal)
+ .and_then(|i| i.mode.clone())
+ .unwrap_or_else(|| REGULAR_MODE.to_string());
+ let current_mode =
+ file_mode_for_path(&current_external).unwrap_or_else(|| REGULAR_MODE.to_string());
+
+ let left_bytes = self
+ .baseline_file_info
+ .get(&internal)
+ .and_then(|i| i.contents_bytes.clone());
+
+ let right_bytes = blob_bytes(&current_external, &current_mode)?;
+
+ // Fast path: identical bytes or both missing.
+ if left_bytes.as_deref() == right_bytes.as_deref() {
+ continue;
+ }
+
+ let left_display = self.relative_to_git_root_str(&baseline_external);
+ let right_display = self.relative_to_git_root_str(&current_external);
+
+ // Emit a git-style header for better readability and parity with previous behavior.
+ aggregated.push_str(&format!("diff --git a/{left_display} b/{right_display}\n"));
+
+ let is_add = left_bytes.is_none() && right_bytes.is_some();
+ let is_delete = left_bytes.is_some() && right_bytes.is_none();
+
+ if is_add {
+ aggregated.push_str(&format!("new file mode {current_mode}\n"));
+ } else if is_delete {
+ aggregated.push_str(&format!("deleted file mode {baseline_mode}\n"));
+ } else if baseline_mode != current_mode {
+ aggregated.push_str(&format!("old mode {baseline_mode}\n"));
+ aggregated.push_str(&format!("new mode {current_mode}\n"));
+ }
+
+ // Determine blob object IDs for left and right contents. Prefer stored OIDs
+ // captured from the original repo state when the change was first seen.
+ let left_oid = self
+ .baseline_file_info
+ .get(&internal)
+ .and_then(|i| i.oid.clone())
+ .or_else(|| {
+ left_bytes
+ .as_ref()
+ .map(|b| git_blob_sha1_hex_bytes(b))
+ .or(Some(ZERO_OID.to_string()))
+ })
+ .unwrap_or_else(|| ZERO_OID.to_string());
+ let right_oid = if let Some(b) = right_bytes.as_ref() {
+ if current_mode == SYMLINK_MODE {
+ git_blob_sha1_hex_bytes(b)
+ } else {
+ self.git_blob_oid_for_path(&current_external)
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(b))
+ }
+ } else {
+ ZERO_OID.to_string()
+ };
+
+ // If either side isn't valid UTF-8, emit a binary diff header and continue.
+ let left_text = left_bytes
+ .as_deref()
+ .and_then(|b| std::str::from_utf8(b).ok());
+ let right_text = right_bytes
+ .as_deref()
+ .and_then(|b| std::str::from_utf8(b).ok());
+
+ // Prefer text diffs when possible:
+ // - both sides are valid UTF-8
+ // - OR one side is missing (add/delete) and the present side is valid UTF-8
+ let can_text_diff = match (left_text, right_text, is_add, is_delete) {
+ (Some(_), Some(_), _, _) => true,
+ (_, Some(_), true, _) => true, // add: left missing, right text
+ (Some(_), _, _, true) => true, // delete: left text, right missing
+ _ => false,
+ };
+
+ if can_text_diff {
+ // Diff the contents as text, treating missing side as empty string.
+ let l = left_text.unwrap_or("");
+ let r = right_text.unwrap_or("");
+
+ // Emit index line without mode suffix to preserve current test expectations.
+ aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
+
+ let old_header = if left_bytes.is_some() {
+ format!("a/{left_display}")
+ } else {
+ "/dev/null".to_string()
+ };
+ let new_header = if right_bytes.is_some() {
+ format!("b/{right_display}")
+ } else {
+ "/dev/null".to_string()
+ };
+
+ let diff = similar::TextDiff::from_lines(l, r);
+ let unified = diff
+ .unified_diff()
+ .context_radius(3)
+ .header(&old_header, &new_header)
+ .to_string();
+
+ aggregated.push_str(&unified);
+ if !aggregated.ends_with('\n') {
+ aggregated.push('\n');
+ }
+ } else {
+ // Binary or invalid UTF-8: emit header only.
+ aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
+ let old_header = if left_bytes.is_some() {
+ format!("a/{left_display}")
+ } else {
+ "/dev/null".to_string()
+ };
+ let new_header = if right_bytes.is_some() {
+ format!("b/{right_display}")
+ } else {
+ "/dev/null".to_string()
+ };
+ aggregated.push_str(&format!("--- {old_header}\n"));
+ aggregated.push_str(&format!("+++ {new_header}\n"));
+ aggregated.push_str("Binary files differ\n");
+ if !aggregated.ends_with('\n') {
+ aggregated.push('\n');
+ }
+ }
+ }
+
+ if aggregated.trim().is_empty() {
+ Ok(None)
+ } else {
+ Ok(Some(aggregated))
+ }
+ }
+}
+
+fn uuid_filename_for(path: &Path) -> String {
+ let id = Uuid::new_v4().to_string();
+ match path.extension().and_then(|e| e.to_str()) {
+ Some(ext) if !ext.is_empty() => format!("{id}.{ext}"),
+ _ => id,
+ }
+}
+
+/// Compute the Git SHA-1 blob object ID for the given content (bytes).
+fn git_blob_sha1_hex_bytes(data: &[u8]) -> String {
+ // Git blob hash is sha1 of: "blob <len>\0<data>"
+ let header = format!("blob {}\0", data.len());
+ use sha1::Digest;
+ let mut hasher = sha1::Sha1::new();
+ hasher.update(header.as_bytes());
+ hasher.update(data);
+ let digest = hasher.finalize();
+ let mut out = String::with_capacity(40);
+ for b in digest {
+ use std::fmt::Write;
+ let _ = write!(&mut out, "{b:02x}");
+ }
+ out
+}
+
+const ZERO_OID: &str = "0000000000000000000000000000000000000000";
+const REGULAR_MODE: &str = "100644";
+#[cfg(unix)]
+const EXECUTABLE_MODE: &str = "100755";
+const SYMLINK_MODE: &str = "120000";
+
+#[cfg(unix)]
+fn file_mode_for_path(path: &Path) -> Option<String> {
+ use std::os::unix::fs::PermissionsExt;
+ let meta = fs::symlink_metadata(path).ok()?;
+ let ft = meta.file_type();
+ if ft.is_symlink() {
+ return Some(SYMLINK_MODE.to_string());
+ }
+ let mode = meta.permissions().mode();
+ let is_exec = (mode & 0o111) != 0;
+ Some(if is_exec {
+ EXECUTABLE_MODE.into()
+ } else {
+ REGULAR_MODE.into()
+ })
+}
+
+#[cfg(not(unix))]
+fn file_mode_for_path(_path: &Path) -> Option<String> {
+ // Default to non-executable on non-unix.
+ Some(REGULAR_MODE.to_string())
+}
+
+fn blob_bytes(path: &Path, mode: &str) -> Result<Option<Vec<u8>>> {
+ if path.exists() {
+ let contents = if mode == SYMLINK_MODE {
+ symlink_blob_bytes(path)
+ .ok_or_else(|| anyhow!("failed to read symlink target for {}", path.display()))?
+ } else {
+ fs::read(path).with_context(|| {
+ format!("failed to read current file for diff {}", path.display())
+ })?
+ };
+ Ok(Some(contents))
+ } else {
+ Ok(None)
+ }
+}
+
+#[cfg(unix)]
+fn symlink_blob_bytes(path: &Path) -> Option<Vec<u8>> {
+ use std::os::unix::ffi::OsStrExt;
+ let target = std::fs::read_link(path).ok()?;
+ Some(target.as_os_str().as_bytes().to_vec())
+}
+
+#[cfg(not(unix))]
+fn symlink_blob_bytes(_path: &Path) -> Option<Vec<u8>> {
+ None
+}
+
+#[cfg(windows)]
+fn is_windows_drive_or_unc_root(p: &std::path::Path) -> bool {
+ use std::path::Component;
+ let mut comps = p.components();
+ matches!(
+ (comps.next(), comps.next(), comps.next()),
+ (Some(Component::Prefix(_)), Some(Component::RootDir), None)
+ )
+}
+
+#[cfg(test)]
+mod tests {
+ #![allow(clippy::unwrap_used)]
+ use super::*;
+ use pretty_assertions::assert_eq;
+ use tempfile::tempdir;
+
+ /// Compute the Git SHA-1 blob object ID for the given content (string).
+ /// This delegates to the bytes version to avoid UTF-8 lossy conversions here.
+ fn git_blob_sha1_hex(data: &str) -> String {
+ git_blob_sha1_hex_bytes(data.as_bytes())
+ }
+
+ fn normalize_diff_for_test(input: &str, root: &Path) -> String {
+ let root_str = root.display().to_string().replace('\\', "/");
+ let replaced = input.replace(&root_str, "<TMP>");
+ // Split into blocks on lines starting with "diff --git ", sort blocks for determinism, and rejoin
+ let mut blocks: Vec<String> = Vec::new();
+ let mut current = String::new();
+ for line in replaced.lines() {
+ if line.starts_with("diff --git ") && !current.is_empty() {
+ blocks.push(current);
+ current = String::new();
+ }
+ if !current.is_empty() {
+ current.push('\n');
+ }
+ current.push_str(line);
+ }
+ if !current.is_empty() {
+ blocks.push(current);
+ }
+ blocks.sort();
+ let mut out = blocks.join("\n");
+ if !out.ends_with('\n') {
+ out.push('\n');
+ }
+ out
+ }
+
+ #[test]
+ fn accumulates_add_and_update() {
+ let mut acc = TurnDiffTracker::new();
+
+ let dir = tempdir().unwrap();
+ let file = dir.path().join("a.txt");
+
+ // First patch: add file (baseline should be /dev/null).
+ let add_changes = HashMap::from([(
+ file.clone(),
+ FileChange::Add {
+ content: "foo\n".to_string(),
+ },
+ )]);
+ acc.on_patch_begin(&add_changes).unwrap();
+
+ // Simulate apply: create the file on disk.
+ fs::write(&file, "foo\n").unwrap();
+ let first = acc.get_unified_diff().unwrap().unwrap();
+ let first = normalize_diff_for_test(&first, dir.path());
+ let expected_first = {
+ let mode = file_mode_for_path(&file).unwrap_or_else(|| REGULAR_MODE.to_string());
+ let right_oid = git_blob_sha1_hex("foo\n");
+ format!(
+ "diff --git a/<TMP>/a.txt b/<TMP>/a.txt\nnew file mode {mode}\nindex {ZERO_OID}..{right_oid}\n--- /dev/null\n+++ b/<TMP>/a.txt\n@@ -0,0 +1 @@\n+foo\n",
+ )
+ };
+ assert_eq!(first, expected_first);
+
+ // Second patch: update the file on disk.
+ let update_changes = HashMap::from([(
+ file.clone(),
+ FileChange::Update {
+ unified_diff: "".to_owned(),
+ move_path: None,
+ },
+ )]);
+ acc.on_patch_begin(&update_changes).unwrap();
+
+ // Simulate apply: append a new line.
+ fs::write(&file, "foo\nbar\n").unwrap();
+ let combined = acc.get_unified_diff().unwrap().unwrap();
+ let combined = normalize_diff_for_test(&combined, dir.path());
+ let expected_combined = {
+ let mode = file_mode_for_path(&file).unwrap_or_else(|| REGULAR_MODE.to_string());
+ let right_oid = git_blob_sha1_hex("foo\nbar\n");
+ format!(
+ "diff --git a/<TMP>/a.txt b/<TMP>/a.txt\nnew file mode {mode}\nindex {ZERO_OID}..{right_oid}\n--- /dev/null\n+++ b/<TMP>/a.txt\n@@ -0,0 +1,2 @@\n+foo\n+bar\n",
+ )
+ };
+ assert_eq!(combined, expected_combined);
+ }
+
+ #[test]
+ fn accumulates_delete() {
+ let dir = tempdir().unwrap();
+ let file = dir.path().join("b.txt");
+ fs::write(&file, "x\n").unwrap();
+
+ let mut acc = TurnDiffTracker::new();
+ let del_changes = HashMap::from([(file.clone(), FileChange::Delete)]);
+ acc.on_patch_begin(&del_changes).unwrap();
+
+ // Simulate apply: delete the file from disk.
+ let baseline_mode = file_mode_for_path(&file).unwrap_or_else(|| REGULAR_MODE.to_string());
+ fs::remove_file(&file).unwrap();
+ let diff = acc.get_unified_diff().unwrap().unwrap();
+ let diff = normalize_diff_for_test(&diff, dir.path());
+ let expected = {
+ let left_oid = git_blob_sha1_hex("x\n");
+ format!(
+ "diff --git a/<TMP>/b.txt b/<TMP>/b.txt\ndeleted file mode {baseline_mode}\nindex {left_oid}..{ZERO_OID}\n--- a/<TMP>/b.txt\n+++ /dev/null\n@@ -1 +0,0 @@\n-x\n",
+ )
+ };
+ assert_eq!(diff, expected);
+ }
+
+ #[test]
+ fn accumulates_move_and_update() {
+ let dir = tempdir().unwrap();
+ let src = dir.path().join("src.txt");
+ let dest = dir.path().join("dst.txt");
+ fs::write(&src, "line\n").unwrap();
+
+ let mut acc = TurnDiffTracker::new();
+ let mv_changes = HashMap::from([(
+ src.clone(),
+ FileChange::Update {
+ unified_diff: "".to_owned(),
+ move_path: Some(dest.clone()),
+ },
+ )]);
+ acc.on_patch_begin(&mv_changes).unwrap();
+
+ // Simulate apply: move and update content.
+ fs::rename(&src, &dest).unwrap();
+ fs::write(&dest, "line2\n").unwrap();
+
+ let out = acc.get_unified_diff().unwrap().unwrap();
+ let out = normalize_diff_for_test(&out, dir.path());
+ let expected = {
+ let left_oid = git_blob_sha1_hex("line\n");
+ let right_oid = git_blob_sha1_hex("line2\n");
+ format!(
+ "diff --git a/<TMP>/src.txt b/<TMP>/dst.txt\nindex {left_oid}..{right_oid}\n--- a/<TMP>/src.txt\n+++ b/<TMP>/dst.txt\n@@ -1 +1 @@\n-line\n+line2\n"
+ )
+ };
+ assert_eq!(out, expected);
+ }
+
+ #[test]
+ fn move_without_content_change_yields_no_diff() {
+ let dir = tempdir().unwrap();
+ let src = dir.path().join("moved.txt");
+ let dest = dir.path().join("renamed.txt");
+ fs::write(&src, "same\n").unwrap();
+
+ let mut acc = TurnDiffTracker::new();
+ let mv_changes = HashMap::from([(
+ src.clone(),
+ FileChange::Update {
+ unified_diff: "".to_owned(),
+ move_path: Some(dest.clone()),
+ },
+ )]);
+ acc.on_patch_begin(&mv_changes).unwrap();
+
+ // Simulate apply: move only, no content change.
+ fs::rename(&src, &dest).unwrap();
+
+ let diff = acc.get_unified_diff().unwrap();
+ assert_eq!(diff, None);
+ }
+
+ #[test]
+ fn move_declared_but_file_only_appears_at_dest_is_add() {
+ let dir = tempdir().unwrap();
+ let src = dir.path().join("src.txt");
+ let dest = dir.path().join("dest.txt");
+ let mut acc = TurnDiffTracker::new();
+ let mv = HashMap::from([(
+ src.clone(),
+ FileChange::Update {
+ unified_diff: "".into(),
+ move_path: Some(dest.clone()),
+ },
+ )]);
+ acc.on_patch_begin(&mv).unwrap();
+ // No file existed initially; create only dest
+ fs::write(&dest, "hello\n").unwrap();
+ let diff = acc.get_unified_diff().unwrap().unwrap();
+ assert!(diff.contains("new file mode"));
```
> We can't `assert_eq!()` here?
- Created: 2025-08-04 16:55:01 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2252061946
```diff
@@ -0,0 +1,766 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use anyhow::anyhow;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+struct BaselineFileInfo {
+ path: Option<PathBuf>,
+ contents_bytes: Option<Vec<u8>>,
+ mode: Option<String>,
+ oid: Option<String>,
+}
+
+/// Tracks sets of changes to files and exposes the overall unified diff.
+/// Internally, the way this works is now:
+/// 1. Maintain an in-memory baseline snapshot of files when they are first seen.
+/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null).
+/// 2. Keep a stable internal filename (uuid + same extension) per external path for rename tracking.
+/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory
+/// using the `similar` crate and emit unified diffs with rewritten external paths.
+#[derive(Default)]
+pub struct TurnDiffTracker {
+ /// Map external path -> internal filename (uuid + same extension).
+ external_to_temp_name: HashMap<PathBuf, String>,
+ /// Internal filename -> baseline file info.
+ baseline_file_info: HashMap<String, BaselineFileInfo>,
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
+ /// This is where renames are tracked.
+ temp_name_to_current_path: HashMap<String, PathBuf>,
+ /// Cache of known git worktree roots to avoid repeated filesystem walks.
+ git_root_cache: Vec<PathBuf>,
+}
+
+impl TurnDiffTracker {
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Front-run apply patch calls to track the starting contents of any modified files.
+ /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen.
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
+ /// - Also updates internal mappings for move/rename events.
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
+ for (path, change) in changes.iter() {
+ // Ensure a stable internal filename exists for this external path.
+ if !self.external_to_temp_name.contains_key(path) {
+ let internal = uuid_filename_for(path);
+ self.external_to_temp_name
+ .insert(path.clone(), internal.clone());
+ self.temp_name_to_current_path
+ .insert(internal.clone(), path.clone());
+
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
+ let (contents_bytes, mode, oid) = if path.exists() {
+ let mode = file_mode_for_path(path);
+ let mode_str = mode.as_deref().unwrap_or(REGULAR_MODE);
+ let contents_bytes = blob_bytes(path, mode_str)
+ .unwrap_or_default()
+ .unwrap_or_default();
+ let oid = if mode.as_deref() == Some(SYMLINK_MODE) {
+ git_blob_sha1_hex_bytes(&contents_bytes)
+ } else {
+ self.git_blob_oid_for_path(path)
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(&contents_bytes))
+ };
+ (Some(contents_bytes), mode, Some(oid))
+ } else {
+ (None, None, Some(ZERO_OID.to_string()))
+ };
+
+ self.baseline_file_info.insert(
+ internal.clone(),
+ BaselineFileInfo {
+ path: Some(path.clone()),
+ contents_bytes,
+ mode,
+ oid,
+ },
+ );
+ }
+
+ // Track rename/move in current mapping if provided in an Update.
+ if let FileChange::Update {
+ move_path: Some(dest),
+ ..
+ } = change
+ {
+ let uuid_filename = match self.external_to_temp_name.get(path) {
+ Some(i) => i.clone(),
+ None => {
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
+ let i = uuid_filename_for(path);
+ self.baseline_file_info.insert(
+ i.clone(),
+ BaselineFileInfo {
+ path: Some(path.clone()),
+ contents_bytes: None,
+ mode: None,
+ oid: Some(ZERO_OID.to_string()),
+ },
+ );
+ i
+ }
+ };
+ // Update current external mapping for temp file name.
+ self.temp_name_to_current_path
+ .insert(uuid_filename.clone(), dest.clone());
+ // Update forward file_mapping: external current -> internal name.
+ self.external_to_temp_name.remove(path);
+ self.external_to_temp_name
+ .insert(dest.clone(), uuid_filename);
+ };
+ }
+
+ Ok(())
+ }
+
+ fn get_path_for_internal(&self, internal: &str) -> Option<PathBuf> {
+ self.temp_name_to_current_path
+ .get(internal)
+ .cloned()
+ .or_else(|| {
+ self.baseline_file_info
+ .get(internal)
+ .and_then(|info| info.path.clone())
+ })
+ }
+
+ /// Find the git worktree root for a file/directory by walking up to the first ancestor containing a `.git` entry.
+ /// Uses a simple cache of known roots and avoids negative-result caching for simplicity.
+ fn find_git_root_cached(&mut self, start: &Path) -> Option<PathBuf> {
+ let dir = if start.is_dir() {
+ start
+ } else {
+ start.parent()?
+ };
+
+ // Fast path: if any cached root is an ancestor of this path, use it.
+ if let Some(root) = self
+ .git_root_cache
+ .iter()
+ .find(|r| dir.starts_with(r))
+ .cloned()
+ {
+ return Some(root);
+ }
+
+ // Walk up to find a `.git` marker.
+ let mut cur = dir.to_path_buf();
+ loop {
+ let git_marker = cur.join(".git");
+ if git_marker.is_dir() || git_marker.is_file() {
+ if !self.git_root_cache.iter().any(|r| r == &cur) {
+ self.git_root_cache.push(cur.clone());
+ }
+ return Some(cur);
+ }
+
+ // On Windows, avoid walking above the drive or UNC share root.
+ #[cfg(windows)]
+ {
+ if is_windows_drive_or_unc_root(&cur) {
+ return None;
+ }
+ }
+
+ if let Some(parent) = cur.parent() {
+ cur = parent.to_path_buf();
+ } else {
+ return None;
+ }
+ }
+ }
+
+ /// Return a display string for `path` relative to its git root if found, else absolute.
+ fn relative_to_git_root_str(&mut self, path: &Path) -> String {
+ let s = if let Some(root) = self.find_git_root_cached(path) {
+ if let Ok(rel) = path.strip_prefix(&root) {
+ rel.display().to_string()
+ } else {
+ path.display().to_string()
+ }
+ } else {
+ path.display().to_string()
+ };
+ s.replace('\\', "/")
+ }
+
+ /// Ask git to compute the blob SHA-1 for the file at `path` within its repository.
+ /// Returns None if no repository is found or git invocation fails.
+ fn git_blob_oid_for_path(&mut self, path: &Path) -> Option<String> {
+ let root = self.find_git_root_cached(path)?;
+ // Compute a path relative to the repo root for better portability across platforms.
+ let rel = path.strip_prefix(&root).unwrap_or(path);
+ let output = Command::new("git")
+ .arg("-C")
+ .arg(&root)
+ .arg("hash-object")
+ .arg("--")
+ .arg(rel)
+ .output()
+ .ok()?;
+ if !output.status.success() {
+ return None;
+ }
+ let s = String::from_utf8_lossy(&output.stdout).trim().to_string();
+ if s.len() == 40 { Some(s) } else { None }
+ }
+
+ /// Recompute the aggregated unified diff by comparing all of the in-memory snapshots that were
+ /// collected before the first time they were touched by apply_patch during this turn with
+ /// the current repo state.
+ pub fn get_unified_diff(&mut self) -> Result<Option<String>> {
+ let mut aggregated = String::new();
+
+ // Compute diffs per tracked internal file in a stable order by external path.
+ let mut baseline_file_names: Vec<String> =
+ self.baseline_file_info.keys().cloned().collect();
+ // Sort lexicographically by full repo-relative path to match git behavior.
+ baseline_file_names.sort_by_key(|internal| {
+ self.get_path_for_internal(internal)
+ .map(|p| self.relative_to_git_root_str(&p))
+ .unwrap_or_default()
+ });
+
+ for internal in baseline_file_names {
+ // Baseline external must exist for any tracked internal.
+ let baseline_external = match self
+ .baseline_file_info
+ .get(&internal)
+ .and_then(|i| i.path.clone())
+ {
+ Some(p) => p,
+ None => continue,
+ };
+ let current_external = match self.get_path_for_internal(&internal) {
+ Some(p) => p,
+ None => continue,
+ };
+
+ // Determine modes early; needed to read symlink content correctly.
+ let baseline_mode = self
+ .baseline_file_info
+ .get(&internal)
+ .and_then(|i| i.mode.clone())
+ .unwrap_or_else(|| REGULAR_MODE.to_string());
+ let current_mode =
+ file_mode_for_path(&current_external).unwrap_or_else(|| REGULAR_MODE.to_string());
+
+ let left_bytes = self
+ .baseline_file_info
+ .get(&internal)
+ .and_then(|i| i.contents_bytes.clone());
+
+ let right_bytes = blob_bytes(&current_external, &current_mode)?;
+
+ // Fast path: identical bytes or both missing.
+ if left_bytes.as_deref() == right_bytes.as_deref() {
+ continue;
+ }
+
+ let left_display = self.relative_to_git_root_str(&baseline_external);
+ let right_display = self.relative_to_git_root_str(&current_external);
+
+ // Emit a git-style header for better readability and parity with previous behavior.
+ aggregated.push_str(&format!("diff --git a/{left_display} b/{right_display}\n"));
+
+ let is_add = left_bytes.is_none() && right_bytes.is_some();
+ let is_delete = left_bytes.is_some() && right_bytes.is_none();
+
+ if is_add {
+ aggregated.push_str(&format!("new file mode {current_mode}\n"));
+ } else if is_delete {
+ aggregated.push_str(&format!("deleted file mode {baseline_mode}\n"));
+ } else if baseline_mode != current_mode {
+ aggregated.push_str(&format!("old mode {baseline_mode}\n"));
+ aggregated.push_str(&format!("new mode {current_mode}\n"));
+ }
+
+ // Determine blob object IDs for left and right contents. Prefer stored OIDs
+ // captured from the original repo state when the change was first seen.
+ let left_oid = self
+ .baseline_file_info
+ .get(&internal)
+ .and_then(|i| i.oid.clone())
+ .or_else(|| {
+ left_bytes
+ .as_ref()
+ .map(|b| git_blob_sha1_hex_bytes(b))
+ .or(Some(ZERO_OID.to_string()))
+ })
+ .unwrap_or_else(|| ZERO_OID.to_string());
+ let right_oid = if let Some(b) = right_bytes.as_ref() {
+ if current_mode == SYMLINK_MODE {
+ git_blob_sha1_hex_bytes(b)
+ } else {
+ self.git_blob_oid_for_path(&current_external)
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(b))
+ }
+ } else {
+ ZERO_OID.to_string()
+ };
+
+ // If either side isn't valid UTF-8, emit a binary diff header and continue.
+ let left_text = left_bytes
+ .as_deref()
+ .and_then(|b| std::str::from_utf8(b).ok());
+ let right_text = right_bytes
+ .as_deref()
+ .and_then(|b| std::str::from_utf8(b).ok());
+
+ // Prefer text diffs when possible:
+ // - both sides are valid UTF-8
+ // - OR one side is missing (add/delete) and the present side is valid UTF-8
+ let can_text_diff = match (left_text, right_text, is_add, is_delete) {
+ (Some(_), Some(_), _, _) => true,
+ (_, Some(_), true, _) => true, // add: left missing, right text
+ (Some(_), _, _, true) => true, // delete: left text, right missing
+ _ => false,
+ };
+
+ if can_text_diff {
+ // Diff the contents as text, treating missing side as empty string.
+ let l = left_text.unwrap_or("");
+ let r = right_text.unwrap_or("");
+
+ // Emit index line without mode suffix to preserve current test expectations.
+ aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
+
+ let old_header = if left_bytes.is_some() {
+ format!("a/{left_display}")
+ } else {
+ "/dev/null".to_string()
+ };
+ let new_header = if right_bytes.is_some() {
+ format!("b/{right_display}")
+ } else {
+ "/dev/null".to_string()
+ };
+
+ let diff = similar::TextDiff::from_lines(l, r);
+ let unified = diff
+ .unified_diff()
+ .context_radius(3)
+ .header(&old_header, &new_header)
+ .to_string();
+
+ aggregated.push_str(&unified);
+ if !aggregated.ends_with('\n') {
+ aggregated.push('\n');
+ }
+ } else {
+ // Binary or invalid UTF-8: emit header only.
+ aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
+ let old_header = if left_bytes.is_some() {
+ format!("a/{left_display}")
+ } else {
+ "/dev/null".to_string()
+ };
+ let new_header = if right_bytes.is_some() {
+ format!("b/{right_display}")
+ } else {
+ "/dev/null".to_string()
+ };
+ aggregated.push_str(&format!("--- {old_header}\n"));
+ aggregated.push_str(&format!("+++ {new_header}\n"));
+ aggregated.push_str("Binary files differ\n");
+ if !aggregated.ends_with('\n') {
+ aggregated.push('\n');
+ }
+ }
+ }
+
+ if aggregated.trim().is_empty() {
+ Ok(None)
+ } else {
+ Ok(Some(aggregated))
+ }
+ }
+}
+
+fn uuid_filename_for(path: &Path) -> String {
+ let id = Uuid::new_v4().to_string();
+ match path.extension().and_then(|e| e.to_str()) {
+ Some(ext) if !ext.is_empty() => format!("{id}.{ext}"),
+ _ => id,
+ }
+}
+
+/// Compute the Git SHA-1 blob object ID for the given content (bytes).
+fn git_blob_sha1_hex_bytes(data: &[u8]) -> String {
+ // Git blob hash is sha1 of: "blob <len>\0<data>"
+ let header = format!("blob {}\0", data.len());
+ use sha1::Digest;
+ let mut hasher = sha1::Sha1::new();
+ hasher.update(header.as_bytes());
+ hasher.update(data);
+ let digest = hasher.finalize();
+ let mut out = String::with_capacity(40);
+ for b in digest {
+ use std::fmt::Write;
+ let _ = write!(&mut out, "{b:02x}");
+ }
+ out
+}
+
+const ZERO_OID: &str = "0000000000000000000000000000000000000000";
+const REGULAR_MODE: &str = "100644";
+#[cfg(unix)]
```
> @gpeal but then that would change the tree hash?