mirror of
https://github.com/openai/codex.git
synced 2026-04-30 09:26:44 +00:00
9402 lines
372 KiB
Markdown
9402 lines
372 KiB
Markdown
# PR #1770: Add a TurnDiffTracker to create a unified diff for an entire turn
|
|
|
|
- URL: https://github.com/openai/codex/pull/1770
|
|
- Author: gpeal
|
|
- Created: 2025-07-31 23:42:44 UTC
|
|
- Updated: 2025-08-04 16:55:01 UTC
|
|
- Changes: +998/-18, Files changed: 9, Commits: 25
|
|
|
|
## Description
|
|
|
|
This lets us show an accumulating diff across all patches in a turn. Refer to the docs for TurnDiffTracker for implementation details.
|
|
|
|
There are multiple ways this could have been done and this felt like the right tradeoff between reliability and completeness:
|
|
*Pros*
|
|
* It will pick up all changes to files that the model touched, including changes made by prettier or another command that updates them.
|
|
* It will not pick up changes made by the user or other agents to files it didn't modify.
|
|
|
|
*Cons*
|
|
* It will pick up changes that the user made to a file that the model also touched
|
|
* It will not pick up changes to codegen output or other files that were not modified with apply_patch
|
|
|
|
## Full Diff
|
|
|
|
```diff
|
|
diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock
|
|
index 7d4e41d0b1..eb4eccd897 100644
|
|
--- a/codex-rs/Cargo.lock
|
|
+++ b/codex-rs/Cargo.lock
|
|
@@ -699,6 +699,7 @@ dependencies = [
|
|
"serde_json",
|
|
"sha1",
|
|
"shlex",
|
|
+ "similar",
|
|
"strum_macros 0.27.2",
|
|
"tempfile",
|
|
"thiserror 2.0.12",
|
|
diff --git a/codex-rs/core/Cargo.toml b/codex-rs/core/Cargo.toml
|
|
index db3fd4f834..466e9adf02 100644
|
|
--- a/codex-rs/core/Cargo.toml
|
|
+++ b/codex-rs/core/Cargo.toml
|
|
@@ -34,6 +34,7 @@ serde_json = "1"
|
|
serde_bytes = "0.11"
|
|
sha1 = "0.10.6"
|
|
shlex = "1.3.0"
|
|
+similar = "2.7.0"
|
|
strum_macros = "0.27.2"
|
|
thiserror = "2.0.12"
|
|
time = { version = "0.3", features = ["formatting", "local-offset", "macros"] }
|
|
diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
|
|
index 7004dcfcb7..568d87c4a8 100644
|
|
--- a/codex-rs/core/src/codex.rs
|
|
+++ b/codex-rs/core/src/codex.rs
|
|
@@ -85,11 +85,13 @@ use crate::protocol::SandboxPolicy;
|
|
use crate::protocol::SessionConfiguredEvent;
|
|
use crate::protocol::Submission;
|
|
use crate::protocol::TaskCompleteEvent;
|
|
+use crate::protocol::TurnDiffEvent;
|
|
use crate::rollout::RolloutRecorder;
|
|
use crate::safety::SafetyCheck;
|
|
use crate::safety::assess_command_safety;
|
|
use crate::safety::assess_safety_for_untrusted_command;
|
|
use crate::shell;
|
|
+use crate::turn_diff_tracker::TurnDiffTracker;
|
|
use crate::user_notification::UserNotification;
|
|
use crate::util::backoff;
|
|
|
|
@@ -362,7 +364,11 @@ impl Session {
|
|
}
|
|
}
|
|
|
|
- async fn notify_exec_command_begin(&self, exec_command_context: ExecCommandContext) {
|
|
+ async fn on_exec_command_begin(
|
|
+ &self,
|
|
+ turn_diff_tracker: &mut TurnDiffTracker,
|
|
+ exec_command_context: ExecCommandContext,
|
|
+ ) {
|
|
let ExecCommandContext {
|
|
sub_id,
|
|
call_id,
|
|
@@ -374,11 +380,15 @@ impl Session {
|
|
Some(ApplyPatchCommandContext {
|
|
user_explicitly_approved_this_action,
|
|
changes,
|
|
- }) => EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
|
|
- call_id,
|
|
- auto_approved: !user_explicitly_approved_this_action,
|
|
- changes,
|
|
- }),
|
|
+ }) => {
|
|
+ turn_diff_tracker.on_patch_begin(&changes);
|
|
+
|
|
+ EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
|
|
+ call_id,
|
|
+ auto_approved: !user_explicitly_approved_this_action,
|
|
+ changes,
|
|
+ })
|
|
+ }
|
|
None => EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
|
|
call_id,
|
|
command: command_for_display.clone(),
|
|
@@ -392,8 +402,10 @@ impl Session {
|
|
let _ = self.tx_event.send(event).await;
|
|
}
|
|
|
|
- async fn notify_exec_command_end(
|
|
+ #[allow(clippy::too_many_arguments)]
|
|
+ async fn on_exec_command_end(
|
|
&self,
|
|
+ turn_diff_tracker: &mut TurnDiffTracker,
|
|
sub_id: &str,
|
|
call_id: &str,
|
|
output: &ExecToolCallOutput,
|
|
@@ -433,6 +445,20 @@ impl Session {
|
|
msg,
|
|
};
|
|
let _ = self.tx_event.send(event).await;
|
|
+
|
|
+ // If this is an apply_patch, after we emit the end patch, emit a second event
|
|
+ // with the full turn diff if there is one.
|
|
+ if is_apply_patch {
|
|
+ let unified_diff = turn_diff_tracker.get_unified_diff();
|
|
+ if let Ok(Some(unified_diff)) = unified_diff {
|
|
+ let msg = EventMsg::TurnDiff(TurnDiffEvent { unified_diff });
|
|
+ let event = Event {
|
|
+ id: sub_id.into(),
|
|
+ msg,
|
|
+ };
|
|
+ let _ = self.tx_event.send(event).await;
|
|
+ }
|
|
+ }
|
|
}
|
|
|
|
/// Helper that emits a BackgroundEvent with the given message. This keeps
|
|
@@ -1006,6 +1032,10 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
|
|
.await;
|
|
|
|
let last_agent_message: Option<String>;
|
|
+ // Although from the perspective of codex.rs, TurnDiffTracker has the lifecycle of a Task which contains
|
|
+ // many turns, from the perspective of the user, it is a single turn.
|
|
+ let mut turn_diff_tracker = TurnDiffTracker::new();
|
|
+
|
|
loop {
|
|
// Note that pending_input would be something like a message the user
|
|
// submitted through the UI while the model was running. Though the UI
|
|
@@ -1037,7 +1067,7 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
|
|
})
|
|
})
|
|
.collect();
|
|
- match run_turn(&sess, sub_id.clone(), turn_input).await {
|
|
+ match run_turn(&sess, &mut turn_diff_tracker, sub_id.clone(), turn_input).await {
|
|
Ok(turn_output) => {
|
|
let mut items_to_record_in_conversation_history = Vec::<ResponseItem>::new();
|
|
let mut responses = Vec::<ResponseInputItem>::new();
|
|
@@ -1163,6 +1193,7 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
|
|
|
|
async fn run_turn(
|
|
sess: &Session,
|
|
+ turn_diff_tracker: &mut TurnDiffTracker,
|
|
sub_id: String,
|
|
input: Vec<ResponseItem>,
|
|
) -> CodexResult<Vec<ProcessedResponseItem>> {
|
|
@@ -1177,7 +1208,7 @@ async fn run_turn(
|
|
|
|
let mut retries = 0;
|
|
loop {
|
|
- match try_run_turn(sess, &sub_id, &prompt).await {
|
|
+ match try_run_turn(sess, turn_diff_tracker, &sub_id, &prompt).await {
|
|
Ok(output) => return Ok(output),
|
|
Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted),
|
|
Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)),
|
|
@@ -1223,6 +1254,7 @@ struct ProcessedResponseItem {
|
|
|
|
async fn try_run_turn(
|
|
sess: &Session,
|
|
+ turn_diff_tracker: &mut TurnDiffTracker,
|
|
sub_id: &str,
|
|
prompt: &Prompt,
|
|
) -> CodexResult<Vec<ProcessedResponseItem>> {
|
|
@@ -1310,7 +1342,8 @@ async fn try_run_turn(
|
|
match event {
|
|
ResponseEvent::Created => {}
|
|
ResponseEvent::OutputItemDone(item) => {
|
|
- let response = handle_response_item(sess, sub_id, item.clone()).await?;
|
|
+ let response =
|
|
+ handle_response_item(sess, turn_diff_tracker, sub_id, item.clone()).await?;
|
|
|
|
output.push(ProcessedResponseItem { item, response });
|
|
}
|
|
@@ -1328,6 +1361,16 @@ async fn try_run_turn(
|
|
.ok();
|
|
}
|
|
|
|
+ let unified_diff = turn_diff_tracker.get_unified_diff();
|
|
+ if let Ok(Some(unified_diff)) = unified_diff {
|
|
+ let msg = EventMsg::TurnDiff(TurnDiffEvent { unified_diff });
|
|
+ let event = Event {
|
|
+ id: sub_id.to_string(),
|
|
+ msg,
|
|
+ };
|
|
+ let _ = sess.tx_event.send(event).await;
|
|
+ }
|
|
+
|
|
return Ok(output);
|
|
}
|
|
ResponseEvent::OutputTextDelta(delta) => {
|
|
@@ -1432,6 +1475,7 @@ async fn run_compact_task(
|
|
|
|
async fn handle_response_item(
|
|
sess: &Session,
|
|
+ turn_diff_tracker: &mut TurnDiffTracker,
|
|
sub_id: &str,
|
|
item: ResponseItem,
|
|
) -> CodexResult<Option<ResponseInputItem>> {
|
|
@@ -1469,7 +1513,17 @@ async fn handle_response_item(
|
|
..
|
|
} => {
|
|
info!("FunctionCall: {arguments}");
|
|
- Some(handle_function_call(sess, sub_id.to_string(), name, arguments, call_id).await)
|
|
+ Some(
|
|
+ handle_function_call(
|
|
+ sess,
|
|
+ turn_diff_tracker,
|
|
+ sub_id.to_string(),
|
|
+ name,
|
|
+ arguments,
|
|
+ call_id,
|
|
+ )
|
|
+ .await,
|
|
+ )
|
|
}
|
|
ResponseItem::LocalShellCall {
|
|
id,
|
|
@@ -1504,6 +1558,7 @@ async fn handle_response_item(
|
|
handle_container_exec_with_params(
|
|
exec_params,
|
|
sess,
|
|
+ turn_diff_tracker,
|
|
sub_id.to_string(),
|
|
effective_call_id,
|
|
)
|
|
@@ -1521,6 +1576,7 @@ async fn handle_response_item(
|
|
|
|
async fn handle_function_call(
|
|
sess: &Session,
|
|
+ turn_diff_tracker: &mut TurnDiffTracker,
|
|
sub_id: String,
|
|
name: String,
|
|
arguments: String,
|
|
@@ -1534,7 +1590,8 @@ async fn handle_function_call(
|
|
return *output;
|
|
}
|
|
};
|
|
- handle_container_exec_with_params(params, sess, sub_id, call_id).await
|
|
+ handle_container_exec_with_params(params, sess, turn_diff_tracker, sub_id, call_id)
|
|
+ .await
|
|
}
|
|
"update_plan" => handle_update_plan(sess, arguments, sub_id, call_id).await,
|
|
_ => {
|
|
@@ -1608,6 +1665,7 @@ fn maybe_run_with_user_profile(params: ExecParams, sess: &Session) -> ExecParams
|
|
async fn handle_container_exec_with_params(
|
|
params: ExecParams,
|
|
sess: &Session,
|
|
+ turn_diff_tracker: &mut TurnDiffTracker,
|
|
sub_id: String,
|
|
call_id: String,
|
|
) -> ResponseInputItem {
|
|
@@ -1755,7 +1813,7 @@ async fn handle_container_exec_with_params(
|
|
},
|
|
),
|
|
};
|
|
- sess.notify_exec_command_begin(exec_command_context.clone())
|
|
+ sess.on_exec_command_begin(turn_diff_tracker, exec_command_context.clone())
|
|
.await;
|
|
|
|
let params = maybe_run_with_user_profile(params, sess);
|
|
@@ -1782,7 +1840,8 @@ async fn handle_container_exec_with_params(
|
|
duration,
|
|
} = &output;
|
|
|
|
- sess.notify_exec_command_end(
|
|
+ sess.on_exec_command_end(
|
|
+ turn_diff_tracker,
|
|
&sub_id,
|
|
&call_id,
|
|
&output,
|
|
@@ -1806,7 +1865,15 @@ async fn handle_container_exec_with_params(
|
|
}
|
|
}
|
|
Err(CodexErr::Sandbox(error)) => {
|
|
- handle_sandbox_error(params, exec_command_context, error, sandbox_type, sess).await
|
|
+ handle_sandbox_error(
|
|
+ turn_diff_tracker,
|
|
+ params,
|
|
+ exec_command_context,
|
|
+ error,
|
|
+ sandbox_type,
|
|
+ sess,
|
|
+ )
|
|
+ .await
|
|
}
|
|
Err(e) => {
|
|
// Handle non-sandbox errors
|
|
@@ -1822,6 +1889,7 @@ async fn handle_container_exec_with_params(
|
|
}
|
|
|
|
async fn handle_sandbox_error(
|
|
+ turn_diff_tracker: &mut TurnDiffTracker,
|
|
params: ExecParams,
|
|
exec_command_context: ExecCommandContext,
|
|
error: SandboxErr,
|
|
@@ -1878,7 +1946,8 @@ async fn handle_sandbox_error(
|
|
sess.notify_background_event(&sub_id, "retrying command without sandbox")
|
|
.await;
|
|
|
|
- sess.notify_exec_command_begin(exec_command_context).await;
|
|
+ sess.on_exec_command_begin(turn_diff_tracker, exec_command_context)
|
|
+ .await;
|
|
|
|
// This is an escalated retry; the policy will not be
|
|
// examined and the sandbox has been set to `None`.
|
|
@@ -1905,8 +1974,14 @@ async fn handle_sandbox_error(
|
|
duration,
|
|
} = &retry_output;
|
|
|
|
- sess.notify_exec_command_end(&sub_id, &call_id, &retry_output, is_apply_patch)
|
|
- .await;
|
|
+ sess.on_exec_command_end(
|
|
+ turn_diff_tracker,
|
|
+ &sub_id,
|
|
+ &call_id,
|
|
+ &retry_output,
|
|
+ is_apply_patch,
|
|
+ )
|
|
+ .await;
|
|
|
|
let is_success = *exit_code == 0;
|
|
let content = format_exec_output(
|
|
diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs
|
|
index 80f9014954..4f083d9e56 100644
|
|
--- a/codex-rs/core/src/lib.rs
|
|
+++ b/codex-rs/core/src/lib.rs
|
|
@@ -42,6 +42,7 @@ pub(crate) mod safety;
|
|
pub mod seatbelt;
|
|
pub mod shell;
|
|
pub mod spawn;
|
|
+pub mod turn_diff_tracker;
|
|
mod user_notification;
|
|
pub mod util;
|
|
|
|
diff --git a/codex-rs/core/src/protocol.rs b/codex-rs/core/src/protocol.rs
|
|
index cbb211d955..82591a2c78 100644
|
|
--- a/codex-rs/core/src/protocol.rs
|
|
+++ b/codex-rs/core/src/protocol.rs
|
|
@@ -387,6 +387,8 @@ pub enum EventMsg {
|
|
/// Notification that a patch application has finished.
|
|
PatchApplyEnd(PatchApplyEndEvent),
|
|
|
|
+ TurnDiff(TurnDiffEvent),
|
|
+
|
|
/// Response to GetHistoryEntryRequest.
|
|
GetHistoryEntryResponse(GetHistoryEntryResponseEvent),
|
|
|
|
@@ -598,6 +600,11 @@ pub struct PatchApplyEndEvent {
|
|
pub success: bool,
|
|
}
|
|
|
|
+#[derive(Debug, Clone, Deserialize, Serialize)]
|
|
+pub struct TurnDiffEvent {
|
|
+ pub unified_diff: String,
|
|
+}
|
|
+
|
|
#[derive(Debug, Clone, Deserialize, Serialize)]
|
|
pub struct GetHistoryEntryResponseEvent {
|
|
pub offset: usize,
|
|
diff --git a/codex-rs/core/src/turn_diff_tracker.rs b/codex-rs/core/src/turn_diff_tracker.rs
|
|
new file mode 100644
|
|
index 0000000000..7026d7bb32
|
|
--- /dev/null
|
|
+++ b/codex-rs/core/src/turn_diff_tracker.rs
|
|
@@ -0,0 +1,887 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use anyhow::anyhow;
|
|
+use sha1::digest::Output;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+const ZERO_OID: &str = "0000000000000000000000000000000000000000";
|
|
+const DEV_NULL: &str = "/dev/null";
|
|
+
|
|
+struct BaselineFileInfo {
|
|
+ path: PathBuf,
|
|
+ content: Vec<u8>,
|
|
+ mode: FileMode,
|
|
+ oid: String,
|
|
+}
|
|
+
|
|
+/// Tracks sets of changes to files and exposes the overall unified diff.
|
|
+/// Internally, the way this works is now:
|
|
+/// 1. Maintain an in-memory baseline snapshot of files when they are first seen.
|
|
+/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null).
|
|
+/// 2. Keep a stable internal filename (uuid) per external path for rename tracking.
|
|
+/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory
|
|
+/// using the `similar` crate and emit unified diffs with rewritten external paths.
|
|
+#[derive(Default)]
|
|
+pub struct TurnDiffTracker {
|
|
+ /// Map external path -> internal filename (uuid).
|
|
+ external_to_temp_name: HashMap<PathBuf, String>,
|
|
+ /// Internal filename -> baseline file info.
|
|
+ baseline_file_info: HashMap<String, BaselineFileInfo>,
|
|
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
|
|
+ /// This is where renames are tracked.
|
|
+ temp_name_to_current_path: HashMap<String, PathBuf>,
|
|
+ /// Cache of known git worktree roots to avoid repeated filesystem walks.
|
|
+ git_root_cache: Vec<PathBuf>,
|
|
+}
|
|
+
|
|
+impl TurnDiffTracker {
|
|
+ pub fn new() -> Self {
|
|
+ Self::default()
|
|
+ }
|
|
+
|
|
+ /// Front-run apply patch calls to track the starting contents of any modified files.
|
|
+ /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen.
|
|
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
|
|
+ /// - Also updates internal mappings for move/rename events.
|
|
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) {
|
|
+ for (path, change) in changes.iter() {
|
|
+ // Ensure a stable internal filename exists for this external path.
|
|
+ if !self.external_to_temp_name.contains_key(path) {
|
|
+ let internal = Uuid::new_v4().to_string();
|
|
+ self.external_to_temp_name
|
|
+ .insert(path.clone(), internal.clone());
|
|
+ self.temp_name_to_current_path
|
|
+ .insert(internal.clone(), path.clone());
|
|
+
|
|
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
|
|
+ let baseline_file_info = if path.exists() {
|
|
+ let mode = file_mode_for_path(path);
|
|
+ let mode_val = mode.unwrap_or(FileMode::Regular);
|
|
+ let content = blob_bytes(path, &mode_val).unwrap_or_default();
|
|
+ let oid = if mode == Some(FileMode::Symlink) {
|
|
+ format!("{:x}", git_blob_sha1_hex_bytes(&content))
|
|
+ } else {
|
|
+ self.git_blob_oid_for_path(path)
|
|
+ .unwrap_or_else(|| format!("{:x}", git_blob_sha1_hex_bytes(&content)))
|
|
+ };
|
|
+ Some(BaselineFileInfo {
|
|
+ path: path.clone(),
|
|
+ content,
|
|
+ mode: mode_val,
|
|
+ oid,
|
|
+ })
|
|
+ } else {
|
|
+ Some(BaselineFileInfo {
|
|
+ path: path.clone(),
|
|
+ content: vec![],
|
|
+ mode: FileMode::Regular,
|
|
+ oid: ZERO_OID.to_string(),
|
|
+ })
|
|
+ };
|
|
+
|
|
+ if let Some(baseline_file_info) = baseline_file_info {
|
|
+ self.baseline_file_info
|
|
+ .insert(internal.clone(), baseline_file_info);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // Track rename/move in current mapping if provided in an Update.
|
|
+ if let FileChange::Update {
|
|
+ move_path: Some(dest),
|
|
+ ..
|
|
+ } = change
|
|
+ {
|
|
+ let uuid_filename = match self.external_to_temp_name.get(path) {
|
|
+ Some(i) => i.clone(),
|
|
+ None => {
|
|
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
|
|
+ let i = Uuid::new_v4().to_string();
|
|
+ self.baseline_file_info.insert(
|
|
+ i.clone(),
|
|
+ BaselineFileInfo {
|
|
+ path: path.clone(),
|
|
+ content: vec![],
|
|
+ mode: FileMode::Regular,
|
|
+ oid: ZERO_OID.to_string(),
|
|
+ },
|
|
+ );
|
|
+ i
|
|
+ }
|
|
+ };
|
|
+ // Update current external mapping for temp file name.
|
|
+ self.temp_name_to_current_path
|
|
+ .insert(uuid_filename.clone(), dest.clone());
|
|
+ // Update forward file_mapping: external current -> internal name.
|
|
+ self.external_to_temp_name.remove(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(dest.clone(), uuid_filename);
|
|
+ };
|
|
+ }
|
|
+ }
|
|
+
|
|
+ fn get_path_for_internal(&self, internal: &str) -> Option<PathBuf> {
|
|
+ self.temp_name_to_current_path
|
|
+ .get(internal)
|
|
+ .cloned()
|
|
+ .or_else(|| {
|
|
+ self.baseline_file_info
|
|
+ .get(internal)
|
|
+ .map(|info| info.path.clone())
|
|
+ })
|
|
+ }
|
|
+
|
|
+ /// Find the git worktree root for a file/directory by walking up to the first ancestor containing a `.git` entry.
|
|
+ /// Uses a simple cache of known roots and avoids negative-result caching for simplicity.
|
|
+ fn find_git_root_cached(&mut self, start: &Path) -> Option<PathBuf> {
|
|
+ let dir = if start.is_dir() {
|
|
+ start
|
|
+ } else {
|
|
+ start.parent()?
|
|
+ };
|
|
+
|
|
+ // Fast path: if any cached root is an ancestor of this path, use it.
|
|
+ if let Some(root) = self
|
|
+ .git_root_cache
|
|
+ .iter()
|
|
+ .find(|r| dir.starts_with(r))
|
|
+ .cloned()
|
|
+ {
|
|
+ return Some(root);
|
|
+ }
|
|
+
|
|
+ // Walk up to find a `.git` marker.
|
|
+ let mut cur = dir.to_path_buf();
|
|
+ loop {
|
|
+ let git_marker = cur.join(".git");
|
|
+ if git_marker.is_dir() || git_marker.is_file() {
|
|
+ if !self.git_root_cache.iter().any(|r| r == &cur) {
|
|
+ self.git_root_cache.push(cur.clone());
|
|
+ }
|
|
+ return Some(cur);
|
|
+ }
|
|
+
|
|
+ // On Windows, avoid walking above the drive or UNC share root.
|
|
+ #[cfg(windows)]
|
|
+ {
|
|
+ if is_windows_drive_or_unc_root(&cur) {
|
|
+ return None;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if let Some(parent) = cur.parent() {
|
|
+ cur = parent.to_path_buf();
|
|
+ } else {
|
|
+ return None;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Return a display string for `path` relative to its git root if found, else absolute.
|
|
+ fn relative_to_git_root_str(&mut self, path: &Path) -> String {
|
|
+ let s = if let Some(root) = self.find_git_root_cached(path) {
|
|
+ if let Ok(rel) = path.strip_prefix(&root) {
|
|
+ rel.display().to_string()
|
|
+ } else {
|
|
+ path.display().to_string()
|
|
+ }
|
|
+ } else {
|
|
+ path.display().to_string()
|
|
+ };
|
|
+ s.replace('\\', "/")
|
|
+ }
|
|
+
|
|
+ /// Ask git to compute the blob SHA-1 for the file at `path` within its repository.
|
|
+ /// Returns None if no repository is found or git invocation fails.
|
|
+ fn git_blob_oid_for_path(&mut self, path: &Path) -> Option<String> {
|
|
+ let root = self.find_git_root_cached(path)?;
|
|
+ // Compute a path relative to the repo root for better portability across platforms.
|
|
+ let rel = path.strip_prefix(&root).unwrap_or(path);
|
|
+ let output = Command::new("git")
|
|
+ .arg("-C")
|
|
+ .arg(&root)
|
|
+ .arg("hash-object")
|
|
+ .arg("--")
|
|
+ .arg(rel)
|
|
+ .output()
|
|
+ .ok()?;
|
|
+ if !output.status.success() {
|
|
+ return None;
|
|
+ }
|
|
+ let s = String::from_utf8_lossy(&output.stdout).trim().to_string();
|
|
+ if s.len() == 40 { Some(s) } else { None }
|
|
+ }
|
|
+
|
|
+ /// Recompute the aggregated unified diff by comparing all of the in-memory snapshots that were
|
|
+ /// collected before the first time they were touched by apply_patch during this turn with
|
|
+ /// the current repo state.
|
|
+ pub fn get_unified_diff(&mut self) -> Result<Option<String>> {
|
|
+ let mut aggregated = String::new();
|
|
+
|
|
+ // Compute diffs per tracked internal file in a stable order by external path.
|
|
+ let mut baseline_file_names: Vec<String> =
|
|
+ self.baseline_file_info.keys().cloned().collect();
|
|
+ // Sort lexicographically by full repo-relative path to match git behavior.
|
|
+ baseline_file_names.sort_by_key(|internal| {
|
|
+ self.get_path_for_internal(internal)
|
|
+ .map(|p| self.relative_to_git_root_str(&p))
|
|
+ .unwrap_or_default()
|
|
+ });
|
|
+
|
|
+ for internal in baseline_file_names {
|
|
+ aggregated.push_str(self.get_file_diff(&internal).as_str());
|
|
+ if !aggregated.ends_with('\n') {
|
|
+ aggregated.push('\n');
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if aggregated.trim().is_empty() {
|
|
+ Ok(None)
|
|
+ } else {
|
|
+ Ok(Some(aggregated))
|
|
+ }
|
|
+ }
|
|
+
|
|
+ fn get_file_diff(&mut self, internal_file_name: &str) -> String {
|
|
+ let mut aggregated = String::new();
|
|
+
|
|
+ // Snapshot lightweight fields only.
|
|
+ let (baseline_external_path, baseline_mode, left_oid) = {
|
|
+ if let Some(info) = self.baseline_file_info.get(internal_file_name) {
|
|
+ (info.path.clone(), info.mode, info.oid.clone())
|
|
+ } else {
|
|
+ (PathBuf::new(), FileMode::Regular, ZERO_OID.to_string())
|
|
+ }
|
|
+ };
|
|
+ let current_external_path = match self.get_path_for_internal(internal_file_name) {
|
|
+ Some(p) => p,
|
|
+ None => return aggregated,
|
|
+ };
|
|
+
|
|
+ let current_mode = file_mode_for_path(¤t_external_path).unwrap_or(FileMode::Regular);
|
|
+ let right_bytes = blob_bytes(¤t_external_path, ¤t_mode);
|
|
+
|
|
+ // Compute displays with &mut self before borrowing any baseline content.
|
|
+ let left_display = self.relative_to_git_root_str(&baseline_external_path);
|
|
+ let right_display = self.relative_to_git_root_str(¤t_external_path);
|
|
+
|
|
+ // Compute right oid before borrowing baseline content.
|
|
+ let right_oid = if let Some(b) = right_bytes.as_ref() {
|
|
+ if current_mode == FileMode::Symlink {
|
|
+ format!("{:x}", git_blob_sha1_hex_bytes(b))
|
|
+ } else {
|
|
+ self.git_blob_oid_for_path(¤t_external_path)
|
|
+ .unwrap_or_else(|| format!("{:x}", git_blob_sha1_hex_bytes(b)))
|
|
+ }
|
|
+ } else {
|
|
+ ZERO_OID.to_string()
|
|
+ };
|
|
+
|
|
+ // Borrow baseline content only after all &mut self uses are done.
|
|
+ let left_present = left_oid.as_str() != ZERO_OID;
|
|
+ let left_bytes: Option<&[u8]> = if left_present {
|
|
+ self.baseline_file_info
|
|
+ .get(internal_file_name)
|
|
+ .map(|i| i.content.as_slice())
|
|
+ } else {
|
|
+ None
|
|
+ };
|
|
+
|
|
+ // Fast path: identical bytes or both missing.
|
|
+ if left_bytes == right_bytes.as_deref() {
|
|
+ return aggregated;
|
|
+ }
|
|
+
|
|
+ aggregated.push_str(&format!("diff --git a/{left_display} b/{right_display}\n"));
|
|
+
|
|
+ let is_add = !left_present && right_bytes.is_some();
|
|
+ let is_delete = left_present && right_bytes.is_none();
|
|
+
|
|
+ if is_add {
|
|
+ aggregated.push_str(&format!("new file mode {current_mode}\n"));
|
|
+ } else if is_delete {
|
|
+ aggregated.push_str(&format!("deleted file mode {baseline_mode}\n"));
|
|
+ } else if baseline_mode != current_mode {
|
|
+ aggregated.push_str(&format!("old mode {baseline_mode}\n"));
|
|
+ aggregated.push_str(&format!("new mode {current_mode}\n"));
|
|
+ }
|
|
+
|
|
+ let left_text = left_bytes.and_then(|b| std::str::from_utf8(b).ok());
|
|
+ let right_text = right_bytes
|
|
+ .as_deref()
|
|
+ .and_then(|b| std::str::from_utf8(b).ok());
|
|
+
|
|
+ let can_text_diff = matches!(
|
|
+ (left_text, right_text, is_add, is_delete),
|
|
+ (Some(_), Some(_), _, _) | (_, Some(_), true, _) | (Some(_), _, _, true)
|
|
+ );
|
|
+
|
|
+ if can_text_diff {
|
|
+ let l = left_text.unwrap_or("");
|
|
+ let r = right_text.unwrap_or("");
|
|
+
|
|
+ aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
|
|
+
|
|
+ let old_header = if left_present {
|
|
+ format!("a/{left_display}")
|
|
+ } else {
|
|
+ DEV_NULL.to_string()
|
|
+ };
|
|
+ let new_header = if right_bytes.is_some() {
|
|
+ format!("b/{right_display}")
|
|
+ } else {
|
|
+ DEV_NULL.to_string()
|
|
+ };
|
|
+
|
|
+ let diff = similar::TextDiff::from_lines(l, r);
|
|
+ let unified = diff
|
|
+ .unified_diff()
|
|
+ .context_radius(3)
|
|
+ .header(&old_header, &new_header)
|
|
+ .to_string();
|
|
+
|
|
+ aggregated.push_str(&unified);
|
|
+ } else {
|
|
+ aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
|
|
+ let old_header = if left_present {
|
|
+ format!("a/{left_display}")
|
|
+ } else {
|
|
+ DEV_NULL.to_string()
|
|
+ };
|
|
+ let new_header = if right_bytes.is_some() {
|
|
+ format!("b/{right_display}")
|
|
+ } else {
|
|
+ DEV_NULL.to_string()
|
|
+ };
|
|
+ aggregated.push_str(&format!("--- {old_header}\n"));
|
|
+ aggregated.push_str(&format!("+++ {new_header}\n"));
|
|
+ aggregated.push_str("Binary files differ\n");
|
|
+ }
|
|
+ aggregated
|
|
+ }
|
|
+}
|
|
+
|
|
+/// Compute the Git SHA-1 blob object ID for the given content (bytes).
|
|
+fn git_blob_sha1_hex_bytes(data: &[u8]) -> Output<sha1::Sha1> {
|
|
+ // Git blob hash is sha1 of: "blob <len>\0<data>"
|
|
+ let header = format!("blob {}\0", data.len());
|
|
+ use sha1::Digest;
|
|
+ let mut hasher = sha1::Sha1::new();
|
|
+ hasher.update(header.as_bytes());
|
|
+ hasher.update(data);
|
|
+ hasher.finalize()
|
|
+}
|
|
+
|
|
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
|
+enum FileMode {
|
|
+ Regular,
|
|
+ #[cfg(unix)]
|
|
+ Executable,
|
|
+ Symlink,
|
|
+}
|
|
+
|
|
+impl FileMode {
|
|
+ fn as_str(&self) -> &'static str {
|
|
+ match self {
|
|
+ FileMode::Regular => "100644",
|
|
+ #[cfg(unix)]
|
|
+ FileMode::Executable => "100755",
|
|
+ FileMode::Symlink => "120000",
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+impl std::fmt::Display for FileMode {
|
|
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
+ f.write_str(self.as_str())
|
|
+ }
|
|
+}
|
|
+
|
|
+#[cfg(unix)]
|
|
+fn file_mode_for_path(path: &Path) -> Option<FileMode> {
|
|
+ use std::os::unix::fs::PermissionsExt;
|
|
+ let meta = fs::symlink_metadata(path).ok()?;
|
|
+ let ft = meta.file_type();
|
|
+ if ft.is_symlink() {
|
|
+ return Some(FileMode::Symlink);
|
|
+ }
|
|
+ let mode = meta.permissions().mode();
|
|
+ let is_exec = (mode & 0o111) != 0;
|
|
+ Some(if is_exec {
|
|
+ FileMode::Executable
|
|
+ } else {
|
|
+ FileMode::Regular
|
|
+ })
|
|
+}
|
|
+
|
|
+#[cfg(not(unix))]
|
|
+fn file_mode_for_path(_path: &Path) -> Option<FileMode> {
|
|
+ // Default to non-executable on non-unix.
|
|
+ Some(FileMode::Regular)
|
|
+}
|
|
+
|
|
+fn blob_bytes(path: &Path, mode: &FileMode) -> Option<Vec<u8>> {
|
|
+ if path.exists() {
|
|
+ let contents = if *mode == FileMode::Symlink {
|
|
+ symlink_blob_bytes(path)
|
|
+ .ok_or_else(|| anyhow!("failed to read symlink target for {}", path.display()))
|
|
+ } else {
|
|
+ fs::read(path)
|
|
+ .with_context(|| format!("failed to read current file for diff {}", path.display()))
|
|
+ };
|
|
+ contents.ok()
|
|
+ } else {
|
|
+ None
|
|
+ }
|
|
+}
|
|
+
|
|
+#[cfg(unix)]
|
|
+fn symlink_blob_bytes(path: &Path) -> Option<Vec<u8>> {
|
|
+ use std::os::unix::ffi::OsStrExt;
|
|
+ let target = std::fs::read_link(path).ok()?;
|
|
+ Some(target.as_os_str().as_bytes().to_vec())
|
|
+}
|
|
+
|
|
+#[cfg(not(unix))]
|
|
+fn symlink_blob_bytes(_path: &Path) -> Option<Vec<u8>> {
|
|
+ None
|
|
+}
|
|
+
|
|
+#[cfg(windows)]
|
|
+fn is_windows_drive_or_unc_root(p: &std::path::Path) -> bool {
|
|
+ use std::path::Component;
|
|
+ let mut comps = p.components();
|
|
+ matches!(
|
|
+ (comps.next(), comps.next(), comps.next()),
|
|
+ (Some(Component::Prefix(_)), Some(Component::RootDir), None)
|
|
+ )
|
|
+}
|
|
+
|
|
+#[cfg(test)]
|
|
+mod tests {
|
|
+ #![allow(clippy::unwrap_used)]
|
|
+ use super::*;
|
|
+ use pretty_assertions::assert_eq;
|
|
+ use tempfile::tempdir;
|
|
+
|
|
+ /// Compute the Git SHA-1 blob object ID for the given content (string).
|
|
+ /// This delegates to the bytes version to avoid UTF-8 lossy conversions here.
|
|
+ fn git_blob_sha1_hex(data: &str) -> String {
|
|
+ format!("{:x}", git_blob_sha1_hex_bytes(data.as_bytes()))
|
|
+ }
|
|
+
|
|
+ fn normalize_diff_for_test(input: &str, root: &Path) -> String {
|
|
+ let root_str = root.display().to_string().replace('\\', "/");
|
|
+ let replaced = input.replace(&root_str, "<TMP>");
|
|
+ // Split into blocks on lines starting with "diff --git ", sort blocks for determinism, and rejoin
|
|
+ let mut blocks: Vec<String> = Vec::new();
|
|
+ let mut current = String::new();
|
|
+ for line in replaced.lines() {
|
|
+ if line.starts_with("diff --git ") && !current.is_empty() {
|
|
+ blocks.push(current);
|
|
+ current = String::new();
|
|
+ }
|
|
+ if !current.is_empty() {
|
|
+ current.push('\n');
|
|
+ }
|
|
+ current.push_str(line);
|
|
+ }
|
|
+ if !current.is_empty() {
|
|
+ blocks.push(current);
|
|
+ }
|
|
+ blocks.sort();
|
|
+ let mut out = blocks.join("\n");
|
|
+ if !out.ends_with('\n') {
|
|
+ out.push('\n');
|
|
+ }
|
|
+ out
|
|
+ }
|
|
+
|
|
+ #[test]
|
|
+ fn accumulates_add_and_update() {
|
|
+ let mut acc = TurnDiffTracker::new();
|
|
+
|
|
+ let dir = tempdir().unwrap();
|
|
+ let file = dir.path().join("a.txt");
|
|
+
|
|
+ // First patch: add file (baseline should be /dev/null).
|
|
+ let add_changes = HashMap::from([(
|
|
+ file.clone(),
|
|
+ FileChange::Add {
|
|
+ content: "foo\n".to_string(),
|
|
+ },
|
|
+ )]);
|
|
+ acc.on_patch_begin(&add_changes);
|
|
+
|
|
+ // Simulate apply: create the file on disk.
|
|
+ fs::write(&file, "foo\n").unwrap();
|
|
+ let first = acc.get_unified_diff().unwrap().unwrap();
|
|
+ let first = normalize_diff_for_test(&first, dir.path());
|
|
+ let expected_first = {
|
|
+ let mode = file_mode_for_path(&file).unwrap_or(FileMode::Regular);
|
|
+ let right_oid = git_blob_sha1_hex("foo\n");
|
|
+ format!(
|
|
+ r#"diff --git a/<TMP>/a.txt b/<TMP>/a.txt
|
|
+new file mode {mode}
|
|
+index {ZERO_OID}..{right_oid}
|
|
+--- {DEV_NULL}
|
|
++++ b/<TMP>/a.txt
|
|
+@@ -0,0 +1 @@
|
|
++foo
|
|
+"#,
|
|
+ )
|
|
+ };
|
|
+ assert_eq!(first, expected_first);
|
|
+
|
|
+ // Second patch: update the file on disk.
|
|
+ let update_changes = HashMap::from([(
|
|
+ file.clone(),
|
|
+ FileChange::Update {
|
|
+ unified_diff: "".to_owned(),
|
|
+ move_path: None,
|
|
+ },
|
|
+ )]);
|
|
+ acc.on_patch_begin(&update_changes);
|
|
+
|
|
+ // Simulate apply: append a new line.
|
|
+ fs::write(&file, "foo\nbar\n").unwrap();
|
|
+ let combined = acc.get_unified_diff().unwrap().unwrap();
|
|
+ let combined = normalize_diff_for_test(&combined, dir.path());
|
|
+ let expected_combined = {
|
|
+ let mode = file_mode_for_path(&file).unwrap_or(FileMode::Regular);
|
|
+ let right_oid = git_blob_sha1_hex("foo\nbar\n");
|
|
+ format!(
|
|
+ r#"diff --git a/<TMP>/a.txt b/<TMP>/a.txt
|
|
+new file mode {mode}
|
|
+index {ZERO_OID}..{right_oid}
|
|
+--- {DEV_NULL}
|
|
++++ b/<TMP>/a.txt
|
|
+@@ -0,0 +1,2 @@
|
|
++foo
|
|
++bar
|
|
+"#,
|
|
+ )
|
|
+ };
|
|
+ assert_eq!(combined, expected_combined);
|
|
+ }
|
|
+
|
|
+ #[test]
|
|
+ fn accumulates_delete() {
|
|
+ let dir = tempdir().unwrap();
|
|
+ let file = dir.path().join("b.txt");
|
|
+ fs::write(&file, "x\n").unwrap();
|
|
+
|
|
+ let mut acc = TurnDiffTracker::new();
|
|
+ let del_changes = HashMap::from([(file.clone(), FileChange::Delete)]);
|
|
+ acc.on_patch_begin(&del_changes);
|
|
+
|
|
+ // Simulate apply: delete the file from disk.
|
|
+ let baseline_mode = file_mode_for_path(&file).unwrap_or(FileMode::Regular);
|
|
+ fs::remove_file(&file).unwrap();
|
|
+ let diff = acc.get_unified_diff().unwrap().unwrap();
|
|
+ let diff = normalize_diff_for_test(&diff, dir.path());
|
|
+ let expected = {
|
|
+ let left_oid = git_blob_sha1_hex("x\n");
|
|
+ format!(
|
|
+ r#"diff --git a/<TMP>/b.txt b/<TMP>/b.txt
|
|
+deleted file mode {baseline_mode}
|
|
+index {left_oid}..{ZERO_OID}
|
|
+--- a/<TMP>/b.txt
|
|
++++ {DEV_NULL}
|
|
+@@ -1 +0,0 @@
|
|
+-x
|
|
+"#,
|
|
+ )
|
|
+ };
|
|
+ assert_eq!(diff, expected);
|
|
+ }
|
|
+
|
|
+ #[test]
|
|
+ fn accumulates_move_and_update() {
|
|
+ let dir = tempdir().unwrap();
|
|
+ let src = dir.path().join("src.txt");
|
|
+ let dest = dir.path().join("dst.txt");
|
|
+ fs::write(&src, "line\n").unwrap();
|
|
+
|
|
+ let mut acc = TurnDiffTracker::new();
|
|
+ let mv_changes = HashMap::from([(
|
|
+ src.clone(),
|
|
+ FileChange::Update {
|
|
+ unified_diff: "".to_owned(),
|
|
+ move_path: Some(dest.clone()),
|
|
+ },
|
|
+ )]);
|
|
+ acc.on_patch_begin(&mv_changes);
|
|
+
|
|
+ // Simulate apply: move and update content.
|
|
+ fs::rename(&src, &dest).unwrap();
|
|
+ fs::write(&dest, "line2\n").unwrap();
|
|
+
|
|
+ let out = acc.get_unified_diff().unwrap().unwrap();
|
|
+ let out = normalize_diff_for_test(&out, dir.path());
|
|
+ let expected = {
|
|
+ let left_oid = git_blob_sha1_hex("line\n");
|
|
+ let right_oid = git_blob_sha1_hex("line2\n");
|
|
+ format!(
|
|
+ r#"diff --git a/<TMP>/src.txt b/<TMP>/dst.txt
|
|
+index {left_oid}..{right_oid}
|
|
+--- a/<TMP>/src.txt
|
|
++++ b/<TMP>/dst.txt
|
|
+@@ -1 +1 @@
|
|
+-line
|
|
++line2
|
|
+"#
|
|
+ )
|
|
+ };
|
|
+ assert_eq!(out, expected);
|
|
+ }
|
|
+
|
|
+ #[test]
|
|
+ fn move_without_change_yields_no_diff() {
|
|
+ let dir = tempdir().unwrap();
|
|
+ let src = dir.path().join("moved.txt");
|
|
+ let dest = dir.path().join("renamed.txt");
|
|
+ fs::write(&src, "same\n").unwrap();
|
|
+
|
|
+ let mut acc = TurnDiffTracker::new();
|
|
+ let mv_changes = HashMap::from([(
|
|
+ src.clone(),
|
|
+ FileChange::Update {
|
|
+ unified_diff: "".to_owned(),
|
|
+ move_path: Some(dest.clone()),
|
|
+ },
|
|
+ )]);
|
|
+ acc.on_patch_begin(&mv_changes);
|
|
+
|
|
+ // Simulate apply: move only, no content change.
|
|
+ fs::rename(&src, &dest).unwrap();
|
|
+
|
|
+ let diff = acc.get_unified_diff().unwrap();
|
|
+ assert_eq!(diff, None);
|
|
+ }
|
|
+
|
|
+ #[test]
|
|
+ fn move_declared_but_file_only_appears_at_dest_is_add() {
|
|
+ let dir = tempdir().unwrap();
|
|
+ let src = dir.path().join("src.txt");
|
|
+ let dest = dir.path().join("dest.txt");
|
|
+ let mut acc = TurnDiffTracker::new();
|
|
+ let mv = HashMap::from([(
|
|
+ src.clone(),
|
|
+ FileChange::Update {
|
|
+ unified_diff: "".into(),
|
|
+ move_path: Some(dest.clone()),
|
|
+ },
|
|
+ )]);
|
|
+ acc.on_patch_begin(&mv);
|
|
+ // No file existed initially; create only dest
|
|
+ fs::write(&dest, "hello\n").unwrap();
|
|
+ let diff = acc.get_unified_diff().unwrap().unwrap();
|
|
+ let diff = normalize_diff_for_test(&diff, dir.path());
|
|
+ let expected = {
|
|
+ let mode = file_mode_for_path(&dest).unwrap_or(FileMode::Regular);
|
|
+ let right_oid = git_blob_sha1_hex("hello\n");
|
|
+ format!(
|
|
+ r#"diff --git a/<TMP>/src.txt b/<TMP>/dest.txt
|
|
+new file mode {mode}
|
|
+index {ZERO_OID}..{right_oid}
|
|
+--- {DEV_NULL}
|
|
++++ b/<TMP>/dest.txt
|
|
+@@ -0,0 +1 @@
|
|
++hello
|
|
+"#,
|
|
+ )
|
|
+ };
|
|
+ assert_eq!(diff, expected);
|
|
+ }
|
|
+
|
|
+ #[test]
|
|
+ fn update_persists_across_new_baseline_for_new_file() {
|
|
+ let dir = tempdir().unwrap();
|
|
+ let a = dir.path().join("a.txt");
|
|
+ let b = dir.path().join("b.txt");
|
|
+ fs::write(&a, "foo\n").unwrap();
|
|
+ fs::write(&b, "z\n").unwrap();
|
|
+
|
|
+ let mut acc = TurnDiffTracker::new();
|
|
+
|
|
+ // First: update existing a.txt (baseline snapshot is created for a).
|
|
+ let update_a = HashMap::from([(
|
|
+ a.clone(),
|
|
+ FileChange::Update {
|
|
+ unified_diff: "".to_owned(),
|
|
+ move_path: None,
|
|
+ },
|
|
+ )]);
|
|
+ acc.on_patch_begin(&update_a);
|
|
+ // Simulate apply: modify a.txt on disk.
|
|
+ fs::write(&a, "foo\nbar\n").unwrap();
|
|
+ let first = acc.get_unified_diff().unwrap().unwrap();
|
|
+ let first = normalize_diff_for_test(&first, dir.path());
|
|
+ let expected_first = {
|
|
+ let left_oid = git_blob_sha1_hex("foo\n");
|
|
+ let right_oid = git_blob_sha1_hex("foo\nbar\n");
|
|
+ format!(
|
|
+ r#"diff --git a/<TMP>/a.txt b/<TMP>/a.txt
|
|
+index {left_oid}..{right_oid}
|
|
+--- a/<TMP>/a.txt
|
|
++++ b/<TMP>/a.txt
|
|
+@@ -1 +1,2 @@
|
|
+ foo
|
|
++bar
|
|
+"#
|
|
+ )
|
|
+ };
|
|
+ assert_eq!(first, expected_first);
|
|
+
|
|
+ // Next: introduce a brand-new path b.txt into baseline snapshots via a delete change.
|
|
+ let del_b = HashMap::from([(b.clone(), FileChange::Delete)]);
|
|
+ acc.on_patch_begin(&del_b);
|
|
+ // Simulate apply: delete b.txt.
|
|
+ let baseline_mode = file_mode_for_path(&b).unwrap_or(FileMode::Regular);
|
|
+ fs::remove_file(&b).unwrap();
|
|
+
|
|
+ let combined = acc.get_unified_diff().unwrap().unwrap();
|
|
+ let combined = normalize_diff_for_test(&combined, dir.path());
|
|
+ let expected = {
|
|
+ let left_oid_a = git_blob_sha1_hex("foo\n");
|
|
+ let right_oid_a = git_blob_sha1_hex("foo\nbar\n");
|
|
+ let left_oid_b = git_blob_sha1_hex("z\n");
|
|
+ format!(
|
|
+ r#"diff --git a/<TMP>/a.txt b/<TMP>/a.txt
|
|
+index {left_oid_a}..{right_oid_a}
|
|
+--- a/<TMP>/a.txt
|
|
++++ b/<TMP>/a.txt
|
|
+@@ -1 +1,2 @@
|
|
+ foo
|
|
++bar
|
|
+diff --git a/<TMP>/b.txt b/<TMP>/b.txt
|
|
+deleted file mode {baseline_mode}
|
|
+index {left_oid_b}..{ZERO_OID}
|
|
+--- a/<TMP>/b.txt
|
|
++++ {DEV_NULL}
|
|
+@@ -1 +0,0 @@
|
|
+-z
|
|
+"#,
|
|
+ )
|
|
+ };
|
|
+ assert_eq!(combined, expected);
|
|
+ }
|
|
+
|
|
+ #[test]
|
|
+ fn binary_files_differ_update() {
|
|
+ let dir = tempdir().unwrap();
|
|
+ let file = dir.path().join("bin.dat");
|
|
+
|
|
+ // Initial non-UTF8 bytes
|
|
+ let left_bytes: Vec<u8> = vec![0xff, 0xfe, 0xfd, 0x00];
|
|
+ // Updated non-UTF8 bytes
|
|
+ let right_bytes: Vec<u8> = vec![0x01, 0x02, 0x03, 0x00];
|
|
+
|
|
+ fs::write(&file, &left_bytes).unwrap();
|
|
+
|
|
+ let mut acc = TurnDiffTracker::new();
|
|
+ let update_changes = HashMap::from([(
|
|
+ file.clone(),
|
|
+ FileChange::Update {
|
|
+ unified_diff: "".to_owned(),
|
|
+ move_path: None,
|
|
+ },
|
|
+ )]);
|
|
+ acc.on_patch_begin(&update_changes);
|
|
+
|
|
+ // Apply update on disk
|
|
+ fs::write(&file, &right_bytes).unwrap();
|
|
+
|
|
+ let diff = acc.get_unified_diff().unwrap().unwrap();
|
|
+ let diff = normalize_diff_for_test(&diff, dir.path());
|
|
+ let expected = {
|
|
+ let left_oid = format!("{:x}", git_blob_sha1_hex_bytes(&left_bytes));
|
|
+ let right_oid = format!("{:x}", git_blob_sha1_hex_bytes(&right_bytes));
|
|
+ format!(
|
|
+ r#"diff --git a/<TMP>/bin.dat b/<TMP>/bin.dat
|
|
+index {left_oid}..{right_oid}
|
|
+--- a/<TMP>/bin.dat
|
|
++++ b/<TMP>/bin.dat
|
|
+Binary files differ
|
|
+"#
|
|
+ )
|
|
+ };
|
|
+ assert_eq!(diff, expected);
|
|
+ }
|
|
+
|
|
+ #[test]
|
|
+ fn filenames_with_spaces_add_and_update() {
|
|
+ let mut acc = TurnDiffTracker::new();
|
|
+
|
|
+ let dir = tempdir().unwrap();
|
|
+ let file = dir.path().join("name with spaces.txt");
|
|
+
|
|
+ // First patch: add file (baseline should be /dev/null).
|
|
+ let add_changes = HashMap::from([(
|
|
+ file.clone(),
|
|
+ FileChange::Add {
|
|
+ content: "foo\n".to_string(),
|
|
+ },
|
|
+ )]);
|
|
+ acc.on_patch_begin(&add_changes);
|
|
+
|
|
+ // Simulate apply: create the file on disk.
|
|
+ fs::write(&file, "foo\n").unwrap();
|
|
+ let first = acc.get_unified_diff().unwrap().unwrap();
|
|
+ let first = normalize_diff_for_test(&first, dir.path());
|
|
+ let expected_first = {
|
|
+ let mode = file_mode_for_path(&file).unwrap_or(FileMode::Regular);
|
|
+ let right_oid = git_blob_sha1_hex("foo\n");
|
|
+ format!(
|
|
+ r#"diff --git a/<TMP>/name with spaces.txt b/<TMP>/name with spaces.txt
|
|
+new file mode {mode}
|
|
+index {ZERO_OID}..{right_oid}
|
|
+--- {DEV_NULL}
|
|
++++ b/<TMP>/name with spaces.txt
|
|
+@@ -0,0 +1 @@
|
|
++foo
|
|
+"#,
|
|
+ )
|
|
+ };
|
|
+ assert_eq!(first, expected_first);
|
|
+
|
|
+ // Second patch: update the file on disk.
|
|
+ let update_changes = HashMap::from([(
|
|
+ file.clone(),
|
|
+ FileChange::Update {
|
|
+ unified_diff: "".to_owned(),
|
|
+ move_path: None,
|
|
+ },
|
|
+ )]);
|
|
+ acc.on_patch_begin(&update_changes);
|
|
+
|
|
+ // Simulate apply: append a new line with a space.
|
|
+ fs::write(&file, "foo\nbar baz\n").unwrap();
|
|
+ let combined = acc.get_unified_diff().unwrap().unwrap();
|
|
+ let combined = normalize_diff_for_test(&combined, dir.path());
|
|
+ let expected_combined = {
|
|
+ let mode = file_mode_for_path(&file).unwrap_or(FileMode::Regular);
|
|
+ let right_oid = git_blob_sha1_hex("foo\nbar baz\n");
|
|
+ format!(
|
|
+ r#"diff --git a/<TMP>/name with spaces.txt b/<TMP>/name with spaces.txt
|
|
+new file mode {mode}
|
|
+index {ZERO_OID}..{right_oid}
|
|
+--- {DEV_NULL}
|
|
++++ b/<TMP>/name with spaces.txt
|
|
+@@ -0,0 +1,2 @@
|
|
++foo
|
|
++bar baz
|
|
+"#,
|
|
+ )
|
|
+ };
|
|
+ assert_eq!(combined, expected_combined);
|
|
+ }
|
|
+}
|
|
diff --git a/codex-rs/exec/src/event_processor_with_human_output.rs b/codex-rs/exec/src/event_processor_with_human_output.rs
|
|
index 72e2f9298f..c290d9336b 100644
|
|
--- a/codex-rs/exec/src/event_processor_with_human_output.rs
|
|
+++ b/codex-rs/exec/src/event_processor_with_human_output.rs
|
|
@@ -20,6 +20,7 @@ use codex_core::protocol::PatchApplyEndEvent;
|
|
use codex_core::protocol::SessionConfiguredEvent;
|
|
use codex_core::protocol::TaskCompleteEvent;
|
|
use codex_core::protocol::TokenUsage;
|
|
+use codex_core::protocol::TurnDiffEvent;
|
|
use owo_colors::OwoColorize;
|
|
use owo_colors::Style;
|
|
use shlex::try_join;
|
|
@@ -399,6 +400,7 @@ impl EventProcessor for EventProcessorWithHumanOutput {
|
|
stdout,
|
|
stderr,
|
|
success,
|
|
+ ..
|
|
}) => {
|
|
let patch_begin = self.call_id_to_patch.remove(&call_id);
|
|
|
|
@@ -428,6 +430,10 @@ impl EventProcessor for EventProcessorWithHumanOutput {
|
|
println!("{}", line.style(self.dimmed));
|
|
}
|
|
}
|
|
+ EventMsg::TurnDiff(TurnDiffEvent { unified_diff }) => {
|
|
+ ts_println!(self, "{}", "turn diff:".style(self.magenta));
|
|
+ println!("{unified_diff}");
|
|
+ }
|
|
EventMsg::ExecApprovalRequest(_) => {
|
|
// Should we exit?
|
|
}
|
|
diff --git a/codex-rs/mcp-server/src/codex_tool_runner.rs b/codex-rs/mcp-server/src/codex_tool_runner.rs
|
|
index d489ffe076..205dfa4631 100644
|
|
--- a/codex-rs/mcp-server/src/codex_tool_runner.rs
|
|
+++ b/codex-rs/mcp-server/src/codex_tool_runner.rs
|
|
@@ -263,6 +263,7 @@ async fn run_codex_tool_session_inner(
|
|
| EventMsg::BackgroundEvent(_)
|
|
| EventMsg::PatchApplyBegin(_)
|
|
| EventMsg::PatchApplyEnd(_)
|
|
+ | EventMsg::TurnDiff(_)
|
|
| EventMsg::GetHistoryEntryResponse(_)
|
|
| EventMsg::PlanUpdate(_)
|
|
| EventMsg::ShutdownComplete => {
|
|
diff --git a/codex-rs/mcp-server/src/conversation_loop.rs b/codex-rs/mcp-server/src/conversation_loop.rs
|
|
index 534275181a..1db39a2306 100644
|
|
--- a/codex-rs/mcp-server/src/conversation_loop.rs
|
|
+++ b/codex-rs/mcp-server/src/conversation_loop.rs
|
|
@@ -97,6 +97,7 @@ pub async fn run_conversation_loop(
|
|
| EventMsg::McpToolCallEnd(_)
|
|
| EventMsg::ExecCommandBegin(_)
|
|
| EventMsg::ExecCommandEnd(_)
|
|
+ | EventMsg::TurnDiff(_)
|
|
| EventMsg::BackgroundEvent(_)
|
|
| EventMsg::ExecCommandOutputDelta(_)
|
|
| EventMsg::PatchApplyBegin(_)
|
|
```
|
|
|
|
## Review Comments
|
|
|
|
### codex-rs/core/Cargo.toml
|
|
|
|
- Created: 2025-08-01 16:35:11 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248403984
|
|
|
|
```diff
|
|
@@ -51,6 +51,7 @@ tree-sitter-bash = "0.25.0"
|
|
uuid = { version = "1", features = ["serde", "v4"] }
|
|
whoami = "1.6.0"
|
|
wildmatch = "2.4.0"
|
|
+tempfile = "3"
|
|
```
|
|
|
|
> @pakrym-oai alpha sort is from saving in the editor, right?
|
|
|
|
- Created: 2025-08-04 02:42:24 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250289252
|
|
|
|
```diff
|
|
@@ -34,6 +34,7 @@ serde_json = "1"
|
|
serde_bytes = "0.11"
|
|
sha1 = "0.10.6"
|
|
shlex = "1.3.0"
|
|
+similar = "2"
|
|
```
|
|
|
|
> Maybe we should match https://github.com/openai/codex/blob/e3565a3f438c30c9d36412d2817346c7accd487c/codex-rs/apply-patch/Cargo.toml#L15 (or change that one to be `"2"`?)
|
|
|
|
### codex-rs/core/src/codex.rs
|
|
|
|
- Created: 2025-08-01 16:48:22 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248425776
|
|
|
|
```diff
|
|
@@ -1471,6 +1528,7 @@ fn maybe_run_with_user_profile(params: ExecParams, sess: &Session) -> ExecParams
|
|
async fn handle_container_exec_with_params(
|
|
params: ExecParams,
|
|
sess: &Session,
|
|
+ turn_diff_tracker: &mut TurnDiffTracker,
|
|
```
|
|
|
|
> Hmm, what would happen if we wanted to support parallel tool calls at one point. This would be a problem, no?
|
|
|
|
- Created: 2025-08-01 17:25:22 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248497145
|
|
|
|
```diff
|
|
@@ -1471,6 +1528,7 @@ fn maybe_run_with_user_profile(params: ExecParams, sess: &Session) -> ExecParams
|
|
async fn handle_container_exec_with_params(
|
|
params: ExecParams,
|
|
sess: &Session,
|
|
+ turn_diff_tracker: &mut TurnDiffTracker,
|
|
```
|
|
|
|
> Because only one tool call could take ownership of TurnDiffTracker.
|
|
|
|
- Created: 2025-08-04 02:46:36 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250292735
|
|
|
|
```diff
|
|
@@ -374,11 +380,15 @@ impl Session {
|
|
Some(ApplyPatchCommandContext {
|
|
user_explicitly_approved_this_action,
|
|
changes,
|
|
- }) => EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
|
|
- call_id,
|
|
- auto_approved: !user_explicitly_approved_this_action,
|
|
- changes,
|
|
- }),
|
|
+ }) => {
|
|
+ let _ = turn_diff_tracker.on_patch_begin(&changes);
|
|
```
|
|
|
|
> If this doesn't have to return `Result`, then `let _` can go away, of course, but depending on what sort of `Err` we expect, perhaps we should at least `warn!()` or `error!()`?
|
|
|
|
- Created: 2025-08-04 02:47:22 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250293562
|
|
|
|
```diff
|
|
@@ -392,8 +402,10 @@ impl Session {
|
|
let _ = self.tx_event.send(event).await;
|
|
}
|
|
|
|
- async fn notify_exec_command_end(
|
|
+ #[allow(clippy::too_many_arguments)]
|
|
```
|
|
|
|
> We should maybe introduce a struct in a follow-up PR.
|
|
|
|
- Created: 2025-08-04 02:49:08 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250294909
|
|
|
|
```diff
|
|
@@ -1163,6 +1193,7 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
|
|
|
|
async fn run_turn(
|
|
sess: &Session,
|
|
+ turn_diff_tracker: &mut TurnDiffTracker,
|
|
```
|
|
|
|
> We'll probably want a `struct TurnContext` or somesuch in the near future.
|
|
|
|
- Created: 2025-08-04 02:51:47 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250296938
|
|
|
|
```diff
|
|
@@ -1471,6 +1528,7 @@ fn maybe_run_with_user_profile(params: ExecParams, sess: &Session) -> ExecParams
|
|
async fn handle_container_exec_with_params(
|
|
params: ExecParams,
|
|
sess: &Session,
|
|
+ turn_diff_tracker: &mut TurnDiffTracker,
|
|
```
|
|
|
|
> Yes, though also, if we introduce a `struct TurnContext` as mentioned above, that may also force the move to `Mutex`. But yes, does not have to be done in this PR.
|
|
|
|
- Created: 2025-08-04 02:55:26 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250299871
|
|
|
|
```diff
|
|
@@ -1328,6 +1361,16 @@ async fn try_run_turn(
|
|
.ok();
|
|
}
|
|
|
|
+ let unified_diff = turn_diff_tracker.get_unified_diff();
|
|
+ if let Ok(Some(unified_diff)) = unified_diff {
|
|
+ let msg = EventMsg::TurnDiff(TurnDiffEvent { unified_diff });
|
|
+ let event = Event {
|
|
+ id: sub_id.to_string(),
|
|
+ msg,
|
|
+ };
|
|
+ let _ = sess.tx_event.send(event).await;
|
|
+ }
|
|
+
|
|
```
|
|
|
|
> I'm starting to think that we should do `break token_usage;` to get out of the loop and then do all of this post-loop stuff below just in case there ever ends up being another way to break out.
|
|
>
|
|
> It would also eliminate this `return` statement buried in here (though admittedly it would bury the `break` statement instead).
|
|
|
|
### codex-rs/core/src/protocol.rs
|
|
|
|
- Created: 2025-08-01 16:41:09 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248413365
|
|
|
|
```diff
|
|
@@ -525,6 +527,11 @@ pub struct PatchApplyEndEvent {
|
|
pub success: bool,
|
|
}
|
|
|
|
+#[derive(Debug, Clone, Deserialize, Serialize)]
|
|
+pub struct TurnDiffEvent {
|
|
+ pub unified_diff: String,
|
|
```
|
|
|
|
> I feel like this would be easier to work with programmatically if this were keyed by path, more like `changes` in `PatchApplyBeginEvent`. Maybe for a full add or a full delete for an individual file, we still want the unified diff, but it's nice to have added/modified/removed metadata for each path so it's easy to build a compact summary for the diff (maybe with +/- line counts)?
|
|
|
|
- Created: 2025-08-01 16:49:48 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248429044
|
|
|
|
```diff
|
|
@@ -525,6 +527,11 @@ pub struct PatchApplyEndEvent {
|
|
pub success: bool,
|
|
}
|
|
|
|
+#[derive(Debug, Clone, Deserialize, Serialize)]
|
|
+pub struct TurnDiffEvent {
|
|
+ pub unified_diff: String,
|
|
```
|
|
|
|
> What guarantees, if any, can we make about the paths in the `unified_diff`: will they all be absolute paths?
|
|
|
|
### codex-rs/core/src/turn_diff_tracker.rs
|
|
|
|
- Created: 2025-08-01 16:54:37 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248437475
|
|
|
|
```diff
|
|
@@ -0,0 +1,476 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use tempfile::TempDir;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+/// Tracks sets of changes to files and exposes the overall unified diff.
|
|
+/// Internally, the way this works is now:
|
|
+/// 1. Create a temp directory to store baseline snapshots of files when they are first seen.
|
|
+/// 2. When a path is first observed, copy its current contents into the baseline dir if it exists on disk.
|
|
+/// For new additions, do not create a baseline file so that diffs are shown as proper additions (using /dev/null).
|
|
+/// 3. Keep a stable internal filename (uuid + same extension) per external path for path rewrite in diffs.
|
|
+/// 4. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk using
|
|
+/// `git diff --no-index` and rewrite paths to external paths.
|
|
+#[derive(Default)]
|
|
+pub struct TurnDiffTracker {
|
|
+ /// Temp directory holding baseline snapshots of files as first seen.
|
|
+ baseline_files_dir: Option<TempDir>,
|
|
+ /// Map external path -> internal filename (uuid + same extension).
|
|
+ external_to_temp_name: HashMap<PathBuf, String>,
|
|
+ /// Internal filename -> external path as of baseline snapshot.
|
|
+ temp_name_to_baseline_external: HashMap<String, PathBuf>,
|
|
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
|
|
+ /// This is where renames are tracked.
|
|
+ temp_name_to_current_external: HashMap<String, PathBuf>,
|
|
+ /// Aggregated unified diff for all accumulated changes across files.
|
|
+ pub unified_diff: Option<String>,
|
|
```
|
|
|
|
> I'm surprised to see this as a field as opposed to always derived?
|
|
|
|
- Created: 2025-08-01 16:57:12 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248442328
|
|
|
|
```diff
|
|
@@ -0,0 +1,476 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use tempfile::TempDir;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+/// Tracks sets of changes to files and exposes the overall unified diff.
|
|
+/// Internally, the way this works is now:
|
|
+/// 1. Create a temp directory to store baseline snapshots of files when they are first seen.
|
|
+/// 2. When a path is first observed, copy its current contents into the baseline dir if it exists on disk.
|
|
+/// For new additions, do not create a baseline file so that diffs are shown as proper additions (using /dev/null).
|
|
+/// 3. Keep a stable internal filename (uuid + same extension) per external path for path rewrite in diffs.
|
|
+/// 4. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk using
|
|
+/// `git diff --no-index` and rewrite paths to external paths.
|
|
+#[derive(Default)]
|
|
+pub struct TurnDiffTracker {
|
|
+ /// Temp directory holding baseline snapshots of files as first seen.
|
|
+ baseline_files_dir: Option<TempDir>,
|
|
+ /// Map external path -> internal filename (uuid + same extension).
|
|
+ external_to_temp_name: HashMap<PathBuf, String>,
|
|
+ /// Internal filename -> external path as of baseline snapshot.
|
|
+ temp_name_to_baseline_external: HashMap<String, PathBuf>,
|
|
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
|
|
+ /// This is where renames are tracked.
|
|
+ temp_name_to_current_external: HashMap<String, PathBuf>,
|
|
+ /// Aggregated unified diff for all accumulated changes across files.
|
|
+ pub unified_diff: Option<String>,
|
|
+}
|
|
+
|
|
+impl TurnDiffTracker {
|
|
+ pub fn new() -> Self {
|
|
+ Self::default()
|
|
+ }
|
|
+
|
|
+ /// Front-run apply patch calls to track the starting contents of any modified files.
|
|
+ /// - Creates a baseline snapshot for files that already exist on disk when first seen.
|
|
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
|
|
+ /// - Also updates internal mappings for move/rename events.
|
|
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
|
|
+ self.ensure_baseline_dir()?;
|
|
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
|
|
+
|
|
+ for (path, change) in changes.iter() {
|
|
+ // Ensure a stable internal filename exists for this external path.
|
|
+ if !self.external_to_temp_name.contains_key(path) {
|
|
+ let internal = uuid_filename_for(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(path.clone(), internal.clone());
|
|
+ self.temp_name_to_baseline_external
|
|
+ .insert(internal.clone(), path.clone());
|
|
+ self.temp_name_to_current_external
|
|
+ .insert(internal.clone(), path.clone());
|
|
+
|
|
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
|
|
+ if path.exists() {
|
|
+ let contents = fs::read(path)
|
|
+ .with_context(|| format!("failed to read original {}", path.display()))?;
|
|
+ let internal_path = baseline_dir.join(&internal);
|
|
+ fs::write(&internal_path, contents).with_context(|| {
|
|
+ format!("failed to write baseline file {}", internal_path.display())
|
|
+ })?;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // Track rename/move in current mapping if provided in an Update.
|
|
+ let move_path = match change {
|
|
+ FileChange::Update {
|
|
+ move_path: Some(dest),
|
|
+ ..
|
|
+ } => Some(dest),
|
|
+ _ => None,
|
|
+ };
|
|
+ if let Some(dest) = move_path {
|
|
+ let uuid_filename = match self.external_to_temp_name.get(path) {
|
|
+ Some(i) => i.clone(),
|
|
+ None => {
|
|
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
|
|
+ let i = uuid_filename_for(path);
|
|
+ self.external_to_temp_name.insert(path.clone(), i.clone());
|
|
+ self.temp_name_to_baseline_external
|
|
+ .insert(i.clone(), path.clone());
|
|
+ i
|
|
+ }
|
|
+ };
|
|
+ // Update current external mapping for temp file name.
|
|
+ self.temp_name_to_current_external
|
|
+ .insert(uuid_filename.clone(), dest.clone());
|
|
+ // Update forward file_mapping: external current -> internal name.
|
|
+ self.external_to_temp_name.remove(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(dest.clone(), uuid_filename);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ /// Recompute the aggregated unified diff by comparing all baseline snapshots against
|
|
+ /// current files on disk using `git diff --no-index` and rewriting paths to external paths.
|
|
+ pub fn update_and_get_unified_diff(&mut self) -> Result<Option<String>> {
|
|
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
|
|
+ let current_dir = baseline_dir.join("current");
|
|
+ if current_dir.exists() {
|
|
+ // Best-effort cleanup of previous run's mirror.
|
|
+ let _ = fs::remove_dir_all(&current_dir);
|
|
+ }
|
|
+ fs::create_dir_all(&current_dir).with_context(|| {
|
|
+ format!(
|
|
+ "failed to create current mirror dir {}",
|
|
+ current_dir.display()
|
|
+ )
|
|
+ })?;
|
|
+
|
|
+ let mut aggregated = String::new();
|
|
+
|
|
+ // Compute diffs per tracked internal file.
|
|
+ for (internal, baseline_external) in &self.temp_name_to_baseline_external {
|
|
+ let baseline_path = baseline_dir.join(internal);
|
|
+ let current_external = self
|
|
+ .temp_name_to_current_external
|
|
+ .get(internal)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| baseline_external.clone());
|
|
+
|
|
+ let left_is_dev_null = !baseline_path.exists();
|
|
+ let right_exists = current_external.exists();
|
|
+
|
|
+ // Prepare right side mirror file if exists; otherwise use /dev/null for deletions.
|
|
+ let right_arg = if right_exists {
|
|
+ let mirror_path = current_dir.join(internal);
|
|
+ let contents = fs::read(&current_external).with_context(|| {
|
|
+ format!(
|
|
+ "failed to read current file for diff {}",
|
|
+ current_external.display()
|
|
+ )
|
|
+ })?;
|
|
+ fs::write(&mirror_path, contents).with_context(|| {
|
|
+ format!(
|
|
+ "failed to write current mirror file {}",
|
|
+ mirror_path.display()
|
|
+ )
|
|
+ })?;
|
|
+ // Use relative path from baseline_dir (so headers say a/<uuid> b/current/<uuid>).
|
|
+ format!("current/{internal}")
|
|
+ } else {
|
|
+ // Deletion: right side is /dev/null to show proper deleted file diff.
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+
|
|
+ // Prepare left arg: baseline file path or /dev/null for additions.
|
|
+ let left_arg = if left_is_dev_null {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ internal.clone()
|
|
+ };
|
|
+
|
|
+ // Run git diff --no-index from baseline_dir to keep paths predictable.
|
|
+ let raw = run_git_allow_exit_codes(
|
|
+ &baseline_dir,
|
|
+ &[
|
|
+ "-c",
|
|
+ "color.ui=false",
|
|
+ "diff",
|
|
+ "--no-color",
|
|
+ "--no-index",
|
|
+ "--",
|
|
+ &left_arg,
|
|
+ &right_arg,
|
|
+ ],
|
|
+ &[0, 1], // 0: no changes, 1: differences
|
|
+ )?;
|
|
+
|
|
+ if raw.trim().is_empty() {
|
|
+ continue;
|
|
+ }
|
|
+ let rewritten = self.rewrite_diff_paths(&raw);
|
|
+ if !rewritten.trim().is_empty() {
|
|
+ if !aggregated.is_empty() && !aggregated.ends_with('\n') {
|
|
+ aggregated.push('\n');
|
|
+ }
|
|
+ aggregated.push_str(&rewritten);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ self.unified_diff = if aggregated.trim().is_empty() {
|
|
+ None
|
|
+ } else {
|
|
+ Some(aggregated)
|
|
+ };
|
|
+
|
|
+ // Clean up the current dir.
|
|
+ let _ = fs::remove_dir_all(&current_dir);
|
|
+
|
|
+ Ok(self.unified_diff.clone())
|
|
+ }
|
|
+
|
|
+ fn baseline_dir(&self) -> Result<&Path> {
|
|
+ self.baseline_files_dir
|
|
+ .as_ref()
|
|
+ .map(|d| d.path())
|
|
+ .ok_or_else(|| anyhow::anyhow!("baseline temp dir not initialized"))
|
|
+ }
|
|
+
|
|
+ fn ensure_baseline_dir(&mut self) -> Result<()> {
|
|
+ if self.baseline_files_dir.is_some() {
|
|
+ return Ok(());
|
|
+ }
|
|
+ let tmp = TempDir::new().context("create baseline temp dir")?;
|
|
+ self.baseline_files_dir = Some(tmp);
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ /// Rewrites the internal filenames to external paths in diff headers.
|
|
+ /// Handles inputs like:
|
|
+ /// diff --git a/<uuid> b/current/<uuid>
|
|
+ /// --- a/<uuid> | /dev/null
|
|
+ /// +++ b/current/<uuid> | /dev/null
|
|
+ /// and replaces uuid with the external paths tracking baseline/current.
|
|
+ fn rewrite_diff_paths(&self, diff: &str) -> String {
|
|
+ let mut out = String::new();
|
|
+ for line in diff.lines() {
|
|
+ if let Some(rest) = line.strip_prefix("diff --git ") {
|
|
+ // Format: diff --git a/<f> b/<f>
|
|
+ let parts: Vec<&str> = rest.split_whitespace().collect();
|
|
+ if parts.len() == 2 {
|
|
+ let a = parts[0].strip_prefix("a/").unwrap_or(parts[0]);
|
|
+ let b = parts[1].strip_prefix("b/").unwrap_or(parts[1]);
|
|
+
|
|
+ let a_ext_display = if a == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let a_base = Path::new(a)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(a);
|
|
+ let mapped = self
|
|
+ .temp_name_to_baseline_external
|
|
+ .get(a_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(a));
|
|
+ mapped.display().to_string()
|
|
+ };
|
|
+
|
|
+ let b_ext_display = if b == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let b_base = Path::new(b)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(b);
|
|
+ let mapped = self
|
|
+ .temp_name_to_current_external
|
|
+ .get(b_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(b));
|
|
+ mapped.display().to_string()
|
|
+ };
|
|
+
|
|
+ out.push_str(&format!("diff --git a/{a_ext_display} b/{b_ext_display}\n"));
|
|
+ continue;
|
|
+ }
|
|
+ }
|
|
+ if let Some(rest) = line.strip_prefix("--- ") {
|
|
+ if let Some(path) = rest.strip_prefix("a/") {
|
|
+ let external_display = if path == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let p_base = Path::new(path)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(path);
|
|
+ self.temp_name_to_baseline_external
|
|
+ .get(p_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(path))
|
|
+ .display()
|
|
+ .to_string()
|
|
+ };
|
|
+ out.push_str(&format!("--- {external_display}\n"));
|
|
+ continue;
|
|
+ }
|
|
+ }
|
|
+ if let Some(rest) = line.strip_prefix("+++ ") {
|
|
+ if let Some(path) = rest.strip_prefix("b/") {
|
|
+ let external_display = if path == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let p_base = Path::new(path)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(path);
|
|
+ self.temp_name_to_current_external
|
|
+ .get(p_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(path))
|
|
+ .display()
|
|
+ .to_string()
|
|
+ };
|
|
+ out.push_str(&format!("+++ {external_display}\n"));
|
|
+ continue;
|
|
+ }
|
|
+ }
|
|
+ out.push_str(line);
|
|
+ out.push('\n');
|
|
+ }
|
|
+ out
|
|
+ }
|
|
+}
|
|
+
|
|
+fn uuid_filename_for(path: &Path) -> String {
|
|
+ let id = Uuid::new_v4().to_string();
|
|
+ match path.extension().and_then(|e| e.to_str()) {
|
|
+ Some(ext) if !ext.is_empty() => format!("{id}.{ext}"),
|
|
+ _ => id,
|
|
```
|
|
|
|
> ```suggestion
|
|
> None => id,
|
|
> ```
|
|
|
|
- Created: 2025-08-01 16:59:40 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248446717
|
|
|
|
```diff
|
|
@@ -0,0 +1,476 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use tempfile::TempDir;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+/// Tracks sets of changes to files and exposes the overall unified diff.
|
|
+/// Internally, the way this works is now:
|
|
+/// 1. Create a temp directory to store baseline snapshots of files when they are first seen.
|
|
+/// 2. When a path is first observed, copy its current contents into the baseline dir if it exists on disk.
|
|
+/// For new additions, do not create a baseline file so that diffs are shown as proper additions (using /dev/null).
|
|
+/// 3. Keep a stable internal filename (uuid + same extension) per external path for path rewrite in diffs.
|
|
+/// 4. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk using
|
|
+/// `git diff --no-index` and rewrite paths to external paths.
|
|
+#[derive(Default)]
|
|
+pub struct TurnDiffTracker {
|
|
+ /// Temp directory holding baseline snapshots of files as first seen.
|
|
+ baseline_files_dir: Option<TempDir>,
|
|
+ /// Map external path -> internal filename (uuid + same extension).
|
|
+ external_to_temp_name: HashMap<PathBuf, String>,
|
|
+ /// Internal filename -> external path as of baseline snapshot.
|
|
+ temp_name_to_baseline_external: HashMap<String, PathBuf>,
|
|
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
|
|
+ /// This is where renames are tracked.
|
|
+ temp_name_to_current_external: HashMap<String, PathBuf>,
|
|
+ /// Aggregated unified diff for all accumulated changes across files.
|
|
+ pub unified_diff: Option<String>,
|
|
+}
|
|
+
|
|
+impl TurnDiffTracker {
|
|
+ pub fn new() -> Self {
|
|
+ Self::default()
|
|
+ }
|
|
+
|
|
+ /// Front-run apply patch calls to track the starting contents of any modified files.
|
|
+ /// - Creates a baseline snapshot for files that already exist on disk when first seen.
|
|
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
|
|
+ /// - Also updates internal mappings for move/rename events.
|
|
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
|
|
+ self.ensure_baseline_dir()?;
|
|
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
|
|
+
|
|
+ for (path, change) in changes.iter() {
|
|
+ // Ensure a stable internal filename exists for this external path.
|
|
+ if !self.external_to_temp_name.contains_key(path) {
|
|
+ let internal = uuid_filename_for(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(path.clone(), internal.clone());
|
|
+ self.temp_name_to_baseline_external
|
|
+ .insert(internal.clone(), path.clone());
|
|
+ self.temp_name_to_current_external
|
|
+ .insert(internal.clone(), path.clone());
|
|
+
|
|
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
|
|
+ if path.exists() {
|
|
+ let contents = fs::read(path)
|
|
+ .with_context(|| format!("failed to read original {}", path.display()))?;
|
|
+ let internal_path = baseline_dir.join(&internal);
|
|
+ fs::write(&internal_path, contents).with_context(|| {
|
|
+ format!("failed to write baseline file {}", internal_path.display())
|
|
+ })?;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // Track rename/move in current mapping if provided in an Update.
|
|
+ let move_path = match change {
|
|
+ FileChange::Update {
|
|
+ move_path: Some(dest),
|
|
+ ..
|
|
+ } => Some(dest),
|
|
+ _ => None,
|
|
+ };
|
|
+ if let Some(dest) = move_path {
|
|
+ let uuid_filename = match self.external_to_temp_name.get(path) {
|
|
+ Some(i) => i.clone(),
|
|
+ None => {
|
|
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
|
|
+ let i = uuid_filename_for(path);
|
|
+ self.external_to_temp_name.insert(path.clone(), i.clone());
|
|
+ self.temp_name_to_baseline_external
|
|
+ .insert(i.clone(), path.clone());
|
|
+ i
|
|
+ }
|
|
+ };
|
|
+ // Update current external mapping for temp file name.
|
|
+ self.temp_name_to_current_external
|
|
+ .insert(uuid_filename.clone(), dest.clone());
|
|
+ // Update forward file_mapping: external current -> internal name.
|
|
+ self.external_to_temp_name.remove(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(dest.clone(), uuid_filename);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ /// Recompute the aggregated unified diff by comparing all baseline snapshots against
|
|
+ /// current files on disk using `git diff --no-index` and rewriting paths to external paths.
|
|
+ pub fn update_and_get_unified_diff(&mut self) -> Result<Option<String>> {
|
|
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
|
|
+ let current_dir = baseline_dir.join("current");
|
|
+ if current_dir.exists() {
|
|
+ // Best-effort cleanup of previous run's mirror.
|
|
+ let _ = fs::remove_dir_all(&current_dir);
|
|
+ }
|
|
+ fs::create_dir_all(&current_dir).with_context(|| {
|
|
+ format!(
|
|
+ "failed to create current mirror dir {}",
|
|
+ current_dir.display()
|
|
+ )
|
|
+ })?;
|
|
+
|
|
+ let mut aggregated = String::new();
|
|
+
|
|
+ // Compute diffs per tracked internal file.
|
|
+ for (internal, baseline_external) in &self.temp_name_to_baseline_external {
|
|
+ let baseline_path = baseline_dir.join(internal);
|
|
+ let current_external = self
|
|
+ .temp_name_to_current_external
|
|
+ .get(internal)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| baseline_external.clone());
|
|
+
|
|
+ let left_is_dev_null = !baseline_path.exists();
|
|
+ let right_exists = current_external.exists();
|
|
+
|
|
+ // Prepare right side mirror file if exists; otherwise use /dev/null for deletions.
|
|
+ let right_arg = if right_exists {
|
|
+ let mirror_path = current_dir.join(internal);
|
|
+ let contents = fs::read(&current_external).with_context(|| {
|
|
+ format!(
|
|
+ "failed to read current file for diff {}",
|
|
+ current_external.display()
|
|
+ )
|
|
+ })?;
|
|
+ fs::write(&mirror_path, contents).with_context(|| {
|
|
+ format!(
|
|
+ "failed to write current mirror file {}",
|
|
+ mirror_path.display()
|
|
+ )
|
|
+ })?;
|
|
+ // Use relative path from baseline_dir (so headers say a/<uuid> b/current/<uuid>).
|
|
+ format!("current/{internal}")
|
|
+ } else {
|
|
+ // Deletion: right side is /dev/null to show proper deleted file diff.
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+
|
|
+ // Prepare left arg: baseline file path or /dev/null for additions.
|
|
+ let left_arg = if left_is_dev_null {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ internal.clone()
|
|
+ };
|
|
+
|
|
+ // Run git diff --no-index from baseline_dir to keep paths predictable.
|
|
+ let raw = run_git_allow_exit_codes(
|
|
+ &baseline_dir,
|
|
+ &[
|
|
+ "-c",
|
|
+ "color.ui=false",
|
|
+ "diff",
|
|
+ "--no-color",
|
|
+ "--no-index",
|
|
+ "--",
|
|
+ &left_arg,
|
|
+ &right_arg,
|
|
+ ],
|
|
+ &[0, 1], // 0: no changes, 1: differences
|
|
+ )?;
|
|
+
|
|
+ if raw.trim().is_empty() {
|
|
+ continue;
|
|
+ }
|
|
+ let rewritten = self.rewrite_diff_paths(&raw);
|
|
+ if !rewritten.trim().is_empty() {
|
|
+ if !aggregated.is_empty() && !aggregated.ends_with('\n') {
|
|
+ aggregated.push('\n');
|
|
+ }
|
|
+ aggregated.push_str(&rewritten);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ self.unified_diff = if aggregated.trim().is_empty() {
|
|
+ None
|
|
+ } else {
|
|
+ Some(aggregated)
|
|
+ };
|
|
+
|
|
+ // Clean up the current dir.
|
|
+ let _ = fs::remove_dir_all(&current_dir);
|
|
+
|
|
+ Ok(self.unified_diff.clone())
|
|
+ }
|
|
+
|
|
+ fn baseline_dir(&self) -> Result<&Path> {
|
|
+ self.baseline_files_dir
|
|
+ .as_ref()
|
|
+ .map(|d| d.path())
|
|
+ .ok_or_else(|| anyhow::anyhow!("baseline temp dir not initialized"))
|
|
+ }
|
|
+
|
|
+ fn ensure_baseline_dir(&mut self) -> Result<()> {
|
|
+ if self.baseline_files_dir.is_some() {
|
|
+ return Ok(());
|
|
+ }
|
|
+ let tmp = TempDir::new().context("create baseline temp dir")?;
|
|
+ self.baseline_files_dir = Some(tmp);
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ /// Rewrites the internal filenames to external paths in diff headers.
|
|
+ /// Handles inputs like:
|
|
+ /// diff --git a/<uuid> b/current/<uuid>
|
|
+ /// --- a/<uuid> | /dev/null
|
|
+ /// +++ b/current/<uuid> | /dev/null
|
|
+ /// and replaces uuid with the external paths tracking baseline/current.
|
|
+ fn rewrite_diff_paths(&self, diff: &str) -> String {
|
|
+ let mut out = String::new();
|
|
+ for line in diff.lines() {
|
|
+ if let Some(rest) = line.strip_prefix("diff --git ") {
|
|
+ // Format: diff --git a/<f> b/<f>
|
|
+ let parts: Vec<&str> = rest.split_whitespace().collect();
|
|
+ if parts.len() == 2 {
|
|
+ let a = parts[0].strip_prefix("a/").unwrap_or(parts[0]);
|
|
+ let b = parts[1].strip_prefix("b/").unwrap_or(parts[1]);
|
|
+
|
|
+ let a_ext_display = if a == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let a_base = Path::new(a)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(a);
|
|
+ let mapped = self
|
|
+ .temp_name_to_baseline_external
|
|
+ .get(a_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(a));
|
|
+ mapped.display().to_string()
|
|
+ };
|
|
+
|
|
+ let b_ext_display = if b == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let b_base = Path::new(b)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(b);
|
|
+ let mapped = self
|
|
+ .temp_name_to_current_external
|
|
+ .get(b_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(b));
|
|
+ mapped.display().to_string()
|
|
+ };
|
|
+
|
|
+ out.push_str(&format!("diff --git a/{a_ext_display} b/{b_ext_display}\n"));
|
|
+ continue;
|
|
+ }
|
|
+ }
|
|
+ if let Some(rest) = line.strip_prefix("--- ") {
|
|
+ if let Some(path) = rest.strip_prefix("a/") {
|
|
+ let external_display = if path == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let p_base = Path::new(path)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(path);
|
|
+ self.temp_name_to_baseline_external
|
|
+ .get(p_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(path))
|
|
+ .display()
|
|
+ .to_string()
|
|
+ };
|
|
+ out.push_str(&format!("--- {external_display}\n"));
|
|
+ continue;
|
|
+ }
|
|
+ }
|
|
+ if let Some(rest) = line.strip_prefix("+++ ") {
|
|
+ if let Some(path) = rest.strip_prefix("b/") {
|
|
+ let external_display = if path == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let p_base = Path::new(path)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(path);
|
|
+ self.temp_name_to_current_external
|
|
+ .get(p_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(path))
|
|
+ .display()
|
|
+ .to_string()
|
|
+ };
|
|
+ out.push_str(&format!("+++ {external_display}\n"));
|
|
+ continue;
|
|
+ }
|
|
+ }
|
|
+ out.push_str(line);
|
|
+ out.push('\n');
|
|
+ }
|
|
+ out
|
|
+ }
|
|
+}
|
|
+
|
|
+fn uuid_filename_for(path: &Path) -> String {
|
|
+ let id = Uuid::new_v4().to_string();
|
|
+ match path.extension().and_then(|e| e.to_str()) {
|
|
+ Some(ext) if !ext.is_empty() => format!("{id}.{ext}"),
|
|
+ _ => id,
|
|
+ }
|
|
+}
|
|
+
|
|
+fn run_git_allow_exit_codes(
|
|
+ repo: &Path,
|
|
+ args: &[&str],
|
|
+ allowed_exit_codes: &[i32],
|
|
+) -> Result<String> {
|
|
+ let output = Command::new("git")
|
|
+ .current_dir(repo)
|
|
+ .args(args)
|
|
+ .output()
|
|
+ .with_context(|| format!("failed to run git {:?} in {}", args, repo.display()))?;
|
|
```
|
|
|
|
> ```suggestion
|
|
> .with_context(|| format!("failed to run `git {args:?}` in {repo}"))?;
|
|
> ```
|
|
|
|
- Created: 2025-08-01 17:08:30 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248464099
|
|
|
|
```diff
|
|
@@ -0,0 +1,476 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use tempfile::TempDir;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+/// Tracks sets of changes to files and exposes the overall unified diff.
|
|
+/// Internally, the way this works is now:
|
|
+/// 1. Create a temp directory to store baseline snapshots of files when they are first seen.
|
|
+/// 2. When a path is first observed, copy its current contents into the baseline dir if it exists on disk.
|
|
+/// For new additions, do not create a baseline file so that diffs are shown as proper additions (using /dev/null).
|
|
+/// 3. Keep a stable internal filename (uuid + same extension) per external path for path rewrite in diffs.
|
|
+/// 4. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk using
|
|
+/// `git diff --no-index` and rewrite paths to external paths.
|
|
+#[derive(Default)]
|
|
+pub struct TurnDiffTracker {
|
|
+ /// Temp directory holding baseline snapshots of files as first seen.
|
|
+ baseline_files_dir: Option<TempDir>,
|
|
+ /// Map external path -> internal filename (uuid + same extension).
|
|
+ external_to_temp_name: HashMap<PathBuf, String>,
|
|
+ /// Internal filename -> external path as of baseline snapshot.
|
|
+ temp_name_to_baseline_external: HashMap<String, PathBuf>,
|
|
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
|
|
+ /// This is where renames are tracked.
|
|
+ temp_name_to_current_external: HashMap<String, PathBuf>,
|
|
+ /// Aggregated unified diff for all accumulated changes across files.
|
|
+ pub unified_diff: Option<String>,
|
|
+}
|
|
+
|
|
+impl TurnDiffTracker {
|
|
+ pub fn new() -> Self {
|
|
+ Self::default()
|
|
+ }
|
|
+
|
|
+ /// Front-run apply patch calls to track the starting contents of any modified files.
|
|
+ /// - Creates a baseline snapshot for files that already exist on disk when first seen.
|
|
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
|
|
+ /// - Also updates internal mappings for move/rename events.
|
|
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
|
|
+ self.ensure_baseline_dir()?;
|
|
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
|
|
+
|
|
+ for (path, change) in changes.iter() {
|
|
+ // Ensure a stable internal filename exists for this external path.
|
|
+ if !self.external_to_temp_name.contains_key(path) {
|
|
+ let internal = uuid_filename_for(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(path.clone(), internal.clone());
|
|
+ self.temp_name_to_baseline_external
|
|
+ .insert(internal.clone(), path.clone());
|
|
+ self.temp_name_to_current_external
|
|
+ .insert(internal.clone(), path.clone());
|
|
+
|
|
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
|
|
+ if path.exists() {
|
|
+ let contents = fs::read(path)
|
|
+ .with_context(|| format!("failed to read original {}", path.display()))?;
|
|
+ let internal_path = baseline_dir.join(&internal);
|
|
+ fs::write(&internal_path, contents).with_context(|| {
|
|
+ format!("failed to write baseline file {}", internal_path.display())
|
|
+ })?;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // Track rename/move in current mapping if provided in an Update.
|
|
+ let move_path = match change {
|
|
+ FileChange::Update {
|
|
+ move_path: Some(dest),
|
|
+ ..
|
|
+ } => Some(dest),
|
|
+ _ => None,
|
|
+ };
|
|
+ if let Some(dest) = move_path {
|
|
+ let uuid_filename = match self.external_to_temp_name.get(path) {
|
|
+ Some(i) => i.clone(),
|
|
+ None => {
|
|
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
|
|
+ let i = uuid_filename_for(path);
|
|
+ self.external_to_temp_name.insert(path.clone(), i.clone());
|
|
+ self.temp_name_to_baseline_external
|
|
+ .insert(i.clone(), path.clone());
|
|
+ i
|
|
+ }
|
|
+ };
|
|
+ // Update current external mapping for temp file name.
|
|
+ self.temp_name_to_current_external
|
|
+ .insert(uuid_filename.clone(), dest.clone());
|
|
+ // Update forward file_mapping: external current -> internal name.
|
|
+ self.external_to_temp_name.remove(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(dest.clone(), uuid_filename);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ /// Recompute the aggregated unified diff by comparing all baseline snapshots against
|
|
+ /// current files on disk using `git diff --no-index` and rewriting paths to external paths.
|
|
+ pub fn update_and_get_unified_diff(&mut self) -> Result<Option<String>> {
|
|
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
|
|
+ let current_dir = baseline_dir.join("current");
|
|
+ if current_dir.exists() {
|
|
+ // Best-effort cleanup of previous run's mirror.
|
|
+ let _ = fs::remove_dir_all(&current_dir);
|
|
+ }
|
|
+ fs::create_dir_all(&current_dir).with_context(|| {
|
|
+ format!(
|
|
+ "failed to create current mirror dir {}",
|
|
+ current_dir.display()
|
|
+ )
|
|
+ })?;
|
|
+
|
|
+ let mut aggregated = String::new();
|
|
+
|
|
+ // Compute diffs per tracked internal file.
|
|
+ for (internal, baseline_external) in &self.temp_name_to_baseline_external {
|
|
+ let baseline_path = baseline_dir.join(internal);
|
|
+ let current_external = self
|
|
+ .temp_name_to_current_external
|
|
+ .get(internal)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| baseline_external.clone());
|
|
+
|
|
+ let left_is_dev_null = !baseline_path.exists();
|
|
+ let right_exists = current_external.exists();
|
|
+
|
|
+ // Prepare right side mirror file if exists; otherwise use /dev/null for deletions.
|
|
+ let right_arg = if right_exists {
|
|
+ let mirror_path = current_dir.join(internal);
|
|
+ let contents = fs::read(&current_external).with_context(|| {
|
|
+ format!(
|
|
+ "failed to read current file for diff {}",
|
|
+ current_external.display()
|
|
+ )
|
|
+ })?;
|
|
+ fs::write(&mirror_path, contents).with_context(|| {
|
|
+ format!(
|
|
+ "failed to write current mirror file {}",
|
|
+ mirror_path.display()
|
|
+ )
|
|
+ })?;
|
|
+ // Use relative path from baseline_dir (so headers say a/<uuid> b/current/<uuid>).
|
|
+ format!("current/{internal}")
|
|
+ } else {
|
|
+ // Deletion: right side is /dev/null to show proper deleted file diff.
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+
|
|
+ // Prepare left arg: baseline file path or /dev/null for additions.
|
|
+ let left_arg = if left_is_dev_null {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ internal.clone()
|
|
+ };
|
|
+
|
|
+ // Run git diff --no-index from baseline_dir to keep paths predictable.
|
|
+ let raw = run_git_allow_exit_codes(
|
|
+ &baseline_dir,
|
|
+ &[
|
|
+ "-c",
|
|
+ "color.ui=false",
|
|
+ "diff",
|
|
+ "--no-color",
|
|
+ "--no-index",
|
|
+ "--",
|
|
+ &left_arg,
|
|
+ &right_arg,
|
|
+ ],
|
|
+ &[0, 1], // 0: no changes, 1: differences
|
|
+ )?;
|
|
+
|
|
+ if raw.trim().is_empty() {
|
|
+ continue;
|
|
+ }
|
|
+ let rewritten = self.rewrite_diff_paths(&raw);
|
|
+ if !rewritten.trim().is_empty() {
|
|
+ if !aggregated.is_empty() && !aggregated.ends_with('\n') {
|
|
+ aggregated.push('\n');
|
|
+ }
|
|
+ aggregated.push_str(&rewritten);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ self.unified_diff = if aggregated.trim().is_empty() {
|
|
+ None
|
|
+ } else {
|
|
+ Some(aggregated)
|
|
+ };
|
|
+
|
|
+ // Clean up the current dir.
|
|
+ let _ = fs::remove_dir_all(&current_dir);
|
|
+
|
|
+ Ok(self.unified_diff.clone())
|
|
+ }
|
|
+
|
|
+ fn baseline_dir(&self) -> Result<&Path> {
|
|
+ self.baseline_files_dir
|
|
+ .as_ref()
|
|
+ .map(|d| d.path())
|
|
+ .ok_or_else(|| anyhow::anyhow!("baseline temp dir not initialized"))
|
|
+ }
|
|
+
|
|
+ fn ensure_baseline_dir(&mut self) -> Result<()> {
|
|
+ if self.baseline_files_dir.is_some() {
|
|
+ return Ok(());
|
|
+ }
|
|
+ let tmp = TempDir::new().context("create baseline temp dir")?;
|
|
+ self.baseline_files_dir = Some(tmp);
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ /// Rewrites the internal filenames to external paths in diff headers.
|
|
+ /// Handles inputs like:
|
|
+ /// diff --git a/<uuid> b/current/<uuid>
|
|
+ /// --- a/<uuid> | /dev/null
|
|
+ /// +++ b/current/<uuid> | /dev/null
|
|
+ /// and replaces uuid with the external paths tracking baseline/current.
|
|
+ fn rewrite_diff_paths(&self, diff: &str) -> String {
|
|
+ let mut out = String::new();
|
|
+ for line in diff.lines() {
|
|
+ if let Some(rest) = line.strip_prefix("diff --git ") {
|
|
```
|
|
|
|
> This gets indented pretty far, so maybe it's worth moving to a helper function that takes `(&mut String, temp_name_to_current_external, temp_name_to_baseline_external)`
|
|
|
|
- Created: 2025-08-01 17:09:15 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248466128
|
|
|
|
```diff
|
|
@@ -0,0 +1,476 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use tempfile::TempDir;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+/// Tracks sets of changes to files and exposes the overall unified diff.
|
|
+/// Internally, the way this works is now:
|
|
+/// 1. Create a temp directory to store baseline snapshots of files when they are first seen.
|
|
+/// 2. When a path is first observed, copy its current contents into the baseline dir if it exists on disk.
|
|
+/// For new additions, do not create a baseline file so that diffs are shown as proper additions (using /dev/null).
|
|
+/// 3. Keep a stable internal filename (uuid + same extension) per external path for path rewrite in diffs.
|
|
+/// 4. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk using
|
|
+/// `git diff --no-index` and rewrite paths to external paths.
|
|
+#[derive(Default)]
|
|
+pub struct TurnDiffTracker {
|
|
+ /// Temp directory holding baseline snapshots of files as first seen.
|
|
+ baseline_files_dir: Option<TempDir>,
|
|
+ /// Map external path -> internal filename (uuid + same extension).
|
|
+ external_to_temp_name: HashMap<PathBuf, String>,
|
|
+ /// Internal filename -> external path as of baseline snapshot.
|
|
+ temp_name_to_baseline_external: HashMap<String, PathBuf>,
|
|
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
|
|
+ /// This is where renames are tracked.
|
|
+ temp_name_to_current_external: HashMap<String, PathBuf>,
|
|
+ /// Aggregated unified diff for all accumulated changes across files.
|
|
+ pub unified_diff: Option<String>,
|
|
+}
|
|
+
|
|
+impl TurnDiffTracker {
|
|
+ pub fn new() -> Self {
|
|
+ Self::default()
|
|
+ }
|
|
+
|
|
+ /// Front-run apply patch calls to track the starting contents of any modified files.
|
|
+ /// - Creates a baseline snapshot for files that already exist on disk when first seen.
|
|
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
|
|
+ /// - Also updates internal mappings for move/rename events.
|
|
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
|
|
+ self.ensure_baseline_dir()?;
|
|
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
|
|
+
|
|
+ for (path, change) in changes.iter() {
|
|
+ // Ensure a stable internal filename exists for this external path.
|
|
+ if !self.external_to_temp_name.contains_key(path) {
|
|
+ let internal = uuid_filename_for(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(path.clone(), internal.clone());
|
|
+ self.temp_name_to_baseline_external
|
|
+ .insert(internal.clone(), path.clone());
|
|
+ self.temp_name_to_current_external
|
|
+ .insert(internal.clone(), path.clone());
|
|
+
|
|
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
|
|
+ if path.exists() {
|
|
+ let contents = fs::read(path)
|
|
+ .with_context(|| format!("failed to read original {}", path.display()))?;
|
|
+ let internal_path = baseline_dir.join(&internal);
|
|
+ fs::write(&internal_path, contents).with_context(|| {
|
|
+ format!("failed to write baseline file {}", internal_path.display())
|
|
+ })?;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // Track rename/move in current mapping if provided in an Update.
|
|
+ let move_path = match change {
|
|
+ FileChange::Update {
|
|
+ move_path: Some(dest),
|
|
+ ..
|
|
+ } => Some(dest),
|
|
+ _ => None,
|
|
+ };
|
|
+ if let Some(dest) = move_path {
|
|
+ let uuid_filename = match self.external_to_temp_name.get(path) {
|
|
+ Some(i) => i.clone(),
|
|
+ None => {
|
|
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
|
|
+ let i = uuid_filename_for(path);
|
|
+ self.external_to_temp_name.insert(path.clone(), i.clone());
|
|
+ self.temp_name_to_baseline_external
|
|
+ .insert(i.clone(), path.clone());
|
|
+ i
|
|
+ }
|
|
+ };
|
|
+ // Update current external mapping for temp file name.
|
|
+ self.temp_name_to_current_external
|
|
+ .insert(uuid_filename.clone(), dest.clone());
|
|
+ // Update forward file_mapping: external current -> internal name.
|
|
+ self.external_to_temp_name.remove(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(dest.clone(), uuid_filename);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ /// Recompute the aggregated unified diff by comparing all baseline snapshots against
|
|
+ /// current files on disk using `git diff --no-index` and rewriting paths to external paths.
|
|
+ pub fn update_and_get_unified_diff(&mut self) -> Result<Option<String>> {
|
|
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
|
|
+ let current_dir = baseline_dir.join("current");
|
|
+ if current_dir.exists() {
|
|
+ // Best-effort cleanup of previous run's mirror.
|
|
+ let _ = fs::remove_dir_all(&current_dir);
|
|
+ }
|
|
+ fs::create_dir_all(&current_dir).with_context(|| {
|
|
+ format!(
|
|
+ "failed to create current mirror dir {}",
|
|
+ current_dir.display()
|
|
+ )
|
|
+ })?;
|
|
+
|
|
+ let mut aggregated = String::new();
|
|
+
|
|
+ // Compute diffs per tracked internal file.
|
|
+ for (internal, baseline_external) in &self.temp_name_to_baseline_external {
|
|
+ let baseline_path = baseline_dir.join(internal);
|
|
+ let current_external = self
|
|
+ .temp_name_to_current_external
|
|
+ .get(internal)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| baseline_external.clone());
|
|
+
|
|
+ let left_is_dev_null = !baseline_path.exists();
|
|
+ let right_exists = current_external.exists();
|
|
+
|
|
+ // Prepare right side mirror file if exists; otherwise use /dev/null for deletions.
|
|
+ let right_arg = if right_exists {
|
|
+ let mirror_path = current_dir.join(internal);
|
|
+ let contents = fs::read(&current_external).with_context(|| {
|
|
+ format!(
|
|
+ "failed to read current file for diff {}",
|
|
+ current_external.display()
|
|
+ )
|
|
+ })?;
|
|
+ fs::write(&mirror_path, contents).with_context(|| {
|
|
+ format!(
|
|
+ "failed to write current mirror file {}",
|
|
+ mirror_path.display()
|
|
+ )
|
|
+ })?;
|
|
+ // Use relative path from baseline_dir (so headers say a/<uuid> b/current/<uuid>).
|
|
+ format!("current/{internal}")
|
|
+ } else {
|
|
+ // Deletion: right side is /dev/null to show proper deleted file diff.
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+
|
|
+ // Prepare left arg: baseline file path or /dev/null for additions.
|
|
+ let left_arg = if left_is_dev_null {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ internal.clone()
|
|
+ };
|
|
+
|
|
+ // Run git diff --no-index from baseline_dir to keep paths predictable.
|
|
+ let raw = run_git_allow_exit_codes(
|
|
+ &baseline_dir,
|
|
+ &[
|
|
+ "-c",
|
|
+ "color.ui=false",
|
|
+ "diff",
|
|
+ "--no-color",
|
|
+ "--no-index",
|
|
+ "--",
|
|
+ &left_arg,
|
|
+ &right_arg,
|
|
+ ],
|
|
+ &[0, 1], // 0: no changes, 1: differences
|
|
+ )?;
|
|
+
|
|
+ if raw.trim().is_empty() {
|
|
+ continue;
|
|
+ }
|
|
+ let rewritten = self.rewrite_diff_paths(&raw);
|
|
+ if !rewritten.trim().is_empty() {
|
|
+ if !aggregated.is_empty() && !aggregated.ends_with('\n') {
|
|
+ aggregated.push('\n');
|
|
+ }
|
|
+ aggregated.push_str(&rewritten);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ self.unified_diff = if aggregated.trim().is_empty() {
|
|
+ None
|
|
+ } else {
|
|
+ Some(aggregated)
|
|
+ };
|
|
+
|
|
+ // Clean up the current dir.
|
|
+ let _ = fs::remove_dir_all(&current_dir);
|
|
+
|
|
+ Ok(self.unified_diff.clone())
|
|
+ }
|
|
+
|
|
+ fn baseline_dir(&self) -> Result<&Path> {
|
|
+ self.baseline_files_dir
|
|
+ .as_ref()
|
|
+ .map(|d| d.path())
|
|
+ .ok_or_else(|| anyhow::anyhow!("baseline temp dir not initialized"))
|
|
+ }
|
|
+
|
|
+ fn ensure_baseline_dir(&mut self) -> Result<()> {
|
|
+ if self.baseline_files_dir.is_some() {
|
|
+ return Ok(());
|
|
+ }
|
|
+ let tmp = TempDir::new().context("create baseline temp dir")?;
|
|
+ self.baseline_files_dir = Some(tmp);
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ /// Rewrites the internal filenames to external paths in diff headers.
|
|
+ /// Handles inputs like:
|
|
+ /// diff --git a/<uuid> b/current/<uuid>
|
|
+ /// --- a/<uuid> | /dev/null
|
|
+ /// +++ b/current/<uuid> | /dev/null
|
|
+ /// and replaces uuid with the external paths tracking baseline/current.
|
|
+ fn rewrite_diff_paths(&self, diff: &str) -> String {
|
|
+ let mut out = String::new();
|
|
+ for line in diff.lines() {
|
|
+ if let Some(rest) = line.strip_prefix("diff --git ") {
|
|
+ // Format: diff --git a/<f> b/<f>
|
|
+ let parts: Vec<&str> = rest.split_whitespace().collect();
|
|
+ if parts.len() == 2 {
|
|
```
|
|
|
|
> We don't have to worry about paths with spaces because they're all UUIDs?
|
|
|
|
- Created: 2025-08-01 17:10:28 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248468317
|
|
|
|
```diff
|
|
@@ -0,0 +1,476 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use tempfile::TempDir;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+/// Tracks sets of changes to files and exposes the overall unified diff.
|
|
+/// Internally, the way this works is now:
|
|
+/// 1. Create a temp directory to store baseline snapshots of files when they are first seen.
|
|
+/// 2. When a path is first observed, copy its current contents into the baseline dir if it exists on disk.
|
|
+/// For new additions, do not create a baseline file so that diffs are shown as proper additions (using /dev/null).
|
|
+/// 3. Keep a stable internal filename (uuid + same extension) per external path for path rewrite in diffs.
|
|
+/// 4. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk using
|
|
+/// `git diff --no-index` and rewrite paths to external paths.
|
|
+#[derive(Default)]
|
|
+pub struct TurnDiffTracker {
|
|
+ /// Temp directory holding baseline snapshots of files as first seen.
|
|
+ baseline_files_dir: Option<TempDir>,
|
|
+ /// Map external path -> internal filename (uuid + same extension).
|
|
+ external_to_temp_name: HashMap<PathBuf, String>,
|
|
+ /// Internal filename -> external path as of baseline snapshot.
|
|
+ temp_name_to_baseline_external: HashMap<String, PathBuf>,
|
|
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
|
|
+ /// This is where renames are tracked.
|
|
+ temp_name_to_current_external: HashMap<String, PathBuf>,
|
|
+ /// Aggregated unified diff for all accumulated changes across files.
|
|
+ pub unified_diff: Option<String>,
|
|
+}
|
|
+
|
|
+impl TurnDiffTracker {
|
|
+ pub fn new() -> Self {
|
|
+ Self::default()
|
|
+ }
|
|
+
|
|
+ /// Front-run apply patch calls to track the starting contents of any modified files.
|
|
+ /// - Creates a baseline snapshot for files that already exist on disk when first seen.
|
|
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
|
|
+ /// - Also updates internal mappings for move/rename events.
|
|
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
|
|
+ self.ensure_baseline_dir()?;
|
|
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
|
|
+
|
|
+ for (path, change) in changes.iter() {
|
|
+ // Ensure a stable internal filename exists for this external path.
|
|
+ if !self.external_to_temp_name.contains_key(path) {
|
|
+ let internal = uuid_filename_for(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(path.clone(), internal.clone());
|
|
+ self.temp_name_to_baseline_external
|
|
+ .insert(internal.clone(), path.clone());
|
|
+ self.temp_name_to_current_external
|
|
+ .insert(internal.clone(), path.clone());
|
|
+
|
|
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
|
|
+ if path.exists() {
|
|
+ let contents = fs::read(path)
|
|
+ .with_context(|| format!("failed to read original {}", path.display()))?;
|
|
+ let internal_path = baseline_dir.join(&internal);
|
|
+ fs::write(&internal_path, contents).with_context(|| {
|
|
+ format!("failed to write baseline file {}", internal_path.display())
|
|
+ })?;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // Track rename/move in current mapping if provided in an Update.
|
|
+ let move_path = match change {
|
|
+ FileChange::Update {
|
|
+ move_path: Some(dest),
|
|
+ ..
|
|
+ } => Some(dest),
|
|
+ _ => None,
|
|
+ };
|
|
+ if let Some(dest) = move_path {
|
|
+ let uuid_filename = match self.external_to_temp_name.get(path) {
|
|
+ Some(i) => i.clone(),
|
|
+ None => {
|
|
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
|
|
+ let i = uuid_filename_for(path);
|
|
+ self.external_to_temp_name.insert(path.clone(), i.clone());
|
|
+ self.temp_name_to_baseline_external
|
|
+ .insert(i.clone(), path.clone());
|
|
+ i
|
|
+ }
|
|
+ };
|
|
+ // Update current external mapping for temp file name.
|
|
+ self.temp_name_to_current_external
|
|
+ .insert(uuid_filename.clone(), dest.clone());
|
|
+ // Update forward file_mapping: external current -> internal name.
|
|
+ self.external_to_temp_name.remove(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(dest.clone(), uuid_filename);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ /// Recompute the aggregated unified diff by comparing all baseline snapshots against
|
|
+ /// current files on disk using `git diff --no-index` and rewriting paths to external paths.
|
|
+ pub fn update_and_get_unified_diff(&mut self) -> Result<Option<String>> {
|
|
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
|
|
+ let current_dir = baseline_dir.join("current");
|
|
+ if current_dir.exists() {
|
|
+ // Best-effort cleanup of previous run's mirror.
|
|
+ let _ = fs::remove_dir_all(¤t_dir);
|
|
+ }
|
|
+ fs::create_dir_all(¤t_dir).with_context(|| {
|
|
+ format!(
|
|
+ "failed to create current mirror dir {}",
|
|
+ current_dir.display()
|
|
+ )
|
|
+ })?;
|
|
+
|
|
+ let mut aggregated = String::new();
|
|
+
|
|
+ // Compute diffs per tracked internal file.
|
|
+ for (internal, baseline_external) in &self.temp_name_to_baseline_external {
|
|
+ let baseline_path = baseline_dir.join(internal);
|
|
+ let current_external = self
|
|
+ .temp_name_to_current_external
|
|
+ .get(internal)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| baseline_external.clone());
|
|
+
|
|
+ let left_is_dev_null = !baseline_path.exists();
|
|
+ let right_exists = current_external.exists();
|
|
+
|
|
+ // Prepare right side mirror file if exists; otherwise use /dev/null for deletions.
|
|
+ let right_arg = if right_exists {
|
|
+ let mirror_path = current_dir.join(internal);
|
|
+ let contents = fs::read(¤t_external).with_context(|| {
|
|
+ format!(
|
|
+ "failed to read current file for diff {}",
|
|
+ current_external.display()
|
|
+ )
|
|
+ })?;
|
|
+ fs::write(&mirror_path, contents).with_context(|| {
|
|
+ format!(
|
|
+ "failed to write current mirror file {}",
|
|
+ mirror_path.display()
|
|
+ )
|
|
+ })?;
|
|
+ // Use relative path from baseline_dir (so headers say a/<uuid> b/current/<uuid>).
|
|
+ format!("current/{internal}")
|
|
+ } else {
|
|
+ // Deletion: right side is /dev/null to show proper deleted file diff.
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+
|
|
+ // Prepare left arg: baseline file path or /dev/null for additions.
|
|
+ let left_arg = if left_is_dev_null {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ internal.clone()
|
|
+ };
|
|
+
|
|
+ // Run git diff --no-index from baseline_dir to keep paths predictable.
|
|
+ let raw = run_git_allow_exit_codes(
|
|
+ &baseline_dir,
|
|
+ &[
|
|
+ "-c",
|
|
+ "color.ui=false",
|
|
+ "diff",
|
|
+ "--no-color",
|
|
+ "--no-index",
|
|
+ "--",
|
|
+ &left_arg,
|
|
+ &right_arg,
|
|
+ ],
|
|
+ &[0, 1], // 0: no changes, 1: differences
|
|
+ )?;
|
|
+
|
|
+ if raw.trim().is_empty() {
|
|
+ continue;
|
|
+ }
|
|
+ let rewritten = self.rewrite_diff_paths(&raw);
|
|
+ if !rewritten.trim().is_empty() {
|
|
+ if !aggregated.is_empty() && !aggregated.ends_with('\n') {
|
|
+ aggregated.push('\n');
|
|
+ }
|
|
+ aggregated.push_str(&rewritten);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ self.unified_diff = if aggregated.trim().is_empty() {
|
|
+ None
|
|
+ } else {
|
|
+ Some(aggregated)
|
|
+ };
|
|
+
|
|
+ // Clean up the current dir.
|
|
+ let _ = fs::remove_dir_all(¤t_dir);
|
|
+
|
|
+ Ok(self.unified_diff.clone())
|
|
+ }
|
|
+
|
|
+ fn baseline_dir(&self) -> Result<&Path> {
|
|
+ self.baseline_files_dir
|
|
+ .as_ref()
|
|
+ .map(|d| d.path())
|
|
+ .ok_or_else(|| anyhow::anyhow!("baseline temp dir not initialized"))
|
|
+ }
|
|
+
|
|
+ fn ensure_baseline_dir(&mut self) -> Result<()> {
|
|
+ if self.baseline_files_dir.is_some() {
|
|
+ return Ok(());
|
|
+ }
|
|
+ let tmp = TempDir::new().context("create baseline temp dir")?;
|
|
+ self.baseline_files_dir = Some(tmp);
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ /// Rewrites the internal filenames to external paths in diff headers.
|
|
+ /// Handles inputs like:
|
|
+ /// diff --git a/<uuid> b/current/<uuid>
|
|
+ /// --- a/<uuid> | /dev/null
|
|
+ /// +++ b/current/<uuid> | /dev/null
|
|
+ /// and replaces uuid with the external paths tracking baseline/current.
|
|
+ fn rewrite_diff_paths(&self, diff: &str) -> String {
|
|
+ let mut out = String::new();
|
|
+ for line in diff.lines() {
|
|
+ if let Some(rest) = line.strip_prefix("diff --git ") {
|
|
+ // Format: diff --git a/<f> b/<f>
|
|
+ let parts: Vec<&str> = rest.split_whitespace().collect();
|
|
+ if parts.len() == 2 {
|
|
+ let a = parts[0].strip_prefix("a/").unwrap_or(parts[0]);
|
|
+ let b = parts[1].strip_prefix("b/").unwrap_or(parts[1]);
|
|
+
|
|
+ let a_ext_display = if a == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let a_base = Path::new(a)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(a);
|
|
+ let mapped = self
|
|
+ .temp_name_to_baseline_external
|
|
+ .get(a_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(a));
|
|
+ mapped.display().to_string()
|
|
+ };
|
|
+
|
|
+ let b_ext_display = if b == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let b_base = Path::new(b)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(b);
|
|
+ let mapped = self
|
|
+ .temp_name_to_current_external
|
|
+ .get(b_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(b));
|
|
+ mapped.display().to_string()
|
|
+ };
|
|
+
|
|
+ out.push_str(&format!("diff --git a/{a_ext_display} b/{b_ext_display}\n"));
|
|
+ continue;
|
|
+ }
|
|
+ }
|
|
+ if let Some(rest) = line.strip_prefix("--- ") {
|
|
+ if let Some(path) = rest.strip_prefix("a/") {
|
|
+ let external_display = if path == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let p_base = Path::new(path)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(path);
|
|
+ self.temp_name_to_baseline_external
|
|
+ .get(p_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(path))
|
|
+ .display()
|
|
+ .to_string()
|
|
+ };
|
|
+ out.push_str(&format!("--- {external_display}\n"));
|
|
+ continue;
|
|
+ }
|
|
+ }
|
|
+ if let Some(rest) = line.strip_prefix("+++ ") {
|
|
+ if let Some(path) = rest.strip_prefix("b/") {
|
|
+ let external_display = if path == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let p_base = Path::new(path)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(path);
|
|
+ self.temp_name_to_current_external
|
|
+ .get(p_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(path))
|
|
+ .display()
|
|
+ .to_string()
|
|
+ };
|
|
+ out.push_str(&format!("+++ {external_display}\n"));
|
|
+ continue;
|
|
+ }
|
|
+ }
|
|
+ out.push_str(line);
|
|
+ out.push('\n');
|
|
+ }
|
|
+ out
|
|
+ }
|
|
+}
|
|
+
|
|
+fn uuid_filename_for(path: &Path) -> String {
|
|
+ let id = Uuid::new_v4().to_string();
|
|
+ match path.extension().and_then(|e| e.to_str()) {
|
|
```
|
|
|
|
> Why preserve the `ext`, btw? It could, in theory, contain a space, right?
|
|
|
|
- Created: 2025-08-01 17:10:59 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248469411
|
|
|
|
```diff
|
|
@@ -0,0 +1,476 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use tempfile::TempDir;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+/// Tracks sets of changes to files and exposes the overall unified diff.
|
|
+/// Internally, the way this works is now:
|
|
+/// 1. Create a temp directory to store baseline snapshots of files when they are first seen.
|
|
+/// 2. When a path is first observed, copy its current contents into the baseline dir if it exists on disk.
|
|
+/// For new additions, do not create a baseline file so that diffs are shown as proper additions (using /dev/null).
|
|
+/// 3. Keep a stable internal filename (uuid + same extension) per external path for path rewrite in diffs.
|
|
+/// 4. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk using
|
|
+/// `git diff --no-index` and rewrite paths to external paths.
|
|
+#[derive(Default)]
|
|
+pub struct TurnDiffTracker {
|
|
+ /// Temp directory holding baseline snapshots of files as first seen.
|
|
+ baseline_files_dir: Option<TempDir>,
|
|
+ /// Map external path -> internal filename (uuid + same extension).
|
|
+ external_to_temp_name: HashMap<PathBuf, String>,
|
|
+ /// Internal filename -> external path as of baseline snapshot.
|
|
+ temp_name_to_baseline_external: HashMap<String, PathBuf>,
|
|
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
|
|
+ /// This is where renames are tracked.
|
|
+ temp_name_to_current_external: HashMap<String, PathBuf>,
|
|
+ /// Aggregated unified diff for all accumulated changes across files.
|
|
+ pub unified_diff: Option<String>,
|
|
+}
|
|
+
|
|
+impl TurnDiffTracker {
|
|
+ pub fn new() -> Self {
|
|
+ Self::default()
|
|
+ }
|
|
+
|
|
+ /// Front-run apply patch calls to track the starting contents of any modified files.
|
|
+ /// - Creates a baseline snapshot for files that already exist on disk when first seen.
|
|
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
|
|
+ /// - Also updates internal mappings for move/rename events.
|
|
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
|
|
+ self.ensure_baseline_dir()?;
|
|
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
|
|
+
|
|
+ for (path, change) in changes.iter() {
|
|
+ // Ensure a stable internal filename exists for this external path.
|
|
+ if !self.external_to_temp_name.contains_key(path) {
|
|
+ let internal = uuid_filename_for(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(path.clone(), internal.clone());
|
|
+ self.temp_name_to_baseline_external
|
|
+ .insert(internal.clone(), path.clone());
|
|
+ self.temp_name_to_current_external
|
|
+ .insert(internal.clone(), path.clone());
|
|
+
|
|
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
|
|
+ if path.exists() {
|
|
+ let contents = fs::read(path)
|
|
+ .with_context(|| format!("failed to read original {}", path.display()))?;
|
|
+ let internal_path = baseline_dir.join(&internal);
|
|
+ fs::write(&internal_path, contents).with_context(|| {
|
|
+ format!("failed to write baseline file {}", internal_path.display())
|
|
+ })?;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // Track rename/move in current mapping if provided in an Update.
|
|
+ let move_path = match change {
|
|
+ FileChange::Update {
|
|
+ move_path: Some(dest),
|
|
+ ..
|
|
+ } => Some(dest),
|
|
+ _ => None,
|
|
+ };
|
|
+ if let Some(dest) = move_path {
|
|
+ let uuid_filename = match self.external_to_temp_name.get(path) {
|
|
+ Some(i) => i.clone(),
|
|
+ None => {
|
|
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
|
|
+ let i = uuid_filename_for(path);
|
|
+ self.external_to_temp_name.insert(path.clone(), i.clone());
|
|
+ self.temp_name_to_baseline_external
|
|
+ .insert(i.clone(), path.clone());
|
|
+ i
|
|
+ }
|
|
+ };
|
|
+ // Update current external mapping for temp file name.
|
|
+ self.temp_name_to_current_external
|
|
+ .insert(uuid_filename.clone(), dest.clone());
|
|
+ // Update forward file_mapping: external current -> internal name.
|
|
+ self.external_to_temp_name.remove(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(dest.clone(), uuid_filename);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ /// Recompute the aggregated unified diff by comparing all baseline snapshots against
|
|
+ /// current files on disk using `git diff --no-index` and rewriting paths to external paths.
|
|
+ pub fn update_and_get_unified_diff(&mut self) -> Result<Option<String>> {
|
|
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
|
|
+ let current_dir = baseline_dir.join("current");
|
|
+ if current_dir.exists() {
|
|
+ // Best-effort cleanup of previous run's mirror.
|
|
+ let _ = fs::remove_dir_all(¤t_dir);
|
|
+ }
|
|
+ fs::create_dir_all(¤t_dir).with_context(|| {
|
|
+ format!(
|
|
+ "failed to create current mirror dir {}",
|
|
+ current_dir.display()
|
|
+ )
|
|
+ })?;
|
|
+
|
|
+ let mut aggregated = String::new();
|
|
+
|
|
+ // Compute diffs per tracked internal file.
|
|
+ for (internal, baseline_external) in &self.temp_name_to_baseline_external {
|
|
+ let baseline_path = baseline_dir.join(internal);
|
|
+ let current_external = self
|
|
+ .temp_name_to_current_external
|
|
+ .get(internal)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| baseline_external.clone());
|
|
+
|
|
+ let left_is_dev_null = !baseline_path.exists();
|
|
+ let right_exists = current_external.exists();
|
|
+
|
|
+ // Prepare right side mirror file if exists; otherwise use /dev/null for deletions.
|
|
+ let right_arg = if right_exists {
|
|
+ let mirror_path = current_dir.join(internal);
|
|
+ let contents = fs::read(¤t_external).with_context(|| {
|
|
+ format!(
|
|
+ "failed to read current file for diff {}",
|
|
+ current_external.display()
|
|
+ )
|
|
+ })?;
|
|
+ fs::write(&mirror_path, contents).with_context(|| {
|
|
+ format!(
|
|
+ "failed to write current mirror file {}",
|
|
+ mirror_path.display()
|
|
+ )
|
|
+ })?;
|
|
+ // Use relative path from baseline_dir (so headers say a/<uuid> b/current/<uuid>).
|
|
+ format!("current/{internal}")
|
|
+ } else {
|
|
+ // Deletion: right side is /dev/null to show proper deleted file diff.
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+
|
|
+ // Prepare left arg: baseline file path or /dev/null for additions.
|
|
+ let left_arg = if left_is_dev_null {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ internal.clone()
|
|
+ };
|
|
+
|
|
+ // Run git diff --no-index from baseline_dir to keep paths predictable.
|
|
+ let raw = run_git_allow_exit_codes(
|
|
+ &baseline_dir,
|
|
+ &[
|
|
+ "-c",
|
|
+ "color.ui=false",
|
|
+ "diff",
|
|
+ "--no-color",
|
|
+ "--no-index",
|
|
+ "--",
|
|
+ &left_arg,
|
|
+ &right_arg,
|
|
+ ],
|
|
+ &[0, 1], // 0: no changes, 1: differences
|
|
+ )?;
|
|
+
|
|
+ if raw.trim().is_empty() {
|
|
+ continue;
|
|
+ }
|
|
+ let rewritten = self.rewrite_diff_paths(&raw);
|
|
+ if !rewritten.trim().is_empty() {
|
|
+ if !aggregated.is_empty() && !aggregated.ends_with('\n') {
|
|
+ aggregated.push('\n');
|
|
+ }
|
|
+ aggregated.push_str(&rewritten);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ self.unified_diff = if aggregated.trim().is_empty() {
|
|
+ None
|
|
+ } else {
|
|
+ Some(aggregated)
|
|
+ };
|
|
+
|
|
+ // Clean up the current dir.
|
|
+ let _ = fs::remove_dir_all(¤t_dir);
|
|
+
|
|
+ Ok(self.unified_diff.clone())
|
|
+ }
|
|
+
|
|
+ fn baseline_dir(&self) -> Result<&Path> {
|
|
+ self.baseline_files_dir
|
|
+ .as_ref()
|
|
+ .map(|d| d.path())
|
|
+ .ok_or_else(|| anyhow::anyhow!("baseline temp dir not initialized"))
|
|
+ }
|
|
+
|
|
+ fn ensure_baseline_dir(&mut self) -> Result<()> {
|
|
+ if self.baseline_files_dir.is_some() {
|
|
+ return Ok(());
|
|
+ }
|
|
+ let tmp = TempDir::new().context("create baseline temp dir")?;
|
|
+ self.baseline_files_dir = Some(tmp);
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ /// Rewrites the internal filenames to external paths in diff headers.
|
|
+ /// Handles inputs like:
|
|
+ /// diff --git a/<uuid> b/current/<uuid>
|
|
+ /// --- a/<uuid> | /dev/null
|
|
+ /// +++ b/current/<uuid> | /dev/null
|
|
+ /// and replaces uuid with the external paths tracking baseline/current.
|
|
+ fn rewrite_diff_paths(&self, diff: &str) -> String {
|
|
+ let mut out = String::new();
|
|
+ for line in diff.lines() {
|
|
+ if let Some(rest) = line.strip_prefix("diff --git ") {
|
|
+ // Format: diff --git a/<f> b/<f>
|
|
+ let parts: Vec<&str> = rest.split_whitespace().collect();
|
|
+ if parts.len() == 2 {
|
|
+ let a = parts[0].strip_prefix("a/").unwrap_or(parts[0]);
|
|
+ let b = parts[1].strip_prefix("b/").unwrap_or(parts[1]);
|
|
+
|
|
+ let a_ext_display = if a == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let a_base = Path::new(a)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(a);
|
|
+ let mapped = self
|
|
+ .temp_name_to_baseline_external
|
|
+ .get(a_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(a));
|
|
+ mapped.display().to_string()
|
|
+ };
|
|
+
|
|
+ let b_ext_display = if b == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let b_base = Path::new(b)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(b);
|
|
+ let mapped = self
|
|
+ .temp_name_to_current_external
|
|
+ .get(b_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(b));
|
|
+ mapped.display().to_string()
|
|
+ };
|
|
+
|
|
+ out.push_str(&format!("diff --git a/{a_ext_display} b/{b_ext_display}\n"));
|
|
+ continue;
|
|
+ }
|
|
+ }
|
|
+ if let Some(rest) = line.strip_prefix("--- ") {
|
|
+ if let Some(path) = rest.strip_prefix("a/") {
|
|
+ let external_display = if path == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let p_base = Path::new(path)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(path);
|
|
+ self.temp_name_to_baseline_external
|
|
+ .get(p_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(path))
|
|
+ .display()
|
|
+ .to_string()
|
|
+ };
|
|
+ out.push_str(&format!("--- {external_display}\n"));
|
|
+ continue;
|
|
+ }
|
|
+ }
|
|
+ if let Some(rest) = line.strip_prefix("+++ ") {
|
|
+ if let Some(path) = rest.strip_prefix("b/") {
|
|
+ let external_display = if path == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let p_base = Path::new(path)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(path);
|
|
+ self.temp_name_to_current_external
|
|
+ .get(p_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(path))
|
|
+ .display()
|
|
+ .to_string()
|
|
+ };
|
|
+ out.push_str(&format!("+++ {external_display}\n"));
|
|
+ continue;
|
|
+ }
|
|
+ }
|
|
+ out.push_str(line);
|
|
+ out.push('\n');
|
|
+ }
|
|
+ out
|
|
+ }
|
|
+}
|
|
+
|
|
+fn uuid_filename_for(path: &Path) -> String {
|
|
+ let id = Uuid::new_v4().to_string();
|
|
+ match path.extension().and_then(|e| e.to_str()) {
|
|
+ Some(ext) if !ext.is_empty() => format!("{id}.{ext}"),
|
|
+ _ => id,
|
|
+ }
|
|
+}
|
|
+
|
|
+fn run_git_allow_exit_codes(
|
|
+ repo: &Path,
|
|
+ args: &[&str],
|
|
+ allowed_exit_codes: &[i32],
|
|
+) -> Result<String> {
|
|
+ let output = Command::new("git")
|
|
```
|
|
|
|
> Should we make this async and use `tokio::Command`?
|
|
|
|
- Created: 2025-08-01 17:12:45 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248474046
|
|
|
|
```diff
|
|
@@ -0,0 +1,476 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use tempfile::TempDir;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+/// Tracks sets of changes to files and exposes the overall unified diff.
|
|
+/// Internally, the way this works is now:
|
|
+/// 1. Create a temp directory to store baseline snapshots of files when they are first seen.
|
|
+/// 2. When a path is first observed, copy its current contents into the baseline dir if it exists on disk.
|
|
+/// For new additions, do not create a baseline file so that diffs are shown as proper additions (using /dev/null).
|
|
+/// 3. Keep a stable internal filename (uuid + same extension) per external path for path rewrite in diffs.
|
|
+/// 4. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk using
|
|
+/// `git diff --no-index` and rewrite paths to external paths.
|
|
+#[derive(Default)]
|
|
+pub struct TurnDiffTracker {
|
|
+ /// Temp directory holding baseline snapshots of files as first seen.
|
|
+ baseline_files_dir: Option<TempDir>,
|
|
+ /// Map external path -> internal filename (uuid + same extension).
|
|
+ external_to_temp_name: HashMap<PathBuf, String>,
|
|
+ /// Internal filename -> external path as of baseline snapshot.
|
|
+ temp_name_to_baseline_external: HashMap<String, PathBuf>,
|
|
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
|
|
+ /// This is where renames are tracked.
|
|
+ temp_name_to_current_external: HashMap<String, PathBuf>,
|
|
+ /// Aggregated unified diff for all accumulated changes across files.
|
|
+ pub unified_diff: Option<String>,
|
|
+}
|
|
+
|
|
+impl TurnDiffTracker {
|
|
+ pub fn new() -> Self {
|
|
+ Self::default()
|
|
+ }
|
|
+
|
|
+ /// Front-run apply patch calls to track the starting contents of any modified files.
|
|
+ /// - Creates a baseline snapshot for files that already exist on disk when first seen.
|
|
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
|
|
+ /// - Also updates internal mappings for move/rename events.
|
|
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
|
|
+ self.ensure_baseline_dir()?;
|
|
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
|
|
+
|
|
+ for (path, change) in changes.iter() {
|
|
+ // Ensure a stable internal filename exists for this external path.
|
|
+ if !self.external_to_temp_name.contains_key(path) {
|
|
+ let internal = uuid_filename_for(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(path.clone(), internal.clone());
|
|
+ self.temp_name_to_baseline_external
|
|
+ .insert(internal.clone(), path.clone());
|
|
+ self.temp_name_to_current_external
|
|
+ .insert(internal.clone(), path.clone());
|
|
+
|
|
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
|
|
+ if path.exists() {
|
|
+ let contents = fs::read(path)
|
|
+ .with_context(|| format!("failed to read original {}", path.display()))?;
|
|
+ let internal_path = baseline_dir.join(&internal);
|
|
+ fs::write(&internal_path, contents).with_context(|| {
|
|
+ format!("failed to write baseline file {}", internal_path.display())
|
|
+ })?;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // Track rename/move in current mapping if provided in an Update.
|
|
+ let move_path = match change {
|
|
+ FileChange::Update {
|
|
+ move_path: Some(dest),
|
|
+ ..
|
|
+ } => Some(dest),
|
|
+ _ => None,
|
|
+ };
|
|
+ if let Some(dest) = move_path {
|
|
+ let uuid_filename = match self.external_to_temp_name.get(path) {
|
|
+ Some(i) => i.clone(),
|
|
+ None => {
|
|
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
|
|
+ let i = uuid_filename_for(path);
|
|
+ self.external_to_temp_name.insert(path.clone(), i.clone());
|
|
+ self.temp_name_to_baseline_external
|
|
+ .insert(i.clone(), path.clone());
|
|
+ i
|
|
+ }
|
|
+ };
|
|
+ // Update current external mapping for temp file name.
|
|
+ self.temp_name_to_current_external
|
|
+ .insert(uuid_filename.clone(), dest.clone());
|
|
+ // Update forward file_mapping: external current -> internal name.
|
|
+ self.external_to_temp_name.remove(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(dest.clone(), uuid_filename);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ /// Recompute the aggregated unified diff by comparing all baseline snapshots against
|
|
+ /// current files on disk using `git diff --no-index` and rewriting paths to external paths.
|
|
+ pub fn update_and_get_unified_diff(&mut self) -> Result<Option<String>> {
|
|
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
|
|
+ let current_dir = baseline_dir.join("current");
|
|
+ if current_dir.exists() {
|
|
+ // Best-effort cleanup of previous run's mirror.
|
|
+ let _ = fs::remove_dir_all(¤t_dir);
|
|
+ }
|
|
+ fs::create_dir_all(¤t_dir).with_context(|| {
|
|
+ format!(
|
|
+ "failed to create current mirror dir {}",
|
|
+ current_dir.display()
|
|
+ )
|
|
+ })?;
|
|
+
|
|
+ let mut aggregated = String::new();
|
|
+
|
|
+ // Compute diffs per tracked internal file.
|
|
+ for (internal, baseline_external) in &self.temp_name_to_baseline_external {
|
|
+ let baseline_path = baseline_dir.join(internal);
|
|
+ let current_external = self
|
|
+ .temp_name_to_current_external
|
|
+ .get(internal)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| baseline_external.clone());
|
|
+
|
|
+ let left_is_dev_null = !baseline_path.exists();
|
|
+ let right_exists = current_external.exists();
|
|
+
|
|
+ // Prepare right side mirror file if exists; otherwise use /dev/null for deletions.
|
|
+ let right_arg = if right_exists {
|
|
+ let mirror_path = current_dir.join(internal);
|
|
+ let contents = fs::read(¤t_external).with_context(|| {
|
|
+ format!(
|
|
+ "failed to read current file for diff {}",
|
|
+ current_external.display()
|
|
+ )
|
|
+ })?;
|
|
+ fs::write(&mirror_path, contents).with_context(|| {
|
|
+ format!(
|
|
+ "failed to write current mirror file {}",
|
|
+ mirror_path.display()
|
|
+ )
|
|
+ })?;
|
|
+ // Use relative path from baseline_dir (so headers say a/<uuid> b/current/<uuid>).
|
|
+ format!("current/{internal}")
|
|
+ } else {
|
|
+ // Deletion: right side is /dev/null to show proper deleted file diff.
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+
|
|
+ // Prepare left arg: baseline file path or /dev/null for additions.
|
|
+ let left_arg = if left_is_dev_null {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ internal.clone()
|
|
+ };
|
|
+
|
|
+ // Run git diff --no-index from baseline_dir to keep paths predictable.
|
|
+ let raw = run_git_allow_exit_codes(
|
|
+ &baseline_dir,
|
|
+ &[
|
|
+ "-c",
|
|
+ "color.ui=false",
|
|
+ "diff",
|
|
+ "--no-color",
|
|
+ "--no-index",
|
|
+ "--",
|
|
+ &left_arg,
|
|
+ &right_arg,
|
|
+ ],
|
|
+ &[0, 1], // 0: no changes, 1: differences
|
|
+ )?;
|
|
+
|
|
+ if raw.trim().is_empty() {
|
|
+ continue;
|
|
+ }
|
|
+ let rewritten = self.rewrite_diff_paths(&raw);
|
|
+ if !rewritten.trim().is_empty() {
|
|
+ if !aggregated.is_empty() && !aggregated.ends_with('\n') {
|
|
+ aggregated.push('\n');
|
|
+ }
|
|
+ aggregated.push_str(&rewritten);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ self.unified_diff = if aggregated.trim().is_empty() {
|
|
+ None
|
|
+ } else {
|
|
+ Some(aggregated)
|
|
+ };
|
|
+
|
|
+ // Clean up the current dir.
|
|
+ let _ = fs::remove_dir_all(¤t_dir);
|
|
+
|
|
+ Ok(self.unified_diff.clone())
|
|
+ }
|
|
+
|
|
+ fn baseline_dir(&self) -> Result<&Path> {
|
|
+ self.baseline_files_dir
|
|
+ .as_ref()
|
|
+ .map(|d| d.path())
|
|
+ .ok_or_else(|| anyhow::anyhow!("baseline temp dir not initialized"))
|
|
+ }
|
|
+
|
|
+ fn ensure_baseline_dir(&mut self) -> Result<()> {
|
|
+ if self.baseline_files_dir.is_some() {
|
|
+ return Ok(());
|
|
+ }
|
|
+ let tmp = TempDir::new().context("create baseline temp dir")?;
|
|
+ self.baseline_files_dir = Some(tmp);
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ /// Rewrites the internal filenames to external paths in diff headers.
|
|
+ /// Handles inputs like:
|
|
+ /// diff --git a/<uuid> b/current/<uuid>
|
|
+ /// --- a/<uuid> | /dev/null
|
|
+ /// +++ b/current/<uuid> | /dev/null
|
|
+ /// and replaces uuid with the external paths tracking baseline/current.
|
|
+ fn rewrite_diff_paths(&self, diff: &str) -> String {
|
|
+ let mut out = String::new();
|
|
+ for line in diff.lines() {
|
|
+ if let Some(rest) = line.strip_prefix("diff --git ") {
|
|
+ // Format: diff --git a/<f> b/<f>
|
|
+ let parts: Vec<&str> = rest.split_whitespace().collect();
|
|
+ if parts.len() == 2 {
|
|
+ let a = parts[0].strip_prefix("a/").unwrap_or(parts[0]);
|
|
+ let b = parts[1].strip_prefix("b/").unwrap_or(parts[1]);
|
|
+
|
|
+ let a_ext_display = if a == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let a_base = Path::new(a)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(a);
|
|
+ let mapped = self
|
|
+ .temp_name_to_baseline_external
|
|
+ .get(a_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(a));
|
|
+ mapped.display().to_string()
|
|
+ };
|
|
+
|
|
+ let b_ext_display = if b == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let b_base = Path::new(b)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(b);
|
|
+ let mapped = self
|
|
+ .temp_name_to_current_external
|
|
+ .get(b_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(b));
|
|
+ mapped.display().to_string()
|
|
+ };
|
|
+
|
|
+ out.push_str(&format!("diff --git a/{a_ext_display} b/{b_ext_display}\n"));
|
|
+ continue;
|
|
+ }
|
|
+ }
|
|
+ if let Some(rest) = line.strip_prefix("--- ") {
|
|
+ if let Some(path) = rest.strip_prefix("a/") {
|
|
+ let external_display = if path == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let p_base = Path::new(path)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(path);
|
|
+ self.temp_name_to_baseline_external
|
|
+ .get(p_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(path))
|
|
+ .display()
|
|
+ .to_string()
|
|
+ };
|
|
+ out.push_str(&format!("--- {external_display}\n"));
|
|
+ continue;
|
|
+ }
|
|
+ }
|
|
+ if let Some(rest) = line.strip_prefix("+++ ") {
|
|
+ if let Some(path) = rest.strip_prefix("b/") {
|
|
+ let external_display = if path == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let p_base = Path::new(path)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(path);
|
|
+ self.temp_name_to_current_external
|
|
+ .get(p_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(path))
|
|
+ .display()
|
|
+ .to_string()
|
|
+ };
|
|
+ out.push_str(&format!("+++ {external_display}\n"));
|
|
+ continue;
|
|
+ }
|
|
+ }
|
|
+ out.push_str(line);
|
|
+ out.push('\n');
|
|
+ }
|
|
+ out
|
|
+ }
|
|
+}
|
|
+
|
|
+fn uuid_filename_for(path: &Path) -> String {
|
|
+ let id = Uuid::new_v4().to_string();
|
|
+ match path.extension().and_then(|e| e.to_str()) {
|
|
+ Some(ext) if !ext.is_empty() => format!("{id}.{ext}"),
|
|
+ _ => id,
|
|
+ }
|
|
+}
|
|
+
|
|
+fn run_git_allow_exit_codes(
|
|
+ repo: &Path,
|
|
+ args: &[&str],
|
|
+ allowed_exit_codes: &[i32],
|
|
+) -> Result<String> {
|
|
+ let output = Command::new("git")
|
|
+ .current_dir(repo)
|
|
+ .args(args)
|
|
+ .output()
|
|
+ .with_context(|| format!("failed to run git {:?} in {}", args, repo.display()))?;
|
|
+ let code = output.status.code().unwrap_or(-1);
|
|
+ if !allowed_exit_codes.contains(&code) {
|
|
+ anyhow::bail!(
|
|
+ "git {:?} failed with status {:?}: {}",
|
|
+ args,
|
|
+ output.status,
|
|
+ String::from_utf8_lossy(&output.stderr)
|
|
+ );
|
|
+ }
|
|
+ Ok(String::from_utf8_lossy(&output.stdout).into_owned())
|
|
+}
|
|
+
|
|
+#[cfg(test)]
|
|
+mod tests {
|
|
+ #![allow(clippy::unwrap_used)]
|
|
+ use super::*;
|
|
+ use tempfile::tempdir;
|
|
+
|
|
+ #[test]
|
|
+ fn accumulates_add_and_update() {
|
|
+ let mut acc = TurnDiffTracker::new();
|
|
+
|
|
+ let dir = tempdir().unwrap();
|
|
+ let file = dir.path().join("a.txt");
|
|
+
|
|
+ // First patch: add file (baseline should be /dev/null).
|
|
+ let add_changes = HashMap::from([(
|
|
+ file.clone(),
|
|
+ FileChange::Add {
|
|
+ content: "foo\n".to_string(),
|
|
+ },
|
|
+ )]);
|
|
+ acc.on_patch_begin(&add_changes).unwrap();
|
|
+
|
|
+ // Simulate apply: create the file on disk.
|
|
+ // This must happen after on_patch_begin.
|
|
+ fs::write(&file, "foo\n").unwrap();
|
|
+ acc.update_and_get_unified_diff().unwrap();
|
|
+ let first = acc.unified_diff.clone().unwrap();
|
|
+ assert!(first.contains("+foo"));
|
|
```
|
|
|
|
> Instead of `contains()` checks, can these all be full `assert_eq!()` checks?

- Created: 2025-08-01 17:13:43 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248476408
|
|
|
|
```diff
|
|
@@ -0,0 +1,476 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use tempfile::TempDir;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+/// Tracks sets of changes to files and exposes the overall unified diff.
|
|
+/// Internally, the way this works is now:
|
|
+/// 1. Create a temp directory to store baseline snapshots of files when they are first seen.
|
|
+/// 2. When a path is first observed, copy its current contents into the baseline dir if it exists on disk.
|
|
+/// For new additions, do not create a baseline file so that diffs are shown as proper additions (using /dev/null).
|
|
+/// 3. Keep a stable internal filename (uuid + same extension) per external path for path rewrite in diffs.
|
|
+/// 4. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk using
|
|
+/// `git diff --no-index` and rewrite paths to external paths.
|
|
+#[derive(Default)]
|
|
+pub struct TurnDiffTracker {
|
|
+ /// Temp directory holding baseline snapshots of files as first seen.
|
|
+ baseline_files_dir: Option<TempDir>,
|
|
+ /// Map external path -> internal filename (uuid + same extension).
|
|
+ external_to_temp_name: HashMap<PathBuf, String>,
|
|
+ /// Internal filename -> external path as of baseline snapshot.
|
|
+ temp_name_to_baseline_external: HashMap<String, PathBuf>,
|
|
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
|
|
+ /// This is where renames are tracked.
|
|
+ temp_name_to_current_external: HashMap<String, PathBuf>,
|
|
+ /// Aggregated unified diff for all accumulated changes across files.
|
|
+ pub unified_diff: Option<String>,
|
|
+}
|
|
+
|
|
+impl TurnDiffTracker {
|
|
+ pub fn new() -> Self {
|
|
+ Self::default()
|
|
+ }
|
|
+
|
|
+ /// Front-run apply patch calls to track the starting contents of any modified files.
|
|
+ /// - Creates a baseline snapshot for files that already exist on disk when first seen.
|
|
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
|
|
+ /// - Also updates internal mappings for move/rename events.
|
|
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
|
|
+ self.ensure_baseline_dir()?;
|
|
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
|
|
+
|
|
+ for (path, change) in changes.iter() {
|
|
+ // Ensure a stable internal filename exists for this external path.
|
|
+ if !self.external_to_temp_name.contains_key(path) {
|
|
+ let internal = uuid_filename_for(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(path.clone(), internal.clone());
|
|
+ self.temp_name_to_baseline_external
|
|
+ .insert(internal.clone(), path.clone());
|
|
+ self.temp_name_to_current_external
|
|
+ .insert(internal.clone(), path.clone());
|
|
+
|
|
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
|
|
+ if path.exists() {
|
|
+ let contents = fs::read(path)
|
|
+ .with_context(|| format!("failed to read original {}", path.display()))?;
|
|
+ let internal_path = baseline_dir.join(&internal);
|
|
+ fs::write(&internal_path, contents).with_context(|| {
|
|
+ format!("failed to write baseline file {}", internal_path.display())
|
|
+ })?;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // Track rename/move in current mapping if provided in an Update.
|
|
+ let move_path = match change {
|
|
+ FileChange::Update {
|
|
+ move_path: Some(dest),
|
|
+ ..
|
|
+ } => Some(dest),
|
|
+ _ => None,
|
|
+ };
|
|
+ if let Some(dest) = move_path {
|
|
+ let uuid_filename = match self.external_to_temp_name.get(path) {
|
|
+ Some(i) => i.clone(),
|
|
+ None => {
|
|
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
|
|
+ let i = uuid_filename_for(path);
|
|
+ self.external_to_temp_name.insert(path.clone(), i.clone());
|
|
+ self.temp_name_to_baseline_external
|
|
+ .insert(i.clone(), path.clone());
|
|
+ i
|
|
+ }
|
|
+ };
|
|
+ // Update current external mapping for temp file name.
|
|
+ self.temp_name_to_current_external
|
|
+ .insert(uuid_filename.clone(), dest.clone());
|
|
+ // Update forward file_mapping: external current -> internal name.
|
|
+ self.external_to_temp_name.remove(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(dest.clone(), uuid_filename);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ /// Recompute the aggregated unified diff by comparing all baseline snapshots against
|
|
+ /// current files on disk using `git diff --no-index` and rewriting paths to external paths.
|
|
+ pub fn update_and_get_unified_diff(&mut self) -> Result<Option<String>> {
|
|
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
|
|
+ let current_dir = baseline_dir.join("current");
|
|
+ if current_dir.exists() {
|
|
+ // Best-effort cleanup of previous run's mirror.
|
|
+ let _ = fs::remove_dir_all(¤t_dir);
|
|
+ }
|
|
+ fs::create_dir_all(¤t_dir).with_context(|| {
|
|
+ format!(
|
|
+ "failed to create current mirror dir {}",
|
|
+ current_dir.display()
|
|
+ )
|
|
+ })?;
|
|
+
|
|
+ let mut aggregated = String::new();
|
|
+
|
|
+ // Compute diffs per tracked internal file.
|
|
+ for (internal, baseline_external) in &self.temp_name_to_baseline_external {
|
|
+ let baseline_path = baseline_dir.join(internal);
|
|
+ let current_external = self
|
|
+ .temp_name_to_current_external
|
|
+ .get(internal)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| baseline_external.clone());
|
|
+
|
|
+ let left_is_dev_null = !baseline_path.exists();
|
|
+ let right_exists = current_external.exists();
|
|
+
|
|
+ // Prepare right side mirror file if exists; otherwise use /dev/null for deletions.
|
|
+ let right_arg = if right_exists {
|
|
+ let mirror_path = current_dir.join(internal);
|
|
+ let contents = fs::read(¤t_external).with_context(|| {
|
|
+ format!(
|
|
+ "failed to read current file for diff {}",
|
|
+ current_external.display()
|
|
+ )
|
|
+ })?;
|
|
+ fs::write(&mirror_path, contents).with_context(|| {
|
|
+ format!(
|
|
+ "failed to write current mirror file {}",
|
|
+ mirror_path.display()
|
|
+ )
|
|
+ })?;
|
|
+ // Use relative path from baseline_dir (so headers say a/<uuid> b/current/<uuid>).
|
|
+ format!("current/{internal}")
|
|
+ } else {
|
|
+ // Deletion: right side is /dev/null to show proper deleted file diff.
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+
|
|
+ // Prepare left arg: baseline file path or /dev/null for additions.
|
|
+ let left_arg = if left_is_dev_null {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ internal.clone()
|
|
+ };
|
|
+
|
|
+ // Run git diff --no-index from baseline_dir to keep paths predictable.
|
|
+ let raw = run_git_allow_exit_codes(
|
|
+ &baseline_dir,
|
|
+ &[
|
|
+ "-c",
|
|
+ "color.ui=false",
|
|
+ "diff",
|
|
+ "--no-color",
|
|
+ "--no-index",
|
|
+ "--",
|
|
+ &left_arg,
|
|
+ &right_arg,
|
|
+ ],
|
|
+ &[0, 1], // 0: no changes, 1: differences
|
|
+ )?;
|
|
+
|
|
+ if raw.trim().is_empty() {
|
|
+ continue;
|
|
+ }
|
|
+ let rewritten = self.rewrite_diff_paths(&raw);
|
|
+ if !rewritten.trim().is_empty() {
|
|
+ if !aggregated.is_empty() && !aggregated.ends_with('\n') {
|
|
+ aggregated.push('\n');
|
|
+ }
|
|
+ aggregated.push_str(&rewritten);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ self.unified_diff = if aggregated.trim().is_empty() {
|
|
+ None
|
|
+ } else {
|
|
+ Some(aggregated)
|
|
+ };
|
|
+
|
|
+ // Clean up the current dir.
|
|
+ let _ = fs::remove_dir_all(¤t_dir);
|
|
+
|
|
+ Ok(self.unified_diff.clone())
|
|
+ }
|
|
+
|
|
+ fn baseline_dir(&self) -> Result<&Path> {
|
|
+ self.baseline_files_dir
|
|
+ .as_ref()
|
|
+ .map(|d| d.path())
|
|
+ .ok_or_else(|| anyhow::anyhow!("baseline temp dir not initialized"))
|
|
+ }
|
|
+
|
|
+ fn ensure_baseline_dir(&mut self) -> Result<()> {
|
|
+ if self.baseline_files_dir.is_some() {
|
|
+ return Ok(());
|
|
+ }
|
|
+ let tmp = TempDir::new().context("create baseline temp dir")?;
|
|
+ self.baseline_files_dir = Some(tmp);
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ /// Rewrites the internal filenames to external paths in diff headers.
|
|
+ /// Handles inputs like:
|
|
+ /// diff --git a/<uuid> b/current/<uuid>
|
|
+ /// --- a/<uuid> | /dev/null
|
|
+ /// +++ b/current/<uuid> | /dev/null
|
|
+ /// and replaces uuid with the external paths tracking baseline/current.
|
|
+ fn rewrite_diff_paths(&self, diff: &str) -> String {
|
|
+ let mut out = String::new();
|
|
+ for line in diff.lines() {
|
|
+ if let Some(rest) = line.strip_prefix("diff --git ") {
|
|
+ // Format: diff --git a/<f> b/<f>
|
|
+ let parts: Vec<&str> = rest.split_whitespace().collect();
|
|
+ if parts.len() == 2 {
|
|
+ let a = parts[0].strip_prefix("a/").unwrap_or(parts[0]);
|
|
+ let b = parts[1].strip_prefix("b/").unwrap_or(parts[1]);
|
|
+
|
|
+ let a_ext_display = if a == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let a_base = Path::new(a)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(a);
|
|
+ let mapped = self
|
|
+ .temp_name_to_baseline_external
|
|
+ .get(a_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(a));
|
|
+ mapped.display().to_string()
|
|
+ };
|
|
+
|
|
+ let b_ext_display = if b == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let b_base = Path::new(b)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(b);
|
|
+ let mapped = self
|
|
+ .temp_name_to_current_external
|
|
+ .get(b_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(b));
|
|
+ mapped.display().to_string()
|
|
+ };
|
|
+
|
|
+ out.push_str(&format!("diff --git a/{a_ext_display} b/{b_ext_display}\n"));
|
|
+ continue;
|
|
+ }
|
|
+ }
|
|
+ if let Some(rest) = line.strip_prefix("--- ") {
|
|
+ if let Some(path) = rest.strip_prefix("a/") {
|
|
+ let external_display = if path == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let p_base = Path::new(path)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(path);
|
|
+ self.temp_name_to_baseline_external
|
|
+ .get(p_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(path))
|
|
+ .display()
|
|
+ .to_string()
|
|
+ };
|
|
+ out.push_str(&format!("--- {external_display}\n"));
|
|
+ continue;
|
|
+ }
|
|
+ }
|
|
+ if let Some(rest) = line.strip_prefix("+++ ") {
|
|
+ if let Some(path) = rest.strip_prefix("b/") {
|
|
+ let external_display = if path == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let p_base = Path::new(path)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(path);
|
|
+ self.temp_name_to_current_external
|
|
+ .get(p_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(path))
|
|
+ .display()
|
|
+ .to_string()
|
|
+ };
|
|
+ out.push_str(&format!("+++ {external_display}\n"));
|
|
+ continue;
|
|
+ }
|
|
+ }
|
|
+ out.push_str(line);
|
|
+ out.push('\n');
|
|
+ }
|
|
+ out
|
|
+ }
|
|
+}
|
|
+
|
|
+fn uuid_filename_for(path: &Path) -> String {
|
|
+ let id = Uuid::new_v4().to_string();
|
|
+ match path.extension().and_then(|e| e.to_str()) {
|
|
+ Some(ext) if !ext.is_empty() => format!("{id}.{ext}"),
|
|
+ _ => id,
|
|
+ }
|
|
+}
|
|
+
|
|
+fn run_git_allow_exit_codes(
|
|
+ repo: &Path,
|
|
+ args: &[&str],
|
|
+ allowed_exit_codes: &[i32],
|
|
+) -> Result<String> {
|
|
+ let output = Command::new("git")
|
|
+ .current_dir(repo)
|
|
+ .args(args)
|
|
+ .output()
|
|
+ .with_context(|| format!("failed to run git {:?} in {}", args, repo.display()))?;
|
|
+ let code = output.status.code().unwrap_or(-1);
|
|
+ if !allowed_exit_codes.contains(&code) {
|
|
+ anyhow::bail!(
|
|
+ "git {:?} failed with status {:?}: {}",
|
|
+ args,
|
|
+ output.status,
|
|
+ String::from_utf8_lossy(&output.stderr)
|
|
+ );
|
|
+ }
|
|
+ Ok(String::from_utf8_lossy(&output.stdout).into_owned())
|
|
+}
|
|
+
|
|
+#[cfg(test)]
|
|
+mod tests {
|
|
+ #![allow(clippy::unwrap_used)]
|
|
```
|
|
|
|
> Can you also test that paths with spaces work as intended?

- Created: 2025-08-01 17:15:26 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248479943
|
|
|
|
```diff
|
|
@@ -0,0 +1,476 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use tempfile::TempDir;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+/// Tracks sets of changes to files and exposes the overall unified diff.
|
|
+/// Internally, the way this works is now:
|
|
+/// 1. Create a temp directory to store baseline snapshots of files when they are first seen.
|
|
+/// 2. When a path is first observed, copy its current contents into the baseline dir if it exists on disk.
|
|
+/// For new additions, do not create a baseline file so that diffs are shown as proper additions (using /dev/null).
|
|
+/// 3. Keep a stable internal filename (uuid + same extension) per external path for path rewrite in diffs.
|
|
+/// 4. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk using
|
|
+/// `git diff --no-index` and rewrite paths to external paths.
|
|
+#[derive(Default)]
|
|
+pub struct TurnDiffTracker {
|
|
+ /// Temp directory holding baseline snapshots of files as first seen.
|
|
+ baseline_files_dir: Option<TempDir>,
|
|
+ /// Map external path -> internal filename (uuid + same extension).
|
|
+ external_to_temp_name: HashMap<PathBuf, String>,
|
|
+ /// Internal filename -> external path as of baseline snapshot.
|
|
+ temp_name_to_baseline_external: HashMap<String, PathBuf>,
|
|
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
|
|
+ /// This is where renames are tracked.
|
|
+ temp_name_to_current_external: HashMap<String, PathBuf>,
|
|
+ /// Aggregated unified diff for all accumulated changes across files.
|
|
+ pub unified_diff: Option<String>,
|
|
+}
|
|
+
|
|
+impl TurnDiffTracker {
|
|
+ pub fn new() -> Self {
|
|
+ Self::default()
|
|
+ }
|
|
+
|
|
+ /// Front-run apply patch calls to track the starting contents of any modified files.
|
|
+ /// - Creates a baseline snapshot for files that already exist on disk when first seen.
|
|
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
|
|
+ /// - Also updates internal mappings for move/rename events.
|
|
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
|
|
+ self.ensure_baseline_dir()?;
|
|
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
|
|
+
|
|
+ for (path, change) in changes.iter() {
|
|
+ // Ensure a stable internal filename exists for this external path.
|
|
+ if !self.external_to_temp_name.contains_key(path) {
|
|
+ let internal = uuid_filename_for(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(path.clone(), internal.clone());
|
|
+ self.temp_name_to_baseline_external
|
|
+ .insert(internal.clone(), path.clone());
|
|
+ self.temp_name_to_current_external
|
|
+ .insert(internal.clone(), path.clone());
|
|
+
|
|
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
|
|
+ if path.exists() {
|
|
+ let contents = fs::read(path)
|
|
+ .with_context(|| format!("failed to read original {}", path.display()))?;
|
|
+ let internal_path = baseline_dir.join(&internal);
|
|
+ fs::write(&internal_path, contents).with_context(|| {
|
|
+ format!("failed to write baseline file {}", internal_path.display())
|
|
+ })?;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // Track rename/move in current mapping if provided in an Update.
|
|
+ let move_path = match change {
|
|
+ FileChange::Update {
|
|
+ move_path: Some(dest),
|
|
+ ..
|
|
+ } => Some(dest),
|
|
+ _ => None,
|
|
+ };
|
|
+ if let Some(dest) = move_path {
|
|
```
|
|
|
|
> This could be folded into the above `match` statement?

- Created: 2025-08-01 17:17:12 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248482907
|
|
|
|
```diff
|
|
@@ -0,0 +1,476 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use tempfile::TempDir;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+/// Tracks sets of changes to files and exposes the overall unified diff.
|
|
+/// Internally, the way this works is now:
|
|
+/// 1. Create a temp directory to store baseline snapshots of files when they are first seen.
|
|
+/// 2. When a path is first observed, copy its current contents into the baseline dir if it exists on disk.
|
|
+/// For new additions, do not create a baseline file so that diffs are shown as proper additions (using /dev/null).
|
|
+/// 3. Keep a stable internal filename (uuid + same extension) per external path for path rewrite in diffs.
|
|
+/// 4. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk using
|
|
+/// `git diff --no-index` and rewrite paths to external paths.
|
|
+#[derive(Default)]
|
|
+pub struct TurnDiffTracker {
|
|
+ /// Temp directory holding baseline snapshots of files as first seen.
|
|
+ baseline_files_dir: Option<TempDir>,
|
|
+ /// Map external path -> internal filename (uuid + same extension).
|
|
+ external_to_temp_name: HashMap<PathBuf, String>,
|
|
+ /// Internal filename -> external path as of baseline snapshot.
|
|
+ temp_name_to_baseline_external: HashMap<String, PathBuf>,
|
|
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
|
|
+ /// This is where renames are tracked.
|
|
+ temp_name_to_current_external: HashMap<String, PathBuf>,
|
|
+ /// Aggregated unified diff for all accumulated changes across files.
|
|
+ pub unified_diff: Option<String>,
|
|
+}
|
|
+
|
|
+impl TurnDiffTracker {
|
|
+ pub fn new() -> Self {
|
|
+ Self::default()
|
|
+ }
|
|
+
|
|
+ /// Front-run apply patch calls to track the starting contents of any modified files.
|
|
+ /// - Creates a baseline snapshot for files that already exist on disk when first seen.
|
|
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
|
|
+ /// - Also updates internal mappings for move/rename events.
|
|
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
|
|
+ self.ensure_baseline_dir()?;
|
|
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
|
|
+
|
|
+ for (path, change) in changes.iter() {
|
|
+ // Ensure a stable internal filename exists for this external path.
|
|
+ if !self.external_to_temp_name.contains_key(path) {
|
|
+ let internal = uuid_filename_for(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(path.clone(), internal.clone());
|
|
+ self.temp_name_to_baseline_external
|
|
+ .insert(internal.clone(), path.clone());
|
|
+ self.temp_name_to_current_external
|
|
+ .insert(internal.clone(), path.clone());
|
|
+
|
|
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
|
|
+ if path.exists() {
|
|
+ let contents = fs::read(path)
|
|
+ .with_context(|| format!("failed to read original {}", path.display()))?;
|
|
+ let internal_path = baseline_dir.join(&internal);
|
|
+ fs::write(&internal_path, contents).with_context(|| {
|
|
+ format!("failed to write baseline file {}", internal_path.display())
|
|
+ })?;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // Track rename/move in current mapping if provided in an Update.
|
|
+ let move_path = match change {
|
|
+ FileChange::Update {
|
|
+ move_path: Some(dest),
|
|
+ ..
|
|
+ } => Some(dest),
|
|
+ _ => None,
|
|
+ };
|
|
+ if let Some(dest) = move_path {
|
|
+ let uuid_filename = match self.external_to_temp_name.get(path) {
|
|
+ Some(i) => i.clone(),
|
|
+ None => {
|
|
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
|
|
+ let i = uuid_filename_for(path);
|
|
+ self.external_to_temp_name.insert(path.clone(), i.clone());
|
|
+ self.temp_name_to_baseline_external
|
|
+ .insert(i.clone(), path.clone());
|
|
+ i
|
|
+ }
|
|
+ };
|
|
+ // Update current external mapping for temp file name.
|
|
+ self.temp_name_to_current_external
|
|
+ .insert(uuid_filename.clone(), dest.clone());
|
|
+ // Update forward file_mapping: external current -> internal name.
|
|
+ self.external_to_temp_name.remove(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(dest.clone(), uuid_filename);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ /// Recompute the aggregated unified diff by comparing all baseline snapshots against
|
|
+ /// current files on disk using `git diff --no-index` and rewriting paths to external paths.
|
|
+ pub fn update_and_get_unified_diff(&mut self) -> Result<Option<String>> {
|
|
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
|
|
+ let current_dir = baseline_dir.join("current");
|
|
+ if current_dir.exists() {
|
|
+ // Best-effort cleanup of previous run's mirror.
|
|
+ let _ = fs::remove_dir_all(¤t_dir);
|
|
+ }
|
|
+ fs::create_dir_all(¤t_dir).with_context(|| {
|
|
+ format!(
|
|
+ "failed to create current mirror dir {}",
|
|
+ current_dir.display()
|
|
+ )
|
|
+ })?;
|
|
+
|
|
+ let mut aggregated = String::new();
|
|
+
|
|
+ // Compute diffs per tracked internal file.
|
|
+ for (internal, baseline_external) in &self.temp_name_to_baseline_external {
|
|
+ let baseline_path = baseline_dir.join(internal);
|
|
+ let current_external = self
|
|
+ .temp_name_to_current_external
|
|
+ .get(internal)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| baseline_external.clone());
|
|
+
|
|
+ let left_is_dev_null = !baseline_path.exists();
|
|
+ let right_exists = current_external.exists();
|
|
+
|
|
+ // Prepare right side mirror file if exists; otherwise use /dev/null for deletions.
|
|
+ let right_arg = if right_exists {
|
|
+ let mirror_path = current_dir.join(internal);
|
|
+ let contents = fs::read(¤t_external).with_context(|| {
|
|
+ format!(
|
|
+ "failed to read current file for diff {}",
|
|
+ current_external.display()
|
|
+ )
|
|
+ })?;
|
|
+ fs::write(&mirror_path, contents).with_context(|| {
|
|
+ format!(
|
|
+ "failed to write current mirror file {}",
|
|
+ mirror_path.display()
|
|
+ )
|
|
+ })?;
|
|
+ // Use relative path from baseline_dir (so headers say a/<uuid> b/current/<uuid>).
|
|
+ format!("current/{internal}")
|
|
+ } else {
|
|
+ // Deletion: right side is /dev/null to show proper deleted file diff.
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+
|
|
+ // Prepare left arg: baseline file path or /dev/null for additions.
|
|
+ let left_arg = if left_is_dev_null {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ internal.clone()
|
|
+ };
|
|
+
|
|
+ // Run git diff --no-index from baseline_dir to keep paths predictable.
|
|
+ let raw = run_git_allow_exit_codes(
|
|
+ &baseline_dir,
|
|
+ &[
|
|
+ "-c",
|
|
+ "color.ui=false",
|
|
+ "diff",
|
|
+ "--no-color",
|
|
+ "--no-index",
|
|
+ "--",
|
|
+ &left_arg,
|
|
+ &right_arg,
|
|
+ ],
|
|
+ &[0, 1], // 0: no changes, 1: differences
|
|
+ )?;
|
|
+
|
|
+ if raw.trim().is_empty() {
|
|
+ continue;
|
|
+ }
|
|
+ let rewritten = self.rewrite_diff_paths(&raw);
|
|
+ if !rewritten.trim().is_empty() {
|
|
+ if !aggregated.is_empty() && !aggregated.ends_with('\n') {
|
|
+ aggregated.push('\n');
|
|
+ }
|
|
+ aggregated.push_str(&rewritten);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ self.unified_diff = if aggregated.trim().is_empty() {
|
|
+ None
|
|
+ } else {
|
|
+ Some(aggregated)
|
|
+ };
|
|
+
|
|
+ // Clean up the current dir.
|
|
+ let _ = fs::remove_dir_all(¤t_dir);
|
|
+
|
|
+ Ok(self.unified_diff.clone())
|
|
+ }
|
|
+
|
|
+ fn baseline_dir(&self) -> Result<&Path> {
|
|
+ self.baseline_files_dir
|
|
+ .as_ref()
|
|
+ .map(|d| d.path())
|
|
+ .ok_or_else(|| anyhow::anyhow!("baseline temp dir not initialized"))
|
|
+ }
|
|
+
|
|
+ fn ensure_baseline_dir(&mut self) -> Result<()> {
|
|
+ if self.baseline_files_dir.is_some() {
|
|
+ return Ok(());
|
|
+ }
|
|
+ let tmp = TempDir::new().context("create baseline temp dir")?;
|
|
+ self.baseline_files_dir = Some(tmp);
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ /// Rewrites the internal filenames to external paths in diff headers.
|
|
+ /// Handles inputs like:
|
|
+ /// diff --git a/<uuid> b/current/<uuid>
|
|
+ /// --- a/<uuid> | /dev/null
|
|
+ /// +++ b/current/<uuid> | /dev/null
|
|
+ /// and replaces uuid with the external paths tracking baseline/current.
|
|
+ fn rewrite_diff_paths(&self, diff: &str) -> String {
|
|
+ let mut out = String::new();
|
|
+ for line in diff.lines() {
|
|
+ if let Some(rest) = line.strip_prefix("diff --git ") {
|
|
+ // Format: diff --git a/<f> b/<f>
|
|
+ let parts: Vec<&str> = rest.split_whitespace().collect();
|
|
+ if parts.len() == 2 {
|
|
+ let a = parts[0].strip_prefix("a/").unwrap_or(parts[0]);
|
|
+ let b = parts[1].strip_prefix("b/").unwrap_or(parts[1]);
|
|
+
|
|
+ let a_ext_display = if a == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let a_base = Path::new(a)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(a);
|
|
+ let mapped = self
|
|
+ .temp_name_to_baseline_external
|
|
+ .get(a_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(a));
|
|
+ mapped.display().to_string()
|
|
+ };
|
|
+
|
|
+ let b_ext_display = if b == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let b_base = Path::new(b)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(b);
|
|
+ let mapped = self
|
|
+ .temp_name_to_current_external
|
|
+ .get(b_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(b));
|
|
+ mapped.display().to_string()
|
|
+ };
|
|
+
|
|
+ out.push_str(&format!("diff --git a/{a_ext_display} b/{b_ext_display}\n"));
|
|
+ continue;
|
|
+ }
|
|
+ }
|
|
+ if let Some(rest) = line.strip_prefix("--- ") {
|
|
+ if let Some(path) = rest.strip_prefix("a/") {
|
|
+ let external_display = if path == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let p_base = Path::new(path)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(path);
|
|
+ self.temp_name_to_baseline_external
|
|
+ .get(p_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(path))
|
|
+ .display()
|
|
+ .to_string()
|
|
+ };
|
|
+ out.push_str(&format!("--- {external_display}\n"));
|
|
+ continue;
|
|
+ }
|
|
+ }
|
|
+ if let Some(rest) = line.strip_prefix("+++ ") {
|
|
+ if let Some(path) = rest.strip_prefix("b/") {
|
|
+ let external_display = if path == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let p_base = Path::new(path)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(path);
|
|
+ self.temp_name_to_current_external
|
|
+ .get(p_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(path))
|
|
+ .display()
|
|
+ .to_string()
|
|
+ };
|
|
+ out.push_str(&format!("+++ {external_display}\n"));
|
|
+ continue;
|
|
+ }
|
|
+ }
|
|
+ out.push_str(line);
|
|
+ out.push('\n');
|
|
+ }
|
|
+ out
|
|
+ }
|
|
+}
|
|
+
|
|
+fn uuid_filename_for(path: &Path) -> String {
|
|
+ let id = Uuid::new_v4().to_string();
|
|
+ match path.extension().and_then(|e| e.to_str()) {
|
|
+ Some(ext) if !ext.is_empty() => format!("{id}.{ext}"),
|
|
+ _ => id,
|
|
+ }
|
|
+}
|
|
+
|
|
+fn run_git_allow_exit_codes(
|
|
```
|
|
|
|
> From https://github.com/openai/codex/pull/1747 I would include:
>
> ```rust
> let envs = vec![
>     ("GIT_CONFIG_GLOBAL", "/dev/null"),
>     ("GIT_CONFIG_NOSYSTEM", "1"),
> ];
> ```

- Created: 2025-08-01 17:20:55 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2248489326
|
|
|
|
```diff
|
|
@@ -0,0 +1,476 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use tempfile::TempDir;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+/// Tracks sets of changes to files and exposes the overall unified diff.
|
|
+/// Internally, the way this works is now:
|
|
+/// 1. Create a temp directory to store baseline snapshots of files when they are first seen.
|
|
+/// 2. When a path is first observed, copy its current contents into the baseline dir if it exists on disk.
|
|
+/// For new additions, do not create a baseline file so that diffs are shown as proper additions (using /dev/null).
|
|
+/// 3. Keep a stable internal filename (uuid + same extension) per external path for path rewrite in diffs.
|
|
+/// 4. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk using
|
|
+/// `git diff --no-index` and rewrite paths to external paths.
|
|
+#[derive(Default)]
|
|
+pub struct TurnDiffTracker {
|
|
+ /// Temp directory holding baseline snapshots of files as first seen.
|
|
+ baseline_files_dir: Option<TempDir>,
|
|
+ /// Map external path -> internal filename (uuid + same extension).
|
|
+ external_to_temp_name: HashMap<PathBuf, String>,
|
|
+ /// Internal filename -> external path as of baseline snapshot.
|
|
+ temp_name_to_baseline_external: HashMap<String, PathBuf>,
|
|
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
|
|
+ /// This is where renames are tracked.
|
|
+ temp_name_to_current_external: HashMap<String, PathBuf>,
|
|
+ /// Aggregated unified diff for all accumulated changes across files.
|
|
+ pub unified_diff: Option<String>,
|
|
+}
|
|
+
|
|
+impl TurnDiffTracker {
|
|
+ pub fn new() -> Self {
|
|
+ Self::default()
|
|
+ }
|
|
+
|
|
+ /// Front-run apply patch calls to track the starting contents of any modified files.
|
|
+ /// - Creates a baseline snapshot for files that already exist on disk when first seen.
|
|
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
|
|
+ /// - Also updates internal mappings for move/rename events.
|
|
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
|
|
+ self.ensure_baseline_dir()?;
|
|
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
|
|
+
|
|
+ for (path, change) in changes.iter() {
|
|
+ // Ensure a stable internal filename exists for this external path.
|
|
+ if !self.external_to_temp_name.contains_key(path) {
|
|
+ let internal = uuid_filename_for(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(path.clone(), internal.clone());
|
|
+ self.temp_name_to_baseline_external
|
|
+ .insert(internal.clone(), path.clone());
|
|
+ self.temp_name_to_current_external
|
|
+ .insert(internal.clone(), path.clone());
|
|
+
|
|
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
|
|
+ if path.exists() {
|
|
+ let contents = fs::read(path)
|
|
+ .with_context(|| format!("failed to read original {}", path.display()))?;
|
|
+ let internal_path = baseline_dir.join(&internal);
|
|
+ fs::write(&internal_path, contents).with_context(|| {
|
|
+ format!("failed to write baseline file {}", internal_path.display())
|
|
+ })?;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // Track rename/move in current mapping if provided in an Update.
|
|
+ let move_path = match change {
|
|
+ FileChange::Update {
|
|
+ move_path: Some(dest),
|
|
+ ..
|
|
+ } => Some(dest),
|
|
+ _ => None,
|
|
+ };
|
|
+ if let Some(dest) = move_path {
|
|
+ let uuid_filename = match self.external_to_temp_name.get(path) {
|
|
+ Some(i) => i.clone(),
|
|
+ None => {
|
|
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
|
|
+ let i = uuid_filename_for(path);
|
|
+ self.external_to_temp_name.insert(path.clone(), i.clone());
|
|
+ self.temp_name_to_baseline_external
|
|
+ .insert(i.clone(), path.clone());
|
|
+ i
|
|
+ }
|
|
+ };
|
|
+ // Update current external mapping for temp file name.
|
|
+ self.temp_name_to_current_external
|
|
+ .insert(uuid_filename.clone(), dest.clone());
|
|
+ // Update forward file_mapping: external current -> internal name.
|
|
+ self.external_to_temp_name.remove(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(dest.clone(), uuid_filename);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ /// Recompute the aggregated unified diff by comparing all baseline snapshots against
|
|
```
|
|
|
|
> Can you expand this comment? I don't have a great mental model of the structure you're trying to set up for the ultimate `git diff` call.
|
|
>
|
|
> I want to understand why this isn't something simpler like `diff -u backed-up-file current-file`.
|
|
|
|
- Created: 2025-08-04 02:45:51 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250292032
|
|
|
|
```diff
|
|
@@ -0,0 +1,766 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use anyhow::anyhow;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+struct BaselineFileInfo {
|
|
+ path: Option<PathBuf>,
|
|
+ contents_bytes: Option<Vec<u8>>,
|
|
+ mode: Option<String>,
|
|
+ oid: Option<String>,
|
|
+}
|
|
+
|
|
+/// Tracks sets of changes to files and exposes the overall unified diff.
|
|
+/// Internally, the way this works is now:
|
|
+/// 1. Maintain an in-memory baseline snapshot of files when they are first seen.
|
|
+/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null).
|
|
+/// 2. Keep a stable internal filename (uuid + same extension) per external path for rename tracking.
|
|
+/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory
|
|
+/// using the `similar` crate and emit unified diffs with rewritten external paths.
|
|
+#[derive(Default)]
|
|
+pub struct TurnDiffTracker {
|
|
+ /// Map external path -> internal filename (uuid + same extension).
|
|
+ external_to_temp_name: HashMap<PathBuf, String>,
|
|
+ /// Internal filename -> baseline file info.
|
|
+ baseline_file_info: HashMap<String, BaselineFileInfo>,
|
|
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
|
|
+ /// This is where renames are tracked.
|
|
+ temp_name_to_current_path: HashMap<String, PathBuf>,
|
|
+ /// Cache of known git worktree roots to avoid repeated filesystem walks.
|
|
+ git_root_cache: Vec<PathBuf>,
|
|
+}
|
|
+
|
|
+impl TurnDiffTracker {
|
|
+ pub fn new() -> Self {
|
|
+ Self::default()
|
|
+ }
|
|
+
|
|
+ /// Front-run apply patch calls to track the starting contents of any modified files.
|
|
+ /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen.
|
|
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
|
|
+ /// - Also updates internal mappings for move/rename events.
|
|
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
|
|
+ for (path, change) in changes.iter() {
|
|
+ // Ensure a stable internal filename exists for this external path.
|
|
+ if !self.external_to_temp_name.contains_key(path) {
|
|
+ let internal = uuid_filename_for(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(path.clone(), internal.clone());
|
|
+ self.temp_name_to_current_path
|
|
+ .insert(internal.clone(), path.clone());
|
|
+
|
|
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
|
|
+ let (contents_bytes, mode, oid) = if path.exists() {
|
|
+ let mode = file_mode_for_path(path);
|
|
+ let mode_str = mode.as_deref().unwrap_or(REGULAR_MODE);
|
|
+ let contents_bytes = blob_bytes(path, mode_str)
|
|
+ .unwrap_or_default()
|
|
+ .unwrap_or_default();
|
|
+ let oid = if mode.as_deref() == Some(SYMLINK_MODE) {
|
|
+ git_blob_sha1_hex_bytes(&contents_bytes)
|
|
+ } else {
|
|
+ self.git_blob_oid_for_path(path)
|
|
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(&contents_bytes))
|
|
+ };
|
|
+ (Some(contents_bytes), mode, Some(oid))
|
|
+ } else {
|
|
+ (None, None, Some(ZERO_OID.to_string()))
|
|
+ };
|
|
+
|
|
+ self.baseline_file_info.insert(
|
|
+ internal.clone(),
|
|
+ BaselineFileInfo {
|
|
+ path: Some(path.clone()),
|
|
+ contents_bytes,
|
|
+ mode,
|
|
+ oid,
|
|
+ },
|
|
+ );
|
|
+ }
|
|
+
|
|
+ // Track rename/move in current mapping if provided in an Update.
|
|
+ if let FileChange::Update {
|
|
+ move_path: Some(dest),
|
|
+ ..
|
|
+ } = change
|
|
+ {
|
|
+ let uuid_filename = match self.external_to_temp_name.get(path) {
|
|
+ Some(i) => i.clone(),
|
|
+ None => {
|
|
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
|
|
+ let i = uuid_filename_for(path);
|
|
+ self.baseline_file_info.insert(
|
|
+ i.clone(),
|
|
+ BaselineFileInfo {
|
|
+ path: Some(path.clone()),
|
|
+ contents_bytes: None,
|
|
+ mode: None,
|
|
+ oid: Some(ZERO_OID.to_string()),
|
|
+ },
|
|
+ );
|
|
+ i
|
|
+ }
|
|
+ };
|
|
+ // Update current external mapping for temp file name.
|
|
+ self.temp_name_to_current_path
|
|
+ .insert(uuid_filename.clone(), dest.clone());
|
|
+ // Update forward file_mapping: external current -> internal name.
|
|
+ self.external_to_temp_name.remove(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(dest.clone(), uuid_filename);
|
|
+ };
|
|
+ }
|
|
+
|
|
+ Ok(())
|
|
```
|
|
|
|
> I don't see a `?` or a place where the `Err` variant is constructed, so does this need to return `Result`?
|
|
|
|
- Created: 2025-08-04 02:57:10 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250301151
|
|
|
|
```diff
|
|
@@ -0,0 +1,766 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use anyhow::anyhow;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+struct BaselineFileInfo {
|
|
+ path: Option<PathBuf>,
|
|
+ contents_bytes: Option<Vec<u8>>,
|
|
```
|
|
|
|
> I think the field name `file_contents` implies bytes given the type.
|
|
|
|
- Created: 2025-08-04 02:57:49 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250301670
|
|
|
|
```diff
|
|
@@ -0,0 +1,766 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use anyhow::anyhow;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+struct BaselineFileInfo {
|
|
+ path: Option<PathBuf>,
|
|
+ contents_bytes: Option<Vec<u8>>,
|
|
+ mode: Option<String>,
|
|
```
|
|
|
|
> I think an `enum` instead of a `String` would be clearer here.
|
|
|
|
- Created: 2025-08-04 02:58:20 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250302031
|
|
|
|
```diff
|
|
@@ -0,0 +1,766 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use anyhow::anyhow;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+struct BaselineFileInfo {
|
|
+ path: Option<PathBuf>,
|
|
```
|
|
|
|
> Add a comment since it is surprising that `path` could be `None`?
|
|
|
|
- Created: 2025-08-04 02:59:46 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250303137
|
|
|
|
```diff
|
|
@@ -0,0 +1,766 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use anyhow::anyhow;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+struct BaselineFileInfo {
|
|
+ path: Option<PathBuf>,
|
|
+ contents_bytes: Option<Vec<u8>>,
|
|
+ mode: Option<String>,
|
|
+ oid: Option<String>,
|
|
+}
|
|
+
|
|
+/// Tracks sets of changes to files and exposes the overall unified diff.
|
|
+/// Internally, the way this works is now:
|
|
+/// 1. Maintain an in-memory baseline snapshot of files when they are first seen.
|
|
+/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null).
|
|
+/// 2. Keep a stable internal filename (uuid + same extension) per external path for rename tracking.
|
|
+/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory
|
|
+/// using the `similar` crate and emit unified diffs with rewritten external paths.
|
|
+#[derive(Default)]
|
|
+pub struct TurnDiffTracker {
|
|
+ /// Map external path -> internal filename (uuid + same extension).
|
|
+ external_to_temp_name: HashMap<PathBuf, String>,
|
|
+ /// Internal filename -> baseline file info.
|
|
+ baseline_file_info: HashMap<String, BaselineFileInfo>,
|
|
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
|
|
+ /// This is where renames are tracked.
|
|
+ temp_name_to_current_path: HashMap<String, PathBuf>,
|
|
+ /// Cache of known git worktree roots to avoid repeated filesystem walks.
|
|
+ git_root_cache: Vec<PathBuf>,
|
|
+}
|
|
+
|
|
+impl TurnDiffTracker {
|
|
+ pub fn new() -> Self {
|
|
+ Self::default()
|
|
+ }
|
|
+
|
|
+ /// Front-run apply patch calls to track the starting contents of any modified files.
|
|
+ /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen.
|
|
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
|
|
+ /// - Also updates internal mappings for move/rename events.
|
|
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
|
|
+ for (path, change) in changes.iter() {
|
|
+ // Ensure a stable internal filename exists for this external path.
|
|
+ if !self.external_to_temp_name.contains_key(path) {
|
|
+ let internal = uuid_filename_for(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(path.clone(), internal.clone());
|
|
+ self.temp_name_to_current_path
|
|
+ .insert(internal.clone(), path.clone());
|
|
+
|
|
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
|
|
+ let (contents_bytes, mode, oid) = if path.exists() {
|
|
+ let mode = file_mode_for_path(path);
|
|
+ let mode_str = mode.as_deref().unwrap_or(REGULAR_MODE);
|
|
```
|
|
|
|
> If `mode` becomes an `enum`, using `match` would be cleaner here.
|
|
|
|
- Created: 2025-08-04 03:00:33 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250303694
|
|
|
|
```diff
|
|
@@ -0,0 +1,766 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use anyhow::anyhow;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+struct BaselineFileInfo {
|
|
+ path: Option<PathBuf>,
|
|
```
|
|
|
|
> I don't see a case where it is `None` in the code, but maybe I'm missing something?
|
|
|
|
- Created: 2025-08-04 03:03:14 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250305839
|
|
|
|
```diff
|
|
@@ -0,0 +1,766 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use anyhow::anyhow;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+struct BaselineFileInfo {
|
|
+ path: Option<PathBuf>,
|
|
+ contents_bytes: Option<Vec<u8>>,
|
|
+ mode: Option<String>,
|
|
+ oid: Option<String>,
|
|
+}
|
|
+
|
|
+/// Tracks sets of changes to files and exposes the overall unified diff.
|
|
+/// Internally, the way this works is now:
|
|
+/// 1. Maintain an in-memory baseline snapshot of files when they are first seen.
|
|
+/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null).
|
|
+/// 2. Keep a stable internal filename (uuid + same extension) per external path for rename tracking.
|
|
+/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory
|
|
+/// using the `similar` crate and emit unified diffs with rewritten external paths.
|
|
+#[derive(Default)]
|
|
+pub struct TurnDiffTracker {
|
|
+ /// Map external path -> internal filename (uuid + same extension).
|
|
+ external_to_temp_name: HashMap<PathBuf, String>,
|
|
+ /// Internal filename -> baseline file info.
|
|
+ baseline_file_info: HashMap<String, BaselineFileInfo>,
|
|
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
|
|
+ /// This is where renames are tracked.
|
|
+ temp_name_to_current_path: HashMap<String, PathBuf>,
|
|
+ /// Cache of known git worktree roots to avoid repeated filesystem walks.
|
|
+ git_root_cache: Vec<PathBuf>,
|
|
+}
|
|
+
|
|
+impl TurnDiffTracker {
|
|
+ pub fn new() -> Self {
|
|
+ Self::default()
|
|
+ }
|
|
+
|
|
+ /// Front-run apply patch calls to track the starting contents of any modified files.
|
|
+ /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen.
|
|
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
|
|
+ /// - Also updates internal mappings for move/rename events.
|
|
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
|
|
+ for (path, change) in changes.iter() {
|
|
+ // Ensure a stable internal filename exists for this external path.
|
|
+ if !self.external_to_temp_name.contains_key(path) {
|
|
+ let internal = uuid_filename_for(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(path.clone(), internal.clone());
|
|
+ self.temp_name_to_current_path
|
|
+ .insert(internal.clone(), path.clone());
|
|
+
|
|
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
|
|
+ let (contents_bytes, mode, oid) = if path.exists() {
|
|
+ let mode = file_mode_for_path(path);
|
|
+ let mode_str = mode.as_deref().unwrap_or(REGULAR_MODE);
|
|
+ let contents_bytes = blob_bytes(path, mode_str)
|
|
+ .unwrap_or_default()
|
|
+ .unwrap_or_default();
|
|
+ let oid = if mode.as_deref() == Some(SYMLINK_MODE) {
|
|
+ git_blob_sha1_hex_bytes(&contents_bytes)
|
|
+ } else {
|
|
+ self.git_blob_oid_for_path(path)
|
|
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(&contents_bytes))
|
|
+ };
|
|
+ (Some(contents_bytes), mode, Some(oid))
|
|
+ } else {
|
|
+ (None, None, Some(ZERO_OID.to_string()))
|
|
+ };
|
|
+
|
|
+ self.baseline_file_info.insert(
|
|
+ internal.clone(),
|
|
+ BaselineFileInfo {
|
|
+ path: Some(path.clone()),
|
|
+ contents_bytes,
|
|
+ mode,
|
|
+ oid,
|
|
+ },
|
|
+ );
|
|
+ }
|
|
+
|
|
+ // Track rename/move in current mapping if provided in an Update.
|
|
+ if let FileChange::Update {
|
|
+ move_path: Some(dest),
|
|
+ ..
|
|
+ } = change
|
|
+ {
|
|
+ let uuid_filename = match self.external_to_temp_name.get(path) {
|
|
+ Some(i) => i.clone(),
|
|
+ None => {
|
|
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
|
|
+ let i = uuid_filename_for(path);
|
|
+ self.baseline_file_info.insert(
|
|
+ i.clone(),
|
|
+ BaselineFileInfo {
|
|
+ path: Some(path.clone()),
|
|
+ contents_bytes: None,
|
|
+ mode: None,
|
|
+ oid: Some(ZERO_OID.to_string()),
|
|
+ },
|
|
+ );
|
|
+ i
|
|
+ }
|
|
+ };
|
|
+ // Update current external mapping for temp file name.
|
|
+ self.temp_name_to_current_path
|
|
+ .insert(uuid_filename.clone(), dest.clone());
|
|
+ // Update forward file_mapping: external current -> internal name.
|
|
+ self.external_to_temp_name.remove(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(dest.clone(), uuid_filename);
|
|
+ };
|
|
+ }
|
|
+
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ fn get_path_for_internal(&self, internal: &str) -> Option<PathBuf> {
|
|
+ self.temp_name_to_current_path
|
|
+ .get(internal)
|
|
+ .cloned()
|
|
+ .or_else(|| {
|
|
+ self.baseline_file_info
|
|
+ .get(internal)
|
|
+ .and_then(|info| info.path.clone())
|
|
+ })
|
|
+ }
|
|
+
|
|
+ /// Find the git worktree root for a file/directory by walking up to the first ancestor containing a `.git` entry.
|
|
+ /// Uses a simple cache of known roots and avoids negative-result caching for simplicity.
|
|
+ fn find_git_root_cached(&mut self, start: &Path) -> Option<PathBuf> {
|
|
+ let dir = if start.is_dir() {
|
|
+ start
|
|
+ } else {
|
|
+ start.parent()?
|
|
+ };
|
|
+
|
|
+ // Fast path: if any cached root is an ancestor of this path, use it.
|
|
+ if let Some(root) = self
|
|
+ .git_root_cache
|
|
+ .iter()
|
|
+ .find(|r| dir.starts_with(r))
|
|
+ .cloned()
|
|
+ {
|
|
+ return Some(root);
|
|
+ }
|
|
+
|
|
+ // Walk up to find a `.git` marker.
|
|
+ let mut cur = dir.to_path_buf();
|
|
+ loop {
|
|
+ let git_marker = cur.join(".git");
|
|
+ if git_marker.is_dir() || git_marker.is_file() {
|
|
+ if !self.git_root_cache.iter().any(|r| r == &cur) {
|
|
+ self.git_root_cache.push(cur.clone());
|
|
+ }
|
|
+ return Some(cur);
|
|
+ }
|
|
+
|
|
+ // On Windows, avoid walking above the drive or UNC share root.
|
|
+ #[cfg(windows)]
|
|
+ {
|
|
+ if is_windows_drive_or_unc_root(&cur) {
|
|
+ return None;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if let Some(parent) = cur.parent() {
|
|
+ cur = parent.to_path_buf();
|
|
+ } else {
|
|
+ return None;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Return a display string for `path` relative to its git root if found, else absolute.
|
|
+ fn relative_to_git_root_str(&mut self, path: &Path) -> String {
|
|
+ let s = if let Some(root) = self.find_git_root_cached(path) {
|
|
+ if let Ok(rel) = path.strip_prefix(&root) {
|
|
+ rel.display().to_string()
|
|
+ } else {
|
|
+ path.display().to_string()
|
|
+ }
|
|
+ } else {
|
|
+ path.display().to_string()
|
|
+ };
|
|
+ s.replace('\\', "/")
|
|
+ }
|
|
+
|
|
+ /// Ask git to compute the blob SHA-1 for the file at `path` within its repository.
|
|
+ /// Returns None if no repository is found or git invocation fails.
|
|
+ fn git_blob_oid_for_path(&mut self, path: &Path) -> Option<String> {
|
|
+ let root = self.find_git_root_cached(path)?;
|
|
+ // Compute a path relative to the repo root for better portability across platforms.
|
|
+ let rel = path.strip_prefix(&root).unwrap_or(path);
|
|
+ let output = Command::new("git")
|
|
+ .arg("-C")
|
|
+ .arg(&root)
|
|
+ .arg("hash-object")
|
|
+ .arg("--")
|
|
+ .arg(rel)
|
|
+ .output()
|
|
+ .ok()?;
|
|
+ if !output.status.success() {
|
|
+ return None;
|
|
+ }
|
|
+ let s = String::from_utf8_lossy(&output.stdout).trim().to_string();
|
|
+ if s.len() == 40 { Some(s) } else { None }
|
|
```
|
|
|
|
> If it is not 40, is this an error / unexpected situation?
|
|
|
|
- Created: 2025-08-04 03:03:31 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250306088
|
|
|
|
```diff
|
|
@@ -0,0 +1,766 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use anyhow::anyhow;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+struct BaselineFileInfo {
|
|
+ path: Option<PathBuf>,
|
|
+ contents_bytes: Option<Vec<u8>>,
|
|
+ mode: Option<String>,
|
|
+ oid: Option<String>,
|
|
+}
|
|
+
|
|
+/// Tracks sets of changes to files and exposes the overall unified diff.
|
|
+/// Internally, the way this works is now:
|
|
+/// 1. Maintain an in-memory baseline snapshot of files when they are first seen.
|
|
+/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null).
|
|
+/// 2. Keep a stable internal filename (uuid + same extension) per external path for rename tracking.
|
|
+/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory
|
|
+/// using the `similar` crate and emit unified diffs with rewritten external paths.
|
|
+#[derive(Default)]
|
|
+pub struct TurnDiffTracker {
|
|
+ /// Map external path -> internal filename (uuid + same extension).
|
|
+ external_to_temp_name: HashMap<PathBuf, String>,
|
|
+ /// Internal filename -> baseline file info.
|
|
+ baseline_file_info: HashMap<String, BaselineFileInfo>,
|
|
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
|
|
+ /// This is where renames are tracked.
|
|
+ temp_name_to_current_path: HashMap<String, PathBuf>,
|
|
+ /// Cache of known git worktree roots to avoid repeated filesystem walks.
|
|
+ git_root_cache: Vec<PathBuf>,
|
|
+}
|
|
+
|
|
+impl TurnDiffTracker {
|
|
+ pub fn new() -> Self {
|
|
+ Self::default()
|
|
+ }
|
|
+
|
|
+ /// Front-run apply patch calls to track the starting contents of any modified files.
|
|
+ /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen.
|
|
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
|
|
+ /// - Also updates internal mappings for move/rename events.
|
|
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
|
|
+ for (path, change) in changes.iter() {
|
|
+ // Ensure a stable internal filename exists for this external path.
|
|
+ if !self.external_to_temp_name.contains_key(path) {
|
|
+ let internal = uuid_filename_for(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(path.clone(), internal.clone());
|
|
+ self.temp_name_to_current_path
|
|
+ .insert(internal.clone(), path.clone());
|
|
+
|
|
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
|
|
+ let (contents_bytes, mode, oid) = if path.exists() {
|
|
+ let mode = file_mode_for_path(path);
|
|
+ let mode_str = mode.as_deref().unwrap_or(REGULAR_MODE);
|
|
+ let contents_bytes = blob_bytes(path, mode_str)
|
|
+ .unwrap_or_default()
|
|
+ .unwrap_or_default();
|
|
+ let oid = if mode.as_deref() == Some(SYMLINK_MODE) {
|
|
+ git_blob_sha1_hex_bytes(&contents_bytes)
|
|
+ } else {
|
|
+ self.git_blob_oid_for_path(path)
|
|
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(&contents_bytes))
|
|
+ };
|
|
+ (Some(contents_bytes), mode, Some(oid))
|
|
+ } else {
|
|
+ (None, None, Some(ZERO_OID.to_string()))
|
|
+ };
|
|
+
|
|
+ self.baseline_file_info.insert(
|
|
+ internal.clone(),
|
|
+ BaselineFileInfo {
|
|
+ path: Some(path.clone()),
|
|
+ contents_bytes,
|
|
+ mode,
|
|
+ oid,
|
|
+ },
|
|
+ );
|
|
+ }
|
|
+
|
|
+ // Track rename/move in current mapping if provided in an Update.
|
|
+ if let FileChange::Update {
|
|
+ move_path: Some(dest),
|
|
+ ..
|
|
+ } = change
|
|
+ {
|
|
+ let uuid_filename = match self.external_to_temp_name.get(path) {
|
|
+ Some(i) => i.clone(),
|
|
+ None => {
|
|
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
|
|
+ let i = uuid_filename_for(path);
|
|
+ self.baseline_file_info.insert(
|
|
+ i.clone(),
|
|
+ BaselineFileInfo {
|
|
+ path: Some(path.clone()),
|
|
+ contents_bytes: None,
|
|
+ mode: None,
|
|
+ oid: Some(ZERO_OID.to_string()),
|
|
+ },
|
|
+ );
|
|
+ i
|
|
+ }
|
|
+ };
|
|
+ // Update current external mapping for temp file name.
|
|
+ self.temp_name_to_current_path
|
|
+ .insert(uuid_filename.clone(), dest.clone());
|
|
+ // Update forward file_mapping: external current -> internal name.
|
|
+ self.external_to_temp_name.remove(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(dest.clone(), uuid_filename);
|
|
+ };
|
|
+ }
|
|
+
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ fn get_path_for_internal(&self, internal: &str) -> Option<PathBuf> {
|
|
+ self.temp_name_to_current_path
|
|
+ .get(internal)
|
|
+ .cloned()
|
|
+ .or_else(|| {
|
|
+ self.baseline_file_info
|
|
+ .get(internal)
|
|
+ .and_then(|info| info.path.clone())
|
|
+ })
|
|
+ }
|
|
+
|
|
+ /// Find the git worktree root for a file/directory by walking up to the first ancestor containing a `.git` entry.
|
|
+ /// Uses a simple cache of known roots and avoids negative-result caching for simplicity.
|
|
+ fn find_git_root_cached(&mut self, start: &Path) -> Option<PathBuf> {
|
|
+ let dir = if start.is_dir() {
|
|
+ start
|
|
+ } else {
|
|
+ start.parent()?
|
|
+ };
|
|
+
|
|
+ // Fast path: if any cached root is an ancestor of this path, use it.
|
|
+ if let Some(root) = self
|
|
+ .git_root_cache
|
|
+ .iter()
|
|
+ .find(|r| dir.starts_with(r))
|
|
+ .cloned()
|
|
+ {
|
|
+ return Some(root);
|
|
+ }
|
|
+
|
|
+ // Walk up to find a `.git` marker.
|
|
+ let mut cur = dir.to_path_buf();
|
|
+ loop {
|
|
+ let git_marker = cur.join(".git");
|
|
+ if git_marker.is_dir() || git_marker.is_file() {
|
|
+ if !self.git_root_cache.iter().any(|r| r == &cur) {
|
|
+ self.git_root_cache.push(cur.clone());
|
|
+ }
|
|
+ return Some(cur);
|
|
+ }
|
|
+
|
|
+ // On Windows, avoid walking above the drive or UNC share root.
|
|
+ #[cfg(windows)]
|
|
+ {
|
|
+ if is_windows_drive_or_unc_root(&cur) {
|
|
+ return None;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if let Some(parent) = cur.parent() {
|
|
+ cur = parent.to_path_buf();
|
|
+ } else {
|
|
+ return None;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Return a display string for `path` relative to its git root if found, else absolute.
|
|
+ fn relative_to_git_root_str(&mut self, path: &Path) -> String {
|
|
+ let s = if let Some(root) = self.find_git_root_cached(path) {
|
|
+ if let Ok(rel) = path.strip_prefix(&root) {
|
|
+ rel.display().to_string()
|
|
+ } else {
|
|
+ path.display().to_string()
|
|
+ }
|
|
+ } else {
|
|
+ path.display().to_string()
|
|
+ };
|
|
+ s.replace('\\', "/")
|
|
+ }
|
|
+
|
|
+ /// Ask git to compute the blob SHA-1 for the file at `path` within its repository.
|
|
+ /// Returns None if no repository is found or git invocation fails.
|
|
+ fn git_blob_oid_for_path(&mut self, path: &Path) -> Option<String> {
|
|
+ let root = self.find_git_root_cached(path)?;
|
|
+ // Compute a path relative to the repo root for better portability across platforms.
|
|
+ let rel = path.strip_prefix(&root).unwrap_or(path);
|
|
+ let output = Command::new("git")
|
|
+ .arg("-C")
|
|
+ .arg(&root)
|
|
+ .arg("hash-object")
|
|
+ .arg("--")
|
|
+ .arg(rel)
|
|
+ .output()
|
|
+ .ok()?;
|
|
+ if !output.status.success() {
|
|
```
|
|
|
|
> Is this something expected or exceptional?
|
|
|
|
- Created: 2025-08-04 03:03:57 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250306484
|
|
|
|
```diff
|
|
@@ -0,0 +1,766 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use anyhow::anyhow;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+struct BaselineFileInfo {
|
|
+ path: Option<PathBuf>,
|
|
+ contents_bytes: Option<Vec<u8>>,
|
|
+ mode: Option<String>,
|
|
+ oid: Option<String>,
|
|
+}
|
|
+
|
|
+/// Tracks sets of changes to files and exposes the overall unified diff.
|
|
+/// Internally, the way this works is now:
|
|
+/// 1. Maintain an in-memory baseline snapshot of files when they are first seen.
|
|
+/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null).
|
|
+/// 2. Keep a stable internal filename (uuid + same extension) per external path for rename tracking.
|
|
+/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory
|
|
+/// using the `similar` crate and emit unified diffs with rewritten external paths.
|
|
+#[derive(Default)]
|
|
+pub struct TurnDiffTracker {
|
|
+ /// Map external path -> internal filename (uuid + same extension).
|
|
+ external_to_temp_name: HashMap<PathBuf, String>,
|
|
+ /// Internal filename -> baseline file info.
|
|
+ baseline_file_info: HashMap<String, BaselineFileInfo>,
|
|
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
|
|
+ /// This is where renames are tracked.
|
|
+ temp_name_to_current_path: HashMap<String, PathBuf>,
|
|
+ /// Cache of known git worktree roots to avoid repeated filesystem walks.
|
|
+ git_root_cache: Vec<PathBuf>,
|
|
+}
|
|
+
|
|
+impl TurnDiffTracker {
|
|
+ pub fn new() -> Self {
|
|
+ Self::default()
|
|
+ }
|
|
+
|
|
+ /// Front-run apply patch calls to track the starting contents of any modified files.
|
|
+ /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen.
|
|
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
|
|
+ /// - Also updates internal mappings for move/rename events.
|
|
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
|
|
+ for (path, change) in changes.iter() {
|
|
+ // Ensure a stable internal filename exists for this external path.
|
|
+ if !self.external_to_temp_name.contains_key(path) {
|
|
+ let internal = uuid_filename_for(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(path.clone(), internal.clone());
|
|
+ self.temp_name_to_current_path
|
|
+ .insert(internal.clone(), path.clone());
|
|
+
|
|
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
|
|
+ let (contents_bytes, mode, oid) = if path.exists() {
|
|
+ let mode = file_mode_for_path(path);
|
|
+ let mode_str = mode.as_deref().unwrap_or(REGULAR_MODE);
|
|
+ let contents_bytes = blob_bytes(path, mode_str)
|
|
+ .unwrap_or_default()
|
|
+ .unwrap_or_default();
|
|
+ let oid = if mode.as_deref() == Some(SYMLINK_MODE) {
|
|
+ git_blob_sha1_hex_bytes(&contents_bytes)
|
|
+ } else {
|
|
+ self.git_blob_oid_for_path(path)
|
|
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(&contents_bytes))
|
|
+ };
|
|
+ (Some(contents_bytes), mode, Some(oid))
|
|
+ } else {
|
|
+ (None, None, Some(ZERO_OID.to_string()))
|
|
+ };
|
|
+
|
|
+ self.baseline_file_info.insert(
|
|
+ internal.clone(),
|
|
+ BaselineFileInfo {
|
|
+ path: Some(path.clone()),
|
|
+ contents_bytes,
|
|
+ mode,
|
|
+ oid,
|
|
+ },
|
|
+ );
|
|
+ }
|
|
+
|
|
+ // Track rename/move in current mapping if provided in an Update.
|
|
+ if let FileChange::Update {
|
|
+ move_path: Some(dest),
|
|
+ ..
|
|
+ } = change
|
|
+ {
|
|
+ let uuid_filename = match self.external_to_temp_name.get(path) {
|
|
+ Some(i) => i.clone(),
|
|
+ None => {
|
|
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
|
|
+ let i = uuid_filename_for(path);
|
|
+ self.baseline_file_info.insert(
|
|
+ i.clone(),
|
|
+ BaselineFileInfo {
|
|
+ path: Some(path.clone()),
|
|
+ contents_bytes: None,
|
|
+ mode: None,
|
|
+ oid: Some(ZERO_OID.to_string()),
|
|
+ },
|
|
+ );
|
|
+ i
|
|
+ }
|
|
+ };
|
|
+ // Update current external mapping for temp file name.
|
|
+ self.temp_name_to_current_path
|
|
+ .insert(uuid_filename.clone(), dest.clone());
|
|
+ // Update forward file_mapping: external current -> internal name.
|
|
+ self.external_to_temp_name.remove(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(dest.clone(), uuid_filename);
|
|
+ };
|
|
+ }
|
|
+
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ fn get_path_for_internal(&self, internal: &str) -> Option<PathBuf> {
|
|
+ self.temp_name_to_current_path
|
|
+ .get(internal)
|
|
+ .cloned()
|
|
+ .or_else(|| {
|
|
+ self.baseline_file_info
|
|
+ .get(internal)
|
|
+ .and_then(|info| info.path.clone())
|
|
+ })
|
|
+ }
|
|
+
|
|
+ /// Find the git worktree root for a file/directory by walking up to the first ancestor containing a `.git` entry.
|
|
+ /// Uses a simple cache of known roots and avoids negative-result caching for simplicity.
|
|
+ fn find_git_root_cached(&mut self, start: &Path) -> Option<PathBuf> {
|
|
+ let dir = if start.is_dir() {
|
|
+ start
|
|
+ } else {
|
|
+ start.parent()?
|
|
+ };
|
|
+
|
|
+ // Fast path: if any cached root is an ancestor of this path, use it.
|
|
+ if let Some(root) = self
|
|
+ .git_root_cache
|
|
+ .iter()
|
|
+ .find(|r| dir.starts_with(r))
|
|
+ .cloned()
|
|
+ {
|
|
+ return Some(root);
|
|
+ }
|
|
+
|
|
+ // Walk up to find a `.git` marker.
|
|
+ let mut cur = dir.to_path_buf();
|
|
+ loop {
|
|
+ let git_marker = cur.join(".git");
|
|
+ if git_marker.is_dir() || git_marker.is_file() {
|
|
+ if !self.git_root_cache.iter().any(|r| r == &cur) {
|
|
+ self.git_root_cache.push(cur.clone());
|
|
+ }
|
|
+ return Some(cur);
|
|
+ }
|
|
+
|
|
+ // On Windows, avoid walking above the drive or UNC share root.
|
|
+ #[cfg(windows)]
|
|
+ {
|
|
+ if is_windows_drive_or_unc_root(&cur) {
|
|
+ return None;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if let Some(parent) = cur.parent() {
|
|
+ cur = parent.to_path_buf();
|
|
+ } else {
|
|
+ return None;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Return a display string for `path` relative to its git root if found, else absolute.
|
|
+ fn relative_to_git_root_str(&mut self, path: &Path) -> String {
|
|
+ let s = if let Some(root) = self.find_git_root_cached(path) {
|
|
+ if let Ok(rel) = path.strip_prefix(&root) {
|
|
+ rel.display().to_string()
|
|
+ } else {
|
|
+ path.display().to_string()
|
|
+ }
|
|
+ } else {
|
|
+ path.display().to_string()
|
|
+ };
|
|
+ s.replace('\\', "/")
|
|
+ }
|
|
+
|
|
+ /// Ask git to compute the blob SHA-1 for the file at `path` within its repository.
|
|
+ /// Returns None if no repository is found or git invocation fails.
|
|
+ fn git_blob_oid_for_path(&mut self, path: &Path) -> Option<String> {
|
|
+ let root = self.find_git_root_cached(path)?;
|
|
+ // Compute a path relative to the repo root for better portability across platforms.
|
|
+ let rel = path.strip_prefix(&root).unwrap_or(path);
|
|
+ let output = Command::new("git")
|
|
+ .arg("-C")
|
|
+ .arg(&root)
|
|
+ .arg("hash-object")
|
|
+ .arg("--")
|
|
+ .arg(rel)
|
|
+ .output()
|
|
+ .ok()?;
|
|
+ if !output.status.success() {
|
|
+ return None;
|
|
+ }
|
|
+ let s = String::from_utf8_lossy(&output.stdout).trim().to_string();
|
|
+ if s.len() == 40 { Some(s) } else { None }
|
|
+ }
|
|
+
|
|
+ /// Recompute the aggregated unified diff by comparing all of the in-memory snapshots that were
|
|
+ /// collected before the first time they were touched by apply_patch during this turn with
|
|
+ /// the current repo state.
|
|
+ pub fn get_unified_diff(&mut self) -> Result<Option<String>> {
|
|
+ let mut aggregated = String::new();
|
|
+
|
|
+ // Compute diffs per tracked internal file in a stable order by external path.
|
|
+ let mut baseline_file_names: Vec<String> =
|
|
+ self.baseline_file_info.keys().cloned().collect();
|
|
+ // Sort lexicographically by full repo-relative path to match git behavior.
|
|
+ baseline_file_names.sort_by_key(|internal| {
|
|
```
|
|
|
|
> 👍
|
|
|
|
- Created: 2025-08-04 03:05:24 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250307567
|
|
|
|
```diff
|
|
@@ -0,0 +1,766 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use anyhow::anyhow;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+struct BaselineFileInfo {
|
|
+ path: Option<PathBuf>,
|
|
+ contents_bytes: Option<Vec<u8>>,
|
|
+ mode: Option<String>,
|
|
+ oid: Option<String>,
|
|
+}
|
|
+
|
|
+/// Tracks sets of changes to files and exposes the overall unified diff.
|
|
+/// Internally, the way this works is now:
|
|
+/// 1. Maintain an in-memory baseline snapshot of files when they are first seen.
|
|
+/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null).
|
|
+/// 2. Keep a stable internal filename (uuid + same extension) per external path for rename tracking.
|
|
+/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory
|
|
+/// using the `similar` crate and emit unified diffs with rewritten external paths.
|
|
+#[derive(Default)]
|
|
+pub struct TurnDiffTracker {
|
|
+ /// Map external path -> internal filename (uuid + same extension).
|
|
+ external_to_temp_name: HashMap<PathBuf, String>,
|
|
+ /// Internal filename -> baseline file info.
|
|
+ baseline_file_info: HashMap<String, BaselineFileInfo>,
|
|
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
|
|
+ /// This is where renames are tracked.
|
|
+ temp_name_to_current_path: HashMap<String, PathBuf>,
|
|
+ /// Cache of known git worktree roots to avoid repeated filesystem walks.
|
|
+ git_root_cache: Vec<PathBuf>,
|
|
+}
|
|
+
|
|
+impl TurnDiffTracker {
|
|
+ pub fn new() -> Self {
|
|
+ Self::default()
|
|
+ }
|
|
+
|
|
+ /// Front-run apply patch calls to track the starting contents of any modified files.
|
|
+ /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen.
|
|
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
|
|
+ /// - Also updates internal mappings for move/rename events.
|
|
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
|
|
+ for (path, change) in changes.iter() {
|
|
+ // Ensure a stable internal filename exists for this external path.
|
|
+ if !self.external_to_temp_name.contains_key(path) {
|
|
+ let internal = uuid_filename_for(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(path.clone(), internal.clone());
|
|
+ self.temp_name_to_current_path
|
|
+ .insert(internal.clone(), path.clone());
|
|
+
|
|
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
|
|
+ let (contents_bytes, mode, oid) = if path.exists() {
|
|
+ let mode = file_mode_for_path(path);
|
|
+ let mode_str = mode.as_deref().unwrap_or(REGULAR_MODE);
|
|
+ let contents_bytes = blob_bytes(path, mode_str)
|
|
+ .unwrap_or_default()
|
|
+ .unwrap_or_default();
|
|
+ let oid = if mode.as_deref() == Some(SYMLINK_MODE) {
|
|
+ git_blob_sha1_hex_bytes(&contents_bytes)
|
|
+ } else {
|
|
+ self.git_blob_oid_for_path(path)
|
|
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(&contents_bytes))
|
|
+ };
|
|
+ (Some(contents_bytes), mode, Some(oid))
|
|
+ } else {
|
|
+ (None, None, Some(ZERO_OID.to_string()))
|
|
+ };
|
|
+
|
|
+ self.baseline_file_info.insert(
|
|
+ internal.clone(),
|
|
+ BaselineFileInfo {
|
|
+ path: Some(path.clone()),
|
|
+ contents_bytes,
|
|
+ mode,
|
|
+ oid,
|
|
+ },
|
|
+ );
|
|
+ }
|
|
+
|
|
+ // Track rename/move in current mapping if provided in an Update.
|
|
+ if let FileChange::Update {
|
|
+ move_path: Some(dest),
|
|
+ ..
|
|
+ } = change
|
|
+ {
|
|
+ let uuid_filename = match self.external_to_temp_name.get(path) {
|
|
+ Some(i) => i.clone(),
|
|
+ None => {
|
|
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
|
|
+ let i = uuid_filename_for(path);
|
|
+ self.baseline_file_info.insert(
|
|
+ i.clone(),
|
|
+ BaselineFileInfo {
|
|
+ path: Some(path.clone()),
|
|
+ contents_bytes: None,
|
|
+ mode: None,
|
|
+ oid: Some(ZERO_OID.to_string()),
|
|
+ },
|
|
+ );
|
|
+ i
|
|
+ }
|
|
+ };
|
|
+ // Update current external mapping for temp file name.
|
|
+ self.temp_name_to_current_path
|
|
+ .insert(uuid_filename.clone(), dest.clone());
|
|
+ // Update forward file_mapping: external current -> internal name.
|
|
+ self.external_to_temp_name.remove(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(dest.clone(), uuid_filename);
|
|
+ };
|
|
+ }
|
|
+
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ fn get_path_for_internal(&self, internal: &str) -> Option<PathBuf> {
|
|
+ self.temp_name_to_current_path
|
|
+ .get(internal)
|
|
+ .cloned()
|
|
+ .or_else(|| {
|
|
+ self.baseline_file_info
|
|
+ .get(internal)
|
|
+ .and_then(|info| info.path.clone())
|
|
+ })
|
|
+ }
|
|
+
|
|
+ /// Find the git worktree root for a file/directory by walking up to the first ancestor containing a `.git` entry.
|
|
+ /// Uses a simple cache of known roots and avoids negative-result caching for simplicity.
|
|
+ fn find_git_root_cached(&mut self, start: &Path) -> Option<PathBuf> {
|
|
+ let dir = if start.is_dir() {
|
|
+ start
|
|
+ } else {
|
|
+ start.parent()?
|
|
+ };
|
|
+
|
|
+ // Fast path: if any cached root is an ancestor of this path, use it.
|
|
+ if let Some(root) = self
|
|
+ .git_root_cache
|
|
+ .iter()
|
|
+ .find(|r| dir.starts_with(r))
|
|
+ .cloned()
|
|
+ {
|
|
+ return Some(root);
|
|
+ }
|
|
+
|
|
+ // Walk up to find a `.git` marker.
|
|
+ let mut cur = dir.to_path_buf();
|
|
+ loop {
|
|
+ let git_marker = cur.join(".git");
|
|
+ if git_marker.is_dir() || git_marker.is_file() {
|
|
+ if !self.git_root_cache.iter().any(|r| r == &cur) {
|
|
+ self.git_root_cache.push(cur.clone());
|
|
+ }
|
|
+ return Some(cur);
|
|
+ }
|
|
+
|
|
+ // On Windows, avoid walking above the drive or UNC share root.
|
|
+ #[cfg(windows)]
|
|
+ {
|
|
+ if is_windows_drive_or_unc_root(&cur) {
|
|
+ return None;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if let Some(parent) = cur.parent() {
|
|
+ cur = parent.to_path_buf();
|
|
+ } else {
|
|
+ return None;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Return a display string for `path` relative to its git root if found, else absolute.
|
|
+ fn relative_to_git_root_str(&mut self, path: &Path) -> String {
|
|
+ let s = if let Some(root) = self.find_git_root_cached(path) {
|
|
+ if let Ok(rel) = path.strip_prefix(&root) {
|
|
+ rel.display().to_string()
|
|
+ } else {
|
|
+ path.display().to_string()
|
|
+ }
|
|
+ } else {
|
|
+ path.display().to_string()
|
|
+ };
|
|
+ s.replace('\\', "/")
|
|
+ }
|
|
+
|
|
+ /// Ask git to compute the blob SHA-1 for the file at `path` within its repository.
|
|
+ /// Returns None if no repository is found or git invocation fails.
|
|
+ fn git_blob_oid_for_path(&mut self, path: &Path) -> Option<String> {
|
|
+ let root = self.find_git_root_cached(path)?;
|
|
+ // Compute a path relative to the repo root for better portability across platforms.
|
|
+ let rel = path.strip_prefix(&root).unwrap_or(path);
|
|
+ let output = Command::new("git")
|
|
+ .arg("-C")
|
|
+ .arg(&root)
|
|
+ .arg("hash-object")
|
|
+ .arg("--")
|
|
+ .arg(rel)
|
|
+ .output()
|
|
+ .ok()?;
|
|
+ if !output.status.success() {
|
|
+ return None;
|
|
+ }
|
|
+ let s = String::from_utf8_lossy(&output.stdout).trim().to_string();
|
|
+ if s.len() == 40 { Some(s) } else { None }
|
|
+ }
|
|
+
|
|
+ /// Recompute the aggregated unified diff by comparing all of the in-memory snapshots that were
|
|
+ /// collected before the first time they were touched by apply_patch during this turn with
|
|
+ /// the current repo state.
|
|
+ pub fn get_unified_diff(&mut self) -> Result<Option<String>> {
|
|
+ let mut aggregated = String::new();
|
|
+
|
|
+ // Compute diffs per tracked internal file in a stable order by external path.
|
|
+ let mut baseline_file_names: Vec<String> =
|
|
+ self.baseline_file_info.keys().cloned().collect();
|
|
+ // Sort lexicographically by full repo-relative path to match git behavior.
|
|
+ baseline_file_names.sort_by_key(|internal| {
|
|
+ self.get_path_for_internal(internal)
|
|
+ .map(|p| self.relative_to_git_root_str(&p))
|
|
+ .unwrap_or_default()
|
|
+ });
|
|
+
|
|
+ for internal in baseline_file_names {
|
|
+ // Baseline external must exist for any tracked internal.
|
|
+ let baseline_external = match self
|
|
+ .baseline_file_info
|
|
+ .get(&internal)
|
|
+ .and_then(|i| i.path.clone())
|
|
+ {
|
|
+ Some(p) => p,
|
|
+ None => continue,
|
|
+ };
|
|
+ let current_external = match self.get_path_for_internal(&internal) {
|
|
+ Some(p) => p,
|
|
+ None => continue,
|
|
+ };
|
|
+
|
|
+ // Determine modes early; needed to read symlink content correctly.
|
|
+ let baseline_mode = self
|
|
+ .baseline_file_info
|
|
+ .get(&internal)
|
|
+ .and_then(|i| i.mode.clone())
|
|
+ .unwrap_or_else(|| REGULAR_MODE.to_string());
|
|
+ let current_mode =
|
|
+ file_mode_for_path(&current_external).unwrap_or_else(|| REGULAR_MODE.to_string());
|
|
+
|
|
+ let left_bytes = self
|
|
+ .baseline_file_info
|
|
+ .get(&internal)
|
|
+ .and_then(|i| i.contents_bytes.clone());
|
|
+
|
|
+ let right_bytes = blob_bytes(&current_external, &current_mode)?;
|
|
+
|
|
+ // Fast path: identical bytes or both missing.
|
|
+ if left_bytes.as_deref() == right_bytes.as_deref() {
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ let left_display = self.relative_to_git_root_str(&baseline_external);
|
|
+ let right_display = self.relative_to_git_root_str(&current_external);
|
|
+
|
|
+ // Emit a git-style header for better readability and parity with previous behavior.
|
|
+ aggregated.push_str(&format!("diff --git a/{left_display} b/{right_display}\n"));
|
|
+
|
|
+ let is_add = left_bytes.is_none() && right_bytes.is_some();
|
|
```
|
|
|
|
> Could do `match (left_bytes, right_bytes)` to ensure all cases are covered.
|
|
|
|
- Created: 2025-08-04 03:06:34 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250308415
|
|
|
|
```diff
|
|
@@ -0,0 +1,766 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use anyhow::anyhow;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+struct BaselineFileInfo {
|
|
+ path: Option<PathBuf>,
|
|
+ contents_bytes: Option<Vec<u8>>,
|
|
+ mode: Option<String>,
|
|
+ oid: Option<String>,
|
|
+}
|
|
+
|
|
+/// Tracks sets of changes to files and exposes the overall unified diff.
|
|
+/// Internally, the way this works is now:
|
|
+/// 1. Maintain an in-memory baseline snapshot of files when they are first seen.
|
|
+/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null).
|
|
+/// 2. Keep a stable internal filename (uuid + same extension) per external path for rename tracking.
|
|
+/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory
|
|
+/// using the `similar` crate and emit unified diffs with rewritten external paths.
|
|
+#[derive(Default)]
|
|
+pub struct TurnDiffTracker {
|
|
+ /// Map external path -> internal filename (uuid + same extension).
|
|
+ external_to_temp_name: HashMap<PathBuf, String>,
|
|
+ /// Internal filename -> baseline file info.
|
|
+ baseline_file_info: HashMap<String, BaselineFileInfo>,
|
|
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
|
|
+ /// This is where renames are tracked.
|
|
+ temp_name_to_current_path: HashMap<String, PathBuf>,
|
|
+ /// Cache of known git worktree roots to avoid repeated filesystem walks.
|
|
+ git_root_cache: Vec<PathBuf>,
|
|
+}
|
|
+
|
|
+impl TurnDiffTracker {
|
|
+ pub fn new() -> Self {
|
|
+ Self::default()
|
|
+ }
|
|
+
|
|
+ /// Front-run apply patch calls to track the starting contents of any modified files.
|
|
+ /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen.
|
|
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
|
|
+ /// - Also updates internal mappings for move/rename events.
|
|
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
|
|
+ for (path, change) in changes.iter() {
|
|
+ // Ensure a stable internal filename exists for this external path.
|
|
+ if !self.external_to_temp_name.contains_key(path) {
|
|
+ let internal = uuid_filename_for(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(path.clone(), internal.clone());
|
|
+ self.temp_name_to_current_path
|
|
+ .insert(internal.clone(), path.clone());
|
|
+
|
|
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
|
|
+ let (contents_bytes, mode, oid) = if path.exists() {
|
|
+ let mode = file_mode_for_path(path);
|
|
+ let mode_str = mode.as_deref().unwrap_or(REGULAR_MODE);
|
|
+ let contents_bytes = blob_bytes(path, mode_str)
|
|
+ .unwrap_or_default()
|
|
+ .unwrap_or_default();
|
|
+ let oid = if mode.as_deref() == Some(SYMLINK_MODE) {
|
|
+ git_blob_sha1_hex_bytes(&contents_bytes)
|
|
+ } else {
|
|
+ self.git_blob_oid_for_path(path)
|
|
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(&contents_bytes))
|
|
+ };
|
|
+ (Some(contents_bytes), mode, Some(oid))
|
|
+ } else {
|
|
+ (None, None, Some(ZERO_OID.to_string()))
|
|
+ };
|
|
+
|
|
+ self.baseline_file_info.insert(
|
|
+ internal.clone(),
|
|
+ BaselineFileInfo {
|
|
+ path: Some(path.clone()),
|
|
+ contents_bytes,
|
|
+ mode,
|
|
+ oid,
|
|
+ },
|
|
+ );
|
|
+ }
|
|
+
|
|
+ // Track rename/move in current mapping if provided in an Update.
|
|
+ if let FileChange::Update {
|
|
+ move_path: Some(dest),
|
|
+ ..
|
|
+ } = change
|
|
+ {
|
|
+ let uuid_filename = match self.external_to_temp_name.get(path) {
|
|
+ Some(i) => i.clone(),
|
|
+ None => {
|
|
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
|
|
+ let i = uuid_filename_for(path);
|
|
+ self.baseline_file_info.insert(
|
|
+ i.clone(),
|
|
+ BaselineFileInfo {
|
|
+ path: Some(path.clone()),
|
|
+ contents_bytes: None,
|
|
+ mode: None,
|
|
+ oid: Some(ZERO_OID.to_string()),
|
|
+ },
|
|
+ );
|
|
+ i
|
|
+ }
|
|
+ };
|
|
+ // Update current external mapping for temp file name.
|
|
+ self.temp_name_to_current_path
|
|
+ .insert(uuid_filename.clone(), dest.clone());
|
|
+ // Update forward file_mapping: external current -> internal name.
|
|
+ self.external_to_temp_name.remove(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(dest.clone(), uuid_filename);
|
|
+ };
|
|
+ }
|
|
+
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ fn get_path_for_internal(&self, internal: &str) -> Option<PathBuf> {
|
|
+ self.temp_name_to_current_path
|
|
+ .get(internal)
|
|
+ .cloned()
|
|
+ .or_else(|| {
|
|
+ self.baseline_file_info
|
|
+ .get(internal)
|
|
+ .and_then(|info| info.path.clone())
|
|
+ })
|
|
+ }
|
|
+
|
|
+ /// Find the git worktree root for a file/directory by walking up to the first ancestor containing a `.git` entry.
|
|
+ /// Uses a simple cache of known roots and avoids negative-result caching for simplicity.
|
|
+ fn find_git_root_cached(&mut self, start: &Path) -> Option<PathBuf> {
|
|
+ let dir = if start.is_dir() {
|
|
+ start
|
|
+ } else {
|
|
+ start.parent()?
|
|
+ };
|
|
+
|
|
+ // Fast path: if any cached root is an ancestor of this path, use it.
|
|
+ if let Some(root) = self
|
|
+ .git_root_cache
|
|
+ .iter()
|
|
+ .find(|r| dir.starts_with(r))
|
|
+ .cloned()
|
|
+ {
|
|
+ return Some(root);
|
|
+ }
|
|
+
|
|
+ // Walk up to find a `.git` marker.
|
|
+ let mut cur = dir.to_path_buf();
|
|
+ loop {
|
|
+ let git_marker = cur.join(".git");
|
|
+ if git_marker.is_dir() || git_marker.is_file() {
|
|
+ if !self.git_root_cache.iter().any(|r| r == &cur) {
|
|
+ self.git_root_cache.push(cur.clone());
|
|
+ }
|
|
+ return Some(cur);
|
|
+ }
|
|
+
|
|
+ // On Windows, avoid walking above the drive or UNC share root.
|
|
+ #[cfg(windows)]
|
|
+ {
|
|
+ if is_windows_drive_or_unc_root(&cur) {
|
|
+ return None;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if let Some(parent) = cur.parent() {
|
|
+ cur = parent.to_path_buf();
|
|
+ } else {
|
|
+ return None;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Return a display string for `path` relative to its git root if found, else absolute.
|
|
+ fn relative_to_git_root_str(&mut self, path: &Path) -> String {
|
|
+ let s = if let Some(root) = self.find_git_root_cached(path) {
|
|
+ if let Ok(rel) = path.strip_prefix(&root) {
|
|
+ rel.display().to_string()
|
|
+ } else {
|
|
+ path.display().to_string()
|
|
+ }
|
|
+ } else {
|
|
+ path.display().to_string()
|
|
+ };
|
|
+ s.replace('\\', "/")
|
|
+ }
|
|
+
|
|
+ /// Ask git to compute the blob SHA-1 for the file at `path` within its repository.
|
|
+ /// Returns None if no repository is found or git invocation fails.
|
|
+ fn git_blob_oid_for_path(&mut self, path: &Path) -> Option<String> {
|
|
+ let root = self.find_git_root_cached(path)?;
|
|
+ // Compute a path relative to the repo root for better portability across platforms.
|
|
+ let rel = path.strip_prefix(&root).unwrap_or(path);
|
|
+ let output = Command::new("git")
|
|
+ .arg("-C")
|
|
+ .arg(&root)
|
|
+ .arg("hash-object")
|
|
+ .arg("--")
|
|
+ .arg(rel)
|
|
+ .output()
|
|
+ .ok()?;
|
|
+ if !output.status.success() {
|
|
+ return None;
|
|
+ }
|
|
+ let s = String::from_utf8_lossy(&output.stdout).trim().to_string();
|
|
+ if s.len() == 40 { Some(s) } else { None }
|
|
+ }
|
|
+
|
|
+ /// Recompute the aggregated unified diff by comparing all of the in-memory snapshots that were
|
|
+ /// collected before the first time they were touched by apply_patch during this turn with
|
|
+ /// the current repo state.
|
|
+ pub fn get_unified_diff(&mut self) -> Result<Option<String>> {
|
|
```
|
|
|
|
> This function is quite long and feels like it would benefit from being broken up.
|
|
|
|
- Created: 2025-08-04 03:07:44 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250309286
|
|
|
|
```diff
|
|
@@ -0,0 +1,476 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use tempfile::TempDir;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+/// Tracks sets of changes to files and exposes the overall unified diff.
|
|
+/// Internally, the way this works is now:
|
|
+/// 1. Create a temp directory to store baseline snapshots of files when they are first seen.
|
|
+/// 2. When a path is first observed, copy its current contents into the baseline dir if it exists on disk.
|
|
+/// For new additions, do not create a baseline file so that diffs are shown as proper additions (using /dev/null).
|
|
+/// 3. Keep a stable internal filename (uuid + same extension) per external path for path rewrite in diffs.
|
|
+/// 4. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk using
|
|
+/// `git diff --no-index` and rewrite paths to external paths.
|
|
+#[derive(Default)]
|
|
+pub struct TurnDiffTracker {
|
|
+ /// Temp directory holding baseline snapshots of files as first seen.
|
|
+ baseline_files_dir: Option<TempDir>,
|
|
+ /// Map external path -> internal filename (uuid + same extension).
|
|
+ external_to_temp_name: HashMap<PathBuf, String>,
|
|
+ /// Internal filename -> external path as of baseline snapshot.
|
|
+ temp_name_to_baseline_external: HashMap<String, PathBuf>,
|
|
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
|
|
+ /// This is where renames are tracked.
|
|
+ temp_name_to_current_external: HashMap<String, PathBuf>,
|
|
+ /// Aggregated unified diff for all accumulated changes across files.
|
|
+ pub unified_diff: Option<String>,
|
|
+}
|
|
+
|
|
+impl TurnDiffTracker {
|
|
+ pub fn new() -> Self {
|
|
+ Self::default()
|
|
+ }
|
|
+
|
|
+ /// Front-run apply patch calls to track the starting contents of any modified files.
|
|
+ /// - Creates a baseline snapshot for files that already exist on disk when first seen.
|
|
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
|
|
+ /// - Also updates internal mappings for move/rename events.
|
|
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
|
|
+ self.ensure_baseline_dir()?;
|
|
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
|
|
+
|
|
+ for (path, change) in changes.iter() {
|
|
+ // Ensure a stable internal filename exists for this external path.
|
|
+ if !self.external_to_temp_name.contains_key(path) {
|
|
+ let internal = uuid_filename_for(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(path.clone(), internal.clone());
|
|
+ self.temp_name_to_baseline_external
|
|
+ .insert(internal.clone(), path.clone());
|
|
+ self.temp_name_to_current_external
|
|
+ .insert(internal.clone(), path.clone());
|
|
+
|
|
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
|
|
+ if path.exists() {
|
|
+ let contents = fs::read(path)
|
|
+ .with_context(|| format!("failed to read original {}", path.display()))?;
|
|
+ let internal_path = baseline_dir.join(&internal);
|
|
+ fs::write(&internal_path, contents).with_context(|| {
|
|
+ format!("failed to write baseline file {}", internal_path.display())
|
|
+ })?;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ // Track rename/move in current mapping if provided in an Update.
|
|
+ let move_path = match change {
|
|
+ FileChange::Update {
|
|
+ move_path: Some(dest),
|
|
+ ..
|
|
+ } => Some(dest),
|
|
+ _ => None,
|
|
+ };
|
|
+ if let Some(dest) = move_path {
|
|
+ let uuid_filename = match self.external_to_temp_name.get(path) {
|
|
+ Some(i) => i.clone(),
|
|
+ None => {
|
|
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
|
|
+ let i = uuid_filename_for(path);
|
|
+ self.external_to_temp_name.insert(path.clone(), i.clone());
|
|
+ self.temp_name_to_baseline_external
|
|
+ .insert(i.clone(), path.clone());
|
|
+ i
|
|
+ }
|
|
+ };
|
|
+ // Update current external mapping for temp file name.
|
|
+ self.temp_name_to_current_external
|
|
+ .insert(uuid_filename.clone(), dest.clone());
|
|
+ // Update forward file_mapping: external current -> internal name.
|
|
+ self.external_to_temp_name.remove(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(dest.clone(), uuid_filename);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ /// Recompute the aggregated unified diff by comparing all baseline snapshots against
|
|
+ /// current files on disk using `git diff --no-index` and rewriting paths to external paths.
|
|
+ pub fn update_and_get_unified_diff(&mut self) -> Result<Option<String>> {
|
|
+ let baseline_dir = self.baseline_dir()?.to_path_buf();
|
|
+ let current_dir = baseline_dir.join("current");
|
|
+ if current_dir.exists() {
|
|
+ // Best-effort cleanup of previous run's mirror.
|
|
+ let _ = fs::remove_dir_all(&current_dir);
|
|
+ }
|
|
+ fs::create_dir_all(&current_dir).with_context(|| {
|
|
+ format!(
|
|
+ "failed to create current mirror dir {}",
|
|
+ current_dir.display()
|
|
+ )
|
|
+ })?;
|
|
+
|
|
+ let mut aggregated = String::new();
|
|
+
|
|
+ // Compute diffs per tracked internal file.
|
|
+ for (internal, baseline_external) in &self.temp_name_to_baseline_external {
|
|
+ let baseline_path = baseline_dir.join(internal);
|
|
+ let current_external = self
|
|
+ .temp_name_to_current_external
|
|
+ .get(internal)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| baseline_external.clone());
|
|
+
|
|
+ let left_is_dev_null = !baseline_path.exists();
|
|
+ let right_exists = current_external.exists();
|
|
+
|
|
+ // Prepare right side mirror file if exists; otherwise use /dev/null for deletions.
|
|
+ let right_arg = if right_exists {
|
|
+ let mirror_path = current_dir.join(internal);
|
|
+ let contents = fs::read(&current_external).with_context(|| {
|
|
+ format!(
|
|
+ "failed to read current file for diff {}",
|
|
+ current_external.display()
|
|
+ )
|
|
+ })?;
|
|
+ fs::write(&mirror_path, contents).with_context(|| {
|
|
+ format!(
|
|
+ "failed to write current mirror file {}",
|
|
+ mirror_path.display()
|
|
+ )
|
|
+ })?;
|
|
+ // Use relative path from baseline_dir (so headers say a/<uuid> b/current/<uuid>).
|
|
+ format!("current/{internal}")
|
|
+ } else {
|
|
+ // Deletion: right side is /dev/null to show proper deleted file diff.
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+
|
|
+ // Prepare left arg: baseline file path or /dev/null for additions.
|
|
+ let left_arg = if left_is_dev_null {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ internal.clone()
|
|
+ };
|
|
+
|
|
+ // Run git diff --no-index from baseline_dir to keep paths predictable.
|
|
+ let raw = run_git_allow_exit_codes(
|
|
+ &baseline_dir,
|
|
+ &[
|
|
+ "-c",
|
|
+ "color.ui=false",
|
|
+ "diff",
|
|
+ "--no-color",
|
|
+ "--no-index",
|
|
+ "--",
|
|
+ &left_arg,
|
|
+ &right_arg,
|
|
+ ],
|
|
+ &[0, 1], // 0: no changes, 1: differences
|
|
+ )?;
|
|
+
|
|
+ if raw.trim().is_empty() {
|
|
+ continue;
|
|
+ }
|
|
+ let rewritten = self.rewrite_diff_paths(&raw);
|
|
+ if !rewritten.trim().is_empty() {
|
|
+ if !aggregated.is_empty() && !aggregated.ends_with('\n') {
|
|
+ aggregated.push('\n');
|
|
+ }
|
|
+ aggregated.push_str(&rewritten);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ self.unified_diff = if aggregated.trim().is_empty() {
|
|
+ None
|
|
+ } else {
|
|
+ Some(aggregated)
|
|
+ };
|
|
+
|
|
+ // Clean up the current dir.
|
|
+ let _ = fs::remove_dir_all(&current_dir);
|
|
+
|
|
+ Ok(self.unified_diff.clone())
|
|
+ }
|
|
+
|
|
+ fn baseline_dir(&self) -> Result<&Path> {
|
|
+ self.baseline_files_dir
|
|
+ .as_ref()
|
|
+ .map(|d| d.path())
|
|
+ .ok_or_else(|| anyhow::anyhow!("baseline temp dir not initialized"))
|
|
+ }
|
|
+
|
|
+ fn ensure_baseline_dir(&mut self) -> Result<()> {
|
|
+ if self.baseline_files_dir.is_some() {
|
|
+ return Ok(());
|
|
+ }
|
|
+ let tmp = TempDir::new().context("create baseline temp dir")?;
|
|
+ self.baseline_files_dir = Some(tmp);
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ /// Rewrites the internal filenames to external paths in diff headers.
|
|
+ /// Handles inputs like:
|
|
+ /// diff --git a/<uuid> b/current/<uuid>
|
|
+ /// --- a/<uuid> | /dev/null
|
|
+ /// +++ b/current/<uuid> | /dev/null
|
|
+ /// and replaces uuid with the external paths tracking baseline/current.
|
|
+ fn rewrite_diff_paths(&self, diff: &str) -> String {
|
|
+ let mut out = String::new();
|
|
+ for line in diff.lines() {
|
|
+ if let Some(rest) = line.strip_prefix("diff --git ") {
|
|
+ // Format: diff --git a/<f> b/<f>
|
|
+ let parts: Vec<&str> = rest.split_whitespace().collect();
|
|
+ if parts.len() == 2 {
|
|
+ let a = parts[0].strip_prefix("a/").unwrap_or(parts[0]);
|
|
+ let b = parts[1].strip_prefix("b/").unwrap_or(parts[1]);
|
|
+
|
|
+ let a_ext_display = if a == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let a_base = Path::new(a)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(a);
|
|
+ let mapped = self
|
|
+ .temp_name_to_baseline_external
|
|
+ .get(a_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(a));
|
|
+ mapped.display().to_string()
|
|
+ };
|
|
+
|
|
+ let b_ext_display = if b == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let b_base = Path::new(b)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(b);
|
|
+ let mapped = self
|
|
+ .temp_name_to_current_external
|
|
+ .get(b_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(b));
|
|
+ mapped.display().to_string()
|
|
+ };
|
|
+
|
|
+ out.push_str(&format!("diff --git a/{a_ext_display} b/{b_ext_display}\n"));
|
|
+ continue;
|
|
+ }
|
|
+ }
|
|
+ if let Some(rest) = line.strip_prefix("--- ") {
|
|
+ if let Some(path) = rest.strip_prefix("a/") {
|
|
+ let external_display = if path == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let p_base = Path::new(path)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(path);
|
|
+ self.temp_name_to_baseline_external
|
|
+ .get(p_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(path))
|
|
+ .display()
|
|
+ .to_string()
|
|
+ };
|
|
+ out.push_str(&format!("--- {external_display}\n"));
|
|
+ continue;
|
|
+ }
|
|
+ }
|
|
+ if let Some(rest) = line.strip_prefix("+++ ") {
|
|
+ if let Some(path) = rest.strip_prefix("b/") {
|
|
+ let external_display = if path == "/dev/null" {
|
|
+ "/dev/null".to_string()
|
|
+ } else {
|
|
+ let p_base = Path::new(path)
|
|
+ .file_name()
|
|
+ .and_then(|s| s.to_str())
|
|
+ .unwrap_or(path);
|
|
+ self.temp_name_to_current_external
|
|
+ .get(p_base)
|
|
+ .cloned()
|
|
+ .unwrap_or_else(|| PathBuf::from(path))
|
|
+ .display()
|
|
+ .to_string()
|
|
+ };
|
|
+ out.push_str(&format!("+++ {external_display}\n"));
|
|
+ continue;
|
|
+ }
|
|
+ }
|
|
+ out.push_str(line);
|
|
+ out.push('\n');
|
|
+ }
|
|
+ out
|
|
+ }
|
|
+}
|
|
+
|
|
+fn uuid_filename_for(path: &Path) -> String {
|
|
+ let id = Uuid::new_v4().to_string();
|
|
+ match path.extension().and_then(|e| e.to_str()) {
|
|
+ Some(ext) if !ext.is_empty() => format!("{id}.{ext}"),
|
|
+ _ => id,
|
|
```
|
|
|
|
> I'm still unclear why `ext` is added.
|
|
|
|
- Created: 2025-08-04 03:08:45 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250310045
|
|
|
|
```diff
|
|
@@ -0,0 +1,766 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use anyhow::anyhow;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+struct BaselineFileInfo {
|
|
+ path: Option<PathBuf>,
|
|
+ contents_bytes: Option<Vec<u8>>,
|
|
+ mode: Option<String>,
|
|
+ oid: Option<String>,
|
|
+}
|
|
+
|
|
+/// Tracks sets of changes to files and exposes the overall unified diff.
|
|
+/// Internally, the way this works is now:
|
|
+/// 1. Maintain an in-memory baseline snapshot of files when they are first seen.
|
|
+/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null).
|
|
+/// 2. Keep a stable internal filename (uuid + same extension) per external path for rename tracking.
|
|
+/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory
|
|
+/// using the `similar` crate and emit unified diffs with rewritten external paths.
|
|
+#[derive(Default)]
|
|
+pub struct TurnDiffTracker {
|
|
+ /// Map external path -> internal filename (uuid + same extension).
|
|
+ external_to_temp_name: HashMap<PathBuf, String>,
|
|
+ /// Internal filename -> baseline file info.
|
|
+ baseline_file_info: HashMap<String, BaselineFileInfo>,
|
|
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
|
|
+ /// This is where renames are tracked.
|
|
+ temp_name_to_current_path: HashMap<String, PathBuf>,
|
|
+ /// Cache of known git worktree roots to avoid repeated filesystem walks.
|
|
+ git_root_cache: Vec<PathBuf>,
|
|
+}
|
|
+
|
|
+impl TurnDiffTracker {
|
|
+ pub fn new() -> Self {
|
|
+ Self::default()
|
|
+ }
|
|
+
|
|
+ /// Front-run apply patch calls to track the starting contents of any modified files.
|
|
+ /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen.
|
|
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
|
|
+ /// - Also updates internal mappings for move/rename events.
|
|
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
|
|
+ for (path, change) in changes.iter() {
|
|
+ // Ensure a stable internal filename exists for this external path.
|
|
+ if !self.external_to_temp_name.contains_key(path) {
|
|
+ let internal = uuid_filename_for(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(path.clone(), internal.clone());
|
|
+ self.temp_name_to_current_path
|
|
+ .insert(internal.clone(), path.clone());
|
|
+
|
|
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
|
|
+ let (contents_bytes, mode, oid) = if path.exists() {
|
|
+ let mode = file_mode_for_path(path);
|
|
+ let mode_str = mode.as_deref().unwrap_or(REGULAR_MODE);
|
|
+ let contents_bytes = blob_bytes(path, mode_str)
|
|
+ .unwrap_or_default()
|
|
+ .unwrap_or_default();
|
|
+ let oid = if mode.as_deref() == Some(SYMLINK_MODE) {
|
|
+ git_blob_sha1_hex_bytes(&contents_bytes)
|
|
+ } else {
|
|
+ self.git_blob_oid_for_path(path)
|
|
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(&contents_bytes))
|
|
+ };
|
|
+ (Some(contents_bytes), mode, Some(oid))
|
|
+ } else {
|
|
+ (None, None, Some(ZERO_OID.to_string()))
|
|
+ };
|
|
+
|
|
+ self.baseline_file_info.insert(
|
|
+ internal.clone(),
|
|
+ BaselineFileInfo {
|
|
+ path: Some(path.clone()),
|
|
+ contents_bytes,
|
|
+ mode,
|
|
+ oid,
|
|
+ },
|
|
+ );
|
|
+ }
|
|
+
|
|
+ // Track rename/move in current mapping if provided in an Update.
|
|
+ if let FileChange::Update {
|
|
+ move_path: Some(dest),
|
|
+ ..
|
|
+ } = change
|
|
+ {
|
|
+ let uuid_filename = match self.external_to_temp_name.get(path) {
|
|
+ Some(i) => i.clone(),
|
|
+ None => {
|
|
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
|
|
+ let i = uuid_filename_for(path);
|
|
+ self.baseline_file_info.insert(
|
|
+ i.clone(),
|
|
+ BaselineFileInfo {
|
|
+ path: Some(path.clone()),
|
|
+ contents_bytes: None,
|
|
+ mode: None,
|
|
+ oid: Some(ZERO_OID.to_string()),
|
|
+ },
|
|
+ );
|
|
+ i
|
|
+ }
|
|
+ };
|
|
+ // Update current external mapping for temp file name.
|
|
+ self.temp_name_to_current_path
|
|
+ .insert(uuid_filename.clone(), dest.clone());
|
|
+ // Update forward file_mapping: external current -> internal name.
|
|
+ self.external_to_temp_name.remove(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(dest.clone(), uuid_filename);
|
|
+ };
|
|
+ }
|
|
+
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ fn get_path_for_internal(&self, internal: &str) -> Option<PathBuf> {
|
|
+ self.temp_name_to_current_path
|
|
+ .get(internal)
|
|
+ .cloned()
|
|
+ .or_else(|| {
|
|
+ self.baseline_file_info
|
|
+ .get(internal)
|
|
+ .and_then(|info| info.path.clone())
|
|
+ })
|
|
+ }
|
|
+
|
|
+ /// Find the git worktree root for a file/directory by walking up to the first ancestor containing a `.git` entry.
|
|
+ /// Uses a simple cache of known roots and avoids negative-result caching for simplicity.
|
|
+ fn find_git_root_cached(&mut self, start: &Path) -> Option<PathBuf> {
|
|
+ let dir = if start.is_dir() {
|
|
+ start
|
|
+ } else {
|
|
+ start.parent()?
|
|
+ };
|
|
+
|
|
+ // Fast path: if any cached root is an ancestor of this path, use it.
|
|
+ if let Some(root) = self
|
|
+ .git_root_cache
|
|
+ .iter()
|
|
+ .find(|r| dir.starts_with(r))
|
|
+ .cloned()
|
|
+ {
|
|
+ return Some(root);
|
|
+ }
|
|
+
|
|
+ // Walk up to find a `.git` marker.
|
|
+ let mut cur = dir.to_path_buf();
|
|
+ loop {
|
|
+ let git_marker = cur.join(".git");
|
|
+ if git_marker.is_dir() || git_marker.is_file() {
|
|
+ if !self.git_root_cache.iter().any(|r| r == &cur) {
|
|
+ self.git_root_cache.push(cur.clone());
|
|
+ }
|
|
+ return Some(cur);
|
|
+ }
|
|
+
|
|
+ // On Windows, avoid walking above the drive or UNC share root.
|
|
+ #[cfg(windows)]
|
|
+ {
|
|
+ if is_windows_drive_or_unc_root(&cur) {
|
|
+ return None;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if let Some(parent) = cur.parent() {
|
|
+ cur = parent.to_path_buf();
|
|
+ } else {
|
|
+ return None;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Return a display string for `path` relative to its git root if found, else absolute.
|
|
+ fn relative_to_git_root_str(&mut self, path: &Path) -> String {
|
|
+ let s = if let Some(root) = self.find_git_root_cached(path) {
|
|
+ if let Ok(rel) = path.strip_prefix(&root) {
|
|
+ rel.display().to_string()
|
|
+ } else {
|
|
+ path.display().to_string()
|
|
+ }
|
|
+ } else {
|
|
+ path.display().to_string()
|
|
+ };
|
|
+ s.replace('\\', "/")
|
|
+ }
|
|
+
|
|
+ /// Ask git to compute the blob SHA-1 for the file at `path` within its repository.
|
|
+ /// Returns None if no repository is found or git invocation fails.
|
|
+ fn git_blob_oid_for_path(&mut self, path: &Path) -> Option<String> {
|
|
+ let root = self.find_git_root_cached(path)?;
|
|
+ // Compute a path relative to the repo root for better portability across platforms.
|
|
+ let rel = path.strip_prefix(&root).unwrap_or(path);
|
|
+ let output = Command::new("git")
|
|
+ .arg("-C")
|
|
+ .arg(&root)
|
|
+ .arg("hash-object")
|
|
+ .arg("--")
|
|
+ .arg(rel)
|
|
+ .output()
|
|
+ .ok()?;
|
|
+ if !output.status.success() {
|
|
+ return None;
|
|
+ }
|
|
+ let s = String::from_utf8_lossy(&output.stdout).trim().to_string();
|
|
+ if s.len() == 40 { Some(s) } else { None }
|
|
+ }
|
|
+
|
|
+ /// Recompute the aggregated unified diff by comparing all of the in-memory snapshots that were
|
|
+ /// collected before the first time they were touched by apply_patch during this turn with
|
|
+ /// the current repo state.
|
|
+ pub fn get_unified_diff(&mut self) -> Result<Option<String>> {
|
|
+ let mut aggregated = String::new();
|
|
+
|
|
+ // Compute diffs per tracked internal file in a stable order by external path.
|
|
+ let mut baseline_file_names: Vec<String> =
|
|
+ self.baseline_file_info.keys().cloned().collect();
|
|
+ // Sort lexicographically by full repo-relative path to match git behavior.
|
|
+ baseline_file_names.sort_by_key(|internal| {
|
|
+ self.get_path_for_internal(internal)
|
|
+ .map(|p| self.relative_to_git_root_str(&p))
|
|
+ .unwrap_or_default()
|
|
+ });
|
|
+
|
|
+ for internal in baseline_file_names {
|
|
+ // Baseline external must exist for any tracked internal.
|
|
+ let baseline_external = match self
|
|
+ .baseline_file_info
|
|
+ .get(&internal)
|
|
+ .and_then(|i| i.path.clone())
|
|
+ {
|
|
+ Some(p) => p,
|
|
+ None => continue,
|
|
+ };
|
|
+ let current_external = match self.get_path_for_internal(&internal) {
|
|
+ Some(p) => p,
|
|
+ None => continue,
|
|
+ };
|
|
+
|
|
+ // Determine modes early; needed to read symlink content correctly.
|
|
+ let baseline_mode = self
|
|
+ .baseline_file_info
|
|
+ .get(&internal)
|
|
+ .and_then(|i| i.mode.clone())
|
|
+ .unwrap_or_else(|| REGULAR_MODE.to_string());
|
|
+ let current_mode =
|
|
+ file_mode_for_path(&current_external).unwrap_or_else(|| REGULAR_MODE.to_string());
|
|
+
|
|
+ let left_bytes = self
|
|
+ .baseline_file_info
|
|
+ .get(&internal)
|
|
+ .and_then(|i| i.contents_bytes.clone());
|
|
+
|
|
+ let right_bytes = blob_bytes(&current_external, &current_mode)?;
|
|
+
|
|
+ // Fast path: identical bytes or both missing.
|
|
+ if left_bytes.as_deref() == right_bytes.as_deref() {
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ let left_display = self.relative_to_git_root_str(&baseline_external);
|
|
+ let right_display = self.relative_to_git_root_str(&current_external);
|
|
+
|
|
+ // Emit a git-style header for better readability and parity with previous behavior.
|
|
+ aggregated.push_str(&format!("diff --git a/{left_display} b/{right_display}\n"));
|
|
+
|
|
+ let is_add = left_bytes.is_none() && right_bytes.is_some();
|
|
+ let is_delete = left_bytes.is_some() && right_bytes.is_none();
|
|
+
|
|
+ if is_add {
|
|
+ aggregated.push_str(&format!("new file mode {current_mode}\n"));
|
|
+ } else if is_delete {
|
|
+ aggregated.push_str(&format!("deleted file mode {baseline_mode}\n"));
|
|
+ } else if baseline_mode != current_mode {
|
|
+ aggregated.push_str(&format!("old mode {baseline_mode}\n"));
|
|
+ aggregated.push_str(&format!("new mode {current_mode}\n"));
|
|
+ }
|
|
+
|
|
+ // Determine blob object IDs for left and right contents. Prefer stored OIDs
|
|
+ // captured from the original repo state when the change was first seen.
|
|
+ let left_oid = self
|
|
+ .baseline_file_info
|
|
+ .get(&internal)
|
|
+ .and_then(|i| i.oid.clone())
|
|
+ .or_else(|| {
|
|
+ left_bytes
|
|
+ .as_ref()
|
|
+ .map(|b| git_blob_sha1_hex_bytes(b))
|
|
+ .or(Some(ZERO_OID.to_string()))
|
|
+ })
|
|
+ .unwrap_or_else(|| ZERO_OID.to_string());
|
|
+ let right_oid = if let Some(b) = right_bytes.as_ref() {
|
|
+ if current_mode == SYMLINK_MODE {
|
|
+ git_blob_sha1_hex_bytes(b)
|
|
+ } else {
|
|
+ self.git_blob_oid_for_path(&current_external)
|
|
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(b))
|
|
+ }
|
|
+ } else {
|
|
+ ZERO_OID.to_string()
|
|
+ };
|
|
+
|
|
+ // If either side isn't valid UTF-8, emit a binary diff header and continue.
|
|
+ let left_text = left_bytes
|
|
+ .as_deref()
|
|
+ .and_then(|b| std::str::from_utf8(b).ok());
|
|
+ let right_text = right_bytes
|
|
+ .as_deref()
|
|
+ .and_then(|b| std::str::from_utf8(b).ok());
|
|
+
|
|
+ // Prefer text diffs when possible:
|
|
+ // - both sides are valid UTF-8
|
|
+ // - OR one side is missing (add/delete) and the present side is valid UTF-8
|
|
+ let can_text_diff = match (left_text, right_text, is_add, is_delete) {
|
|
+ (Some(_), Some(_), _, _) => true,
|
|
+ (_, Some(_), true, _) => true, // add: left missing, right text
|
|
+ (Some(_), _, _, true) => true, // delete: left text, right missing
|
|
+ _ => false,
|
|
+ };
|
|
+
|
|
+ if can_text_diff {
|
|
+ // Diff the contents as text, treating missing side as empty string.
|
|
+ let l = left_text.unwrap_or("");
|
|
+ let r = right_text.unwrap_or("");
|
|
+
|
|
+ // Emit index line without mode suffix to preserve current test expectations.
|
|
+ aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
|
|
+
|
|
+ let old_header = if left_bytes.is_some() {
|
|
+ format!("a/{left_display}")
|
|
+ } else {
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+ let new_header = if right_bytes.is_some() {
|
|
+ format!("b/{right_display}")
|
|
+ } else {
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+
|
|
+ let diff = similar::TextDiff::from_lines(l, r);
|
|
+ let unified = diff
|
|
+ .unified_diff()
|
|
+ .context_radius(3)
|
|
+ .header(&old_header, &new_header)
|
|
+ .to_string();
|
|
+
|
|
+ aggregated.push_str(&unified);
|
|
+ if !aggregated.ends_with('\n') {
|
|
+ aggregated.push('\n');
|
|
+ }
|
|
+ } else {
|
|
+ // Binary or invalid UTF-8: emit header only.
|
|
+ aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
|
|
+ let old_header = if left_bytes.is_some() {
|
|
+ format!("a/{left_display}")
|
|
+ } else {
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+ let new_header = if right_bytes.is_some() {
|
|
+ format!("b/{right_display}")
|
|
+ } else {
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+ aggregated.push_str(&format!("--- {old_header}\n"));
|
|
+ aggregated.push_str(&format!("+++ {new_header}\n"));
|
|
+ aggregated.push_str("Binary files differ\n");
|
|
+ if !aggregated.ends_with('\n') {
|
|
+ aggregated.push('\n');
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if aggregated.trim().is_empty() {
|
|
+ Ok(None)
|
|
+ } else {
|
|
+ Ok(Some(aggregated))
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+fn uuid_filename_for(path: &Path) -> String {
|
|
+ let id = Uuid::new_v4().to_string();
|
|
+ match path.extension().and_then(|e| e.to_str()) {
|
|
+ Some(ext) if !ext.is_empty() => format!("{id}.{ext}"),
|
|
+ _ => id,
|
|
+ }
|
|
+}
|
|
+
|
|
+/// Compute the Git SHA-1 blob object ID for the given content (bytes).
|
|
+fn git_blob_sha1_hex_bytes(data: &[u8]) -> String {
|
|
+ // Git blob hash is sha1 of: "blob <len>\0<data>"
|
|
+ let header = format!("blob {}\0", data.len());
|
|
+ use sha1::Digest;
|
|
+ let mut hasher = sha1::Sha1::new();
|
|
+ hasher.update(header.as_bytes());
|
|
+ hasher.update(data);
|
|
+ let digest = hasher.finalize();
|
|
+ let mut out = String::with_capacity(40);
|
|
+ for b in digest {
|
|
+ use std::fmt::Write;
|
|
+ let _ = write!(&mut out, "{b:02x}");
|
|
+ }
|
|
+ out
|
|
+}
|
|
+
|
|
+const ZERO_OID: &str = "0000000000000000000000000000000000000000";
|
|
+const REGULAR_MODE: &str = "100644";
|
|
+#[cfg(unix)]
|
|
```
|
|
|
|
> Though even on Windows, this has to be readable (and preserved?) in a Git tree object, no?
|
|
|
|
- Created: 2025-08-04 03:11:58 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250312120
|
|
|
|
```diff
|
|
@@ -0,0 +1,766 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use anyhow::anyhow;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+struct BaselineFileInfo {
|
|
+ path: Option<PathBuf>,
|
|
+ contents_bytes: Option<Vec<u8>>,
|
|
+ mode: Option<String>,
|
|
+ oid: Option<String>,
|
|
+}
|
|
+
|
|
+/// Tracks sets of changes to files and exposes the overall unified diff.
|
|
+/// Internally, the way this works is now:
|
|
+/// 1. Maintain an in-memory baseline snapshot of files when they are first seen.
|
|
+/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null).
|
|
+/// 2. Keep a stable internal filename (uuid + same extension) per external path for rename tracking.
|
|
+/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory
|
|
+/// using the `similar` crate and emit unified diffs with rewritten external paths.
|
|
+#[derive(Default)]
|
|
+pub struct TurnDiffTracker {
|
|
+ /// Map external path -> internal filename (uuid + same extension).
|
|
+ external_to_temp_name: HashMap<PathBuf, String>,
|
|
+ /// Internal filename -> baseline file info.
|
|
+ baseline_file_info: HashMap<String, BaselineFileInfo>,
|
|
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
|
|
+ /// This is where renames are tracked.
|
|
+ temp_name_to_current_path: HashMap<String, PathBuf>,
|
|
+ /// Cache of known git worktree roots to avoid repeated filesystem walks.
|
|
+ git_root_cache: Vec<PathBuf>,
|
|
+}
|
|
+
|
|
+impl TurnDiffTracker {
|
|
+ pub fn new() -> Self {
|
|
+ Self::default()
|
|
+ }
|
|
+
|
|
+ /// Front-run apply patch calls to track the starting contents of any modified files.
|
|
+ /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen.
|
|
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
|
|
+ /// - Also updates internal mappings for move/rename events.
|
|
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
|
|
+ for (path, change) in changes.iter() {
|
|
+ // Ensure a stable internal filename exists for this external path.
|
|
+ if !self.external_to_temp_name.contains_key(path) {
|
|
+ let internal = uuid_filename_for(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(path.clone(), internal.clone());
|
|
+ self.temp_name_to_current_path
|
|
+ .insert(internal.clone(), path.clone());
|
|
+
|
|
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
|
|
+ let (contents_bytes, mode, oid) = if path.exists() {
|
|
+ let mode = file_mode_for_path(path);
|
|
+ let mode_str = mode.as_deref().unwrap_or(REGULAR_MODE);
|
|
+ let contents_bytes = blob_bytes(path, mode_str)
|
|
+ .unwrap_or_default()
|
|
+ .unwrap_or_default();
|
|
+ let oid = if mode.as_deref() == Some(SYMLINK_MODE) {
|
|
+ git_blob_sha1_hex_bytes(&contents_bytes)
|
|
+ } else {
|
|
+ self.git_blob_oid_for_path(path)
|
|
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(&contents_bytes))
|
|
+ };
|
|
+ (Some(contents_bytes), mode, Some(oid))
|
|
+ } else {
|
|
+ (None, None, Some(ZERO_OID.to_string()))
|
|
+ };
|
|
+
|
|
+ self.baseline_file_info.insert(
|
|
+ internal.clone(),
|
|
+ BaselineFileInfo {
|
|
+ path: Some(path.clone()),
|
|
+ contents_bytes,
|
|
+ mode,
|
|
+ oid,
|
|
+ },
|
|
+ );
|
|
+ }
|
|
+
|
|
+ // Track rename/move in current mapping if provided in an Update.
|
|
+ if let FileChange::Update {
|
|
+ move_path: Some(dest),
|
|
+ ..
|
|
+ } = change
|
|
+ {
|
|
+ let uuid_filename = match self.external_to_temp_name.get(path) {
|
|
+ Some(i) => i.clone(),
|
|
+ None => {
|
|
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
|
|
+ let i = uuid_filename_for(path);
|
|
+ self.baseline_file_info.insert(
|
|
+ i.clone(),
|
|
+ BaselineFileInfo {
|
|
+ path: Some(path.clone()),
|
|
+ contents_bytes: None,
|
|
+ mode: None,
|
|
+ oid: Some(ZERO_OID.to_string()),
|
|
+ },
|
|
+ );
|
|
+ i
|
|
+ }
|
|
+ };
|
|
+ // Update current external mapping for temp file name.
|
|
+ self.temp_name_to_current_path
|
|
+ .insert(uuid_filename.clone(), dest.clone());
|
|
+ // Update forward file_mapping: external current -> internal name.
|
|
+ self.external_to_temp_name.remove(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(dest.clone(), uuid_filename);
|
|
+ };
|
|
+ }
|
|
+
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ fn get_path_for_internal(&self, internal: &str) -> Option<PathBuf> {
|
|
+ self.temp_name_to_current_path
|
|
+ .get(internal)
|
|
+ .cloned()
|
|
+ .or_else(|| {
|
|
+ self.baseline_file_info
|
|
+ .get(internal)
|
|
+ .and_then(|info| info.path.clone())
|
|
+ })
|
|
+ }
|
|
+
|
|
+ /// Find the git worktree root for a file/directory by walking up to the first ancestor containing a `.git` entry.
|
|
+ /// Uses a simple cache of known roots and avoids negative-result caching for simplicity.
|
|
+ fn find_git_root_cached(&mut self, start: &Path) -> Option<PathBuf> {
|
|
+ let dir = if start.is_dir() {
|
|
+ start
|
|
+ } else {
|
|
+ start.parent()?
|
|
+ };
|
|
+
|
|
+ // Fast path: if any cached root is an ancestor of this path, use it.
|
|
+ if let Some(root) = self
|
|
+ .git_root_cache
|
|
+ .iter()
|
|
+ .find(|r| dir.starts_with(r))
|
|
+ .cloned()
|
|
+ {
|
|
+ return Some(root);
|
|
+ }
|
|
+
|
|
+ // Walk up to find a `.git` marker.
|
|
+ let mut cur = dir.to_path_buf();
|
|
+ loop {
|
|
+ let git_marker = cur.join(".git");
|
|
+ if git_marker.is_dir() || git_marker.is_file() {
|
|
+ if !self.git_root_cache.iter().any(|r| r == &cur) {
|
|
+ self.git_root_cache.push(cur.clone());
|
|
+ }
|
|
+ return Some(cur);
|
|
+ }
|
|
+
|
|
+ // On Windows, avoid walking above the drive or UNC share root.
|
|
+ #[cfg(windows)]
|
|
+ {
|
|
+ if is_windows_drive_or_unc_root(&cur) {
|
|
+ return None;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if let Some(parent) = cur.parent() {
|
|
+ cur = parent.to_path_buf();
|
|
+ } else {
|
|
+ return None;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Return a display string for `path` relative to its git root if found, else absolute.
|
|
+ fn relative_to_git_root_str(&mut self, path: &Path) -> String {
|
|
+ let s = if let Some(root) = self.find_git_root_cached(path) {
|
|
+ if let Ok(rel) = path.strip_prefix(&root) {
|
|
+ rel.display().to_string()
|
|
+ } else {
|
|
+ path.display().to_string()
|
|
+ }
|
|
+ } else {
|
|
+ path.display().to_string()
|
|
+ };
|
|
+ s.replace('\\', "/")
|
|
+ }
|
|
+
|
|
+ /// Ask git to compute the blob SHA-1 for the file at `path` within its repository.
|
|
+ /// Returns None if no repository is found or git invocation fails.
|
|
+ fn git_blob_oid_for_path(&mut self, path: &Path) -> Option<String> {
|
|
+ let root = self.find_git_root_cached(path)?;
|
|
+ // Compute a path relative to the repo root for better portability across platforms.
|
|
+ let rel = path.strip_prefix(&root).unwrap_or(path);
|
|
+ let output = Command::new("git")
|
|
+ .arg("-C")
|
|
+ .arg(&root)
|
|
+ .arg("hash-object")
|
|
+ .arg("--")
|
|
+ .arg(rel)
|
|
+ .output()
|
|
+ .ok()?;
|
|
+ if !output.status.success() {
|
|
+ return None;
|
|
+ }
|
|
+ let s = String::from_utf8_lossy(&output.stdout).trim().to_string();
|
|
+ if s.len() == 40 { Some(s) } else { None }
|
|
+ }
|
|
+
|
|
+ /// Recompute the aggregated unified diff by comparing all of the in-memory snapshots that were
|
|
+ /// collected before the first time they were touched by apply_patch during this turn with
|
|
+ /// the current repo state.
|
|
+ pub fn get_unified_diff(&mut self) -> Result<Option<String>> {
|
|
+ let mut aggregated = String::new();
|
|
+
|
|
+ // Compute diffs per tracked internal file in a stable order by external path.
|
|
+ let mut baseline_file_names: Vec<String> =
|
|
+ self.baseline_file_info.keys().cloned().collect();
|
|
+ // Sort lexicographically by full repo-relative path to match git behavior.
|
|
+ baseline_file_names.sort_by_key(|internal| {
|
|
+ self.get_path_for_internal(internal)
|
|
+ .map(|p| self.relative_to_git_root_str(&p))
|
|
+ .unwrap_or_default()
|
|
+ });
|
|
+
|
|
+ for internal in baseline_file_names {
|
|
+ // Baseline external must exist for any tracked internal.
|
|
+ let baseline_external = match self
|
|
+ .baseline_file_info
|
|
+ .get(&internal)
|
|
+ .and_then(|i| i.path.clone())
|
|
+ {
|
|
+ Some(p) => p,
|
|
+ None => continue,
|
|
+ };
|
|
+ let current_external = match self.get_path_for_internal(&internal) {
|
|
+ Some(p) => p,
|
|
+ None => continue,
|
|
+ };
|
|
+
|
|
+ // Determine modes early; needed to read symlink content correctly.
|
|
+ let baseline_mode = self
|
|
+ .baseline_file_info
|
|
+ .get(&internal)
|
|
+ .and_then(|i| i.mode.clone())
|
|
+ .unwrap_or_else(|| REGULAR_MODE.to_string());
|
|
+ let current_mode =
|
|
+ file_mode_for_path(¤t_external).unwrap_or_else(|| REGULAR_MODE.to_string());
|
|
+
|
|
+ let left_bytes = self
|
|
+ .baseline_file_info
|
|
+ .get(&internal)
|
|
+ .and_then(|i| i.contents_bytes.clone());
|
|
+
|
|
+ let right_bytes = blob_bytes(¤t_external, ¤t_mode)?;
|
|
+
|
|
+ // Fast path: identical bytes or both missing.
|
|
+ if left_bytes.as_deref() == right_bytes.as_deref() {
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ let left_display = self.relative_to_git_root_str(&baseline_external);
|
|
+ let right_display = self.relative_to_git_root_str(¤t_external);
|
|
+
|
|
+ // Emit a git-style header for better readability and parity with previous behavior.
|
|
+ aggregated.push_str(&format!("diff --git a/{left_display} b/{right_display}\n"));
|
|
+
|
|
+ let is_add = left_bytes.is_none() && right_bytes.is_some();
|
|
+ let is_delete = left_bytes.is_some() && right_bytes.is_none();
|
|
+
|
|
+ if is_add {
|
|
+ aggregated.push_str(&format!("new file mode {current_mode}\n"));
|
|
+ } else if is_delete {
|
|
+ aggregated.push_str(&format!("deleted file mode {baseline_mode}\n"));
|
|
+ } else if baseline_mode != current_mode {
|
|
+ aggregated.push_str(&format!("old mode {baseline_mode}\n"));
|
|
+ aggregated.push_str(&format!("new mode {current_mode}\n"));
|
|
+ }
|
|
+
|
|
+ // Determine blob object IDs for left and right contents. Prefer stored OIDs
|
|
+ // captured from the original repo state when the change was first seen.
|
|
+ let left_oid = self
|
|
+ .baseline_file_info
|
|
+ .get(&internal)
|
|
+ .and_then(|i| i.oid.clone())
|
|
+ .or_else(|| {
|
|
+ left_bytes
|
|
+ .as_ref()
|
|
+ .map(|b| git_blob_sha1_hex_bytes(b))
|
|
+ .or(Some(ZERO_OID.to_string()))
|
|
+ })
|
|
+ .unwrap_or_else(|| ZERO_OID.to_string());
|
|
+ let right_oid = if let Some(b) = right_bytes.as_ref() {
|
|
+ if current_mode == SYMLINK_MODE {
|
|
+ git_blob_sha1_hex_bytes(b)
|
|
+ } else {
|
|
+ self.git_blob_oid_for_path(¤t_external)
|
|
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(b))
|
|
+ }
|
|
+ } else {
|
|
+ ZERO_OID.to_string()
|
|
+ };
|
|
+
|
|
+ // If either side isn't valid UTF-8, emit a binary diff header and continue.
|
|
+ let left_text = left_bytes
|
|
+ .as_deref()
|
|
+ .and_then(|b| std::str::from_utf8(b).ok());
|
|
+ let right_text = right_bytes
|
|
+ .as_deref()
|
|
+ .and_then(|b| std::str::from_utf8(b).ok());
|
|
+
|
|
+ // Prefer text diffs when possible:
|
|
+ // - both sides are valid UTF-8
|
|
+ // - OR one side is missing (add/delete) and the present side is valid UTF-8
|
|
+ let can_text_diff = match (left_text, right_text, is_add, is_delete) {
|
|
+ (Some(_), Some(_), _, _) => true,
|
|
+ (_, Some(_), true, _) => true, // add: left missing, right text
|
|
+ (Some(_), _, _, true) => true, // delete: left text, right missing
|
|
+ _ => false,
|
|
+ };
|
|
+
|
|
+ if can_text_diff {
|
|
+ // Diff the contents as text, treating missing side as empty string.
|
|
+ let l = left_text.unwrap_or("");
|
|
+ let r = right_text.unwrap_or("");
|
|
+
|
|
+ // Emit index line without mode suffix to preserve current test expectations.
|
|
+ aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
|
|
+
|
|
+ let old_header = if left_bytes.is_some() {
|
|
+ format!("a/{left_display}")
|
|
+ } else {
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+ let new_header = if right_bytes.is_some() {
|
|
+ format!("b/{right_display}")
|
|
+ } else {
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+
|
|
+ let diff = similar::TextDiff::from_lines(l, r);
|
|
+ let unified = diff
|
|
+ .unified_diff()
|
|
+ .context_radius(3)
|
|
+ .header(&old_header, &new_header)
|
|
+ .to_string();
|
|
+
|
|
+ aggregated.push_str(&unified);
|
|
+ if !aggregated.ends_with('\n') {
|
|
+ aggregated.push('\n');
|
|
+ }
|
|
+ } else {
|
|
+ // Binary or invalid UTF-8: emit header only.
|
|
+ aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
|
|
+ let old_header = if left_bytes.is_some() {
|
|
+ format!("a/{left_display}")
|
|
+ } else {
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+ let new_header = if right_bytes.is_some() {
|
|
+ format!("b/{right_display}")
|
|
+ } else {
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+ aggregated.push_str(&format!("--- {old_header}\n"));
|
|
+ aggregated.push_str(&format!("+++ {new_header}\n"));
|
|
+ aggregated.push_str("Binary files differ\n");
|
|
+ if !aggregated.ends_with('\n') {
|
|
+ aggregated.push('\n');
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if aggregated.trim().is_empty() {
|
|
+ Ok(None)
|
|
+ } else {
|
|
+ Ok(Some(aggregated))
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+fn uuid_filename_for(path: &Path) -> String {
|
|
+ let id = Uuid::new_v4().to_string();
|
|
+ match path.extension().and_then(|e| e.to_str()) {
|
|
+ Some(ext) if !ext.is_empty() => format!("{id}.{ext}"),
|
|
+ _ => id,
|
|
+ }
|
|
+}
|
|
+
|
|
+/// Compute the Git SHA-1 blob object ID for the given content (bytes).
|
|
+fn git_blob_sha1_hex_bytes(data: &[u8]) -> String {
|
|
+ // Git blob hash is sha1 of: "blob <len>\0<data>"
|
|
+ let header = format!("blob {}\0", data.len());
|
|
+ use sha1::Digest;
|
|
+ let mut hasher = sha1::Sha1::new();
|
|
+ hasher.update(header.as_bytes());
|
|
+ hasher.update(data);
|
|
+ let digest = hasher.finalize();
|
|
+ let mut out = String::with_capacity(40);
|
|
+ for b in digest {
|
|
+ use std::fmt::Write;
|
|
+ let _ = write!(&mut out, "{b:02x}");
|
|
+ }
|
|
+ out
|
|
+}
|
|
+
|
|
+const ZERO_OID: &str = "0000000000000000000000000000000000000000";
|
|
+const REGULAR_MODE: &str = "100644";
|
|
+#[cfg(unix)]
|
|
+const EXECUTABLE_MODE: &str = "100755";
|
|
+const SYMLINK_MODE: &str = "120000";
|
|
+
|
|
+#[cfg(unix)]
|
|
+fn file_mode_for_path(path: &Path) -> Option<String> {
|
|
+ use std::os::unix::fs::PermissionsExt;
|
|
+ let meta = fs::symlink_metadata(path).ok()?;
|
|
+ let ft = meta.file_type();
|
|
+ if ft.is_symlink() {
|
|
+ return Some(SYMLINK_MODE.to_string());
|
|
+ }
|
|
+ let mode = meta.permissions().mode();
|
|
+ let is_exec = (mode & 0o111) != 0;
|
|
+ Some(if is_exec {
|
|
+ EXECUTABLE_MODE.into()
|
|
+ } else {
|
|
+ REGULAR_MODE.into()
|
|
+ })
|
|
+}
|
|
+
|
|
+#[cfg(not(unix))]
|
|
+fn file_mode_for_path(_path: &Path) -> Option<String> {
|
|
+ // Default to non-executable on non-unix.
|
|
+ Some(REGULAR_MODE.to_string())
|
|
+}
|
|
+
|
|
+fn blob_bytes(path: &Path, mode: &str) -> Result<Option<Vec<u8>>> {
|
|
+ if path.exists() {
|
|
+ let contents = if mode == SYMLINK_MODE {
|
|
+ symlink_blob_bytes(path)
|
|
+ .ok_or_else(|| anyhow!("failed to read symlink target for {}", path.display()))?
|
|
+ } else {
|
|
+ fs::read(path).with_context(|| {
|
|
+ format!("failed to read current file for diff {}", path.display())
|
|
+ })?
|
|
+ };
|
|
+ Ok(Some(contents))
|
|
+ } else {
|
|
+ Ok(None)
|
|
+ }
|
|
+}
|
|
+
|
|
+#[cfg(unix)]
|
|
+fn symlink_blob_bytes(path: &Path) -> Option<Vec<u8>> {
|
|
+ use std::os::unix::ffi::OsStrExt;
|
|
+ let target = std::fs::read_link(path).ok()?;
|
|
+ Some(target.as_os_str().as_bytes().to_vec())
|
|
```
|
|
|
|
> One interesting operating system fact is that the contents of a symlink do not have to be a path to a file: you can just use it for arbitrary data storage. (As such, I think the max number of bytes you can store in a symlink is `PATH_MAX`, though.)
|
|
>
|
|
> I knew of one project that did this to save a system call because `readlink()` is one system call but `open()` plus `read()` for a regular file is two?
|
|
|
|
- Created: 2025-08-04 03:12:56 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250312732
|
|
|
|
```diff
|
|
@@ -0,0 +1,766 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use anyhow::anyhow;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+struct BaselineFileInfo {
|
|
+ path: Option<PathBuf>,
|
|
+ contents_bytes: Option<Vec<u8>>,
|
|
+ mode: Option<String>,
|
|
+ oid: Option<String>,
|
|
+}
|
|
+
|
|
+/// Tracks sets of changes to files and exposes the overall unified diff.
|
|
+/// Internally, the way this works is now:
|
|
+/// 1. Maintain an in-memory baseline snapshot of files when they are first seen.
|
|
+/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null).
|
|
+/// 2. Keep a stable internal filename (uuid + same extension) per external path for rename tracking.
|
|
+/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory
|
|
+/// using the `similar` crate and emit unified diffs with rewritten external paths.
|
|
+#[derive(Default)]
|
|
+pub struct TurnDiffTracker {
|
|
+ /// Map external path -> internal filename (uuid + same extension).
|
|
+ external_to_temp_name: HashMap<PathBuf, String>,
|
|
+ /// Internal filename -> baseline file info.
|
|
+ baseline_file_info: HashMap<String, BaselineFileInfo>,
|
|
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
|
|
+ /// This is where renames are tracked.
|
|
+ temp_name_to_current_path: HashMap<String, PathBuf>,
|
|
+ /// Cache of known git worktree roots to avoid repeated filesystem walks.
|
|
+ git_root_cache: Vec<PathBuf>,
|
|
+}
|
|
+
|
|
+impl TurnDiffTracker {
|
|
+ pub fn new() -> Self {
|
|
+ Self::default()
|
|
+ }
|
|
+
|
|
+ /// Front-run apply patch calls to track the starting contents of any modified files.
|
|
+ /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen.
|
|
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
|
|
+ /// - Also updates internal mappings for move/rename events.
|
|
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
|
|
+ for (path, change) in changes.iter() {
|
|
+ // Ensure a stable internal filename exists for this external path.
|
|
+ if !self.external_to_temp_name.contains_key(path) {
|
|
+ let internal = uuid_filename_for(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(path.clone(), internal.clone());
|
|
+ self.temp_name_to_current_path
|
|
+ .insert(internal.clone(), path.clone());
|
|
+
|
|
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
|
|
+ let (contents_bytes, mode, oid) = if path.exists() {
|
|
+ let mode = file_mode_for_path(path);
|
|
+ let mode_str = mode.as_deref().unwrap_or(REGULAR_MODE);
|
|
+ let contents_bytes = blob_bytes(path, mode_str)
|
|
+ .unwrap_or_default()
|
|
+ .unwrap_or_default();
|
|
+ let oid = if mode.as_deref() == Some(SYMLINK_MODE) {
|
|
+ git_blob_sha1_hex_bytes(&contents_bytes)
|
|
+ } else {
|
|
+ self.git_blob_oid_for_path(path)
|
|
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(&contents_bytes))
|
|
+ };
|
|
+ (Some(contents_bytes), mode, Some(oid))
|
|
+ } else {
|
|
+ (None, None, Some(ZERO_OID.to_string()))
|
|
+ };
|
|
+
|
|
+ self.baseline_file_info.insert(
|
|
+ internal.clone(),
|
|
+ BaselineFileInfo {
|
|
+ path: Some(path.clone()),
|
|
+ contents_bytes,
|
|
+ mode,
|
|
+ oid,
|
|
+ },
|
|
+ );
|
|
+ }
|
|
+
|
|
+ // Track rename/move in current mapping if provided in an Update.
|
|
+ if let FileChange::Update {
|
|
+ move_path: Some(dest),
|
|
+ ..
|
|
+ } = change
|
|
+ {
|
|
+ let uuid_filename = match self.external_to_temp_name.get(path) {
|
|
+ Some(i) => i.clone(),
|
|
+ None => {
|
|
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
|
|
+ let i = uuid_filename_for(path);
|
|
+ self.baseline_file_info.insert(
|
|
+ i.clone(),
|
|
+ BaselineFileInfo {
|
|
+ path: Some(path.clone()),
|
|
+ contents_bytes: None,
|
|
+ mode: None,
|
|
+ oid: Some(ZERO_OID.to_string()),
|
|
+ },
|
|
+ );
|
|
+ i
|
|
+ }
|
|
+ };
|
|
+ // Update current external mapping for temp file name.
|
|
+ self.temp_name_to_current_path
|
|
+ .insert(uuid_filename.clone(), dest.clone());
|
|
+ // Update forward file_mapping: external current -> internal name.
|
|
+ self.external_to_temp_name.remove(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(dest.clone(), uuid_filename);
|
|
+ };
|
|
+ }
|
|
+
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ fn get_path_for_internal(&self, internal: &str) -> Option<PathBuf> {
|
|
+ self.temp_name_to_current_path
|
|
+ .get(internal)
|
|
+ .cloned()
|
|
+ .or_else(|| {
|
|
+ self.baseline_file_info
|
|
+ .get(internal)
|
|
+ .and_then(|info| info.path.clone())
|
|
+ })
|
|
+ }
|
|
+
|
|
+ /// Find the git worktree root for a file/directory by walking up to the first ancestor containing a `.git` entry.
|
|
+ /// Uses a simple cache of known roots and avoids negative-result caching for simplicity.
|
|
+ fn find_git_root_cached(&mut self, start: &Path) -> Option<PathBuf> {
|
|
+ let dir = if start.is_dir() {
|
|
+ start
|
|
+ } else {
|
|
+ start.parent()?
|
|
+ };
|
|
+
|
|
+ // Fast path: if any cached root is an ancestor of this path, use it.
|
|
+ if let Some(root) = self
|
|
+ .git_root_cache
|
|
+ .iter()
|
|
+ .find(|r| dir.starts_with(r))
|
|
+ .cloned()
|
|
+ {
|
|
+ return Some(root);
|
|
+ }
|
|
+
|
|
+ // Walk up to find a `.git` marker.
|
|
+ let mut cur = dir.to_path_buf();
|
|
+ loop {
|
|
+ let git_marker = cur.join(".git");
|
|
+ if git_marker.is_dir() || git_marker.is_file() {
|
|
+ if !self.git_root_cache.iter().any(|r| r == &cur) {
|
|
+ self.git_root_cache.push(cur.clone());
|
|
+ }
|
|
+ return Some(cur);
|
|
+ }
|
|
+
|
|
+ // On Windows, avoid walking above the drive or UNC share root.
|
|
+ #[cfg(windows)]
|
|
+ {
|
|
+ if is_windows_drive_or_unc_root(&cur) {
|
|
+ return None;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if let Some(parent) = cur.parent() {
|
|
+ cur = parent.to_path_buf();
|
|
+ } else {
|
|
+ return None;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Return a display string for `path` relative to its git root if found, else absolute.
|
|
+ fn relative_to_git_root_str(&mut self, path: &Path) -> String {
|
|
+ let s = if let Some(root) = self.find_git_root_cached(path) {
|
|
+ if let Ok(rel) = path.strip_prefix(&root) {
|
|
+ rel.display().to_string()
|
|
+ } else {
|
|
+ path.display().to_string()
|
|
+ }
|
|
+ } else {
|
|
+ path.display().to_string()
|
|
+ };
|
|
+ s.replace('\\', "/")
|
|
+ }
|
|
+
|
|
+ /// Ask git to compute the blob SHA-1 for the file at `path` within its repository.
|
|
+ /// Returns None if no repository is found or git invocation fails.
|
|
+ fn git_blob_oid_for_path(&mut self, path: &Path) -> Option<String> {
|
|
+ let root = self.find_git_root_cached(path)?;
|
|
+ // Compute a path relative to the repo root for better portability across platforms.
|
|
+ let rel = path.strip_prefix(&root).unwrap_or(path);
|
|
+ let output = Command::new("git")
|
|
+ .arg("-C")
|
|
+ .arg(&root)
|
|
+ .arg("hash-object")
|
|
+ .arg("--")
|
|
+ .arg(rel)
|
|
+ .output()
|
|
+ .ok()?;
|
|
+ if !output.status.success() {
|
|
+ return None;
|
|
+ }
|
|
+ let s = String::from_utf8_lossy(&output.stdout).trim().to_string();
|
|
+ if s.len() == 40 { Some(s) } else { None }
|
|
+ }
|
|
+
|
|
+ /// Recompute the aggregated unified diff by comparing all of the in-memory snapshots that were
|
|
+ /// collected before the first time they were touched by apply_patch during this turn with
|
|
+ /// the current repo state.
|
|
+ pub fn get_unified_diff(&mut self) -> Result<Option<String>> {
|
|
+ let mut aggregated = String::new();
|
|
+
|
|
+ // Compute diffs per tracked internal file in a stable order by external path.
|
|
+ let mut baseline_file_names: Vec<String> =
|
|
+ self.baseline_file_info.keys().cloned().collect();
|
|
+ // Sort lexicographically by full repo-relative path to match git behavior.
|
|
+ baseline_file_names.sort_by_key(|internal| {
|
|
+ self.get_path_for_internal(internal)
|
|
+ .map(|p| self.relative_to_git_root_str(&p))
|
|
+ .unwrap_or_default()
|
|
+ });
|
|
+
|
|
+ for internal in baseline_file_names {
|
|
+ // Baseline external must exist for any tracked internal.
|
|
+ let baseline_external = match self
|
|
+ .baseline_file_info
|
|
+ .get(&internal)
|
|
+ .and_then(|i| i.path.clone())
|
|
+ {
|
|
+ Some(p) => p,
|
|
+ None => continue,
|
|
+ };
|
|
+ let current_external = match self.get_path_for_internal(&internal) {
|
|
+ Some(p) => p,
|
|
+ None => continue,
|
|
+ };
|
|
+
|
|
+ // Determine modes early; needed to read symlink content correctly.
|
|
+ let baseline_mode = self
|
|
+ .baseline_file_info
|
|
+ .get(&internal)
|
|
+ .and_then(|i| i.mode.clone())
|
|
+ .unwrap_or_else(|| REGULAR_MODE.to_string());
|
|
+ let current_mode =
|
|
+ file_mode_for_path(¤t_external).unwrap_or_else(|| REGULAR_MODE.to_string());
|
|
+
|
|
+ let left_bytes = self
|
|
+ .baseline_file_info
|
|
+ .get(&internal)
|
|
+ .and_then(|i| i.contents_bytes.clone());
|
|
+
|
|
+ let right_bytes = blob_bytes(¤t_external, ¤t_mode)?;
|
|
+
|
|
+ // Fast path: identical bytes or both missing.
|
|
+ if left_bytes.as_deref() == right_bytes.as_deref() {
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ let left_display = self.relative_to_git_root_str(&baseline_external);
|
|
+ let right_display = self.relative_to_git_root_str(¤t_external);
|
|
+
|
|
+ // Emit a git-style header for better readability and parity with previous behavior.
|
|
+ aggregated.push_str(&format!("diff --git a/{left_display} b/{right_display}\n"));
|
|
+
|
|
+ let is_add = left_bytes.is_none() && right_bytes.is_some();
|
|
+ let is_delete = left_bytes.is_some() && right_bytes.is_none();
|
|
+
|
|
+ if is_add {
|
|
+ aggregated.push_str(&format!("new file mode {current_mode}\n"));
|
|
+ } else if is_delete {
|
|
+ aggregated.push_str(&format!("deleted file mode {baseline_mode}\n"));
|
|
+ } else if baseline_mode != current_mode {
|
|
+ aggregated.push_str(&format!("old mode {baseline_mode}\n"));
|
|
+ aggregated.push_str(&format!("new mode {current_mode}\n"));
|
|
+ }
|
|
+
|
|
+ // Determine blob object IDs for left and right contents. Prefer stored OIDs
|
|
+ // captured from the original repo state when the change was first seen.
|
|
+ let left_oid = self
|
|
+ .baseline_file_info
|
|
+ .get(&internal)
|
|
+ .and_then(|i| i.oid.clone())
|
|
+ .or_else(|| {
|
|
+ left_bytes
|
|
+ .as_ref()
|
|
+ .map(|b| git_blob_sha1_hex_bytes(b))
|
|
+ .or(Some(ZERO_OID.to_string()))
|
|
+ })
|
|
+ .unwrap_or_else(|| ZERO_OID.to_string());
|
|
+ let right_oid = if let Some(b) = right_bytes.as_ref() {
|
|
+ if current_mode == SYMLINK_MODE {
|
|
+ git_blob_sha1_hex_bytes(b)
|
|
+ } else {
|
|
+ self.git_blob_oid_for_path(¤t_external)
|
|
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(b))
|
|
+ }
|
|
+ } else {
|
|
+ ZERO_OID.to_string()
|
|
+ };
|
|
+
|
|
+ // If either side isn't valid UTF-8, emit a binary diff header and continue.
|
|
+ let left_text = left_bytes
|
|
+ .as_deref()
|
|
+ .and_then(|b| std::str::from_utf8(b).ok());
|
|
+ let right_text = right_bytes
|
|
+ .as_deref()
|
|
+ .and_then(|b| std::str::from_utf8(b).ok());
|
|
+
|
|
+ // Prefer text diffs when possible:
|
|
+ // - both sides are valid UTF-8
|
|
+ // - OR one side is missing (add/delete) and the present side is valid UTF-8
|
|
+ let can_text_diff = match (left_text, right_text, is_add, is_delete) {
|
|
+ (Some(_), Some(_), _, _) => true,
|
|
+ (_, Some(_), true, _) => true, // add: left missing, right text
|
|
+ (Some(_), _, _, true) => true, // delete: left text, right missing
|
|
+ _ => false,
|
|
+ };
|
|
+
|
|
+ if can_text_diff {
|
|
+ // Diff the contents as text, treating missing side as empty string.
|
|
+ let l = left_text.unwrap_or("");
|
|
+ let r = right_text.unwrap_or("");
|
|
+
|
|
+ // Emit index line without mode suffix to preserve current test expectations.
|
|
+ aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
|
|
+
|
|
+ let old_header = if left_bytes.is_some() {
|
|
+ format!("a/{left_display}")
|
|
+ } else {
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+ let new_header = if right_bytes.is_some() {
|
|
+ format!("b/{right_display}")
|
|
+ } else {
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+
|
|
+ let diff = similar::TextDiff::from_lines(l, r);
|
|
+ let unified = diff
|
|
+ .unified_diff()
|
|
+ .context_radius(3)
|
|
+ .header(&old_header, &new_header)
|
|
+ .to_string();
|
|
+
|
|
+ aggregated.push_str(&unified);
|
|
+ if !aggregated.ends_with('\n') {
|
|
+ aggregated.push('\n');
|
|
+ }
|
|
+ } else {
|
|
+ // Binary or invalid UTF-8: emit header only.
|
|
+ aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
|
|
+ let old_header = if left_bytes.is_some() {
|
|
+ format!("a/{left_display}")
|
|
+ } else {
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+ let new_header = if right_bytes.is_some() {
|
|
+ format!("b/{right_display}")
|
|
+ } else {
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+ aggregated.push_str(&format!("--- {old_header}\n"));
|
|
+ aggregated.push_str(&format!("+++ {new_header}\n"));
|
|
+ aggregated.push_str("Binary files differ\n");
|
|
+ if !aggregated.ends_with('\n') {
|
|
+ aggregated.push('\n');
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if aggregated.trim().is_empty() {
|
|
+ Ok(None)
|
|
+ } else {
|
|
+ Ok(Some(aggregated))
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+fn uuid_filename_for(path: &Path) -> String {
|
|
+ let id = Uuid::new_v4().to_string();
|
|
+ match path.extension().and_then(|e| e.to_str()) {
|
|
+ Some(ext) if !ext.is_empty() => format!("{id}.{ext}"),
|
|
+ _ => id,
|
|
+ }
|
|
+}
|
|
+
|
|
+/// Compute the Git SHA-1 blob object ID for the given content (bytes).
|
|
+fn git_blob_sha1_hex_bytes(data: &[u8]) -> String {
|
|
+ // Git blob hash is sha1 of: "blob <len>\0<data>"
|
|
+ let header = format!("blob {}\0", data.len());
|
|
+ use sha1::Digest;
|
|
```
|
|
|
|
> Returning this type is slightly stronger since you don't have to verify the integrity of the `String` contents elsewhere.
|
|
|
|
- Created: 2025-08-04 03:13:59 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250313339
|
|
|
|
```diff
|
|
@@ -0,0 +1,766 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use anyhow::anyhow;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+struct BaselineFileInfo {
|
|
+ path: Option<PathBuf>,
|
|
+ contents_bytes: Option<Vec<u8>>,
|
|
+ mode: Option<String>,
|
|
+ oid: Option<String>,
|
|
+}
|
|
+
|
|
+/// Tracks sets of changes to files and exposes the overall unified diff.
|
|
+/// Internally, the way this works is now:
|
|
+/// 1. Maintain an in-memory baseline snapshot of files when they are first seen.
|
|
+/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null).
|
|
+/// 2. Keep a stable internal filename (uuid + same extension) per external path for rename tracking.
|
|
+/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory
|
|
+/// using the `similar` crate and emit unified diffs with rewritten external paths.
|
|
+#[derive(Default)]
|
|
+pub struct TurnDiffTracker {
|
|
+ /// Map external path -> internal filename (uuid + same extension).
|
|
+ external_to_temp_name: HashMap<PathBuf, String>,
|
|
+ /// Internal filename -> baseline file info.
|
|
+ baseline_file_info: HashMap<String, BaselineFileInfo>,
|
|
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
|
|
+ /// This is where renames are tracked.
|
|
+ temp_name_to_current_path: HashMap<String, PathBuf>,
|
|
+ /// Cache of known git worktree roots to avoid repeated filesystem walks.
|
|
+ git_root_cache: Vec<PathBuf>,
|
|
+}
|
|
+
|
|
+impl TurnDiffTracker {
|
|
+ pub fn new() -> Self {
|
|
+ Self::default()
|
|
+ }
|
|
+
|
|
+ /// Front-run apply patch calls to track the starting contents of any modified files.
|
|
+ /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen.
|
|
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
|
|
+ /// - Also updates internal mappings for move/rename events.
|
|
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
|
|
+ for (path, change) in changes.iter() {
|
|
+ // Ensure a stable internal filename exists for this external path.
|
|
+ if !self.external_to_temp_name.contains_key(path) {
|
|
+ let internal = uuid_filename_for(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(path.clone(), internal.clone());
|
|
+ self.temp_name_to_current_path
|
|
+ .insert(internal.clone(), path.clone());
|
|
+
|
|
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
|
|
+ let (contents_bytes, mode, oid) = if path.exists() {
|
|
+ let mode = file_mode_for_path(path);
|
|
+ let mode_str = mode.as_deref().unwrap_or(REGULAR_MODE);
|
|
+ let contents_bytes = blob_bytes(path, mode_str)
|
|
+ .unwrap_or_default()
|
|
+ .unwrap_or_default();
|
|
+ let oid = if mode.as_deref() == Some(SYMLINK_MODE) {
|
|
+ git_blob_sha1_hex_bytes(&contents_bytes)
|
|
+ } else {
|
|
+ self.git_blob_oid_for_path(path)
|
|
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(&contents_bytes))
|
|
+ };
|
|
+ (Some(contents_bytes), mode, Some(oid))
|
|
+ } else {
|
|
+ (None, None, Some(ZERO_OID.to_string()))
|
|
+ };
|
|
+
|
|
+ self.baseline_file_info.insert(
|
|
+ internal.clone(),
|
|
+ BaselineFileInfo {
|
|
+ path: Some(path.clone()),
|
|
+ contents_bytes,
|
|
+ mode,
|
|
+ oid,
|
|
+ },
|
|
+ );
|
|
+ }
|
|
+
|
|
+ // Track rename/move in current mapping if provided in an Update.
|
|
+ if let FileChange::Update {
|
|
+ move_path: Some(dest),
|
|
+ ..
|
|
+ } = change
|
|
+ {
|
|
+ let uuid_filename = match self.external_to_temp_name.get(path) {
|
|
+ Some(i) => i.clone(),
|
|
+ None => {
|
|
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
|
|
+ let i = uuid_filename_for(path);
|
|
+ self.baseline_file_info.insert(
|
|
+ i.clone(),
|
|
+ BaselineFileInfo {
|
|
+ path: Some(path.clone()),
|
|
+ contents_bytes: None,
|
|
+ mode: None,
|
|
+ oid: Some(ZERO_OID.to_string()),
|
|
+ },
|
|
+ );
|
|
+ i
|
|
+ }
|
|
+ };
|
|
+ // Update current external mapping for temp file name.
|
|
+ self.temp_name_to_current_path
|
|
+ .insert(uuid_filename.clone(), dest.clone());
|
|
+ // Update forward file_mapping: external current -> internal name.
|
|
+ self.external_to_temp_name.remove(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(dest.clone(), uuid_filename);
|
|
+ };
|
|
+ }
|
|
+
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ fn get_path_for_internal(&self, internal: &str) -> Option<PathBuf> {
|
|
+ self.temp_name_to_current_path
|
|
+ .get(internal)
|
|
+ .cloned()
|
|
+ .or_else(|| {
|
|
+ self.baseline_file_info
|
|
+ .get(internal)
|
|
+ .and_then(|info| info.path.clone())
|
|
+ })
|
|
+ }
|
|
+
|
|
+ /// Find the git worktree root for a file/directory by walking up to the first ancestor containing a `.git` entry.
|
|
+ /// Uses a simple cache of known roots and avoids negative-result caching for simplicity.
|
|
+ fn find_git_root_cached(&mut self, start: &Path) -> Option<PathBuf> {
|
|
+ let dir = if start.is_dir() {
|
|
+ start
|
|
+ } else {
|
|
+ start.parent()?
|
|
+ };
|
|
+
|
|
+ // Fast path: if any cached root is an ancestor of this path, use it.
|
|
+ if let Some(root) = self
|
|
+ .git_root_cache
|
|
+ .iter()
|
|
+ .find(|r| dir.starts_with(r))
|
|
+ .cloned()
|
|
+ {
|
|
+ return Some(root);
|
|
+ }
|
|
+
|
|
+ // Walk up to find a `.git` marker.
|
|
+ let mut cur = dir.to_path_buf();
|
|
+ loop {
|
|
+ let git_marker = cur.join(".git");
|
|
+ if git_marker.is_dir() || git_marker.is_file() {
|
|
+ if !self.git_root_cache.iter().any(|r| r == &cur) {
|
|
+ self.git_root_cache.push(cur.clone());
|
|
+ }
|
|
+ return Some(cur);
|
|
+ }
|
|
+
|
|
+ // On Windows, avoid walking above the drive or UNC share root.
|
|
+ #[cfg(windows)]
|
|
+ {
|
|
+ if is_windows_drive_or_unc_root(&cur) {
|
|
+ return None;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if let Some(parent) = cur.parent() {
|
|
+ cur = parent.to_path_buf();
|
|
+ } else {
|
|
+ return None;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Return a display string for `path` relative to its git root if found, else absolute.
|
|
+ fn relative_to_git_root_str(&mut self, path: &Path) -> String {
|
|
+ let s = if let Some(root) = self.find_git_root_cached(path) {
|
|
+ if let Ok(rel) = path.strip_prefix(&root) {
|
|
+ rel.display().to_string()
|
|
+ } else {
|
|
+ path.display().to_string()
|
|
+ }
|
|
+ } else {
|
|
+ path.display().to_string()
|
|
+ };
|
|
+ s.replace('\\', "/")
|
|
+ }
|
|
+
|
|
+ /// Ask git to compute the blob SHA-1 for the file at `path` within its repository.
|
|
+ /// Returns None if no repository is found or git invocation fails.
|
|
+ fn git_blob_oid_for_path(&mut self, path: &Path) -> Option<String> {
|
|
+ let root = self.find_git_root_cached(path)?;
|
|
+ // Compute a path relative to the repo root for better portability across platforms.
|
|
+ let rel = path.strip_prefix(&root).unwrap_or(path);
|
|
+ let output = Command::new("git")
|
|
+ .arg("-C")
|
|
+ .arg(&root)
|
|
+ .arg("hash-object")
|
|
+ .arg("--")
|
|
+ .arg(rel)
|
|
+ .output()
|
|
+ .ok()?;
|
|
+ if !output.status.success() {
|
|
+ return None;
|
|
+ }
|
|
+ let s = String::from_utf8_lossy(&output.stdout).trim().to_string();
|
|
+ if s.len() == 40 { Some(s) } else { None }
|
|
+ }
|
|
+
|
|
+ /// Recompute the aggregated unified diff by comparing all of the in-memory snapshots that were
|
|
+ /// collected before the first time they were touched by apply_patch during this turn with
|
|
+ /// the current repo state.
|
|
+ pub fn get_unified_diff(&mut self) -> Result<Option<String>> {
|
|
+ let mut aggregated = String::new();
|
|
+
|
|
+ // Compute diffs per tracked internal file in a stable order by external path.
|
|
+ let mut baseline_file_names: Vec<String> =
|
|
+ self.baseline_file_info.keys().cloned().collect();
|
|
+ // Sort lexicographically by full repo-relative path to match git behavior.
|
|
+ baseline_file_names.sort_by_key(|internal| {
|
|
+ self.get_path_for_internal(internal)
|
|
+ .map(|p| self.relative_to_git_root_str(&p))
|
|
+ .unwrap_or_default()
|
|
+ });
|
|
+
|
|
+ for internal in baseline_file_names {
|
|
+ // Baseline external must exist for any tracked internal.
|
|
+ let baseline_external = match self
|
|
+ .baseline_file_info
|
|
+ .get(&internal)
|
|
+ .and_then(|i| i.path.clone())
|
|
+ {
|
|
+ Some(p) => p,
|
|
+ None => continue,
|
|
+ };
|
|
+ let current_external = match self.get_path_for_internal(&internal) {
|
|
+ Some(p) => p,
|
|
+ None => continue,
|
|
+ };
|
|
+
|
|
+ // Determine modes early; needed to read symlink content correctly.
|
|
+ let baseline_mode = self
|
|
+ .baseline_file_info
|
|
+ .get(&internal)
|
|
+ .and_then(|i| i.mode.clone())
|
|
+ .unwrap_or_else(|| REGULAR_MODE.to_string());
|
|
+ let current_mode =
|
|
+ file_mode_for_path(¤t_external).unwrap_or_else(|| REGULAR_MODE.to_string());
|
|
+
|
|
+ let left_bytes = self
|
|
+ .baseline_file_info
|
|
+ .get(&internal)
|
|
+ .and_then(|i| i.contents_bytes.clone());
|
|
+
|
|
+ let right_bytes = blob_bytes(¤t_external, ¤t_mode)?;
|
|
+
|
|
+ // Fast path: identical bytes or both missing.
|
|
+ if left_bytes.as_deref() == right_bytes.as_deref() {
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ let left_display = self.relative_to_git_root_str(&baseline_external);
|
|
+ let right_display = self.relative_to_git_root_str(¤t_external);
|
|
+
|
|
+ // Emit a git-style header for better readability and parity with previous behavior.
|
|
+ aggregated.push_str(&format!("diff --git a/{left_display} b/{right_display}\n"));
|
|
+
|
|
+ let is_add = left_bytes.is_none() && right_bytes.is_some();
|
|
+ let is_delete = left_bytes.is_some() && right_bytes.is_none();
|
|
+
|
|
+ if is_add {
|
|
+ aggregated.push_str(&format!("new file mode {current_mode}\n"));
|
|
+ } else if is_delete {
|
|
+ aggregated.push_str(&format!("deleted file mode {baseline_mode}\n"));
|
|
+ } else if baseline_mode != current_mode {
|
|
+ aggregated.push_str(&format!("old mode {baseline_mode}\n"));
|
|
+ aggregated.push_str(&format!("new mode {current_mode}\n"));
|
|
+ }
|
|
+
|
|
+ // Determine blob object IDs for left and right contents. Prefer stored OIDs
|
|
+ // captured from the original repo state when the change was first seen.
|
|
+ let left_oid = self
|
|
+ .baseline_file_info
|
|
+ .get(&internal)
|
|
+ .and_then(|i| i.oid.clone())
|
|
+ .or_else(|| {
|
|
+ left_bytes
|
|
+ .as_ref()
|
|
+ .map(|b| git_blob_sha1_hex_bytes(b))
|
|
+ .or(Some(ZERO_OID.to_string()))
|
|
+ })
|
|
+ .unwrap_or_else(|| ZERO_OID.to_string());
|
|
+ let right_oid = if let Some(b) = right_bytes.as_ref() {
|
|
+ if current_mode == SYMLINK_MODE {
|
|
+ git_blob_sha1_hex_bytes(b)
|
|
+ } else {
|
|
+ self.git_blob_oid_for_path(¤t_external)
|
|
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(b))
|
|
+ }
|
|
+ } else {
|
|
+ ZERO_OID.to_string()
|
|
+ };
|
|
+
|
|
+ // If either side isn't valid UTF-8, emit a binary diff header and continue.
|
|
+ let left_text = left_bytes
|
|
+ .as_deref()
|
|
+ .and_then(|b| std::str::from_utf8(b).ok());
|
|
+ let right_text = right_bytes
|
|
+ .as_deref()
|
|
+ .and_then(|b| std::str::from_utf8(b).ok());
|
|
+
|
|
+ // Prefer text diffs when possible:
|
|
+ // - both sides are valid UTF-8
|
|
+ // - OR one side is missing (add/delete) and the present side is valid UTF-8
|
|
+ let can_text_diff = match (left_text, right_text, is_add, is_delete) {
|
|
+ (Some(_), Some(_), _, _) => true,
|
|
+ (_, Some(_), true, _) => true, // add: left missing, right text
|
|
+ (Some(_), _, _, true) => true, // delete: left text, right missing
|
|
+ _ => false,
|
|
+ };
|
|
+
|
|
+ if can_text_diff {
|
|
+ // Diff the contents as text, treating missing side as empty string.
|
|
+ let l = left_text.unwrap_or("");
|
|
+ let r = right_text.unwrap_or("");
|
|
+
|
|
+ // Emit index line without mode suffix to preserve current test expectations.
|
|
+ aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
|
|
+
|
|
+ let old_header = if left_bytes.is_some() {
|
|
+ format!("a/{left_display}")
|
|
+ } else {
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+ let new_header = if right_bytes.is_some() {
|
|
+ format!("b/{right_display}")
|
|
+ } else {
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+
|
|
+ let diff = similar::TextDiff::from_lines(l, r);
|
|
+ let unified = diff
|
|
+ .unified_diff()
|
|
+ .context_radius(3)
|
|
+ .header(&old_header, &new_header)
|
|
+ .to_string();
|
|
+
|
|
+ aggregated.push_str(&unified);
|
|
+ if !aggregated.ends_with('\n') {
|
|
+ aggregated.push('\n');
|
|
+ }
|
|
+ } else {
|
|
+ // Binary or invalid UTF-8: emit header only.
|
|
+ aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
|
|
+ let old_header = if left_bytes.is_some() {
|
|
+ format!("a/{left_display}")
|
|
+ } else {
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+ let new_header = if right_bytes.is_some() {
|
|
+ format!("b/{right_display}")
|
|
+ } else {
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+ aggregated.push_str(&format!("--- {old_header}\n"));
|
|
+ aggregated.push_str(&format!("+++ {new_header}\n"));
|
|
+ aggregated.push_str("Binary files differ\n");
|
|
+ if !aggregated.ends_with('\n') {
|
|
+ aggregated.push('\n');
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if aggregated.trim().is_empty() {
|
|
+ Ok(None)
|
|
+ } else {
|
|
+ Ok(Some(aggregated))
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+fn uuid_filename_for(path: &Path) -> String {
|
|
+ let id = Uuid::new_v4().to_string();
|
|
+ match path.extension().and_then(|e| e.to_str()) {
|
|
+ Some(ext) if !ext.is_empty() => format!("{id}.{ext}"),
|
|
+ _ => id,
|
|
+ }
|
|
+}
|
|
+
|
|
+/// Compute the Git SHA-1 blob object ID for the given content (bytes).
|
|
+fn git_blob_sha1_hex_bytes(data: &[u8]) -> String {
|
|
+ // Git blob hash is sha1 of: "blob <len>\0<data>"
|
|
+ let header = format!("blob {}\0", data.len());
|
|
+ use sha1::Digest;
|
|
+ let mut hasher = sha1::Sha1::new();
|
|
+ hasher.update(header.as_bytes());
|
|
+ hasher.update(data);
|
|
+ let digest = hasher.finalize();
|
|
+ let mut out = String::with_capacity(40);
|
|
+ for b in digest {
|
|
+ use std::fmt::Write;
|
|
+ let _ = write!(&mut out, "{b:02x}");
|
|
+ }
|
|
+ out
|
|
+}
|
|
+
|
|
+const ZERO_OID: &str = "0000000000000000000000000000000000000000";
|
|
+const REGULAR_MODE: &str = "100644";
|
|
+#[cfg(unix)]
|
|
+const EXECUTABLE_MODE: &str = "100755";
|
|
+const SYMLINK_MODE: &str = "120000";
|
|
+
|
|
+#[cfg(unix)]
|
|
+fn file_mode_for_path(path: &Path) -> Option<String> {
|
|
+ use std::os::unix::fs::PermissionsExt;
|
|
+ let meta = fs::symlink_metadata(path).ok()?;
|
|
+ let ft = meta.file_type();
|
|
+ if ft.is_symlink() {
|
|
+ return Some(SYMLINK_MODE.to_string());
|
|
+ }
|
|
+ let mode = meta.permissions().mode();
|
|
+ let is_exec = (mode & 0o111) != 0;
|
|
+ Some(if is_exec {
|
|
+ EXECUTABLE_MODE.into()
|
|
+ } else {
|
|
+ REGULAR_MODE.into()
|
|
+ })
|
|
+}
|
|
+
|
|
+#[cfg(not(unix))]
|
|
+fn file_mode_for_path(_path: &Path) -> Option<String> {
|
|
+ // Default to non-executable on non-unix.
|
|
+ Some(REGULAR_MODE.to_string())
|
|
+}
|
|
+
|
|
+fn blob_bytes(path: &Path, mode: &str) -> Result<Option<Vec<u8>>> {
|
|
+ if path.exists() {
|
|
+ let contents = if mode == SYMLINK_MODE {
|
|
+ symlink_blob_bytes(path)
|
|
+ .ok_or_else(|| anyhow!("failed to read symlink target for {}", path.display()))?
|
|
+ } else {
|
|
+ fs::read(path).with_context(|| {
|
|
+ format!("failed to read current file for diff {}", path.display())
|
|
+ })?
|
|
+ };
|
|
+ Ok(Some(contents))
|
|
+ } else {
|
|
+ Ok(None)
|
|
+ }
|
|
+}
|
|
+
|
|
+#[cfg(unix)]
|
|
+fn symlink_blob_bytes(path: &Path) -> Option<Vec<u8>> {
|
|
+ use std::os::unix::ffi::OsStrExt;
|
|
+ let target = std::fs::read_link(path).ok()?;
|
|
+ Some(target.as_os_str().as_bytes().to_vec())
|
|
+}
|
|
+
|
|
+#[cfg(not(unix))]
|
|
+fn symlink_blob_bytes(_path: &Path) -> Option<Vec<u8>> {
|
|
+ None
|
|
+}
|
|
+
|
|
+#[cfg(windows)]
|
|
+fn is_windows_drive_or_unc_root(p: &std::path::Path) -> bool {
|
|
+ use std::path::Component;
|
|
+ let mut comps = p.components();
|
|
+ matches!(
|
|
+ (comps.next(), comps.next(), comps.next()),
|
|
+ (Some(Component::Prefix(_)), Some(Component::RootDir), None)
|
|
+ )
|
|
+}
|
|
+
|
|
+#[cfg(test)]
|
|
+mod tests {
|
|
+ #![allow(clippy::unwrap_used)]
|
|
+ use super::*;
|
|
+ use pretty_assertions::assert_eq;
|
|
+ use tempfile::tempdir;
|
|
+
|
|
+ /// Compute the Git SHA-1 blob object ID for the given content (string).
|
|
+ /// This delegates to the bytes version to avoid UTF-8 lossy conversions here.
|
|
+ fn git_blob_sha1_hex(data: &str) -> String {
|
|
+ git_blob_sha1_hex_bytes(data.as_bytes())
|
|
+ }
|
|
+
|
|
+ fn normalize_diff_for_test(input: &str, root: &Path) -> String {
|
|
+ let root_str = root.display().to_string().replace('\\', "/");
|
|
+ let replaced = input.replace(&root_str, "<TMP>");
|
|
+ // Split into blocks on lines starting with "diff --git ", sort blocks for determinism, and rejoin
|
|
+ let mut blocks: Vec<String> = Vec::new();
|
|
+ let mut current = String::new();
|
|
+ for line in replaced.lines() {
|
|
+ if line.starts_with("diff --git ") && !current.is_empty() {
|
|
+ blocks.push(current);
|
|
+ current = String::new();
|
|
+ }
|
|
+ if !current.is_empty() {
|
|
+ current.push('\n');
|
|
+ }
|
|
+ current.push_str(line);
|
|
+ }
|
|
+ if !current.is_empty() {
|
|
+ blocks.push(current);
|
|
+ }
|
|
+ blocks.sort();
|
|
+ let mut out = blocks.join("\n");
|
|
+ if !out.ends_with('\n') {
|
|
+ out.push('\n');
|
|
+ }
|
|
+ out
|
|
+ }
|
|
+
|
|
+ #[test]
|
|
+ fn accumulates_add_and_update() {
|
|
+ let mut acc = TurnDiffTracker::new();
|
|
+
|
|
+ let dir = tempdir().unwrap();
|
|
+ let file = dir.path().join("a.txt");
|
|
+
|
|
+ // First patch: add file (baseline should be /dev/null).
|
|
+ let add_changes = HashMap::from([(
|
|
+ file.clone(),
|
|
+ FileChange::Add {
|
|
+ content: "foo\n".to_string(),
|
|
+ },
|
|
+ )]);
|
|
+ acc.on_patch_begin(&add_changes).unwrap();
|
|
+
|
|
+ // Simulate apply: create the file on disk.
|
|
+ fs::write(&file, "foo\n").unwrap();
|
|
+ let first = acc.get_unified_diff().unwrap().unwrap();
|
|
+ let first = normalize_diff_for_test(&first, dir.path());
|
|
+ let expected_first = {
|
|
+ let mode = file_mode_for_path(&file).unwrap_or_else(|| REGULAR_MODE.to_string());
|
|
+ let right_oid = git_blob_sha1_hex("foo\n");
|
|
+ format!(
|
|
+ "diff --git a/<TMP>/a.txt b/<TMP>/a.txt\nnew file mode {mode}\nindex {ZERO_OID}..{right_oid}\n--- /dev/null\n+++ b/<TMP>/a.txt\n@@ -0,0 +1 @@\n+foo\n",
|
|
```
|
|
|
|
> `r#` for better readability?
|
|
|
|
- Created: 2025-08-04 03:14:59 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2250313964
|
|
|
|
```diff
|
|
@@ -0,0 +1,766 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use anyhow::anyhow;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+struct BaselineFileInfo {
|
|
+ path: Option<PathBuf>,
|
|
+ contents_bytes: Option<Vec<u8>>,
|
|
+ mode: Option<String>,
|
|
+ oid: Option<String>,
|
|
+}
|
|
+
|
|
+/// Tracks sets of changes to files and exposes the overall unified diff.
|
|
+/// Internally, the way this works is now:
|
|
+/// 1. Maintain an in-memory baseline snapshot of files when they are first seen.
|
|
+/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null).
|
|
+/// 2. Keep a stable internal filename (uuid + same extension) per external path for rename tracking.
|
|
+/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory
|
|
+/// using the `similar` crate and emit unified diffs with rewritten external paths.
|
|
+#[derive(Default)]
|
|
+pub struct TurnDiffTracker {
|
|
+ /// Map external path -> internal filename (uuid + same extension).
|
|
+ external_to_temp_name: HashMap<PathBuf, String>,
|
|
+ /// Internal filename -> baseline file info.
|
|
+ baseline_file_info: HashMap<String, BaselineFileInfo>,
|
|
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
|
|
+ /// This is where renames are tracked.
|
|
+ temp_name_to_current_path: HashMap<String, PathBuf>,
|
|
+ /// Cache of known git worktree roots to avoid repeated filesystem walks.
|
|
+ git_root_cache: Vec<PathBuf>,
|
|
+}
|
|
+
|
|
+impl TurnDiffTracker {
|
|
+ pub fn new() -> Self {
|
|
+ Self::default()
|
|
+ }
|
|
+
|
|
+ /// Front-run apply patch calls to track the starting contents of any modified files.
|
|
+ /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen.
|
|
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
|
|
+ /// - Also updates internal mappings for move/rename events.
|
|
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
|
|
+ for (path, change) in changes.iter() {
|
|
+ // Ensure a stable internal filename exists for this external path.
|
|
+ if !self.external_to_temp_name.contains_key(path) {
|
|
+ let internal = uuid_filename_for(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(path.clone(), internal.clone());
|
|
+ self.temp_name_to_current_path
|
|
+ .insert(internal.clone(), path.clone());
|
|
+
|
|
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
|
|
+ let (contents_bytes, mode, oid) = if path.exists() {
|
|
+ let mode = file_mode_for_path(path);
|
|
+ let mode_str = mode.as_deref().unwrap_or(REGULAR_MODE);
|
|
+ let contents_bytes = blob_bytes(path, mode_str)
|
|
+ .unwrap_or_default()
|
|
+ .unwrap_or_default();
|
|
+ let oid = if mode.as_deref() == Some(SYMLINK_MODE) {
|
|
+ git_blob_sha1_hex_bytes(&contents_bytes)
|
|
+ } else {
|
|
+ self.git_blob_oid_for_path(path)
|
|
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(&contents_bytes))
|
|
+ };
|
|
+ (Some(contents_bytes), mode, Some(oid))
|
|
+ } else {
|
|
+ (None, None, Some(ZERO_OID.to_string()))
|
|
+ };
|
|
+
|
|
+ self.baseline_file_info.insert(
|
|
+ internal.clone(),
|
|
+ BaselineFileInfo {
|
|
+ path: Some(path.clone()),
|
|
+ contents_bytes,
|
|
+ mode,
|
|
+ oid,
|
|
+ },
|
|
+ );
|
|
+ }
|
|
+
|
|
+ // Track rename/move in current mapping if provided in an Update.
|
|
+ if let FileChange::Update {
|
|
+ move_path: Some(dest),
|
|
+ ..
|
|
+ } = change
|
|
+ {
|
|
+ let uuid_filename = match self.external_to_temp_name.get(path) {
|
|
+ Some(i) => i.clone(),
|
|
+ None => {
|
|
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
|
|
+ let i = uuid_filename_for(path);
|
|
+ self.baseline_file_info.insert(
|
|
+ i.clone(),
|
|
+ BaselineFileInfo {
|
|
+ path: Some(path.clone()),
|
|
+ contents_bytes: None,
|
|
+ mode: None,
|
|
+ oid: Some(ZERO_OID.to_string()),
|
|
+ },
|
|
+ );
|
|
+ i
|
|
+ }
|
|
+ };
|
|
+ // Update current external mapping for temp file name.
|
|
+ self.temp_name_to_current_path
|
|
+ .insert(uuid_filename.clone(), dest.clone());
|
|
+ // Update forward file_mapping: external current -> internal name.
|
|
+ self.external_to_temp_name.remove(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(dest.clone(), uuid_filename);
|
|
+ };
|
|
+ }
|
|
+
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ fn get_path_for_internal(&self, internal: &str) -> Option<PathBuf> {
|
|
+ self.temp_name_to_current_path
|
|
+ .get(internal)
|
|
+ .cloned()
|
|
+ .or_else(|| {
|
|
+ self.baseline_file_info
|
|
+ .get(internal)
|
|
+ .and_then(|info| info.path.clone())
|
|
+ })
|
|
+ }
|
|
+
|
|
+ /// Find the git worktree root for a file/directory by walking up to the first ancestor containing a `.git` entry.
|
|
+ /// Uses a simple cache of known roots and avoids negative-result caching for simplicity.
|
|
+ fn find_git_root_cached(&mut self, start: &Path) -> Option<PathBuf> {
|
|
+ let dir = if start.is_dir() {
|
|
+ start
|
|
+ } else {
|
|
+ start.parent()?
|
|
+ };
|
|
+
|
|
+ // Fast path: if any cached root is an ancestor of this path, use it.
|
|
+ if let Some(root) = self
|
|
+ .git_root_cache
|
|
+ .iter()
|
|
+ .find(|r| dir.starts_with(r))
|
|
+ .cloned()
|
|
+ {
|
|
+ return Some(root);
|
|
+ }
|
|
+
|
|
+ // Walk up to find a `.git` marker.
|
|
+ let mut cur = dir.to_path_buf();
|
|
+ loop {
|
|
+ let git_marker = cur.join(".git");
|
|
+ if git_marker.is_dir() || git_marker.is_file() {
|
|
+ if !self.git_root_cache.iter().any(|r| r == &cur) {
|
|
+ self.git_root_cache.push(cur.clone());
|
|
+ }
|
|
+ return Some(cur);
|
|
+ }
|
|
+
|
|
+ // On Windows, avoid walking above the drive or UNC share root.
|
|
+ #[cfg(windows)]
|
|
+ {
|
|
+ if is_windows_drive_or_unc_root(&cur) {
|
|
+ return None;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if let Some(parent) = cur.parent() {
|
|
+ cur = parent.to_path_buf();
|
|
+ } else {
|
|
+ return None;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Return a display string for `path` relative to its git root if found, else absolute.
|
|
+ fn relative_to_git_root_str(&mut self, path: &Path) -> String {
|
|
+ let s = if let Some(root) = self.find_git_root_cached(path) {
|
|
+ if let Ok(rel) = path.strip_prefix(&root) {
|
|
+ rel.display().to_string()
|
|
+ } else {
|
|
+ path.display().to_string()
|
|
+ }
|
|
+ } else {
|
|
+ path.display().to_string()
|
|
+ };
|
|
+ s.replace('\\', "/")
|
|
+ }
|
|
+
|
|
+ /// Ask git to compute the blob SHA-1 for the file at `path` within its repository.
|
|
+ /// Returns None if no repository is found or git invocation fails.
|
|
+ fn git_blob_oid_for_path(&mut self, path: &Path) -> Option<String> {
|
|
+ let root = self.find_git_root_cached(path)?;
|
|
+ // Compute a path relative to the repo root for better portability across platforms.
|
|
+ let rel = path.strip_prefix(&root).unwrap_or(path);
|
|
+ let output = Command::new("git")
|
|
+ .arg("-C")
|
|
+ .arg(&root)
|
|
+ .arg("hash-object")
|
|
+ .arg("--")
|
|
+ .arg(rel)
|
|
+ .output()
|
|
+ .ok()?;
|
|
+ if !output.status.success() {
|
|
+ return None;
|
|
+ }
|
|
+ let s = String::from_utf8_lossy(&output.stdout).trim().to_string();
|
|
+ if s.len() == 40 { Some(s) } else { None }
|
|
+ }
|
|
+
|
|
+ /// Recompute the aggregated unified diff by comparing all of the in-memory snapshots that were
|
|
+ /// collected before the first time they were touched by apply_patch during this turn with
|
|
+ /// the current repo state.
|
|
+ pub fn get_unified_diff(&mut self) -> Result<Option<String>> {
|
|
+ let mut aggregated = String::new();
|
|
+
|
|
+ // Compute diffs per tracked internal file in a stable order by external path.
|
|
+ let mut baseline_file_names: Vec<String> =
|
|
+ self.baseline_file_info.keys().cloned().collect();
|
|
+ // Sort lexicographically by full repo-relative path to match git behavior.
|
|
+ baseline_file_names.sort_by_key(|internal| {
|
|
+ self.get_path_for_internal(internal)
|
|
+ .map(|p| self.relative_to_git_root_str(&p))
|
|
+ .unwrap_or_default()
|
|
+ });
|
|
+
|
|
+ for internal in baseline_file_names {
|
|
+ // Baseline external must exist for any tracked internal.
|
|
+ let baseline_external = match self
|
|
+ .baseline_file_info
|
|
+ .get(&internal)
|
|
+ .and_then(|i| i.path.clone())
|
|
+ {
|
|
+ Some(p) => p,
|
|
+ None => continue,
|
|
+ };
|
|
+ let current_external = match self.get_path_for_internal(&internal) {
|
|
+ Some(p) => p,
|
|
+ None => continue,
|
|
+ };
|
|
+
|
|
+ // Determine modes early; needed to read symlink content correctly.
|
|
+ let baseline_mode = self
|
|
+ .baseline_file_info
|
|
+ .get(&internal)
|
|
+ .and_then(|i| i.mode.clone())
|
|
+ .unwrap_or_else(|| REGULAR_MODE.to_string());
|
|
+ let current_mode =
|
|
+ file_mode_for_path(¤t_external).unwrap_or_else(|| REGULAR_MODE.to_string());
|
|
+
|
|
+ let left_bytes = self
|
|
+ .baseline_file_info
|
|
+ .get(&internal)
|
|
+ .and_then(|i| i.contents_bytes.clone());
|
|
+
|
|
+ let right_bytes = blob_bytes(¤t_external, ¤t_mode)?;
|
|
+
|
|
+ // Fast path: identical bytes or both missing.
|
|
+ if left_bytes.as_deref() == right_bytes.as_deref() {
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ let left_display = self.relative_to_git_root_str(&baseline_external);
|
|
+ let right_display = self.relative_to_git_root_str(¤t_external);
|
|
+
|
|
+ // Emit a git-style header for better readability and parity with previous behavior.
|
|
+ aggregated.push_str(&format!("diff --git a/{left_display} b/{right_display}\n"));
|
|
+
|
|
+ let is_add = left_bytes.is_none() && right_bytes.is_some();
|
|
+ let is_delete = left_bytes.is_some() && right_bytes.is_none();
|
|
+
|
|
+ if is_add {
|
|
+ aggregated.push_str(&format!("new file mode {current_mode}\n"));
|
|
+ } else if is_delete {
|
|
+ aggregated.push_str(&format!("deleted file mode {baseline_mode}\n"));
|
|
+ } else if baseline_mode != current_mode {
|
|
+ aggregated.push_str(&format!("old mode {baseline_mode}\n"));
|
|
+ aggregated.push_str(&format!("new mode {current_mode}\n"));
|
|
+ }
|
|
+
|
|
+ // Determine blob object IDs for left and right contents. Prefer stored OIDs
|
|
+ // captured from the original repo state when the change was first seen.
|
|
+ let left_oid = self
|
|
+ .baseline_file_info
|
|
+ .get(&internal)
|
|
+ .and_then(|i| i.oid.clone())
|
|
+ .or_else(|| {
|
|
+ left_bytes
|
|
+ .as_ref()
|
|
+ .map(|b| git_blob_sha1_hex_bytes(b))
|
|
+ .or(Some(ZERO_OID.to_string()))
|
|
+ })
|
|
+ .unwrap_or_else(|| ZERO_OID.to_string());
|
|
+ let right_oid = if let Some(b) = right_bytes.as_ref() {
|
|
+ if current_mode == SYMLINK_MODE {
|
|
+ git_blob_sha1_hex_bytes(b)
|
|
+ } else {
|
|
+ self.git_blob_oid_for_path(¤t_external)
|
|
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(b))
|
|
+ }
|
|
+ } else {
|
|
+ ZERO_OID.to_string()
|
|
+ };
|
|
+
|
|
+ // If either side isn't valid UTF-8, emit a binary diff header and continue.
|
|
+ let left_text = left_bytes
|
|
+ .as_deref()
|
|
+ .and_then(|b| std::str::from_utf8(b).ok());
|
|
+ let right_text = right_bytes
|
|
+ .as_deref()
|
|
+ .and_then(|b| std::str::from_utf8(b).ok());
|
|
+
|
|
+ // Prefer text diffs when possible:
|
|
+ // - both sides are valid UTF-8
|
|
+ // - OR one side is missing (add/delete) and the present side is valid UTF-8
|
|
+ let can_text_diff = match (left_text, right_text, is_add, is_delete) {
|
|
+ (Some(_), Some(_), _, _) => true,
|
|
+ (_, Some(_), true, _) => true, // add: left missing, right text
|
|
+ (Some(_), _, _, true) => true, // delete: left text, right missing
|
|
+ _ => false,
|
|
+ };
|
|
+
|
|
+ if can_text_diff {
|
|
+ // Diff the contents as text, treating missing side as empty string.
|
|
+ let l = left_text.unwrap_or("");
|
|
+ let r = right_text.unwrap_or("");
|
|
+
|
|
+ // Emit index line without mode suffix to preserve current test expectations.
|
|
+ aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
|
|
+
|
|
+ let old_header = if left_bytes.is_some() {
|
|
+ format!("a/{left_display}")
|
|
+ } else {
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+ let new_header = if right_bytes.is_some() {
|
|
+ format!("b/{right_display}")
|
|
+ } else {
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+
|
|
+ let diff = similar::TextDiff::from_lines(l, r);
|
|
+ let unified = diff
|
|
+ .unified_diff()
|
|
+ .context_radius(3)
|
|
+ .header(&old_header, &new_header)
|
|
+ .to_string();
|
|
+
|
|
+ aggregated.push_str(&unified);
|
|
+ if !aggregated.ends_with('\n') {
|
|
+ aggregated.push('\n');
|
|
+ }
|
|
+ } else {
|
|
+ // Binary or invalid UTF-8: emit header only.
|
|
+ aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
|
|
+ let old_header = if left_bytes.is_some() {
|
|
+ format!("a/{left_display}")
|
|
+ } else {
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+ let new_header = if right_bytes.is_some() {
|
|
+ format!("b/{right_display}")
|
|
+ } else {
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+ aggregated.push_str(&format!("--- {old_header}\n"));
|
|
+ aggregated.push_str(&format!("+++ {new_header}\n"));
|
|
+ aggregated.push_str("Binary files differ\n");
|
|
+ if !aggregated.ends_with('\n') {
|
|
+ aggregated.push('\n');
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if aggregated.trim().is_empty() {
|
|
+ Ok(None)
|
|
+ } else {
|
|
+ Ok(Some(aggregated))
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+fn uuid_filename_for(path: &Path) -> String {
|
|
+ let id = Uuid::new_v4().to_string();
|
|
+ match path.extension().and_then(|e| e.to_str()) {
|
|
+ Some(ext) if !ext.is_empty() => format!("{id}.{ext}"),
|
|
+ _ => id,
|
|
+ }
|
|
+}
|
|
+
|
|
+/// Compute the Git SHA-1 blob object ID for the given content (bytes).
|
|
+fn git_blob_sha1_hex_bytes(data: &[u8]) -> String {
|
|
+ // Git blob hash is sha1 of: "blob <len>\0<data>"
|
|
+ let header = format!("blob {}\0", data.len());
|
|
+ use sha1::Digest;
|
|
+ let mut hasher = sha1::Sha1::new();
|
|
+ hasher.update(header.as_bytes());
|
|
+ hasher.update(data);
|
|
+ let digest = hasher.finalize();
|
|
+ let mut out = String::with_capacity(40);
|
|
+ for b in digest {
|
|
+ use std::fmt::Write;
|
|
+ let _ = write!(&mut out, "{b:02x}");
|
|
+ }
|
|
+ out
|
|
+}
|
|
+
|
|
+const ZERO_OID: &str = "0000000000000000000000000000000000000000";
|
|
+const REGULAR_MODE: &str = "100644";
|
|
+#[cfg(unix)]
|
|
+const EXECUTABLE_MODE: &str = "100755";
|
|
+const SYMLINK_MODE: &str = "120000";
|
|
+
|
|
+#[cfg(unix)]
|
|
+fn file_mode_for_path(path: &Path) -> Option<String> {
|
|
+ use std::os::unix::fs::PermissionsExt;
|
|
+ let meta = fs::symlink_metadata(path).ok()?;
|
|
+ let ft = meta.file_type();
|
|
+ if ft.is_symlink() {
|
|
+ return Some(SYMLINK_MODE.to_string());
|
|
+ }
|
|
+ let mode = meta.permissions().mode();
|
|
+ let is_exec = (mode & 0o111) != 0;
|
|
+ Some(if is_exec {
|
|
+ EXECUTABLE_MODE.into()
|
|
+ } else {
|
|
+ REGULAR_MODE.into()
|
|
+ })
|
|
+}
|
|
+
|
|
+#[cfg(not(unix))]
|
|
+fn file_mode_for_path(_path: &Path) -> Option<String> {
|
|
+ // Default to non-executable on non-unix.
|
|
+ Some(REGULAR_MODE.to_string())
|
|
+}
|
|
+
|
|
+fn blob_bytes(path: &Path, mode: &str) -> Result<Option<Vec<u8>>> {
|
|
+ if path.exists() {
|
|
+ let contents = if mode == SYMLINK_MODE {
|
|
+ symlink_blob_bytes(path)
|
|
+ .ok_or_else(|| anyhow!("failed to read symlink target for {}", path.display()))?
|
|
+ } else {
|
|
+ fs::read(path).with_context(|| {
|
|
+ format!("failed to read current file for diff {}", path.display())
|
|
+ })?
|
|
+ };
|
|
+ Ok(Some(contents))
|
|
+ } else {
|
|
+ Ok(None)
|
|
+ }
|
|
+}
|
|
+
|
|
+#[cfg(unix)]
|
|
+fn symlink_blob_bytes(path: &Path) -> Option<Vec<u8>> {
|
|
+ use std::os::unix::ffi::OsStrExt;
|
|
+ let target = std::fs::read_link(path).ok()?;
|
|
+ Some(target.as_os_str().as_bytes().to_vec())
|
|
+}
|
|
+
|
|
+#[cfg(not(unix))]
|
|
+fn symlink_blob_bytes(_path: &Path) -> Option<Vec<u8>> {
|
|
+ None
|
|
+}
|
|
+
|
|
+#[cfg(windows)]
|
|
+fn is_windows_drive_or_unc_root(p: &std::path::Path) -> bool {
|
|
+ use std::path::Component;
|
|
+ let mut comps = p.components();
|
|
+ matches!(
|
|
+ (comps.next(), comps.next(), comps.next()),
|
|
+ (Some(Component::Prefix(_)), Some(Component::RootDir), None)
|
|
+ )
|
|
+}
|
|
+
|
|
+#[cfg(test)]
|
|
+mod tests {
|
|
+ #![allow(clippy::unwrap_used)]
|
|
+ use super::*;
|
|
+ use pretty_assertions::assert_eq;
|
|
+ use tempfile::tempdir;
|
|
+
|
|
+ /// Compute the Git SHA-1 blob object ID for the given content (string).
|
|
+ /// This delegates to the bytes version to avoid UTF-8 lossy conversions here.
|
|
+ fn git_blob_sha1_hex(data: &str) -> String {
|
|
+ git_blob_sha1_hex_bytes(data.as_bytes())
|
|
+ }
|
|
+
|
|
+ fn normalize_diff_for_test(input: &str, root: &Path) -> String {
|
|
+ let root_str = root.display().to_string().replace('\\', "/");
|
|
+ let replaced = input.replace(&root_str, "<TMP>");
|
|
+ // Split into blocks on lines starting with "diff --git ", sort blocks for determinism, and rejoin
|
|
+ let mut blocks: Vec<String> = Vec::new();
|
|
+ let mut current = String::new();
|
|
+ for line in replaced.lines() {
|
|
+ if line.starts_with("diff --git ") && !current.is_empty() {
|
|
+ blocks.push(current);
|
|
+ current = String::new();
|
|
+ }
|
|
+ if !current.is_empty() {
|
|
+ current.push('\n');
|
|
+ }
|
|
+ current.push_str(line);
|
|
+ }
|
|
+ if !current.is_empty() {
|
|
+ blocks.push(current);
|
|
+ }
|
|
+ blocks.sort();
|
|
+ let mut out = blocks.join("\n");
|
|
+ if !out.ends_with('\n') {
|
|
+ out.push('\n');
|
|
+ }
|
|
+ out
|
|
+ }
|
|
+
|
|
+ #[test]
|
|
+ fn accumulates_add_and_update() {
|
|
+ let mut acc = TurnDiffTracker::new();
|
|
+
|
|
+ let dir = tempdir().unwrap();
|
|
+ let file = dir.path().join("a.txt");
|
|
+
|
|
+ // First patch: add file (baseline should be /dev/null).
|
|
+ let add_changes = HashMap::from([(
|
|
+ file.clone(),
|
|
+ FileChange::Add {
|
|
+ content: "foo\n".to_string(),
|
|
+ },
|
|
+ )]);
|
|
+ acc.on_patch_begin(&add_changes).unwrap();
|
|
+
|
|
+ // Simulate apply: create the file on disk.
|
|
+ fs::write(&file, "foo\n").unwrap();
|
|
+ let first = acc.get_unified_diff().unwrap().unwrap();
|
|
+ let first = normalize_diff_for_test(&first, dir.path());
|
|
+ let expected_first = {
|
|
+ let mode = file_mode_for_path(&file).unwrap_or_else(|| REGULAR_MODE.to_string());
|
|
+ let right_oid = git_blob_sha1_hex("foo\n");
|
|
+ format!(
|
|
+ "diff --git a/<TMP>/a.txt b/<TMP>/a.txt\nnew file mode {mode}\nindex {ZERO_OID}..{right_oid}\n--- /dev/null\n+++ b/<TMP>/a.txt\n@@ -0,0 +1 @@\n+foo\n",
|
|
+ )
|
|
+ };
|
|
+ assert_eq!(first, expected_first);
|
|
+
|
|
+ // Second patch: update the file on disk.
|
|
+ let update_changes = HashMap::from([(
|
|
+ file.clone(),
|
|
+ FileChange::Update {
|
|
+ unified_diff: "".to_owned(),
|
|
+ move_path: None,
|
|
+ },
|
|
+ )]);
|
|
+ acc.on_patch_begin(&update_changes).unwrap();
|
|
+
|
|
+ // Simulate apply: append a new line.
|
|
+ fs::write(&file, "foo\nbar\n").unwrap();
|
|
+ let combined = acc.get_unified_diff().unwrap().unwrap();
|
|
+ let combined = normalize_diff_for_test(&combined, dir.path());
|
|
+ let expected_combined = {
|
|
+ let mode = file_mode_for_path(&file).unwrap_or_else(|| REGULAR_MODE.to_string());
|
|
+ let right_oid = git_blob_sha1_hex("foo\nbar\n");
|
|
+ format!(
|
|
+ "diff --git a/<TMP>/a.txt b/<TMP>/a.txt\nnew file mode {mode}\nindex {ZERO_OID}..{right_oid}\n--- /dev/null\n+++ b/<TMP>/a.txt\n@@ -0,0 +1,2 @@\n+foo\n+bar\n",
|
|
+ )
|
|
+ };
|
|
+ assert_eq!(combined, expected_combined);
|
|
+ }
|
|
+
|
|
+ #[test]
|
|
+ fn accumulates_delete() {
|
|
+ let dir = tempdir().unwrap();
|
|
+ let file = dir.path().join("b.txt");
|
|
+ fs::write(&file, "x\n").unwrap();
|
|
+
|
|
+ let mut acc = TurnDiffTracker::new();
|
|
+ let del_changes = HashMap::from([(file.clone(), FileChange::Delete)]);
|
|
+ acc.on_patch_begin(&del_changes).unwrap();
|
|
+
|
|
+ // Simulate apply: delete the file from disk.
|
|
+ let baseline_mode = file_mode_for_path(&file).unwrap_or_else(|| REGULAR_MODE.to_string());
|
|
+ fs::remove_file(&file).unwrap();
|
|
+ let diff = acc.get_unified_diff().unwrap().unwrap();
|
|
+ let diff = normalize_diff_for_test(&diff, dir.path());
|
|
+ let expected = {
|
|
+ let left_oid = git_blob_sha1_hex("x\n");
|
|
+ format!(
|
|
+ "diff --git a/<TMP>/b.txt b/<TMP>/b.txt\ndeleted file mode {baseline_mode}\nindex {left_oid}..{ZERO_OID}\n--- a/<TMP>/b.txt\n+++ /dev/null\n@@ -1 +0,0 @@\n-x\n",
|
|
+ )
|
|
+ };
|
|
+ assert_eq!(diff, expected);
|
|
+ }
|
|
+
|
|
+ #[test]
|
|
+ fn accumulates_move_and_update() {
|
|
+ let dir = tempdir().unwrap();
|
|
+ let src = dir.path().join("src.txt");
|
|
+ let dest = dir.path().join("dst.txt");
|
|
+ fs::write(&src, "line\n").unwrap();
|
|
+
|
|
+ let mut acc = TurnDiffTracker::new();
|
|
+ let mv_changes = HashMap::from([(
|
|
+ src.clone(),
|
|
+ FileChange::Update {
|
|
+ unified_diff: "".to_owned(),
|
|
+ move_path: Some(dest.clone()),
|
|
+ },
|
|
+ )]);
|
|
+ acc.on_patch_begin(&mv_changes).unwrap();
|
|
+
|
|
+ // Simulate apply: move and update content.
|
|
+ fs::rename(&src, &dest).unwrap();
|
|
+ fs::write(&dest, "line2\n").unwrap();
|
|
+
|
|
+ let out = acc.get_unified_diff().unwrap().unwrap();
|
|
+ let out = normalize_diff_for_test(&out, dir.path());
|
|
+ let expected = {
|
|
+ let left_oid = git_blob_sha1_hex("line\n");
|
|
+ let right_oid = git_blob_sha1_hex("line2\n");
|
|
+ format!(
|
|
+ "diff --git a/<TMP>/src.txt b/<TMP>/dst.txt\nindex {left_oid}..{right_oid}\n--- a/<TMP>/src.txt\n+++ b/<TMP>/dst.txt\n@@ -1 +1 @@\n-line\n+line2\n"
|
|
+ )
|
|
+ };
|
|
+ assert_eq!(out, expected);
|
|
+ }
|
|
+
|
|
+ #[test]
|
|
+ fn move_without_content_change_yields_no_diff() {
|
|
+ let dir = tempdir().unwrap();
|
|
+ let src = dir.path().join("moved.txt");
|
|
+ let dest = dir.path().join("renamed.txt");
|
|
+ fs::write(&src, "same\n").unwrap();
|
|
+
|
|
+ let mut acc = TurnDiffTracker::new();
|
|
+ let mv_changes = HashMap::from([(
|
|
+ src.clone(),
|
|
+ FileChange::Update {
|
|
+ unified_diff: "".to_owned(),
|
|
+ move_path: Some(dest.clone()),
|
|
+ },
|
|
+ )]);
|
|
+ acc.on_patch_begin(&mv_changes).unwrap();
|
|
+
|
|
+ // Simulate apply: move only, no content change.
|
|
+ fs::rename(&src, &dest).unwrap();
|
|
+
|
|
+ let diff = acc.get_unified_diff().unwrap();
|
|
+ assert_eq!(diff, None);
|
|
+ }
|
|
+
|
|
+ #[test]
|
|
+ fn move_declared_but_file_only_appears_at_dest_is_add() {
|
|
+ let dir = tempdir().unwrap();
|
|
+ let src = dir.path().join("src.txt");
|
|
+ let dest = dir.path().join("dest.txt");
|
|
+ let mut acc = TurnDiffTracker::new();
|
|
+ let mv = HashMap::from([(
|
|
+ src.clone(),
|
|
+ FileChange::Update {
|
|
+ unified_diff: "".into(),
|
|
+ move_path: Some(dest.clone()),
|
|
+ },
|
|
+ )]);
|
|
+ acc.on_patch_begin(&mv).unwrap();
|
|
+ // No file existed initially; create only dest
|
|
+ fs::write(&dest, "hello\n").unwrap();
|
|
+ let diff = acc.get_unified_diff().unwrap().unwrap();
|
|
+ assert!(diff.contains("new file mode"));
|
|
```
|
|
|
|
> We can't `assert_eq!()` here?
|
|
|
|
- Created: 2025-08-04 16:55:01 UTC | Link: https://github.com/openai/codex/pull/1770#discussion_r2252061946
|
|
|
|
```diff
|
|
@@ -0,0 +1,766 @@
|
|
+use std::collections::HashMap;
|
|
+use std::fs;
|
|
+use std::path::Path;
|
|
+use std::path::PathBuf;
|
|
+use std::process::Command;
|
|
+
|
|
+use anyhow::Context;
|
|
+use anyhow::Result;
|
|
+use anyhow::anyhow;
|
|
+use uuid::Uuid;
|
|
+
|
|
+use crate::protocol::FileChange;
|
|
+
|
|
+struct BaselineFileInfo {
|
|
+ path: Option<PathBuf>,
|
|
+ contents_bytes: Option<Vec<u8>>,
|
|
+ mode: Option<String>,
|
|
+ oid: Option<String>,
|
|
+}
|
|
+
|
|
+/// Tracks sets of changes to files and exposes the overall unified diff.
|
|
+/// Internally, the way this works is now:
|
|
+/// 1. Maintain an in-memory baseline snapshot of files when they are first seen.
|
|
+/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null).
|
|
+/// 2. Keep a stable internal filename (uuid + same extension) per external path for rename tracking.
|
|
+/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory
|
|
+/// using the `similar` crate and emit unified diffs with rewritten external paths.
|
|
+#[derive(Default)]
|
|
+pub struct TurnDiffTracker {
|
|
+ /// Map external path -> internal filename (uuid + same extension).
|
|
+ external_to_temp_name: HashMap<PathBuf, String>,
|
|
+ /// Internal filename -> baseline file info.
|
|
+ baseline_file_info: HashMap<String, BaselineFileInfo>,
|
|
+ /// Internal filename -> external path as of current accumulated state (after applying all changes).
|
|
+ /// This is where renames are tracked.
|
|
+ temp_name_to_current_path: HashMap<String, PathBuf>,
|
|
+ /// Cache of known git worktree roots to avoid repeated filesystem walks.
|
|
+ git_root_cache: Vec<PathBuf>,
|
|
+}
|
|
+
|
|
+impl TurnDiffTracker {
|
|
+ pub fn new() -> Self {
|
|
+ Self::default()
|
|
+ }
|
|
+
|
|
+ /// Front-run apply patch calls to track the starting contents of any modified files.
|
|
+ /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen.
|
|
+ /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
|
|
+ /// - Also updates internal mappings for move/rename events.
|
|
+ pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) -> Result<()> {
|
|
+ for (path, change) in changes.iter() {
|
|
+ // Ensure a stable internal filename exists for this external path.
|
|
+ if !self.external_to_temp_name.contains_key(path) {
|
|
+ let internal = uuid_filename_for(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(path.clone(), internal.clone());
|
|
+ self.temp_name_to_current_path
|
|
+ .insert(internal.clone(), path.clone());
|
|
+
|
|
+ // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
|
|
+ let (contents_bytes, mode, oid) = if path.exists() {
|
|
+ let mode = file_mode_for_path(path);
|
|
+ let mode_str = mode.as_deref().unwrap_or(REGULAR_MODE);
|
|
+ let contents_bytes = blob_bytes(path, mode_str)
|
|
+ .unwrap_or_default()
|
|
+ .unwrap_or_default();
|
|
+ let oid = if mode.as_deref() == Some(SYMLINK_MODE) {
|
|
+ git_blob_sha1_hex_bytes(&contents_bytes)
|
|
+ } else {
|
|
+ self.git_blob_oid_for_path(path)
|
|
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(&contents_bytes))
|
|
+ };
|
|
+ (Some(contents_bytes), mode, Some(oid))
|
|
+ } else {
|
|
+ (None, None, Some(ZERO_OID.to_string()))
|
|
+ };
|
|
+
|
|
+ self.baseline_file_info.insert(
|
|
+ internal.clone(),
|
|
+ BaselineFileInfo {
|
|
+ path: Some(path.clone()),
|
|
+ contents_bytes,
|
|
+ mode,
|
|
+ oid,
|
|
+ },
|
|
+ );
|
|
+ }
|
|
+
|
|
+ // Track rename/move in current mapping if provided in an Update.
|
|
+ if let FileChange::Update {
|
|
+ move_path: Some(dest),
|
|
+ ..
|
|
+ } = change
|
|
+ {
|
|
+ let uuid_filename = match self.external_to_temp_name.get(path) {
|
|
+ Some(i) => i.clone(),
|
|
+ None => {
|
|
+ // This should be rare, but if we haven't mapped the source, create it with no baseline.
|
|
+ let i = uuid_filename_for(path);
|
|
+ self.baseline_file_info.insert(
|
|
+ i.clone(),
|
|
+ BaselineFileInfo {
|
|
+ path: Some(path.clone()),
|
|
+ contents_bytes: None,
|
|
+ mode: None,
|
|
+ oid: Some(ZERO_OID.to_string()),
|
|
+ },
|
|
+ );
|
|
+ i
|
|
+ }
|
|
+ };
|
|
+ // Update current external mapping for temp file name.
|
|
+ self.temp_name_to_current_path
|
|
+ .insert(uuid_filename.clone(), dest.clone());
|
|
+ // Update forward file_mapping: external current -> internal name.
|
|
+ self.external_to_temp_name.remove(path);
|
|
+ self.external_to_temp_name
|
|
+ .insert(dest.clone(), uuid_filename);
|
|
+ };
|
|
+ }
|
|
+
|
|
+ Ok(())
|
|
+ }
|
|
+
|
|
+ fn get_path_for_internal(&self, internal: &str) -> Option<PathBuf> {
|
|
+ self.temp_name_to_current_path
|
|
+ .get(internal)
|
|
+ .cloned()
|
|
+ .or_else(|| {
|
|
+ self.baseline_file_info
|
|
+ .get(internal)
|
|
+ .and_then(|info| info.path.clone())
|
|
+ })
|
|
+ }
|
|
+
|
|
+ /// Find the git worktree root for a file/directory by walking up to the first ancestor containing a `.git` entry.
|
|
+ /// Uses a simple cache of known roots and avoids negative-result caching for simplicity.
|
|
+ fn find_git_root_cached(&mut self, start: &Path) -> Option<PathBuf> {
|
|
+ let dir = if start.is_dir() {
|
|
+ start
|
|
+ } else {
|
|
+ start.parent()?
|
|
+ };
|
|
+
|
|
+ // Fast path: if any cached root is an ancestor of this path, use it.
|
|
+ if let Some(root) = self
|
|
+ .git_root_cache
|
|
+ .iter()
|
|
+ .find(|r| dir.starts_with(r))
|
|
+ .cloned()
|
|
+ {
|
|
+ return Some(root);
|
|
+ }
|
|
+
|
|
+ // Walk up to find a `.git` marker.
|
|
+ let mut cur = dir.to_path_buf();
|
|
+ loop {
|
|
+ let git_marker = cur.join(".git");
|
|
+ if git_marker.is_dir() || git_marker.is_file() {
|
|
+ if !self.git_root_cache.iter().any(|r| r == &cur) {
|
|
+ self.git_root_cache.push(cur.clone());
|
|
+ }
|
|
+ return Some(cur);
|
|
+ }
|
|
+
|
|
+ // On Windows, avoid walking above the drive or UNC share root.
|
|
+ #[cfg(windows)]
|
|
+ {
|
|
+ if is_windows_drive_or_unc_root(&cur) {
|
|
+ return None;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if let Some(parent) = cur.parent() {
|
|
+ cur = parent.to_path_buf();
|
|
+ } else {
|
|
+ return None;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /// Return a display string for `path` relative to its git root if found, else absolute.
|
|
+ fn relative_to_git_root_str(&mut self, path: &Path) -> String {
|
|
+ let s = if let Some(root) = self.find_git_root_cached(path) {
|
|
+ if let Ok(rel) = path.strip_prefix(&root) {
|
|
+ rel.display().to_string()
|
|
+ } else {
|
|
+ path.display().to_string()
|
|
+ }
|
|
+ } else {
|
|
+ path.display().to_string()
|
|
+ };
|
|
+ s.replace('\\', "/")
|
|
+ }
|
|
+
|
|
+ /// Ask git to compute the blob SHA-1 for the file at `path` within its repository.
|
|
+ /// Returns None if no repository is found or git invocation fails.
|
|
+ fn git_blob_oid_for_path(&mut self, path: &Path) -> Option<String> {
|
|
+ let root = self.find_git_root_cached(path)?;
|
|
+ // Compute a path relative to the repo root for better portability across platforms.
|
|
+ let rel = path.strip_prefix(&root).unwrap_or(path);
|
|
+ let output = Command::new("git")
|
|
+ .arg("-C")
|
|
+ .arg(&root)
|
|
+ .arg("hash-object")
|
|
+ .arg("--")
|
|
+ .arg(rel)
|
|
+ .output()
|
|
+ .ok()?;
|
|
+ if !output.status.success() {
|
|
+ return None;
|
|
+ }
|
|
+ let s = String::from_utf8_lossy(&output.stdout).trim().to_string();
|
|
+ if s.len() == 40 { Some(s) } else { None }
|
|
+ }
|
|
+
|
|
+ /// Recompute the aggregated unified diff by comparing all of the in-memory snapshots that were
|
|
+ /// collected before the first time they were touched by apply_patch during this turn with
|
|
+ /// the current repo state.
|
|
+ pub fn get_unified_diff(&mut self) -> Result<Option<String>> {
|
|
+ let mut aggregated = String::new();
|
|
+
|
|
+ // Compute diffs per tracked internal file in a stable order by external path.
|
|
+ let mut baseline_file_names: Vec<String> =
|
|
+ self.baseline_file_info.keys().cloned().collect();
|
|
+ // Sort lexicographically by full repo-relative path to match git behavior.
|
|
+ baseline_file_names.sort_by_key(|internal| {
|
|
+ self.get_path_for_internal(internal)
|
|
+ .map(|p| self.relative_to_git_root_str(&p))
|
|
+ .unwrap_or_default()
|
|
+ });
|
|
+
|
|
+ for internal in baseline_file_names {
|
|
+ // Baseline external must exist for any tracked internal.
|
|
+ let baseline_external = match self
|
|
+ .baseline_file_info
|
|
+ .get(&internal)
|
|
+ .and_then(|i| i.path.clone())
|
|
+ {
|
|
+ Some(p) => p,
|
|
+ None => continue,
|
|
+ };
|
|
+ let current_external = match self.get_path_for_internal(&internal) {
|
|
+ Some(p) => p,
|
|
+ None => continue,
|
|
+ };
|
|
+
|
|
+ // Determine modes early; needed to read symlink content correctly.
|
|
+ let baseline_mode = self
|
|
+ .baseline_file_info
|
|
+ .get(&internal)
|
|
+ .and_then(|i| i.mode.clone())
|
|
+ .unwrap_or_else(|| REGULAR_MODE.to_string());
|
|
+ let current_mode =
|
|
+ file_mode_for_path(&current_external).unwrap_or_else(|| REGULAR_MODE.to_string());
|
|
+
|
|
+ let left_bytes = self
|
|
+ .baseline_file_info
|
|
+ .get(&internal)
|
|
+ .and_then(|i| i.contents_bytes.clone());
|
|
+
|
|
+ let right_bytes = blob_bytes(&current_external, &current_mode)?;
|
|
+
|
|
+ // Fast path: identical bytes or both missing.
|
|
+ if left_bytes.as_deref() == right_bytes.as_deref() {
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ let left_display = self.relative_to_git_root_str(&baseline_external);
|
|
+ let right_display = self.relative_to_git_root_str(&current_external);
|
|
+
|
|
+ // Emit a git-style header for better readability and parity with previous behavior.
|
|
+ aggregated.push_str(&format!("diff --git a/{left_display} b/{right_display}\n"));
|
|
+
|
|
+ let is_add = left_bytes.is_none() && right_bytes.is_some();
|
|
+ let is_delete = left_bytes.is_some() && right_bytes.is_none();
|
|
+
|
|
+ if is_add {
|
|
+ aggregated.push_str(&format!("new file mode {current_mode}\n"));
|
|
+ } else if is_delete {
|
|
+ aggregated.push_str(&format!("deleted file mode {baseline_mode}\n"));
|
|
+ } else if baseline_mode != current_mode {
|
|
+ aggregated.push_str(&format!("old mode {baseline_mode}\n"));
|
|
+ aggregated.push_str(&format!("new mode {current_mode}\n"));
|
|
+ }
|
|
+
|
|
+ // Determine blob object IDs for left and right contents. Prefer stored OIDs
|
|
+ // captured from the original repo state when the change was first seen.
|
|
+ let left_oid = self
|
|
+ .baseline_file_info
|
|
+ .get(&internal)
|
|
+ .and_then(|i| i.oid.clone())
|
|
+ .or_else(|| {
|
|
+ left_bytes
|
|
+ .as_ref()
|
|
+ .map(|b| git_blob_sha1_hex_bytes(b))
|
|
+ .or(Some(ZERO_OID.to_string()))
|
|
+ })
|
|
+ .unwrap_or_else(|| ZERO_OID.to_string());
|
|
+ let right_oid = if let Some(b) = right_bytes.as_ref() {
|
|
+ if current_mode == SYMLINK_MODE {
|
|
+ git_blob_sha1_hex_bytes(b)
|
|
+ } else {
|
|
+ self.git_blob_oid_for_path(&current_external)
|
|
+ .unwrap_or_else(|| git_blob_sha1_hex_bytes(b))
|
|
+ }
|
|
+ } else {
|
|
+ ZERO_OID.to_string()
|
|
+ };
|
|
+
|
|
+ // If either side isn't valid UTF-8, emit a binary diff header and continue.
|
|
+ let left_text = left_bytes
|
|
+ .as_deref()
|
|
+ .and_then(|b| std::str::from_utf8(b).ok());
|
|
+ let right_text = right_bytes
|
|
+ .as_deref()
|
|
+ .and_then(|b| std::str::from_utf8(b).ok());
|
|
+
|
|
+ // Prefer text diffs when possible:
|
|
+ // - both sides are valid UTF-8
|
|
+ // - OR one side is missing (add/delete) and the present side is valid UTF-8
|
|
+ let can_text_diff = match (left_text, right_text, is_add, is_delete) {
|
|
+ (Some(_), Some(_), _, _) => true,
|
|
+ (_, Some(_), true, _) => true, // add: left missing, right text
|
|
+ (Some(_), _, _, true) => true, // delete: left text, right missing
|
|
+ _ => false,
|
|
+ };
|
|
+
|
|
+ if can_text_diff {
|
|
+ // Diff the contents as text, treating missing side as empty string.
|
|
+ let l = left_text.unwrap_or("");
|
|
+ let r = right_text.unwrap_or("");
|
|
+
|
|
+ // Emit index line without mode suffix to preserve current test expectations.
|
|
+ aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
|
|
+
|
|
+ let old_header = if left_bytes.is_some() {
|
|
+ format!("a/{left_display}")
|
|
+ } else {
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+ let new_header = if right_bytes.is_some() {
|
|
+ format!("b/{right_display}")
|
|
+ } else {
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+
|
|
+ let diff = similar::TextDiff::from_lines(l, r);
|
|
+ let unified = diff
|
|
+ .unified_diff()
|
|
+ .context_radius(3)
|
|
+ .header(&old_header, &new_header)
|
|
+ .to_string();
|
|
+
|
|
+ aggregated.push_str(&unified);
|
|
+ if !aggregated.ends_with('\n') {
|
|
+ aggregated.push('\n');
|
|
+ }
|
|
+ } else {
|
|
+ // Binary or invalid UTF-8: emit header only.
|
|
+ aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
|
|
+ let old_header = if left_bytes.is_some() {
|
|
+ format!("a/{left_display}")
|
|
+ } else {
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+ let new_header = if right_bytes.is_some() {
|
|
+ format!("b/{right_display}")
|
|
+ } else {
|
|
+ "/dev/null".to_string()
|
|
+ };
|
|
+ aggregated.push_str(&format!("--- {old_header}\n"));
|
|
+ aggregated.push_str(&format!("+++ {new_header}\n"));
|
|
+ aggregated.push_str("Binary files differ\n");
|
|
+ if !aggregated.ends_with('\n') {
|
|
+ aggregated.push('\n');
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if aggregated.trim().is_empty() {
|
|
+ Ok(None)
|
|
+ } else {
|
|
+ Ok(Some(aggregated))
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+fn uuid_filename_for(path: &Path) -> String {
|
|
+ let id = Uuid::new_v4().to_string();
|
|
+ match path.extension().and_then(|e| e.to_str()) {
|
|
+ Some(ext) if !ext.is_empty() => format!("{id}.{ext}"),
|
|
+ _ => id,
|
|
+ }
|
|
+}
|
|
+
|
|
+/// Compute the Git SHA-1 blob object ID for the given content (bytes).
|
|
+fn git_blob_sha1_hex_bytes(data: &[u8]) -> String {
|
|
+ // Git blob hash is sha1 of: "blob <len>\0<data>"
|
|
+ let header = format!("blob {}\0", data.len());
|
|
+ use sha1::Digest;
|
|
+ let mut hasher = sha1::Sha1::new();
|
|
+ hasher.update(header.as_bytes());
|
|
+ hasher.update(data);
|
|
+ let digest = hasher.finalize();
|
|
+ let mut out = String::with_capacity(40);
|
|
+ for b in digest {
|
|
+ use std::fmt::Write;
|
|
+ let _ = write!(&mut out, "{b:02x}");
|
|
+ }
|
|
+ out
|
|
+}
|
|
+
|
|
+const ZERO_OID: &str = "0000000000000000000000000000000000000000";
|
|
+const REGULAR_MODE: &str = "100644";
|
|
+#[cfg(unix)]
|
|
```
|
|
|
|
> @gpeal but then that would change the tree hash? |