From f7e8ff8e5026f92fc4b0be1478bf98f7ffcdd781 Mon Sep 17 00:00:00 2001 From: jif-oai Date: Thu, 7 May 2026 11:33:47 +0200 Subject: [PATCH] Make turn diff tracking operation backed (#21180) ## Summary - replace filesystem-based turn diff tracking with an operation-backed accumulator - preserve enough verified apply_patch state to render move-overwrite cases correctly - keep the turn/diff/updated contract intact while removing remote-only turn-diff test skips This takes the assumption that no 3P services rely on the output format of `apply_patch` ## Why For the CCA file system isolation push --------- Co-authored-by: Codex --- codex-rs/apply-patch/src/invocation.rs | 62 +- codex-rs/apply-patch/src/lib.rs | 225 +++++- .../apply-patch/src/standalone_executable.rs | 2 +- codex-rs/arg0/src/lib.rs | 2 +- codex-rs/core/src/apply_patch.rs | 9 +- codex-rs/core/src/safety.rs | 4 +- codex-rs/core/src/session/turn.rs | 11 +- codex-rs/core/src/tools/events.rs | 86 ++- .../core/src/tools/handlers/apply_patch.rs | 12 +- codex-rs/core/src/tools/handlers/shell.rs | 4 +- .../core/src/tools/runtimes/apply_patch.rs | 16 +- codex-rs/core/src/turn_diff_tracker.rs | 642 +++++++----------- codex-rs/core/src/turn_diff_tracker_tests.rs | 641 ++++++++--------- .../core/src/unified_exec/async_watcher.rs | 8 +- codex-rs/core/tests/suite/apply_patch_cli.rs | 224 ++++-- 15 files changed, 1050 insertions(+), 898 deletions(-) diff --git a/codex-rs/apply-patch/src/invocation.rs b/codex-rs/apply-patch/src/invocation.rs index 075c94c60c..abe223f1d9 100644 --- a/codex-rs/apply-patch/src/invocation.rs +++ b/codex-rs/apply-patch/src/invocation.rs @@ -193,6 +193,7 @@ pub async fn maybe_parse_apply_patch_verified( let ApplyPatchFileUpdate { unified_diff, content: contents, + .. } = match unified_diff_from_chunks(&path, &chunks, fs, sandbox).await { Ok(diff) => diff, Err(e) => { @@ -707,6 +708,7 @@ PATCH"#, "#; let expected = ApplyPatchFileUpdate { unified_diff: expected_diff.to_string(), + original_content: "foo\nbar\nbaz\n".to_string(), content: "foo\nbar\nBAZ\n".to_string(), }; assert_eq!(expected, diff); @@ -745,6 +747,7 @@ PATCH"#, "#; let expected = ApplyPatchFileUpdate { unified_diff: expected_diff.to_string(), + original_content: "foo\nbar\nbaz\n".to_string(), content: "foo\nbar\nbaz\nquux\n".to_string(), }; assert_eq!(expected, diff); @@ -839,9 +842,10 @@ PATCH"#, assert_eq!(action.cwd.as_path(), worktree_dir.as_path()); + let source_path = worktree_dir.join(source_name); let change = action .changes() - .get(&worktree_dir.join(source_name)) + .get(source_path.as_path()) .expect("source file change present"); match change { @@ -854,4 +858,60 @@ PATCH"#, other => panic!("expected update change, got {other:?}"), } } + + #[tokio::test] + async fn test_unreadable_destinations_still_verify() { + let session_dir = tempdir().unwrap(); + fs::write(session_dir.path().join("binary.dat"), [0xff, 0xfe, 0xfd]).unwrap(); + let cwd = AbsolutePathBuf::from_absolute_path(session_dir.path()).unwrap(); + let add_argv = vec![ + "apply_patch".to_string(), + "*** Begin Patch\n*** Add File: binary.dat\n+text\n*** End Patch".to_string(), + ]; + fs::write(session_dir.path().join("source.txt"), "before\n").unwrap(); + let move_argv = vec![ + "apply_patch".to_string(), + "*** Begin Patch\n*** Update File: source.txt\n*** Move to: binary.dat\n@@\n-before\n+after\n*** End Patch".to_string(), + ]; + + for argv in [add_argv, move_argv] { + let result = maybe_parse_apply_patch_verified( + &argv, + &cwd, + LOCAL_FS.as_ref(), + /*sandbox*/ None, + ) + .await; + + assert!(matches!(result, MaybeApplyPatchVerified::Body(_))); + } + } + + #[cfg(unix)] + #[tokio::test] + async fn test_delete_symlink_still_verifies() { + use std::os::unix::fs::symlink; + + let session_dir = tempdir().unwrap(); + fs::write(session_dir.path().join("target.txt"), "target\n").unwrap(); + symlink( + session_dir.path().join("target.txt"), + session_dir.path().join("link.txt"), + ) + .unwrap(); + let argv = vec![ + "apply_patch".to_string(), + "*** Begin Patch\n*** Delete File: link.txt\n*** End Patch".to_string(), + ]; + + let result = maybe_parse_apply_patch_verified( + &argv, + &AbsolutePathBuf::from_absolute_path(session_dir.path()).unwrap(), + LOCAL_FS.as_ref(), + /*sandbox*/ None, + ) + .await; + + assert!(matches!(result, MaybeApplyPatchVerified::Body(_))); + } } diff --git a/codex-rs/apply-patch/src/lib.rs b/codex-rs/apply-patch/src/lib.rs index 7a47b1ea48..c3ed4aa02f 100644 --- a/codex-rs/apply-patch/src/lib.rs +++ b/codex-rs/apply-patch/src/lib.rs @@ -180,6 +180,51 @@ impl ApplyPatchAction { } } +/// Textual file changes that were actually committed while applying a patch. +#[derive(Clone, Debug, PartialEq)] +pub struct AppliedPatchDelta { + changes: Vec, + exact: bool, +} + +impl AppliedPatchDelta { + fn new(changes: Vec, exact: bool) -> Self { + Self { changes, exact } + } + + pub fn changes(&self) -> &[AppliedPatchChange] { + &self.changes + } + + pub fn is_exact(&self) -> bool { + self.exact + } +} + +/// A committed file change, preserved in the order it was applied. +#[derive(Clone, Debug, PartialEq)] +pub struct AppliedPatchChange { + pub path: PathBuf, + pub change: AppliedPatchFileChange, +} + +#[derive(Clone, Debug, PartialEq)] +pub enum AppliedPatchFileChange { + Add { + content: String, + overwritten_content: Option, + }, + Delete { + content: String, + }, + Update { + move_path: Option, + old_content: String, + overwritten_move_content: Option, + new_content: String, + }, +} + /// Applies the patch and prints the result to stdout/stderr. pub async fn apply_patch( patch: &str, @@ -188,7 +233,7 @@ pub async fn apply_patch( stderr: &mut impl std::io::Write, fs: &dyn ExecutorFileSystem, sandbox: Option<&FileSystemSandboxContext>, -) -> Result<(), ApplyPatchError> { +) -> Result { let hunks = match parse_patch(patch) { Ok(source) => source.hunks, Err(e) => { @@ -211,9 +256,7 @@ pub async fn apply_patch( } }; - apply_hunks(&hunks, cwd, stdout, stderr, fs, sandbox).await?; - - Ok(()) + apply_hunks(&hunks, cwd, stdout, stderr, fs, sandbox).await } /// Applies hunks and continues to update stdout/stderr @@ -224,12 +267,12 @@ pub async fn apply_hunks( stderr: &mut impl std::io::Write, fs: &dyn ExecutorFileSystem, sandbox: Option<&FileSystemSandboxContext>, -) -> Result<(), ApplyPatchError> { +) -> Result { // Delegate to a helper that applies each hunk to the filesystem. match apply_hunks_to_files(hunks, cwd, fs, sandbox).await { - Ok(affected) => { - print_summary(&affected, stdout).map_err(ApplyPatchError::from)?; - Ok(()) + Ok(applied) => { + print_summary(&applied.affected_paths, stdout).map_err(ApplyPatchError::from)?; + Ok(applied.delta) } Err(err) => { let msg = err.to_string(); @@ -256,6 +299,11 @@ pub struct AffectedPaths { pub deleted: Vec, } +struct AppliedHunks { + affected_paths: AffectedPaths, + delta: AppliedPatchDelta, +} + /// Apply the hunks to the filesystem, returning which files were added, modified, or deleted. /// Returns an error if the patch could not be applied. async fn apply_hunks_to_files( @@ -263,7 +311,7 @@ async fn apply_hunks_to_files( cwd: &AbsolutePathBuf, fs: &dyn ExecutorFileSystem, sandbox: Option<&FileSystemSandboxContext>, -) -> anyhow::Result { +) -> anyhow::Result { if hunks.is_empty() { anyhow::bail!("No files were modified."); } @@ -271,11 +319,16 @@ async fn apply_hunks_to_files( let mut added: Vec = Vec::new(); let mut modified: Vec = Vec::new(); let mut deleted: Vec = Vec::new(); + let mut delta_changes = Vec::new(); + let mut delta_exact = true; for hunk in hunks { let affected_path = hunk.path().to_path_buf(); let path_abs = hunk.resolve_path(cwd); match hunk { Hunk::AddFile { contents, .. } => { + let overwritten_content = + read_optional_file_text_for_delta(&path_abs, fs, sandbox, &mut delta_exact) + .await; write_file_with_missing_parent_retry( fs, &path_abs, @@ -283,9 +336,21 @@ async fn apply_hunks_to_files( sandbox, ) .await?; + delta_changes.push(AppliedPatchChange { + path: path_abs.into_path_buf(), + change: AppliedPatchFileChange::Add { + content: contents.clone(), + overwritten_content, + }, + }); added.push(affected_path); } Hunk::DeleteFile { .. } => { + note_existing_path_delta_support(&path_abs, fs, sandbox, &mut delta_exact).await; + let deleted_content = fs.read_file_text(&path_abs, sandbox).await.ok(); + if deleted_content.is_none() { + delta_exact = false; + } let result: io::Result<()> = async { let metadata = fs.get_metadata(&path_abs, sandbox).await?; if metadata.is_directory { @@ -306,19 +371,31 @@ async fn apply_hunks_to_files( } .await; result.with_context(|| format!("Failed to delete file {}", path_abs.display()))?; + if let Some(content) = deleted_content { + delta_changes.push(AppliedPatchChange { + path: path_abs.into_path_buf(), + change: AppliedPatchFileChange::Delete { content }, + }); + } deleted.push(affected_path); } Hunk::UpdateFile { move_path, chunks, .. } => { - let AppliedPatch { new_contents, .. } = - derive_new_contents_from_chunks(&path_abs, chunks, fs, sandbox).await?; + note_existing_path_delta_support(&path_abs, fs, sandbox, &mut delta_exact).await; + let AppliedPatch { + original_contents, + new_contents, + } = derive_new_contents_from_chunks(&path_abs, chunks, fs, sandbox).await?; if let Some(dest) = move_path { let dest_abs = AbsolutePathBuf::resolve_path_against_base(dest, cwd); + let overwritten_move_content = + read_optional_file_text_for_delta(&dest_abs, fs, sandbox, &mut delta_exact) + .await; write_file_with_missing_parent_retry( fs, &dest_abs, - new_contents.into_bytes(), + new_contents.clone().into_bytes(), sandbox, ) .await?; @@ -344,23 +421,75 @@ async fn apply_hunks_to_files( result.with_context(|| { format!("Failed to remove original {}", path_abs.display()) })?; + delta_changes.push(AppliedPatchChange { + path: path_abs.into_path_buf(), + change: AppliedPatchFileChange::Update { + move_path: Some(dest_abs.into_path_buf()), + old_content: original_contents, + overwritten_move_content, + new_content: new_contents, + }, + }); modified.push(affected_path); } else { - fs.write_file(&path_abs, new_contents.into_bytes(), sandbox) + fs.write_file(&path_abs, new_contents.clone().into_bytes(), sandbox) .await .with_context(|| format!("Failed to write file {}", path_abs.display()))?; + delta_changes.push(AppliedPatchChange { + path: path_abs.into_path_buf(), + change: AppliedPatchFileChange::Update { + move_path: None, + old_content: original_contents, + overwritten_move_content: None, + new_content: new_contents, + }, + }); modified.push(affected_path); } } } } - Ok(AffectedPaths { - added, - modified, - deleted, + Ok(AppliedHunks { + affected_paths: AffectedPaths { + added, + modified, + deleted, + }, + delta: AppliedPatchDelta::new(delta_changes, delta_exact), }) } +async fn read_optional_file_text_for_delta( + path: &AbsolutePathBuf, + fs: &dyn ExecutorFileSystem, + sandbox: Option<&FileSystemSandboxContext>, + exact: &mut bool, +) -> Option { + note_existing_path_delta_support(path, fs, sandbox, exact).await; + match fs.read_file_text(path, sandbox).await { + Ok(content) => Some(content), + Err(source) if source.kind() == io::ErrorKind::NotFound => None, + Err(_) => { + *exact = false; + None + } + } +} + +async fn note_existing_path_delta_support( + path: &AbsolutePathBuf, + fs: &dyn ExecutorFileSystem, + sandbox: Option<&FileSystemSandboxContext>, + exact: &mut bool, +) { + match fs.get_metadata(path, sandbox).await { + Ok(metadata) if metadata.is_file && !metadata.is_symlink => {} + Ok(_) => *exact = false, + Err(source) if source.kind() == io::ErrorKind::NotFound => {} + Err(_) => *exact = false, + } +} + async fn write_file_with_missing_parent_retry( fs: &dyn ExecutorFileSystem, path_abs: &AbsolutePathBuf, @@ -561,6 +690,7 @@ fn apply_replacements( #[derive(Debug, Eq, PartialEq)] pub struct ApplyPatchFileUpdate { unified_diff: String, + original_content: String, content: String, } @@ -588,6 +718,7 @@ pub async fn unified_diff_from_chunks_with_context( let unified_diff = text_diff.unified_diff().context_radius(context).to_string(); Ok(ApplyPatchFileUpdate { unified_diff, + original_content: original_contents, content: new_contents, }) } @@ -1082,6 +1213,7 @@ mod tests { "#; let expected = ApplyPatchFileUpdate { unified_diff: expected_diff.to_string(), + original_content: "foo\nbar\nbaz\nqux\n".to_string(), content: "foo\nBAR\nbaz\nQUX\n".to_string(), }; assert_eq!(expected, diff); @@ -1122,6 +1254,7 @@ mod tests { "#; let expected = ApplyPatchFileUpdate { unified_diff: expected_diff.to_string(), + original_content: "foo\nbar\nbaz\n".to_string(), content: "FOO\nbar\nbaz\n".to_string(), }; assert_eq!(expected, diff); @@ -1163,6 +1296,7 @@ mod tests { "#; let expected = ApplyPatchFileUpdate { unified_diff: expected_diff.to_string(), + original_content: "foo\nbar\nbaz\n".to_string(), content: "foo\nbar\nBAZ\n".to_string(), }; assert_eq!(expected, diff); @@ -1201,6 +1335,7 @@ mod tests { "#; let expected = ApplyPatchFileUpdate { unified_diff: expected_diff.to_string(), + original_content: "foo\nbar\nbaz\n".to_string(), content: "foo\nbar\nbaz\nquux\n".to_string(), }; assert_eq!(expected, diff); @@ -1260,6 +1395,7 @@ mod tests { let expected = ApplyPatchFileUpdate { unified_diff: expected_diff.to_string(), + original_content: "a\nb\nc\nd\ne\nf\n".to_string(), content: "a\nB\nc\nd\nE\nf\ng\n".to_string(), }; @@ -1318,4 +1454,59 @@ g .await; assert!(result.is_err()); } + + #[tokio::test] + async fn test_unreadable_destinations_return_inexact_delta() { + let dir = tempdir().unwrap(); + let path = dir.path().join("binary.dat"); + fs::write(dir.path().join("source.txt"), "before\n").unwrap(); + let cwd = AbsolutePathBuf::from_absolute_path(dir.path()).unwrap(); + + for patch in [ + wrap_patch("*** Add File: binary.dat\n+text"), + wrap_patch("*** Update File: source.txt\n*** Move to: binary.dat\n@@\n-before\n+after"), + ] { + fs::write(&path, [0xff, 0xfe, 0xfd]).unwrap(); + let mut stdout = Vec::new(); + let mut stderr = Vec::new(); + let delta = apply_patch( + &patch, + &cwd, + &mut stdout, + &mut stderr, + LOCAL_FS.as_ref(), + /*sandbox*/ None, + ) + .await + .unwrap(); + + assert!(!delta.is_exact()); + } + } + + #[cfg(unix)] + #[tokio::test] + async fn test_delete_symlink_returns_inexact_delta() { + use std::os::unix::fs::symlink; + + let dir = tempdir().unwrap(); + fs::write(dir.path().join("target.txt"), "target\n").unwrap(); + symlink(dir.path().join("target.txt"), dir.path().join("link.txt")).unwrap(); + let patch = wrap_patch("*** Delete File: link.txt"); + + let mut stdout = Vec::new(); + let mut stderr = Vec::new(); + let delta = apply_patch( + &patch, + &AbsolutePathBuf::from_absolute_path(dir.path()).unwrap(), + &mut stdout, + &mut stderr, + LOCAL_FS.as_ref(), + /*sandbox*/ None, + ) + .await + .unwrap(); + + assert!(!delta.is_exact()); + } } diff --git a/codex-rs/apply-patch/src/standalone_executable.rs b/codex-rs/apply-patch/src/standalone_executable.rs index 093bda543b..45ca0d0619 100644 --- a/codex-rs/apply-patch/src/standalone_executable.rs +++ b/codex-rs/apply-patch/src/standalone_executable.rs @@ -73,7 +73,7 @@ pub fn run_main() -> i32 { codex_exec_server::LOCAL_FS.as_ref(), /*sandbox*/ None, )) { - Ok(()) => { + Ok(_) => { // Flush to ensure output ordering when used in pipelines. let _ = stdout.flush(); 0 diff --git a/codex-rs/arg0/src/lib.rs b/codex-rs/arg0/src/lib.rs index 75fefce5cc..2f6ae4653c 100644 --- a/codex-rs/arg0/src/lib.rs +++ b/codex-rs/arg0/src/lib.rs @@ -122,7 +122,7 @@ pub fn arg0_dispatch() -> Option { codex_exec_server::LOCAL_FS.as_ref(), /*sandbox*/ None, )) { - Ok(()) => 0, + Ok(_) => 0, Err(_) => 1, } } diff --git a/codex-rs/core/src/apply_patch.rs b/codex-rs/core/src/apply_patch.rs index d5ebe4fe1f..2463d69c2b 100644 --- a/codex-rs/core/src/apply_patch.rs +++ b/codex-rs/core/src/apply_patch.rs @@ -76,11 +76,10 @@ pub(crate) async fn apply_patch( pub(crate) fn convert_apply_patch_to_protocol( action: &ApplyPatchAction, ) -> HashMap { - let changes = action.changes(); - let mut result = HashMap::with_capacity(changes.len()); - for (path, change) in changes { + let mut result = HashMap::with_capacity(action.changes().len()); + for (path, change) in action.changes() { let protocol_change = match change { - ApplyPatchFileChange::Add { content } => FileChange::Add { + ApplyPatchFileChange::Add { content, .. } => FileChange::Add { content: content.clone(), }, ApplyPatchFileChange::Delete { content } => FileChange::Delete { @@ -95,7 +94,7 @@ pub(crate) fn convert_apply_patch_to_protocol( move_path: move_path.clone(), }, }; - result.insert(path.clone(), protocol_change); + result.insert(path.to_path_buf(), protocol_change); } result } diff --git a/codex-rs/core/src/safety.rs b/codex-rs/core/src/safety.rs index c8c85a6814..dbae1c0c68 100644 --- a/codex-rs/core/src/safety.rs +++ b/codex-rs/core/src/safety.rs @@ -159,8 +159,8 @@ fn is_write_patch_constrained_to_writable_paths( // Determine whether `path` is inside **any** writable root. Both `path` // and roots are converted to absolute, normalized forms before the // prefix check. - let is_path_writable = |p: &PathBuf| { - let abs = resolve_path(cwd, p); + let is_path_writable = |p: &Path| { + let abs = resolve_path(cwd, &p.to_path_buf()); let abs = match normalize(&abs) { Some(v) => v, None => return false, diff --git a/codex-rs/core/src/session/turn.rs b/codex-rs/core/src/session/turn.rs index 171ca10d68..000bbc8ba6 100644 --- a/codex-rs/core/src/session/turn.rs +++ b/codex-rs/core/src/session/turn.rs @@ -68,6 +68,7 @@ use codex_analytics::TurnResolvedConfigFact; use codex_analytics::build_track_events_context; use codex_async_utils::OrCancelExt; use codex_features::Feature; +use codex_git_utils::get_git_repo_root; use codex_hooks::HookEvent; use codex_hooks::HookEventAfterAgent; use codex_hooks::HookPayload; @@ -365,7 +366,11 @@ pub(crate) async fn run_turn( let mut stop_hook_active = false; // Although from the perspective of codex.rs, TurnDiffTracker has the lifecycle of a Task which contains // many turns, from the perspective of the user, it is a single turn. - let turn_diff_tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new())); + let display_root = get_git_repo_root(turn_context.cwd.as_path()) + .unwrap_or_else(|| turn_context.cwd.clone().into_path_buf()); + let turn_diff_tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::with_display_root( + display_root, + ))); // `ModelClientSession` is turn-scoped and caches WebSocket + sticky routing state, so we reuse // one instance across retries within this turn. @@ -2247,10 +2252,10 @@ async fn try_run_sampling_request( if should_emit_turn_diff { let unified_diff = { - let mut tracker = turn_diff_tracker.lock().await; + let tracker = turn_diff_tracker.lock().await; tracker.get_unified_diff() }; - if let Ok(Some(unified_diff)) = unified_diff { + if let Some(unified_diff) = unified_diff { let msg = EventMsg::TurnDiff(TurnDiffEvent { unified_diff }); sess.clone().send_event(&turn_context, msg).await; } diff --git a/codex-rs/core/src/tools/events.rs b/codex-rs/core/src/tools/events.rs index f97e51fd8a..f3a9513ae9 100644 --- a/codex-rs/core/src/tools/events.rs +++ b/codex-rs/core/src/tools/events.rs @@ -4,6 +4,7 @@ use crate::session::turn_context::TurnContext; use crate::tools::context::SharedTurnDiffTracker; use crate::tools::sandboxing::ToolError; use crate::turn_timing::now_unix_timestamp_ms; +use codex_apply_patch::AppliedPatchDelta; use codex_protocol::error::CodexErr; use codex_protocol::error::SandboxErr; use codex_protocol::exec_output::ExecToolCallOutput; @@ -50,9 +51,12 @@ impl<'a> ToolEventCtx<'a> { } } -pub(crate) enum ToolEventStage { +pub(crate) enum ToolEventStage<'a> { Begin, - Success(ExecToolCallOutput), + Success { + output: ExecToolCallOutput, + applied_patch_delta: Option<&'a AppliedPatchDelta>, + }, Failure(ToolEventFailure), } @@ -62,6 +66,12 @@ pub(crate) enum ToolEventFailure { Rejected(String), } +enum TurnDiffTrackerUpdate<'a> { + Track(&'a AppliedPatchDelta), + Invalidate, + None, +} + pub(crate) async fn emit_exec_command_begin( ctx: ToolEventCtx<'_>, command: &[String], @@ -150,7 +160,7 @@ impl ToolEmitter { } } - pub async fn emit(&self, ctx: ToolEventCtx<'_>, stage: ToolEventStage) { + pub async fn emit(&self, ctx: ToolEventCtx<'_>, stage: ToolEventStage<'_>) { match (self, stage) { ( Self::Shell { @@ -177,13 +187,10 @@ impl ToolEmitter { Self::ApplyPatch { changes, auto_approved, + .. }, ToolEventStage::Begin, ) => { - if let Some(tracker) = ctx.turn_diff_tracker { - let mut guard = tracker.lock().await; - guard.on_patch_begin(changes); - } ctx.session .emit_turn_item_started( ctx.turn, @@ -198,17 +205,34 @@ impl ToolEmitter { ) .await; } - (Self::ApplyPatch { changes, .. }, ToolEventStage::Success(output)) => { + ( + Self::ApplyPatch { changes, .. }, + ToolEventStage::Success { + output, + applied_patch_delta, + }, + ) => { + let status = if output.exit_code == 0 { + PatchApplyStatus::Completed + } else { + PatchApplyStatus::Failed + }; + let tracker_update = if output.exit_code == 0 { + if let Some(delta) = applied_patch_delta { + TurnDiffTrackerUpdate::Track(delta) + } else { + TurnDiffTrackerUpdate::Invalidate + } + } else { + TurnDiffTrackerUpdate::Invalidate + }; emit_patch_end( ctx, changes.clone(), output.stdout.text.clone(), output.stderr.text.clone(), - if output.exit_code == 0 { - PatchApplyStatus::Completed - } else { - PatchApplyStatus::Failed - }, + status, + tracker_update, ) .await; } @@ -226,6 +250,7 @@ impl ToolEmitter { } else { PatchApplyStatus::Failed }, + TurnDiffTrackerUpdate::Invalidate, ) .await; } @@ -239,6 +264,7 @@ impl ToolEmitter { String::new(), (*message).to_string(), PatchApplyStatus::Failed, + TurnDiffTrackerUpdate::None, ) .await; } @@ -252,6 +278,7 @@ impl ToolEmitter { String::new(), (*message).to_string(), PatchApplyStatus::Declined, + TurnDiffTrackerUpdate::None, ) .await; } @@ -303,12 +330,16 @@ impl ToolEmitter { &self, ctx: ToolEventCtx<'_>, out: Result, + applied_patch_delta: Option<&AppliedPatchDelta>, ) -> Result { let (event, result) = match out { Ok(output) => { let content = self.format_exec_output_for_model(&output, ctx); let exit_code = output.exit_code; - let event = ToolEventStage::Success(output); + let event = ToolEventStage::Success { + output, + applied_patch_delta, + }; let result = if exit_code == 0 { Ok(content) } else { @@ -401,7 +432,7 @@ struct ExecCommandResult { async fn emit_exec_stage( ctx: ToolEventCtx<'_>, exec_input: ExecCommandInput<'_>, - stage: ToolEventStage, + stage: ToolEventStage<'_>, ) { match stage { ToolEventStage::Begin => { @@ -416,7 +447,7 @@ async fn emit_exec_stage( ) .await; } - ToolEventStage::Success(output) + ToolEventStage::Success { output, .. } | ToolEventStage::Failure(ToolEventFailure::Output(output)) => { let exec_result = ExecCommandResult { stdout: output.stdout.text.clone(), @@ -498,6 +529,7 @@ async fn emit_patch_end( stdout: String, stderr: String, status: PatchApplyStatus, + tracker_update: TurnDiffTrackerUpdate<'_>, ) { ctx.session .emit_turn_item_completed( @@ -514,11 +546,27 @@ async fn emit_patch_end( .await; if let Some(tracker) = ctx.turn_diff_tracker { - let unified_diff = { + let (should_emit_turn_diff, unified_diff) = { let mut guard = tracker.lock().await; - guard.get_unified_diff() + let previous_diff = guard.get_unified_diff(); + let tracker_changed = match tracker_update { + TurnDiffTrackerUpdate::Track(action) => { + guard.track_successful_patch(action); + true + } + TurnDiffTrackerUpdate::Invalidate => { + guard.invalidate(); + true + } + TurnDiffTrackerUpdate::None => false, + }; + let unified_diff = guard.get_unified_diff(); + ( + tracker_changed && (previous_diff.is_some() || unified_diff.is_some()), + unified_diff.unwrap_or_default(), + ) }; - if let Ok(Some(unified_diff)) = unified_diff { + if should_emit_turn_diff { ctx.session .send_event(ctx.turn, EventMsg::TurnDiff(TurnDiffEvent { unified_diff })) .await; diff --git a/codex-rs/core/src/tools/handlers/apply_patch.rs b/codex-rs/core/src/tools/handlers/apply_patch.rs index 75a92953c6..9e31eb9915 100644 --- a/codex-rs/core/src/tools/handlers/apply_patch.rs +++ b/codex-rs/core/src/tools/handlers/apply_patch.rs @@ -437,13 +437,17 @@ impl ToolHandler for ApplyPatchHandler { ) .await .map(|result| result.output); + let (out, delta) = match out { + Ok(output) => (Ok(output.exec_output), output.delta), + Err(error) => (Err(error), None), + }; let event_ctx = ToolEventCtx::new( session.as_ref(), turn.as_ref(), &call_id, Some(&tracker), ); - let content = emitter.finish(event_ctx, out).await?; + let content = emitter.finish(event_ctx, out, delta.as_ref()).await?; Ok(ApplyPatchToolOutput::from_text(content)) } } @@ -545,13 +549,17 @@ pub(crate) async fn intercept_apply_patch( ) .await .map(|result| result.output); + let (out, delta) = match out { + Ok(output) => (Ok(output.exec_output), output.delta), + Err(error) => (Err(error), None), + }; let event_ctx = ToolEventCtx::new( session.as_ref(), turn.as_ref(), call_id, tracker.as_ref().copied(), ); - let content = emitter.finish(event_ctx, out).await?; + let content = emitter.finish(event_ctx, out, delta.as_ref()).await?; Ok(Some(FunctionToolOutput::from_text(content, Some(true)))) } } diff --git a/codex-rs/core/src/tools/handlers/shell.rs b/codex-rs/core/src/tools/handlers/shell.rs index 469c0a0799..a0d744305e 100644 --- a/codex-rs/core/src/tools/handlers/shell.rs +++ b/codex-rs/core/src/tools/handlers/shell.rs @@ -294,7 +294,9 @@ async fn run_exec_like(args: RunExecLikeArgs) -> Result, +} + impl ApplyPatchRuntime { pub fn new() -> Self { Self @@ -184,13 +191,13 @@ impl Approvable for ApplyPatchRuntime { } } -impl ToolRuntime for ApplyPatchRuntime { +impl ToolRuntime for ApplyPatchRuntime { async fn run( &mut self, req: &ApplyPatchRequest, attempt: &SandboxAttempt<'_>, ctx: &ToolCtx, - ) -> Result { + ) -> Result { let turn_environment = ctx.turn.environments.primary().ok_or_else(|| { ToolError::Rejected("apply_patch is unavailable in this session".to_string()) })?; @@ -225,7 +232,10 @@ impl ToolRuntime for ApplyPatchRuntime { network_policy_decision: None, }))); } - Ok(output) + Ok(ApplyPatchRuntimeOutput { + exec_output: output, + delta: result.ok(), + }) } } diff --git a/codex-rs/core/src/turn_diff_tracker.rs b/codex-rs/core/src/turn_diff_tracker.rs index 2353e49e82..b7b75bbca7 100644 --- a/codex-rs/core/src/turn_diff_tracker.rs +++ b/codex-rs/core/src/turn_diff_tracker.rs @@ -1,45 +1,38 @@ use std::collections::HashMap; -use std::fs; +use std::collections::HashSet; use std::path::Path; use std::path::PathBuf; -use std::process::Command; -use anyhow::Context; -use anyhow::Result; -use anyhow::anyhow; use sha1::digest::Output; -use uuid::Uuid; -use codex_protocol::protocol::FileChange; +use codex_apply_patch::AppliedPatchChange; +use codex_apply_patch::AppliedPatchDelta; +use codex_apply_patch::AppliedPatchFileChange; const ZERO_OID: &str = "0000000000000000000000000000000000000000"; const DEV_NULL: &str = "/dev/null"; +const REGULAR_FILE_MODE: &str = "100644"; -struct BaselineFileInfo { - path: PathBuf, - content: Vec, - mode: FileMode, - oid: String, +/// Tracks the net text diff for the current turn from successful apply_patch +/// operations, without rereading the workspace filesystem. +pub struct TurnDiffTracker { + valid: bool, + display_root: Option, + baseline_by_path: HashMap, + current_by_path: HashMap, + origin_by_current_path: HashMap, } -/// Tracks sets of changes to files and exposes the overall unified diff. -/// Internally, the way this works is now: -/// 1. Maintain an in-memory baseline snapshot of files when they are first seen. -/// For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null). -/// 2. Keep a stable internal filename (uuid) per external path for rename tracking. -/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory -/// using the `similar` crate and emit unified diffs with rewritten external paths. -#[derive(Default)] -pub struct TurnDiffTracker { - /// Map external path -> internal filename (uuid). - external_to_temp_name: HashMap, - /// Internal filename -> baseline file info. - baseline_file_info: HashMap, - /// Internal filename -> external path as of current accumulated state (after applying all changes). - /// This is where renames are tracked. - temp_name_to_current_path: HashMap, - /// Cache of known git worktree roots to avoid repeated filesystem walks. - git_root_cache: Vec, +impl Default for TurnDiffTracker { + fn default() -> Self { + Self { + valid: true, + display_root: None, + baseline_by_path: HashMap::new(), + current_by_path: HashMap::new(), + origin_by_current_path: HashMap::new(), + } + } } impl TurnDiffTracker { @@ -47,330 +40,268 @@ impl TurnDiffTracker { Self::default() } - /// Front-run apply patch calls to track the starting contents of any modified files. - /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen. - /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions. - /// - Also updates internal mappings for move/rename events. - pub fn on_patch_begin(&mut self, changes: &HashMap) { - for (path, change) in changes.iter() { - // Ensure a stable internal filename exists for this external path. - if !self.external_to_temp_name.contains_key(path) { - let internal = Uuid::new_v4().to_string(); - self.external_to_temp_name - .insert(path.clone(), internal.clone()); - self.temp_name_to_current_path - .insert(internal.clone(), path.clone()); + pub fn with_display_root(display_root: PathBuf) -> Self { + let mut tracker = Self::new(); + tracker.display_root = Some(display_root); + tracker + } - // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null. - let baseline_file_info = if path.exists() { - let mode = file_mode_for_path(path); - let mode_val = mode.unwrap_or(FileMode::Regular); - let content = blob_bytes(path, mode_val).unwrap_or_default(); - let oid = if mode == Some(FileMode::Symlink) { - format!("{:x}", git_blob_sha1_hex_bytes(&content)) - } else { - self.git_blob_oid_for_path(path) - .unwrap_or_else(|| format!("{:x}", git_blob_sha1_hex_bytes(&content))) - }; - Some(BaselineFileInfo { - path: path.clone(), - content, - mode: mode_val, - oid, - }) - } else { - Some(BaselineFileInfo { - path: path.clone(), - content: vec![], - mode: FileMode::Regular, - oid: ZERO_OID.to_string(), - }) - }; + pub fn track_successful_patch(&mut self, delta: &AppliedPatchDelta) { + if !delta.is_exact() { + self.invalidate(); + return; + } - if let Some(baseline_file_info) = baseline_file_info { - self.baseline_file_info - .insert(internal.clone(), baseline_file_info); - } - } - - // Track rename/move in current mapping if provided in an Update. - if let FileChange::Update { - move_path: Some(dest), - .. - } = change - { - let uuid_filename = match self.external_to_temp_name.get(path) { - Some(i) => i.clone(), - None => { - // This should be rare, but if we haven't mapped the source, create it with no baseline. - let i = Uuid::new_v4().to_string(); - self.baseline_file_info.insert( - i.clone(), - BaselineFileInfo { - path: path.clone(), - content: vec![], - mode: FileMode::Regular, - oid: ZERO_OID.to_string(), - }, - ); - i - } - }; - // Update current external mapping for temp file name. - self.temp_name_to_current_path - .insert(uuid_filename.clone(), dest.clone()); - // Update forward file_mapping: external current -> internal name. - self.external_to_temp_name.remove(path); - self.external_to_temp_name - .insert(dest.clone(), uuid_filename); - }; + for change in delta.changes() { + self.apply_change(change); } } - fn get_path_for_internal(&self, internal: &str) -> Option { - self.temp_name_to_current_path - .get(internal) - .cloned() - .or_else(|| { - self.baseline_file_info - .get(internal) - .map(|info| info.path.clone()) - }) + pub fn invalidate(&mut self) { + self.valid = false; } - /// Find the git worktree root for a file/directory by walking up to the first ancestor containing a `.git` entry. - /// Uses a simple cache of known roots and avoids negative-result caching for simplicity. - fn find_git_root_cached(&mut self, start: &Path) -> Option { - let dir = if start.is_dir() { - start - } else { - start.parent()? - }; - - // Fast path: if any cached root is an ancestor of this path, use it. - if let Some(root) = self - .git_root_cache - .iter() - .find(|r| dir.starts_with(r)) - .cloned() - { - return Some(root); - } - - // Walk up to find a `.git` marker. - let mut cur = dir.to_path_buf(); - loop { - let git_marker = cur.join(".git"); - if git_marker.is_dir() || git_marker.is_file() { - if !self.git_root_cache.iter().any(|r| r == &cur) { - self.git_root_cache.push(cur.clone()); - } - return Some(cur); - } - - // On Windows, avoid walking above the drive or UNC share root. - #[cfg(windows)] - { - if is_windows_drive_or_unc_root(&cur) { - return None; - } - } - - if let Some(parent) = cur.parent() { - cur = parent.to_path_buf(); - } else { - return None; - } - } - } - - /// Return a display string for `path` relative to its git root if found, else absolute. - fn relative_to_git_root_str(&mut self, path: &Path) -> String { - let s = if let Some(root) = self.find_git_root_cached(path) { - if let Ok(rel) = path.strip_prefix(&root) { - rel.display().to_string() - } else { - path.display().to_string() - } - } else { - path.display().to_string() - }; - s.replace('\\', "/") - } - - /// Ask git to compute the blob SHA-1 for the file at `path` within its repository. - /// Returns None if no repository is found or git invocation fails. - fn git_blob_oid_for_path(&mut self, path: &Path) -> Option { - let root = self.find_git_root_cached(path)?; - // Compute a path relative to the repo root for better portability across platforms. - let rel = path.strip_prefix(&root).unwrap_or(path); - let output = Command::new("git") - .arg("-C") - .arg(&root) - .arg("hash-object") - .arg("--") - .arg(rel) - .output() - .ok()?; - if !output.status.success() { + pub fn get_unified_diff(&self) -> Option { + if !self.valid { return None; } - let s = String::from_utf8_lossy(&output.stdout).trim().to_string(); - if s.len() == 40 { Some(s) } else { None } - } - /// Recompute the aggregated unified diff by comparing all of the in-memory snapshots that were - /// collected before the first time they were touched by apply_patch during this turn with - /// the current repo state. - pub fn get_unified_diff(&mut self) -> Result> { + let rename_pairs = self.rename_pairs(); + let paired_destinations = rename_pairs.values().cloned().collect::>(); + let mut handled = HashSet::new(); + let mut paths = self + .baseline_by_path + .keys() + .chain(self.current_by_path.keys()) + .cloned() + .collect::>(); + paths.sort_by_key(|path| self.display_path(path)); + paths.dedup(); + let mut aggregated = String::new(); + for path in paths { + if !handled.insert(path.clone()) { + continue; + } - // Compute diffs per tracked internal file in a stable order by external path. - let mut baseline_file_names: Vec = - self.baseline_file_info.keys().cloned().collect(); - // Sort lexicographically by full repo-relative path to match git behavior. - baseline_file_names.sort_by_key(|internal| { - self.get_path_for_internal(internal) - .map(|p| self.relative_to_git_root_str(&p)) - .unwrap_or_default() - }); + if paired_destinations.contains(&path) { + continue; + } - for internal in baseline_file_names { - aggregated.push_str(self.get_file_diff(&internal).as_str()); - if !aggregated.ends_with('\n') { - aggregated.push('\n'); + let diff = if let Some(dest) = rename_pairs.get(&path) { + handled.insert(dest.clone()); + self.render_rename_diff(&path, dest) + } else { + self.render_path_diff(&path) + }; + + if let Some(diff) = diff { + aggregated.push_str(&diff); + if !aggregated.ends_with('\n') { + aggregated.push('\n'); + } } } - if aggregated.trim().is_empty() { - Ok(None) - } else { - Ok(Some(aggregated)) + (!aggregated.is_empty()).then_some(aggregated) + } + + fn apply_change(&mut self, change: &AppliedPatchChange) { + let source_path = change.path.as_path(); + match &change.change { + AppliedPatchFileChange::Add { + content, + overwritten_content, + } => self.apply_add(source_path, content, overwritten_content.as_deref()), + AppliedPatchFileChange::Delete { content } => self.apply_delete(source_path, content), + AppliedPatchFileChange::Update { + move_path, + old_content, + overwritten_move_content, + new_content, + } => self.apply_update( + source_path, + move_path.as_deref(), + old_content, + overwritten_move_content.as_deref(), + new_content, + ), } } - fn get_file_diff(&mut self, internal_file_name: &str) -> String { - let mut aggregated = String::new(); + fn apply_add(&mut self, path: &Path, content: &str, overwritten_content: Option<&str>) { + self.origin_by_current_path.remove(path); + if !self.current_by_path.contains_key(path) + && !self.baseline_by_path.contains_key(path) + && let Some(overwritten_content) = overwritten_content + { + self.baseline_by_path + .insert(path.to_path_buf(), overwritten_content.to_string()); + } + self.current_by_path + .insert(path.to_path_buf(), content.to_string()); + } - // Snapshot lightweight fields only. - let (baseline_external_path, baseline_mode, left_oid) = { - if let Some(info) = self.baseline_file_info.get(internal_file_name) { - (info.path.clone(), info.mode, info.oid.clone()) - } else { - (PathBuf::new(), FileMode::Regular, ZERO_OID.to_string()) - } - }; - let current_external_path = match self.get_path_for_internal(internal_file_name) { - Some(p) => p, - None => return aggregated, - }; + fn apply_delete(&mut self, path: &Path, content: &str) { + if self.current_by_path.remove(path).is_none() && !self.baseline_by_path.contains_key(path) + { + self.baseline_by_path + .insert(path.to_path_buf(), content.to_string()); + } + self.origin_by_current_path.remove(path); + } - let current_mode = file_mode_for_path(¤t_external_path).unwrap_or(FileMode::Regular); - let right_bytes = blob_bytes(¤t_external_path, current_mode); - - // Compute displays with &mut self before borrowing any baseline content. - let left_display = self.relative_to_git_root_str(&baseline_external_path); - let right_display = self.relative_to_git_root_str(¤t_external_path); - - // Compute right oid before borrowing baseline content. - let right_oid = if let Some(b) = right_bytes.as_ref() { - if current_mode == FileMode::Symlink { - format!("{:x}", git_blob_sha1_hex_bytes(b)) - } else { - self.git_blob_oid_for_path(¤t_external_path) - .unwrap_or_else(|| format!("{:x}", git_blob_sha1_hex_bytes(b))) - } - } else { - ZERO_OID.to_string() - }; - - // Borrow baseline content only after all &mut self uses are done. - let left_present = left_oid.as_str() != ZERO_OID; - let left_bytes: Option<&[u8]> = if left_present { - self.baseline_file_info - .get(internal_file_name) - .map(|i| i.content.as_slice()) - } else { - None - }; - - // Fast path: identical bytes or both missing. - if left_bytes == right_bytes.as_deref() { - return aggregated; + fn apply_update( + &mut self, + source_path: &Path, + move_path: Option<&Path>, + old_content: &str, + overwritten_move_content: Option<&str>, + new_content: &str, + ) { + if !self.current_by_path.contains_key(source_path) + && !self.baseline_by_path.contains_key(source_path) + { + self.baseline_by_path + .insert(source_path.to_path_buf(), old_content.to_string()); } - aggregated.push_str(&format!("diff --git a/{left_display} b/{right_display}\n")); + match move_path { + Some(dest_path) => { + if !self.current_by_path.contains_key(dest_path) + && !self.baseline_by_path.contains_key(dest_path) + && let Some(overwritten_move_content) = overwritten_move_content + { + self.baseline_by_path.insert( + dest_path.to_path_buf(), + overwritten_move_content.to_string(), + ); + } + let origin = self + .origin_by_current_path + .remove(source_path) + .unwrap_or_else(|| source_path.to_path_buf()); + self.current_by_path.remove(source_path); + self.current_by_path + .insert(dest_path.to_path_buf(), new_content.to_string()); + self.origin_by_current_path.remove(dest_path); + if dest_path != origin.as_path() { + self.origin_by_current_path + .insert(dest_path.to_path_buf(), origin); + } + } + None => { + self.current_by_path + .insert(source_path.to_path_buf(), new_content.to_string()); + } + } + } - let is_add = !left_present && right_bytes.is_some(); - let is_delete = left_present && right_bytes.is_none(); + fn rename_pairs(&self) -> HashMap { + self.origin_by_current_path + .iter() + .filter_map(|(dest_path, origin_path)| { + if dest_path == origin_path + || self.current_by_path.contains_key(origin_path) + || !self.current_by_path.contains_key(dest_path) + || !self.baseline_by_path.contains_key(origin_path) + || self.baseline_by_path.contains_key(dest_path) + { + return None; + } - if is_add { - aggregated.push_str(&format!("new file mode {current_mode}\n")); - } else if is_delete { - aggregated.push_str(&format!("deleted file mode {baseline_mode}\n")); - } else if baseline_mode != current_mode { - aggregated.push_str(&format!("old mode {baseline_mode}\n")); - aggregated.push_str(&format!("new mode {current_mode}\n")); + Some((origin_path.clone(), dest_path.clone())) + }) + .collect() + } + + fn render_path_diff(&self, path: &Path) -> Option { + self.render_diff( + path, + self.baseline_by_path.get(path).map(String::as_str), + path, + self.current_by_path.get(path).map(String::as_str), + ) + } + + fn render_rename_diff(&self, source_path: &Path, dest_path: &Path) -> Option { + self.render_diff( + source_path, + self.baseline_by_path.get(source_path).map(String::as_str), + dest_path, + self.current_by_path.get(dest_path).map(String::as_str), + ) + } + + fn render_diff( + &self, + left_path: &Path, + left_content: Option<&str>, + right_path: &Path, + right_content: Option<&str>, + ) -> Option { + if left_content == right_content { + return None; } - let left_text = left_bytes.and_then(|b| std::str::from_utf8(b).ok()); - let right_text = right_bytes - .as_deref() - .and_then(|b| std::str::from_utf8(b).ok()); - - let can_text_diff = matches!( - (left_text, right_text, is_add, is_delete), - (Some(_), Some(_), _, _) | (_, Some(_), true, _) | (Some(_), _, _, true) + let left_display = self.display_path(left_path); + let right_display = self.display_path(right_path); + let left_oid = left_content.map_or_else( + || ZERO_OID.to_string(), + |content| git_blob_oid(content.as_bytes()), + ); + let right_oid = right_content.map_or_else( + || ZERO_OID.to_string(), + |content| git_blob_oid(content.as_bytes()), ); - if can_text_diff { - let l = left_text.unwrap_or(""); - let r = right_text.unwrap_or(""); + let mut diff = format!("diff --git a/{left_display} b/{right_display}\n"); + match (left_content, right_content) { + (None, Some(_)) => diff.push_str(&format!("new file mode {REGULAR_FILE_MODE}\n")), + (Some(_), None) => diff.push_str(&format!("deleted file mode {REGULAR_FILE_MODE}\n")), + (Some(_), Some(_)) => {} + (None, None) => return None, + } - aggregated.push_str(&format!("index {left_oid}..{right_oid}\n")); + diff.push_str(&format!("index {left_oid}..{right_oid}\n")); - let old_header = if left_present { - format!("a/{left_display}") - } else { - DEV_NULL.to_string() - }; - let new_header = if right_bytes.is_some() { - format!("b/{right_display}") - } else { - DEV_NULL.to_string() - }; + let old_header = if left_content.is_some() { + format!("a/{left_display}") + } else { + DEV_NULL.to_string() + }; + let new_header = if right_content.is_some() { + format!("b/{right_display}") + } else { + DEV_NULL.to_string() + }; - let diff = similar::TextDiff::from_lines(l, r); - let unified = diff + let unified = + similar::TextDiff::from_lines(left_content.unwrap_or(""), right_content.unwrap_or("")) .unified_diff() .context_radius(3) .header(&old_header, &new_header) .to_string(); - - aggregated.push_str(&unified); - } else { - aggregated.push_str(&format!("index {left_oid}..{right_oid}\n")); - let old_header = if left_present { - format!("a/{left_display}") - } else { - DEV_NULL.to_string() - }; - let new_header = if right_bytes.is_some() { - format!("b/{right_display}") - } else { - DEV_NULL.to_string() - }; - aggregated.push_str(&format!("--- {old_header}\n")); - aggregated.push_str(&format!("+++ {new_header}\n")); - aggregated.push_str("Binary files differ\n"); - } - aggregated + diff.push_str(&unified); + Some(diff) } + + fn display_path(&self, path: &Path) -> String { + let display = self + .display_root + .as_deref() + .and_then(|root| path.strip_prefix(root).ok()) + .unwrap_or(path); + display.display().to_string().replace('\\', "/") + } +} + +fn git_blob_oid(data: &[u8]) -> String { + format!("{:x}", git_blob_sha1_hex_bytes(data)) } /// Compute the Git SHA-1 blob object ID for the given content (bytes). fn git_blob_sha1_hex_bytes(data: &[u8]) -> Output { - // Git blob hash is sha1 of: "blob \0" let header = format!("blob {}\0", data.len()); use sha1::Digest; let mut hasher = sha1::Sha1::new(); @@ -379,91 +310,6 @@ fn git_blob_sha1_hex_bytes(data: &[u8]) -> Output { hasher.finalize() } -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -enum FileMode { - Regular, - #[cfg(unix)] - Executable, - Symlink, -} - -impl FileMode { - fn as_str(self) -> &'static str { - match self { - FileMode::Regular => "100644", - #[cfg(unix)] - FileMode::Executable => "100755", - FileMode::Symlink => "120000", - } - } -} - -impl std::fmt::Display for FileMode { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str(self.as_str()) - } -} - -#[cfg(unix)] -fn file_mode_for_path(path: &Path) -> Option { - use std::os::unix::fs::PermissionsExt; - let meta = fs::symlink_metadata(path).ok()?; - let ft = meta.file_type(); - if ft.is_symlink() { - return Some(FileMode::Symlink); - } - let mode = meta.permissions().mode(); - let is_exec = (mode & 0o111) != 0; - Some(if is_exec { - FileMode::Executable - } else { - FileMode::Regular - }) -} - -#[cfg(not(unix))] -fn file_mode_for_path(_path: &Path) -> Option { - // Default to non-executable on non-unix. - Some(FileMode::Regular) -} - -fn blob_bytes(path: &Path, mode: FileMode) -> Option> { - if path.exists() { - let contents = if mode == FileMode::Symlink { - symlink_blob_bytes(path) - .ok_or_else(|| anyhow!("failed to read symlink target for {}", path.display())) - } else { - fs::read(path) - .with_context(|| format!("failed to read current file for diff {}", path.display())) - }; - contents.ok() - } else { - None - } -} - -#[cfg(unix)] -fn symlink_blob_bytes(path: &Path) -> Option> { - use std::os::unix::ffi::OsStrExt; - let target = std::fs::read_link(path).ok()?; - Some(target.as_os_str().as_bytes().to_vec()) -} - -#[cfg(not(unix))] -fn symlink_blob_bytes(_path: &Path) -> Option> { - None -} - -#[cfg(windows)] -fn is_windows_drive_or_unc_root(p: &std::path::Path) -> bool { - use std::path::Component; - let mut comps = p.components(); - matches!( - (comps.next(), comps.next(), comps.next()), - (Some(Component::Prefix(_)), Some(Component::RootDir), None) - ) -} - #[cfg(test)] #[path = "turn_diff_tracker_tests.rs"] mod tests; diff --git a/codex-rs/core/src/turn_diff_tracker_tests.rs b/codex-rs/core/src/turn_diff_tracker_tests.rs index e0ab2dd667..ed5da70634 100644 --- a/codex-rs/core/src/turn_diff_tracker_tests.rs +++ b/codex-rs/core/src/turn_diff_tracker_tests.rs @@ -1,427 +1,330 @@ use super::*; +use codex_apply_patch::AppliedPatchDelta; +use codex_apply_patch::MaybeApplyPatchVerified; +use codex_exec_server::LOCAL_FS; +use codex_utils_absolute_path::AbsolutePathBuf; use pretty_assertions::assert_eq; +use std::fs; +use std::path::Path; use tempfile::tempdir; -/// Compute the Git SHA-1 blob object ID for the given content (string). -/// This delegates to the bytes version to avoid UTF-8 lossy conversions here. fn git_blob_sha1_hex(data: &str) -> String { format!("{:x}", git_blob_sha1_hex_bytes(data.as_bytes())) } -fn normalize_diff_for_test(input: &str, root: &Path) -> String { - let root_str = root.display().to_string().replace('\\', "/"); - let replaced = input.replace(&root_str, ""); - // Split into blocks on lines starting with "diff --git ", sort blocks for determinism, and rejoin - let mut blocks: Vec = Vec::new(); - let mut current = String::new(); - for line in replaced.lines() { - if line.starts_with("diff --git ") && !current.is_empty() { - blocks.push(current); - current = String::new(); - } - if !current.is_empty() { - current.push('\n'); - } - current.push_str(line); +async fn apply_verified_patch(root: &Path, patch: &str) -> AppliedPatchDelta { + let cwd = AbsolutePathBuf::from_absolute_path(root).expect("absolute tempdir path"); + let argv = vec!["apply_patch".to_string(), patch.to_string()]; + match codex_apply_patch::maybe_parse_apply_patch_verified( + &argv, + &cwd, + LOCAL_FS.as_ref(), + /*sandbox*/ None, + ) + .await + { + MaybeApplyPatchVerified::Body(_) => {} + other => panic!("expected verified patch action, got {other:?}"), } - if !current.is_empty() { - blocks.push(current); - } - blocks.sort(); - let mut out = blocks.join("\n"); - if !out.ends_with('\n') { - out.push('\n'); - } - out + + let mut stdout = Vec::new(); + let mut stderr = Vec::new(); + codex_apply_patch::apply_patch( + patch, + &cwd, + &mut stdout, + &mut stderr, + LOCAL_FS.as_ref(), + /*sandbox*/ None, + ) + .await + .expect("patch should apply") } -#[test] -fn accumulates_add_and_update() { - let mut acc = TurnDiffTracker::new(); +#[tokio::test] +async fn accumulates_add_then_update_as_single_add() { + let dir = tempdir().expect("tempdir"); + let mut tracker = TurnDiffTracker::with_display_root(dir.path().to_path_buf()); - let dir = tempdir().unwrap(); - let file = dir.path().join("a.txt"); + let add = apply_verified_patch( + dir.path(), + "*** Begin Patch\n*** Add File: a.txt\n+foo\n*** End Patch", + ) + .await; + tracker.track_successful_patch(&add); - // First patch: add file (baseline should be /dev/null). - let add_changes = HashMap::from([( - file.clone(), - FileChange::Add { - content: "foo\n".to_string(), - }, - )]); - acc.on_patch_begin(&add_changes); + let update = apply_verified_patch( + dir.path(), + "*** Begin Patch\n*** Update File: a.txt\n@@\n foo\n+bar\n*** End Patch", + ) + .await; + tracker.track_successful_patch(&update); - // Simulate apply: create the file on disk. - fs::write(&file, "foo\n").unwrap(); - let first = acc.get_unified_diff().unwrap().unwrap(); - let first = normalize_diff_for_test(&first, dir.path()); - let expected_first = { - let mode = file_mode_for_path(&file).unwrap_or(FileMode::Regular); - let right_oid = git_blob_sha1_hex("foo\n"); - format!( - r#"diff --git a//a.txt b//a.txt -new file mode {mode} + let right_oid = git_blob_sha1_hex("foo\nbar\n"); + let expected = format!( + r#"diff --git a/a.txt b/a.txt +new file mode {REGULAR_FILE_MODE} index {ZERO_OID}..{right_oid} --- {DEV_NULL} -+++ b//a.txt -@@ -0,0 +1 @@ -+foo -"#, - ) - }; - assert_eq!(first, expected_first); - - // Second patch: update the file on disk. - let update_changes = HashMap::from([( - file.clone(), - FileChange::Update { - unified_diff: "".to_owned(), - move_path: None, - }, - )]); - acc.on_patch_begin(&update_changes); - - // Simulate apply: append a new line. - fs::write(&file, "foo\nbar\n").unwrap(); - let combined = acc.get_unified_diff().unwrap().unwrap(); - let combined = normalize_diff_for_test(&combined, dir.path()); - let expected_combined = { - let mode = file_mode_for_path(&file).unwrap_or(FileMode::Regular); - let right_oid = git_blob_sha1_hex("foo\nbar\n"); - format!( - r#"diff --git a//a.txt b//a.txt -new file mode {mode} -index {ZERO_OID}..{right_oid} ---- {DEV_NULL} -+++ b//a.txt ++++ b/a.txt @@ -0,0 +1,2 @@ +foo +bar "#, - ) - }; - assert_eq!(combined, expected_combined); + ); + assert_eq!(tracker.get_unified_diff(), Some(expected)); } -#[test] -fn accumulates_delete() { - let dir = tempdir().unwrap(); - let file = dir.path().join("b.txt"); - fs::write(&file, "x\n").unwrap(); +#[tokio::test] +async fn invalidated_tracker_suppresses_existing_diff() { + let dir = tempdir().expect("tempdir"); + let mut tracker = TurnDiffTracker::with_display_root(dir.path().to_path_buf()); - let mut acc = TurnDiffTracker::new(); - let del_changes = HashMap::from([( - file.clone(), - FileChange::Delete { - content: "x\n".to_string(), - }, - )]); - acc.on_patch_begin(&del_changes); + let add = apply_verified_patch( + dir.path(), + "*** Begin Patch\n*** Add File: a.txt\n+foo\n*** End Patch", + ) + .await; + tracker.track_successful_patch(&add); - // Simulate apply: delete the file from disk. - let baseline_mode = file_mode_for_path(&file).unwrap_or(FileMode::Regular); - fs::remove_file(&file).unwrap(); - let diff = acc.get_unified_diff().unwrap().unwrap(); - let diff = normalize_diff_for_test(&diff, dir.path()); - let expected = { - let left_oid = git_blob_sha1_hex("x\n"); - format!( - r#"diff --git a//b.txt b//b.txt -deleted file mode {baseline_mode} + tracker.invalidate(); + + assert_eq!(tracker.get_unified_diff(), None); +} + +#[tokio::test] +async fn accumulates_delete() { + let dir = tempdir().expect("tempdir"); + fs::write(dir.path().join("b.txt"), "x\n").expect("seed file"); + + let mut tracker = TurnDiffTracker::with_display_root(dir.path().to_path_buf()); + let delete = apply_verified_patch( + dir.path(), + "*** Begin Patch\n*** Delete File: b.txt\n*** End Patch", + ) + .await; + tracker.track_successful_patch(&delete); + + let left_oid = git_blob_sha1_hex("x\n"); + let expected = format!( + r#"diff --git a/b.txt b/b.txt +deleted file mode {REGULAR_FILE_MODE} index {left_oid}..{ZERO_OID} ---- a//b.txt +--- a/b.txt +++ {DEV_NULL} @@ -1 +0,0 @@ -x "#, - ) - }; - assert_eq!(diff, expected); + ); + assert_eq!(tracker.get_unified_diff(), Some(expected)); } -#[test] -fn accumulates_move_and_update() { - let dir = tempdir().unwrap(); - let src = dir.path().join("src.txt"); - let dest = dir.path().join("dst.txt"); - fs::write(&src, "line\n").unwrap(); +#[tokio::test] +async fn accumulates_move_and_update() { + let dir = tempdir().expect("tempdir"); + fs::write(dir.path().join("src.txt"), "line\n").expect("seed file"); - let mut acc = TurnDiffTracker::new(); - let mv_changes = HashMap::from([( - src.clone(), - FileChange::Update { - unified_diff: "".to_owned(), - move_path: Some(dest.clone()), - }, - )]); - acc.on_patch_begin(&mv_changes); + let mut tracker = TurnDiffTracker::with_display_root(dir.path().to_path_buf()); + let update = apply_verified_patch( + dir.path(), + "*** Begin Patch\n*** Update File: src.txt\n*** Move to: dst.txt\n@@\n-line\n+line2\n*** End Patch", + ) + .await; + tracker.track_successful_patch(&update); - // Simulate apply: move and update content. - fs::rename(&src, &dest).unwrap(); - fs::write(&dest, "line2\n").unwrap(); - - let out = acc.get_unified_diff().unwrap().unwrap(); - let out = normalize_diff_for_test(&out, dir.path()); - let expected = { - let left_oid = git_blob_sha1_hex("line\n"); - let right_oid = git_blob_sha1_hex("line2\n"); - format!( - r#"diff --git a//src.txt b//dst.txt + let left_oid = git_blob_sha1_hex("line\n"); + let right_oid = git_blob_sha1_hex("line2\n"); + let expected = format!( + r#"diff --git a/src.txt b/dst.txt index {left_oid}..{right_oid} ---- a//src.txt -+++ b//dst.txt +--- a/src.txt ++++ b/dst.txt @@ -1 +1 @@ -line +line2 -"# - ) - }; - assert_eq!(out, expected); -} - -#[test] -fn move_without_1change_yields_no_diff() { - let dir = tempdir().unwrap(); - let src = dir.path().join("moved.txt"); - let dest = dir.path().join("renamed.txt"); - fs::write(&src, "same\n").unwrap(); - - let mut acc = TurnDiffTracker::new(); - let mv_changes = HashMap::from([( - src.clone(), - FileChange::Update { - unified_diff: "".to_owned(), - move_path: Some(dest.clone()), - }, - )]); - acc.on_patch_begin(&mv_changes); - - // Simulate apply: move only, no content change. - fs::rename(&src, &dest).unwrap(); - - let diff = acc.get_unified_diff().unwrap(); - assert_eq!(diff, None); -} - -#[test] -fn move_declared_but_file_only_appears_at_dest_is_add() { - let dir = tempdir().unwrap(); - let src = dir.path().join("src.txt"); - let dest = dir.path().join("dest.txt"); - let mut acc = TurnDiffTracker::new(); - let mv = HashMap::from([( - src, - FileChange::Update { - unified_diff: "".into(), - move_path: Some(dest.clone()), - }, - )]); - acc.on_patch_begin(&mv); - // No file existed initially; create only dest - fs::write(&dest, "hello\n").unwrap(); - let diff = acc.get_unified_diff().unwrap().unwrap(); - let diff = normalize_diff_for_test(&diff, dir.path()); - let expected = { - let mode = file_mode_for_path(&dest).unwrap_or(FileMode::Regular); - let right_oid = git_blob_sha1_hex("hello\n"); - format!( - r#"diff --git a//src.txt b//dest.txt -new file mode {mode} -index {ZERO_OID}..{right_oid} ---- {DEV_NULL} -+++ b//dest.txt -@@ -0,0 +1 @@ -+hello "#, - ) - }; - assert_eq!(diff, expected); + ); + assert_eq!(tracker.get_unified_diff(), Some(expected)); } -#[test] -fn update_persists_across_new_baseline_for_new_file() { - let dir = tempdir().unwrap(); - let a = dir.path().join("a.txt"); - let b = dir.path().join("b.txt"); - fs::write(&a, "foo\n").unwrap(); - fs::write(&b, "z\n").unwrap(); +#[tokio::test] +async fn pure_rename_yields_no_diff() { + let dir = tempdir().expect("tempdir"); + fs::write(dir.path().join("old.txt"), "same\n").expect("seed file"); - let mut acc = TurnDiffTracker::new(); + let mut tracker = TurnDiffTracker::with_display_root(dir.path().to_path_buf()); + let rename = apply_verified_patch( + dir.path(), + "*** Begin Patch\n*** Update File: old.txt\n*** Move to: new.txt\n@@\n same\n*** End Patch", + ) + .await; + tracker.track_successful_patch(&rename); - // First: update existing a.txt (baseline snapshot is created for a). - let update_a = HashMap::from([( - a.clone(), - FileChange::Update { - unified_diff: "".to_owned(), - move_path: None, - }, - )]); - acc.on_patch_begin(&update_a); - // Simulate apply: modify a.txt on disk. - fs::write(&a, "foo\nbar\n").unwrap(); - let first = acc.get_unified_diff().unwrap().unwrap(); - let first = normalize_diff_for_test(&first, dir.path()); - let expected_first = { - let left_oid = git_blob_sha1_hex("foo\n"); - let right_oid = git_blob_sha1_hex("foo\nbar\n"); - format!( - r#"diff --git a//a.txt b//a.txt + assert_eq!(tracker.get_unified_diff(), None); +} + +#[tokio::test] +async fn add_over_existing_file_becomes_update() { + let dir = tempdir().expect("tempdir"); + fs::write(dir.path().join("dup.txt"), "before\n").expect("seed file"); + + let mut tracker = TurnDiffTracker::with_display_root(dir.path().to_path_buf()); + let add = apply_verified_patch( + dir.path(), + "*** Begin Patch\n*** Add File: dup.txt\n+after\n*** End Patch", + ) + .await; + tracker.track_successful_patch(&add); + + let left_oid = git_blob_sha1_hex("before\n"); + let right_oid = git_blob_sha1_hex("after\n"); + let expected = format!( + r#"diff --git a/dup.txt b/dup.txt index {left_oid}..{right_oid} ---- a//a.txt -+++ b//a.txt -@@ -1 +1,2 @@ - foo -+bar -"# - ) - }; - assert_eq!(first, expected_first); +--- a/dup.txt ++++ b/dup.txt +@@ -1 +1 @@ +-before ++after +"#, + ); + assert_eq!(tracker.get_unified_diff(), Some(expected)); +} - // Next: introduce a brand-new path b.txt into baseline snapshots via a delete change. - let del_b = HashMap::from([( - b.clone(), - FileChange::Delete { - content: "z\n".to_string(), - }, - )]); - acc.on_patch_begin(&del_b); - // Simulate apply: delete b.txt. - let baseline_mode = file_mode_for_path(&b).unwrap_or(FileMode::Regular); - fs::remove_file(&b).unwrap(); +#[tokio::test] +async fn delete_then_readd_same_path_becomes_update() { + let dir = tempdir().expect("tempdir"); + fs::write(dir.path().join("cycle.txt"), "before\n").expect("seed file"); - let combined = acc.get_unified_diff().unwrap().unwrap(); - let combined = normalize_diff_for_test(&combined, dir.path()); - let expected = { - let left_oid_a = git_blob_sha1_hex("foo\n"); - let right_oid_a = git_blob_sha1_hex("foo\nbar\n"); - let left_oid_b = git_blob_sha1_hex("z\n"); - format!( - r#"diff --git a//a.txt b//a.txt -index {left_oid_a}..{right_oid_a} ---- a//a.txt -+++ b//a.txt -@@ -1 +1,2 @@ - foo -+bar -diff --git a//b.txt b//b.txt -deleted file mode {baseline_mode} -index {left_oid_b}..{ZERO_OID} ---- a//b.txt + let mut tracker = TurnDiffTracker::with_display_root(dir.path().to_path_buf()); + let delete = apply_verified_patch( + dir.path(), + "*** Begin Patch\n*** Delete File: cycle.txt\n*** End Patch", + ) + .await; + tracker.track_successful_patch(&delete); + + let add = apply_verified_patch( + dir.path(), + "*** Begin Patch\n*** Add File: cycle.txt\n+after\n*** End Patch", + ) + .await; + tracker.track_successful_patch(&add); + + let left_oid = git_blob_sha1_hex("before\n"); + let right_oid = git_blob_sha1_hex("after\n"); + let expected = format!( + r#"diff --git a/cycle.txt b/cycle.txt +index {left_oid}..{right_oid} +--- a/cycle.txt ++++ b/cycle.txt +@@ -1 +1 @@ +-before ++after +"#, + ); + assert_eq!(tracker.get_unified_diff(), Some(expected)); +} + +#[tokio::test] +async fn move_over_existing_destination_without_content_change_deletes_source_only() { + let dir = tempdir().expect("tempdir"); + fs::write(dir.path().join("a.txt"), "same\n").expect("seed source"); + fs::write(dir.path().join("b.txt"), "same\n").expect("seed destination"); + + let mut tracker = TurnDiffTracker::with_display_root(dir.path().to_path_buf()); + let move_overwrite = apply_verified_patch( + dir.path(), + "*** Begin Patch\n*** Update File: a.txt\n*** Move to: b.txt\n@@\n same\n*** End Patch", + ) + .await; + tracker.track_successful_patch(&move_overwrite); + + let left_oid = git_blob_sha1_hex("same\n"); + let expected = format!( + r#"diff --git a/a.txt b/a.txt +deleted file mode {REGULAR_FILE_MODE} +index {left_oid}..{ZERO_OID} +--- a/a.txt +++ {DEV_NULL} @@ -1 +0,0 @@ --z +-same "#, - ) - }; - assert_eq!(combined, expected); + ); + assert_eq!(tracker.get_unified_diff(), Some(expected)); } -#[test] -fn binary_files_differ_update() { - let dir = tempdir().unwrap(); - let file = dir.path().join("bin.dat"); +#[tokio::test] +async fn move_over_existing_destination_with_content_change_deletes_source_and_updates_destination() +{ + let dir = tempdir().expect("tempdir"); + fs::write(dir.path().join("a.txt"), "from\n").expect("seed source"); + fs::write(dir.path().join("b.txt"), "existing\n").expect("seed destination"); - // Initial non-UTF8 bytes - let left_bytes: Vec = vec![0xff, 0xfe, 0xfd, 0x00]; - // Updated non-UTF8 bytes - let right_bytes: Vec = vec![0x01, 0x02, 0x03, 0x00]; + let mut tracker = TurnDiffTracker::with_display_root(dir.path().to_path_buf()); + let move_overwrite = apply_verified_patch( + dir.path(), + "*** Begin Patch\n*** Update File: a.txt\n*** Move to: b.txt\n@@\n-from\n+new\n*** End Patch", + ) + .await; + tracker.track_successful_patch(&move_overwrite); - fs::write(&file, &left_bytes).unwrap(); - - let mut acc = TurnDiffTracker::new(); - let update_changes = HashMap::from([( - file.clone(), - FileChange::Update { - unified_diff: "".to_owned(), - move_path: None, - }, - )]); - acc.on_patch_begin(&update_changes); - - // Apply update on disk - fs::write(&file, &right_bytes).unwrap(); - - let diff = acc.get_unified_diff().unwrap().unwrap(); - let diff = normalize_diff_for_test(&diff, dir.path()); - let expected = { - let left_oid = format!("{:x}", git_blob_sha1_hex_bytes(&left_bytes)); - let right_oid = format!("{:x}", git_blob_sha1_hex_bytes(&right_bytes)); - format!( - r#"diff --git a//bin.dat b//bin.dat -index {left_oid}..{right_oid} ---- a//bin.dat -+++ b//bin.dat -Binary files differ -"# - ) - }; - assert_eq!(diff, expected); + let left_oid_a = git_blob_sha1_hex("from\n"); + let left_oid_b = git_blob_sha1_hex("existing\n"); + let right_oid_b = git_blob_sha1_hex("new\n"); + let expected = format!( + r#"diff --git a/a.txt b/a.txt +deleted file mode {REGULAR_FILE_MODE} +index {left_oid_a}..{ZERO_OID} +--- a/a.txt ++++ {DEV_NULL} +@@ -1 +0,0 @@ +-from +diff --git a/b.txt b/b.txt +index {left_oid_b}..{right_oid_b} +--- a/b.txt ++++ b/b.txt +@@ -1 +1 @@ +-existing ++new +"#, + ); + assert_eq!(tracker.get_unified_diff(), Some(expected)); } -#[test] -fn filenames_with_spaces_add_and_update() { - let mut acc = TurnDiffTracker::new(); +#[tokio::test] +async fn preserves_committed_change_order_with_delete_then_move_overwrite() { + let dir = tempdir().expect("tempdir"); + fs::write(dir.path().join("a.txt"), "from\n").expect("seed source"); + fs::write(dir.path().join("b.txt"), "existing\n").expect("seed destination"); - let dir = tempdir().unwrap(); - let file = dir.path().join("name with spaces.txt"); + let mut tracker = TurnDiffTracker::with_display_root(dir.path().to_path_buf()); + let ordered_patch = apply_verified_patch( + dir.path(), + "*** Begin Patch\n*** Delete File: b.txt\n*** Update File: a.txt\n*** Move to: b.txt\n@@\n-from\n+new\n*** End Patch", + ) + .await; + tracker.track_successful_patch(&ordered_patch); - // First patch: add file (baseline should be /dev/null). - let add_changes = HashMap::from([( - file.clone(), - FileChange::Add { - content: "foo\n".to_string(), - }, - )]); - acc.on_patch_begin(&add_changes); - - // Simulate apply: create the file on disk. - fs::write(&file, "foo\n").unwrap(); - let first = acc.get_unified_diff().unwrap().unwrap(); - let first = normalize_diff_for_test(&first, dir.path()); - let expected_first = { - let mode = file_mode_for_path(&file).unwrap_or(FileMode::Regular); - let right_oid = git_blob_sha1_hex("foo\n"); - format!( - r#"diff --git a//name with spaces.txt b//name with spaces.txt -new file mode {mode} -index {ZERO_OID}..{right_oid} ---- {DEV_NULL} -+++ b//name with spaces.txt -@@ -0,0 +1 @@ -+foo + let left_oid_a = git_blob_sha1_hex("from\n"); + let left_oid_b = git_blob_sha1_hex("existing\n"); + let right_oid_b = git_blob_sha1_hex("new\n"); + let expected = format!( + r#"diff --git a/a.txt b/a.txt +deleted file mode {REGULAR_FILE_MODE} +index {left_oid_a}..{ZERO_OID} +--- a/a.txt ++++ {DEV_NULL} +@@ -1 +0,0 @@ +-from +diff --git a/b.txt b/b.txt +index {left_oid_b}..{right_oid_b} +--- a/b.txt ++++ b/b.txt +@@ -1 +1 @@ +-existing ++new "#, - ) - }; - assert_eq!(first, expected_first); - - // Second patch: update the file on disk. - let update_changes = HashMap::from([( - file.clone(), - FileChange::Update { - unified_diff: "".to_owned(), - move_path: None, - }, - )]); - acc.on_patch_begin(&update_changes); - - // Simulate apply: append a new line with a space. - fs::write(&file, "foo\nbar baz\n").unwrap(); - let combined = acc.get_unified_diff().unwrap().unwrap(); - let combined = normalize_diff_for_test(&combined, dir.path()); - let expected_combined = { - let mode = file_mode_for_path(&file).unwrap_or(FileMode::Regular); - let right_oid = git_blob_sha1_hex("foo\nbar baz\n"); - format!( - r#"diff --git a//name with spaces.txt b//name with spaces.txt -new file mode {mode} -index {ZERO_OID}..{right_oid} ---- {DEV_NULL} -+++ b//name with spaces.txt -@@ -0,0 +1,2 @@ -+foo -+bar baz -"#, - ) - }; - assert_eq!(combined, expected_combined); + ); + assert_eq!(tracker.get_unified_diff(), Some(expected)); } diff --git a/codex-rs/core/src/unified_exec/async_watcher.rs b/codex-rs/core/src/unified_exec/async_watcher.rs index b4c7c9c8b4..33dbf843dc 100644 --- a/codex-rs/core/src/unified_exec/async_watcher.rs +++ b/codex-rs/core/src/unified_exec/async_watcher.rs @@ -226,7 +226,13 @@ pub(crate) async fn emit_exec_end_for_unified_exec( process_id, ); emitter - .emit(event_ctx, ToolEventStage::Success(output)) + .emit( + event_ctx, + ToolEventStage::Success { + output, + applied_patch_delta: None, + }, + ) .await; } diff --git a/codex-rs/core/tests/suite/apply_patch_cli.rs b/codex-rs/core/tests/suite/apply_patch_cli.rs index f08dfd5f0e..e52bca8fca 100644 --- a/codex-rs/core/tests/suite/apply_patch_cli.rs +++ b/codex-rs/core/tests/suite/apply_patch_cli.rs @@ -12,6 +12,7 @@ use std::sync::atomic::AtomicI32; use std::sync::atomic::Ordering; use std::time::Duration; +use codex_exec_server::CreateDirectoryOptions; use codex_features::Feature; use codex_protocol::models::PermissionProfile; use codex_protocol::permissions::NetworkSandboxPolicy; @@ -377,10 +378,6 @@ async fn apply_patch_cli_move_without_content_change_has_no_turn_diff( model_output: ApplyPatchModelOutput, ) -> Result<()> { skip_if_no_network!(Ok(())); - skip_if_remote!( - Ok(()), - "TurnDiffTracker currently reads the test-runner filesystem, not the remote executor filesystem", - ); let harness = apply_patch_harness().await?; let test = harness.test(); @@ -1050,10 +1047,6 @@ async fn apply_patch_custom_tool_streaming_emits_updated_changes() -> Result<()> #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn apply_patch_shell_command_heredoc_with_cd_emits_turn_diff() -> Result<()> { skip_if_no_network!(Ok(())); - skip_if_remote!( - Ok(()), - "TurnDiffTracker currently reads the test-runner filesystem, not the remote executor filesystem", - ); let harness = apply_patch_harness_with(|builder| builder.with_model("gpt-5.4")).await?; let test = harness.test(); @@ -1113,13 +1106,88 @@ async fn apply_patch_shell_command_heredoc_with_cd_emits_turn_diff() -> Result<( Ok(()) } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn apply_patch_turn_diff_paths_stay_repo_relative_when_session_cwd_is_nested() -> Result<()> { + skip_if_no_network!(Ok(())); + + let harness = apply_patch_harness_with(|builder| { + builder + .with_model("gpt-5.4") + .with_config(|config| { + config.cwd = config.cwd.join("subdir"); + }) + .with_workspace_setup(|cwd, fs| async move { + fs.create_directory( + &cwd, + CreateDirectoryOptions { recursive: true }, + /*sandbox*/ None, + ) + .await?; + let repo_root = cwd.parent().expect("nested cwd should have parent"); + fs.write_file( + &repo_root.join(".git"), + b"gitdir: /tmp/fake-worktree\n".to_vec(), + /*sandbox*/ None, + ) + .await?; + fs.write_file( + &repo_root.join("repo.txt"), + b"before\n".to_vec(), + /*sandbox*/ None, + ) + .await?; + Ok(()) + }) + }) + .await?; + let test = harness.test(); + let codex = test.codex.clone(); + let repo_root = harness + .test() + .config + .cwd + .parent() + .expect("nested cwd should have parent"); + + let call_id = "apply-nested-cwd-repo-relative"; + let patch = "*** Begin Patch\n*** Update File: ../repo.txt\n@@\n-before\n+after\n*** End Patch"; + mount_apply_patch( + &harness, + call_id, + patch, + "updated repo-relative path", + ApplyPatchModelOutput::Function, + ) + .await; + + submit_without_wait(&harness, "update file outside nested cwd but inside repo").await?; + + let mut last_diff: Option = None; + wait_for_event(&codex, |event| match event { + EventMsg::TurnDiff(ev) => { + last_diff = Some(ev.unified_diff.clone()); + false + } + EventMsg::TurnComplete(_) => true, + _ => false, + }) + .await; + + let diff = last_diff.expect("expected TurnDiff event after update"); + assert!( + diff.contains("diff --git a/repo.txt b/repo.txt"), + "diff should stay repo-relative: {diff:?}" + ); + assert!( + !diff.contains(repo_root.as_path().to_string_lossy().as_ref()), + "diff should not leak absolute repo paths: {diff:?}" + ); + Ok(()) +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn apply_patch_shell_command_failure_propagates_error_and_skips_diff() -> Result<()> { skip_if_no_network!(Ok(())); - skip_if_remote!( - Ok(()), - "TurnDiffTracker currently reads the test-runner filesystem, not the remote executor filesystem", - ); let harness = apply_patch_harness_with(|builder| builder.with_model("gpt-5.4")).await?; let test = harness.test(); @@ -1265,10 +1333,6 @@ async fn apply_patch_emits_turn_diff_event_with_unified_diff( model_output: ApplyPatchModelOutput, ) -> Result<()> { skip_if_no_network!(Ok(())); - skip_if_remote!( - Ok(()), - "TurnDiffTracker currently reads the test-runner filesystem, not the remote executor filesystem", - ); let harness = apply_patch_harness().await?; let test = harness.test(); @@ -1300,64 +1364,9 @@ async fn apply_patch_emits_turn_diff_event_with_unified_diff( Ok(()) } -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -#[test_case(ApplyPatchModelOutput::Freeform)] -#[test_case(ApplyPatchModelOutput::Function)] -#[test_case(ApplyPatchModelOutput::Shell)] -#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)] -#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)] -async fn apply_patch_turn_diff_for_rename_with_content_change( - model_output: ApplyPatchModelOutput, -) -> Result<()> { - skip_if_no_network!(Ok(())); - skip_if_remote!( - Ok(()), - "TurnDiffTracker currently reads the test-runner filesystem, not the remote executor filesystem", - ); - - let harness = apply_patch_harness().await?; - let test = harness.test(); - let codex = test.codex.clone(); - - // Seed original file - harness.write_file("old.txt", "old\n").await?; - - // Patch: update + move - let call_id = "apply-rename-change"; - let patch = "*** Begin Patch\n*** Update File: old.txt\n*** Move to: new.txt\n@@\n-old\n+new\n*** End Patch"; - mount_apply_patch(&harness, call_id, patch, "ok", model_output).await; - - submit_without_wait(&harness, "rename with change").await?; - - let mut last_diff: Option = None; - wait_for_event(&codex, |event| match event { - EventMsg::TurnDiff(ev) => { - last_diff = Some(ev.unified_diff.clone()); - false - } - EventMsg::TurnComplete(_) => true, - _ => false, - }) - .await; - - let diff = last_diff.expect("expected TurnDiff event after rename"); - // Basic checks: shows old -> new, and the content delta - assert!(diff.contains("old.txt"), "diff missing old path: {diff:?}"); - assert!(diff.contains("new.txt"), "diff missing new path: {diff:?}"); - assert!(diff.contains("--- a/"), "missing old header"); - assert!(diff.contains("+++ b/"), "missing new header"); - assert!(diff.contains("-old\n"), "missing removal line"); - assert!(diff.contains("+new\n"), "missing addition line"); - Ok(()) -} - #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn apply_patch_aggregates_diff_across_multiple_tool_calls() -> Result<()> { skip_if_no_network!(Ok(())); - skip_if_remote!( - Ok(()), - "TurnDiffTracker currently reads the test-runner filesystem, not the remote executor filesystem", - ); let harness = apply_patch_harness().await?; let test = harness.test(); @@ -1408,10 +1417,6 @@ async fn apply_patch_aggregates_diff_across_multiple_tool_calls() -> Result<()> #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn apply_patch_aggregates_diff_preserves_success_after_failure() -> Result<()> { skip_if_no_network!(Ok(())); - skip_if_remote!( - Ok(()), - "TurnDiffTracker currently reads the test-runner filesystem, not the remote executor filesystem", - ); let harness = apply_patch_harness().await?; let test = harness.test(); @@ -1482,6 +1487,75 @@ async fn apply_patch_aggregates_diff_preserves_success_after_failure() -> Result Ok(()) } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn apply_patch_clears_aggregated_diff_after_inexact_delta() -> Result<()> { + skip_if_no_network!(Ok(())); + + let harness = apply_patch_harness_with(|builder| { + builder.with_workspace_setup(|cwd, fs| async move { + fs.write_file( + &cwd.join("binary.dat"), + vec![0xff, 0xfe, 0xfd], + /*sandbox*/ None, + ) + .await?; + Ok(()) + }) + }) + .await?; + let test = harness.test(); + let codex = test.codex.clone(); + + let call_success = "agg-success"; + let call_inexact = "agg-inexact"; + let patch_success = "*** Begin Patch\n*** Add File: partial/success.txt\n+ok\n*** End Patch"; + let patch_inexact = "*** Begin Patch\n*** Add File: binary.dat\n+text\n*** End Patch"; + + let responses = vec![ + sse(vec![ + ev_response_created("resp-1"), + ev_apply_patch_function_call(call_success, patch_success), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_response_created("resp-2"), + ev_apply_patch_function_call(call_inexact, patch_inexact), + ev_completed("resp-2"), + ]), + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-3"), + ]), + ]; + mount_sse_sequence(harness.server(), responses).await; + + submit_without_wait(&harness, "apply patch twice with inexact delta").await?; + + let mut last_diff: Option = None; + wait_for_event_with_timeout( + &codex, + |event| match event { + EventMsg::TurnDiff(ev) => { + last_diff = Some(ev.unified_diff.clone()); + false + } + EventMsg::TurnComplete(_) => true, + _ => false, + }, + Duration::from_secs(30), + ) + .await; + + assert_eq!( + last_diff.as_deref(), + Some(""), + "inexact delta should clear the aggregate diff" + ); + assert_eq!(harness.read_file_text("partial/success.txt").await?, "ok\n"); + assert_eq!(harness.read_file_text("binary.dat").await?, "text\n"); + Ok(()) +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] #[test_case(ApplyPatchModelOutput::Freeform)] #[test_case(ApplyPatchModelOutput::Function)]