Files
codex/codex-rs/core/src/turn_diff_tracker.rs
jif-oai 9b6c6f7a01 fix: preserve exact turn diffs after partial apply_patch failures (#21518)
## Why

Follow-up to #21180: turn diffs are operation-backed now, but a failed
`apply_patch` can still leave exact filesystem mutations behind. For
example, a move can write the destination file before failing to remove
the source. Treating the whole call as unknowable then drops a change
that Codex actually knows happened, so the emitted turn diff can drift
from the workspace.

## What changed

-
[`apply-patch`](f55724e027/codex-rs/apply-patch/src/lib.rs (L248-L345))
now returns `ApplyPatchFailure` with the exact committed prefix
accumulated before an error. If a write failure may already have mutated
the target, the delta is marked inexact instead of being reused blindly.
- Move handling now records the destination write before attempting
source removal, so a partially failed move can still report the
destination file that definitely landed
([code](f55724e027/codex-rs/apply-patch/src/lib.rs (L463-L521))).
-
[`ApplyPatchRuntime`](f55724e027/codex-rs/core/src/tools/runtimes/apply_patch.rs (L49-L67))
now accumulates committed deltas across attempts and forwards them even
when the visible tool result is failed or sandbox-denied ([runtime
path](f55724e027/codex-rs/core/src/tools/runtimes/apply_patch.rs (L223-L250)),
[event
path](f55724e027/codex-rs/core/src/tools/events.rs (L215-L225))).
- `TurnDiffTracker` now consumes committed exact deltas rather than only
fully successful patches; exact-empty failures leave the aggregate
unchanged, while inexact deltas still invalidate it.

## Verification

- Added a regression test covering a failed move that still emits the
committed destination diff:
[`apply_patch_failed_move_preserves_committed_destination_diff`](f55724e027/codex-rs/core/tests/suite/apply_patch_cli.rs (L1517-L1586)).
- Kept explicit coverage that an inexact delta clears the aggregate
instead of publishing a guessed diff:
[`apply_patch_clears_aggregated_diff_after_inexact_delta`](f55724e027/codex-rs/core/tests/suite/apply_patch_cli.rs (L1589-L1655)).

---------

Co-authored-by: Codex <noreply@openai.com>
2026-05-07 18:05:45 +02:00

316 lines
10 KiB
Rust

use std::collections::HashMap;
use std::collections::HashSet;
use std::path::Path;
use std::path::PathBuf;
use sha1::digest::Output;
use codex_apply_patch::AppliedPatchChange;
use codex_apply_patch::AppliedPatchDelta;
use codex_apply_patch::AppliedPatchFileChange;
const ZERO_OID: &str = "0000000000000000000000000000000000000000";
const DEV_NULL: &str = "/dev/null";
const REGULAR_FILE_MODE: &str = "100644";
/// Tracks the net text diff for the current turn from committed apply_patch
/// mutations, without rereading the workspace filesystem.
pub struct TurnDiffTracker {
valid: bool,
display_root: Option<PathBuf>,
baseline_by_path: HashMap<PathBuf, String>,
current_by_path: HashMap<PathBuf, String>,
origin_by_current_path: HashMap<PathBuf, PathBuf>,
}
impl Default for TurnDiffTracker {
fn default() -> Self {
Self {
valid: true,
display_root: None,
baseline_by_path: HashMap::new(),
current_by_path: HashMap::new(),
origin_by_current_path: HashMap::new(),
}
}
}
impl TurnDiffTracker {
pub fn new() -> Self {
Self::default()
}
pub fn with_display_root(display_root: PathBuf) -> Self {
let mut tracker = Self::new();
tracker.display_root = Some(display_root);
tracker
}
pub fn track_delta(&mut self, delta: &AppliedPatchDelta) {
if !delta.is_exact() {
self.invalidate();
return;
}
for change in delta.changes() {
self.apply_change(change);
}
}
pub fn invalidate(&mut self) {
self.valid = false;
}
pub fn get_unified_diff(&self) -> Option<String> {
if !self.valid {
return None;
}
let rename_pairs = self.rename_pairs();
let paired_destinations = rename_pairs.values().cloned().collect::<HashSet<_>>();
let mut handled = HashSet::new();
let mut paths = self
.baseline_by_path
.keys()
.chain(self.current_by_path.keys())
.cloned()
.collect::<Vec<_>>();
paths.sort_by_key(|path| self.display_path(path));
paths.dedup();
let mut aggregated = String::new();
for path in paths {
if !handled.insert(path.clone()) {
continue;
}
if paired_destinations.contains(&path) {
continue;
}
let diff = if let Some(dest) = rename_pairs.get(&path) {
handled.insert(dest.clone());
self.render_rename_diff(&path, dest)
} else {
self.render_path_diff(&path)
};
if let Some(diff) = diff {
aggregated.push_str(&diff);
if !aggregated.ends_with('\n') {
aggregated.push('\n');
}
}
}
(!aggregated.is_empty()).then_some(aggregated)
}
fn apply_change(&mut self, change: &AppliedPatchChange) {
let source_path = change.path.as_path();
match &change.change {
AppliedPatchFileChange::Add {
content,
overwritten_content,
} => self.apply_add(source_path, content, overwritten_content.as_deref()),
AppliedPatchFileChange::Delete { content } => self.apply_delete(source_path, content),
AppliedPatchFileChange::Update {
move_path,
old_content,
overwritten_move_content,
new_content,
} => self.apply_update(
source_path,
move_path.as_deref(),
old_content,
overwritten_move_content.as_deref(),
new_content,
),
}
}
fn apply_add(&mut self, path: &Path, content: &str, overwritten_content: Option<&str>) {
self.origin_by_current_path.remove(path);
if !self.current_by_path.contains_key(path)
&& !self.baseline_by_path.contains_key(path)
&& let Some(overwritten_content) = overwritten_content
{
self.baseline_by_path
.insert(path.to_path_buf(), overwritten_content.to_string());
}
self.current_by_path
.insert(path.to_path_buf(), content.to_string());
}
fn apply_delete(&mut self, path: &Path, content: &str) {
if self.current_by_path.remove(path).is_none() && !self.baseline_by_path.contains_key(path)
{
self.baseline_by_path
.insert(path.to_path_buf(), content.to_string());
}
self.origin_by_current_path.remove(path);
}
fn apply_update(
&mut self,
source_path: &Path,
move_path: Option<&Path>,
old_content: &str,
overwritten_move_content: Option<&str>,
new_content: &str,
) {
if !self.current_by_path.contains_key(source_path)
&& !self.baseline_by_path.contains_key(source_path)
{
self.baseline_by_path
.insert(source_path.to_path_buf(), old_content.to_string());
}
match move_path {
Some(dest_path) => {
if !self.current_by_path.contains_key(dest_path)
&& !self.baseline_by_path.contains_key(dest_path)
&& let Some(overwritten_move_content) = overwritten_move_content
{
self.baseline_by_path.insert(
dest_path.to_path_buf(),
overwritten_move_content.to_string(),
);
}
let origin = self
.origin_by_current_path
.remove(source_path)
.unwrap_or_else(|| source_path.to_path_buf());
self.current_by_path.remove(source_path);
self.current_by_path
.insert(dest_path.to_path_buf(), new_content.to_string());
self.origin_by_current_path.remove(dest_path);
if dest_path != origin.as_path() {
self.origin_by_current_path
.insert(dest_path.to_path_buf(), origin);
}
}
None => {
self.current_by_path
.insert(source_path.to_path_buf(), new_content.to_string());
}
}
}
fn rename_pairs(&self) -> HashMap<PathBuf, PathBuf> {
self.origin_by_current_path
.iter()
.filter_map(|(dest_path, origin_path)| {
if dest_path == origin_path
|| self.current_by_path.contains_key(origin_path)
|| !self.current_by_path.contains_key(dest_path)
|| !self.baseline_by_path.contains_key(origin_path)
|| self.baseline_by_path.contains_key(dest_path)
{
return None;
}
Some((origin_path.clone(), dest_path.clone()))
})
.collect()
}
fn render_path_diff(&self, path: &Path) -> Option<String> {
self.render_diff(
path,
self.baseline_by_path.get(path).map(String::as_str),
path,
self.current_by_path.get(path).map(String::as_str),
)
}
fn render_rename_diff(&self, source_path: &Path, dest_path: &Path) -> Option<String> {
self.render_diff(
source_path,
self.baseline_by_path.get(source_path).map(String::as_str),
dest_path,
self.current_by_path.get(dest_path).map(String::as_str),
)
}
fn render_diff(
&self,
left_path: &Path,
left_content: Option<&str>,
right_path: &Path,
right_content: Option<&str>,
) -> Option<String> {
if left_content == right_content {
return None;
}
let left_display = self.display_path(left_path);
let right_display = self.display_path(right_path);
let left_oid = left_content.map_or_else(
|| ZERO_OID.to_string(),
|content| git_blob_oid(content.as_bytes()),
);
let right_oid = right_content.map_or_else(
|| ZERO_OID.to_string(),
|content| git_blob_oid(content.as_bytes()),
);
let mut diff = format!("diff --git a/{left_display} b/{right_display}\n");
match (left_content, right_content) {
(None, Some(_)) => diff.push_str(&format!("new file mode {REGULAR_FILE_MODE}\n")),
(Some(_), None) => diff.push_str(&format!("deleted file mode {REGULAR_FILE_MODE}\n")),
(Some(_), Some(_)) => {}
(None, None) => return None,
}
diff.push_str(&format!("index {left_oid}..{right_oid}\n"));
let old_header = if left_content.is_some() {
format!("a/{left_display}")
} else {
DEV_NULL.to_string()
};
let new_header = if right_content.is_some() {
format!("b/{right_display}")
} else {
DEV_NULL.to_string()
};
let unified =
similar::TextDiff::from_lines(left_content.unwrap_or(""), right_content.unwrap_or(""))
.unified_diff()
.context_radius(3)
.header(&old_header, &new_header)
.to_string();
diff.push_str(&unified);
Some(diff)
}
fn display_path(&self, path: &Path) -> String {
let display = self
.display_root
.as_deref()
.and_then(|root| path.strip_prefix(root).ok())
.unwrap_or(path);
display.display().to_string().replace('\\', "/")
}
}
fn git_blob_oid(data: &[u8]) -> String {
format!("{:x}", git_blob_sha1_hex_bytes(data))
}
/// Compute the Git SHA-1 blob object ID for the given content (bytes).
fn git_blob_sha1_hex_bytes(data: &[u8]) -> Output<sha1::Sha1> {
let header = format!("blob {}\0", data.len());
use sha1::Digest;
let mut hasher = sha1::Sha1::new();
hasher.update(header.as_bytes());
hasher.update(data);
hasher.finalize()
}
#[cfg(test)]
#[path = "turn_diff_tracker_tests.rs"]
mod tests;