fix(apply-patch): preserve CRLF line endings on Windows (#7515)

## Summary
This PR is heavily based on #4017, which contains the core logic for the
fix. To reduce the risk, we are first introducing it only on windows. We
can then expand to wsl / other environments as needed, and then tackle
net new files.

## Testing
- [x] added unit tests in apply-patch
- [x] add integration tests to apply_patch_cli.rs

---------

Co-authored-by: Chase Naples <Cnaples79@gmail.com>
This commit is contained in:
Dylan Hurd
2025-12-05 16:43:27 -08:00
committed by GitHub
parent 952d6c9465
commit 6c9c563faf
2 changed files with 244 additions and 8 deletions

View File

@@ -699,13 +699,7 @@ fn derive_new_contents_from_chunks(
}
};
let mut original_lines: Vec<String> = original_contents.split('\n').map(String::from).collect();
// Drop the trailing empty element that results from the final newline so
// that line counts match the behaviour of standard `diff`.
if original_lines.last().is_some_and(String::is_empty) {
original_lines.pop();
}
let original_lines: Vec<String> = build_lines_from_contents(&original_contents);
let replacements = compute_replacements(&original_lines, path, chunks)?;
let new_lines = apply_replacements(original_lines, &replacements);
@@ -713,13 +707,67 @@ fn derive_new_contents_from_chunks(
if !new_lines.last().is_some_and(String::is_empty) {
new_lines.push(String::new());
}
let new_contents = new_lines.join("\n");
let new_contents = build_contents_from_lines(&original_contents, &new_lines);
Ok(AppliedPatch {
original_contents,
new_contents,
})
}
// TODO(dylan-hurd-oai): I think we can migrate to just use `contents.lines()`
// across all platforms.
fn build_lines_from_contents(contents: &str) -> Vec<String> {
if cfg!(windows) {
contents.lines().map(String::from).collect()
} else {
let mut lines: Vec<String> = contents.split('\n').map(String::from).collect();
// Drop the trailing empty element that results from the final newline so
// that line counts match the behaviour of standard `diff`.
if lines.last().is_some_and(String::is_empty) {
lines.pop();
}
lines
}
}
fn build_contents_from_lines(original_contents: &str, lines: &[String]) -> String {
if cfg!(windows) {
// for now, only compute this if we're on Windows.
let uses_crlf = contents_uses_crlf(original_contents);
if uses_crlf {
lines.join("\r\n")
} else {
lines.join("\n")
}
} else {
lines.join("\n")
}
}
/// Detects whether the source file uses Windows CRLF line endings consistently.
/// We only consider a file CRLF-formatted if every newline is part of a
/// CRLF sequence. This avoids rewriting an LF-formatted file that merely
/// contains embedded sequences of "\r\n".
///
/// Returns `true` if the file uses CRLF line endings, `false` otherwise.
fn contents_uses_crlf(contents: &str) -> bool {
let bytes = contents.as_bytes();
let mut n_newlines = 0usize;
let mut n_crlf = 0usize;
for i in 0..bytes.len() {
if bytes[i] == b'\n' {
n_newlines += 1;
if i > 0 && bytes[i - 1] == b'\r' {
n_crlf += 1;
}
}
}
n_newlines > 0 && n_crlf == n_newlines
}
/// Compute a list of replacements needed to transform `original_lines` into the
/// new lines, given the patch `chunks`. Each replacement is returned as
/// `(start_index, old_len, new_lines)`.
@@ -1359,6 +1407,72 @@ PATCH"#,
assert_eq!(contents, "a\nB\nc\nd\nE\nf\ng\n");
}
/// Ensure CRLF line endings are preserved for updated files on Windowsstyle inputs.
#[cfg(windows)]
#[test]
fn test_preserve_crlf_line_endings_on_update() {
let dir = tempdir().unwrap();
let path = dir.path().join("crlf.txt");
// Original file uses CRLF (\r\n) endings.
std::fs::write(&path, b"a\r\nb\r\nc\r\n").unwrap();
// Replace `b` -> `B` and append `d`.
let patch = wrap_patch(&format!(
r#"*** Update File: {}
@@
a
-b
+B
@@
c
+d
*** End of File"#,
path.display()
));
let mut stdout = Vec::new();
let mut stderr = Vec::new();
apply_patch(&patch, &mut stdout, &mut stderr).unwrap();
let out = std::fs::read(&path).unwrap();
// Expect all CRLF endings; count occurrences of CRLF and ensure there are 4 lines.
let content = String::from_utf8_lossy(&out);
assert!(content.contains("\r\n"));
// No bare LF occurrences immediately preceding a non-CR: the text should not contain "a\nb".
assert!(!content.contains("a\nb"));
// Validate exact content sequence with CRLF delimiters.
assert_eq!(content, "a\r\nB\r\nc\r\nd\r\n");
}
/// Ensure CRLF inputs with embedded carriage returns in the content are preserved.
#[cfg(windows)]
#[test]
fn test_preserve_crlf_embedded_carriage_returns_on_append() {
let dir = tempdir().unwrap();
let path = dir.path().join("crlf_cr_content.txt");
// Original file: first line has a literal '\r' in the content before the CRLF terminator.
std::fs::write(&path, b"foo\r\r\nbar\r\n").unwrap();
// Append a new line without modifying existing ones.
let patch = wrap_patch(&format!(
r#"*** Update File: {}
@@
+BAZ
*** End of File"#,
path.display()
));
let mut stdout = Vec::new();
let mut stderr = Vec::new();
apply_patch(&patch, &mut stdout, &mut stderr).unwrap();
let out = std::fs::read(&path).unwrap();
// CRLF endings must be preserved and the extra CR in "foo\r\r" must not be collapsed.
assert_eq!(out.as_slice(), b"foo\r\r\nbar\r\nBAZ\r\n");
}
#[test]
fn test_pure_addition_chunk_followed_by_removal() {
let dir = tempdir().unwrap();
@@ -1544,6 +1658,37 @@ PATCH"#,
assert_eq!(expected, diff);
}
/// For LF-only inputs with a trailing newline ensure that the helper used
/// on Windows-style builds drops the synthetic trailing empty element so
/// replacements behave like standard `diff` line numbering.
#[test]
fn test_derive_new_contents_lf_trailing_newline() {
let dir = tempdir().unwrap();
let path = dir.path().join("lf_trailing_newline.txt");
fs::write(&path, "foo\nbar\n").unwrap();
let patch = wrap_patch(&format!(
r#"*** Update File: {}
@@
foo
-bar
+BAR
"#,
path.display()
));
let patch = parse_patch(&patch).unwrap();
let chunks = match patch.hunks.as_slice() {
[Hunk::UpdateFile { chunks, .. }] => chunks,
_ => panic!("Expected a single UpdateFile hunk"),
};
let AppliedPatch { new_contents, .. } =
derive_new_contents_from_chunks(&path, chunks).unwrap();
assert_eq!(new_contents, "foo\nBAR\n");
}
#[test]
fn test_unified_diff_insert_at_eof() {
// Insert a new line at endoffile.

View File

@@ -1250,3 +1250,94 @@ async fn apply_patch_change_context_disambiguates_target(
assert_eq!(contents, "fn a\nx=10\ny=2\nfn b\nx=11\ny=20\n");
Ok(())
}
/// Ensure that applying a patch can update a CRLF file with unicode characters.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::Function)]
#[test_case(ApplyPatchModelOutput::Shell)]
#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_updates_unicode_characters(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
let target = harness.path("unicode.txt");
fs::write(&target, "first ⚠️\nsecond ❌\nthird 🔥\n")?;
let patch = format!(
r#"*** Begin Patch
*** Update File: {}
@@
first ⚠️
-second ❌
+SECOND ✅
@@
third 🔥
+FOURTH
*** End of File
*** End Patch"#,
target.display()
);
let call_id = "apply-unicode-update";
mount_apply_patch(&harness, call_id, patch.as_str(), "ok", model_output).await;
harness
.submit("update unicode characters via apply_patch CLI")
.await?;
let file_contents = fs::read(&target)?;
let content = String::from_utf8_lossy(&file_contents);
assert_eq!(content, "first ⚠️\nSECOND ✅\nthird 🔥\nFOURTH\n");
Ok(())
}
/// Ensure that applying a patch via the CLI preserves CRLF line endings for
/// Windows-style inputs even when updating the file contents.
#[cfg(windows)]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ApplyPatchModelOutput::Freeform)]
#[test_case(ApplyPatchModelOutput::Function)]
#[test_case(ApplyPatchModelOutput::Shell)]
#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
#[test_case(ApplyPatchModelOutput::ShellCommandViaHeredoc)]
async fn apply_patch_cli_updates_crlf_file_preserves_line_endings(
model_output: ApplyPatchModelOutput,
) -> Result<()> {
skip_if_no_network!(Ok(()));
let harness = apply_patch_harness().await?;
let target = harness.path("crlf.txt");
fs::write(&target, b"first\r\nsecond\r\nthird\r\n")?;
let patch = format!(
r#"*** Begin Patch
*** Update File: {}
@@
first
-second
+SECOND
@@
third
+FOURTH
*** End of File
*** End Patch"#,
target.display()
);
let call_id = "apply-crlf-update";
mount_apply_patch(&harness, call_id, patch.as_str(), "ok", model_output).await;
harness
.submit("update crlf file via apply_patch CLI")
.await?;
let file_contents = fs::read(&target)?;
let content = String::from_utf8_lossy(&file_contents);
assert!(content.contains("\r\n"));
assert_eq!(content, "first\r\nSECOND\r\nthird\r\nFOURTH\r\n");
Ok(())
}