mirror of
https://github.com/openai/codex.git
synced 2026-05-25 05:24:37 +00:00
Make apply_patch streaming parser stateful
This commit is contained in:
@@ -2,6 +2,7 @@ mod invocation;
|
||||
mod parser;
|
||||
mod seek_sequence;
|
||||
mod standalone_executable;
|
||||
mod streaming_parser;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::io;
|
||||
@@ -20,8 +21,8 @@ pub use parser::ParseError;
|
||||
use parser::ParseError::*;
|
||||
pub use parser::UpdateFileChunk;
|
||||
pub use parser::parse_patch;
|
||||
pub use parser::parse_patch_streaming;
|
||||
use similar::TextDiff;
|
||||
pub use streaming_parser::StreamingPatchParser;
|
||||
use thiserror::Error;
|
||||
|
||||
pub use invocation::maybe_parse_apply_patch_verified;
|
||||
|
||||
@@ -31,15 +31,15 @@ use std::path::PathBuf;
|
||||
|
||||
use thiserror::Error;
|
||||
|
||||
const BEGIN_PATCH_MARKER: &str = "*** Begin Patch";
|
||||
const END_PATCH_MARKER: &str = "*** End Patch";
|
||||
const ADD_FILE_MARKER: &str = "*** Add File: ";
|
||||
const DELETE_FILE_MARKER: &str = "*** Delete File: ";
|
||||
const UPDATE_FILE_MARKER: &str = "*** Update File: ";
|
||||
const MOVE_TO_MARKER: &str = "*** Move to: ";
|
||||
const EOF_MARKER: &str = "*** End of File";
|
||||
const CHANGE_CONTEXT_MARKER: &str = "@@ ";
|
||||
const EMPTY_CHANGE_CONTEXT_MARKER: &str = "@@";
|
||||
pub(crate) const BEGIN_PATCH_MARKER: &str = "*** Begin Patch";
|
||||
pub(crate) const END_PATCH_MARKER: &str = "*** End Patch";
|
||||
pub(crate) const ADD_FILE_MARKER: &str = "*** Add File: ";
|
||||
pub(crate) const DELETE_FILE_MARKER: &str = "*** Delete File: ";
|
||||
pub(crate) const UPDATE_FILE_MARKER: &str = "*** Update File: ";
|
||||
pub(crate) const MOVE_TO_MARKER: &str = "*** Move to: ";
|
||||
pub(crate) const EOF_MARKER: &str = "*** End of File";
|
||||
pub(crate) const CHANGE_CONTEXT_MARKER: &str = "@@ ";
|
||||
pub(crate) const EMPTY_CHANGE_CONTEXT_MARKER: &str = "@@";
|
||||
|
||||
/// Currently, the only OpenAI model that knowingly requires lenient parsing is
|
||||
/// gpt-4.1. While we could try to require everyone to pass in a strictness
|
||||
@@ -132,14 +132,6 @@ pub fn parse_patch(patch: &str) -> Result<ApplyPatchArgs, ParseError> {
|
||||
parse_patch_text(patch, mode)
|
||||
}
|
||||
|
||||
/// Parses streamed patch text that may not have reached `*** End Patch` yet.
|
||||
///
|
||||
/// This entry point is for progress reporting only; callers must not use its
|
||||
/// output to apply a patch.
|
||||
pub fn parse_patch_streaming(patch: &str) -> Result<ApplyPatchArgs, ParseError> {
|
||||
parse_patch_text(patch, ParseMode::Streaming)
|
||||
}
|
||||
|
||||
enum ParseMode {
|
||||
/// Parse the patch text argument as is.
|
||||
Strict,
|
||||
@@ -177,12 +169,6 @@ enum ParseMode {
|
||||
/// `<<'EOF'` and ends with `EOF\n`. If so, we strip off these markers,
|
||||
/// trim() the result, and treat what is left as the patch text.
|
||||
Lenient,
|
||||
|
||||
/// Parse partial patch text for progress reporting while the model is
|
||||
/// still streaming tool input. This mode requires a begin marker but does
|
||||
/// not require an end marker, and its output must not be used to apply a
|
||||
/// patch.
|
||||
Streaming,
|
||||
}
|
||||
|
||||
fn parse_patch_text(patch: &str, mode: ParseMode) -> Result<ApplyPatchArgs, ParseError> {
|
||||
@@ -190,15 +176,13 @@ fn parse_patch_text(patch: &str, mode: ParseMode) -> Result<ApplyPatchArgs, Pars
|
||||
let (patch_lines, hunk_lines) = match mode {
|
||||
ParseMode::Strict => check_patch_boundaries_strict(&lines)?,
|
||||
ParseMode::Lenient => check_patch_boundaries_lenient(&lines)?,
|
||||
ParseMode::Streaming => check_patch_boundaries_streaming(&lines)?,
|
||||
};
|
||||
|
||||
let mut hunks: Vec<Hunk> = Vec::new();
|
||||
let mut remaining_lines = hunk_lines;
|
||||
let mut line_number = 2;
|
||||
let allow_incomplete = matches!(mode, ParseMode::Streaming);
|
||||
while !remaining_lines.is_empty() {
|
||||
let (hunk, hunk_lines) = parse_one_hunk(remaining_lines, line_number, allow_incomplete)?;
|
||||
let (hunk, hunk_lines) = parse_one_hunk(remaining_lines, line_number)?;
|
||||
hunks.push(hunk);
|
||||
line_number += hunk_lines;
|
||||
remaining_lines = &remaining_lines[hunk_lines..]
|
||||
@@ -211,25 +195,6 @@ fn parse_patch_text(patch: &str, mode: ParseMode) -> Result<ApplyPatchArgs, Pars
|
||||
})
|
||||
}
|
||||
|
||||
fn check_patch_boundaries_streaming<'a>(
|
||||
original_lines: &'a [&'a str],
|
||||
) -> Result<(&'a [&'a str], &'a [&'a str]), ParseError> {
|
||||
match original_lines {
|
||||
[first, ..] if first.trim() == BEGIN_PATCH_MARKER => {
|
||||
let body_lines = if original_lines
|
||||
.last()
|
||||
.is_some_and(|line| line.trim() == END_PATCH_MARKER)
|
||||
{
|
||||
&original_lines[1..original_lines.len() - 1]
|
||||
} else {
|
||||
&original_lines[1..]
|
||||
};
|
||||
Ok((original_lines, body_lines))
|
||||
}
|
||||
_ => check_patch_boundaries_strict(original_lines),
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks the start and end lines of the patch text for `apply_patch`,
|
||||
/// returning an error if they do not match the expected markers.
|
||||
fn check_patch_boundaries_strict<'a>(
|
||||
@@ -297,15 +262,9 @@ fn check_start_and_end_lines_strict(
|
||||
|
||||
/// Attempts to parse a single hunk from the start of lines.
|
||||
/// Returns the parsed hunk and the number of lines parsed (or a ParseError).
|
||||
fn parse_one_hunk(
|
||||
lines: &[&str],
|
||||
line_number: usize,
|
||||
allow_incomplete: bool,
|
||||
) -> Result<(Hunk, usize), ParseError> {
|
||||
// Be tolerant of case mismatches and extra padding around marker strings.
|
||||
fn parse_one_hunk(lines: &[&str], line_number: usize) -> Result<(Hunk, usize), ParseError> {
|
||||
let first_line = lines[0].trim();
|
||||
if let Some(path) = first_line.strip_prefix(ADD_FILE_MARKER) {
|
||||
// Add File
|
||||
let mut contents = String::new();
|
||||
let mut parsed_lines = 1;
|
||||
for add_line in &lines[1..] {
|
||||
@@ -325,7 +284,6 @@ fn parse_one_hunk(
|
||||
parsed_lines,
|
||||
));
|
||||
} else if let Some(path) = first_line.strip_prefix(DELETE_FILE_MARKER) {
|
||||
// Delete File
|
||||
return Ok((
|
||||
DeleteFile {
|
||||
path: PathBuf::from(path),
|
||||
@@ -333,11 +291,8 @@ fn parse_one_hunk(
|
||||
1,
|
||||
));
|
||||
} else if let Some(path) = first_line.strip_prefix(UPDATE_FILE_MARKER) {
|
||||
// Update File
|
||||
let mut remaining_lines = &lines[1..];
|
||||
let mut parsed_lines = 1;
|
||||
|
||||
// Optional: move file line
|
||||
let move_path = remaining_lines
|
||||
.first()
|
||||
.and_then(|x| x.strip_prefix(MOVE_TO_MARKER));
|
||||
@@ -348,9 +303,7 @@ fn parse_one_hunk(
|
||||
}
|
||||
|
||||
let mut chunks = Vec::new();
|
||||
// NOTE: we need to know to stop once we reach the next special marker header.
|
||||
while !remaining_lines.is_empty() {
|
||||
// Skip over any completely blank lines that may separate chunks.
|
||||
if remaining_lines[0].trim().is_empty() {
|
||||
parsed_lines += 1;
|
||||
remaining_lines = &remaining_lines[1..];
|
||||
@@ -361,22 +314,11 @@ fn parse_one_hunk(
|
||||
break;
|
||||
}
|
||||
|
||||
if allow_incomplete && remaining_lines[0] == "@" {
|
||||
break;
|
||||
}
|
||||
|
||||
let parsed_chunk = parse_update_file_chunk(
|
||||
let (chunk, chunk_lines) = parse_update_file_chunk(
|
||||
remaining_lines,
|
||||
line_number + parsed_lines,
|
||||
chunks.is_empty(),
|
||||
);
|
||||
let (chunk, chunk_lines) = match parsed_chunk {
|
||||
Ok(parsed) => parsed,
|
||||
Err(InvalidHunkError { .. }) if allow_incomplete && !chunks.is_empty() => {
|
||||
break;
|
||||
}
|
||||
Err(err) => return Err(err),
|
||||
};
|
||||
)?;
|
||||
chunks.push(chunk);
|
||||
parsed_lines += chunk_lines;
|
||||
remaining_lines = &remaining_lines[chunk_lines..]
|
||||
@@ -384,7 +326,10 @@ fn parse_one_hunk(
|
||||
|
||||
if chunks.is_empty() {
|
||||
return Err(InvalidHunkError {
|
||||
message: format!("Update file hunk for path '{path}' is empty"),
|
||||
message: format!(
|
||||
"Update file hunk for path '{}' is empty",
|
||||
Path::new(path).display()
|
||||
),
|
||||
line_number,
|
||||
});
|
||||
}
|
||||
@@ -418,8 +363,6 @@ fn parse_update_file_chunk(
|
||||
line_number,
|
||||
});
|
||||
}
|
||||
// If we see an explicit context marker @@ or @@ <context>, consume it; otherwise, optionally
|
||||
// allow treating the chunk as starting directly with diff lines.
|
||||
let (change_context, start_index) = if lines[0] == EMPTY_CHANGE_CONTEXT_MARKER {
|
||||
(None, 1)
|
||||
} else if let Some(context) = lines[0].strip_prefix(CHANGE_CONTEXT_MARKER) {
|
||||
@@ -500,166 +443,6 @@ fn parse_update_file_chunk(
|
||||
Ok((chunk, parsed_lines + start_index))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_patch_streaming() {
|
||||
assert_eq!(
|
||||
parse_patch_streaming("*** Begin Patch\n*** Add File: src/hello.txt\n+hello\n+wor"),
|
||||
Ok(ApplyPatchArgs {
|
||||
hunks: vec![AddFile {
|
||||
path: PathBuf::from("src/hello.txt"),
|
||||
contents: "hello\nwor\n".to_string(),
|
||||
}],
|
||||
patch: "*** Begin Patch\n*** Add File: src/hello.txt\n+hello\n+wor".to_string(),
|
||||
workdir: None,
|
||||
})
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
parse_patch_streaming(
|
||||
"*** Begin Patch\n*** Update File: src/old.rs\n*** Move to: src/new.rs\n@@\n-old\n+new",
|
||||
),
|
||||
Ok(ApplyPatchArgs {
|
||||
hunks: vec![UpdateFile {
|
||||
path: PathBuf::from("src/old.rs"),
|
||||
move_path: Some(PathBuf::from("src/new.rs")),
|
||||
chunks: vec![UpdateFileChunk {
|
||||
change_context: None,
|
||||
old_lines: vec!["old".to_string()],
|
||||
new_lines: vec!["new".to_string()],
|
||||
is_end_of_file: false,
|
||||
}],
|
||||
}],
|
||||
patch: "*** Begin Patch\n*** Update File: src/old.rs\n*** Move to: src/new.rs\n@@\n-old\n+new".to_string(),
|
||||
workdir: None,
|
||||
})
|
||||
);
|
||||
|
||||
assert!(
|
||||
parse_patch_text(
|
||||
"*** Begin Patch\n*** Delete File: gone.txt",
|
||||
ParseMode::Streaming
|
||||
)
|
||||
.is_ok()
|
||||
);
|
||||
assert!(
|
||||
parse_patch_text(
|
||||
"*** Begin Patch\n*** Delete File: gone.txt",
|
||||
ParseMode::Strict
|
||||
)
|
||||
.is_err()
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
parse_patch_streaming(
|
||||
"*** Begin Patch\n*** Add File: src/one.txt\n+one\n*** Delete File: src/two.txt\n",
|
||||
),
|
||||
Ok(ApplyPatchArgs {
|
||||
hunks: vec![
|
||||
AddFile {
|
||||
path: PathBuf::from("src/one.txt"),
|
||||
contents: "one\n".to_string(),
|
||||
},
|
||||
DeleteFile {
|
||||
path: PathBuf::from("src/two.txt"),
|
||||
},
|
||||
],
|
||||
patch: "*** Begin Patch\n*** Add File: src/one.txt\n+one\n*** Delete File: src/two.txt"
|
||||
.to_string(),
|
||||
workdir: None,
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_patch_streaming_large_patch_by_character() {
|
||||
let patch = "\
|
||||
*** Begin Patch
|
||||
*** Add File: docs/release-notes.md
|
||||
+# Release notes
|
||||
+
|
||||
+## CLI
|
||||
+- Surface apply_patch progress while arguments stream.
|
||||
+- Keep final patch application gated on the completed tool call.
|
||||
+- Include file summaries in the progress event payload.
|
||||
*** Update File: src/config.rs
|
||||
@@ impl Config
|
||||
- pub apply_patch_progress: bool,
|
||||
+ pub stream_apply_patch_progress: bool,
|
||||
pub include_diagnostics: bool,
|
||||
@@ fn default_progress_interval()
|
||||
- Duration::from_millis(500)
|
||||
+ Duration::from_millis(250)
|
||||
*** Delete File: src/legacy_patch_progress.rs
|
||||
*** Update File: crates/cli/src/main.rs
|
||||
*** Move to: crates/cli/src/bin/codex.rs
|
||||
@@ fn run()
|
||||
- let args = Args::parse();
|
||||
- dispatch(args)
|
||||
+ let cli = Cli::parse();
|
||||
+ dispatch(cli)
|
||||
*** Add File: tests/fixtures/apply_patch_progress.json
|
||||
+{
|
||||
+ \"type\": \"apply_patch_progress\",
|
||||
+ \"hunks\": [
|
||||
+ { \"operation\": \"add\", \"path\": \"docs/release-notes.md\" },
|
||||
+ { \"operation\": \"update\", \"path\": \"src/config.rs\" }
|
||||
+ ]
|
||||
+}
|
||||
*** Update File: README.md
|
||||
@@ Development workflow
|
||||
Build the Rust workspace before opening a pull request.
|
||||
+When touching streamed tool calls, include parser coverage for partial input.
|
||||
+Prefer tests that exercise the exact event payload shape.
|
||||
*** Delete File: docs/old-apply-patch-progress.md
|
||||
*** End Patch";
|
||||
|
||||
let mut max_hunk_count = 0;
|
||||
let mut saw_hunk_counts = Vec::new();
|
||||
for i in 1..=patch.len() {
|
||||
let partial = &patch[..i];
|
||||
if let Ok(parsed) = parse_patch_streaming(partial) {
|
||||
let hunk_count = parsed.hunks.len();
|
||||
assert!(
|
||||
hunk_count >= max_hunk_count,
|
||||
"hunk count should never decrease while streaming: {hunk_count} < {max_hunk_count} for {partial:?}",
|
||||
);
|
||||
if hunk_count > max_hunk_count {
|
||||
saw_hunk_counts.push(hunk_count);
|
||||
max_hunk_count = hunk_count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert_eq!(saw_hunk_counts, vec![1, 2, 3, 4, 5, 6, 7]);
|
||||
let parsed = parse_patch_streaming(patch).unwrap();
|
||||
assert_eq!(parsed.hunks.len(), 7);
|
||||
assert_eq!(
|
||||
parsed
|
||||
.hunks
|
||||
.iter()
|
||||
.map(|hunk| match hunk {
|
||||
AddFile { .. } => "add",
|
||||
DeleteFile { .. } => "delete",
|
||||
UpdateFile {
|
||||
move_path: Some(_), ..
|
||||
} => "move-update",
|
||||
UpdateFile {
|
||||
move_path: None, ..
|
||||
} => "update",
|
||||
})
|
||||
.collect::<Vec<_>>(),
|
||||
vec![
|
||||
"add",
|
||||
"update",
|
||||
"delete",
|
||||
"move-update",
|
||||
"add",
|
||||
"update",
|
||||
"delete"
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_patch() {
|
||||
assert_eq!(
|
||||
@@ -997,112 +780,3 @@ fn test_parse_patch_lenient() {
|
||||
))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_one_hunk() {
|
||||
assert_eq!(
|
||||
parse_one_hunk(&["bad"], /*line_number*/ 234, /*allow_incomplete*/ false),
|
||||
Err(InvalidHunkError {
|
||||
message: "'bad' is not a valid hunk header. \
|
||||
Valid hunk headers: '*** Add File: {path}', '*** Delete File: {path}', '*** Update File: {path}'".to_string(),
|
||||
line_number: 234
|
||||
})
|
||||
);
|
||||
// Other edge cases are already covered by tests above/below.
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_update_file_chunk() {
|
||||
assert_eq!(
|
||||
parse_update_file_chunk(
|
||||
&["bad"],
|
||||
/*line_number*/ 123,
|
||||
/*allow_missing_context*/ false
|
||||
),
|
||||
Err(InvalidHunkError {
|
||||
message: "Expected update hunk to start with a @@ context marker, got: 'bad'"
|
||||
.to_string(),
|
||||
line_number: 123
|
||||
})
|
||||
);
|
||||
assert_eq!(
|
||||
parse_update_file_chunk(
|
||||
&["@@"],
|
||||
/*line_number*/ 123,
|
||||
/*allow_missing_context*/ false
|
||||
),
|
||||
Err(InvalidHunkError {
|
||||
message: "Update hunk does not contain any lines".to_string(),
|
||||
line_number: 124
|
||||
})
|
||||
);
|
||||
assert_eq!(
|
||||
parse_update_file_chunk(&["@@", "bad"], /*line_number*/ 123, /*allow_missing_context*/ false),
|
||||
Err(InvalidHunkError {
|
||||
message: "Unexpected line found in update hunk: 'bad'. \
|
||||
Every line should start with ' ' (context line), '+' (added line), or '-' (removed line)".to_string(),
|
||||
line_number: 124
|
||||
})
|
||||
);
|
||||
assert_eq!(
|
||||
parse_update_file_chunk(
|
||||
&["@@", "*** End of File"],
|
||||
/*line_number*/ 123,
|
||||
/*allow_missing_context*/ false
|
||||
),
|
||||
Err(InvalidHunkError {
|
||||
message: "Update hunk does not contain any lines".to_string(),
|
||||
line_number: 124
|
||||
})
|
||||
);
|
||||
assert_eq!(
|
||||
parse_update_file_chunk(
|
||||
&[
|
||||
"@@ change_context",
|
||||
"",
|
||||
" context",
|
||||
"-remove",
|
||||
"+add",
|
||||
" context2",
|
||||
"*** End Patch",
|
||||
],
|
||||
/*line_number*/ 123,
|
||||
/*allow_missing_context*/ false
|
||||
),
|
||||
Ok((
|
||||
(UpdateFileChunk {
|
||||
change_context: Some("change_context".to_string()),
|
||||
old_lines: vec![
|
||||
"".to_string(),
|
||||
"context".to_string(),
|
||||
"remove".to_string(),
|
||||
"context2".to_string()
|
||||
],
|
||||
new_lines: vec![
|
||||
"".to_string(),
|
||||
"context".to_string(),
|
||||
"add".to_string(),
|
||||
"context2".to_string()
|
||||
],
|
||||
is_end_of_file: false
|
||||
}),
|
||||
6
|
||||
))
|
||||
);
|
||||
assert_eq!(
|
||||
parse_update_file_chunk(
|
||||
&["@@", "+line", "*** End of File"],
|
||||
/*line_number*/ 123,
|
||||
/*allow_missing_context*/ false
|
||||
),
|
||||
Ok((
|
||||
(UpdateFileChunk {
|
||||
change_context: None,
|
||||
old_lines: vec![],
|
||||
new_lines: vec!["line".to_string()],
|
||||
is_end_of_file: true
|
||||
}),
|
||||
3
|
||||
))
|
||||
);
|
||||
}
|
||||
|
||||
473
codex-rs/apply-patch/src/streaming_parser.rs
Normal file
473
codex-rs/apply-patch/src/streaming_parser.rs
Normal file
@@ -0,0 +1,473 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use crate::parser::ADD_FILE_MARKER;
|
||||
use crate::parser::BEGIN_PATCH_MARKER;
|
||||
use crate::parser::CHANGE_CONTEXT_MARKER;
|
||||
use crate::parser::DELETE_FILE_MARKER;
|
||||
use crate::parser::EMPTY_CHANGE_CONTEXT_MARKER;
|
||||
use crate::parser::END_PATCH_MARKER;
|
||||
use crate::parser::EOF_MARKER;
|
||||
use crate::parser::Hunk;
|
||||
use crate::parser::MOVE_TO_MARKER;
|
||||
use crate::parser::ParseError;
|
||||
use crate::parser::UPDATE_FILE_MARKER;
|
||||
use crate::parser::UpdateFileChunk;
|
||||
|
||||
use Hunk::*;
|
||||
use ParseError::*;
|
||||
|
||||
#[derive(Debug, Default, Clone)]
|
||||
pub struct StreamingPatchParser {
|
||||
line_buffer: String,
|
||||
state: StreamingParserState,
|
||||
line_number: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Clone)]
|
||||
struct StreamingParserState {
|
||||
mode: StreamingParserMode,
|
||||
hunks: Vec<Hunk>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Clone)]
|
||||
enum StreamingParserMode {
|
||||
#[default]
|
||||
NotStarted,
|
||||
StartedPatch,
|
||||
AddFile,
|
||||
DeleteFile,
|
||||
UpdateFile,
|
||||
EndedPatch,
|
||||
}
|
||||
|
||||
fn handle_hunk_headers_and_end_patch(
|
||||
trimmed: &str,
|
||||
hunks: &mut Vec<Hunk>,
|
||||
) -> Option<StreamingParserMode> {
|
||||
if trimmed == END_PATCH_MARKER {
|
||||
return Some(StreamingParserMode::EndedPatch);
|
||||
}
|
||||
if let Some(path) = trimmed.strip_prefix(ADD_FILE_MARKER) {
|
||||
hunks.push(AddFile {
|
||||
path: PathBuf::from(path),
|
||||
contents: String::new(),
|
||||
});
|
||||
return Some(StreamingParserMode::AddFile);
|
||||
}
|
||||
if let Some(path) = trimmed.strip_prefix(DELETE_FILE_MARKER) {
|
||||
hunks.push(DeleteFile {
|
||||
path: PathBuf::from(path),
|
||||
});
|
||||
return Some(StreamingParserMode::DeleteFile);
|
||||
}
|
||||
if let Some(path) = trimmed.strip_prefix(UPDATE_FILE_MARKER) {
|
||||
hunks.push(UpdateFile {
|
||||
path: PathBuf::from(path),
|
||||
move_path: None,
|
||||
chunks: Vec::new(),
|
||||
});
|
||||
return Some(StreamingParserMode::UpdateFile);
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
impl StreamingPatchParser {
|
||||
pub fn push_delta(&mut self, delta: &str) -> Result<Option<Vec<Hunk>>, ParseError> {
|
||||
for ch in delta.chars() {
|
||||
if ch == '\n' {
|
||||
let line = std::mem::take(&mut self.line_buffer);
|
||||
let state = std::mem::take(&mut self.state);
|
||||
self.line_number += 1;
|
||||
self.state =
|
||||
Self::process_line(state, line.trim_end_matches('\r'), self.line_number)?;
|
||||
} else {
|
||||
self.line_buffer.push(ch);
|
||||
}
|
||||
}
|
||||
|
||||
let hunks = self.state.hunks.clone();
|
||||
Ok(if hunks.is_empty() { None } else { Some(hunks) })
|
||||
}
|
||||
|
||||
fn process_line(
|
||||
state: StreamingParserState,
|
||||
line: &str,
|
||||
line_number: usize,
|
||||
) -> Result<StreamingParserState, ParseError> {
|
||||
let trimmed = line.trim();
|
||||
let StreamingParserState {
|
||||
mut mode,
|
||||
mut hunks,
|
||||
} = state;
|
||||
mode = match mode {
|
||||
StreamingParserMode::NotStarted => {
|
||||
if trimmed == BEGIN_PATCH_MARKER {
|
||||
return Ok(StreamingParserState {
|
||||
mode: StreamingParserMode::StartedPatch,
|
||||
hunks,
|
||||
});
|
||||
}
|
||||
return Err(InvalidPatchError(
|
||||
"The first line of the patch must be '*** Begin Patch'".to_string(),
|
||||
));
|
||||
}
|
||||
StreamingParserMode::StartedPatch => {
|
||||
if let Some(mode) = handle_hunk_headers_and_end_patch(trimmed, &mut hunks) {
|
||||
return Ok(StreamingParserState { mode, hunks });
|
||||
}
|
||||
return Err(InvalidHunkError {
|
||||
message: format!(
|
||||
"'{trimmed}' is not a valid hunk header. Valid hunk headers: '*** Add File: {{path}}', '*** Delete File: {{path}}', '*** Update File: {{path}}'"
|
||||
),
|
||||
line_number,
|
||||
});
|
||||
}
|
||||
StreamingParserMode::AddFile => {
|
||||
if let Some(mode) = handle_hunk_headers_and_end_patch(trimmed, &mut hunks) {
|
||||
return Ok(StreamingParserState { mode, hunks });
|
||||
}
|
||||
if let Some(line_to_add) = line.strip_prefix('+')
|
||||
&& let Some(AddFile { contents, .. }) = hunks.last_mut()
|
||||
{
|
||||
contents.push_str(line_to_add);
|
||||
contents.push('\n');
|
||||
return Ok(StreamingParserState {
|
||||
mode: StreamingParserMode::AddFile,
|
||||
hunks,
|
||||
});
|
||||
}
|
||||
return Err(InvalidHunkError {
|
||||
message: format!(
|
||||
"Unexpected line found in add file hunk: '{line}'. Every line should start with '+'"
|
||||
),
|
||||
line_number,
|
||||
});
|
||||
}
|
||||
StreamingParserMode::DeleteFile => {
|
||||
if let Some(mode) = handle_hunk_headers_and_end_patch(trimmed, &mut hunks) {
|
||||
return Ok(StreamingParserState { mode, hunks });
|
||||
}
|
||||
return Err(InvalidHunkError {
|
||||
message: format!(
|
||||
"'{trimmed}' is not a valid hunk header. Valid hunk headers: '*** Add File: {{path}}', '*** Delete File: {{path}}', '*** Update File: {{path}}'"
|
||||
),
|
||||
line_number,
|
||||
});
|
||||
}
|
||||
StreamingParserMode::UpdateFile => {
|
||||
if let Some(mode) = handle_hunk_headers_and_end_patch(trimmed, &mut hunks) {
|
||||
return Ok(StreamingParserState { mode, hunks });
|
||||
}
|
||||
|
||||
if let Some(UpdateFile {
|
||||
move_path, chunks, ..
|
||||
}) = hunks.last_mut()
|
||||
{
|
||||
if chunks.is_empty()
|
||||
&& move_path.is_none()
|
||||
&& let Some(move_to_path) = line.trim().strip_prefix(MOVE_TO_MARKER)
|
||||
{
|
||||
*move_path = Some(PathBuf::from(move_to_path));
|
||||
return Ok(StreamingParserState {
|
||||
mode: StreamingParserMode::UpdateFile,
|
||||
hunks,
|
||||
});
|
||||
}
|
||||
|
||||
match line.trim() {
|
||||
EMPTY_CHANGE_CONTEXT_MARKER => {
|
||||
chunks.push(UpdateFileChunk {
|
||||
change_context: None,
|
||||
old_lines: Vec::new(),
|
||||
new_lines: Vec::new(),
|
||||
is_end_of_file: false,
|
||||
});
|
||||
return Ok(StreamingParserState {
|
||||
mode: StreamingParserMode::UpdateFile,
|
||||
hunks,
|
||||
});
|
||||
}
|
||||
line => {
|
||||
if let Some(change_context) = line.strip_prefix(CHANGE_CONTEXT_MARKER) {
|
||||
chunks.push(UpdateFileChunk {
|
||||
change_context: Some(change_context.to_string()),
|
||||
old_lines: Vec::new(),
|
||||
new_lines: Vec::new(),
|
||||
is_end_of_file: false,
|
||||
});
|
||||
return Ok(StreamingParserState {
|
||||
mode: StreamingParserMode::UpdateFile,
|
||||
hunks,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if trimmed == EOF_MARKER {
|
||||
if let Some(chunk) = chunks.last_mut() {
|
||||
chunk.is_end_of_file = true;
|
||||
}
|
||||
return Ok(StreamingParserState {
|
||||
mode: StreamingParserMode::UpdateFile,
|
||||
hunks,
|
||||
});
|
||||
}
|
||||
|
||||
let chunk = if chunks.is_empty() {
|
||||
chunks.push(UpdateFileChunk {
|
||||
change_context: None,
|
||||
old_lines: Vec::new(),
|
||||
new_lines: Vec::new(),
|
||||
is_end_of_file: false,
|
||||
});
|
||||
chunks.last_mut()
|
||||
} else {
|
||||
chunks.last_mut()
|
||||
};
|
||||
if let Some(chunk) = chunk {
|
||||
let parsed_update_line = match line.chars().next() {
|
||||
None => {
|
||||
chunk.old_lines.push(String::new());
|
||||
chunk.new_lines.push(String::new());
|
||||
true
|
||||
}
|
||||
Some(' ') => {
|
||||
chunk.old_lines.push(line[1..].to_string());
|
||||
chunk.new_lines.push(line[1..].to_string());
|
||||
true
|
||||
}
|
||||
Some('+') => {
|
||||
chunk.new_lines.push(line[1..].to_string());
|
||||
true
|
||||
}
|
||||
Some('-') => {
|
||||
chunk.old_lines.push(line[1..].to_string());
|
||||
true
|
||||
}
|
||||
Some(_) => false,
|
||||
};
|
||||
if parsed_update_line {
|
||||
return Ok(StreamingParserState {
|
||||
mode: StreamingParserMode::UpdateFile,
|
||||
hunks,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
return Err(InvalidHunkError {
|
||||
message: format!(
|
||||
"Unexpected line found in update hunk: '{line}'. Every line should start with ' ' (context line), '+' (added line), or '-' (removed line)"
|
||||
),
|
||||
line_number,
|
||||
});
|
||||
}
|
||||
StreamingParserMode::EndedPatch => mode,
|
||||
};
|
||||
Ok(StreamingParserState { mode, hunks })
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use pretty_assertions::assert_eq;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_streaming_patch_parser_streams_complete_lines_before_end_patch() {
|
||||
let mut parser = StreamingPatchParser::default();
|
||||
assert_eq!(
|
||||
parser.push_delta("*** Begin Patch\n*** Add File: src/hello.txt\n+hello\n+wor"),
|
||||
Ok(Some(vec![AddFile {
|
||||
path: PathBuf::from("src/hello.txt"),
|
||||
contents: "hello\n".to_string(),
|
||||
}]))
|
||||
);
|
||||
assert_eq!(
|
||||
parser.push_delta("ld\n"),
|
||||
Ok(Some(vec![AddFile {
|
||||
path: PathBuf::from("src/hello.txt"),
|
||||
contents: "hello\nworld\n".to_string(),
|
||||
}]))
|
||||
);
|
||||
|
||||
let mut parser = StreamingPatchParser::default();
|
||||
assert_eq!(
|
||||
parser.push_delta(
|
||||
"*** Begin Patch\n*** Update File: src/old.rs\n*** Move to: src/new.rs\n@@\n-old\n+new\n",
|
||||
),
|
||||
Ok(Some(vec![UpdateFile {
|
||||
path: PathBuf::from("src/old.rs"),
|
||||
move_path: Some(PathBuf::from("src/new.rs")),
|
||||
chunks: vec![UpdateFileChunk {
|
||||
change_context: None,
|
||||
old_lines: vec!["old".to_string()],
|
||||
new_lines: vec!["new".to_string()],
|
||||
is_end_of_file: false,
|
||||
}],
|
||||
}]))
|
||||
);
|
||||
|
||||
let mut parser = StreamingPatchParser::default();
|
||||
assert_eq!(
|
||||
parser.push_delta("*** Begin Patch\n*** Delete File: gone.txt"),
|
||||
Ok(None)
|
||||
);
|
||||
assert_eq!(
|
||||
parser.push_delta("\n"),
|
||||
Ok(Some(vec![DeleteFile {
|
||||
path: PathBuf::from("gone.txt"),
|
||||
}]))
|
||||
);
|
||||
|
||||
let mut parser = StreamingPatchParser::default();
|
||||
assert_eq!(
|
||||
parser.push_delta(
|
||||
"*** Begin Patch\n*** Add File: src/one.txt\n+one\n*** Delete File: src/two.txt\n",
|
||||
),
|
||||
Ok(Some(vec![
|
||||
AddFile {
|
||||
path: PathBuf::from("src/one.txt"),
|
||||
contents: "one\n".to_string(),
|
||||
},
|
||||
DeleteFile {
|
||||
path: PathBuf::from("src/two.txt"),
|
||||
},
|
||||
]))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_streaming_patch_parser_large_patch_split_by_character() {
|
||||
let patch = "\
|
||||
*** Begin Patch
|
||||
*** Add File: docs/release-notes.md
|
||||
+# Release notes
|
||||
+
|
||||
+## CLI
|
||||
+- Surface apply_patch progress while arguments stream.
|
||||
+- Keep final patch application gated on the completed tool call.
|
||||
+- Include file summaries in the progress event payload.
|
||||
*** Update File: src/config.rs
|
||||
@@ impl Config
|
||||
- pub apply_patch_progress: bool,
|
||||
+ pub stream_apply_patch_progress: bool,
|
||||
pub include_diagnostics: bool,
|
||||
@@ fn default_progress_interval()
|
||||
- Duration::from_millis(500)
|
||||
+ Duration::from_millis(250)
|
||||
*** Delete File: src/legacy_patch_progress.rs
|
||||
*** Update File: crates/cli/src/main.rs
|
||||
*** Move to: crates/cli/src/bin/codex.rs
|
||||
@@ fn run()
|
||||
- let args = Args::parse();
|
||||
- dispatch(args)
|
||||
+ let cli = Cli::parse();
|
||||
+ dispatch(cli)
|
||||
*** Add File: tests/fixtures/apply_patch_progress.json
|
||||
+{
|
||||
+ \"type\": \"apply_patch_progress\",
|
||||
+ \"hunks\": [
|
||||
+ { \"operation\": \"add\", \"path\": \"docs/release-notes.md\" },
|
||||
+ { \"operation\": \"update\", \"path\": \"src/config.rs\" }
|
||||
+ ]
|
||||
+}
|
||||
*** Update File: README.md
|
||||
@@ Development workflow
|
||||
Build the Rust workspace before opening a pull request.
|
||||
+When touching streamed tool calls, include parser coverage for partial input.
|
||||
+Prefer tests that exercise the exact event payload shape.
|
||||
*** Delete File: docs/old-apply-patch-progress.md
|
||||
*** End Patch";
|
||||
|
||||
let mut parser = StreamingPatchParser::default();
|
||||
let mut max_hunk_count = 0;
|
||||
let mut saw_hunk_counts = Vec::new();
|
||||
let mut hunks = Vec::new();
|
||||
for ch in patch.chars() {
|
||||
if let Some(updated_hunks) = parser.push_delta(&ch.to_string()).unwrap() {
|
||||
let hunk_count = updated_hunks.len();
|
||||
assert!(
|
||||
hunk_count >= max_hunk_count,
|
||||
"hunk count should never decrease while streaming: {hunk_count} < {max_hunk_count}",
|
||||
);
|
||||
if hunk_count > max_hunk_count {
|
||||
saw_hunk_counts.push(hunk_count);
|
||||
max_hunk_count = hunk_count;
|
||||
}
|
||||
hunks = updated_hunks;
|
||||
}
|
||||
}
|
||||
|
||||
assert_eq!(saw_hunk_counts, vec![1, 2, 3, 4, 5, 6, 7]);
|
||||
assert_eq!(hunks.len(), 7);
|
||||
assert_eq!(
|
||||
hunks
|
||||
.iter()
|
||||
.map(|hunk| match hunk {
|
||||
AddFile { .. } => "add",
|
||||
DeleteFile { .. } => "delete",
|
||||
UpdateFile {
|
||||
move_path: Some(_), ..
|
||||
} => "move-update",
|
||||
UpdateFile {
|
||||
move_path: None, ..
|
||||
} => "update",
|
||||
})
|
||||
.collect::<Vec<_>>(),
|
||||
vec![
|
||||
"add",
|
||||
"update",
|
||||
"delete",
|
||||
"move-update",
|
||||
"add",
|
||||
"update",
|
||||
"delete"
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_streaming_patch_parser_returns_errors() {
|
||||
let mut parser = StreamingPatchParser::default();
|
||||
assert_eq!(
|
||||
parser.push_delta("bad\n"),
|
||||
Err(InvalidPatchError(
|
||||
"The first line of the patch must be '*** Begin Patch'".to_string(),
|
||||
))
|
||||
);
|
||||
|
||||
let mut parser = StreamingPatchParser::default();
|
||||
assert_eq!(parser.push_delta("*** Begin Patch\n"), Ok(None));
|
||||
assert_eq!(
|
||||
parser.push_delta("bad\n"),
|
||||
Err(InvalidHunkError {
|
||||
message: "'bad' is not a valid hunk header. Valid hunk headers: '*** Add File: {path}', '*** Delete File: {path}', '*** Update File: {path}'"
|
||||
.to_string(),
|
||||
line_number: 2,
|
||||
})
|
||||
);
|
||||
|
||||
let mut parser = StreamingPatchParser::default();
|
||||
assert_eq!(
|
||||
parser.push_delta("*** Begin Patch\n*** Add File: file.txt\nbad\n"),
|
||||
Err(InvalidHunkError {
|
||||
message:
|
||||
"Unexpected line found in add file hunk: 'bad'. Every line should start with '+'"
|
||||
.to_string(),
|
||||
line_number: 3,
|
||||
})
|
||||
);
|
||||
|
||||
let mut parser = StreamingPatchParser::default();
|
||||
assert_eq!(
|
||||
parser.push_delta("*** Begin Patch\n*** Delete File: file.txt\nbad\n"),
|
||||
Err(InvalidHunkError {
|
||||
message: "'bad' is not a valid hunk header. Valid hunk headers: '*** Add File: {path}', '*** Delete File: {path}', '*** Update File: {path}'"
|
||||
.to_string(),
|
||||
line_number: 3,
|
||||
})
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -27,10 +27,9 @@ use crate::tools::runtimes::apply_patch::ApplyPatchRequest;
|
||||
use crate::tools::runtimes::apply_patch::ApplyPatchRuntime;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
use codex_apply_patch::ApplyPatchAction;
|
||||
use codex_apply_patch::ApplyPatchArgs;
|
||||
use codex_apply_patch::ApplyPatchFileChange;
|
||||
use codex_apply_patch::Hunk;
|
||||
use codex_apply_patch::parse_patch_streaming;
|
||||
use codex_apply_patch::StreamingPatchParser;
|
||||
use codex_exec_server::ExecutorFileSystem;
|
||||
use codex_features::Feature;
|
||||
use codex_protocol::models::FileSystemPermissions;
|
||||
@@ -48,8 +47,7 @@ pub struct ApplyPatchHandler;
|
||||
|
||||
#[derive(Default)]
|
||||
struct ApplyPatchArgumentDiffConsumer {
|
||||
input: String,
|
||||
last_progress: Option<Vec<Hunk>>,
|
||||
parser: StreamingPatchParser,
|
||||
}
|
||||
|
||||
impl ToolArgumentDiffConsumer for ApplyPatchArgumentDiffConsumer {
|
||||
@@ -70,18 +68,8 @@ impl ToolArgumentDiffConsumer for ApplyPatchArgumentDiffConsumer {
|
||||
|
||||
impl ApplyPatchArgumentDiffConsumer {
|
||||
fn push_delta(&mut self, call_id: String, delta: &str) -> Option<PatchApplyUpdatedEvent> {
|
||||
self.input.push_str(delta);
|
||||
|
||||
let ApplyPatchArgs { hunks, .. } = parse_patch_streaming(&self.input).ok()?;
|
||||
if hunks.is_empty() {
|
||||
return None;
|
||||
}
|
||||
if self.last_progress.as_ref() == Some(&hunks) {
|
||||
return None;
|
||||
}
|
||||
|
||||
let hunks = self.parser.push_delta(delta).ok()??;
|
||||
let changes = convert_apply_patch_hunks_to_protocol(&hunks);
|
||||
self.last_progress = Some(hunks);
|
||||
Some(PatchApplyUpdatedEvent { call_id, changes })
|
||||
}
|
||||
}
|
||||
|
||||
@@ -48,7 +48,7 @@ fn diff_consumer_streams_apply_patch_changes() {
|
||||
HashMap::from([(
|
||||
PathBuf::from("hello.txt"),
|
||||
FileChange::Add {
|
||||
content: "hello\n".to_string(),
|
||||
content: String::new(),
|
||||
},
|
||||
)]),
|
||||
)
|
||||
@@ -57,6 +57,22 @@ fn diff_consumer_streams_apply_patch_changes() {
|
||||
let event = consumer
|
||||
.push_delta("call-1".to_string(), "\n+world")
|
||||
.expect("progress event");
|
||||
assert_eq!(
|
||||
(event.call_id, event.changes),
|
||||
(
|
||||
"call-1".to_string(),
|
||||
HashMap::from([(
|
||||
PathBuf::from("hello.txt"),
|
||||
FileChange::Add {
|
||||
content: "hello\n".to_string(),
|
||||
},
|
||||
)]),
|
||||
)
|
||||
);
|
||||
|
||||
let event = consumer
|
||||
.push_delta("call-1".to_string(), "\n")
|
||||
.expect("progress event");
|
||||
assert_eq!(
|
||||
(event.call_id, event.changes),
|
||||
(
|
||||
|
||||
Reference in New Issue
Block a user