Parse apply_patch environment preamble in parser

Move environment id extraction into the apply-patch parser and verify parsed freeform tool input without reparsing. Always pass the selected-cwd filesystem sandbox context into apply_patch verification so local preflight reads follow the same sandbox path as remote reads.

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
starr-openai
2026-05-08 18:07:30 -07:00
parent 2cf36f8b50
commit 77b3116c65
5 changed files with 214 additions and 166 deletions

View File

@@ -151,78 +151,88 @@ pub async fn maybe_parse_apply_patch_verified(
}
match maybe_parse_apply_patch(argv) {
MaybeApplyPatch::Body(ApplyPatchArgs {
patch,
hunks,
workdir,
}) => {
let effective_cwd = workdir
.as_ref()
.map(|dir| cwd.join(Path::new(dir)))
.unwrap_or_else(|| cwd.clone());
let mut changes = HashMap::new();
for hunk in hunks {
let path = hunk.resolve_path(&effective_cwd);
match hunk {
Hunk::AddFile { contents, .. } => {
changes.insert(
path.into_path_buf(),
ApplyPatchFileChange::Add { content: contents },
);
}
Hunk::DeleteFile { .. } => {
let content = match fs.read_file_text(&path, sandbox).await {
Ok(content) => content,
Err(e) => {
return MaybeApplyPatchVerified::CorrectnessError(
ApplyPatchError::IoError(IoError {
context: format!("Failed to read {}", path.display()),
source: e,
}),
);
}
};
changes.insert(
path.into_path_buf(),
ApplyPatchFileChange::Delete { content },
);
}
Hunk::UpdateFile {
move_path, chunks, ..
} => {
let ApplyPatchFileUpdate {
unified_diff,
content: contents,
..
} = match unified_diff_from_chunks(&path, &chunks, fs, sandbox).await {
Ok(diff) => diff,
Err(e) => {
return MaybeApplyPatchVerified::CorrectnessError(e);
}
};
changes.insert(
path.into_path_buf(),
ApplyPatchFileChange::Update {
unified_diff,
move_path: move_path.map(|p| effective_cwd.join(p).into_path_buf()),
new_content: contents,
},
);
}
}
}
MaybeApplyPatchVerified::Body(ApplyPatchAction {
changes,
patch,
cwd: effective_cwd,
})
}
MaybeApplyPatch::Body(args) => verify_apply_patch_args(args, cwd, fs, sandbox).await,
MaybeApplyPatch::ShellParseError(e) => MaybeApplyPatchVerified::ShellParseError(e),
MaybeApplyPatch::PatchParseError(e) => MaybeApplyPatchVerified::CorrectnessError(e.into()),
MaybeApplyPatch::NotApplyPatch => MaybeApplyPatchVerified::NotApplyPatch,
}
}
pub async fn verify_apply_patch_args(
args: ApplyPatchArgs,
cwd: &AbsolutePathBuf,
fs: &dyn ExecutorFileSystem,
sandbox: Option<&codex_exec_server::FileSystemSandboxContext>,
) -> MaybeApplyPatchVerified {
let ApplyPatchArgs {
patch,
hunks,
workdir,
environment_id,
} = args;
let effective_cwd = workdir
.as_ref()
.map(|dir| cwd.join(Path::new(dir)))
.unwrap_or_else(|| cwd.clone());
let mut changes = HashMap::new();
for hunk in hunks {
let path = hunk.resolve_path(&effective_cwd);
match hunk {
Hunk::AddFile { contents, .. } => {
changes.insert(
path.into_path_buf(),
ApplyPatchFileChange::Add { content: contents },
);
}
Hunk::DeleteFile { .. } => {
let content = match fs.read_file_text(&path, sandbox).await {
Ok(content) => content,
Err(e) => {
return MaybeApplyPatchVerified::CorrectnessError(
ApplyPatchError::IoError(IoError {
context: format!("Failed to read {}", path.display()),
source: e,
}),
);
}
};
changes.insert(
path.into_path_buf(),
ApplyPatchFileChange::Delete { content },
);
}
Hunk::UpdateFile {
move_path, chunks, ..
} => {
let ApplyPatchFileUpdate {
unified_diff,
content: contents,
..
} = match unified_diff_from_chunks(&path, &chunks, fs, sandbox).await {
Ok(diff) => diff,
Err(e) => {
return MaybeApplyPatchVerified::CorrectnessError(e);
}
};
changes.insert(
path.into_path_buf(),
ApplyPatchFileChange::Update {
unified_diff,
move_path: move_path.map(|p| effective_cwd.join(p).into_path_buf()),
new_content: contents,
},
);
}
}
}
MaybeApplyPatchVerified::Body(ApplyPatchAction {
changes,
patch,
cwd: effective_cwd,
environment_id,
})
}
/// Extract the heredoc body (and optional `cd` workdir) from a `bash -lc` script
/// that invokes the apply_patch tool using a heredoc.
///
@@ -801,6 +811,7 @@ PATCH"#,
)]),
patch: argv[1].clone(),
cwd: AbsolutePathBuf::from_absolute_path(session_dir.path()).unwrap(),
environment_id: None,
})
);
}

View File

@@ -26,6 +26,7 @@ pub use streaming_parser::StreamingPatchParser;
use thiserror::Error;
pub use invocation::maybe_parse_apply_patch_verified;
pub use invocation::verify_apply_patch_args;
pub use standalone_executable::main;
use crate::invocation::ExtractHeredocError;
@@ -97,6 +98,7 @@ pub struct ApplyPatchArgs {
pub patch: String,
pub hunks: Vec<Hunk>,
pub workdir: Option<String>,
pub environment_id: Option<String>,
}
#[derive(Debug, PartialEq)]
@@ -143,6 +145,9 @@ pub struct ApplyPatchAction {
/// The working directory that was used to resolve relative paths in the patch.
pub cwd: AbsolutePathBuf,
/// Optional environment selected by the patch preamble.
pub environment_id: Option<String>,
}
impl ApplyPatchAction {
@@ -176,6 +181,7 @@ impl ApplyPatchAction {
changes,
cwd: path.parent().expect("path should have parent"),
patch,
environment_id: None,
}
}
}

View File

@@ -3,8 +3,9 @@
//!
//! The official Lark grammar for the apply-patch format is:
//!
//! start: begin_patch hunk+ end_patch
//! start: begin_patch environment_id? hunk+ end_patch
//! begin_patch: "*** Begin Patch" LF
//! environment_id: "*** Environment ID: " filename LF
//! end_patch: "*** End Patch" LF?
//!
//! hunk: add_hunk | delete_hunk | update_hunk
@@ -32,6 +33,7 @@ use std::path::PathBuf;
use thiserror::Error;
pub(crate) const BEGIN_PATCH_MARKER: &str = "*** Begin Patch";
pub(crate) const ENVIRONMENT_ID_MARKER: &str = "*** Environment ID: ";
pub(crate) const END_PATCH_MARKER: &str = "*** End Patch";
pub(crate) const ADD_FILE_MARKER: &str = "*** Add File: ";
pub(crate) const DELETE_FILE_MARKER: &str = "*** Delete File: ";
@@ -178,9 +180,9 @@ fn parse_patch_text(patch: &str, mode: ParseMode) -> Result<ApplyPatchArgs, Pars
ParseMode::Lenient => check_patch_boundaries_lenient(&lines)?,
};
let (environment_id, mut remaining_lines, mut line_number) =
parse_environment_id_preamble(hunk_lines)?;
let mut hunks: Vec<Hunk> = Vec::new();
let mut remaining_lines = hunk_lines;
let mut line_number = 2;
while !remaining_lines.is_empty() {
let (hunk, hunk_lines) = parse_one_hunk(remaining_lines, line_number)?;
hunks.push(hunk);
@@ -192,9 +194,28 @@ fn parse_patch_text(patch: &str, mode: ParseMode) -> Result<ApplyPatchArgs, Pars
hunks,
patch,
workdir: None,
environment_id,
})
}
fn parse_environment_id_preamble<'a>(
hunk_lines: &'a [&'a str],
) -> Result<(Option<String>, &'a [&'a str], usize), ParseError> {
let Some(first_line) = hunk_lines.first() else {
return Ok((None, hunk_lines, 2));
};
let Some(environment_id) = first_line.trim_start().strip_prefix(ENVIRONMENT_ID_MARKER) else {
return Ok((None, hunk_lines, 2));
};
let environment_id = environment_id.trim();
if environment_id.is_empty() {
return Err(InvalidPatchError(
"apply_patch environment_id cannot be empty".to_string(),
));
}
Ok((Some(environment_id.to_string()), &hunk_lines[1..], 3))
}
/// Checks the start and end lines of the patch text for `apply_patch`,
/// returning an error if they do not match the expected markers.
fn check_patch_boundaries_strict<'a>(
@@ -837,6 +858,7 @@ fn test_parse_patch_lenient() {
hunks: expected_patch.clone(),
patch: patch_text.to_string(),
workdir: None,
environment_id: None,
})
);
@@ -851,6 +873,7 @@ fn test_parse_patch_lenient() {
hunks: expected_patch.clone(),
patch: patch_text.to_string(),
workdir: None,
environment_id: None,
})
);
@@ -865,6 +888,7 @@ fn test_parse_patch_lenient() {
hunks: expected_patch,
patch: patch_text.to_string(),
workdir: None,
environment_id: None,
})
);
@@ -891,3 +915,40 @@ fn test_parse_patch_lenient() {
))
);
}
#[test]
fn test_parse_patch_environment_id_preamble() {
assert_eq!(
parse_patch_text(
"*** Begin Patch\n\
*** Environment ID: remote\n\
*** Add File: hello.txt\n\
+hello\n\
*** End Patch",
ParseMode::Strict
),
Ok(ApplyPatchArgs {
hunks: vec![AddFile {
path: PathBuf::from("hello.txt"),
contents: "hello\n".to_string(),
}],
patch: "*** Begin Patch\n*** Environment ID: remote\n*** Add File: hello.txt\n+hello\n*** End Patch".to_string(),
workdir: None,
environment_id: Some("remote".to_string()),
})
);
assert_eq!(
parse_patch_text(
"*** Begin Patch\n\
*** Environment ID: \n\
*** Add File: hello.txt\n\
+hello\n\
*** End Patch",
ParseMode::Strict
),
Err(InvalidPatchError(
"apply_patch environment_id cannot be empty".to_string()
))
);
}

View File

@@ -52,8 +52,6 @@ use codex_tools::ToolSpec;
use codex_utils_absolute_path::AbsolutePathBuf;
const APPLY_PATCH_ARGUMENT_DIFF_BUFFER_INTERVAL: Duration = Duration::from_millis(500);
const APPLY_PATCH_ENVIRONMENT_ID_MARKER: &str = "*** Environment ID: ";
/// Handles freeform `apply_patch` requests and routes verified patches to the
/// selected environment filesystem.
#[derive(Default)]
@@ -364,13 +362,18 @@ impl ToolHandler for ApplyPatchHandler {
"apply_patch handler received unsupported payload".to_string(),
));
};
let (parsed_environment_id, patch_input) =
extract_patch_environment_id(&patch_input, self.multi_environment)?;
let args = match codex_apply_patch::parse_patch(&patch_input) {
Ok(args) => args,
Err(parse_error) => {
return Err(FunctionCallError::RespondToModel(format!(
"apply_patch verification failed: {parse_error}"
)));
}
};
let selected_environment_id =
require_environment_id(parsed_environment_id.as_deref(), self.multi_environment)?;
require_environment_id(args.environment_id.as_deref(), self.multi_environment)?;
// Re-parse and verify the patch so we can compute changes and approval.
// Avoid building temporary ExecParams/command vectors; derive directly from inputs.
// Verify the parsed patch against the selected environment filesystem.
let Some(turn_environment) =
resolve_tool_environment(turn.as_ref(), selected_environment_id.as_deref())?
else {
@@ -379,21 +382,11 @@ impl ToolHandler for ApplyPatchHandler {
));
};
let cwd = turn_environment.cwd.clone();
let command = vec!["apply_patch".to_string(), patch_input.clone()];
let fs = turn_environment.environment.get_filesystem();
let sandbox = turn_environment.environment.is_remote().then(|| {
let mut sandbox =
turn.file_system_sandbox_context(/*additional_permissions*/ None);
sandbox.cwd = Some(cwd.clone());
sandbox
});
match codex_apply_patch::maybe_parse_apply_patch_verified(
&command,
&cwd,
fs.as_ref(),
sandbox.as_ref(),
)
.await
let mut sandbox = turn.file_system_sandbox_context(/*additional_permissions*/ None);
sandbox.cwd = Some(cwd.clone());
match codex_apply_patch::verify_apply_patch_args(args, &cwd, fs.as_ref(), Some(&sandbox))
.await
{
codex_apply_patch::MaybeApplyPatchVerified::Body(changes) => {
let (file_paths, effective_additional_permissions, file_system_sandbox_policy) =
@@ -495,19 +488,9 @@ pub(crate) async fn intercept_apply_patch(
call_id: &str,
tool_name: &str,
) -> Result<Option<FunctionToolOutput>, FunctionCallError> {
let sandbox = turn
.environments
.turn_environments
.iter()
.find(|env| env.environment_id == environment_id)
.filter(|env| env.environment.is_remote())
.map(|_| {
let mut sandbox =
turn.file_system_sandbox_context(/*additional_permissions*/ None);
sandbox.cwd = Some(cwd.clone());
sandbox
});
match codex_apply_patch::maybe_parse_apply_patch_verified(command, cwd, fs, sandbox.as_ref())
let mut sandbox = turn.file_system_sandbox_context(/*additional_permissions*/ None);
sandbox.cwd = Some(cwd.clone());
match codex_apply_patch::maybe_parse_apply_patch_verified(command, cwd, fs, Some(&sandbox))
.await
{
codex_apply_patch::MaybeApplyPatchVerified::Body(changes) => {
@@ -597,46 +580,14 @@ pub(crate) async fn intercept_apply_patch(
}
}
fn extract_patch_environment_id(
patch_input: &str,
allow_environment_id: bool,
) -> Result<(Option<String>, String), FunctionCallError> {
let Some(rest) = patch_input.strip_prefix("*** Begin Patch\n") else {
return Ok((None, patch_input.to_string()));
};
let Some(after_marker) = rest.strip_prefix(APPLY_PATCH_ENVIRONMENT_ID_MARKER) else {
return Ok((None, patch_input.to_string()));
};
let Some((environment_id, remaining_patch)) = after_marker.split_once('\n') else {
return Ok((None, patch_input.to_string()));
};
if !allow_environment_id {
return Err(FunctionCallError::RespondToModel(
"apply_patch environment selection is unavailable for this turn".to_string(),
));
}
let environment_id = environment_id.trim();
if environment_id.is_empty() {
return Err(FunctionCallError::RespondToModel(
"apply_patch environment_id cannot be empty".to_string(),
));
}
Ok((
Some(environment_id.to_string()),
format!("*** Begin Patch\n{remaining_patch}"),
))
}
fn require_environment_id(
parsed_environment_id: Option<&str>,
allow_environment_id: bool,
) -> Result<Option<String>, FunctionCallError> {
match parsed_environment_id {
Some(_) if !allow_environment_id => Err(FunctionCallError::RespondToModel(
"apply_patch environment selection is unavailable for this turn".to_string(),
)),
Some(environment_id) => Ok(Some(environment_id.to_string())),
None if allow_environment_id => Err(FunctionCallError::RespondToModel(
"apply_patch environment_id is required when multiple environments are available"

View File

@@ -1,6 +1,7 @@
use super::*;
use codex_apply_patch::MaybeApplyPatchVerified;
use codex_exec_server::LOCAL_FS;
use codex_protocol::models::PermissionProfile;
use codex_protocol::permissions::FileSystemSandboxPolicy;
use codex_protocol::protocol::FileChange;
use codex_protocol::protocol::SandboxPolicy;
@@ -195,39 +196,13 @@ fn diff_consumer_sends_next_update_after_buffer_interval() {
}
#[test]
fn extract_patch_environment_id_strips_header_only_when_enabled() {
let patch = "*** Begin Patch\n*** Environment ID: remote\n*** Add File: hello.txt\n+hello\n*** End Patch";
fn reconcile_environment_id_requires_selection_when_enabled() {
assert_eq!(
extract_patch_environment_id(patch, /*allow_environment_id*/ true),
Ok((
Some("remote".to_string()),
"*** Begin Patch\n*** Add File: hello.txt\n+hello\n*** End Patch".to_string(),
))
);
assert_eq!(
extract_patch_environment_id(patch, /*allow_environment_id*/ false),
require_environment_id(Some("remote"), /*allow_environment_id*/ false),
Err(FunctionCallError::RespondToModel(
"apply_patch environment selection is unavailable for this turn".to_string(),
))
);
}
#[test]
fn extract_patch_environment_id_rejects_empty_header() {
let patch =
"*** Begin Patch\n*** Environment ID: \n*** Add File: hello.txt\n+hello\n*** End Patch";
assert_eq!(
extract_patch_environment_id(patch, /*allow_environment_id*/ true),
Err(FunctionCallError::RespondToModel(
"apply_patch environment_id cannot be empty".to_string(),
))
);
}
#[test]
fn reconcile_environment_id_requires_selection_when_enabled() {
assert_eq!(
require_environment_id(
/*parsed_environment_id*/ None, /*allow_environment_id*/ true
@@ -271,6 +246,50 @@ async fn approval_keys_include_move_destination() {
assert_eq!(keys.len(), 2);
}
#[tokio::test]
async fn intercepted_apply_patch_verification_uses_local_sandbox() {
let tmp = TempDir::new().expect("tmp");
std::fs::write(tmp.path().join("file.txt"), "old content\n").expect("write file");
let cwd = tmp.path().abs();
let command = vec![
"apply_patch".to_string(),
r#"*** Begin Patch
*** Update File: file.txt
@@
-old content
+new content
*** End Patch"#
.to_string(),
];
let (session, mut turn) = make_session_and_context().await;
turn.permission_profile = PermissionProfile::read_only();
let result = intercept_apply_patch(
&command,
&cwd,
LOCAL_FS.as_ref(),
codex_exec_server::LOCAL_ENVIRONMENT_ID,
Arc::new(session),
Arc::new(turn),
/*tracker*/ None,
"call-1",
"exec_command",
)
.await;
let Err(FunctionCallError::RespondToModel(message)) = result else {
panic!("expected sandboxed filesystem error");
};
assert!(
message.contains("apply_patch verification failed"),
"{message}"
);
assert!(
message.contains("sandboxed filesystem operations require configured runtime paths"),
"{message}"
);
}
#[test]
fn write_permissions_for_paths_skip_dirs_already_writable_under_workspace_root() {
let tmp = TempDir::new().expect("tmp");