chore(core) Consolidate apply_patch tests (#6545)

## Summary
Consolidates our apply_patch tests into one suite, and ensures each test
case tests the various ways the harness supports apply_patch:
1. Freeform custom tool call
2. JSON function tool
3. Simple shell call
4. Heredoc shell call

There are a few test cases that are specific to a particular variant,
I've left those alone.

## Testing
- [x] This adds a significant number of tests
This commit is contained in:
Dylan Hurd
2025-11-13 15:52:39 -08:00
committed by GitHub
parent 547be54ee8
commit 2c1b693da4
7 changed files with 289 additions and 1079 deletions

View File

@@ -29,6 +29,15 @@ use crate::wait_for_event;
type ConfigMutator = dyn FnOnce(&mut Config) + Send;
/// A collection of different ways the model can output an apply_patch call
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum ApplyPatchModelOutput {
Freeform,
Function,
Shell,
ShellViaHeredoc,
}
pub struct TestCodexBuilder {
config_mutators: Vec<Box<ConfigMutator>>,
}
@@ -265,6 +274,19 @@ impl TestCodexHarness {
.expect("output string")
.to_string()
}
pub async fn apply_patch_output(
&self,
call_id: &str,
output_type: ApplyPatchModelOutput,
) -> String {
match output_type {
ApplyPatchModelOutput::Freeform => self.custom_tool_call_output(call_id).await,
ApplyPatchModelOutput::Function
| ApplyPatchModelOutput::Shell
| ApplyPatchModelOutput::ShellViaHeredoc => self.function_call_stdout(call_id).await,
}
}
}
fn custom_tool_call_output<'a>(bodies: &'a [Value], call_id: &str) -> &'a Value {