Compare commits

...

8 Commits

Author SHA1 Message Date
Dylan Hurd
f8bf48ded5 fix windows line endings 2025-10-04 02:29:21 -07:00
Dylan Hurd
97a858c47d fix exec config 2025-10-04 02:03:01 -07:00
Dylan Hurd
fc6163780f fix incorrect test config 2025-10-04 00:55:50 -07:00
Dylan Hurd
e12f1038ec unit tests 2025-10-04 00:39:30 -07:00
Dylan Hurd
2334aa7538 rename to format_exec_output 2025-10-04 00:23:16 -07:00
Dylan Hurd
fe07766dae clippy 2025-10-04 00:20:10 -07:00
Dylan Hurd
9e26e0499a apply_patch freeform tool description 2025-10-04 00:10:04 -07:00
Dylan Hurd
9f73414da3 feat: custom-apply-patch with text shell output 2025-10-04 00:02:31 -07:00
11 changed files with 566 additions and 62 deletions

View File

@@ -2400,6 +2400,8 @@ mod tests {
use crate::state::TaskKind;
use crate::tasks::SessionTask;
use crate::tasks::SessionTaskContext;
use crate::tools::ExecResponseFormat;
use crate::tools::HandleExecRequest;
use crate::tools::MODEL_FORMAT_HEAD_LINES;
use crate::tools::MODEL_FORMAT_MAX_BYTES;
use crate::tools::MODEL_FORMAT_MAX_LINES;
@@ -3081,15 +3083,16 @@ mod tests {
let sub_id = "test-sub".to_string();
let call_id = "test-call".to_string();
let resp = handle_container_exec_with_params(
let resp = handle_container_exec_with_params(HandleExecRequest {
tool_name,
params,
&session,
&turn_context,
&mut turn_diff_tracker,
sess: &session,
turn_context: &turn_context,
turn_diff_tracker: &mut turn_diff_tracker,
sub_id,
call_id,
)
response_format: ExecResponseFormat::LegacyJson,
})
.await;
let Err(FunctionCallError::RespondToModel(output)) = resp else {
@@ -3107,15 +3110,16 @@ mod tests {
// Force DangerFullAccess to avoid platform sandbox dependencies in tests.
turn_context.sandbox_policy = SandboxPolicy::DangerFullAccess;
let resp2 = handle_container_exec_with_params(
let resp2 = handle_container_exec_with_params(HandleExecRequest {
tool_name,
params2,
&session,
&turn_context,
&mut turn_diff_tracker,
"test-sub".to_string(),
"test-call-2".to_string(),
)
params: params2,
sess: &session,
turn_context: &turn_context,
turn_diff_tracker: &mut turn_diff_tracker,
sub_id: "test-sub".to_string(),
call_id: "test-call-2".to_string(),
response_format: ExecResponseFormat::LegacyJson,
})
.await;
let output = resp2.expect("expected Ok result");

View File

@@ -8,6 +8,8 @@ use crate::client_common::tools::ToolSpec;
use crate::exec::ExecParams;
use crate::function_tool::FunctionCallError;
use crate::openai_tools::JsonSchema;
use crate::tools::ExecResponseFormat;
use crate::tools::HandleExecRequest;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolOutput;
use crate::tools::context::ToolPayload;
@@ -50,16 +52,16 @@ impl ToolHandler for ApplyPatchHandler {
payload,
} = invocation;
let patch_input = match payload {
let (patch_input, response_format) = match payload {
ToolPayload::Function { arguments } => {
let args: ApplyPatchToolArgs = serde_json::from_str(&arguments).map_err(|e| {
FunctionCallError::RespondToModel(format!(
"failed to parse function arguments: {e:?}"
))
})?;
args.input
(args.input, ExecResponseFormat::LegacyJson)
}
ToolPayload::Custom { input } => input,
ToolPayload::Custom { input } => (input, ExecResponseFormat::StructuredText),
_ => {
return Err(FunctionCallError::RespondToModel(
"apply_patch handler received unsupported payload".to_string(),
@@ -76,15 +78,16 @@ impl ToolHandler for ApplyPatchHandler {
justification: None,
};
let content = handle_container_exec_with_params(
tool_name.as_str(),
exec_params,
session,
turn,
tracker,
sub_id.to_string(),
call_id.clone(),
)
let content = handle_container_exec_with_params(HandleExecRequest {
tool_name: tool_name.as_str(),
params: exec_params,
sess: session,
turn_context: turn,
turn_diff_tracker: tracker,
sub_id: sub_id.to_string(),
call_id: call_id.clone(),
response_format,
})
.await?;
Ok(ToolOutput::Function {
@@ -106,7 +109,7 @@ pub enum ApplyPatchToolType {
pub(crate) fn create_apply_patch_freeform_tool() -> ToolSpec {
ToolSpec::Freeform(FreeformTool {
name: "apply_patch".to_string(),
description: "Use the `apply_patch` tool to edit files".to_string(),
description: "Use the `apply_patch` tool to edit files. This is a FREEFORM tool, so do not wrap the patch in JSON.".to_string(),
format: FreeformToolFormat {
r#type: "grammar".to_string(),
syntax: "lark".to_string(),

View File

@@ -5,10 +5,13 @@ use crate::codex::TurnContext;
use crate::exec::ExecParams;
use crate::exec_env::create_env;
use crate::function_tool::FunctionCallError;
use crate::tools::ExecResponseFormat;
use crate::tools::HandleExecRequest;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolOutput;
use crate::tools::context::ToolPayload;
use crate::tools::handle_container_exec_with_params;
use crate::tools::handlers::apply_patch::ApplyPatchToolType;
use crate::tools::registry::ToolHandler;
use crate::tools::registry::ToolKind;
@@ -54,6 +57,14 @@ impl ToolHandler for ShellHandler {
payload,
} = invocation;
// When using the freeform apply_patch tool type, shell tool output should also be raw
// output, not json-encoded.
let response_format = match turn.tools_config.apply_patch_tool_type {
Some(ApplyPatchToolType::Freeform) => ExecResponseFormat::StructuredText,
Some(ApplyPatchToolType::Function) => ExecResponseFormat::LegacyJson,
None => ExecResponseFormat::LegacyJson,
};
match payload {
ToolPayload::Function { arguments } => {
let params: ShellToolCallParams =
@@ -63,15 +74,16 @@ impl ToolHandler for ShellHandler {
))
})?;
let exec_params = Self::to_exec_params(params, turn);
let content = handle_container_exec_with_params(
tool_name.as_str(),
exec_params,
session,
turn,
tracker,
sub_id.to_string(),
call_id.clone(),
)
let content = handle_container_exec_with_params(HandleExecRequest {
tool_name: tool_name.as_str(),
params: exec_params,
sess: session,
turn_context: turn,
turn_diff_tracker: tracker,
sub_id: sub_id.to_string(),
call_id: call_id.clone(),
response_format,
})
.await?;
Ok(ToolOutput::Function {
content,
@@ -80,15 +92,16 @@ impl ToolHandler for ShellHandler {
}
ToolPayload::LocalShell { params } => {
let exec_params = Self::to_exec_params(params, turn);
let content = handle_container_exec_with_params(
tool_name.as_str(),
exec_params,
session,
turn,
tracker,
sub_id.to_string(),
call_id.clone(),
)
let content = handle_container_exec_with_params(HandleExecRequest {
tool_name: tool_name.as_str(),
params: exec_params,
sess: session,
turn_context: turn,
turn_diff_tracker: tracker,
sub_id: sub_id.to_string(),
call_id: call_id.clone(),
response_format,
})
.await?;
Ok(ToolOutput::Function {
content,

View File

@@ -44,16 +44,37 @@ pub(crate) const TELEMETRY_PREVIEW_MAX_LINES: usize = 64; // lines
pub(crate) const TELEMETRY_PREVIEW_TRUNCATION_NOTICE: &str =
"[... telemetry preview truncated ...]";
#[derive(Clone, Copy)]
pub(crate) enum ExecResponseFormat {
LegacyJson,
StructuredText,
}
// TODO(jif) break this down
pub(crate) struct HandleExecRequest<'a> {
pub tool_name: &'a str,
pub params: ExecParams,
pub sess: &'a Session,
pub turn_context: &'a TurnContext,
pub turn_diff_tracker: &'a mut TurnDiffTracker,
pub sub_id: String,
pub call_id: String,
pub response_format: ExecResponseFormat,
}
pub(crate) async fn handle_container_exec_with_params(
tool_name: &str,
params: ExecParams,
sess: &Session,
turn_context: &TurnContext,
turn_diff_tracker: &mut TurnDiffTracker,
sub_id: String,
call_id: String,
request: HandleExecRequest<'_>,
) -> Result<String, FunctionCallError> {
let HandleExecRequest {
tool_name,
params,
sess,
turn_context,
turn_diff_tracker,
sub_id,
call_id,
response_format,
} = request;
let otel_event_manager = turn_context.client.get_otel_event_manager();
if params.with_escalated_permissions.unwrap_or(false)
@@ -148,7 +169,7 @@ pub(crate) async fn handle_container_exec_with_params(
match output_result {
Ok(output) => {
let ExecToolCallOutput { exit_code, .. } = &output;
let content = format_exec_output_apply_patch(&output);
let content = format_exec_output(&output, response_format);
if *exit_code == 0 {
Ok(content)
} else {
@@ -157,11 +178,11 @@ pub(crate) async fn handle_container_exec_with_params(
}
Err(ExecError::Function(err)) => Err(err),
Err(ExecError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output }))) => Err(
FunctionCallError::RespondToModel(format_exec_output_apply_patch(&output)),
FunctionCallError::RespondToModel(format_exec_output(&output, response_format)),
),
Err(ExecError::Codex(err)) => Err(FunctionCallError::RespondToModel(format!(
"execution error: {err:?}"
))),
Err(ExecError::Codex(err)) => Err(FunctionCallError::RespondToModel(
format_unexpected_exec_error(err, response_format),
)),
}
}
@@ -201,6 +222,104 @@ pub fn format_exec_output_apply_patch(exec_output: &ExecToolCallOutput) -> Strin
serde_json::to_string(&payload).expect("serialize ExecOutput")
}
fn format_exec_output(
exec_output: &ExecToolCallOutput,
response_format: ExecResponseFormat,
) -> String {
match response_format {
ExecResponseFormat::LegacyJson => format_exec_output_apply_patch(exec_output),
ExecResponseFormat::StructuredText => format_exec_output_structured(exec_output),
}
}
fn format_unexpected_exec_error(err: CodexErr, response_format: ExecResponseFormat) -> String {
match response_format {
ExecResponseFormat::LegacyJson => format!("execution error: {err:?}"),
ExecResponseFormat::StructuredText => format_structured_error(&format!("{err:?}")),
}
}
fn format_structured_error(message: &str) -> String {
let lines = [
"Exit code: N/A".to_string(),
"Wall time: N/A seconds".to_string(),
format!("Error: {message}"),
"Output:".to_string(),
String::new(),
];
lines.join("\n")
}
fn format_wall_time(duration: std::time::Duration) -> String {
format_significant_digits(duration.as_secs_f64(), 4)
}
fn format_significant_digits(value: f64, digits: usize) -> String {
if !value.is_finite() {
return value.to_string();
}
if value == 0.0 {
return "0".to_string();
}
let abs = value.abs();
let initial_exponent = abs.log10().floor() as i32;
let rounded_value = if value == 0.0 {
0.0
} else {
let scale = 10_f64.powf((digits as f64 - 1.0) - initial_exponent as f64);
(value * scale).round() / scale
};
let abs_rounded = rounded_value.abs();
let exponent = if abs_rounded == 0.0 {
0
} else {
abs_rounded.log10().floor() as i32
};
let use_exp = exponent < -4 || exponent >= digits as i32;
if use_exp {
return format!("{rounded_value:.prec$e}", prec = digits.saturating_sub(1));
}
let decimal_places = (digits as i32 - exponent - 1).max(0) as usize;
let mut s = format!("{rounded_value:.decimal_places$}");
if s.contains('.') {
while s.ends_with('0') {
s.pop();
}
if s.ends_with('.') {
s.pop();
}
}
s
}
pub fn format_exec_output_structured(exec_output: &ExecToolCallOutput) -> String {
let ExecToolCallOutput {
exit_code,
duration,
aggregated_output,
..
} = exec_output;
let mut sections = Vec::new();
sections.push(format!("Exit code: {exit_code}"));
sections.push(format!(
"Wall time: {} seconds",
format_wall_time(*duration)
));
if let Some(total_lines) = aggregated_output.truncated_after_lines {
sections.push(format!("Total output lines: {total_lines}"));
}
sections.push("Output:".to_string());
sections.push(format_exec_output_str(exec_output));
sections.join("\n")
}
pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
let ExecToolCallOutput {
aggregated_output, ..
@@ -278,3 +397,98 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
result
}
#[cfg(test)]
mod tests {
use super::*;
use crate::exec::StreamOutput;
use pretty_assertions::assert_eq;
use std::time::Duration;
const TRUNCATED_STRUCTURED_EXPECTED: &str =
include_str!("tests/truncated_structured_expected.txt");
fn normalize_line_endings(input: &str) -> String {
if input.contains('\r') {
input.replace('\r', "")
} else {
input.to_string()
}
}
fn sample_output() -> ExecToolCallOutput {
ExecToolCallOutput {
exit_code: 0,
stdout: StreamOutput::new("stdout".to_string()),
stderr: StreamOutput::new("stderr".to_string()),
aggregated_output: StreamOutput::new("stdout\nstderr".to_string()),
duration: Duration::from_secs_f64(1.2345),
timed_out: false,
}
}
#[test]
fn structured_format_basic() {
let formatted = format_exec_output_structured(&sample_output());
let expected = "Exit code: 0\nWall time: 1.235 seconds\nOutput:\nstdout\nstderr";
assert_eq!(formatted, expected);
}
#[test]
fn structured_format_includes_truncation_metadata() {
let mut output = sample_output();
output.aggregated_output.truncated_after_lines = Some(200);
let formatted = format_exec_output_structured(&output);
assert!(formatted.contains("Total output lines: 200"));
}
#[test]
fn significant_digit_formatting_matches_expectations() {
assert_eq!(format_significant_digits(0.0, 4), "0");
assert_eq!(format_significant_digits(1.23456, 4), "1.235");
assert_eq!(format_significant_digits(12345.0, 4), "1.235e4");
assert_eq!(format_significant_digits(0.000123456, 4), "0.0001235");
}
#[test]
fn structured_error_includes_metadata() {
let error = format_structured_error("unexpected failure");
assert_eq!(
error,
"Exit code: N/A\nWall time: N/A seconds\nError: unexpected failure\nOutput:\n"
);
}
#[test]
fn format_exec_output_uses_legacy_json_formatter() {
let output = sample_output();
let formatted = format_exec_output(&output, ExecResponseFormat::LegacyJson);
assert_eq!(
formatted,
"{\"output\":\"stdout\\nstderr\",\"metadata\":{\"exit_code\":0,\"duration_seconds\":1.2}}"
);
}
#[test]
fn format_exec_output_uses_structured_formatter() {
let output = sample_output();
let formatted = format_exec_output(&output, ExecResponseFormat::StructuredText);
assert_eq!(
formatted,
"Exit code: 0\nWall time: 1.235 seconds\nOutput:\nstdout\nstderr"
);
}
#[test]
fn format_exec_output_truncates_long_output() {
let mut output = sample_output();
let mut aggregated = String::new();
for i in 0..260 {
aggregated.push_str(&format!("L{i:03}\n"));
}
output.aggregated_output = StreamOutput::new(aggregated);
let formatted = format_exec_output(&output, ExecResponseFormat::StructuredText);
let expected = normalize_line_endings(TRUNCATED_STRUCTURED_EXPECTED);
assert_eq!(formatted, expected);
}
}

View File

@@ -0,0 +1,262 @@
Exit code: 0
Wall time: 1.235 seconds
Output:
L000
L001
L002
L003
L004
L005
L006
L007
L008
L009
L010
L011
L012
L013
L014
L015
L016
L017
L018
L019
L020
L021
L022
L023
L024
L025
L026
L027
L028
L029
L030
L031
L032
L033
L034
L035
L036
L037
L038
L039
L040
L041
L042
L043
L044
L045
L046
L047
L048
L049
L050
L051
L052
L053
L054
L055
L056
L057
L058
L059
L060
L061
L062
L063
L064
L065
L066
L067
L068
L069
L070
L071
L072
L073
L074
L075
L076
L077
L078
L079
L080
L081
L082
L083
L084
L085
L086
L087
L088
L089
L090
L091
L092
L093
L094
L095
L096
L097
L098
L099
L100
L101
L102
L103
L104
L105
L106
L107
L108
L109
L110
L111
L112
L113
L114
L115
L116
L117
L118
L119
L120
L121
L122
L123
L124
L125
L126
L127
[... omitted 4 of 260 lines ...]
L132
L133
L134
L135
L136
L137
L138
L139
L140
L141
L142
L143
L144
L145
L146
L147
L148
L149
L150
L151
L152
L153
L154
L155
L156
L157
L158
L159
L160
L161
L162
L163
L164
L165
L166
L167
L168
L169
L170
L171
L172
L173
L174
L175
L176
L177
L178
L179
L180
L181
L182
L183
L184
L185
L186
L187
L188
L189
L190
L191
L192
L193
L194
L195
L196
L197
L198
L199
L200
L201
L202
L203
L204
L205
L206
L207
L208
L209
L210
L211
L212
L213
L214
L215
L216
L217
L218
L219
L220
L221
L222
L223
L224
L225
L226
L227
L228
L229
L230
L231
L232
L233
L234
L235
L236
L237
L238
L239
L240
L241
L242
L243
L244
L245
L246
L247
L248
L249
L250
L251
L252
L253
L254
L255
L256
L257
L258
L259

View File

@@ -15,7 +15,8 @@ impl TestCodexExecBuilder {
.expect("should find binary for codex-exec");
cmd.current_dir(self.cwd.path())
.env("CODEX_HOME", self.home.path())
.env(CODEX_API_KEY_ENV_VAR, "dummy");
.env(CODEX_API_KEY_ENV_VAR, "dummy")
.arg("--custom-apply-patch");
cmd
}
pub fn cmd_with_server(&self, server: &MockServer) -> assert_cmd::Command {

View File

@@ -52,9 +52,7 @@ async fn shell_tool_executes_command_and_streams_output() -> anyhow::Result<()>
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.include_apply_patch_tool = true;
});
let mut builder = test_codex();
let TestCodex {
codex,
cwd,

View File

@@ -71,6 +71,10 @@ pub struct Cli {
#[arg(long = "include-plan-tool", default_value_t = false)]
pub include_plan_tool: bool,
/// Force-enable the apply_patch tool even for models that do not opt into it by default.
#[arg(long = "custom-apply-patch", default_value_t = false)]
pub custom_apply_patch: bool,
/// Specifies file where the last message from the agent should be written.
#[arg(long = "output-last-message", short = 'o', value_name = "FILE")]
pub last_message_file: Option<PathBuf>,

View File

@@ -69,6 +69,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
prompt,
output_schema: output_schema_path,
include_plan_tool,
custom_apply_patch,
config_overrides,
} = cli;
@@ -177,7 +178,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
codex_linux_sandbox_exe,
base_instructions: None,
include_plan_tool: Some(include_plan_tool),
include_apply_patch_tool: Some(true),
include_apply_patch_tool: custom_apply_patch.then_some(true),
include_view_image_tool: None,
show_raw_agent_reasoning: oss.then_some(true),
tools_web_search_request: None,

View File

@@ -72,6 +72,10 @@ pub struct Cli {
#[arg(long = "search", default_value_t = false)]
pub web_search: bool,
/// Force-enable the apply_patch tool even for models that do not opt into it by default.
#[arg(long = "custom-apply-patch", default_value_t = false)]
pub custom_apply_patch: bool,
#[clap(skip)]
pub config_overrides: CliConfigOverrides,
}

View File

@@ -142,7 +142,7 @@ pub async fn run_main(
codex_linux_sandbox_exe,
base_instructions: None,
include_plan_tool: Some(true),
include_apply_patch_tool: None,
include_apply_patch_tool: cli.custom_apply_patch.then_some(true),
include_view_image_tool: None,
show_raw_agent_reasoning: cli.oss.then_some(true),
tools_web_search_request: cli.web_search.then_some(true),