shell_command returns freeform output (#6860)

Instead of returning structured output and then re-formatting it into
freeform, return the freeform output directly from the shell_command tool.

Keep `shell` as the default tool for GPT-5.
This commit is contained in:
pakrym-oai
2025-11-18 23:38:43 -08:00
committed by GitHub
parent 7e0e675db4
commit ee0484a98c
10 changed files with 215 additions and 85 deletions

View File

@@ -88,6 +88,7 @@ pub(crate) enum ToolEmitter {
cwd: PathBuf,
source: ExecCommandSource,
parsed_cmd: Vec<ParsedCommand>,
freeform: bool,
},
ApplyPatch {
changes: HashMap<PathBuf, FileChange>,
@@ -103,13 +104,19 @@ pub(crate) enum ToolEmitter {
}
impl ToolEmitter {
pub fn shell(command: Vec<String>, cwd: PathBuf, source: ExecCommandSource) -> Self {
/// Builds a `Shell` emitter for `command` executed in `cwd`.
///
/// The command is run through `parse_command` once, up front, and the result
/// is stored alongside the raw arguments. `freeform` is stored on the variant
/// as-is.
pub fn shell(
    command: Vec<String>,
    cwd: PathBuf,
    source: ExecCommandSource,
    freeform: bool,
) -> Self {
    Self::Shell {
        // Evaluated first, while `command` is still only borrowed; the
        // field below then moves it into the variant.
        parsed_cmd: parse_command(&command),
        command,
        cwd,
        source,
        freeform,
    }
}
@@ -144,6 +151,7 @@ impl ToolEmitter {
cwd,
source,
parsed_cmd,
..
},
stage,
) => {
@@ -234,6 +242,15 @@ impl ToolEmitter {
self.emit(ctx, ToolEventStage::Begin).await;
}
/// Renders `output` as the text returned to the model.
///
/// A `Shell` emitter whose `freeform` flag is `true` uses the freeform
/// formatter; every other emitter (including `Shell` with `freeform: false`)
/// uses the structured formatter.
fn format_exec_output_for_model(&self, output: &ExecToolCallOutput) -> String {
    let wants_freeform = matches!(self, Self::Shell { freeform: true, .. });
    if wants_freeform {
        super::format_exec_output_for_model_freeform(output)
    } else {
        super::format_exec_output_for_model_structured(output)
    }
}
pub async fn finish(
&self,
ctx: ToolEventCtx<'_>,
@@ -241,7 +258,7 @@ impl ToolEmitter {
) -> Result<String, FunctionCallError> {
let (event, result) = match out {
Ok(output) => {
let content = super::format_exec_output_for_model(&output);
let content = self.format_exec_output_for_model(&output);
let exit_code = output.exit_code;
let event = ToolEventStage::Success(output);
let result = if exit_code == 0 {
@@ -253,7 +270,7 @@ impl ToolEmitter {
}
Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output })))
| Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => {
let response = super::format_exec_output_for_model(&output);
let response = self.format_exec_output_for_model(&output);
let event = ToolEventStage::Failure(ToolEventFailure::Output(*output));
let result = Err(FunctionCallError::RespondToModel(response));
(event, result)

View File

@@ -117,6 +117,7 @@ impl ToolHandler for ShellHandler {
turn,
tracker,
call_id,
false,
)
.await
}
@@ -129,6 +130,7 @@ impl ToolHandler for ShellHandler {
turn,
tracker,
call_id,
false,
)
.await
}
@@ -176,6 +178,7 @@ impl ToolHandler for ShellCommandHandler {
turn,
tracker,
call_id,
true,
)
.await
}
@@ -189,6 +192,7 @@ impl ShellHandler {
turn: Arc<TurnContext>,
tracker: crate::tools::context::SharedTurnDiffTracker,
call_id: String,
freeform: bool,
) -> Result<ToolOutput, FunctionCallError> {
// Approval policy guard for explicit escalation in non-OnRequest modes.
if exec_params.with_escalated_permissions.unwrap_or(false)
@@ -282,8 +286,12 @@ impl ShellHandler {
}
let source = ExecCommandSource::Agent;
let emitter =
ToolEmitter::shell(exec_params.command.clone(), exec_params.cwd.clone(), source);
let emitter = ToolEmitter::shell(
exec_params.command.clone(),
exec_params.cwd.clone(),
source,
freeform,
);
let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None);
emitter.begin(event_ctx).await;

View File

@@ -11,6 +11,7 @@ pub mod spec;
use crate::context_manager::truncate_with_line_bytes_budget;
use crate::exec::ExecToolCallOutput;
use crate::truncate::truncate_formatted_exec_output;
pub use router::ToolRouter;
use serde::Serialize;
@@ -25,7 +26,7 @@ const SHELL_OUTPUT_MAX_BYTES: usize = 10_000;
/// Format the combined exec output for sending back to the model.
/// Includes exit code and duration metadata; truncates large bodies safely.
pub fn format_exec_output_for_model(exec_output: &ExecToolCallOutput) -> String {
pub fn format_exec_output_for_model_structured(exec_output: &ExecToolCallOutput) -> String {
let ExecToolCallOutput {
exit_code,
duration,
@@ -61,6 +62,33 @@ pub fn format_exec_output_for_model(exec_output: &ExecToolCallOutput) -> String
serde_json::to_string(&payload).expect("serialize ExecOutput")
}
/// Format the exec output as plain, line-oriented text for the model.
///
/// The result is a newline-joined sequence of:
/// - `Exit code: <code>`
/// - `Wall time: <seconds> seconds` (rounded to one decimal place)
/// - `Total output lines: <n>`, only when the truncated body has a different
///   line count than the raw aggregated output
/// - `Output:` followed by the (possibly truncated) body
pub fn format_exec_output_for_model_freeform(exec_output: &ExecToolCallOutput) -> String {
    let raw_text = &exec_output.aggregated_output.text;
    let total_lines = raw_text.lines().count();
    let body = truncate_formatted_exec_output(
        raw_text,
        total_lines,
        SHELL_OUTPUT_MAX_BYTES,
        256, // TODO: to be removed
    );

    // Scale up, round, scale back down: keeps a single decimal digit.
    let wall_time = (exec_output.duration.as_secs_f32() * 10.0).round() / 10.0;

    let mut report = vec![
        format!("Exit code: {}", exec_output.exit_code),
        format!("Wall time: {wall_time} seconds"),
    ];
    // Only mention the original line count when the body's line count
    // differs from it, i.e. when the reader cannot infer it from the body.
    if body.lines().count() != total_lines {
        report.push(format!("Total output lines: {total_lines}"));
    }
    report.push("Output:".to_string());
    report.push(body);

    report.join("\n")
}
pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
let ExecToolCallOutput {
aggregated_output, ..

View File

@@ -1395,6 +1395,22 @@ mod tests {
);
}
/// Checks the tool list for the `gpt-5` model under default features;
/// `shell` is the first expected entry.
#[test]
fn test_gpt_5_defaults() {
    let features = Features::with_defaults();
    let expected_tools = [
        "shell",
        "list_mcp_resources",
        "list_mcp_resource_templates",
        "read_mcp_resource",
        "update_plan",
        "view_image",
    ];
    assert_model_tools("gpt-5", &features, &expected_tools);
}
#[test]
fn test_gpt_5_1_defaults() {
assert_model_tools(