shell_command returns freeform output (#6860)

Instead of returning structured output and then re-formatting it into freeform text, return the freeform output directly from the `shell_command` tool. Keep `shell` as the default tool for GPT-5.
2026-06-01 19:02:59 +00:00 · 2025-11-18 23:38:43 -08:00
parent 7e0e675db4
commit ee0484a98c
10 changed files with 215 additions and 85 deletions
--- a/codex-rs/core/src/tools/events.rs
+++ b/codex-rs/core/src/tools/events.rs
@@ -88,6 +88,7 @@ pub(crate) enum ToolEmitter {
        cwd: PathBuf,
        source: ExecCommandSource,
        parsed_cmd: Vec<ParsedCommand>,
+        freeform: bool,
    },
    ApplyPatch {
        changes: HashMap<PathBuf, FileChange>,
@@ -103,13 +104,19 @@ pub(crate) enum ToolEmitter {
 }

 impl ToolEmitter {
-    pub fn shell(command: Vec<String>, cwd: PathBuf, source: ExecCommandSource) -> Self {
+    pub fn shell(
+        command: Vec<String>,
+        cwd: PathBuf,
+        source: ExecCommandSource,
+        freeform: bool,
+    ) -> Self {
        let parsed_cmd = parse_command(&command);
        Self::Shell {
            command,
            cwd,
            source,
            parsed_cmd,
+            freeform,
        }
    }

@@ -144,6 +151,7 @@ impl ToolEmitter {
                    cwd,
                    source,
                    parsed_cmd,
+                    ..
                },
                stage,
            ) => {
@@ -234,6 +242,15 @@ impl ToolEmitter {
        self.emit(ctx, ToolEventStage::Begin).await;
    }

+    /// Formats `output` for the model: freeform for freeform `Shell` calls, else structured.
+    fn format_exec_output_for_model(&self, output: &ExecToolCallOutput) -> String {
+        if matches!(self, Self::Shell { freeform: true, .. }) {
+            super::format_exec_output_for_model_freeform(output)
+        } else {
+            super::format_exec_output_for_model_structured(output)
+        }
+    }
+
    pub async fn finish(
        &self,
        ctx: ToolEventCtx<'_>,
@@ -241,7 +258,7 @@ impl ToolEmitter {
    ) -> Result<String, FunctionCallError> {
        let (event, result) = match out {
            Ok(output) => {
-                let content = super::format_exec_output_for_model(&output);
+                let content = self.format_exec_output_for_model(&output);
                let exit_code = output.exit_code;
                let event = ToolEventStage::Success(output);
                let result = if exit_code == 0 {
@@ -253,7 +270,7 @@ impl ToolEmitter {
            }
            Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output })))
            | Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => {
-                let response = super::format_exec_output_for_model(&output);
+                let response = self.format_exec_output_for_model(&output);
                let event = ToolEventStage::Failure(ToolEventFailure::Output(*output));
                let result = Err(FunctionCallError::RespondToModel(response));
                (event, result)
--- a/codex-rs/core/src/tools/handlers/shell.rs
+++ b/codex-rs/core/src/tools/handlers/shell.rs
@@ -117,6 +117,7 @@ impl ToolHandler for ShellHandler {
                    turn,
                    tracker,
                    call_id,
+                    false,
                )
                .await
            }
@@ -129,6 +130,7 @@ impl ToolHandler for ShellHandler {
                    turn,
                    tracker,
                    call_id,
+                    false,
                )
                .await
            }
@@ -176,6 +178,7 @@ impl ToolHandler for ShellCommandHandler {
            turn,
            tracker,
            call_id,
+            true,
        )
        .await
    }
@@ -189,6 +192,7 @@ impl ShellHandler {
        turn: Arc<TurnContext>,
        tracker: crate::tools::context::SharedTurnDiffTracker,
        call_id: String,
+        freeform: bool,
    ) -> Result<ToolOutput, FunctionCallError> {
        // Approval policy guard for explicit escalation in non-OnRequest modes.
        if exec_params.with_escalated_permissions.unwrap_or(false)
@@ -282,8 +286,12 @@ impl ShellHandler {
        }

        let source = ExecCommandSource::Agent;
-        let emitter =
-            ToolEmitter::shell(exec_params.command.clone(), exec_params.cwd.clone(), source);
+        let emitter = ToolEmitter::shell(
+            exec_params.command.clone(),
+            exec_params.cwd.clone(),
+            source,
+            freeform,
+        );
        let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None);
        emitter.begin(event_ctx).await;

--- a/codex-rs/core/src/tools/mod.rs
+++ b/codex-rs/core/src/tools/mod.rs
@@ -11,6 +11,7 @@ pub mod spec;

 use crate::context_manager::truncate_with_line_bytes_budget;
 use crate::exec::ExecToolCallOutput;
+use crate::truncate::truncate_formatted_exec_output;
 pub use router::ToolRouter;
 use serde::Serialize;

@@ -25,7 +26,7 @@ const SHELL_OUTPUT_MAX_BYTES: usize = 10_000;

 /// Format the combined exec output for sending back to the model.
 /// Includes exit code and duration metadata; truncates large bodies safely.
-pub fn format_exec_output_for_model(exec_output: &ExecToolCallOutput) -> String {
+pub fn format_exec_output_for_model_structured(exec_output: &ExecToolCallOutput) -> String {
    let ExecToolCallOutput {
        exit_code,
        duration,
@@ -61,6 +62,33 @@ pub fn format_exec_output_for_model(exec_output: &ExecToolCallOutput) -> String
    serde_json::to_string(&payload).expect("serialize ExecOutput")
 }

+/// Formats exec output for the model as plain text: exit code, wall time, then the output body.
+/// Adds a total-line count when the truncated body's line count differs from the original's.
+pub fn format_exec_output_for_model_freeform(exec_output: &ExecToolCallOutput) -> String {
+    let raw_text = &exec_output.aggregated_output.text;
+    let total_lines = raw_text.lines().count();
+    let body = truncate_formatted_exec_output(
+        raw_text,
+        total_lines,
+        SHELL_OUTPUT_MAX_BYTES,
+        256, // TODO: to be removed
+    );
+
+    // Wall time is reported rounded to one decimal place.
+    let wall_secs = (exec_output.duration.as_secs_f32() * 10.0).round() / 10.0;
+
+    let mut sections = vec![
+        format!("Exit code: {}", exec_output.exit_code),
+        format!("Wall time: {wall_secs} seconds"),
+    ];
+    // Only mention the total when the formatted body has a different line count.
+    if body.lines().count() != total_lines {
+        sections.push(format!("Total output lines: {total_lines}"));
+    }
+    sections.extend(["Output:".to_string(), body]);
+    sections.join("\n")
+}
+
 pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
    let ExecToolCallOutput {
        aggregated_output, ..
--- a/codex-rs/core/src/tools/spec.rs
+++ b/codex-rs/core/src/tools/spec.rs
@@ -1395,6 +1395,22 @@ mod tests {
        );
    }

+    #[test]
+    fn test_gpt_5_defaults() {
+        assert_model_tools(
+            "gpt-5",
+            &Features::with_defaults(),
+            &[
+                "shell", // gpt-5 keeps `shell`, not `shell_command`, as its default
+                "list_mcp_resources",
+                "list_mcp_resource_templates",
+                "read_mcp_resource",
+                "update_plan",
+                "view_image",
+            ],
+        );
+    }
+
    #[test]
    fn test_gpt_5_1_defaults() {
        assert_model_tools(