feat(core) Add login to shell_command tool (#6846)

## Summary

Adds the `login` parameter to the `shell_command` tool - optional, defaults to true.

## Testing

- [x] Tested locally
2026-04-26 15:45:02 +00:00 · 2025-12-05 11:03:25 -08:00
parent d08efb1743
commit a8cbbdbc6e
8 changed files with 288 additions and 2 deletions
--- a/codex-rs/core/tests/suite/shell_command.rs
+++ b/codex-rs/core/tests/suite/shell_command.rs
@@ -0,0 +1,174 @@
+use anyhow::Result;
+use core_test_support::assert_regex_match;
+use core_test_support::responses::ev_assistant_message;
+use core_test_support::responses::ev_completed;
+use core_test_support::responses::ev_function_call;
+use core_test_support::responses::ev_response_created;
+use core_test_support::responses::mount_sse_sequence;
+use core_test_support::responses::sse;
+use core_test_support::skip_if_no_network;
+use core_test_support::skip_if_windows;
+use core_test_support::test_codex::TestCodexBuilder;
+use core_test_support::test_codex::TestCodexHarness;
+use core_test_support::test_codex::test_codex;
+use serde_json::json;
+
+/// Builds the two SSE bodies that script a single `shell_command` tool call.
+///
+/// The first body carries a `shell_command` function call identified by `call_id`. Its JSON
+/// arguments hold `command`, a fixed `timeout_ms` of 2000 and the `login` flag; a `None` login is
+/// serialized as JSON `null` rather than being left out. The second body carries the assistant
+/// message `"done"`.
+fn shell_responses(call_id: &str, command: &str, login: Option<bool>) -> Vec<String> {
+    // Serializing a `json!` value made of plain strings, numbers and booleans is not expected to
+    // fail, hence the `expect`.
+    #[allow(clippy::expect_used)]
+    let arguments = serde_json::to_string(&json!({
+        "command": command,
+        "timeout_ms": 2_000,
+        "login": login,
+    }))
+    .expect("serialize shell command arguments");
+
+    let tool_call_body = sse(vec![
+        ev_response_created("resp-1"),
+        ev_function_call(call_id, "shell_command", &arguments),
+        ev_completed("resp-1"),
+    ]);
+    let follow_up_body = sse(vec![
+        ev_assistant_message("msg-1", "done"),
+        ev_completed("resp-2"),
+    ]);
+
+    vec![tool_call_body, follow_up_body]
+}
+
+/// Creates a [`TestCodexHarness`] from a caller-customized builder.
+///
+/// `configure` receives a fresh `test_codex()` builder. The builder it returns is then given a
+/// `with_config` closure that sets `include_apply_patch_tool` to `true`, and the harness is
+/// constructed from the result.
+async fn shell_command_harness_with(
+    configure: impl FnOnce(TestCodexBuilder) -> TestCodexBuilder,
+) -> Result<TestCodexHarness> {
+    let customized = configure(test_codex());
+    let with_apply_patch = customized.with_config(|cfg| {
+        cfg.include_apply_patch_tool = true;
+    });
+    TestCodexHarness::with_builder(with_apply_patch).await
+}
+
+/// Mounts the scripted `shell_command` exchange on the harness's server.
+///
+/// The bodies come from `shell_responses(call_id, command, login)` and are handed to
+/// `mount_sse_sequence` together with `harness.server()`.
+async fn mount_shell_responses(
+    harness: &TestCodexHarness,
+    call_id: &str,
+    command: &str,
+    login: Option<bool>,
+) {
+    let server = harness.server();
+    let bodies = shell_responses(call_id, command, login);
+    mount_sse_sequence(server, bodies).await;
+}
+
+/// Asserts that `output` is a successful `shell_command` result whose body matches `expected`.
+///
+/// Line endings in `output` are normalized to `\n` (both `\r\n` and lone `\r`) and trailing
+/// newlines are dropped before matching. The output must then consist of an `Exit code: 0` line,
+/// a `Wall time: <number> seconds` line, an `Output:` line and finally `expected`.
+///
+/// `expected` is spliced into the regular expression verbatim, so it is interpreted as a regex
+/// fragment and not as a literal string.
+///
+/// The check is delegated to `assert_regex_match`; this function itself only ever returns
+/// `Ok(())`.
+fn assert_shell_command_output(output: &str, expected: &str) -> Result<()> {
+    // Collapse Windows (`\r\n`) and bare `\r` line endings so one pattern serves every platform.
+    let unified_newlines = output.replace("\r\n", "\n").replace('\r', "\n");
+    let normalized_output = unified_newlines.trim_end_matches('\n').to_owned();
+
+    let expected_pattern = format!(
+        r"(?s)^Exit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds\nOutput:\n{expected}\n?$"
+    );
+    assert_regex_match(&expected_pattern, &normalized_output);
+
+    Ok(())
+}
+
+/// Runs `echo` through `shell_command` with `login` passed as `None` and checks the formatted
+/// output.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn shell_command_works() -> anyhow::Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    const CALL_ID: &str = "shell-command-call";
+
+    let harness = shell_command_harness_with(|b| b.with_model("gpt-5.1")).await?;
+    mount_shell_responses(&harness, CALL_ID, "echo 'hello, world'", None).await;
+    harness.submit("run the echo command").await?;
+
+    let stdout = harness.function_call_stdout(CALL_ID).await;
+    assert_shell_command_output(&stdout, "hello, world")
+}
+
+/// Runs `echo` through `shell_command` with `login` explicitly set to `true`.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn output_with_login() -> anyhow::Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    const CALL_ID: &str = "shell-command-call-login-true";
+
+    let harness = shell_command_harness_with(|b| b.with_model("gpt-5.1")).await?;
+    mount_shell_responses(&harness, CALL_ID, "echo 'hello, world'", Some(true)).await;
+    harness.submit("run the echo command with login").await?;
+
+    let stdout = harness.function_call_stdout(CALL_ID).await;
+    assert_shell_command_output(&stdout, "hello, world")
+}
+
+/// Runs `echo` through `shell_command` with `login` explicitly set to `false`.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn output_without_login() -> anyhow::Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    const CALL_ID: &str = "shell-command-call-login-false";
+
+    let harness = shell_command_harness_with(|b| b.with_model("gpt-5.1")).await?;
+    mount_shell_responses(&harness, CALL_ID, "echo 'hello, world'", Some(false)).await;
+    harness.submit("run the echo command without login").await?;
+
+    let stdout = harness.function_call_stdout(CALL_ID).await;
+    assert_shell_command_output(&stdout, "hello, world")
+}
+
+/// Checks that output spanning two lines is reported intact when `login` is `true`.
+///
+/// The command string embeds a real newline inside the single-quoted `echo` argument.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn multi_line_output_with_login() -> anyhow::Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    const CALL_ID: &str = "shell-command-call-first-extra-login";
+    const COMMAND: &str = "echo 'first line\nsecond line'";
+
+    let harness = shell_command_harness_with(|b| b.with_model("gpt-5.1")).await?;
+    mount_shell_responses(&harness, CALL_ID, COMMAND, Some(true)).await;
+    harness.submit("run the command with login").await?;
+
+    let stdout = harness.function_call_stdout(CALL_ID).await;
+    assert_shell_command_output(&stdout, "first line\nsecond line")
+}
+
+/// Checks that a piped command (`echo ... | cat`) produces the expected output when `login` is
+/// explicitly `true`.
+///
+/// `login` is passed as `Some(true)`, matching the other `*_with_login` tests, so this test keeps
+/// covering the login path named in its title instead of relying on the tool's default for an
+/// unset flag. The call id and prompt are labelled to match.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn pipe_output_with_login() -> anyhow::Result<()> {
+    skip_if_no_network!(Ok(()));
+    skip_if_windows!(Ok(()));
+
+    let harness = shell_command_harness_with(|builder| builder.with_model("gpt-5.1")).await?;
+
+    let call_id = "shell-command-call-pipe-login-true";
+    mount_shell_responses(&harness, call_id, "echo 'hello, world' | cat", Some(true)).await;
+    harness.submit("run the command with login").await?;
+
+    let output = harness.function_call_stdout(call_id).await;
+    assert_shell_command_output(&output, "hello, world")?;
+
+    Ok(())
+}
+
+/// Checks that a piped command (`echo ... | cat`) produces the expected output when `login` is
+/// explicitly `false`.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn pipe_output_without_login() -> anyhow::Result<()> {
+    skip_if_no_network!(Ok(()));
+    skip_if_windows!(Ok(()));
+
+    const CALL_ID: &str = "shell-command-call-third-extra-login-false";
+    const COMMAND: &str = "echo 'hello, world' | cat";
+
+    let harness = shell_command_harness_with(|b| b.with_model("gpt-5.1")).await?;
+    mount_shell_responses(&harness, CALL_ID, COMMAND, Some(false)).await;
+    harness.submit("run the command without login").await?;
+
+    let stdout = harness.function_call_stdout(CALL_ID).await;
+    assert_shell_command_output(&stdout, "hello, world")
+}