Files
codex/codex-rs/core/tests/suite/shell_command.rs
Dylan Hurd b9d1a087ee chore(shell_command) fix freeform timeout output (#7791)
## Summary
Adding an additional integration test for timeout_ms

## Testing
- [x] these are tests
2025-12-15 19:26:39 -08:00

227 lines
7.2 KiB
Rust

use anyhow::Result;
use core_test_support::assert_regex_match;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_function_call;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::sse;
use core_test_support::skip_if_no_network;
use core_test_support::skip_if_windows;
use core_test_support::test_codex::TestCodexBuilder;
use core_test_support::test_codex::TestCodexHarness;
use core_test_support::test_codex::test_codex;
use serde_json::json;
fn shell_responses_with_timeout(
call_id: &str,
command: &str,
login: Option<bool>,
timeout_ms: i64,
) -> Vec<String> {
let args = json!({
"command": command,
"timeout_ms": timeout_ms,
"login": login,
});
#[allow(clippy::expect_used)]
let arguments = serde_json::to_string(&args).expect("serialize shell command arguments");
vec![
sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "shell_command", &arguments),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
]
}
fn shell_responses(call_id: &str, command: &str, login: Option<bool>) -> Vec<String> {
shell_responses_with_timeout(call_id, command, login, 2_000)
}
async fn shell_command_harness_with(
configure: impl FnOnce(TestCodexBuilder) -> TestCodexBuilder,
) -> Result<TestCodexHarness> {
let builder = configure(test_codex()).with_config(|config| {
config.include_apply_patch_tool = true;
});
TestCodexHarness::with_builder(builder).await
}
async fn mount_shell_responses(
harness: &TestCodexHarness,
call_id: &str,
command: &str,
login: Option<bool>,
) {
mount_sse_sequence(harness.server(), shell_responses(call_id, command, login)).await;
}
async fn mount_shell_responses_with_timeout(
harness: &TestCodexHarness,
call_id: &str,
command: &str,
login: Option<bool>,
timeout_ms: i64,
) {
mount_sse_sequence(
harness.server(),
shell_responses_with_timeout(call_id, command, login, timeout_ms),
)
.await;
}
fn assert_shell_command_output(output: &str, expected: &str) -> Result<()> {
let normalized_output = output
.replace("\r\n", "\n")
.replace('\r', "\n")
.trim_end_matches('\n')
.to_string();
let expected_pattern = format!(
r"(?s)^Exit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds\nOutput:\n{expected}\n?$"
);
assert_regex_match(&expected_pattern, &normalized_output);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_command_works() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let harness = shell_command_harness_with(|builder| builder.with_model("gpt-5.1")).await?;
let call_id = "shell-command-call";
mount_shell_responses(&harness, call_id, "echo 'hello, world'", None).await;
harness.submit("run the echo command").await?;
let output = harness.function_call_stdout(call_id).await;
assert_shell_command_output(&output, "hello, world")?;
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn output_with_login() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let harness = shell_command_harness_with(|builder| builder.with_model("gpt-5.1")).await?;
let call_id = "shell-command-call-login-true";
mount_shell_responses(&harness, call_id, "echo 'hello, world'", Some(true)).await;
harness.submit("run the echo command with login").await?;
let output = harness.function_call_stdout(call_id).await;
assert_shell_command_output(&output, "hello, world")?;
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn output_without_login() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let harness = shell_command_harness_with(|builder| builder.with_model("gpt-5.1")).await?;
let call_id = "shell-command-call-login-false";
mount_shell_responses(&harness, call_id, "echo 'hello, world'", Some(false)).await;
harness.submit("run the echo command without login").await?;
let output = harness.function_call_stdout(call_id).await;
assert_shell_command_output(&output, "hello, world")?;
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn multi_line_output_with_login() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let harness = shell_command_harness_with(|builder| builder.with_model("gpt-5.1")).await?;
let call_id = "shell-command-call-first-extra-login";
mount_shell_responses(
&harness,
call_id,
"echo 'first line\nsecond line'",
Some(true),
)
.await;
harness.submit("run the command with login").await?;
let output = harness.function_call_stdout(call_id).await;
assert_shell_command_output(&output, "first line\nsecond line")?;
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn pipe_output_with_login() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
skip_if_windows!(Ok(()));
let harness = shell_command_harness_with(|builder| builder.with_model("gpt-5.1")).await?;
let call_id = "shell-command-call-second-extra-no-login";
mount_shell_responses(&harness, call_id, "echo 'hello, world' | cat", None).await;
harness.submit("run the command without login").await?;
let output = harness.function_call_stdout(call_id).await;
assert_shell_command_output(&output, "hello, world")?;
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn pipe_output_without_login() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
skip_if_windows!(Ok(()));
let harness = shell_command_harness_with(|builder| builder.with_model("gpt-5.1")).await?;
let call_id = "shell-command-call-third-extra-login-false";
mount_shell_responses(&harness, call_id, "echo 'hello, world' | cat", Some(false)).await;
harness.submit("run the command without login").await?;
let output = harness.function_call_stdout(call_id).await;
assert_shell_command_output(&output, "hello, world")?;
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn shell_command_times_out_with_timeout_ms() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let harness = shell_command_harness_with(|builder| builder.with_model("gpt-5.1")).await?;
let call_id = "shell-command-timeout";
let command = if cfg!(windows) {
"timeout /t 5"
} else {
"sleep 5"
};
mount_shell_responses_with_timeout(&harness, call_id, command, None, 200).await;
harness
.submit("run a long command with a short timeout")
.await?;
let output = harness.function_call_stdout(call_id).await;
let normalized_output = output
.replace("\r\n", "\n")
.replace('\r', "\n")
.trim_end_matches('\n')
.to_string();
let expected_pattern = r"(?s)^Exit code: 124\nWall time: [0-9]+(?:\.[0-9]+)? seconds\nOutput:\ncommand timed out after [0-9]+ milliseconds\n?$";
assert_regex_match(expected_pattern, &normalized_output);
Ok(())
}