Stabilize shell serialization tests (#13877)

## What changed
- The duration-recording fixture sleep was reduced from an oversized,
artificial `1s` delay to `0.2s`, and the assertion floor was lowered
from `0.5s` to `0.1s`.
- The `shell_command` test fixtures now pass `"login": false` in their
tool arguments so they do not invoke login-shell startup paths.

## Why this fixes the flake
- The old tests were paying for two kinds of noise that had nothing to
do with the feature being validated: oversized sleep time and variable
shell initialization cost.
- Login shells can pick up runner-specific startup files and incur
inconsistent startup latency.
- The test only needs to prove that we record a nontrivial duration and
preserve shell output. A shorter fixture delay plus a non-login shell
keeps that coverage while removing runner-dependent wall-clock variance.

## Scope
- Test-only change.
This commit is contained in:
Ahmed Ibrahim
2026-03-08 13:37:41 -07:00
committed by GitHub
parent 7ba1fccfc1
commit 1f150eda8b

View File

@@ -349,7 +349,7 @@ async fn shell_output_for_freeform_tool_records_duration(
let test = builder.build(&server).await?;
let call_id = "shell-structured";
let responses = shell_responses(call_id, vec!["/bin/sh", "-c", "sleep 1"], output_type)?;
let responses = shell_responses(call_id, vec!["/bin/sh", "-c", "sleep 0.2"], output_type)?;
let mock = mount_sse_sequence(&server, responses).await;
test.submit_turn_with_policy(
@@ -381,7 +381,7 @@ $"#;
.and_then(|value| value.as_str().parse::<f32>().ok())
.expect("expected structured shell output to contain wall time seconds");
assert!(
wall_time_seconds > 0.5,
wall_time_seconds > 0.1,
"expected wall time to be greater than zero seconds, got {wall_time_seconds}"
);
@@ -740,6 +740,7 @@ async fn shell_command_output_is_freeform() -> Result<()> {
let call_id = "shell-command";
let args = json!({
"command": "echo shell command",
"login": false,
"timeout_ms": 1_000,
});
let responses = vec![
@@ -791,6 +792,7 @@ async fn shell_command_output_is_not_truncated_under_10k_bytes() -> Result<()> {
let call_id = "shell-command";
let args = json!({
"command": "perl -e 'print \"1\" x 10000'",
"login": false,
"timeout_ms": 1000,
});
let responses = vec![
@@ -841,6 +843,7 @@ async fn shell_command_output_is_not_truncated_over_10k_bytes() -> Result<()> {
let call_id = "shell-command";
let args = json!({
"command": "perl -e 'print \"1\" x 10001'",
"login": false,
"timeout_ms": 1000,
});
let responses = vec![