mirror of
https://github.com/openai/codex.git
synced 2026-05-03 02:46:39 +00:00
Split approval matrix test groups (#19454)
## Why Recent `main` CI repeatedly timed out in: - `codex-core::all suite::approvals::approval_matrix_covers_all_modes` It failed in runs [24909500958](https://github.com/openai/codex/actions/runs/24909500958), [24908076251](https://github.com/openai/codex/actions/runs/24908076251), [24906197645](https://github.com/openai/codex/actions/runs/24906197645), [24905823212](https://github.com/openai/codex/actions/runs/24905823212), [24903439629](https://github.com/openai/codex/actions/runs/24903439629), [24903336028](https://github.com/openai/codex/actions/runs/24903336028), and [24898949647](https://github.com/openai/codex/actions/runs/24898949647). The failure pattern was a 60s Linux remote timeout. Logs showed many approval scenarios completing before the single matrix test timed out. ## Root Cause `approval_matrix_covers_all_modes` packed every approval/sandbox/tool scenario into one test case. That made the test vulnerable to normal CI variance: one slow scenario or a slow process startup could push the whole monolithic case past the 60s per-test timeout. It also hid which part of the matrix was slow because the runner only reported the one large matrix test. ## What Changed - Keep the shared `scenarios()` table as the single source of approval matrix coverage. - Use one `#[test_case]` per `ScenarioGroup` to generate five async Tokio tests: danger/full-access, read-only, workspace-write, apply-patch, and unified-exec. - Keep the group runner small and add per-scenario error context so a failure still reports the specific scenario name. ## Why This Should Be Reliable Each scenario group now has its own test harness timeout instead of sharing one timeout window with the full matrix. That removes the long sequential loop from a single test while keeping the implementation compact and easy to scan. The tests still run through the same scenario definitions and runner, so this preserves coverage. `test-case` already composes with `#[tokio::test]` in this crate and is already available for test code. ## Verification - `cargo test -p codex-core --test all approval_matrix_ -- --list` - `cargo test -p codex-core --test all approval_matrix_`
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
#![allow(clippy::unwrap_used, clippy::expect_used)]
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
use codex_config::types::ApprovalsReviewer;
|
||||
use codex_core::CodexThread;
|
||||
@@ -51,6 +52,7 @@ use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use tempfile::TempDir;
|
||||
use test_case::test_case;
|
||||
use wiremock::Mock;
|
||||
use wiremock::MockServer;
|
||||
use wiremock::Request;
|
||||
@@ -570,6 +572,15 @@ struct ScenarioSpec {
|
||||
expectation: Expectation,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||
enum ScenarioGroup {
|
||||
DangerFullAccess,
|
||||
ReadOnly,
|
||||
WorkspaceWrite,
|
||||
ApplyPatch,
|
||||
UnifiedExec,
|
||||
}
|
||||
|
||||
struct CommandResult {
|
||||
exit_code: Option<i64>,
|
||||
stdout: String,
|
||||
@@ -1659,17 +1670,52 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
||||
]
|
||||
}
|
||||
|
||||
#[test_case(ScenarioGroup::DangerFullAccess ; "danger_full_access")]
|
||||
#[test_case(ScenarioGroup::ReadOnly ; "read_only")]
|
||||
#[test_case(ScenarioGroup::WorkspaceWrite ; "workspace_write")]
|
||||
#[test_case(ScenarioGroup::ApplyPatch ; "apply_patch")]
|
||||
#[test_case(ScenarioGroup::UnifiedExec ; "unified_exec")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn approval_matrix_covers_all_modes() -> Result<()> {
|
||||
async fn approval_matrix_covers_group(group: ScenarioGroup) -> Result<()> {
|
||||
run_scenario_group(group).await
|
||||
}
|
||||
|
||||
async fn run_scenario_group(group: ScenarioGroup) -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
for scenario in scenarios() {
|
||||
run_scenario(&scenario).await?;
|
||||
let scenarios = scenarios()
|
||||
.into_iter()
|
||||
.filter(|scenario| scenario_group(scenario) == group)
|
||||
.collect::<Vec<_>>();
|
||||
assert!(!scenarios.is_empty(), "expected scenarios for {group:?}");
|
||||
|
||||
for scenario in scenarios {
|
||||
run_scenario(&scenario)
|
||||
.await
|
||||
.with_context(|| format!("approval scenario failed: {}", scenario.name))?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn scenario_group(scenario: &ScenarioSpec) -> ScenarioGroup {
|
||||
match &scenario.action {
|
||||
ActionKind::ApplyPatchFunction { .. } | ActionKind::ApplyPatchShell { .. } => {
|
||||
ScenarioGroup::ApplyPatch
|
||||
}
|
||||
ActionKind::RunUnifiedExecCommand { .. } => ScenarioGroup::UnifiedExec,
|
||||
ActionKind::WriteFile { .. }
|
||||
| ActionKind::FetchUrlNoProxy { .. }
|
||||
| ActionKind::FetchUrl { .. }
|
||||
| ActionKind::RunCommand { .. } => match &scenario.sandbox_policy {
|
||||
SandboxPolicy::DangerFullAccess => ScenarioGroup::DangerFullAccess,
|
||||
SandboxPolicy::ReadOnly { .. } => ScenarioGroup::ReadOnly,
|
||||
SandboxPolicy::WorkspaceWrite { .. } => ScenarioGroup::WorkspaceWrite,
|
||||
SandboxPolicy::ExternalSandbox { .. } => ScenarioGroup::WorkspaceWrite,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
async fn run_scenario(scenario: &ScenarioSpec) -> Result<()> {
|
||||
eprintln!("running approval scenario: {}", scenario.name);
|
||||
let server = start_mock_server().await;
|
||||
|
||||
Reference in New Issue
Block a user