mirror of
https://github.com/openai/codex.git
synced 2026-05-02 18:37:01 +00:00
Stabilize plugin MCP fixture tests (#19452)
## Why Recent `main` CI had repeated flakes in the plugin fixture tests: - `codex-core::all suite::plugins::explicit_plugin_mentions_inject_plugin_guidance` failed in runs [24909500958](https://github.com/openai/codex/actions/runs/24909500958), [24908076251](https://github.com/openai/codex/actions/runs/24908076251), [24906197645](https://github.com/openai/codex/actions/runs/24906197645), and [24898949647](https://github.com/openai/codex/actions/runs/24898949647). - `codex-core::all suite::plugins::plugin_mcp_tools_are_listed` failed in runs [24909500958](https://github.com/openai/codex/actions/runs/24909500958), [24908076251](https://github.com/openai/codex/actions/runs/24908076251), and [24898949647](https://github.com/openai/codex/actions/runs/24898949647). The failures were in the same plugin/MCP fixture family: assertions expected sample plugin guidance or tool inventory, but the test could observe the session before the sample MCP server had finished startup. ## Root Cause `explicit_plugin_mentions_inject_plugin_guidance` submitted the user turn immediately after constructing the session. MCP startup is asynchronous, so on a slower or busier CI runner the prompt could be built before the sample plugin MCP server had reported its tools. That made the test depend on scheduler timing rather than the fixture being ready. `plugin_mcp_tools_are_listed` already needed the same readiness condition, but its wait logic was local to that test. ## What Changed - Added a shared `wait_for_sample_mcp_ready` helper for the plugin fixture tests. - Wait for `McpStartupComplete` before submitting the explicit plugin mention turn. - Reuse the same readiness helper in the MCP tool-listing test. ## Why This Should Be Reliable The tests now wait for the explicit readiness signal from the sample MCP server before asserting guidance or tools derived from that server. This removes the startup race while still exercising the real fixture path, so the assertions should only run after the plugin inventory is deterministic. ## Verification - `cargo test -p codex-core --test all plugins::` - GitHub CI for this PR is passing.
This commit is contained in:
@@ -153,6 +153,45 @@ async fn build_apps_enabled_plugin_test_codex(
|
||||
.codex)
|
||||
}
|
||||
|
||||
async fn wait_for_sample_mcp_ready(codex: &codex_core::CodexThread) -> Result<()> {
|
||||
let startup_event = wait_for_event_with_timeout(
|
||||
codex,
|
||||
|ev| match ev {
|
||||
EventMsg::McpStartupComplete(summary) => {
|
||||
summary.ready.iter().any(|server| server == "sample")
|
||||
|| summary
|
||||
.failed
|
||||
.iter()
|
||||
.any(|failure| failure.server == "sample")
|
||||
|| summary.cancelled.iter().any(|server| server == "sample")
|
||||
}
|
||||
_ => false,
|
||||
},
|
||||
Duration::from_secs(70),
|
||||
)
|
||||
.await;
|
||||
let EventMsg::McpStartupComplete(startup) = startup_event else {
|
||||
unreachable!("event guard guarantees McpStartupComplete");
|
||||
};
|
||||
if let Some(failure) = startup
|
||||
.failed
|
||||
.iter()
|
||||
.find(|failure| failure.server == "sample")
|
||||
{
|
||||
let error = &failure.error;
|
||||
bail!("plugin MCP server failed to start: {error}");
|
||||
}
|
||||
if startup.cancelled.iter().any(|server| server == "sample") {
|
||||
bail!("plugin MCP server startup was cancelled");
|
||||
}
|
||||
assert!(
|
||||
startup.ready.iter().any(|server| server == "sample"),
|
||||
"expected plugin MCP server to be ready; startup summary: {startup:?}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn tool_names(body: &serde_json::Value) -> Vec<String> {
|
||||
body.get("tools")
|
||||
.and_then(serde_json::Value::as_array)
|
||||
@@ -268,6 +307,7 @@ async fn explicit_plugin_mentions_inject_plugin_guidance() -> Result<()> {
|
||||
let codex =
|
||||
build_apps_enabled_plugin_test_codex(&server, codex_home, apps_server.chatgpt_base_url)
|
||||
.await?;
|
||||
wait_for_sample_mcp_ready(&codex).await?;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
@@ -416,41 +456,7 @@ async fn plugin_mcp_tools_are_listed() -> Result<()> {
|
||||
let rmcp_test_server_bin = stdio_server_bin()?;
|
||||
write_plugin_mcp_plugin(codex_home.as_ref(), &rmcp_test_server_bin);
|
||||
let codex = build_plugin_test_codex(&server, codex_home).await?;
|
||||
|
||||
let startup_event = wait_for_event_with_timeout(
|
||||
&codex,
|
||||
|ev| match ev {
|
||||
EventMsg::McpStartupComplete(summary) => {
|
||||
summary.ready.iter().any(|server| server == "sample")
|
||||
|| summary
|
||||
.failed
|
||||
.iter()
|
||||
.any(|failure| failure.server == "sample")
|
||||
|| summary.cancelled.iter().any(|server| server == "sample")
|
||||
}
|
||||
_ => false,
|
||||
},
|
||||
Duration::from_secs(70),
|
||||
)
|
||||
.await;
|
||||
let EventMsg::McpStartupComplete(startup) = startup_event else {
|
||||
unreachable!("event guard guarantees McpStartupComplete");
|
||||
};
|
||||
if let Some(failure) = startup
|
||||
.failed
|
||||
.iter()
|
||||
.find(|failure| failure.server == "sample")
|
||||
{
|
||||
let error = &failure.error;
|
||||
bail!("plugin MCP server failed to start: {error}");
|
||||
}
|
||||
if startup.cancelled.iter().any(|server| server == "sample") {
|
||||
bail!("plugin MCP server startup was cancelled");
|
||||
}
|
||||
assert!(
|
||||
startup.ready.iter().any(|server| server == "sample"),
|
||||
"expected plugin MCP server to be ready; startup summary: {startup:?}"
|
||||
);
|
||||
wait_for_sample_mcp_ready(&codex).await?;
|
||||
|
||||
codex.submit(Op::ListMcpTools).await?;
|
||||
let list_event = wait_for_event_with_timeout(
|
||||
|
||||
Reference in New Issue
Block a user