tests(js_repl): stabilize CI runtime test execution (#12407)

## Summary

Stabilize `js_repl` runtime test setup in CI and move tool-facing `js_repl` behavior coverage into integration tests.

This is a test/CI change only. No production `js_repl` behavior change is intended.

## Why

- Bazel test sandboxes (especially on macOS) could resolve a different `node` than the one installed by `actions/setup-node`, which caused `js_repl` runtime/version failures.
- `js_repl` runtime tests depend on platform-specific sandbox/test-harness behavior, so they need explicit gating in a base-stability commit.
- Several tests in the `js_repl` unit test module were actually black-box/tool-level behavior tests and fit better in the integration suite.

## Changes

- Add `actions/setup-node` to the Bazel and Rust `Tests` workflows, using the exact version pinned in the repo’s Node version file.
- In Bazel (non-Windows), pass `CODEX_JS_REPL_NODE_PATH=$(which node)` into test env so `js_repl` uses the `actions/setup-node` runtime inside Bazel tests.
- Add a new integration test suite for `js_repl` tool behavior and register it in the core integration test suite module.
- Move black-box `js_repl` behavior tests into the integration suite (persistence/TLA, builtin tool invocation, recursive self-call rejection, `process` isolation, blocked builtin imports).
- Keep white-box manager/kernel tests in the `js_repl` unit test module.
- Gate `js_repl` runtime tests to run only on macOS and only when a usable Node runtime is available (skip on other platforms / missing Node in this commit).

## Impact

- Reduces `js_repl` CI failures caused by Node resolution drift in Bazel.
- Improves test organization by separating tool-facing behavior tests from white-box manager/kernel tests.
- Keeps the base commit stable while expanding `js_repl` runtime coverage.
#### [git stack](https://github.com/magus/git-stack-cli)

- ✅ `1` https://github.com/openai/codex/pull/12372
- 👉 `2` https://github.com/openai/codex/pull/12407
- ⏳ `3` https://github.com/openai/codex/pull/12185
- ⏳ `4` https://github.com/openai/codex/pull/10673
2026-05-03 19:06:58 +00:00 · 2026-02-24 21:04:34 -08:00
parent 16ca527c80
commit 8f3f2c3c02
5 changed files with 291 additions and 200 deletions
--- a/codex-rs/core/src/tools/js_repl/mod.rs
+++ b/codex-rs/core/src/tools/js_repl/mod.rs
@@ -1636,6 +1636,12 @@ mod tests {
    }

    async fn can_run_js_repl_runtime_tests() -> bool {
+        // These white-box runtime tests rely on the unit-test harness and are
+        // only required on macOS. Linux uses the codex-linux-sandbox arg0
+        // dispatch path, which is exercised in integration tests instead.
+        if !cfg!(target_os = "macos") {
+            return false;
+        }
        if std::env::var_os("CODEX_SANDBOX").is_some() {
            return false;
        }
@@ -1669,47 +1675,6 @@ mod tests {
        Ok(())
    }

-    #[tokio::test]
-    async fn js_repl_persists_top_level_bindings_and_supports_tla() -> anyhow::Result<()> {
-        if !can_run_js_repl_runtime_tests().await {
-            return Ok(());
-        }
-
-        let (session, turn) = make_session_and_context().await;
-        let session = Arc::new(session);
-        let turn = Arc::new(turn);
-        let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::default()));
-        let manager = turn.js_repl.manager().await?;
-
-        let first = manager
-            .execute(
-                Arc::clone(&session),
-                Arc::clone(&turn),
-                Arc::clone(&tracker),
-                JsReplArgs {
-                    code: "let x = await Promise.resolve(41); console.log(x);".to_string(),
-                    timeout_ms: Some(10_000),
-                },
-            )
-            .await?;
-        assert!(first.output.contains("41"));
-
-        let second = manager
-            .execute(
-                Arc::clone(&session),
-                Arc::clone(&turn),
-                Arc::clone(&tracker),
-                JsReplArgs {
-                    code: "console.log(x + 1);".to_string(),
-                    timeout_ms: Some(10_000),
-                },
-            )
-            .await?;
-
-        assert!(second.output.contains("42"));
-        Ok(())
-    }
-
    #[tokio::test]
    async fn js_repl_timeout_does_not_deadlock() -> anyhow::Result<()> {
        if !can_run_js_repl_runtime_tests().await {
@@ -1965,108 +1930,9 @@ mod tests {
        Ok(())
    }

-    #[tokio::test]
-    async fn js_repl_can_call_tools() -> anyhow::Result<()> {
-        if !can_run_js_repl_runtime_tests().await {
-            return Ok(());
-        }
-
-        let (session, mut turn) = make_session_and_context().await;
-        turn.approval_policy
-            .set(AskForApproval::Never)
-            .expect("test setup should allow updating approval policy");
-        turn.sandbox_policy
-            .set(SandboxPolicy::DangerFullAccess)
-            .expect("test setup should allow updating sandbox policy");
-
-        let session = Arc::new(session);
-        let turn = Arc::new(turn);
-        let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::default()));
-        let manager = turn.js_repl.manager().await?;
-
-        let shell = manager
-            .execute(
-                Arc::clone(&session),
-                Arc::clone(&turn),
-                Arc::clone(&tracker),
-                JsReplArgs {
-                    code: "const shellOut = await codex.tool(\"shell_command\", { command: \"printf js_repl_shell_ok\" }); console.log(JSON.stringify(shellOut));".to_string(),
-                    timeout_ms: Some(15_000),
-                },
-            )
-            .await?;
-        assert!(shell.output.contains("js_repl_shell_ok"));
-
-        let tool = manager
-            .execute(
-                Arc::clone(&session),
-                Arc::clone(&turn),
-                Arc::clone(&tracker),
-                JsReplArgs {
-                    code: "const toolOut = await codex.tool(\"list_mcp_resources\", {}); console.log(toolOut.type);".to_string(),
-                    timeout_ms: Some(15_000),
-                },
-            )
-            .await?;
-        assert!(tool.output.contains("function_call_output"));
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn js_repl_tool_call_rejects_recursive_js_repl_invocation() -> anyhow::Result<()> {
-        if !can_run_js_repl_runtime_tests().await {
-            return Ok(());
-        }
-
-        let (session, mut turn) = make_session_and_context().await;
-        turn.approval_policy
-            .set(AskForApproval::Never)
-            .expect("test setup should allow updating approval policy");
-        turn.sandbox_policy
-            .set(SandboxPolicy::DangerFullAccess)
-            .expect("test setup should allow updating sandbox policy");
-
-        let session = Arc::new(session);
-        let turn = Arc::new(turn);
-        let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::default()));
-        let manager = turn.js_repl.manager().await?;
-
-        let result = manager
-            .execute(
-                session,
-                turn,
-                tracker,
-                JsReplArgs {
-                    code: r#"
-try {
-  await codex.tool("js_repl", "console.log('recursive')");
-  console.log("unexpected-success");
-} catch (err) {
-  console.log(String(err));
-}
-"#
-                    .to_string(),
-                    timeout_ms: Some(15_000),
-                },
-            )
-            .await?;
-
-        assert!(
-            result.output.contains("js_repl cannot invoke itself"),
-            "expected recursion guard message, got output: {}",
-            result.output
-        );
-        assert!(
-            !result.output.contains("unexpected-success"),
-            "recursive js_repl tool call unexpectedly succeeded: {}",
-            result.output
-        );
-        Ok(())
-    }
-
    #[tokio::test]
    async fn js_repl_waits_for_unawaited_tool_calls_before_completion() -> anyhow::Result<()> {
-        if !can_run_js_repl_runtime_tests().await || cfg!(windows) {
+        if !can_run_js_repl_runtime_tests().await {
            return Ok(());
        }

@@ -2277,65 +2143,6 @@ console.log(out.type);

        Ok(())
    }
-
-    #[tokio::test]
-    async fn js_repl_does_not_expose_process_global() -> anyhow::Result<()> {
-        if !can_run_js_repl_runtime_tests().await {
-            return Ok(());
-        }
-
-        let (session, turn) = make_session_and_context().await;
-        let session = Arc::new(session);
-        let turn = Arc::new(turn);
-        let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::default()));
-        let manager = turn.js_repl.manager().await?;
-
-        let result = manager
-            .execute(
-                session,
-                turn,
-                tracker,
-                JsReplArgs {
-                    code: "console.log(typeof process);".to_string(),
-                    timeout_ms: Some(10_000),
-                },
-            )
-            .await?;
-        assert!(result.output.contains("undefined"));
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn js_repl_blocks_sensitive_builtin_imports() -> anyhow::Result<()> {
-        if !can_run_js_repl_runtime_tests().await {
-            return Ok(());
-        }
-
-        let (session, turn) = make_session_and_context().await;
-        let session = Arc::new(session);
-        let turn = Arc::new(turn);
-        let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::default()));
-        let manager = turn.js_repl.manager().await?;
-
-        let err = manager
-            .execute(
-                session,
-                turn,
-                tracker,
-                JsReplArgs {
-                    code: "await import(\"node:process\");".to_string(),
-                    timeout_ms: Some(10_000),
-                },
-            )
-            .await
-            .expect_err("node:process import should be blocked");
-        assert!(
-            err.to_string()
-                .contains("Importing module \"node:process\" is not allowed in js_repl")
-        );
-        Ok(())
-    }
-
    #[tokio::test]
    async fn js_repl_prefers_env_node_module_dirs_over_config() -> anyhow::Result<()> {
        if !can_run_js_repl_runtime_tests().await {