mirror of
https://github.com/openai/codex.git
synced 2026-04-29 17:06:51 +00:00
## Summary

Remove js_repl/node test-skip paths and make Node setup explicit in CI so js_repl tests always run instead of silently skipping.

## Why

We had multiple “expediency” skip paths that let js_repl tests pass without actually exercising Node-backed behavior. This reduced CI signal and hid runtime/environment regressions.

## What changed

### CI

- Added Node setup using `codex-rs/node-version.txt` in:
  - `.github/workflows/rust-ci.yml`
  - `.github/workflows/bazel.yml`
- Added a Unix PATH copy step in Bazel workflow to expose the setup-node binary in common paths.

### js_repl test harness

- Added explicit js_repl sandbox test configuration helpers in:
  - `codex-rs/core/src/tools/js_repl/mod.rs`
  - `codex-rs/core/src/tools/handlers/js_repl.rs`
- Added Linux arg0 dispatch glue for js_repl tests so sandbox subprocess entrypoint behavior is correct under Linux test execution.

### Removed skip behavior

- Deleted runtime guard function and early-return skips in js_repl tests (`can_run_js_repl_runtime_tests` and related per-test short-circuits).
- Removed view_image integration test skip behavior:
  - dropped `skip_if_no_network!(Ok(()))`
  - removed “skip on Node missing/too old” branch after js_repl output inspection.

## Impact

- js_repl/node tests now consistently execute and fail loudly when the environment is not correctly provisioned.
- CI has stronger signal for js_repl regressions instead of false green from conditional skips.

## Testing

- `cargo test -p codex-core` (locally) to validate js_repl unit/integration behavior with skips removed.
- CI expected to surface any remaining environment/runtime gaps directly (rather than masking them).
#### [git stack](https://github.com/magus/git-stack-cli)

- ✅ `1` https://github.com/openai/codex/pull/12300
- ✅ `2` https://github.com/openai/codex/pull/12275
- ✅ `3` https://github.com/openai/codex/pull/12205
- ✅ `4` https://github.com/openai/codex/pull/12407
- ✅ `5` https://github.com/openai/codex/pull/12372
- 👉 `6` https://github.com/openai/codex/pull/12185
- ⏳ `7` https://github.com/openai/codex/pull/10673
236 lines
6.7 KiB
Rust
236 lines
6.7 KiB
Rust
#![allow(clippy::expect_used, clippy::unwrap_used)]
|
|
|
|
use anyhow::Result;
|
|
use codex_core::features::Feature;
|
|
use core_test_support::responses;
|
|
use core_test_support::responses::ResponseMock;
|
|
use core_test_support::responses::ResponsesRequest;
|
|
use core_test_support::responses::ev_assistant_message;
|
|
use core_test_support::responses::ev_completed;
|
|
use core_test_support::responses::ev_custom_tool_call;
|
|
use core_test_support::responses::ev_response_created;
|
|
use core_test_support::responses::sse;
|
|
use core_test_support::skip_if_no_network;
|
|
use core_test_support::test_codex::test_codex;
|
|
use wiremock::MockServer;
|
|
|
|
fn custom_tool_output_text_and_success(
|
|
req: &ResponsesRequest,
|
|
call_id: &str,
|
|
) -> (String, Option<bool>) {
|
|
let (output, success) = req
|
|
.custom_tool_call_output_content_and_success(call_id)
|
|
.expect("custom tool output should be present");
|
|
(output.unwrap_or_default(), success)
|
|
}
|
|
|
|
async fn run_js_repl_turn(
|
|
server: &MockServer,
|
|
prompt: &str,
|
|
calls: &[(&str, &str)],
|
|
) -> Result<ResponseMock> {
|
|
let mut builder = test_codex().with_config(|config| {
|
|
config.features.enable(Feature::JsRepl);
|
|
});
|
|
let test = builder.build(server).await?;
|
|
|
|
let mut first_events = vec![ev_response_created("resp-1")];
|
|
for (call_id, js_input) in calls {
|
|
first_events.push(ev_custom_tool_call(call_id, "js_repl", js_input));
|
|
}
|
|
first_events.push(ev_completed("resp-1"));
|
|
responses::mount_sse_once(server, sse(first_events)).await;
|
|
|
|
let second_mock = responses::mount_sse_once(
|
|
server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-1", "done"),
|
|
ev_completed("resp-2"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
test.submit_turn(prompt).await?;
|
|
Ok(second_mock)
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn js_repl_persists_top_level_bindings_and_supports_tla() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = responses::start_mock_server().await;
|
|
let mut builder = test_codex().with_config(|config| {
|
|
config.features.enable(Feature::JsRepl);
|
|
});
|
|
let test = builder.build(&server).await?;
|
|
|
|
responses::mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-1"),
|
|
ev_custom_tool_call(
|
|
"call-1",
|
|
"js_repl",
|
|
"let x = await Promise.resolve(41); console.log(x);",
|
|
),
|
|
ev_completed("resp-1"),
|
|
]),
|
|
)
|
|
.await;
|
|
let second_mock = responses::mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_response_created("resp-2"),
|
|
ev_custom_tool_call("call-2", "js_repl", "console.log(x + 1);"),
|
|
ev_completed("resp-2"),
|
|
]),
|
|
)
|
|
.await;
|
|
let third_mock = responses::mount_sse_once(
|
|
&server,
|
|
sse(vec![
|
|
ev_assistant_message("msg-1", "done"),
|
|
ev_completed("resp-3"),
|
|
]),
|
|
)
|
|
.await;
|
|
|
|
test.submit_turn("run js_repl twice").await?;
|
|
|
|
let req2 = second_mock.single_request();
|
|
let (first_output, first_success) = custom_tool_output_text_and_success(&req2, "call-1");
|
|
assert_ne!(
|
|
first_success,
|
|
Some(false),
|
|
"first js_repl call failed unexpectedly: {first_output}"
|
|
);
|
|
assert!(first_output.contains("41"));
|
|
|
|
let req3 = third_mock.single_request();
|
|
let (second_output, second_success) = custom_tool_output_text_and_success(&req3, "call-2");
|
|
assert_ne!(
|
|
second_success,
|
|
Some(false),
|
|
"second js_repl call failed unexpectedly: {second_output}"
|
|
);
|
|
assert!(second_output.contains("42"));
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn js_repl_can_invoke_builtin_tools() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = responses::start_mock_server().await;
|
|
let mock = run_js_repl_turn(
|
|
&server,
|
|
"use js_repl to call a tool",
|
|
&[(
|
|
"call-1",
|
|
"const toolOut = await codex.tool(\"list_mcp_resources\", {}); console.log(toolOut.type);",
|
|
)],
|
|
)
|
|
.await?;
|
|
|
|
let req = mock.single_request();
|
|
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
|
assert_ne!(
|
|
success,
|
|
Some(false),
|
|
"js_repl call failed unexpectedly: {output}"
|
|
);
|
|
assert!(output.contains("function_call_output"));
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn js_repl_tool_call_rejects_recursive_js_repl_invocation() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = responses::start_mock_server().await;
|
|
let mock = run_js_repl_turn(
|
|
&server,
|
|
"use js_repl recursively",
|
|
&[(
|
|
"call-1",
|
|
r#"
|
|
try {
|
|
await codex.tool("js_repl", "console.log('recursive')");
|
|
console.log("unexpected-success");
|
|
} catch (err) {
|
|
console.log(String(err));
|
|
}
|
|
"#,
|
|
)],
|
|
)
|
|
.await?;
|
|
|
|
let req = mock.single_request();
|
|
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
|
assert_ne!(
|
|
success,
|
|
Some(false),
|
|
"js_repl call failed unexpectedly: {output}"
|
|
);
|
|
assert!(
|
|
output.contains("js_repl cannot invoke itself"),
|
|
"expected recursion guard message, got output: {output}"
|
|
);
|
|
assert!(
|
|
!output.contains("unexpected-success"),
|
|
"recursive js_repl call unexpectedly succeeded: {output}"
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn js_repl_does_not_expose_process_global() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = responses::start_mock_server().await;
|
|
let mock = run_js_repl_turn(
|
|
&server,
|
|
"check process visibility",
|
|
&[("call-1", "console.log(typeof process);")],
|
|
)
|
|
.await?;
|
|
|
|
let req = mock.single_request();
|
|
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
|
assert_ne!(
|
|
success,
|
|
Some(false),
|
|
"js_repl call failed unexpectedly: {output}"
|
|
);
|
|
assert!(output.contains("undefined"));
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
async fn js_repl_blocks_sensitive_builtin_imports() -> Result<()> {
|
|
skip_if_no_network!(Ok(()));
|
|
|
|
let server = responses::start_mock_server().await;
|
|
let mock = run_js_repl_turn(
|
|
&server,
|
|
"import a blocked module",
|
|
&[("call-1", "await import(\"node:process\");")],
|
|
)
|
|
.await?;
|
|
|
|
let req = mock.single_request();
|
|
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
|
assert_ne!(
|
|
success,
|
|
Some(true),
|
|
"blocked import unexpectedly succeeded: {output}"
|
|
);
|
|
assert!(output.contains("Importing module \"node:process\" is not allowed in js_repl"));
|
|
|
|
Ok(())
|
|
}
|