mirror of
https://github.com/openai/codex.git
synced 2026-04-24 22:54:54 +00:00
Add js_repl_tools_only model and routing restrictions (#10671)
# External (non-OpenAI) Pull Request Requirements Before opening this Pull Request, please read the dedicated "Contributing" markdown file or your PR may be closed: https://github.com/openai/codex/blob/main/docs/contributing.md If your PR conforms to our contribution guidelines, replace this text with a detailed and high quality description of your changes. Include a link to a bug report or enhancement request. #### [git stack](https://github.com/magus/git-stack-cli) - ✅ `1` https://github.com/openai/codex/pull/10674 - ✅ `2` https://github.com/openai/codex/pull/10672 - 👉 `3` https://github.com/openai/codex/pull/10671 - ⏳ `4` https://github.com/openai/codex/pull/10673 - ⏳ `5` https://github.com/openai/codex/pull/10670
This commit is contained in:
committed by
GitHub
parent
a7ce2a1c31
commit
0dcfc59171
@@ -224,6 +224,9 @@
|
||||
"js_repl": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"js_repl_tools_only": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"memory_tool": {
|
||||
"type": "boolean"
|
||||
},
|
||||
@@ -1308,6 +1311,9 @@
|
||||
"js_repl": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"js_repl_tools_only": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"memory_tool": {
|
||||
"type": "boolean"
|
||||
},
|
||||
|
||||
@@ -5515,6 +5515,7 @@ mod tests {
|
||||
use crate::tools::handlers::ShellHandler;
|
||||
use crate::tools::handlers::UnifiedExecHandler;
|
||||
use crate::tools::registry::ToolHandler;
|
||||
use crate::tools::router::ToolCallSource;
|
||||
use crate::turn_diff_tracker::TurnDiffTracker;
|
||||
use codex_app_server_protocol::AppInfo;
|
||||
use codex_otel::TelemetryAuthMode;
|
||||
@@ -7452,6 +7453,7 @@ mod tests {
|
||||
Arc::clone(&turn_context),
|
||||
tracker,
|
||||
call,
|
||||
ToolCallSource::Direct,
|
||||
)
|
||||
.await
|
||||
.expect_err("expected fatal error");
|
||||
|
||||
@@ -80,6 +80,8 @@ pub enum Feature {
|
||||
// Experimental
|
||||
/// Enable JavaScript REPL tools backed by a persistent Node kernel.
|
||||
JsRepl,
|
||||
/// Only expose js_repl tools directly to the model.
|
||||
JsReplToolsOnly,
|
||||
/// Use the single unified PTY-backed exec tool.
|
||||
UnifiedExec,
|
||||
/// Include the freeform apply_patch tool.
|
||||
@@ -322,6 +324,10 @@ impl Features {
|
||||
}
|
||||
|
||||
overrides.apply(&mut features);
|
||||
if features.enabled(Feature::JsReplToolsOnly) && !features.enabled(Feature::JsRepl) {
|
||||
tracing::warn!("js_repl_tools_only requires js_repl; disabling js_repl_tools_only");
|
||||
features.disable(Feature::JsReplToolsOnly);
|
||||
}
|
||||
|
||||
features
|
||||
}
|
||||
@@ -430,6 +436,12 @@ pub const FEATURES: &[FeatureSpec] = &[
|
||||
stage: Stage::UnderDevelopment,
|
||||
default_enabled: false,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::JsReplToolsOnly,
|
||||
key: "js_repl_tools_only",
|
||||
stage: Stage::UnderDevelopment,
|
||||
default_enabled: false,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::WebSearchRequest,
|
||||
key: "web_search_request",
|
||||
|
||||
@@ -48,6 +48,12 @@ fn render_js_repl_instructions(config: &Config) -> Option<String> {
|
||||
section.push_str("- Top-level bindings persist across cells. If you hit `SyntaxError: Identifier 'x' has already been declared`, reuse the binding, pick a new name, wrap in `{ ... }` for block scope, or reset the kernel with `js_repl_reset`.\n");
|
||||
section.push_str("- Top-level static import declarations (for example `import x from \"pkg\"`) are currently unsupported in `js_repl`; use dynamic imports with `await import(\"pkg\")` instead.\n");
|
||||
|
||||
if config.features.enabled(Feature::JsReplToolsOnly) {
|
||||
section.push_str("- Do not call tools directly; use `js_repl` + `codex.tool(...)` for all tool calls, including shell commands.\n");
|
||||
section
|
||||
.push_str("- MCP tools (if any) can also be called by name via `codex.tool(...)`.\n");
|
||||
}
|
||||
|
||||
section.push_str("- Avoid direct access to `process.stdout` / `process.stderr` / `process.stdin`; it can corrupt the JSON line protocol. Use `console.log` and `codex.tool(...)`.");
|
||||
|
||||
Some(section)
|
||||
@@ -413,6 +419,21 @@ mod tests {
|
||||
assert_eq!(res, expected);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn js_repl_tools_only_instructions_are_feature_gated() {
|
||||
let tmp = tempfile::tempdir().expect("tempdir");
|
||||
let mut cfg = make_config(&tmp, 4096, None).await;
|
||||
cfg.features
|
||||
.enable(Feature::JsRepl)
|
||||
.enable(Feature::JsReplToolsOnly);
|
||||
|
||||
let res = get_user_instructions(&cfg, None)
|
||||
.await
|
||||
.expect("js_repl instructions expected");
|
||||
let expected = "## JavaScript REPL (Node)\n- Use `js_repl` for Node-backed JavaScript with top-level await in a persistent kernel. `codex.state` persists for the session (best effort) and is cleared by `js_repl_reset`.\n- `js_repl` is a freeform/custom tool. Direct `js_repl` calls must send raw JavaScript tool input (optionally with first-line `// codex-js-repl: timeout_ms=15000`). Do not wrap code in JSON (for example `{\"code\":\"...\"}`), quotes, or markdown code fences.\n- Helpers: `codex.state`, `codex.tmpDir`, and `codex.tool(name, args?)`.\n- `codex.tool` executes a normal tool call and resolves to the raw tool output object. Use it for shell and non-shell tools alike.\n- To share generated images with the model, write a file under `codex.tmpDir`, call `await codex.tool(\"view_image\", { path: \"/absolute/path\" })`, then delete the file.\n- Top-level bindings persist across cells. If you hit `SyntaxError: Identifier 'x' has already been declared`, reuse the binding, pick a new name, wrap in `{ ... }` for block scope, or reset the kernel with `js_repl_reset`.\n- Top-level static import declarations (for example `import x from \"pkg\"`) are currently unsupported in `js_repl`; use dynamic imports with `await import(\"pkg\")` instead.\n- Do not call tools directly; use `js_repl` + `codex.tool(...)` for all tool calls, including shell commands.\n- MCP tools (if any) can also be called by name via `codex.tool(...)`.\n- Avoid direct access to `process.stdout` / `process.stderr` / `process.stdin`; it can corrupt the JSON line protocol. Use `console.log` and `codex.tool(...)`.";
|
||||
assert_eq!(res, expected);
|
||||
}
|
||||
|
||||
/// When both system instructions *and* a project doc are present the two
|
||||
/// should be concatenated with the separator.
|
||||
#[tokio::test]
|
||||
|
||||
@@ -680,7 +680,13 @@ impl JsReplManager {
|
||||
};
|
||||
|
||||
match router
|
||||
.dispatch_tool_call(exec.session, exec.turn, exec.tracker, call)
|
||||
.dispatch_tool_call(
|
||||
exec.session,
|
||||
exec.turn,
|
||||
exec.tracker,
|
||||
call,
|
||||
crate::tools::router::ToolCallSource::JsRepl,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(response) => match serde_json::to_value(response) {
|
||||
|
||||
@@ -85,7 +85,13 @@ impl ToolCallRuntime {
|
||||
};
|
||||
|
||||
router
|
||||
.dispatch_tool_call(session, turn, tracker, call.clone())
|
||||
.dispatch_tool_call(
|
||||
session,
|
||||
turn,
|
||||
tracker,
|
||||
call.clone(),
|
||||
crate::tools::router::ToolCallSource::Direct,
|
||||
)
|
||||
.instrument(dispatch_span.clone())
|
||||
.await
|
||||
} => res,
|
||||
|
||||
@@ -28,6 +28,12 @@ pub struct ToolCall {
|
||||
pub payload: ToolPayload,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
||||
pub enum ToolCallSource {
|
||||
Direct,
|
||||
JsRepl,
|
||||
}
|
||||
|
||||
pub struct ToolRouter {
|
||||
registry: ToolRegistry,
|
||||
specs: Vec<ConfiguredToolSpec>,
|
||||
@@ -138,6 +144,7 @@ impl ToolRouter {
|
||||
turn: Arc<TurnContext>,
|
||||
tracker: SharedTurnDiffTracker,
|
||||
call: ToolCall,
|
||||
source: ToolCallSource,
|
||||
) -> Result<ResponseInputItem, FunctionCallError> {
|
||||
let ToolCall {
|
||||
tool_name,
|
||||
@@ -147,6 +154,21 @@ impl ToolRouter {
|
||||
let payload_outputs_custom = matches!(payload, ToolPayload::Custom { .. });
|
||||
let failure_call_id = call_id.clone();
|
||||
|
||||
if source == ToolCallSource::Direct
|
||||
&& turn.tools_config.js_repl_tools_only
|
||||
&& !matches!(tool_name.as_str(), "js_repl" | "js_repl_reset")
|
||||
{
|
||||
let err = FunctionCallError::RespondToModel(
|
||||
"direct tool calls are disabled; use js_repl and codex.tool(...) instead"
|
||||
.to_string(),
|
||||
);
|
||||
return Ok(Self::failure_response(
|
||||
failure_call_id,
|
||||
payload_outputs_custom,
|
||||
err,
|
||||
));
|
||||
}
|
||||
|
||||
let invocation = ToolInvocation {
|
||||
session,
|
||||
turn,
|
||||
@@ -189,3 +211,118 @@ impl ToolRouter {
|
||||
}
|
||||
}
|
||||
}
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::codex::make_session_and_context;
|
||||
use crate::tools::context::ToolPayload;
|
||||
use crate::turn_diff_tracker::TurnDiffTracker;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
|
||||
use super::ToolCall;
|
||||
use super::ToolCallSource;
|
||||
use super::ToolRouter;
|
||||
|
||||
#[tokio::test]
|
||||
async fn js_repl_tools_only_blocks_direct_tool_calls() -> anyhow::Result<()> {
|
||||
let (session, mut turn) = make_session_and_context().await;
|
||||
turn.tools_config.js_repl_tools_only = true;
|
||||
|
||||
let session = Arc::new(session);
|
||||
let turn = Arc::new(turn);
|
||||
let mcp_tools = session
|
||||
.services
|
||||
.mcp_connection_manager
|
||||
.read()
|
||||
.await
|
||||
.list_all_tools()
|
||||
.await;
|
||||
let router = ToolRouter::from_config(
|
||||
&turn.tools_config,
|
||||
Some(
|
||||
mcp_tools
|
||||
.into_iter()
|
||||
.map(|(name, tool)| (name, tool.tool))
|
||||
.collect(),
|
||||
),
|
||||
turn.dynamic_tools.as_slice(),
|
||||
);
|
||||
|
||||
let call = ToolCall {
|
||||
tool_name: "shell".to_string(),
|
||||
call_id: "call-1".to_string(),
|
||||
payload: ToolPayload::Function {
|
||||
arguments: "{}".to_string(),
|
||||
},
|
||||
};
|
||||
let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
|
||||
let response = router
|
||||
.dispatch_tool_call(session, turn, tracker, call, ToolCallSource::Direct)
|
||||
.await?;
|
||||
|
||||
match response {
|
||||
ResponseInputItem::FunctionCallOutput { output, .. } => {
|
||||
let content = output.text_content().unwrap_or_default();
|
||||
assert!(
|
||||
content.contains("direct tool calls are disabled"),
|
||||
"unexpected tool call message: {content}",
|
||||
);
|
||||
}
|
||||
other => panic!("expected function call output, got {other:?}"),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn js_repl_tools_only_allows_js_repl_source_calls() -> anyhow::Result<()> {
|
||||
let (session, mut turn) = make_session_and_context().await;
|
||||
turn.tools_config.js_repl_tools_only = true;
|
||||
|
||||
let session = Arc::new(session);
|
||||
let turn = Arc::new(turn);
|
||||
let mcp_tools = session
|
||||
.services
|
||||
.mcp_connection_manager
|
||||
.read()
|
||||
.await
|
||||
.list_all_tools()
|
||||
.await;
|
||||
let router = ToolRouter::from_config(
|
||||
&turn.tools_config,
|
||||
Some(
|
||||
mcp_tools
|
||||
.into_iter()
|
||||
.map(|(name, tool)| (name, tool.tool))
|
||||
.collect(),
|
||||
),
|
||||
turn.dynamic_tools.as_slice(),
|
||||
);
|
||||
|
||||
let call = ToolCall {
|
||||
tool_name: "shell".to_string(),
|
||||
call_id: "call-2".to_string(),
|
||||
payload: ToolPayload::Function {
|
||||
arguments: "{}".to_string(),
|
||||
},
|
||||
};
|
||||
let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
|
||||
let response = router
|
||||
.dispatch_tool_call(session, turn, tracker, call, ToolCallSource::JsRepl)
|
||||
.await?;
|
||||
|
||||
match response {
|
||||
ResponseInputItem::FunctionCallOutput { output, .. } => {
|
||||
let content = output.text_content().unwrap_or_default();
|
||||
assert!(
|
||||
!content.contains("direct tool calls are disabled"),
|
||||
"js_repl source should bypass direct-call policy gate"
|
||||
);
|
||||
}
|
||||
other => panic!("expected function call output, got {other:?}"),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -34,6 +34,7 @@ pub(crate) struct ToolsConfig {
|
||||
pub web_search_mode: Option<WebSearchMode>,
|
||||
pub search_tool: bool,
|
||||
pub js_repl_enabled: bool,
|
||||
pub js_repl_tools_only: bool,
|
||||
pub collab_tools: bool,
|
||||
pub collaboration_modes_tools: bool,
|
||||
pub request_rule_enabled: bool,
|
||||
@@ -55,6 +56,8 @@ impl ToolsConfig {
|
||||
} = params;
|
||||
let include_apply_patch_tool = features.enabled(Feature::ApplyPatchFreeform);
|
||||
let include_js_repl = features.enabled(Feature::JsRepl);
|
||||
let include_js_repl_tools_only =
|
||||
include_js_repl && features.enabled(Feature::JsReplToolsOnly);
|
||||
let include_collab_tools = features.enabled(Feature::Collab);
|
||||
let include_collaboration_modes_tools = features.enabled(Feature::CollaborationModes);
|
||||
let request_rule_enabled = features.enabled(Feature::RequestRule);
|
||||
@@ -91,6 +94,7 @@ impl ToolsConfig {
|
||||
web_search_mode: *web_search_mode,
|
||||
search_tool: include_search_tool,
|
||||
js_repl_enabled: include_js_repl,
|
||||
js_repl_tools_only: include_js_repl_tools_only,
|
||||
collab_tools: include_collab_tools,
|
||||
collaboration_modes_tools: include_collaboration_modes_tools,
|
||||
request_rule_enabled,
|
||||
@@ -99,8 +103,15 @@ impl ToolsConfig {
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn filter_tools_for_model(tools: Vec<ToolSpec>, _config: &ToolsConfig) -> Vec<ToolSpec> {
|
||||
pub(crate) fn filter_tools_for_model(tools: Vec<ToolSpec>, config: &ToolsConfig) -> Vec<ToolSpec> {
|
||||
if !config.js_repl_tools_only {
|
||||
return tools;
|
||||
}
|
||||
|
||||
tools
|
||||
.into_iter()
|
||||
.filter(|spec| matches!(spec.name(), "js_repl" | "js_repl_reset"))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Generic JSON‑Schema subset needed for our tool definitions
|
||||
@@ -1685,6 +1696,27 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
fn assert_contains_tool_specs(tools: &[ToolSpec], expected_subset: &[&str]) {
|
||||
use std::collections::HashSet;
|
||||
let mut names = HashSet::new();
|
||||
let mut duplicates = Vec::new();
|
||||
for name in tools.iter().map(tool_name) {
|
||||
if !names.insert(name) {
|
||||
duplicates.push(name);
|
||||
}
|
||||
}
|
||||
assert!(
|
||||
duplicates.is_empty(),
|
||||
"duplicate tool entries detected: {duplicates:?}"
|
||||
);
|
||||
for expected in expected_subset {
|
||||
assert!(
|
||||
names.contains(expected),
|
||||
"expected tool {expected} to be present; had: {names:?}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn shell_tool_name(config: &ToolsConfig) -> Option<&'static str> {
|
||||
match config.shell_type {
|
||||
ConfigShellToolType::Default => Some("shell"),
|
||||
@@ -1910,6 +1942,77 @@ mod tests {
|
||||
assert_contains_tool_names(&tools, &["js_repl", "js_repl_reset"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_repl_tools_only_filters_model_tools() {
|
||||
let config = test_config();
|
||||
let model_info =
|
||||
ModelsManager::construct_model_info_offline_for_tests("gpt-5-codex", &config);
|
||||
let mut features = Features::with_defaults();
|
||||
features.enable(Feature::JsRepl);
|
||||
features.enable(Feature::JsReplToolsOnly);
|
||||
|
||||
let tools_config = ToolsConfig::new(&ToolsConfigParams {
|
||||
model_info: &model_info,
|
||||
features: &features,
|
||||
web_search_mode: Some(WebSearchMode::Cached),
|
||||
});
|
||||
let (tools, _) = build_specs(&tools_config, None, &[]).build();
|
||||
let filtered = filter_tools_for_model(
|
||||
tools.iter().map(|tool| tool.spec.clone()).collect(),
|
||||
&tools_config,
|
||||
);
|
||||
assert_contains_tool_specs(&filtered, &["js_repl", "js_repl_reset"]);
|
||||
assert!(
|
||||
!filtered.iter().any(|tool| tool_name(tool) == "shell"),
|
||||
"expected non-js_repl tools to be hidden when js_repl_tools_only is enabled"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn js_repl_tools_only_hides_dynamic_tools_from_model_tools() {
|
||||
let config = test_config();
|
||||
let model_info =
|
||||
ModelsManager::construct_model_info_offline_for_tests("gpt-5-codex", &config);
|
||||
let mut features = Features::with_defaults();
|
||||
features.enable(Feature::JsRepl);
|
||||
features.enable(Feature::JsReplToolsOnly);
|
||||
|
||||
let tools_config = ToolsConfig::new(&ToolsConfigParams {
|
||||
model_info: &model_info,
|
||||
features: &features,
|
||||
web_search_mode: Some(WebSearchMode::Cached),
|
||||
});
|
||||
let dynamic_tools = vec![DynamicToolSpec {
|
||||
name: "dynamic_echo".to_string(),
|
||||
description: "echo dynamic payload".to_string(),
|
||||
input_schema: serde_json::json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"text": {"type": "string"}
|
||||
},
|
||||
"required": ["text"],
|
||||
"additionalProperties": false
|
||||
}),
|
||||
}];
|
||||
let (tools, _) = build_specs(&tools_config, None, &dynamic_tools).build();
|
||||
assert!(
|
||||
tools.iter().any(|tool| tool.spec.name() == "dynamic_echo"),
|
||||
"expected dynamic tool in full router specs"
|
||||
);
|
||||
|
||||
let filtered = filter_tools_for_model(
|
||||
tools.iter().map(|tool| tool.spec.clone()).collect(),
|
||||
&tools_config,
|
||||
);
|
||||
assert!(
|
||||
!filtered
|
||||
.iter()
|
||||
.any(|tool| tool_name(tool) == "dynamic_echo"),
|
||||
"expected dynamic tools to be hidden from direct model tools in js_repl_tools_only mode"
|
||||
);
|
||||
assert_contains_tool_specs(&filtered, &["js_repl", "js_repl_reset"]);
|
||||
}
|
||||
|
||||
fn assert_model_tools(
|
||||
model_slug: &str,
|
||||
features: &Features,
|
||||
|
||||
@@ -11,6 +11,16 @@
|
||||
js_repl = true
|
||||
```
|
||||
|
||||
`js_repl_tools_only` can be enabled to force direct model tool calls through `js_repl`:
|
||||
|
||||
```toml
|
||||
[features]
|
||||
js_repl = true
|
||||
js_repl_tools_only = true
|
||||
```
|
||||
|
||||
When enabled, direct model tool calls are restricted to `js_repl` and `js_repl_reset`; other tools remain available via `await codex.tool(...)` inside js_repl.
|
||||
|
||||
## Node runtime
|
||||
|
||||
`js_repl` requires a Node version that meets or exceeds `codex-rs/node-version.txt`.
|
||||
|
||||
Reference in New Issue
Block a user