mirror of
https://github.com/openai/codex.git
synced 2026-04-28 08:34:54 +00:00
Add code_mode_only feature (#14617)
Summary - add the code_mode_only feature flag/config schema and wire its dependency on code_mode - update code mode tool descriptions to list nested tools with detailed headers - restrict available tools for prompt and exec descriptions when code_mode_only is enabled and test the behavior Testing - Not run (not requested)
This commit is contained in:
@@ -37,6 +37,23 @@ fn custom_tool_output_items(req: &ResponsesRequest, call_id: &str) -> Vec<Value>
|
||||
}
|
||||
}
|
||||
|
||||
fn tool_names(body: &Value) -> Vec<String> {
|
||||
body.get("tools")
|
||||
.and_then(Value::as_array)
|
||||
.map(|tools| {
|
||||
tools
|
||||
.iter()
|
||||
.filter_map(|tool| {
|
||||
tool.get("name")
|
||||
.or_else(|| tool.get("type"))
|
||||
.and_then(Value::as_str)
|
||||
.map(str::to_string)
|
||||
})
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
fn function_tool_output_items(req: &ResponsesRequest, call_id: &str) -> Vec<Value> {
|
||||
match req.function_call_output(call_id).get("output") {
|
||||
Some(Value::Array(items)) => items.clone(),
|
||||
@@ -233,6 +250,86 @@ text(JSON.stringify(await tools.exec_command({ cmd: "printf code_mode_exec_marke
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_only_restricts_prompt_tools() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let resp_mock = responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
let _ = config.features.enable(Feature::CodeModeOnly);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
test.submit_turn("list tools in code mode only").await?;
|
||||
|
||||
let first_body = resp_mock.single_request().body_json();
|
||||
assert_eq!(
|
||||
tool_names(&first_body),
|
||||
vec!["exec".to_string(), "exec_wait".to_string()]
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_only_can_call_nested_tools() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_custom_tool_call(
|
||||
"call-1",
|
||||
"exec",
|
||||
r#"
|
||||
const output = await tools.exec_command({ cmd: "printf code_mode_only_nested_tool_marker" });
|
||||
text(output.output);
|
||||
"#,
|
||||
),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let follow_up_mock = responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
let _ = config.features.enable(Feature::CodeModeOnly);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
test.submit_turn("use exec to run nested tool in code mode only")
|
||||
.await?;
|
||||
|
||||
let request = follow_up_mock.single_request();
|
||||
let (output, success) = custom_tool_output_body_and_success(&request, "call-1");
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"code_mode_only nested tool call failed unexpectedly: {output}"
|
||||
);
|
||||
assert_eq!(output, "code_mode_only_nested_tool_marker");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg_attr(windows, ignore = "flaky on windows")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_nested_tool_calls_can_run_in_parallel() -> Result<()> {
|
||||
|
||||
Reference in New Issue
Block a user