mirror of
https://github.com/openai/codex.git
synced 2026-04-28 08:34:54 +00:00
[code mode] defer mcp tools from exec description (#17287)
## Summary
- hide deferred MCP/app nested tool descriptions from the `exec` prompt in code mode
- add short guidance that omitted nested tools are still available through `ALL_TOOLS`
- cover the `code_mode_only` path with an integration test that discovers and calls a deferred app tool

## Motivation
`code_mode_only` exposes only top-level `exec`/`wait`, but the `exec` description could still include a large nested-tool reference. This keeps deferred nested tools callable while avoiding that prompt bloat.

## Tests
- `just fmt`
- `just fix -p codex-code-mode`
- `just fix -p codex-tools`
- `cargo test -p codex-code-mode exec_description_mentions_deferred_nested_tools_when_available`
- `cargo test -p codex-tools create_code_mode_tool_matches_expected_spec`
- `cargo test -p codex-core code_mode_only_guides_all_tools_search_and_calls_deferred_app_tools`
This commit is contained in:
@@ -6,6 +6,8 @@ use base64::engine::general_purpose::STANDARD as BASE64_STANDARD;
|
||||
use codex_config::types::McpServerConfig;
|
||||
use codex_config::types::McpServerTransportConfig;
|
||||
use codex_features::Feature;
|
||||
use codex_login::CodexAuth;
|
||||
use codex_models_manager::bundled_models_response;
|
||||
use codex_protocol::dynamic_tools::DynamicToolCallOutputContentItem;
|
||||
use codex_protocol::dynamic_tools::DynamicToolResponse;
|
||||
use codex_protocol::dynamic_tools::DynamicToolSpec;
|
||||
@@ -14,6 +16,7 @@ use codex_protocol::protocol::EventMsg;
|
||||
use codex_protocol::protocol::Op;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::apps_test_server::AppsTestServer;
|
||||
use core_test_support::assert_regex_match;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::responses::ResponseMock;
|
||||
@@ -347,6 +350,131 @@ async fn code_mode_only_restricts_prompt_tools() -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Integration test: a code-mode-only session keeps a deferred app tool out of the exec
/// description while the tool call output still reaches the follow-up request.
///
/// Setup:
/// - starts a mock Responses server and mounts an AppsTestServer on it via mount_searchable;
/// - mounts two one-shot SSE responses: the first ends with ev_completed for resp-1, the
///   second is an assistant message (msg-1, done) followed by ev_completed for resp-2;
/// - builds a test Codex with dummy ChatGPT auth, enables the Apps, ToolSearch, CodeMode and
///   CodeModeOnly features, points chatgpt_base_url at the apps server, selects gpt-5-codex,
///   and installs the bundled model catalog with supports_search_tool set on that model.
///
/// Checks, after submitting a single turn:
/// - the first request lists exactly the exec and wait tools;
/// - the exec tool description (tool matched on its name field, falling back to type)
///   contains the ALL_TOOLS filtering guidance and does not mention
///   calendar_timezone_option_99;
/// - in the follow-up request, the output for custom tool call call-1 is not flagged as
///   success == Some(false) (a missing flag is accepted) and parses as JSON equal to the
///   expected found / isError / text object.
///
/// NOTE(review): the first mocked response is truncated in this view (the `sse(vec` line is
/// cut off mid-expression), so the script it runs is only partially visible here. Restore it
/// from the upstream commit before editing the body.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_only_guides_all_tools_search_and_calls_deferred_app_tools() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let apps_server = AppsTestServer::mount_searchable(&server).await?;
|
||||
let resp_mock = responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec;
|
||||
text(JSON.stringify({
|
||||
found: true,
|
||||
isError: Boolean(result.isError),
|
||||
text: result.content?.[0]?.text ?? "",
|
||||
}));
|
||||
}
|
||||
"#,
|
||||
),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let follow_up_mock = responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let apps_base_url = apps_server.chatgpt_base_url.clone();
|
||||
let mut builder = test_codex()
|
||||
.with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
|
||||
.with_config(move |config| {
|
||||
config
|
||||
.features
|
||||
.enable(Feature::Apps)
|
||||
.expect("test config should allow feature update");
|
||||
config
|
||||
.features
|
||||
.enable(Feature::ToolSearch)
|
||||
.expect("test config should allow feature update");
|
||||
config
|
||||
.features
|
||||
.enable(Feature::CodeMode)
|
||||
.expect("test config should allow feature update");
|
||||
config
|
||||
.features
|
||||
.enable(Feature::CodeModeOnly)
|
||||
.expect("test config should allow feature update");
|
||||
config.chatgpt_base_url = apps_base_url;
|
||||
config.model = Some("gpt-5-codex".to_string());
|
||||
|
||||
let mut model_catalog = bundled_models_response()
|
||||
.unwrap_or_else(|err| panic!("bundled models.json should parse: {err}"));
|
||||
let model = model_catalog
|
||||
.models
|
||||
.iter_mut()
|
||||
.find(|model| model.slug == "gpt-5-codex")
|
||||
.expect("gpt-5-codex exists in bundled models.json");
|
||||
model.supports_search_tool = true;
|
||||
config.model_catalog = Some(model_catalog);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
test.submit_turn("inspect tools in code mode only").await?;
|
||||
|
||||
let first_body = resp_mock.single_request().body_json();
|
||||
assert_eq!(
|
||||
tool_names(&first_body),
|
||||
vec!["exec".to_string(), "wait".to_string()]
|
||||
);
|
||||
|
||||
let exec_description = first_body
|
||||
.get("tools")
|
||||
.and_then(Value::as_array)
|
||||
.and_then(|tools| {
|
||||
tools.iter().find_map(|tool| {
|
||||
if tool
|
||||
.get("name")
|
||||
.or_else(|| tool.get("type"))
|
||||
.and_then(Value::as_str)
|
||||
== Some("exec")
|
||||
{
|
||||
tool.get("description").and_then(Value::as_str)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
})
|
||||
.expect("exec description should be present");
|
||||
assert!(exec_description.contains("filter `ALL_TOOLS` by `name` and `description`"));
|
||||
assert!(!exec_description.contains("calendar_timezone_option_99"));
|
||||
|
||||
let request = follow_up_mock.single_request();
|
||||
let (output, success) = custom_tool_output_body_and_success(&request, "call-1");
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"code_mode_only deferred app tool call failed unexpectedly: {output}"
|
||||
);
|
||||
let parsed: Value = serde_json::from_str(&output)?;
|
||||
assert_eq!(
|
||||
parsed,
|
||||
serde_json::json!({
|
||||
"found": true,
|
||||
"isError": false,
|
||||
"text": "called calendar_timezone_option_99 for at with ",
|
||||
})
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_only_can_call_nested_tools() -> Result<()> {
|
||||
|
||||
Reference in New Issue
Block a user