diff --git a/codex-rs/app-server/tests/suite/v2/app_list.rs b/codex-rs/app-server/tests/suite/v2/app_list.rs index aa7b67a574..039fffe5cd 100644 --- a/codex-rs/app-server/tests/suite/v2/app_list.rs +++ b/codex-rs/app-server/tests/suite/v2/app_list.rs @@ -316,6 +316,73 @@ connectors = false Ok(()) } +#[tokio::test] +async fn list_apps_keeps_apps_with_app_only_tools_accessible() -> Result<()> { + let connectors = vec![AppInfo { + id: "beta".to_string(), + name: "Beta".to_string(), + description: Some("Beta connector".to_string()), + logo_url: None, + logo_url_dark: None, + distribution_channel: None, + branding: None, + app_metadata: None, + labels: None, + install_url: None, + is_accessible: false, + is_enabled: true, + plugin_display_names: Vec::new(), + }]; + let mut app_only_tool = connector_tool("beta", "Beta App")?; + app_only_tool + .meta + .as_mut() + .expect("connector tool should include metadata") + .0 + .insert("ui".to_string(), json!({ "visibility": ["app"] })); + let tools = vec![app_only_tool]; + let (server_url, server_handle) = + start_apps_server_with_delays(connectors, tools, Duration::ZERO, Duration::ZERO).await?; + + let codex_home = TempDir::new()?; + write_connectors_config(codex_home.path(), &server_url)?; + write_chatgpt_auth( + codex_home.path(), + ChatGptAuthFixture::new("chatgpt-token") + .account_id("account-123") + .chatgpt_user_id("user-app-only") + .chatgpt_account_id("account-123"), + AuthCredentialsStoreMode::File, + )?; + + let mut mcp = McpProcess::new(codex_home.path()).await?; + timeout(DEFAULT_TIMEOUT, mcp.initialize()).await??; + + let request_id = mcp + .send_apps_list_request(AppsListParams { + limit: None, + cursor: None, + thread_id: None, + force_refetch: true, + }) + .await?; + let response: JSONRPCResponse = timeout( + DEFAULT_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(request_id)), + ) + .await??; + let AppsListResponse { data, next_cursor } = to_response(response)?; + + assert_eq!(data.len(), 1); + assert_eq!(data[0].id, "beta"); + assert!(data[0].is_accessible); + assert!(next_cursor.is_none()); + + server_handle.abort(); + let _ = server_handle.await; + Ok(()) +} + #[tokio::test] async fn list_apps_reports_is_enabled_from_config() -> Result<()> { let connectors = vec![AppInfo { diff --git a/codex-rs/codex-mcp/src/connection_manager.rs b/codex-rs/codex-mcp/src/connection_manager.rs index d4a35eab1e..d15a2e1500 100644 --- a/codex-rs/codex-mcp/src/connection_manager.rs +++ b/codex-rs/codex-mcp/src/connection_manager.rs @@ -64,6 +64,7 @@ use rmcp::model::ReadResourceResult; use rmcp::model::RequestId; use rmcp::model::Resource; use rmcp::model::ResourceTemplate; +use serde_json::Value as JsonValue; use tokio::task::JoinSet; use tokio_util::sync::CancellationToken; use tracing::Instrument; @@ -72,6 +73,34 @@ use tracing::trace; use tracing::trace_span; use tracing::warn; +const MCP_UI_META_KEY: &str = "ui"; +const MCP_UI_VISIBILITY_META_KEY: &str = "visibility"; +const MCP_UI_MODEL_VISIBILITY: &str = "model"; + +/// Returns whether a tool may be included in model-facing tool declarations. +/// +/// Tools without visibility metadata remain visible. +/// Tools with visibility metadata are hidden unless they explicitly include `model`. +/// +/// +pub fn tool_is_model_visible(tool: &ToolInfo) -> bool { + let Some(visibility) = tool + .tool + .meta + .as_deref() + .and_then(|meta| meta.get(MCP_UI_META_KEY)) + .and_then(JsonValue::as_object) + .and_then(|ui| ui.get(MCP_UI_VISIBILITY_META_KEY)) + .and_then(JsonValue::as_array) + else { + return true; + }; + + visibility + .iter() + .any(|target| target.as_str() == Some(MCP_UI_MODEL_VISIBILITY)) +} + /// A thin wrapper around a set of running [`RmcpClient`] instances. pub struct McpConnectionManager { clients: HashMap, diff --git a/codex-rs/codex-mcp/src/lib.rs b/codex-rs/codex-mcp/src/lib.rs index 7b45bcc7a7..83419175d4 100644 --- a/codex-rs/codex-mcp/src/lib.rs +++ b/codex-rs/codex-mcp/src/lib.rs @@ -1,4 +1,5 @@ pub use connection_manager::McpConnectionManager; +pub use connection_manager::tool_is_model_visible; pub use elicitation::ElicitationReviewRequest; pub use elicitation::ElicitationReviewer; pub use elicitation::ElicitationReviewerHandle; diff --git a/codex-rs/core/src/mcp_tool_exposure.rs b/codex-rs/core/src/mcp_tool_exposure.rs index e58be65f9c..5dc66af734 100644 --- a/codex-rs/core/src/mcp_tool_exposure.rs +++ b/codex-rs/core/src/mcp_tool_exposure.rs @@ -3,6 +3,7 @@ use std::collections::HashSet; use codex_features::Feature; use codex_mcp::CODEX_APPS_MCP_SERVER_NAME; use codex_mcp::ToolInfo as McpToolInfo; +use codex_mcp::tool_is_model_visible; use crate::config::Config; use crate::connectors; @@ -51,7 +52,9 @@ pub(crate) fn build_mcp_tool_exposure( fn filter_non_codex_apps_mcp_tools_only(mcp_tools: &[McpToolInfo]) -> Vec { mcp_tools .iter() - .filter(|tool| tool.server_name != CODEX_APPS_MCP_SERVER_NAME) + .filter(|tool| { + tool.server_name != CODEX_APPS_MCP_SERVER_NAME && tool_is_model_visible(tool) + }) .cloned() .collect() } @@ -72,6 +75,9 @@ fn filter_codex_apps_mcp_tools( if tool.server_name != CODEX_APPS_MCP_SERVER_NAME { return false; } + if !tool_is_model_visible(tool) { + return false; + } let Some(connector_id) = tool.connector_id.as_deref() else { return false; }; diff --git a/codex-rs/core/src/mcp_tool_exposure_test.rs b/codex-rs/core/src/mcp_tool_exposure_test.rs index c23e1e3765..4918f768aa 100644 --- a/codex-rs/core/src/mcp_tool_exposure_test.rs +++ b/codex-rs/core/src/mcp_tool_exposure_test.rs @@ -7,6 +7,7 @@ use codex_mcp::ToolInfo; use codex_tools::ToolName; use pretty_assertions::assert_eq; use rmcp::model::JsonObject; +use rmcp::model::Meta; use rmcp::model::Tool; use super::*; @@ -80,6 +81,16 @@ fn tool_names(tools: &[ToolInfo]) -> HashSet { .collect() } +fn with_visibility(mut tool: ToolInfo, visibility: &[&str]) -> ToolInfo { + tool.tool.meta = Some(Meta( + serde_json::json!({ "ui": { "visibility": visibility } }) + .as_object() + .expect("metadata object") + .clone(), + )); + tool +} + #[tokio::test] async fn directly_exposes_small_effective_tool_sets() { let config = test_config().await; @@ -93,6 +104,84 @@ async fn directly_exposes_small_effective_tool_sets() { assert!(exposure.deferred_tools.is_none()); } +#[tokio::test] +async fn excludes_tools_hidden_from_model_exposure() { + let config = test_config().await; + let visible_tool = make_mcp_tool( + "rmcp", + "visible_tool", + "mcp__rmcp", + "visible_tool", + /*connector_id*/ None, + /*connector_name*/ None, + ); + let hidden_tool = with_visibility( + make_mcp_tool( + "rmcp", + "hidden_tool", + "mcp__rmcp", + "hidden_tool", + /*connector_id*/ None, + /*connector_name*/ None, + ), + &["app"], + ); + let empty_visibility_tool = with_visibility( + make_mcp_tool( + "rmcp", + "empty_visibility_tool", + "mcp__rmcp", + "empty_visibility_tool", + /*connector_id*/ None, + /*connector_name*/ None, + ), + &[], + ); + let visible_app_tool = with_visibility( + make_mcp_tool( + CODEX_APPS_MCP_SERVER_NAME, + "calendar_read", + "mcp__codex_apps__calendar", + "read", + Some("calendar"), + Some("Calendar"), + ), + &["app", "model"], + ); + let hidden_app_tool = with_visibility( + make_mcp_tool( + CODEX_APPS_MCP_SERVER_NAME, + "calendar_open", + "mcp__codex_apps__calendar", + "open", + Some("calendar"), + Some("Calendar"), + ), + &["app"], + ); + let mcp_tools = vec![ + visible_tool.clone(), + hidden_tool, + empty_visibility_tool, + visible_app_tool.clone(), + hidden_app_tool, + ]; + let connectors = vec![make_connector("calendar", "Calendar")]; + + let exposure = build_mcp_tool_exposure( + &mcp_tools, + Some(connectors.as_slice()), + &config, + /*search_tool_enabled*/ false, + ); + + assert_eq!( + tool_names(&exposure.direct_tools), + tool_names(&[visible_tool, visible_app_tool]) + ); + assert!(exposure.deferred_tools.is_none()); +} + #[tokio::test] async fn searches_large_effective_tool_sets() { let config = test_config().await; diff --git a/codex-rs/core/tests/common/apps_test_server.rs b/codex-rs/core/tests/common/apps_test_server.rs index 2f2875867b..7f7873e30b 100644 --- a/codex-rs/core/tests/common/apps_test_server.rs +++ b/codex-rs/core/tests/common/apps_test_server.rs @@ -27,12 +27,15 @@ const SERVER_NAME: &str = "codex-apps-test"; const SERVER_VERSION: &str = "1.0.0"; const SEARCHABLE_TOOL_COUNT: usize = 100; const CALENDAR_CREATE_EVENT_TOOL_NAME: &str = "calendar_create_event"; +const CALENDAR_APP_ONLY_TOOL_NAME: &str = "calendar_app_only_action"; pub const CALENDAR_EXTRACT_TEXT_TOOL_NAME: &str = "calendar_extract_text"; const CALENDAR_LIST_EVENTS_TOOL_NAME: &str = "calendar_list_events"; pub const DIRECT_CALENDAR_CREATE_EVENT_TOOL: &str = "mcp__codex_apps__calendar__create_event"; +pub const DIRECT_CALENDAR_APP_ONLY_TOOL: &str = "mcp__codex_apps__calendar__app_only_action"; pub const DIRECT_CALENDAR_LIST_EVENTS_TOOL: &str = "mcp__codex_apps__calendar__list_events"; pub const DIRECT_CALENDAR_EXTRACT_TEXT_TOOL: &str = "mcp__codex_apps__calendar__extract_text"; pub const SEARCH_CALENDAR_NAMESPACE: &str = "mcp__codex_apps__calendar"; +pub const SEARCH_CALENDAR_APP_ONLY_TOOL: &str = "_app_only_action"; pub const SEARCH_CALENDAR_CREATE_TOOL: &str = "_create_event"; pub const SEARCH_CALENDAR_EXTRACT_TEXT_TOOL: &str = "_extract_text"; pub const SEARCH_CALENDAR_LIST_TOOL: &str = "_list_events"; @@ -49,6 +52,12 @@ pub struct AppsTestServer { pub chatgpt_base_url: String, } +#[derive(Clone, Copy)] +pub enum AppsTestToolLoading { + Direct, + Searchable, +} + impl AppsTestServer { pub async fn mount(server: &MockServer) -> Result { Self::mount_with_connector_name(server, CONNECTOR_NAME).await @@ -62,6 +71,7 @@ impl AppsTestServer { CONNECTOR_NAME.to_string(), CONNECTOR_DESCRIPTION.to_string(), /*searchable*/ true, + /*include_app_only_tool*/ false, ) .await; Ok(Self { @@ -80,6 +90,26 @@ impl AppsTestServer { connector_name.to_string(), CONNECTOR_DESCRIPTION.to_string(), /*searchable*/ false, + /*include_app_only_tool*/ false, + ) + .await; + Ok(Self { + chatgpt_base_url: server.uri(), + }) + } + + pub async fn mount_with_app_only_tool( + server: &MockServer, + tool_loading: AppsTestToolLoading, + ) -> Result { + mount_oauth_metadata(server).await; + mount_connectors_directory(server).await; + mount_streamable_http_json_rpc( + server, + CONNECTOR_NAME.to_string(), + CONNECTOR_DESCRIPTION.to_string(), + matches!(tool_loading, AppsTestToolLoading::Searchable), + /*include_app_only_tool*/ true, ) .await; Ok(Self { @@ -136,7 +166,7 @@ fn apps_tool_call_id(body: &Value) -> Option<&str> { .as_str() } -async fn recorded_apps_tool_calls(server: &MockServer) -> Vec { +pub async fn recorded_apps_tool_calls(server: &MockServer) -> Vec { server .received_requests() .await @@ -233,6 +263,7 @@ async fn mount_streamable_http_json_rpc( connector_name: String, connector_description: String, searchable: bool, + include_app_only_tool: bool, ) { Mock::given(method("POST")) .and(path_regex("^/api/codex/apps/?$")) @@ -240,6 +271,7 @@ async fn mount_streamable_http_json_rpc( connector_name, connector_description, searchable, + include_app_only_tool, }) .mount(server) .await; @@ -249,6 +281,7 @@ struct CodexAppsJsonRpcResponder { connector_name: String, connector_description: String, searchable: bool, + include_app_only_tool: bool, } impl Respond for CodexAppsJsonRpcResponder { @@ -419,6 +452,29 @@ impl Respond for CodexAppsJsonRpcResponder { })); } } + if self.include_app_only_tool + && let Some(tools) = response + .pointer_mut("/result/tools") + .and_then(Value::as_array_mut) + { + tools.push(json!({ + "name": CALENDAR_APP_ONLY_TOOL_NAME, + "description": "Open a calendar app-only action.", + "inputSchema": { + "type": "object", + "properties": {}, + "additionalProperties": false + }, + "_meta": { + "connector_id": CONNECTOR_ID, + "connector_name": self.connector_name.clone(), + "connector_description": self.connector_description.clone(), + "ui": { + "visibility": ["app"] + } + } + })); + } ResponseTemplate::new(200).set_body_json(response) } "tools/call" => { diff --git a/codex-rs/core/tests/suite/code_mode.rs b/codex-rs/core/tests/suite/code_mode.rs index 90daf21963..cbf6993bc9 100644 --- a/codex-rs/core/tests/suite/code_mode.rs +++ b/codex-rs/core/tests/suite/code_mode.rs @@ -18,6 +18,10 @@ use codex_protocol::protocol::EventMsg; use codex_protocol::protocol::Op; use codex_protocol::user_input::UserInput; use core_test_support::apps_test_server::AppsTestServer; +use core_test_support::apps_test_server::AppsTestToolLoading; +use core_test_support::apps_test_server::DIRECT_CALENDAR_APP_ONLY_TOOL; +use core_test_support::apps_test_server::recorded_apps_tool_calls; +use core_test_support::apps_test_server::search_capable_apps_builder; use core_test_support::assert_regex_match; use core_test_support::responses; use core_test_support::responses::ResponseMock; @@ -495,6 +499,92 @@ if (!tool) { Ok(()) } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn app_only_tools_are_not_visible_or_runnable_by_code_mode_model() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = responses::start_mock_server().await; + let apps_server = + AppsTestServer::mount_with_app_only_tool(&server, AppsTestToolLoading::Searchable).await?; + let code = format!( + r#" +const visibleTool = ALL_TOOLS.find(({{ name }}) => name === {visible_tool_name:?}); +const tool = ALL_TOOLS.find(({{ name }}) => name === {tool_name:?}); +let error = null; +try {{ + await tools[{tool_name:?}]({{}}); +}} catch (caught) {{ + error = String(caught); +}} +text(JSON.stringify({{ + visibleListed: visibleTool !== undefined, + listed: tool !== undefined, + callable: typeof tools[{tool_name:?}] === "function", + error, +}})); +"#, + visible_tool_name = "mcp__codex_apps__calendar_timezone_option_99", + tool_name = DIRECT_CALENDAR_APP_ONLY_TOOL, + ); + + responses::mount_sse_once( + &server, + sse(vec![ + ev_response_created("resp-1"), + ev_custom_tool_call("call-1", "exec", &code), + ev_completed("resp-1"), + ]), + ) + .await; + let second_mock = responses::mount_sse_once( + &server, + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ) + .await; + + let mut builder = search_capable_apps_builder(apps_server.chatgpt_base_url.clone()) + .with_config(|config| { + config + .features + .enable(Feature::CodeMode) + .expect("test config should allow feature update"); + config + .features + .enable(Feature::CodeModeOnly) + .expect("test config should allow feature update"); + }); + let test = builder.build(&server).await?; + test.submit_turn("try to call the app-only calendar tool through exec") + .await?; + + let req = second_mock.single_request(); + let (output, success) = custom_tool_output_body_and_success(&req, "call-1"); + assert_ne!( + success, + Some(false), + "code mode visibility check should complete successfully: {output}" + ); + let parsed: Value = serde_json::from_str(&output)?; + assert_eq!(parsed["visibleListed"], true); + assert_eq!(parsed["listed"], false); + assert_eq!(parsed["callable"], false); + assert!( + parsed["error"] + .as_str() + .is_some_and(|error| error.contains("is not a function")), + "app-only code mode call should fail before MCP dispatch: {parsed:?}" + ); + assert!( + recorded_apps_tool_calls(&server).await.is_empty(), + "app-only code mode call should not reach the MCP server" + ); + + Ok(()) +} + #[cfg_attr(windows, ignore = "no exec_command on Windows")] #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn code_mode_only_can_call_nested_tools() -> Result<()> { diff --git a/codex-rs/core/tests/suite/search_tool.rs b/codex-rs/core/tests/suite/search_tool.rs index e9ffeb62ef..c548ded95c 100644 --- a/codex-rs/core/tests/suite/search_tool.rs +++ b/codex-rs/core/tests/suite/search_tool.rs @@ -17,16 +17,20 @@ use codex_protocol::protocol::McpInvocation; use codex_protocol::protocol::Op; use codex_protocol::user_input::UserInput; use core_test_support::apps_test_server::AppsTestServer; +use core_test_support::apps_test_server::AppsTestToolLoading; use core_test_support::apps_test_server::CALENDAR_CREATE_EVENT_MCP_APP_RESOURCE_URI; use core_test_support::apps_test_server::CALENDAR_CREATE_EVENT_RESOURCE_URI; use core_test_support::apps_test_server::DIRECT_CALENDAR_CREATE_EVENT_TOOL as CALENDAR_CREATE_TOOL; use core_test_support::apps_test_server::DIRECT_CALENDAR_LIST_EVENTS_TOOL as CALENDAR_LIST_TOOL; +use core_test_support::apps_test_server::SEARCH_CALENDAR_APP_ONLY_TOOL; use core_test_support::apps_test_server::SEARCH_CALENDAR_CREATE_TOOL; use core_test_support::apps_test_server::SEARCH_CALENDAR_LIST_TOOL; use core_test_support::apps_test_server::SEARCH_CALENDAR_NAMESPACE; +use core_test_support::apps_test_server::apps_enabled_builder; use core_test_support::apps_test_server::configure_search_capable_apps; use core_test_support::apps_test_server::configure_search_capable_model; use core_test_support::apps_test_server::recorded_apps_tool_call_by_call_id; +use core_test_support::apps_test_server::recorded_apps_tool_calls; use core_test_support::apps_test_server::search_capable_apps_builder as configured_builder; use core_test_support::responses::ResponsesRequest; use core_test_support::responses::ev_assistant_message; @@ -216,6 +220,80 @@ async fn always_defer_feature_hides_small_app_tool_sets() -> Result<()> { Ok(()) } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn app_only_tools_are_not_visible_or_runnable_by_direct_model_calls() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let apps_server = + AppsTestServer::mount_with_app_only_tool(&server, AppsTestToolLoading::Direct).await?; + let call_id = "app-only-direct-call"; + let mock = mount_sse_sequence( + &server, + vec![ + sse(vec![ + ev_response_created("resp-1"), + ev_function_call_with_namespace( + call_id, + SEARCH_CALENDAR_NAMESPACE, + SEARCH_CALENDAR_APP_ONLY_TOOL, + "{}", + ), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_response_created("resp-2"), + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ], + ) + .await; + + let mut builder = apps_enabled_builder(apps_server.chatgpt_base_url.clone()); + let test = builder.build(&server).await?; + test.submit_turn_with_approval_and_permission_profile( + "Try to call the app-only calendar tool.", + AskForApproval::Never, + PermissionProfile::Disabled, + ) + .await?; + + let requests = mock.requests(); + assert!( + namespace_child_tool( + &requests[0].body_json(), + SEARCH_CALENDAR_NAMESPACE, + SEARCH_CALENDAR_CREATE_TOOL + ) + .is_some(), + "visible tool from the app-only tool's connector should be declared" + ); + assert!( + namespace_child_tool( + &requests[0].body_json(), + SEARCH_CALENDAR_NAMESPACE, + SEARCH_CALENDAR_APP_ONLY_TOOL + ) + .is_none(), + "app-only tool should not be declared to a direct model" + ); + assert!( + requests[1] + .function_call_output(call_id) + .get("output") + .and_then(Value::as_str) + .is_some_and(|output| output.contains("unsupported call")), + "forced app-only direct call should not dispatch" + ); + assert!( + recorded_apps_tool_calls(&server).await.is_empty(), + "forced app-only direct call should not reach the MCP server" + ); + + Ok(()) +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn app_search_sources_are_hidden_for_api_key_auth() -> Result<()> { skip_if_no_network!(Ok(()));