Send sandbox state through MCP tool metadata (#17763)

## Changes

Allows MCP servers to opt in to receiving the sandbox configuration through
`_meta` on model-initiated tool calls. This lets a server honor the thread's
sandbox if it chooses to.

## Details

- Adds the `codex/sandbox-state-meta` experimental MCP capability.
- Tracks whether each MCP server advertises that capability.
- When a server opts in, `codex-core` injects the current `SandboxState`
into the `_meta` of model-initiated MCP tool-call requests, under the
`codex/sandbox-state-meta` key.

## Verification

- Added an integration test for the capability.
This commit is contained in:
aaronl-openai
2026-04-15 00:49:15 -07:00
committed by GitHub
parent e4a3612f11
commit 42528a905d
5 changed files with 209 additions and 6 deletions

View File

@@ -37,6 +37,7 @@ pub use mcp::with_codex_apps_mcp;
pub use mcp_connection_manager::CodexAppsToolsCacheKey;
pub use mcp_connection_manager::DEFAULT_STARTUP_TIMEOUT;
pub use mcp_connection_manager::MCP_SANDBOX_STATE_CAPABILITY;
pub use mcp_connection_manager::MCP_SANDBOX_STATE_META_CAPABILITY;
pub use mcp_connection_manager::MCP_SANDBOX_STATE_METHOD;
pub use mcp_connection_manager::McpConnectionManager;
pub use mcp_connection_manager::SandboxState;

View File

@@ -433,6 +433,7 @@ struct ManagedClient {
tool_timeout: Option<Duration>,
server_instructions: Option<String>,
server_supports_sandbox_state_capability: bool,
server_supports_sandbox_state_meta_capability: bool,
codex_apps_tools_cache_context: Option<CodexAppsToolsCacheContext>,
}
@@ -647,6 +648,10 @@ pub const MCP_SANDBOX_STATE_CAPABILITY: &str = "codex/sandbox-state";
/// When used, the `params` field of the notification is [`SandboxState`].
pub const MCP_SANDBOX_STATE_METHOD: &str = "codex/sandbox-state/update";
/// Experimental MCP server capability for receiving [`SandboxState`] with tool calls.
///
/// A server opts in by listing this name among its experimental capabilities.
/// The same string doubles as the key under which Codex stores the serialized
/// [`SandboxState`] in the tool-call request `_meta`.
pub const MCP_SANDBOX_STATE_META_CAPABILITY: &str = "codex/sandbox-state-meta";
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SandboxState {
@@ -1142,6 +1147,16 @@ impl McpConnectionManager {
})
}
/// Reports whether `server` was recorded as supporting the sandbox-state
/// `_meta` capability.
///
/// # Errors
///
/// Propagates whatever error `client_by_name` returns for `server`.
pub async fn server_supports_sandbox_state_meta_capability(
    &self,
    server: &str,
) -> Result<bool> {
    let client = self.client_by_name(server).await?;
    Ok(client.server_supports_sandbox_state_meta_capability)
}
/// List resources from the specified server.
pub async fn list_resources(
&self,
@@ -1479,6 +1494,12 @@ async fn start_server_task(
.as_ref()
.and_then(|exp| exp.get(MCP_SANDBOX_STATE_CAPABILITY))
.is_some();
let server_supports_sandbox_state_meta_capability = initialize_result
.capabilities
.experimental
.as_ref()
.and_then(|exp| exp.get(MCP_SANDBOX_STATE_META_CAPABILITY))
.is_some();
let list_start = Instant::now();
let fetch_start = Instant::now();
let tools = list_tools_for_client_uncached(
@@ -1515,6 +1536,7 @@ async fn start_server_task(
tool_filter,
server_instructions: initialize_result.instructions,
server_supports_sandbox_state_capability,
server_supports_sandbox_state_meta_capability,
codex_apps_tools_cache_context,
};

View File

@@ -36,6 +36,7 @@ use codex_analytics::build_track_events_context;
use codex_config::types::AppToolApproval;
use codex_features::Feature;
use codex_mcp::CODEX_APPS_MCP_SERVER_NAME;
use codex_mcp::SandboxState;
use codex_mcp::declared_openai_file_input_param_names;
use codex_mcp::mcp_permission_prompt_is_auto_approved;
use codex_otel::sanitize_metric_tag_value;
@@ -466,6 +467,10 @@ async fn execute_mcp_tool_call(
metadata.and_then(|metadata| metadata.openai_file_input_params.as_deref()),
)
.await?;
let request_meta =
augment_mcp_tool_request_meta_with_sandbox_state(sess, turn_context, server, request_meta)
.await
.map_err(|e| format!("failed to build MCP tool request metadata: {e:#}"))?;
let result = sess
.call_tool(server, tool_name, rewritten_arguments, request_meta)
.await
@@ -479,6 +484,52 @@ async fn execute_mcp_tool_call(
)
}
/// Adds the turn's [`SandboxState`] to an MCP tool-call request `_meta` when
/// `server` has opted in to receiving it.
///
/// The state is stored under the `MCP_SANDBOX_STATE_META_CAPABILITY` key.
/// `meta` is returned untouched when the server does not support the
/// capability (a failed capability lookup is treated as "not supported"), or
/// when `meta` is present but is not a JSON object.
///
/// # Errors
///
/// Returns an error if the sandbox state cannot be serialized to JSON.
async fn augment_mcp_tool_request_meta_with_sandbox_state(
    sess: &Session,
    turn_context: &TurnContext,
    server: &str,
    meta: Option<serde_json::Value>,
) -> anyhow::Result<Option<serde_json::Value>> {
    let server_opted_in = sess
        .services
        .mcp_connection_manager
        .read()
        .await
        .server_supports_sandbox_state_meta_capability(server)
        .await
        .unwrap_or(false);
    if !server_opted_in {
        return Ok(meta);
    }

    // Serialize before inspecting `meta` so a serialization failure is
    // reported regardless of the shape of the incoming metadata.
    let state = SandboxState {
        sandbox_policy: turn_context.sandbox_policy.get().clone(),
        codex_linux_sandbox_exe: turn_context.codex_linux_sandbox_exe.clone(),
        sandbox_cwd: turn_context.cwd.to_path_buf(),
        use_legacy_landlock: turn_context.features.use_legacy_landlock(),
    };
    let state_json = serde_json::to_value(state)?;

    // Reuse the caller's object if there is one, otherwise start a fresh one.
    let mut fields = match meta {
        None => serde_json::Map::new(),
        Some(serde_json::Value::Object(existing)) => existing,
        // A non-object `_meta` has no keys to extend; hand it back as-is.
        other @ Some(_) => return Ok(other),
    };
    fields.insert(
        codex_mcp::MCP_SANDBOX_STATE_META_CAPABILITY.to_string(),
        state_json,
    );
    Ok(Some(serde_json::Value::Object(fields)))
}
async fn maybe_mark_thread_memory_mode_polluted(sess: &Session, turn_context: &TurnContext) {
if !turn_context
.config

View File

@@ -12,6 +12,7 @@ use std::time::UNIX_EPOCH;
use codex_config::types::McpServerConfig;
use codex_config::types::McpServerTransportConfig;
use codex_login::CodexAuth;
use codex_mcp::MCP_SANDBOX_STATE_META_CAPABILITY;
use codex_models_manager::manager::RefreshStrategy;
use codex_protocol::config_types::ReasoningSummary;
@@ -237,6 +238,105 @@ async fn stdio_server_round_trip() -> anyhow::Result<()> {
Ok(())
}
/// End-to-end check that a model-initiated call to the stdio test server's
/// `sandbox_meta` tool carries sandbox state in the request metadata.
///
/// The mock model first requests the tool call and then emits a final
/// message; the tool output sent back with that second request is parsed and
/// its `MCP_SANDBOX_STATE_META_CAPABILITY` entry is compared against the
/// policy and working directory used for the turn.
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
async fn stdio_mcp_tool_call_includes_sandbox_state_meta() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let call_id = "sandbox-meta-call";
let server_name = "rmcp";
let tool_name = format!("mcp__{server_name}__sandbox_meta");
// First mocked model response: ask for the `sandbox_meta` tool with empty arguments.
mount_sse_once(
&server,
responses::sse(vec![
responses::ev_response_created("resp-1"),
responses::ev_function_call(call_id, &tool_name, "{}"),
responses::ev_completed("resp-1"),
]),
)
.await;
// Second mocked model response: a closing assistant message. The handle is
// kept so the request that carried the tool output can be inspected below.
let final_mock = mount_sse_once(
&server,
responses::sse(vec![
responses::ev_assistant_message("msg-1", "rmcp sandbox meta completed successfully."),
responses::ev_completed("resp-2"),
]),
)
.await;
let rmcp_test_server_bin = stdio_server_bin()?;
// Register the stdio test server under `server_name` in the fixture's config.
let fixture = test_codex()
.with_config(move |config| {
let mut servers = config.mcp_servers.get().clone();
servers.insert(
server_name.to_string(),
McpServerConfig {
transport: McpServerTransportConfig::Stdio {
command: rmcp_test_server_bin,
args: Vec::new(),
env: None,
env_vars: Vec::new(),
cwd: None,
},
enabled: true,
required: false,
supports_parallel_tool_calls: false,
disabled_reason: None,
startup_timeout_sec: Some(Duration::from_secs(10)),
tool_timeout_sec: None,
enabled_tools: None,
disabled_tools: None,
scopes: None,
oauth_resource: None,
tools: HashMap::new(),
},
);
config
.mcp_servers
.set(servers)
.expect("test mcp servers should accept any configuration");
})
.build(&server)
.await?;
// Run the turn under a read-only policy; the same value is the expected
// `sandboxPolicy` in the assertions below.
let sandbox_policy = SandboxPolicy::new_read_only_policy();
fixture
.submit_turn_with_policy("call the rmcp sandbox_meta tool", sandbox_policy.clone())
.await?;
// The tool output is read from the request that consumed the second mock.
let output_item = final_mock.single_request().function_call_output(call_id);
let output_text = output_item
.get("output")
.and_then(Value::as_str)
.expect("function_call_output output should be a string");
// NOTE(review): presumably strips a wall-time wrapper around the tool
// payload — confirm against `split_wall_time_wrapped_output`.
let wrapped_payload = split_wall_time_wrapped_output(output_text);
let output_json: Value = serde_json::from_str(wrapped_payload)
.expect("wrapped MCP output should preserve sandbox metadata JSON");
let Value::Object(meta) = output_json else {
panic!("sandbox_meta should return metadata object: {output_json:?}");
};
// The sandbox state is looked up under the capability name.
let sandbox_meta = meta
.get(MCP_SANDBOX_STATE_META_CAPABILITY)
.expect("sandbox state metadata should be present");
let expected_sandbox_policy = serde_json::to_value(&sandbox_policy)?;
assert_eq!(
sandbox_meta.get("sandboxPolicy"),
Some(&expected_sandbox_policy)
);
// `sandboxCwd` must match the fixture's working directory.
assert_eq!(
sandbox_meta.get("sandboxCwd").and_then(Value::as_str),
fixture.cwd.path().to_str()
);
assert_eq!(sandbox_meta.get("useLegacyLandlock"), Some(&json!(false)));
server.verify().await;
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn stdio_mcp_parallel_tool_calls_default_false_runs_serially() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));

View File

@@ -1,4 +1,5 @@
use std::borrow::Cow;
use std::collections::BTreeMap;
use std::collections::HashMap;
use std::collections::hash_map::Entry;
use std::sync::Arc;
@@ -41,6 +42,7 @@ struct TestToolServer {
const MEMO_URI: &str = "memo://codex/example-note";
const MEMO_CONTENT: &str = "This is a sample MCP resource served by the rmcp test server.";
// Experimental capability name this test server advertises to opt in to
// sandbox state in tool-call `_meta`.
const SANDBOX_STATE_META_CAPABILITY: &str = "codex/sandbox-state-meta";
const SMALL_PNG_BASE64: &str = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==";
pub fn stdio() -> (tokio::io::Stdin, tokio::io::Stdout) {
@@ -49,12 +51,27 @@ pub fn stdio() -> (tokio::io::Stdin, tokio::io::Stdout) {
impl TestToolServer {
fn new() -> Self {
#[expect(clippy::expect_used)]
let sandbox_meta_schema: JsonObject = serde_json::from_value(serde_json::json!({
"type": "object",
"properties": {},
"additionalProperties": false
}))
.expect("sandbox_meta tool schema should deserialize");
let mut sandbox_meta_tool = Tool::new(
Cow::Borrowed("sandbox_meta"),
Cow::Borrowed("Return the MCP request metadata received by this test server."),
Arc::new(sandbox_meta_schema),
);
sandbox_meta_tool.annotations = Some(ToolAnnotations::new().read_only(true));
let tools = vec![
Self::echo_tool(),
Self::echo_dash_tool(),
Self::sync_tool(),
Self::image_tool(),
Self::image_scenario_tool(),
sandbox_meta_tool,
];
let resources = vec![Self::memo_resource()];
let resource_templates = vec![Self::memo_template()];
@@ -341,12 +358,18 @@ struct ImageScenarioArgs {
impl ServerHandler for TestToolServer {
/// Builds the server description returned to connecting clients.
///
/// Enables tools, tool-list-changed notifications, and resources, and also
/// advertises the experimental `SANDBOX_STATE_META_CAPABILITY`. All other
/// `ServerInfo` fields keep their defaults.
fn get_info(&self) -> ServerInfo {
    let mut capabilities = ServerCapabilities::builder()
        .enable_tools()
        .enable_tool_list_changed()
        .enable_resources()
        .build();
    // The capability is advertised with an empty settings object.
    capabilities.experimental = Some(BTreeMap::from([(
        SANDBOX_STATE_META_CAPABILITY.to_string(),
        JsonObject::new(),
    )]));
    // `capabilities` is initialized exactly once; the previous inline builder
    // expression duplicated the field and dropped the experimental entry.
    ServerInfo {
        capabilities,
        ..ServerInfo::default()
    }
}
@@ -418,9 +441,15 @@ impl ServerHandler for TestToolServer {
async fn call_tool(
&self,
request: CallToolRequestParams,
_context: rmcp::service::RequestContext<rmcp::service::RoleServer>,
context: rmcp::service::RequestContext<rmcp::service::RoleServer>,
) -> Result<CallToolResult, McpError> {
match request.name.as_ref() {
"sandbox_meta" => Ok(CallToolResult {
content: Vec::new(),
structured_content: Some(serde_json::Value::Object(context.meta.0)),
is_error: Some(false),
meta: None,
}),
"echo" | "echo-tool" => {
let args: EchoArgs = match request.arguments {
Some(arguments) => serde_json::from_value(serde_json::Value::Object(