mirror of
https://github.com/openai/codex.git
synced 2026-04-26 15:45:02 +00:00
Add output schema to MCP tools and expose MCP tool results in code mode (#14236)
Summary
- Drop `McpToolOutput` in favor of `CallToolResult`, moving its helpers to keep MCP tooling focused on the final result shape.
- Wire the new schema definitions through the code mode, context, handlers, and spec modules so MCP tools serialize the exact output shape expected by the model.
- Extend the code mode tests to cover multiple MCP call scenarios and ensure the serialized data matches the new schema.
- Refresh the JS runner helpers and protocol models alongside the schema changes.

Testing
- Not run (not requested).
This commit is contained in:
committed by
Michael Bolin
parent
d5694529ca
commit
ee8f84153e
@@ -1,6 +1,8 @@
|
||||
#![allow(clippy::expect_used, clippy::unwrap_used)]
|
||||
|
||||
use anyhow::Result;
|
||||
use codex_core::config::types::McpServerConfig;
|
||||
use codex_core::config::types::McpServerTransportConfig;
|
||||
use codex_core::features::Feature;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::responses::ResponseMock;
|
||||
@@ -11,11 +13,14 @@ use core_test_support::responses::ev_custom_tool_call;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::stdio_server_bin;
|
||||
use core_test_support::test_codex::TestCodex;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::Value;
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::time::Duration;
|
||||
use wiremock::MockServer;
|
||||
|
||||
fn custom_tool_output_text_and_success(
|
||||
@@ -63,6 +68,70 @@ async fn run_code_mode_turn(
|
||||
Ok((test, second_mock))
|
||||
}
|
||||
|
||||
async fn run_code_mode_turn_with_rmcp(
|
||||
server: &MockServer,
|
||||
prompt: &str,
|
||||
code: &str,
|
||||
) -> Result<(TestCodex, ResponseMock)> {
|
||||
let rmcp_test_server_bin = stdio_server_bin()?;
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
let _ = config.features.enable(Feature::CodeMode);
|
||||
|
||||
let mut servers = config.mcp_servers.get().clone();
|
||||
servers.insert(
|
||||
"rmcp".to_string(),
|
||||
McpServerConfig {
|
||||
transport: McpServerTransportConfig::Stdio {
|
||||
command: rmcp_test_server_bin,
|
||||
args: Vec::new(),
|
||||
env: Some(HashMap::from([(
|
||||
"MCP_TEST_VALUE".to_string(),
|
||||
"propagated-env".to_string(),
|
||||
)])),
|
||||
env_vars: Vec::new(),
|
||||
cwd: None,
|
||||
},
|
||||
enabled: true,
|
||||
required: false,
|
||||
disabled_reason: None,
|
||||
startup_timeout_sec: Some(Duration::from_secs(10)),
|
||||
tool_timeout_sec: None,
|
||||
enabled_tools: None,
|
||||
disabled_tools: None,
|
||||
scopes: None,
|
||||
oauth_resource: None,
|
||||
},
|
||||
);
|
||||
config
|
||||
.mcp_servers
|
||||
.set(servers)
|
||||
.expect("test mcp servers should accept any configuration");
|
||||
});
|
||||
let test = builder.build(server).await?;
|
||||
|
||||
responses::mount_sse_once(
|
||||
server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_custom_tool_call("call-1", "code_mode", code),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let second_mock = responses::mount_sse_once(
|
||||
server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
test.submit_turn(prompt).await?;
|
||||
Ok((test, second_mock))
|
||||
}
|
||||
|
||||
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_can_return_exec_command_output() -> Result<()> {
|
||||
@@ -135,3 +204,170 @@ async fn code_mode_can_apply_patch_via_nested_tool() -> Result<()> {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Verifies that a code-mode script can destructure `content`, `structuredContent`, and
/// `isError` from the result of the `echo` tool imported from `tools/mcp/rmcp.js`.
///
/// The script calls `echo` with `message: "ping"` and reports the fields through
/// `add_content`. The test then asserts that the `call-1` output is not flagged as
/// failed and equals the expected four-line text (echo, env, isError, contentLength).
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_can_print_structured_mcp_tool_result_fields() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let code = r#"
|
||||
import { echo } from "tools/mcp/rmcp.js";
|
||||
|
||||
const { content, structuredContent, isError } = await echo({
|
||||
message: "ping",
|
||||
});
|
||||
add_content(
|
||||
`echo=${structuredContent?.echo ?? "missing"}\n` +
|
||||
`env=${structuredContent?.env ?? "missing"}\n` +
|
||||
`isError=${String(isError)}\n` +
|
||||
`contentLength=${content.length}`
|
||||
);
|
||||
"#;
|
||||
|
||||
let (_test, second_mock) =
|
||||
run_code_mode_turn_with_rmcp(&server, "use code_mode to run the rmcp echo tool", code)
|
||||
.await?;
|
||||
|
||||
// The request taken from the second mock is where the `call-1` tool output is looked up.
let req = second_mock.single_request();
|
||||
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
||||
// Only an explicit `Some(false)` fails this check; any other `success` value passes.
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"code_mode rmcp echo call failed unexpectedly: {output}"
|
||||
);
|
||||
assert_eq!(
|
||||
output,
|
||||
"echo=ECHOING: ping
|
||||
env=propagated-env
|
||||
isError=false
|
||||
contentLength=0"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Verifies that the rmcp echo tool is reachable through the flat `tools` object
/// exported by `tools.js`, under the key `mcp__rmcp__echo`.
///
/// The script calls `tools["mcp__rmcp__echo"]` with `message: "ping"` and reports
/// `structuredContent.echo`, `structuredContent.env`, and `isError` through
/// `add_content`. The test asserts that the `call-1` output is not flagged as failed
/// and equals the expected three-line text.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_can_access_namespaced_mcp_tool_from_flat_tools_namespace() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let code = r#"
|
||||
import { tools } from "tools.js";
|
||||
|
||||
const { structuredContent, isError } = await tools["mcp__rmcp__echo"]({
|
||||
message: "ping",
|
||||
});
|
||||
add_content(
|
||||
`echo=${structuredContent?.echo ?? "missing"}\n` +
|
||||
`env=${structuredContent?.env ?? "missing"}\n` +
|
||||
`isError=${String(isError)}`
|
||||
);
|
||||
"#;
|
||||
|
||||
let (_test, second_mock) =
|
||||
run_code_mode_turn_with_rmcp(&server, "use code_mode to run the rmcp echo tool", code)
|
||||
.await?;
|
||||
|
||||
// The request taken from the second mock is where the `call-1` tool output is looked up.
let req = second_mock.single_request();
|
||||
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
||||
// Only an explicit `Some(false)` fails this check; any other `success` value passes.
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"code_mode rmcp echo call failed unexpectedly: {output}"
|
||||
);
|
||||
assert_eq!(
|
||||
output,
|
||||
"echo=ECHOING: ping
|
||||
env=propagated-env
|
||||
isError=false"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Verifies the result shape seen by a code-mode script when the MCP tool returns
/// content but no structured content.
///
/// The script calls `image_scenario` (imported from `tools/mcp/rmcp.js`) with
/// `scenario: "text_only"` and a caption, then reports the first content item's
/// `type` and `text`, `structuredContent` (stringified, with `null` as fallback), and
/// `isError`. The test asserts that the `call-1` output is not flagged as failed and
/// equals the expected four-line text.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_can_print_content_only_mcp_tool_result_fields() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let code = r#"
|
||||
import { image_scenario } from "tools/mcp/rmcp.js";
|
||||
|
||||
const { content, structuredContent, isError } = await image_scenario({
|
||||
scenario: "text_only",
|
||||
caption: "caption from mcp",
|
||||
});
|
||||
add_content(
|
||||
`firstType=${content[0]?.type ?? "missing"}\n` +
|
||||
`firstText=${content[0]?.text ?? "missing"}\n` +
|
||||
`structuredContent=${String(structuredContent ?? null)}\n` +
|
||||
`isError=${String(isError)}`
|
||||
);
|
||||
"#;
|
||||
|
||||
let (_test, second_mock) = run_code_mode_turn_with_rmcp(
|
||||
&server,
|
||||
"use code_mode to run the rmcp image scenario tool",
|
||||
code,
|
||||
)
|
||||
.await?;
|
||||
|
||||
// The request taken from the second mock is where the `call-1` tool output is looked up.
let req = second_mock.single_request();
|
||||
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
||||
// Only an explicit `Some(false)` fails this check; any other `success` value passes.
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"code_mode rmcp image scenario call failed unexpectedly: {output}"
|
||||
);
|
||||
assert_eq!(
|
||||
output,
|
||||
"firstType=text
|
||||
firstText=caption from mcp
|
||||
structuredContent=null
|
||||
isError=false"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Verifies the result shape seen by a code-mode script when an MCP tool call
/// reports an error.
///
/// The script calls `echo` with an empty argument object (no `message`) and reports
/// `isError`, the content length, whether the first content text mentions both
/// "missing field" and "message", and `structuredContent`. The test expects
/// `isError=true` in the script output while the `call-1` code-mode output itself
/// is still not flagged as failed.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_can_print_error_mcp_tool_result_fields() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let code = r#"
|
||||
import { echo } from "tools/mcp/rmcp.js";
|
||||
|
||||
const { content, structuredContent, isError } = await echo({});
|
||||
const firstText = content[0]?.text ?? "";
|
||||
const mentionsMissingMessage =
|
||||
firstText.includes("missing field") && firstText.includes("message");
|
||||
add_content(
|
||||
`isError=${String(isError)}\n` +
|
||||
`contentLength=${content.length}\n` +
|
||||
`mentionsMissingMessage=${String(mentionsMissingMessage)}\n` +
|
||||
`structuredContent=${String(structuredContent ?? null)}`
|
||||
);
|
||||
"#;
|
||||
|
||||
let (_test, second_mock) =
|
||||
run_code_mode_turn_with_rmcp(&server, "use code_mode to call rmcp echo badly", code)
|
||||
.await?;
|
||||
|
||||
// The request taken from the second mock is where the `call-1` tool output is looked up.
let req = second_mock.single_request();
|
||||
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
||||
// The MCP-level error is surfaced as data (`isError=true` below), so the code_mode call
// itself must still not be reported as `Some(false)`.
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"code_mode rmcp error call failed unexpectedly: {output}"
|
||||
);
|
||||
assert_eq!(
|
||||
output,
|
||||
"isError=true
|
||||
contentLength=1
|
||||
mentionsMissingMessage=true
|
||||
structuredContent=null"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user