feat: support mcp_servers in config.toml

This commit is contained in:
Michael Bolin
2025-05-06 14:07:24 -07:00
parent 49d040215a
commit a4e59988db
12 changed files with 483 additions and 15 deletions

View File

@@ -1,4 +1,5 @@
use std::collections::BTreeMap;
use std::collections::HashMap;
use std::io::BufRead;
use std::path::Path;
use std::pin::Pin;
@@ -13,6 +14,7 @@ use futures::prelude::*;
use reqwest::StatusCode;
use serde::Deserialize;
use serde::Serialize;
use serde_json::json;
use serde_json::Value;
use tokio::sync::mpsc;
use tokio::time::timeout;
@@ -42,6 +44,11 @@ pub struct Prompt {
pub instructions: Option<String>,
/// Whether to store response on server side (disable_response_storage = !store).
pub store: bool,
/// Additional tools sourced from external MCP servers. Note each key is
/// the "fully qualified" tool name (i.e., prefixed with the server name),
/// which should be reported to the model in place of Tool::name.
pub extra_tools: HashMap<String, mcp_types::Tool>,
}
#[derive(Debug)]
@@ -59,7 +66,7 @@ struct Payload<'a> {
// we code defensively to avoid this case, but perhaps we should use a
// separate enum for serialization.
input: &'a Vec<ResponseItem>,
tools: &'a [Tool],
tools: &'a [serde_json::Value],
tool_choice: &'static str,
parallel_tool_calls: bool,
reasoning: Option<Reasoning>,
@@ -78,7 +85,7 @@ struct Reasoning {
}
#[derive(Debug, Serialize)]
struct Tool {
struct ToolInternal {
name: &'static str,
#[serde(rename = "type")]
kind: &'static str, // "function"
@@ -105,7 +112,7 @@ enum JsonSchema {
}
/// Tool usage specification
static TOOLS: LazyLock<Vec<Tool>> = LazyLock::new(|| {
static TOOLS_INTERNAL: LazyLock<Vec<ToolInternal>> = LazyLock::new(|| {
let mut properties = BTreeMap::new();
properties.insert(
"command".to_string(),
@@ -116,7 +123,7 @@ static TOOLS: LazyLock<Vec<Tool>> = LazyLock::new(|| {
properties.insert("workdir".to_string(), JsonSchema::String);
properties.insert("timeout".to_string(), JsonSchema::Number);
vec![Tool {
vec![ToolInternal {
name: "shell",
kind: "function",
description: "Runs a shell command, and returns its output.",
@@ -149,11 +156,26 @@ impl ModelClient {
return stream_from_fixture(path).await;
}
// Assemble tool list: built-in tools + any extra tools from the prompt.
let mut tools_json: Vec<serde_json::Value> = TOOLS_INTERNAL
.iter()
.map(|t| serde_json::to_value(t).expect("serialize builtin tool"))
.collect();
tools_json.extend(
prompt
.extra_tools
.clone()
.into_iter()
.map(|(name, tool)| mcp_tool_to_openai_tool(name, tool)),
);
debug!("tools_json: {}", serde_json::to_string_pretty(&tools_json)?);
let payload = Payload {
model: &self.model,
instructions: prompt.instructions.as_ref(),
input: &prompt.input,
tools: &TOOLS,
tools: &tools_json,
tool_choice: "auto",
parallel_tool_calls: false,
reasoning: Some(Reasoning {
@@ -235,6 +257,18 @@ impl ModelClient {
}
}
/// Builds the JSON tool entry for a tool that came from an MCP server.
///
/// The emitted object has `"type": "function"`, takes its `"name"` from
/// `fully_qualified_name` (not from the tool's own name), and copies the
/// tool's description and input schema into `"description"` and
/// `"parameters"` respectively.
fn mcp_tool_to_openai_tool(
    fully_qualified_name: String,
    tool: mcp_types::Tool,
) -> serde_json::Value {
    // Only the description and the input schema are forwarded; every other
    // field of the MCP tool is dropped here.
    let mcp_types::Tool {
        description,
        input_schema,
        ..
    } = tool;

    json!({
        "name": fully_qualified_name,
        "description": description,
        "parameters": input_schema,
        "type": "function",
    })
}
#[derive(Debug, Deserialize, Serialize)]
struct SseEvent {
#[serde(rename = "type")]

View File

@@ -38,6 +38,10 @@ use crate::exec::ExecParams;
use crate::exec::ExecToolCallOutput;
use crate::exec::SandboxType;
use crate::flags::OPENAI_STREAM_MAX_RETRIES;
use crate::mcp_connection_manager::create_mcp_connection_manager;
use crate::mcp_connection_manager::try_parse_fully_qualified_tool_name;
use crate::mcp_connection_manager::McpConnectionManager;
use crate::mcp_tool_call::handle_mcp_tool_call;
use crate::models::ContentItem;
use crate::models::FunctionCallOutputPayload;
use crate::models::ResponseInputItem;
@@ -188,9 +192,9 @@ impl Recorder {
/// Context for an initialized model agent
///
/// A session has at most 1 running task at a time, and can be interrupted by user input.
struct Session {
pub(crate) struct Session {
client: ModelClient,
tx_event: Sender<Event>,
pub(crate) tx_event: Sender<Event>,
ctrl_c: Arc<Notify>,
/// The session's current working directory. All relative paths provided by
@@ -202,6 +206,9 @@ struct Session {
sandbox_policy: SandboxPolicy,
writable_roots: Mutex<Vec<PathBuf>>,
/// Manager for external MCP servers/tools.
pub(crate) mcp: crate::mcp_connection_manager::McpConnectionManager,
/// External notifier command (will be passed as args to exec()). When
/// `None` this feature is disabled.
notify: Option<Vec<String>>,
@@ -433,7 +440,7 @@ impl State {
}
/// A series of Turns in response to user input.
struct AgentTask {
pub(crate) struct AgentTask {
sess: Arc<Session>,
sub_id: String,
handle: AbortHandle,
@@ -554,6 +561,30 @@ async fn submission_loop(
};
let writable_roots = Mutex::new(get_writable_roots(&cwd));
// Load config to initialise the MCP connection manager.
let config = match crate::config::Config::load_with_overrides(
crate::config::ConfigOverrides::default(),
) {
Ok(cfg) => cfg,
Err(e) => {
error!("Failed to load config for MCP servers: {e:#}");
// Fall back to empty server map so the session can still proceed.
crate::config::Config::load_default_config_for_test()
}
};
let mcp = match create_mcp_connection_manager(config.mcp_servers.clone()).await {
Ok(mgr) => mgr,
Err(e) => {
error!("Failed to create MCP connection manager: {e:#}");
// Use an empty manager so we can still continue.
McpConnectionManager::new(HashMap::new())
.await
.expect("empty manager should never fail")
}
};
sess = Some(Arc::new(Session {
client,
tx_event: tx_event.clone(),
@@ -565,6 +596,7 @@ async fn submission_loop(
writable_roots,
notify,
state: Mutex::new(state),
mcp,
}));
// ack
@@ -753,11 +785,15 @@ async fn run_turn(
} else {
None
};
let extra_tools = sess.mcp.list_all_tools();
let prompt = Prompt {
input,
prev_id,
instructions,
store,
extra_tools,
};
let mut retries = 0;
@@ -1141,13 +1177,20 @@ async fn handle_function_call(
}
}
_ => {
// Unknown function: reply with structured failure so the model can adapt.
ResponseInputItem::FunctionCallOutput {
call_id,
output: crate::models::FunctionCallOutputPayload {
content: format!("unsupported call: {}", name),
success: None,
},
match try_parse_fully_qualified_tool_name(&name) {
Some((server, tool_name)) => {
handle_mcp_tool_call(sess, &sub_id, call_id, server, tool_name, arguments).await
}
None => {
// Unknown function: reply with structured failure so the model can adapt.
ResponseInputItem::FunctionCallOutput {
call_id,
output: crate::models::FunctionCallOutputPayload {
content: format!("unsupported call: {}", name),
success: None,
},
}
}
}
}
}

View File

@@ -1,9 +1,11 @@
use crate::flags::OPENAI_DEFAULT_MODEL;
use crate::mcp_server_config::McpServerConfig;
use crate::protocol::AskForApproval;
use crate::protocol::SandboxPermission;
use crate::protocol::SandboxPolicy;
use dirs::home_dir;
use serde::Deserialize;
use std::collections::HashMap;
use std::path::PathBuf;
/// Embedded fallback instructions that mirror the TypeScript CLI's default
@@ -56,6 +58,9 @@ pub struct Config {
/// for the session. All relative paths inside the business-logic layer are
/// resolved against this path.
pub cwd: PathBuf,
/// Definition for MCP servers that Codex can reach out to for tool calls.
pub mcp_servers: HashMap<String, McpServerConfig>,
}
/// Base config deserialized from ~/.codex/config.toml.
@@ -84,6 +89,10 @@ pub struct ConfigToml {
/// System instructions.
pub instructions: Option<String>,
/// Definition for MCP servers that Codex can reach out to for tool calls.
#[serde(default)]
pub mcp_servers: HashMap<String, McpServerConfig>,
}
impl ConfigToml {
@@ -212,6 +221,7 @@ impl Config {
.unwrap_or(false),
notify: cfg.notify,
instructions,
mcp_servers: cfg.mcp_servers,
}
}

View File

@@ -15,6 +15,9 @@ mod flags;
mod is_safe_command;
#[cfg(target_os = "linux")]
pub mod linux;
mod mcp_connection_manager;
pub mod mcp_server_config;
mod mcp_tool_call;
mod models;
pub mod protocol;
mod safety;

View File

@@ -0,0 +1,192 @@
//! Connection manager for Model Context Protocol (MCP) servers.
//!
//! The [`McpConnectionManager`] owns one [`codex_mcp_client::McpClient`] per
//! configured server (keyed by the *server name*). It offers convenience
//! helpers to query the available tools across *all* servers and returns them
//! in a single aggregated map using the fully-qualified tool name
//! `"<server><MCP_TOOL_NAME_DELIMITER><tool>"` as the key.
use std::collections::HashMap;
use anyhow::anyhow;
use anyhow::Result;
use codex_mcp_client::McpClient;
use mcp_types::Tool;
use tokio::task::JoinSet;
use tracing::info;
use tracing::warn;
use crate::mcp_server_config::McpServerConfig;
/// Separator placed between the server name and the tool name when building
/// a fully qualified tool name.
///
/// OpenAI requires tool names to conform to `^[a-zA-Z0-9_-]+$`, so the
/// delimiter has to be made up of characters from that set.
const MCP_TOOL_NAME_DELIMITER: &str = "__OAI_CODEX_MCP__";

/// Joins `server` and `tool` into `"<server><MCP_TOOL_NAME_DELIMITER><tool>"`.
fn fully_qualified_tool_name(server: &str, tool: &str) -> String {
    [server, MCP_TOOL_NAME_DELIMITER, tool].concat()
}

/// Splits a fully qualified tool name back into `(server, tool)`.
///
/// The split happens at the *first* occurrence of the delimiter. Returns
/// `None` when the delimiter is absent or when either side of it is empty.
pub(crate) fn try_parse_fully_qualified_tool_name(fq_name: &str) -> Option<(String, String)> {
    match fq_name.split_once(MCP_TOOL_NAME_DELIMITER) {
        Some((server, tool)) if !server.is_empty() && !tool.is_empty() => {
            Some((server.to_owned(), tool.to_owned()))
        }
        _ => None,
    }
}
/// A thin wrapper around a set of running [`McpClient`] instances.
///
/// Each client is stored behind an `Arc`, so a handle to an individual client
/// can be cloned cheaply (for example to move it into a spawned task). The
/// set of tools is collected when the manager is constructed and kept
/// alongside the clients.
pub(crate) struct McpConnectionManager {
/// Server-name → client instance.
///
/// The server name originates from the keys of the `mcp_servers` map in
/// the user configuration.
clients: HashMap<String, std::sync::Arc<McpClient>>, // Arc to cheaply clone
/// Tools reported by the servers, keyed by the fully-qualified tool name
/// (`"<server><MCP_TOOL_NAME_DELIMITER><tool>"`).
tools: HashMap<String, Tool>,
}
impl McpConnectionManager {
/// Spawn a [`McpClient`] for each configured server.
///
/// * `mcp_servers` Map loaded from the user configuration where *keys*
/// are human-readable server identifiers and *values* are the spawn
/// instructions.
pub async fn new(mcp_servers: HashMap<String, McpServerConfig>) -> Result<Self> {
// Early exit if no servers are configured.
if mcp_servers.is_empty() {
return Ok(Self {
clients: HashMap::new(),
tools: HashMap::new(),
});
}
// Spin up all servers concurrently.
let mut join_set = JoinSet::new();
// Spawn tasks to launch each server.
for (server_name, cfg) in mcp_servers {
// Perform slash validation up-front so we can return early without
// spawning any tasks when the name is invalid.
if server_name.contains('/') {
return Err(anyhow!(
"MCP server name '{server_name}' must not contain a forward slash (/)"
));
}
join_set.spawn(async move {
let McpServerConfig { command, args, env } = cfg;
let client_res = McpClient::new_stdio_client(command, args, env).await;
(server_name, client_res)
});
}
let mut clients: HashMap<String, std::sync::Arc<McpClient>> = HashMap::new();
while let Some(res) = join_set.join_next().await {
let (server_name, client_res) = res?; // propagate JoinError
let client = client_res
.map_err(|e| anyhow!("failed to spawn MCP server '{server_name}': {e}"))?;
clients.insert(server_name, std::sync::Arc::new(client));
}
let tools = list_all_tools(&clients).await?;
Ok(Self { clients, tools })
}
/// Returns a single map that contains **all** tools. Each key is the
/// fully-qualified name for the tool.
pub fn list_all_tools(&self) -> HashMap<String, Tool> {
self.tools.clone()
}
/// Route a fully-qualified tool call to the matching server.
pub async fn call_tool(
&self,
server: &str,
tool: &str,
arguments: Option<serde_json::Value>,
) -> Result<mcp_types::CallToolResult> {
let client = self
.clients
.get(server)
.ok_or_else(|| anyhow!("unknown MCP server '{server}'"))?
.clone();
client
.call_tool(tool.to_string(), arguments)
.await
.map_err(|e| anyhow!("tool call failed for '{server}/{tool}': {e}"))
}
}
/// Asks every connected server for its tools and merges the answers into one
/// map keyed by the fully-qualified tool name.
///
/// Tools whose name contains `/` are skipped with a warning. When two tools
/// end up with the same fully-qualified name, the later one replaces the
/// earlier one and a warning is logged.
///
/// # Errors
///
/// Fails if a query task cannot be joined or if any server returns an error
/// from its tool listing.
pub async fn list_all_tools(
    clients: &HashMap<String, std::sync::Arc<McpClient>>,
) -> Result<HashMap<String, Tool>> {
    // One task per server: total latency is then bounded by the slowest
    // server rather than by the sum over all servers.
    let mut pending = JoinSet::new();
    for (name, client) in clients.iter() {
        let (name, client) = (name.clone(), client.clone());
        pending.spawn(async move {
            let listing = client.list_tools(None).await;
            (name, listing)
        });
    }

    let mut aggregated: HashMap<String, Tool> = HashMap::new();
    while let Some(joined) = pending.join_next().await {
        let (server_name, listing) = joined?; // propagate JoinError

        for tool in listing?.tools {
            if tool.name.contains('/') {
                warn!(
                    server = %server_name,
                    tool_name = %tool.name,
                    "tool name contains '/' skipping to avoid ambiguity"
                );
            } else {
                let fq_name = fully_qualified_tool_name(&server_name, &tool.name);
                let replaced = aggregated.insert(fq_name.clone(), tool);
                if replaced.is_some() {
                    warn!("tool name collision for '{fq_name}' overwriting previous entry");
                }
            }
        }
    }

    info!(
        "aggregated {} tools from {} servers",
        aggregated.len(),
        clients.len()
    );

    Ok(aggregated)
}
/// Free-function entry point for building an [`McpConnectionManager`].
///
/// This simply forwards to [`McpConnectionManager::new`]; call sites may use
/// either form. Any error from `new` is returned unchanged.
pub(crate) async fn create_mcp_connection_manager(
    mcp_servers: HashMap<String, McpServerConfig>,
) -> Result<McpConnectionManager> {
    let manager = McpConnectionManager::new(mcp_servers).await?;
    Ok(manager)
}

View File

@@ -0,0 +1,14 @@
use std::collections::HashMap;
use serde::Deserialize;
/// Launch description for one MCP server, deserialized from a single entry of
/// the `mcp_servers` table in the configuration.
///
/// The three fields are handed, in this order, to
/// `McpClient::new_stdio_client(command, args, env)` when the connection
/// manager starts the server.
#[derive(Deserialize, Debug, Clone)]
pub struct McpServerConfig {
/// Command used to start the server.
pub command: String,
/// Arguments for `command`. Defaults to an empty list when omitted.
#[serde(default)]
pub args: Vec<String>,
/// Optional environment variables for the server; `None` when omitted.
/// NOTE(review): whether these replace or extend the inherited environment
/// is decided by the MCP client, not here; confirm against the client.
#[serde(default)]
pub env: Option<HashMap<String, String>>,
}

View File

@@ -0,0 +1,94 @@
use tracing::error;
use crate::codex::Session;
use crate::models::FunctionCallOutputPayload;
use crate::models::ResponseInputItem;
use crate::protocol::Event;
use crate::protocol::EventMsg;
/// Handles the specified tool call and dispatches the appropriate
/// `McpToolCallBegin` and `McpToolCallEnd` events to the `Session`.
///
/// * `sub_id` - id attached to both emitted events.
/// * `call_id` - identifier of the function call; echoed in both events and
///   in the returned item.
/// * `server` / `tool_name` - target server and tool for the call.
/// * `arguments` - arguments as a JSON string.
///
/// Always returns a `ResponseInputItem::FunctionCallOutput`. On success its
/// content is the JSON-serialized call result; on failure it is
/// `"err: <error message>"`. Failures to deliver the events are logged and
/// otherwise ignored.
pub(crate) async fn handle_mcp_tool_call(
    sess: &Session,
    sub_id: &str,
    call_id: String,
    server: String,
    tool_name: String,
    arguments: String,
) -> ResponseInputItem {
    // Best effort: if the argument string is not valid JSON (e.g. empty), the
    // tool is invoked without arguments.
    let arguments_value: Option<serde_json::Value> = serde_json::from_str(&arguments).ok();

    let tool_call_begin_event = EventMsg::McpToolCallBegin {
        call_id: call_id.clone(),
        server: server.clone(),
        tool: tool_name.clone(),
        arguments: arguments_value.clone(),
    };
    if let Err(e) = sess
        .tx_event
        .send(Event {
            id: sub_id.to_string(),
            msg: tool_call_begin_event,
        })
        .await
    {
        error!("failed to send tool call begin event: {e}");
    }

    // Attempt to route to external MCP server. The text for the model is
    // computed here, before `result` is moved into the end event.
    let (success, result, content) = match sess
        .mcp
        .call_tool(&server, &tool_name, arguments_value)
        .await
    {
        Ok(result) => {
            let content = serde_json::to_string(&result)
                .unwrap_or_else(|e| format!("JSON serialization error: {e}"));
            (!result.is_error.unwrap_or(false), Some(result), content)
        }
        // Format the error itself rather than a Debug-printed `Option`, so the
        // model does not see a `Some(...)` wrapper around the message.
        Err(e) => (false, None, format!("err: {e:#}")),
    };

    let tool_call_end_event = EventMsg::McpToolCallEnd {
        call_id: call_id.clone(),
        success,
        result,
    };
    if let Err(e) = sess
        .tx_event
        .send(Event {
            id: sub_id.to_string(),
            msg: tool_call_end_event,
        })
        .await
    {
        error!("failed to send tool call end event: {e}");
    }

    ResponseInputItem::FunctionCallOutput {
        call_id,
        output: FunctionCallOutputPayload {
            content,
            success: Some(success),
        },
    }
}

View File

@@ -7,6 +7,7 @@ use std::collections::HashMap;
use std::path::Path;
use std::path::PathBuf;
use mcp_types::CallToolResult;
use serde::Deserialize;
use serde::Serialize;
@@ -316,6 +317,32 @@ pub enum EventMsg {
model: String,
},
McpToolCallBegin {
/// Identifier so this can be paired with the McpToolCallEnd event.
call_id: String,
/// Name of the MCP server as defined in the config.
server: String,
/// Name of the tool as given by the MCP server.
tool: String,
/// Arguments to the tool call.
arguments: Option<serde_json::Value>,
},
McpToolCallEnd {
/// Identifier for the McpToolCallBegin that finished.
call_id: String,
/// Whether the tool call was successful. If `false`, `result` might
/// not be present.
success: bool,
/// Result of the tool call. Note this could be an error.
result: Option<CallToolResult>,
},
/// Notification that the server is about to execute a command.
ExecCommandBegin {
/// Identifier so this can be paired with the ExecCommandEnd event.