mirror of
https://github.com/openai/codex.git
synced 2026-04-29 00:55:38 +00:00
This introduces a new feature for when Codex operates as an MCP
_client_: if an MCP _server_ reports an entry named
`"codex/sandbox-state"` in its _server capabilities_, then Codex will
send it an MCP notification with the following structure:
```json
{
"method": "codex/sandbox-state/update",
"params": {
"sandboxPolicy": {
"type": "workspace-write",
"network-access": false,
"exclude-tmpdir-env-var": false,
"exclude-slash-tmp": false
},
"codexLinuxSandboxExe": null,
"sandboxCwd": "/Users/mbolin/code/codex2"
}
}
```
or with whatever values are appropriate for the initial `sandboxPolicy`.
**NOTE:** Codex _should_ continue to send the MCP server notifications
of the same format if these things change over the lifetime of the
thread, but that isn't wired up yet.
The result is that `shell-tool-mcp` can consume these values so that
when it calls `codex_core::exec::process_exec_tool_call()` in
`codex-rs/exec-server/src/posix/escalate_server.rs`, it is now sure to
call it with the correct values (whereas previously we relied on
hardcoded values).
While I would argue this is a supported use case within the MCP
protocol, the `rmcp` crate that we are using today does not support
custom notifications. As such, I had to patch it and I submitted it for
review, so hopefully it will be accepted in some form:
https://github.com/modelcontextprotocol/rust-sdk/pull/556
To test out this change from end-to-end:
- I ran `cargo build` in `~/code/codex2/codex-rs/exec-server`
- I built the fork of Bash in `~/code/bash/bash`
- I added the following to my `~/.codex/config.toml`:
```toml
# Use with `codex --disable shell_tool`.
[mcp_servers.execshell]
args = ["--bash", "/Users/mbolin/code/bash/bash"]
command = "/Users/mbolin/code/codex2/codex-rs/target/debug/codex-exec-mcp-server"
```
- From `~/code/codex2/codex-rs`, I ran `just codex --disable shell_tool`
- When the TUI started up, I verified that the sandbox mode is
`workspace-write`
- I ran `/mcp` to verify that the shell tool from the MCP is there:
<img width="1387" height="1400" alt="image"
src="https://github.com/user-attachments/assets/1a8addcc-5005-4e16-b59f-95cfd06fd4ab"
/>
- Then I asked it:
> what is the output of `gh issue list`
because this should be auto-approved with our existing dummy policy:
af63e6eccc/codex-rs/exec-server/src/posix.rs (L157-L164)
And it worked:
<img width="1387" height="1400" alt="image"
src="https://github.com/user-attachments/assets/7568d2f7-80da-4d68-86d0-c265a6f5e6c1"
/>
253 lines
8.4 KiB
Rust
253 lines
8.4 KiB
Rust
use std::path::PathBuf;
|
|
use std::sync::Arc;
|
|
use std::time::Duration;
|
|
|
|
use anyhow::Context as _;
|
|
use anyhow::Result;
|
|
use codex_core::MCP_SANDBOX_STATE_CAPABILITY;
|
|
use codex_core::MCP_SANDBOX_STATE_NOTIFICATION;
|
|
use codex_core::SandboxState;
|
|
use codex_core::protocol::SandboxPolicy;
|
|
use rmcp::ErrorData as McpError;
|
|
use rmcp::RoleServer;
|
|
use rmcp::ServerHandler;
|
|
use rmcp::ServiceExt;
|
|
use rmcp::handler::server::router::tool::ToolRouter;
|
|
use rmcp::handler::server::wrapper::Parameters;
|
|
use rmcp::model::*;
|
|
use rmcp::schemars;
|
|
use rmcp::service::RequestContext;
|
|
use rmcp::service::RunningService;
|
|
use rmcp::tool;
|
|
use rmcp::tool_handler;
|
|
use rmcp::tool_router;
|
|
use rmcp::transport::stdio;
|
|
use tokio::sync::RwLock;
|
|
use tracing::debug;
|
|
|
|
use crate::posix::escalate_server::EscalateServer;
|
|
use crate::posix::escalate_server::{self};
|
|
use crate::posix::mcp_escalation_policy::ExecPolicy;
|
|
use crate::posix::mcp_escalation_policy::McpEscalationPolicy;
|
|
use crate::posix::stopwatch::Stopwatch;
|
|
|
|
/// Name of the environment variable that holds the path to our patched bash.
|
|
const CODEX_BASH_PATH_ENV_VAR: &str = "CODEX_BASH_PATH";
|
|
|
|
/// Version string published under the `"version"` key of the sandbox-state
/// capability entry that `get_info` advertises to clients.
const SANDBOX_STATE_CAPABILITY_VERSION: &str = "1.0.0";
|
|
|
|
pub(crate) fn get_bash_path() -> Result<PathBuf> {
|
|
std::env::var(CODEX_BASH_PATH_ENV_VAR)
|
|
.map(PathBuf::from)
|
|
.context(format!("{CODEX_BASH_PATH_ENV_VAR} must be set"))
|
|
}
|
|
|
|
#[derive(Debug, serde::Deserialize, schemars::JsonSchema)]
|
|
pub struct ExecParams {
|
|
/// The bash string to execute.
|
|
pub command: String,
|
|
/// The working directory to execute the command in. Must be an absolute path.
|
|
pub workdir: String,
|
|
/// The timeout for the command in milliseconds.
|
|
pub timeout_ms: Option<u64>,
|
|
/// Launch Bash with -lc instead of -c: defaults to true.
|
|
pub login: Option<bool>,
|
|
}
|
|
|
|
#[derive(Debug, serde::Serialize, schemars::JsonSchema)]
|
|
pub struct ExecResult {
|
|
pub exit_code: i32,
|
|
pub output: String,
|
|
pub duration: Duration,
|
|
pub timed_out: bool,
|
|
}
|
|
|
|
/// Converts the escalate server's result into the tool-facing [`ExecResult`],
/// carrying the four fields across unchanged.
impl From<escalate_server::ExecResult> for ExecResult {
    fn from(result: escalate_server::ExecResult) -> Self {
        // Destructure once and move each field straight into the new value.
        let escalate_server::ExecResult {
            exit_code,
            output,
            duration,
            timed_out,
            ..
        } = result;

        Self {
            exit_code,
            output,
            duration,
            timed_out,
        }
    }
}
|
|
|
|
#[derive(Clone)]
|
|
pub struct ExecTool {
|
|
tool_router: ToolRouter<ExecTool>,
|
|
bash_path: PathBuf,
|
|
execve_wrapper: PathBuf,
|
|
policy: ExecPolicy,
|
|
sandbox_state: Arc<RwLock<Option<SandboxState>>>,
|
|
}
|
|
|
|
#[tool_router]
|
|
impl ExecTool {
|
|
pub fn new(bash_path: PathBuf, execve_wrapper: PathBuf, policy: ExecPolicy) -> Self {
|
|
Self {
|
|
tool_router: Self::tool_router(),
|
|
bash_path,
|
|
execve_wrapper,
|
|
policy,
|
|
sandbox_state: Arc::new(RwLock::new(None)),
|
|
}
|
|
}
|
|
|
|
/// Runs a shell command and returns its output. You MUST provide the workdir as an absolute path.
|
|
#[tool]
|
|
async fn shell(
|
|
&self,
|
|
context: RequestContext<RoleServer>,
|
|
Parameters(params): Parameters<ExecParams>,
|
|
) -> Result<CallToolResult, McpError> {
|
|
let effective_timeout = Duration::from_millis(
|
|
params
|
|
.timeout_ms
|
|
.unwrap_or(codex_core::exec::DEFAULT_EXEC_COMMAND_TIMEOUT_MS),
|
|
);
|
|
let stopwatch = Stopwatch::new(effective_timeout);
|
|
let cancel_token = stopwatch.cancellation_token();
|
|
let sandbox_state =
|
|
self.sandbox_state
|
|
.read()
|
|
.await
|
|
.clone()
|
|
.unwrap_or_else(|| SandboxState {
|
|
sandbox_policy: SandboxPolicy::ReadOnly,
|
|
codex_linux_sandbox_exe: None,
|
|
sandbox_cwd: PathBuf::from(¶ms.workdir),
|
|
});
|
|
let escalate_server = EscalateServer::new(
|
|
self.bash_path.clone(),
|
|
self.execve_wrapper.clone(),
|
|
McpEscalationPolicy::new(self.policy, context, stopwatch.clone()),
|
|
);
|
|
|
|
let result = escalate_server
|
|
.exec(params, cancel_token, &sandbox_state)
|
|
.await
|
|
.map_err(|e| McpError::internal_error(e.to_string(), None))?;
|
|
Ok(CallToolResult::success(vec![Content::json(
|
|
ExecResult::from(result),
|
|
)?]))
|
|
}
|
|
}
|
|
|
|
#[tool_handler]
|
|
impl ServerHandler for ExecTool {
|
|
fn get_info(&self) -> ServerInfo {
|
|
let mut experimental_capabilities = ExperimentalCapabilities::new();
|
|
let mut sandbox_state_capability = JsonObject::new();
|
|
sandbox_state_capability.insert(
|
|
"version".to_string(),
|
|
serde_json::Value::String(SANDBOX_STATE_CAPABILITY_VERSION.to_string()),
|
|
);
|
|
experimental_capabilities.insert(
|
|
MCP_SANDBOX_STATE_CAPABILITY.to_string(),
|
|
sandbox_state_capability,
|
|
);
|
|
ServerInfo {
|
|
protocol_version: ProtocolVersion::V_2025_06_18,
|
|
capabilities: ServerCapabilities::builder()
|
|
.enable_tools()
|
|
.enable_experimental_with(experimental_capabilities)
|
|
.build(),
|
|
server_info: Implementation::from_build_env(),
|
|
instructions: Some(
|
|
"This server provides a tool to execute shell commands and return their output."
|
|
.to_string(),
|
|
),
|
|
}
|
|
}
|
|
|
|
async fn initialize(
|
|
&self,
|
|
_request: InitializeRequestParam,
|
|
_context: RequestContext<RoleServer>,
|
|
) -> Result<InitializeResult, McpError> {
|
|
Ok(self.get_info())
|
|
}
|
|
|
|
async fn on_custom_notification(
|
|
&self,
|
|
notification: rmcp::model::CustomClientNotification,
|
|
_context: rmcp::service::NotificationContext<rmcp::RoleServer>,
|
|
) {
|
|
let rmcp::model::CustomClientNotification { method, params, .. } = notification;
|
|
if method == MCP_SANDBOX_STATE_NOTIFICATION
|
|
&& let Some(params) = params
|
|
{
|
|
match serde_json::from_value::<SandboxState>(params) {
|
|
Ok(sandbox_state) => {
|
|
debug!(
|
|
?sandbox_state.sandbox_policy,
|
|
"received sandbox state notification"
|
|
);
|
|
let mut state = self.sandbox_state.write().await;
|
|
*state = Some(sandbox_state);
|
|
}
|
|
Err(err) => {
|
|
tracing::warn!(?err, "failed to deserialize sandbox state notification");
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
pub(crate) async fn serve(
|
|
bash_path: PathBuf,
|
|
execve_wrapper: PathBuf,
|
|
policy: ExecPolicy,
|
|
) -> Result<RunningService<RoleServer, ExecTool>, rmcp::service::ServerInitializeError> {
|
|
let tool = ExecTool::new(bash_path, execve_wrapper, policy);
|
|
tool.serve(stdio()).await
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;
    use serde_json::json;

    /// Guards the JSON schema that schemars derives for `ExecParams` against
    /// accidental changes from our serde usage; in particular `login` and
    /// `timeout_ms` must stay out of the `required` list.
    #[test]
    fn exec_params_json_schema_matches_expected() {
        let expected = json!({
            "$schema": "https://json-schema.org/draft/2020-12/schema",
            "title": "ExecParams",
            "type": "object",
            "properties": {
                "command": {
                    "description": "The bash string to execute.",
                    "type": "string"
                },
                "login": {
                    "description": "Launch Bash with -lc instead of -c: defaults to true.",
                    "type": ["boolean", "null"]
                },
                "timeout_ms": {
                    "description": "The timeout for the command in milliseconds.",
                    "format": "uint64",
                    "minimum": 0,
                    "type": ["integer", "null"]
                },
                "workdir": {
                    "description":
                        "The working directory to execute the command in. Must be an absolute path.",
                    "type": "string"
                }
            },
            "required": ["command", "workdir"]
        });

        let actual = serde_json::to_value(schemars::schema_for!(ExecParams))
            .expect("schema should serialize");

        assert_eq!(actual, expected);
    }
}
|