mirror of
https://github.com/openai/codex.git
synced 2026-02-01 22:47:52 +00:00
fix: change codex/sandbox-state/update from a notification to a request (#8142)
Historically, `accept_elicitation_for_prompt_rule()` was flaky because we were using a notification to update the sandbox followed by a `shell` tool request that we expected to be subject to the new sandbox config, but because [rmcp](https://crates.io/crates/rmcp) MCP servers delegate each incoming message to a new Tokio task, messages are not guaranteed to be processed in order, so sometimes the `shell` tool call would run before the notification was processed. Prior to this PR, we relied on a generous `sleep()` between the notification and the request to reduce the change of the test flaking out. This PR implements a proper fix, which is to use a _request_ instead of a notification for the sandbox update so that we can wait for the response to the sandbox request before sending the request to the `shell` tool call. Previously, `rmcp` did not support custom requests, but I fixed that in https://github.com/modelcontextprotocol/rust-sdk/pull/590, which made it into the `0.12.0` release (see #8288). This PR updates `shell-tool-mcp` to expect `"codex/sandbox-state/update"` as a _request_ instead of a notification and sends the appropriate ack. Note this behavior is tied to our custom `codex/sandbox-state` capability, which Codex honors as an MCP client, which is why `core/src/mcp_connection_manager.rs` had to be updated as part of this PR, as well. This PR also updates the docs at `shell-tool-mcp/README.md`.
This commit is contained in:
@@ -5,7 +5,7 @@ use std::time::Duration;
|
||||
use anyhow::Context as _;
|
||||
use anyhow::Result;
|
||||
use codex_core::MCP_SANDBOX_STATE_CAPABILITY;
|
||||
use codex_core::MCP_SANDBOX_STATE_NOTIFICATION;
|
||||
use codex_core::MCP_SANDBOX_STATE_METHOD;
|
||||
use codex_core::SandboxState;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_execpolicy::Policy;
|
||||
@@ -15,6 +15,8 @@ use rmcp::ServerHandler;
|
||||
use rmcp::ServiceExt;
|
||||
use rmcp::handler::server::router::tool::ToolRouter;
|
||||
use rmcp::handler::server::wrapper::Parameters;
|
||||
use rmcp::model::CustomRequest;
|
||||
use rmcp::model::CustomResult;
|
||||
use rmcp::model::*;
|
||||
use rmcp::schemars;
|
||||
use rmcp::service::RequestContext;
|
||||
@@ -23,8 +25,8 @@ use rmcp::tool;
|
||||
use rmcp::tool_handler;
|
||||
use rmcp::tool_router;
|
||||
use rmcp::transport::stdio;
|
||||
use serde_json::json;
|
||||
use tokio::sync::RwLock;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::posix::escalate_server::EscalateServer;
|
||||
use crate::posix::escalate_server::{self};
|
||||
@@ -146,6 +148,13 @@ impl ExecTool {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct CodexSandboxStateUpdateMethod;
|
||||
|
||||
impl rmcp::model::ConstString for CodexSandboxStateUpdateMethod {
|
||||
const VALUE: &'static str = MCP_SANDBOX_STATE_METHOD;
|
||||
}
|
||||
|
||||
#[tool_handler]
|
||||
impl ServerHandler for ExecTool {
|
||||
fn get_info(&self) -> ServerInfo {
|
||||
@@ -181,29 +190,33 @@ impl ServerHandler for ExecTool {
|
||||
Ok(self.get_info())
|
||||
}
|
||||
|
||||
async fn on_custom_notification(
|
||||
async fn on_custom_request(
|
||||
&self,
|
||||
notification: rmcp::model::CustomNotification,
|
||||
_context: rmcp::service::NotificationContext<rmcp::RoleServer>,
|
||||
) {
|
||||
let rmcp::model::CustomNotification { method, params, .. } = notification;
|
||||
if method == MCP_SANDBOX_STATE_NOTIFICATION
|
||||
&& let Some(params) = params
|
||||
{
|
||||
match serde_json::from_value::<SandboxState>(params) {
|
||||
Ok(sandbox_state) => {
|
||||
debug!(
|
||||
?sandbox_state.sandbox_policy,
|
||||
"received sandbox state notification"
|
||||
);
|
||||
let mut state = self.sandbox_state.write().await;
|
||||
*state = Some(sandbox_state);
|
||||
}
|
||||
Err(err) => {
|
||||
tracing::warn!(?err, "failed to deserialize sandbox state notification");
|
||||
}
|
||||
}
|
||||
request: CustomRequest,
|
||||
_context: rmcp::service::RequestContext<rmcp::RoleServer>,
|
||||
) -> Result<CustomResult, McpError> {
|
||||
let CustomRequest { method, params, .. } = request;
|
||||
if method != MCP_SANDBOX_STATE_METHOD {
|
||||
return Err(McpError::method_not_found::<CodexSandboxStateUpdateMethod>());
|
||||
}
|
||||
|
||||
let Some(params) = params else {
|
||||
return Err(McpError::invalid_params(
|
||||
"missing params for sandbox state request".to_string(),
|
||||
None,
|
||||
));
|
||||
};
|
||||
|
||||
let Ok(sandbox_state) = serde_json::from_value::<SandboxState>(params.clone()) else {
|
||||
return Err(McpError::invalid_params(
|
||||
"failed to deserialize sandbox state".to_string(),
|
||||
Some(params),
|
||||
));
|
||||
};
|
||||
|
||||
*self.sandbox_state.write().await = Some(sandbox_state);
|
||||
|
||||
Ok(CustomResult::new(json!({})))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use codex_core::MCP_SANDBOX_STATE_NOTIFICATION;
|
||||
use codex_core::MCP_SANDBOX_STATE_METHOD;
|
||||
use codex_core::SandboxState;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use rmcp::ClientHandler;
|
||||
@@ -7,10 +7,12 @@ use rmcp::RoleClient;
|
||||
use rmcp::Service;
|
||||
use rmcp::model::ClientCapabilities;
|
||||
use rmcp::model::ClientInfo;
|
||||
use rmcp::model::ClientRequest;
|
||||
use rmcp::model::CreateElicitationRequestParam;
|
||||
use rmcp::model::CreateElicitationResult;
|
||||
use rmcp::model::CustomNotification;
|
||||
use rmcp::model::CustomRequest;
|
||||
use rmcp::model::ElicitationAction;
|
||||
use rmcp::model::ServerResult;
|
||||
use rmcp::service::RunningService;
|
||||
use rmcp::transport::ConfigureCommandExt;
|
||||
use rmcp::transport::TokioChildProcess;
|
||||
@@ -82,7 +84,7 @@ pub async fn notify_readable_sandbox<P, S>(
|
||||
sandbox_cwd: P,
|
||||
codex_linux_sandbox_exe: Option<PathBuf>,
|
||||
service: &RunningService<RoleClient, S>,
|
||||
) -> anyhow::Result<()>
|
||||
) -> anyhow::Result<ServerResult>
|
||||
where
|
||||
P: AsRef<Path>,
|
||||
S: Service<RoleClient> + ClientHandler,
|
||||
@@ -92,14 +94,14 @@ where
|
||||
codex_linux_sandbox_exe,
|
||||
sandbox_cwd: sandbox_cwd.as_ref().to_path_buf(),
|
||||
};
|
||||
send_sandbox_notification(sandbox_state, service).await
|
||||
send_sandbox_state_update(sandbox_state, service).await
|
||||
}
|
||||
|
||||
pub async fn notify_writable_sandbox_only_one_folder<P, S>(
|
||||
writable_folder: P,
|
||||
codex_linux_sandbox_exe: Option<PathBuf>,
|
||||
service: &RunningService<RoleClient, S>,
|
||||
) -> anyhow::Result<()>
|
||||
) -> anyhow::Result<ServerResult>
|
||||
where
|
||||
P: AsRef<Path>,
|
||||
S: Service<RoleClient> + ClientHandler,
|
||||
@@ -119,24 +121,23 @@ where
|
||||
codex_linux_sandbox_exe,
|
||||
sandbox_cwd: writable_folder.as_ref().to_path_buf(),
|
||||
};
|
||||
send_sandbox_notification(sandbox_state, service).await
|
||||
send_sandbox_state_update(sandbox_state, service).await
|
||||
}
|
||||
|
||||
async fn send_sandbox_notification<S>(
|
||||
async fn send_sandbox_state_update<S>(
|
||||
sandbox_state: SandboxState,
|
||||
service: &RunningService<RoleClient, S>,
|
||||
) -> anyhow::Result<()>
|
||||
) -> anyhow::Result<ServerResult>
|
||||
where
|
||||
S: Service<RoleClient> + ClientHandler,
|
||||
{
|
||||
let sandbox_state_notification = CustomNotification::new(
|
||||
MCP_SANDBOX_STATE_NOTIFICATION,
|
||||
Some(serde_json::to_value(sandbox_state)?),
|
||||
);
|
||||
service
|
||||
.send_notification(sandbox_state_notification.into())
|
||||
let response = service
|
||||
.send_request(ClientRequest::CustomRequest(CustomRequest::new(
|
||||
MCP_SANDBOX_STATE_METHOD,
|
||||
Some(serde_json::to_value(sandbox_state)?),
|
||||
)))
|
||||
.await?;
|
||||
Ok(())
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
pub struct InteractiveClient {
|
||||
|
||||
@@ -3,7 +3,6 @@ use std::borrow::Cow;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::sync::Mutex;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::Result;
|
||||
@@ -19,6 +18,8 @@ use rmcp::ServiceExt;
|
||||
use rmcp::model::CallToolRequestParam;
|
||||
use rmcp::model::CallToolResult;
|
||||
use rmcp::model::CreateElicitationRequestParam;
|
||||
use rmcp::model::EmptyResult;
|
||||
use rmcp::model::ServerResult;
|
||||
use rmcp::model::object;
|
||||
use serde_json::json;
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
@@ -82,19 +83,11 @@ prefix_rule(
|
||||
} else {
|
||||
None
|
||||
};
|
||||
notify_readable_sandbox(&project_root_path, codex_linux_sandbox_exe, &service).await?;
|
||||
|
||||
// TODO(mbolin): Remove this hack to remove flakiness when possible.
|
||||
// As noted in the commentary on https://github.com/openai/codex/pull/7832,
|
||||
// an rmcp server does not process messages serially: it takes messages off
|
||||
// the queue and immediately dispatches them to handlers, which may complete
|
||||
// out of order. The proper fix is to replace our custom notification with a
|
||||
// custom request where we wait for the response before proceeding. However,
|
||||
// rmcp does not currently support custom requests, so as a temporary
|
||||
// workaround we just wait a bit to increase the probability the server has
|
||||
// processed the notification. Assuming we can upstream rmcp support for
|
||||
// custom requests, we will remove this once the functionality is available.
|
||||
tokio::time::sleep(Duration::from_secs(4)).await;
|
||||
let response =
|
||||
notify_readable_sandbox(&project_root_path, codex_linux_sandbox_exe, &service).await?;
|
||||
let ServerResult::EmptyResult(EmptyResult {}) = response else {
|
||||
panic!("expected EmptyResult from sandbox state notification but found: {response:?}");
|
||||
};
|
||||
|
||||
// Call the shell tool and verify that an elicitation was created and
|
||||
// auto-approved.
|
||||
|
||||
Reference in New Issue
Block a user