mirror of
https://github.com/openai/codex.git
synced 2026-05-28 06:55:01 +00:00
fix(core): instrument stalled tool-listing handoff (#24667)
## Why When a turn needs a follow-up request after tool output is recorded, Codex can still appear stuck in `Thinking` before the next `/responses` request is opened. The existing local trace showed the last completed response and the absence of a new backend request, but it did not show whether the stall was in tool-router preparation or later request setup. Issue: N/A (internal incident investigation) ## What Changed Added trace spans around the pre-stream tool-router handoff in `core/src/session/turn.rs`, including the `built_tools` phase and the MCP manager read lock. Added per-server MCP tool-listing spans and trace breadcrumbs in `codex-mcp/src/connection_manager.rs` with startup snapshot / startup-complete state so a pending MCP client is visible in feedback logs instead of looking like a silent hang. ## Verification - `just fmt` - `just test -p codex-mcp` - `just test -p codex-core` (prior full rerun fails in this workspace on unrelated integration tests: code-mode output length expectations, one shell timeout formatting assertion, and shell snapshot timeouts; latest review-fix rerun compiled and passed 1160 tests before I stopped the abnormally slow unrelated suite)
This commit is contained in:
@@ -64,7 +64,10 @@ use rmcp::model::Resource;
|
||||
use rmcp::model::ResourceTemplate;
|
||||
use tokio::task::JoinSet;
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::Instrument;
|
||||
use tracing::instrument;
|
||||
use tracing::trace;
|
||||
use tracing::trace_span;
|
||||
use tracing::warn;
|
||||
|
||||
/// A thin wrapper around a set of running [`RmcpClient`] instances.
|
||||
@@ -378,13 +381,37 @@ impl McpConnectionManager {
|
||||
}
|
||||
|
||||
/// Returns all tools with model-visible names normalized.
|
||||
#[instrument(level = "trace", skip_all)]
|
||||
#[instrument(level = "trace", skip_all, fields(mcp_server_count = self.clients.len()))]
|
||||
pub async fn list_all_tools(&self) -> Vec<ToolInfo> {
|
||||
let mut tools = Vec::new();
|
||||
for managed_client in self.clients.values() {
|
||||
let Some(server_tools) = managed_client.listed_tools().await else {
|
||||
for (server_name, managed_client) in &self.clients {
|
||||
let has_startup_snapshot = managed_client.startup_snapshot.is_some();
|
||||
let startup_complete = managed_client
|
||||
.startup_complete
|
||||
.load(std::sync::atomic::Ordering::Acquire);
|
||||
trace!(
|
||||
server_name = %server_name,
|
||||
has_startup_snapshot,
|
||||
startup_complete,
|
||||
"waiting for MCP server tools while building tool list"
|
||||
);
|
||||
let Some(server_tools) = managed_client
|
||||
.listed_tools()
|
||||
.instrument(trace_span!(
|
||||
"list_tools_for_server",
|
||||
server_name = %server_name,
|
||||
has_startup_snapshot,
|
||||
startup_complete
|
||||
))
|
||||
.await
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
trace!(
|
||||
server_name = %server_name,
|
||||
tool_count = server_tools.len(),
|
||||
"listed MCP server tools while building tool list"
|
||||
);
|
||||
tools.extend(
|
||||
server_tools
|
||||
.into_iter()
|
||||
|
||||
@@ -1006,12 +1006,25 @@ async fn run_sampling_request(
|
||||
clippy::await_holding_invalid_type,
|
||||
reason = "tool router construction reads through the session-owned manager guard"
|
||||
)]
|
||||
#[instrument(level = "trace",
|
||||
skip_all,
|
||||
fields(
|
||||
turn_id = %turn_context.sub_id,
|
||||
model = %turn_context.model_info.slug,
|
||||
apps_enabled = turn_context.apps_enabled()
|
||||
)
|
||||
)]
|
||||
pub(crate) async fn built_tools(
|
||||
sess: &Session,
|
||||
turn_context: &TurnContext,
|
||||
cancellation_token: &CancellationToken,
|
||||
) -> CodexResult<Arc<ToolRouter>> {
|
||||
let mcp_connection_manager = sess.services.mcp_connection_manager.read().await;
|
||||
let mcp_connection_manager = sess
|
||||
.services
|
||||
.mcp_connection_manager
|
||||
.read()
|
||||
.instrument(trace_span!("read_mcp_connection_manager"))
|
||||
.await;
|
||||
let has_mcp_servers = mcp_connection_manager.has_servers();
|
||||
let all_mcp_tools = mcp_connection_manager
|
||||
.list_all_tools()
|
||||
|
||||
Reference in New Issue
Block a user