fix: filter dynamic deferred tools from model_visible_specs (#19771)

fixes #19486

### Problem
Right now dynamic deferred tools are filtered at normal-turn prompt
building time, rather than upstream while building the `ToolRouter`
itself. This causes issues because dynamic deferred tools are then
wrongly included in the router's `model_visible_specs`, which is what
the compaction request-building flow relies on.

### Fix
Move the dynamic deferred tool filtering to `ToolRouter` creation time
to solve this problem for every request that relies on `ToolRouter` for
`model_visible_specs`, which solves the issue generically.

### Tests
Added unit + integration tests to ensure dynamic deferred tools are
omitted from `model_visible_specs` and compaction request respectively.

Tested against live `/compact` endpoint; raw deferred dynamic tools
without `tool_search` returned `400` (current bug), while the filtered
payload (this fix) returns `200`.
This commit is contained in:
sayan-oai
2026-04-27 12:09:02 -07:00
committed by GitHub
parent e5709db6dc
commit 85c1500569
5 changed files with 293 additions and 69 deletions

View File

@@ -6,6 +6,7 @@ use std::path::PathBuf;
use anyhow::Result;
use codex_core::compact::SUMMARY_PREFIX;
use codex_login::CodexAuth;
use codex_protocol::dynamic_tools::DynamicToolSpec;
use codex_protocol::items::TurnItem;
use codex_protocol::models::ContentItem;
use codex_protocol::models::ResponseItem;
@@ -36,6 +37,7 @@ use core_test_support::wait_for_event;
use core_test_support::wait_for_event_match;
use core_test_support::wait_for_event_with_timeout;
use pretty_assertions::assert_eq;
use serde_json::Value;
use serde_json::json;
use tokio::time::Duration;
use wiremock::ResponseTemplate;
@@ -55,6 +57,53 @@ fn estimate_compact_payload_tokens(request: &responses::ResponsesRequest) -> i64
.saturating_add(approx_token_count(&request.instructions_text()))
}
fn assert_tools_payload_does_not_defer(body: &Value) {
if let Some(tools) = body.get("tools") {
assert!(
!contains_defer_loading(tools),
"model-visible tools should not include deferred declarations: {tools}"
);
}
}
fn namespace_child_tool_names(body: &Value, namespace: &str) -> Vec<String> {
body.get("tools")
.and_then(Value::as_array)
.and_then(|tools| {
tools.iter().find_map(|tool| {
if tool.get("type").and_then(Value::as_str) == Some("namespace")
&& tool.get("name").and_then(Value::as_str) == Some(namespace)
{
tool.get("tools").and_then(Value::as_array).map(|children| {
children
.iter()
.filter_map(|child| {
child
.get("name")
.and_then(Value::as_str)
.map(str::to_string)
})
.collect()
})
} else {
None
}
})
})
.unwrap_or_default()
}
fn contains_defer_loading(value: &Value) -> bool {
match value {
Value::Object(map) => {
map.get("defer_loading").and_then(Value::as_bool) == Some(true)
|| map.values().any(contains_defer_loading)
}
Value::Array(values) => values.iter().any(contains_defer_loading),
Value::Null | Value::Bool(_) | Value::Number(_) | Value::String(_) => false,
}
}
const PRETURN_CONTEXT_DIFF_CWD: &str = "/tmp/PRETURN_CONTEXT_DIFF_CWD";
const DUMMY_FUNCTION_NAME: &str = "test_tool";
const REMOTE_COMPACT_TURN_COMPLETE_TIMEOUT: Duration = Duration::from_secs(30);
@@ -358,6 +407,100 @@ async fn remote_compact_replaces_history_for_followups() -> Result<()> {
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_compact_filters_deferred_dynamic_tools() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let mut builder = test_codex().with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing());
let mut test = builder.build(&server).await?;
let hidden_tool = "hidden_dynamic_tool";
let visible_tool = "visible_dynamic_tool";
let input_schema = json!({
"type": "object",
"properties": {},
"additionalProperties": false,
});
let dynamic_tools = vec![
DynamicToolSpec {
namespace: Some("codex_app".to_string()),
name: hidden_tool.to_string(),
description: "Hidden until discovered.".to_string(),
input_schema: input_schema.clone(),
defer_loading: true,
},
DynamicToolSpec {
namespace: Some("codex_app".to_string()),
name: visible_tool.to_string(),
description: "Visible immediately.".to_string(),
input_schema,
defer_loading: false,
},
];
let new_thread = test
.thread_manager
.start_thread_with_tools(
test.config.clone(),
dynamic_tools,
/*persist_extended_history*/ false,
)
.await?;
test.codex = new_thread.thread;
test.session_configured = new_thread.session_configured;
let codex = test.codex.clone();
let responses_mock = mount_sse_once(
&server,
sse(vec![
responses::ev_assistant_message("m1", "FIRST_REMOTE_REPLY"),
responses::ev_completed("resp-1"),
]),
)
.await;
let compact_mock = responses::mount_compact_json_once(
&server,
serde_json::json!({
"output": compacted_summary_only_output("compact summary"),
}),
)
.await;
codex
.submit(Op::UserInput {
environments: None,
items: vec![UserInput::Text {
text: "hello remote compact".into(),
text_elements: Vec::new(),
}],
final_output_json_schema: None,
responsesapi_client_metadata: None,
})
.await?;
wait_for_turn_complete(&codex).await;
codex.submit(Op::Compact).await?;
wait_for_turn_complete(&codex).await;
let first_response_body = responses_mock.single_request().body_json();
let compact_body = compact_mock.single_request().body_json();
assert_eq!(
compact_body["tools"], first_response_body["tools"],
"compact requests should send the same model-visible tools payload as /v1/responses"
);
assert_tools_payload_does_not_defer(&first_response_body);
assert_tools_payload_does_not_defer(&compact_body);
assert_eq!(
namespace_child_tool_names(&first_response_body, "codex_app"),
vec![visible_tool.to_string()]
);
assert_eq!(
namespace_child_tool_names(&compact_body, "codex_app"),
vec![visible_tool.to_string()]
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_compact_runs_automatically() -> Result<()> {
skip_if_no_network!(Ok(()));