codex-tools: extract shared tool schema parsing (#15923)

## Why

`parse_tool_input_schema` and the supporting `JsonSchema` model were
living in `core/src/tools/spec.rs`, but they already serve callers
outside `codex-core`.

Keeping that shared schema parsing logic inside `codex-core` makes the
crate boundary harder to reason about and works against the guidance in
`AGENTS.md` to avoid growing `codex-core` when reusable code can live
elsewhere.

This change takes the first extraction step by moving the schema parsing
primitive into its own crate while keeping the rest of the tool-spec
assembly in `codex-core`.

## What changed

- added a new `codex-tools` crate under `codex-rs/tools`
- moved the shared tool input schema model and sanitizer/parser into
`tools/src/json_schema.rs`
- kept `tools/src/lib.rs` exports-only, with the module-level unit tests
split into `json_schema_tests.rs`
- updated `codex-core` to use `codex-tools::JsonSchema` and re-export
`parse_tool_input_schema`
- updated `codex-app-server` dynamic tool validation to depend on
`codex-tools` directly instead of reaching through `codex-core`
- wired the new crate into the Cargo workspace and Bazel build graph
This commit is contained in:
Michael Bolin
2026-03-26 17:03:35 -07:00
committed by GitHub
parent a27cd2d281
commit 44d28f500f
14 changed files with 394 additions and 183 deletions

View File

@@ -201,6 +201,7 @@ pub use client_common::REVIEW_PROMPT;
pub use client_common::ResponseEvent;
pub use client_common::ResponseStream;
pub use codex_sandboxing::get_platform_sandbox;
pub use codex_tools::parse_tool_input_schema;
pub use compact::content_items_to_text;
pub use event_mapping::parse_turn_item;
pub use exec_policy::ExecPolicyError;
@@ -208,7 +209,6 @@ pub use exec_policy::check_execpolicy_for_warnings;
pub use exec_policy::format_exec_policy_error_with_source;
pub use exec_policy::load_exec_policy;
pub use file_watcher::FileWatcherEvent;
pub use tools::spec::parse_tool_input_schema;
pub use turn_metadata::build_turn_metadata_header;
pub mod compact;
pub mod memory_trace;

View File

@@ -46,6 +46,7 @@ use codex_protocol::openai_models::WebSearchToolType;
use codex_protocol::protocol::SandboxPolicy;
use codex_protocol::protocol::SessionSource;
use codex_protocol::protocol::SubAgentSource;
pub use codex_tools::parse_tool_input_schema;
use codex_utils_absolute_path::AbsolutePathBuf;
use serde::Deserialize;
use serde::Serialize;
@@ -55,6 +56,8 @@ use std::collections::BTreeMap;
use std::collections::HashMap;
use std::path::PathBuf;
pub type JsonSchema = codex_tools::JsonSchema;
const TOOL_SEARCH_DESCRIPTION_TEMPLATE: &str =
include_str!("../../templates/search_tool/tool_description.md");
const TOOL_SUGGEST_DESCRIPTION_TEMPLATE: &str =
@@ -539,62 +542,6 @@ fn supports_image_generation(model_info: &ModelInfo) -> bool {
model_info.input_modalities.contains(&InputModality::Image)
}
/// Generic JSONSchema subset needed for our tool definitions
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum JsonSchema {
Boolean {
#[serde(skip_serializing_if = "Option::is_none")]
description: Option<String>,
},
String {
#[serde(skip_serializing_if = "Option::is_none")]
description: Option<String>,
},
/// MCP schema allows "number" | "integer" for Number
#[serde(alias = "integer")]
Number {
#[serde(skip_serializing_if = "Option::is_none")]
description: Option<String>,
},
Array {
items: Box<JsonSchema>,
#[serde(skip_serializing_if = "Option::is_none")]
description: Option<String>,
},
Object {
properties: BTreeMap<String, JsonSchema>,
#[serde(skip_serializing_if = "Option::is_none")]
required: Option<Vec<String>>,
#[serde(
rename = "additionalProperties",
skip_serializing_if = "Option::is_none"
)]
additional_properties: Option<AdditionalProperties>,
},
}
/// Whether additional properties are allowed, and if so, any required schema
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(untagged)]
pub enum AdditionalProperties {
Boolean(bool),
Schema(Box<JsonSchema>),
}
impl From<bool> for AdditionalProperties {
fn from(b: bool) -> Self {
Self::Boolean(b)
}
}
impl From<JsonSchema> for AdditionalProperties {
fn from(s: JsonSchema) -> Self {
Self::Schema(Box::new(s))
}
}
fn create_network_permissions_schema() -> JsonSchema {
JsonSchema::Object {
properties: BTreeMap::from([(
@@ -2458,13 +2405,6 @@ fn dynamic_tool_to_openai_tool(
})
}
/// Parse the tool input_schema or return an error for invalid schema
pub fn parse_tool_input_schema(input_schema: &JsonValue) -> Result<JsonSchema, serde_json::Error> {
let mut input_schema = input_schema.clone();
sanitize_json_schema(&mut input_schema);
serde_json::from_value::<JsonSchema>(input_schema)
}
fn mcp_tool_to_openai_tool_parts(
tool: rmcp::model::Tool,
) -> Result<(String, JsonSchema, Option<JsonValue>), serde_json::Error> {
@@ -2489,13 +2429,7 @@ fn mcp_tool_to_openai_tool_parts(
);
}
// Serialize to a raw JSON value so we can sanitize schemas coming from MCP
// servers. Some servers omit the top-level or nested `type` in JSON
// Schemas (e.g. using enum/anyOf), or use unsupported variants like
// `integer`. Our internal JsonSchema is a small subset and requires
// `type`, so we coerce/sanitize here for compatibility.
sanitize_json_schema(&mut serialized_input_schema);
let input_schema = serde_json::from_value::<JsonSchema>(serialized_input_schema)?;
let input_schema = parse_tool_input_schema(&serialized_input_schema)?;
let structured_content_schema = output_schema
.map(|output_schema| serde_json::Value::Object(output_schema.as_ref().clone()))
.unwrap_or_else(|| JsonValue::Object(serde_json::Map::new()));
@@ -2526,117 +2460,6 @@ fn mcp_call_tool_result_output_schema(structured_content_schema: JsonValue) -> J
})
}
/// Sanitize a JSON Schema (as serde_json::Value) so it can fit our limited
/// JsonSchema enum. This function:
/// - Ensures every schema object has a "type". If missing, infers it from
/// common keywords (properties => object, items => array, enum/const/format => string)
/// and otherwise defaults to "string".
/// - Fills required child fields (e.g. array items, object properties) with
/// permissive defaults when absent.
fn sanitize_json_schema(value: &mut JsonValue) {
match value {
JsonValue::Bool(_) => {
// JSON Schema boolean form: true/false. Coerce to an accept-all string.
*value = json!({ "type": "string" });
}
JsonValue::Array(arr) => {
for v in arr.iter_mut() {
sanitize_json_schema(v);
}
}
JsonValue::Object(map) => {
// First, recursively sanitize known nested schema holders
if let Some(props) = map.get_mut("properties")
&& let Some(props_map) = props.as_object_mut()
{
for (_k, v) in props_map.iter_mut() {
sanitize_json_schema(v);
}
}
if let Some(items) = map.get_mut("items") {
sanitize_json_schema(items);
}
// Some schemas use oneOf/anyOf/allOf - sanitize their entries
for combiner in ["oneOf", "anyOf", "allOf", "prefixItems"] {
if let Some(v) = map.get_mut(combiner) {
sanitize_json_schema(v);
}
}
// Normalize/ensure type
let mut ty = map.get("type").and_then(|v| v.as_str()).map(str::to_string);
// If type is an array (union), pick first supported; else leave to inference
if ty.is_none()
&& let Some(JsonValue::Array(types)) = map.get("type")
{
for t in types {
if let Some(tt) = t.as_str()
&& matches!(
tt,
"object" | "array" | "string" | "number" | "integer" | "boolean"
)
{
ty = Some(tt.to_string());
break;
}
}
}
// Infer type if still missing
if ty.is_none() {
if map.contains_key("properties")
|| map.contains_key("required")
|| map.contains_key("additionalProperties")
{
ty = Some("object".to_string());
} else if map.contains_key("items") || map.contains_key("prefixItems") {
ty = Some("array".to_string());
} else if map.contains_key("enum")
|| map.contains_key("const")
|| map.contains_key("format")
{
ty = Some("string".to_string());
} else if map.contains_key("minimum")
|| map.contains_key("maximum")
|| map.contains_key("exclusiveMinimum")
|| map.contains_key("exclusiveMaximum")
|| map.contains_key("multipleOf")
{
ty = Some("number".to_string());
}
}
// If we still couldn't infer, default to string
let ty = ty.unwrap_or_else(|| "string".to_string());
map.insert("type".to_string(), JsonValue::String(ty.to_string()));
// Ensure object schemas have properties map
if ty == "object" {
if !map.contains_key("properties") {
map.insert(
"properties".to_string(),
JsonValue::Object(serde_json::Map::new()),
);
}
// If additionalProperties is an object schema, sanitize it too.
// Leave booleans as-is, since JSON Schema allows boolean here.
if let Some(ap) = map.get_mut("additionalProperties") {
let is_bool = matches!(ap, JsonValue::Bool(_));
if !is_bool {
sanitize_json_schema(ap);
}
}
}
// Ensure array schemas have items
if ty == "array" && !map.contains_key("items") {
map.insert("items".to_string(), json!({ "type": "string" }));
}
}
_ => {}
}
}
/// Builds the tool registry builder while collecting tool specs for later serialization.
#[cfg(test)]
pub(crate) fn build_specs(

View File

@@ -11,6 +11,7 @@ use codex_app_server_protocol::AppInfo;
use codex_protocol::openai_models::InputModality;
use codex_protocol::openai_models::ModelInfo;
use codex_protocol::openai_models::ModelsResponse;
use codex_tools::AdditionalProperties;
use codex_utils_absolute_path::AbsolutePathBuf;
use pretty_assertions::assert_eq;
use std::path::PathBuf;