diff --git a/codex-rs/tools/src/json_schema.rs b/codex-rs/tools/src/json_schema.rs index 83733ffb7b..02936d52d1 100644 --- a/codex-rs/tools/src/json_schema.rs +++ b/codex-rs/tools/src/json_schema.rs @@ -160,6 +160,7 @@ pub fn parse_tool_input_schema(input_schema: &JsonValue) -> Result Result bool { + compact_normalized_schema_len(value) <= MAX_COMPACT_TOOL_SCHEMA_BYTES +} + +fn compact_normalized_schema_len(value: &JsonValue) -> usize { + serde_json::from_value::(value.clone()) + .and_then(|schema| serde_json::to_vec(&schema)) + .map(|json| json.len()) + .unwrap_or(0) +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum DefinitionTraversal { Include, @@ -214,6 +256,130 @@ fn for_each_schema_child( } } +fn strip_schema_descriptions(value: &mut JsonValue) { + match value { + JsonValue::Array(values) => { + for value in values { + strip_schema_descriptions(value); + } + } + JsonValue::Object(map) => { + map.remove("description"); + for_each_schema_child_mut(map, DefinitionTraversal::Include, &mut |value| { + strip_schema_descriptions(value); + }); + } + _ => {} + } +} + +fn for_each_schema_child_mut( + map: &mut serde_json::Map, + definition_traversal: DefinitionTraversal, + visitor: &mut impl FnMut(&mut JsonValue), +) { + if let Some(properties) = map.get_mut("properties") + && let Some(properties_map) = properties.as_object_mut() + { + for value in properties_map.values_mut() { + visitor(value); + } + } + + for key in SCHEMA_CHILD_KEYS { + if let Some(value) = map.get_mut(key) { + visitor(value); + } + } + + if let Some(additional_properties) = map.get_mut("additionalProperties") + && !matches!(additional_properties, JsonValue::Bool(_)) + { + visitor(additional_properties); + } + + if definition_traversal == DefinitionTraversal::Include { + for key in DEFINITION_TABLE_KEYS { + if let Some(definitions) = map.get_mut(key) + && let Some(definitions_map) = definitions.as_object_mut() + { + for value in definitions_map.values_mut() { + visitor(value); + } + } + } + } +} + +/// Replace local definition refs with empty schemas before dropping root +/// definition tables, so downstream behavior does not depend on how a schema +/// parser handles refs to missing definitions. +fn drop_schema_definitions(value: &mut JsonValue) { + rewrite_definition_refs_to_empty_schemas(value); + + let JsonValue::Object(map) = value else { + return; + }; + + for key in DEFINITION_TABLE_KEYS { + map.remove(key); + } +} + +fn rewrite_definition_refs_to_empty_schemas(value: &mut JsonValue) { + match value { + JsonValue::Array(values) => { + for value in values { + rewrite_definition_refs_to_empty_schemas(value); + } + } + JsonValue::Object(map) => { + if map + .get("$ref") + .and_then(JsonValue::as_str) + .and_then(parse_local_definition_ref) + .is_some() + { + *value = json!({}); + return; + } + + for_each_schema_child_mut(map, DefinitionTraversal::Skip, &mut |value| { + rewrite_definition_refs_to_empty_schemas(value); + }); + } + _ => {} + } +} + +fn collapse_deep_schema_objects(value: &mut JsonValue, depth: usize) { + match value { + JsonValue::Array(values) => { + for value in values { + collapse_deep_schema_objects(value, depth); + } + } + JsonValue::Object(map) => { + if depth >= MAX_COMPACT_TOOL_SCHEMA_DEPTH && is_complex_schema_object(map) { + *value = json!({}); + return; + } + + for_each_schema_child_mut(map, DefinitionTraversal::Skip, &mut |value| { + collapse_deep_schema_objects(value, depth + 1); + }); + } + _ => {} + } +} + +fn is_complex_schema_object(map: &serde_json::Map) -> bool { + SCHEMA_CHILD_KEYS.iter().any(|key| map.contains_key(*key)) + || map.contains_key("properties") + || map.contains_key("additionalProperties") + || map.contains_key("$ref") +} + /// Sanitize a JSON Schema (as serde_json::Value) so it can fit our limited /// schema representation. This function: /// - Ensures every typed schema object has a `"type"` when required. diff --git a/codex-rs/tools/src/json_schema_tests.rs b/codex-rs/tools/src/json_schema_tests.rs index 90ddcc4529..52b30138c1 100644 --- a/codex-rs/tools/src/json_schema_tests.rs +++ b/codex-rs/tools/src/json_schema_tests.rs @@ -779,6 +779,393 @@ fn parse_tool_input_schema_preserves_explicit_enum_type_union() { ); } +fn many_string_properties(count: usize) -> serde_json::Map { + (0..count) + .map(|index| { + ( + format!("field_{index:03}"), + serde_json::json!({ "type": "string" }), + ) + }) + .collect() +} + +#[test] +fn parse_large_tool_input_schema_stops_after_descriptions_when_under_budget() { + let schema = parse_tool_input_schema(&serde_json::json!({ + "type": "object", + "description": "x".repeat(4_500), + "properties": { + "metadata": { + "$ref": "#/$defs/metadata" + } + }, + "$defs": { + "metadata": { + "type": "string", + "description": "Metadata value" + } + } + })) + .expect("parse schema"); + + assert_eq!( + serde_json::to_value(schema).expect("serialize schema"), + serde_json::json!({ + "type": "object", + "properties": { + "metadata": { + "$ref": "#/$defs/metadata" + } + }, + "$defs": { + "metadata": { + "type": "string" + } + } + }) + ); +} + +#[test] +fn parse_large_tool_input_schema_ignores_dropped_metadata_for_budget() { + let schema = parse_tool_input_schema(&serde_json::json!({ + "type": "object", + "properties": { + "event": { + "type": "object", + "title": "Calendar event", + "properties": { + "recurrence": { + "type": "object", + "examples": [ + { + "payload": "x".repeat(4_500) + } + ], + "properties": { + "pattern": { + "type": "string", + "title": "Recurrence pattern" + } + } + } + } + } + } + })) + .expect("parse schema"); + + assert_eq!( + serde_json::to_value(schema).expect("serialize schema"), + serde_json::json!({ + "type": "object", + "properties": { + "event": { + "type": "object", + "properties": { + "recurrence": { + "type": "object", + "properties": { + "pattern": { + "type": "string" + } + } + } + } + } + } + }) + ); +} + +#[test] +fn parse_large_tool_input_schema_stops_after_dropping_root_definitions_when_under_budget() { + let schema = parse_tool_input_schema(&serde_json::json!({ + "type": "object", + "description": "x".repeat(4_500), + "properties": { + "event": { + "type": "object", + "description": "Calendar event", + "properties": { + "recurrence": { + "type": "object", + "description": "Recurrence settings", + "properties": { + "pattern": { + "type": "string", + "description": "Recurrence pattern" + } + } + } + } + }, + "metadata": { + "$ref": "#/$defs/metadata" + } + }, + "$defs": { + "metadata": { + "type": "object", + "description": "metadata object", + "properties": many_string_properties(/*count*/ 300) + } + } + })) + .expect("parse schema"); + + assert_eq!( + serde_json::to_value(schema).expect("serialize schema"), + serde_json::json!({ + "type": "object", + "properties": { + "event": { + "type": "object", + "properties": { + "recurrence": { + "type": "object", + "properties": { + "pattern": { + "type": "string" + } + } + } + } + }, + "metadata": {} + } + }) + ); +} + +#[test] +fn parse_large_tool_input_schema_strips_descriptions_without_removing_description_property() { + let schema = parse_tool_input_schema(&serde_json::json!({ + "type": "object", + "description": "x".repeat(4_500), + "properties": { + "description": { + "type": "string", + "description": "User-facing description value" + }, + "metadata": { + "type": "object", + "description": "Metadata object", + "properties": { + "label": { + "type": "string", + "description": "Metadata label" + } + } + }, + "tags": { + "type": "array", + "description": "Tag list", + "items": { + "type": "string", + "description": "Tag value" + } + }, + "extras": { + "type": "object", + "additionalProperties": { + "type": "string", + "description": "Extra value" + } + }, + "choice": { + "description": "Choice value", + "anyOf": [ + { + "type": "string", + "description": "String choice" + }, + { + "type": "number", + "description": "Number choice" + } + ] + } + } + })) + .expect("parse schema"); + + assert_eq!( + serde_json::to_value(schema).expect("serialize schema"), + serde_json::json!({ + "type": "object", + "properties": { + "choice": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "number" + } + ] + }, + "description": { + "type": "string" + }, + "extras": { + "type": "object", + "properties": {}, + "additionalProperties": { + "type": "string" + } + }, + "metadata": { + "type": "object", + "properties": { + "label": { + "type": "string" + } + } + }, + "tags": { + "type": "array", + "items": { + "type": "string" + } + } + } + }) + ); +} + +#[test] +fn parse_large_tool_input_schema_preserves_object_enum_literal_descriptions() { + let schema = parse_tool_input_schema(&serde_json::json!({ + "type": "object", + "description": "x".repeat(4_500), + "properties": { + "choice": { + "enum": [ + { + "description": "first literal", + "id": 1 + }, + { + "description": "second literal", + "id": 2 + } + ] + } + } + })) + .expect("parse schema"); + + assert_eq!( + serde_json::to_value(schema).expect("serialize schema"), + serde_json::json!({ + "type": "object", + "properties": { + "choice": { + "type": "string", + "enum": [ + { + "description": "first literal", + "id": 1 + }, + { + "description": "second literal", + "id": 2 + } + ] + } + } + }) + ); +} + +#[test] +fn collapse_deep_schema_objects_traverses_schema_children() { + let mut schema = serde_json::json!({ + "type": "object", + "properties": { + "object_parent": { + "type": "object", + "properties": { + "complex": { + "type": "object", + "properties": { + "leaf": { "type": "string" } + } + }, + "scalar": { + "type": "string" + } + } + }, + "array_parent": { + "type": "array", + "items": { + "type": "object", + "properties": { + "leaf": { "type": "string" } + } + } + }, + "map_parent": { + "type": "object", + "additionalProperties": { + "type": "object", + "properties": { + "leaf": { "type": "string" } + } + } + }, + "union_parent": { + "anyOf": [ + { + "type": "object", + "properties": { + "leaf": { "type": "string" } + } + }, + { "type": "string" } + ] + } + } + }); + + super::collapse_deep_schema_objects(&mut schema, /*depth*/ 0); + + assert_eq!( + schema, + serde_json::json!({ + "type": "object", + "properties": { + "object_parent": { + "type": "object", + "properties": { + "complex": {}, + "scalar": { + "type": "string" + } + } + }, + "array_parent": { + "type": "array", + "items": {} + }, + "map_parent": { + "type": "object", + "additionalProperties": {} + }, + "union_parent": { + "anyOf": [ + {}, + { "type": "string" } + ] + } + } + }) + ); +} + #[test] fn parse_tool_input_schema_preserves_string_enum_constraints() { // Example schema shape: