mirror of
https://github.com/openai/codex.git
synced 2026-05-01 09:56:37 +00:00
Compare commits
8 Commits
iceweasel/
...
codex/pres
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a10e034295 | ||
|
|
0a8919893f | ||
|
|
f3d0763364 | ||
|
|
34839759a1 | ||
|
|
ac1bfd42a9 | ||
|
|
dd8a3353db | ||
|
|
1048c68e18 | ||
|
|
ed2b627bc1 |
@@ -147,8 +147,9 @@ impl From<JsonSchema> for AdditionalProperties {
|
||||
|
||||
/// Parse the tool `input_schema` or return an error for invalid schema.
|
||||
pub fn parse_tool_input_schema(input_schema: &JsonValue) -> Result<JsonSchema, serde_json::Error> {
|
||||
let mut input_schema = input_schema.clone();
|
||||
sanitize_json_schema(&mut input_schema);
|
||||
let root_schema = input_schema.clone();
|
||||
let mut input_schema = root_schema.clone();
|
||||
sanitize_json_schema(&mut input_schema, &root_schema, &mut Vec::new());
|
||||
let schema: JsonSchema = serde_json::from_value(input_schema)?;
|
||||
if matches!(
|
||||
schema.schema_type,
|
||||
@@ -159,45 +160,87 @@ pub fn parse_tool_input_schema(input_schema: &JsonValue) -> Result<JsonSchema, s
|
||||
Ok(schema)
|
||||
}
|
||||
|
||||
/// Sanitize a JSON Schema (as serde_json::Value) so it can fit our limited
|
||||
/// schema representation. This function:
|
||||
/// Sanitize one schema node so it can fit our limited schema representation.
|
||||
///
|
||||
/// `schema_node` is the current node being normalized in place.
|
||||
/// `root_schema` is the original top-level schema so local `$ref` pointers can
|
||||
/// be resolved against it.
|
||||
/// `active_refs` tracks the local `$ref` pointers on the current recursion
|
||||
/// stack so self-referential definitions can be cut off without recursing
|
||||
/// forever.
|
||||
///
|
||||
/// This function:
|
||||
/// - Ensures every typed schema object has a `"type"` when required.
|
||||
/// - Resolves local `$ref` indirections and unwraps single-variant
|
||||
/// `oneOf`/`anyOf`/`allOf` wrappers before inferring a fallback type.
|
||||
/// - Preserves explicit `anyOf`.
|
||||
/// - Collapses `const` into single-value `enum`.
|
||||
/// - Fills required child fields for object/array schema types, including
|
||||
/// nullable unions, with permissive defaults when absent.
|
||||
fn sanitize_json_schema(value: &mut JsonValue) {
|
||||
match value {
|
||||
fn sanitize_json_schema(
|
||||
schema_node: &mut JsonValue,
|
||||
root_schema: &JsonValue,
|
||||
active_refs: &mut Vec<String>,
|
||||
) {
|
||||
match schema_node {
|
||||
JsonValue::Bool(_) => {
|
||||
// JSON Schema boolean form: true/false. Coerce to an accept-all string.
|
||||
*value = json!({ "type": "string" });
|
||||
*schema_node = json!({ "type": "string" });
|
||||
}
|
||||
JsonValue::Array(values) => {
|
||||
for value in values {
|
||||
sanitize_json_schema(value);
|
||||
for array_value in values {
|
||||
sanitize_json_schema(array_value, root_schema, active_refs);
|
||||
}
|
||||
}
|
||||
JsonValue::Object(map) => {
|
||||
let active_ref_depth = active_refs.len();
|
||||
if let Some(pointer) = map
|
||||
.get("$ref")
|
||||
.and_then(JsonValue::as_str)
|
||||
.and_then(|reference| reference.strip_prefix('#'))
|
||||
{
|
||||
if active_refs.iter().any(|active_ref| active_ref == pointer) {
|
||||
*schema_node = cyclic_reference_fallback(map, root_schema.pointer(pointer));
|
||||
sanitize_json_schema(schema_node, root_schema, active_refs);
|
||||
return;
|
||||
}
|
||||
|
||||
active_refs.push(pointer.to_string());
|
||||
}
|
||||
|
||||
if let Some(replacement) = resolve_json_schema_reference(map, root_schema) {
|
||||
*schema_node = replacement;
|
||||
sanitize_json_schema(schema_node, root_schema, active_refs);
|
||||
active_refs.pop();
|
||||
return;
|
||||
}
|
||||
|
||||
if let Some(replacement) = unwrap_single_variant_combiner(map) {
|
||||
*schema_node = replacement;
|
||||
sanitize_json_schema(schema_node, root_schema, active_refs);
|
||||
return;
|
||||
}
|
||||
|
||||
if let Some(properties) = map.get_mut("properties")
|
||||
&& let Some(properties_map) = properties.as_object_mut()
|
||||
{
|
||||
for value in properties_map.values_mut() {
|
||||
sanitize_json_schema(value);
|
||||
sanitize_json_schema(value, root_schema, active_refs);
|
||||
}
|
||||
}
|
||||
if let Some(items) = map.get_mut("items") {
|
||||
sanitize_json_schema(items);
|
||||
sanitize_json_schema(items, root_schema, active_refs);
|
||||
}
|
||||
if let Some(additional_properties) = map.get_mut("additionalProperties")
|
||||
&& !matches!(additional_properties, JsonValue::Bool(_))
|
||||
{
|
||||
sanitize_json_schema(additional_properties);
|
||||
sanitize_json_schema(additional_properties, root_schema, active_refs);
|
||||
}
|
||||
if let Some(value) = map.get_mut("prefixItems") {
|
||||
sanitize_json_schema(value);
|
||||
sanitize_json_schema(value, root_schema, active_refs);
|
||||
}
|
||||
if let Some(value) = map.get_mut("anyOf") {
|
||||
sanitize_json_schema(value);
|
||||
sanitize_json_schema(value, root_schema, active_refs);
|
||||
}
|
||||
|
||||
if let Some(const_value) = map.remove("const") {
|
||||
@@ -234,11 +277,306 @@ fn sanitize_json_schema(value: &mut JsonValue) {
|
||||
|
||||
write_schema_types(map, &schema_types);
|
||||
ensure_default_children_for_schema_types(map, &schema_types);
|
||||
|
||||
active_refs.truncate(active_ref_depth);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve_json_schema_reference(
|
||||
map: &serde_json::Map<String, JsonValue>,
|
||||
root_schema: &JsonValue,
|
||||
) -> Option<JsonValue> {
|
||||
let reference = map.get("$ref")?.as_str()?;
|
||||
let pointer = reference.strip_prefix('#')?;
|
||||
let mut replacement = mergeable_schema_node(root_schema.pointer(pointer)?.clone());
|
||||
if let JsonValue::Object(replacement_map) = &mut replacement {
|
||||
merge_json_schema_objects(replacement_map, map);
|
||||
}
|
||||
Some(replacement)
|
||||
}
|
||||
|
||||
fn cyclic_reference_fallback(
|
||||
map: &serde_json::Map<String, JsonValue>,
|
||||
reference_target: Option<&JsonValue>,
|
||||
) -> JsonValue {
|
||||
let mut fallback = reference_target
|
||||
.and_then(cyclic_reference_target_fallback)
|
||||
.unwrap_or_else(|| json!({ "type": "string" }));
|
||||
|
||||
if let JsonValue::Object(fallback_map) = &mut fallback {
|
||||
merge_json_schema_objects(fallback_map, map);
|
||||
fallback_map.remove("$ref");
|
||||
}
|
||||
|
||||
fallback
|
||||
}
|
||||
|
||||
fn cyclic_reference_target_fallback(reference_target: &JsonValue) -> Option<JsonValue> {
|
||||
let target_map = reference_target.as_object()?;
|
||||
let schema_types = normalized_schema_types(target_map);
|
||||
|
||||
if schema_types.contains(&JsonSchemaPrimitiveType::Object)
|
||||
|| target_map.contains_key("properties")
|
||||
|| target_map.contains_key("required")
|
||||
|| target_map.contains_key("additionalProperties")
|
||||
{
|
||||
return Some(json!({ "type": "object", "properties": {} }));
|
||||
}
|
||||
|
||||
if schema_types.contains(&JsonSchemaPrimitiveType::Array)
|
||||
|| target_map.contains_key("items")
|
||||
|| target_map.contains_key("prefixItems")
|
||||
{
|
||||
return Some(json!({ "type": "array", "items": { "type": "string" } }));
|
||||
}
|
||||
|
||||
if schema_types.contains(&JsonSchemaPrimitiveType::Number) {
|
||||
return Some(json!({ "type": "number" }));
|
||||
}
|
||||
|
||||
if schema_types.contains(&JsonSchemaPrimitiveType::Integer) {
|
||||
return Some(json!({ "type": "integer" }));
|
||||
}
|
||||
|
||||
if schema_types.contains(&JsonSchemaPrimitiveType::Boolean) {
|
||||
return Some(json!({ "type": "boolean" }));
|
||||
}
|
||||
|
||||
Some(json!({ "type": "string" }))
|
||||
}
|
||||
|
||||
/// Converts permissive schema nodes into an object form that can safely absorb
|
||||
/// adjacent constraints during local `$ref` resolution or single-variant
|
||||
/// combiner unwrapping.
|
||||
///
|
||||
/// In JSON Schema, `true` means "match anything". When a `true` node sits next
|
||||
/// to sibling keywords such as `properties` or `required`, those sibling
|
||||
/// constraints still matter. Representing `true` as `{}` keeps those constraints
|
||||
/// available to our merge logic instead of dropping them on the floor.
|
||||
fn mergeable_schema_node(schema_node: JsonValue) -> JsonValue {
|
||||
match schema_node {
|
||||
JsonValue::Bool(true) => JsonValue::Object(serde_json::Map::new()),
|
||||
other => other,
|
||||
}
|
||||
}
|
||||
|
||||
/// Merges overlay keywords into a base schema object while preserving the
|
||||
/// narrowest combined constraint for overlapping keys.
|
||||
fn merge_json_schema_objects(
|
||||
base: &mut serde_json::Map<String, JsonValue>,
|
||||
overlay: &serde_json::Map<String, JsonValue>,
|
||||
) {
|
||||
for (key, overlay_value) in overlay {
|
||||
if key == "$ref" {
|
||||
continue;
|
||||
}
|
||||
|
||||
let Some(base_value) = base.get_mut(key) else {
|
||||
base.insert(key.clone(), overlay_value.clone());
|
||||
continue;
|
||||
};
|
||||
|
||||
if base_value == overlay_value {
|
||||
continue;
|
||||
}
|
||||
|
||||
match key.as_str() {
|
||||
"enum" => merge_enum_values(base_value, overlay_value),
|
||||
"required" => merge_required_arrays(base_value, overlay_value),
|
||||
"properties" => merge_property_maps(base_value, overlay_value),
|
||||
"type" => merge_schema_types(base_value, overlay_value),
|
||||
"additionalProperties" => {
|
||||
if let Some(merged_value) =
|
||||
merge_additional_properties(base_value.clone(), overlay_value.clone())
|
||||
{
|
||||
*base_value = merged_value;
|
||||
}
|
||||
}
|
||||
"minimum" | "exclusiveMinimum" => {
|
||||
merge_numeric_bound(base_value, overlay_value, /*take_max*/ true)
|
||||
}
|
||||
"maximum" | "exclusiveMaximum" => {
|
||||
merge_numeric_bound(base_value, overlay_value, /*take_max*/ false)
|
||||
}
|
||||
_ => {
|
||||
if let (Some(base_map), Some(overlay_map)) =
|
||||
(base_value.as_object_mut(), overlay_value.as_object())
|
||||
{
|
||||
merge_json_schema_objects(base_map, overlay_map);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn merge_enum_values(base_value: &mut JsonValue, overlay_value: &JsonValue) {
|
||||
let Some(base_array) = base_value.as_array() else {
|
||||
return;
|
||||
};
|
||||
let Some(overlay_array) = overlay_value.as_array() else {
|
||||
return;
|
||||
};
|
||||
|
||||
let intersection = base_array
|
||||
.iter()
|
||||
.filter(|value| overlay_array.contains(value))
|
||||
.cloned()
|
||||
.collect::<Vec<_>>();
|
||||
*base_value = JsonValue::Array(intersection);
|
||||
}
|
||||
|
||||
fn merge_required_arrays(base_value: &mut JsonValue, overlay_value: &JsonValue) {
|
||||
let Some(base_array) = base_value.as_array_mut() else {
|
||||
return;
|
||||
};
|
||||
let Some(overlay_array) = overlay_value.as_array() else {
|
||||
return;
|
||||
};
|
||||
|
||||
for required_value in overlay_array {
|
||||
if !base_array.contains(required_value) {
|
||||
base_array.push(required_value.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn merge_property_maps(base_value: &mut JsonValue, overlay_value: &JsonValue) {
|
||||
let Some(base_map) = base_value.as_object_mut() else {
|
||||
return;
|
||||
};
|
||||
let Some(overlay_map) = overlay_value.as_object() else {
|
||||
return;
|
||||
};
|
||||
|
||||
for (property_name, overlay_property) in overlay_map {
|
||||
let Some(base_property) = base_map.get_mut(property_name) else {
|
||||
base_map.insert(property_name.clone(), overlay_property.clone());
|
||||
continue;
|
||||
};
|
||||
|
||||
if let (Some(base_property_map), Some(overlay_property_map)) =
|
||||
(base_property.as_object_mut(), overlay_property.as_object())
|
||||
{
|
||||
merge_json_schema_objects(base_property_map, overlay_property_map);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn merge_schema_types(base_value: &mut JsonValue, overlay_value: &JsonValue) {
|
||||
let base_types = json_schema_type_names(base_value);
|
||||
let overlay_types = json_schema_type_names(overlay_value);
|
||||
if base_types.is_empty() || overlay_types.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
let merged_types: Vec<String> = [
|
||||
"string", "number", "boolean", "integer", "object", "array", "null",
|
||||
]
|
||||
.into_iter()
|
||||
.filter(|candidate| type_sets_overlap(&base_types, &overlay_types, candidate))
|
||||
.map(str::to_string)
|
||||
.collect();
|
||||
if merged_types.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
*base_value = match merged_types.as_slice() {
|
||||
[schema_type] => JsonValue::String(schema_type.clone()),
|
||||
_ => JsonValue::Array(
|
||||
merged_types
|
||||
.into_iter()
|
||||
.map(JsonValue::String)
|
||||
.collect::<Vec<_>>(),
|
||||
),
|
||||
};
|
||||
}
|
||||
|
||||
fn type_sets_overlap(base_types: &[String], overlay_types: &[String], candidate: &str) -> bool {
|
||||
if base_types.iter().any(|base_type| base_type == candidate)
|
||||
&& overlay_types
|
||||
.iter()
|
||||
.any(|overlay_type| overlay_type == candidate)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
candidate == "integer"
|
||||
&& ((base_types.iter().any(|base_type| base_type == "integer")
|
||||
&& overlay_types
|
||||
.iter()
|
||||
.any(|overlay_type| overlay_type == "number"))
|
||||
|| (base_types.iter().any(|base_type| base_type == "number")
|
||||
&& overlay_types
|
||||
.iter()
|
||||
.any(|overlay_type| overlay_type == "integer")))
|
||||
}
|
||||
|
||||
fn json_schema_type_names(value: &JsonValue) -> Vec<String> {
|
||||
match value {
|
||||
JsonValue::String(schema_type) => vec![schema_type.clone()],
|
||||
JsonValue::Array(values) => values
|
||||
.iter()
|
||||
.filter_map(JsonValue::as_str)
|
||||
.map(str::to_string)
|
||||
.collect(),
|
||||
_ => Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn merge_additional_properties(
|
||||
base_value: JsonValue,
|
||||
overlay_value: JsonValue,
|
||||
) -> Option<JsonValue> {
|
||||
match (base_value, overlay_value) {
|
||||
(JsonValue::Bool(false), _) | (_, JsonValue::Bool(false)) => Some(JsonValue::Bool(false)),
|
||||
(JsonValue::Bool(true), other) | (other, JsonValue::Bool(true)) => Some(other),
|
||||
(JsonValue::Object(mut base_map), JsonValue::Object(overlay_map)) => {
|
||||
merge_json_schema_objects(&mut base_map, &overlay_map);
|
||||
Some(JsonValue::Object(base_map))
|
||||
}
|
||||
(base_value, overlay_value) if base_value == overlay_value => Some(base_value),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn merge_numeric_bound(base_value: &mut JsonValue, overlay_value: &JsonValue, take_max: bool) {
|
||||
let (Some(base_number), Some(overlay_number)) = (base_value.as_f64(), overlay_value.as_f64())
|
||||
else {
|
||||
return;
|
||||
};
|
||||
|
||||
let merged_number = if take_max {
|
||||
base_number.max(overlay_number)
|
||||
} else {
|
||||
base_number.min(overlay_number)
|
||||
};
|
||||
if let Some(number) = serde_json::Number::from_f64(merged_number) {
|
||||
*base_value = JsonValue::Number(number);
|
||||
}
|
||||
}
|
||||
|
||||
fn unwrap_single_variant_combiner(map: &serde_json::Map<String, JsonValue>) -> Option<JsonValue> {
|
||||
for combiner in ["oneOf", "anyOf", "allOf"] {
|
||||
let Some(variants) = map.get(combiner).and_then(JsonValue::as_array) else {
|
||||
continue;
|
||||
};
|
||||
if variants.len() != 1 {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut replacement = mergeable_schema_node(variants[0].clone());
|
||||
if let JsonValue::Object(replacement_map) = &mut replacement {
|
||||
merge_json_schema_objects(replacement_map, map);
|
||||
replacement_map.remove(combiner);
|
||||
}
|
||||
return Some(replacement);
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
fn ensure_default_children_for_schema_types(
|
||||
map: &mut serde_json::Map<String, JsonValue>,
|
||||
schema_types: &[JsonSchemaPrimitiveType],
|
||||
@@ -335,6 +673,13 @@ fn singleton_null_schema_error() -> serde_json::Error {
|
||||
))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) use crate::json_schema_structured_outputs::validate_structured_outputs_schema;
|
||||
|
||||
#[cfg(test)]
|
||||
#[path = "json_schema_tests.rs"]
|
||||
mod tests;
|
||||
|
||||
#[cfg(test)]
|
||||
#[path = "json_schema_structured_outputs_tests.rs"]
|
||||
mod structured_outputs_tests;
|
||||
|
||||
103
codex-rs/tools/src/json_schema_structured_outputs.rs
Normal file
103
codex-rs/tools/src/json_schema_structured_outputs.rs
Normal file
@@ -0,0 +1,103 @@
|
||||
use crate::AdditionalProperties;
|
||||
use crate::JsonSchema;
|
||||
use crate::JsonSchemaPrimitiveType;
|
||||
use crate::JsonSchemaType;
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
/// Validates the subset of JSON Schema invariants required by OpenAI
|
||||
/// Structured Outputs for the object-heavy MCP regression tests in this crate.
|
||||
///
|
||||
/// Source of truth:
|
||||
/// https://developers.openai.com/api/docs/guides/structured-outputs#supported-schemas
|
||||
///
|
||||
/// This validator currently focuses on the object constraints that matter for
|
||||
/// the `start` / `end` nested-object regression:
|
||||
/// - the root schema must be an object and must not use root-level `anyOf`
|
||||
/// - every object must set `additionalProperties: false`
|
||||
/// - every object property must appear in `required`
|
||||
/// - nested `anyOf` branches and array items must themselves satisfy the same
|
||||
/// subset whenever they contain objects
|
||||
///
|
||||
/// It intentionally does not yet enforce the documented global size limits
|
||||
/// (property count, nesting depth, enum count, total string budget). Those are
|
||||
/// broader policy checks and can be layered on later without changing the
|
||||
/// regression tests that pin the object-shape bug fixed in PR #18159.
|
||||
pub(crate) fn validate_structured_outputs_schema(schema: &JsonSchema) -> Result<(), String> {
|
||||
validate_structured_outputs_schema_at_path(schema, "root", /*is_root*/ true)
|
||||
}
|
||||
|
||||
fn validate_structured_outputs_schema_at_path(
|
||||
schema: &JsonSchema,
|
||||
path: &str,
|
||||
is_root: bool,
|
||||
) -> Result<(), String> {
|
||||
if is_root {
|
||||
if schema.any_of.is_some() {
|
||||
return Err(format!(
|
||||
"{path}: root schema must not use `anyOf`; see https://developers.openai.com/api/docs/guides/structured-outputs#supported-schemas"
|
||||
));
|
||||
}
|
||||
|
||||
if schema.schema_type != Some(JsonSchemaType::Single(JsonSchemaPrimitiveType::Object)) {
|
||||
return Err(format!(
|
||||
"{path}: root schema must be an object; see https://developers.openai.com/api/docs/guides/structured-outputs#supported-schemas"
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(any_of) = &schema.any_of {
|
||||
for (index, variant) in any_of.iter().enumerate() {
|
||||
validate_structured_outputs_schema_at_path(
|
||||
variant,
|
||||
&format!("{path}.anyOf[{index}]"),
|
||||
/*is_root*/ false,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(items) = &schema.items {
|
||||
validate_structured_outputs_schema_at_path(
|
||||
items,
|
||||
&format!("{path}.items"),
|
||||
/*is_root*/ false,
|
||||
)?;
|
||||
}
|
||||
|
||||
if let Some(JsonSchemaType::Single(JsonSchemaPrimitiveType::Object)) = &schema.schema_type {
|
||||
let Some(properties) = schema.properties.as_ref() else {
|
||||
return Err(format!(
|
||||
"{path}: object schemas must carry a properties map"
|
||||
));
|
||||
};
|
||||
|
||||
if schema.additional_properties != Some(AdditionalProperties::Boolean(false)) {
|
||||
return Err(format!(
|
||||
"{path}: object schemas must set `additionalProperties: false`; see https://developers.openai.com/api/docs/guides/structured-outputs#supported-schemas"
|
||||
));
|
||||
}
|
||||
|
||||
let property_names = properties.keys().cloned().collect::<BTreeSet<_>>();
|
||||
let Some(required) = schema.required.as_ref() else {
|
||||
return Err(format!(
|
||||
"{path}: object schemas must list every field in `required`; see https://developers.openai.com/api/docs/guides/structured-outputs#supported-schemas"
|
||||
));
|
||||
};
|
||||
let required_names = required.iter().cloned().collect::<BTreeSet<_>>();
|
||||
|
||||
if required_names != property_names {
|
||||
return Err(format!(
|
||||
"{path}: object schema `required` entries must exactly match declared properties; see https://developers.openai.com/api/docs/guides/structured-outputs#supported-schemas"
|
||||
));
|
||||
}
|
||||
|
||||
for (property_name, property_schema) in properties {
|
||||
validate_structured_outputs_schema_at_path(
|
||||
property_schema,
|
||||
&format!("{path}.{property_name}"),
|
||||
/*is_root*/ false,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
150
codex-rs/tools/src/json_schema_structured_outputs_tests.rs
Normal file
150
codex-rs/tools/src/json_schema_structured_outputs_tests.rs
Normal file
@@ -0,0 +1,150 @@
|
||||
use super::JsonSchema;
|
||||
use super::parse_tool_input_schema;
|
||||
use super::validate_structured_outputs_schema;
|
||||
use pretty_assertions::assert_eq;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
// This file codifies the Structured Outputs invariants that matter for the MCP
|
||||
// schema regression in PR #18159.
|
||||
//
|
||||
// Source of truth:
|
||||
// https://developers.openai.com/api/docs/guides/structured-outputs#supported-schemas
|
||||
//
|
||||
// The doc page calls out several constraints that are relevant to the object
|
||||
// schemas we expose through the Responses API:
|
||||
// - the root schema must be an object and must not be `anyOf`
|
||||
// - all object fields / function parameters must appear in `required`
|
||||
// - every object must set `additionalProperties: false`
|
||||
//
|
||||
// The flattening bug in this PR was specifically about `$ref` and
|
||||
// single-variant combiner wrappers causing nested object parameters like
|
||||
// `start` / `end` to collapse to `string`. The tests below intentionally use
|
||||
// Structured Outputs-compliant inputs so we can assert not only that the object
|
||||
// shape survives, but also that the surviving shape still lives inside the
|
||||
// Responses API subset documented above.
|
||||
|
||||
#[test]
|
||||
fn parse_tool_input_schema_keeps_local_ref_objects_inside_structured_outputs_subset() {
|
||||
// This mirrors the Outlook Calendar `create_event.start` / `end` shape we
|
||||
// care about, but does so with the exact Structured Outputs object
|
||||
// invariants from:
|
||||
// https://developers.openai.com/api/docs/guides/structured-outputs#supported-schemas
|
||||
//
|
||||
// We want to prove that resolving a local `$ref` no longer collapses the
|
||||
// nested object to `string`, while also preserving:
|
||||
// - root object shape
|
||||
// - `additionalProperties: false`
|
||||
// - `required` coverage for every field
|
||||
let schema = parse_tool_input_schema(&serde_json::json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"start": { "$ref": "#/$defs/date_time_zone" }
|
||||
},
|
||||
"required": ["start"],
|
||||
"additionalProperties": false,
|
||||
"$defs": {
|
||||
"date_time_zone": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"dateTime": {
|
||||
"type": "string",
|
||||
"description": "RFC3339 timestamp"
|
||||
},
|
||||
"timeZone": {
|
||||
"type": "string",
|
||||
"description": "IANA time zone"
|
||||
}
|
||||
},
|
||||
"required": ["dateTime", "timeZone"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
}
|
||||
}))
|
||||
.expect("parse schema");
|
||||
|
||||
validate_structured_outputs_schema(&schema).expect("schema should stay in supported subset");
|
||||
|
||||
assert_eq!(
|
||||
schema,
|
||||
JsonSchema::object(
|
||||
BTreeMap::from([(
|
||||
"start".to_string(),
|
||||
JsonSchema::object(
|
||||
BTreeMap::from([
|
||||
(
|
||||
"dateTime".to_string(),
|
||||
JsonSchema::string(Some("RFC3339 timestamp".to_string())),
|
||||
),
|
||||
(
|
||||
"timeZone".to_string(),
|
||||
JsonSchema::string(Some("IANA time zone".to_string())),
|
||||
),
|
||||
]),
|
||||
Some(vec!["dateTime".to_string(), "timeZone".to_string()]),
|
||||
Some(false.into()),
|
||||
),
|
||||
)]),
|
||||
Some(vec!["start".to_string()]),
|
||||
Some(false.into()),
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_tool_input_schema_keeps_single_variant_combiner_objects_inside_structured_outputs_subset()
|
||||
{
|
||||
// The docs allow nested objects, but those nested objects still need the
|
||||
// same strict object invariants:
|
||||
// https://developers.openai.com/api/docs/guides/structured-outputs#supported-schemas
|
||||
//
|
||||
// This test covers the second half of the regression: a single-variant
|
||||
// `allOf` wrapper around the `DateTimeTimeZone` object must unwrap back to
|
||||
// an object without losing the Structured Outputs constraints that make the
|
||||
// schema acceptable to the Responses API.
|
||||
let schema = parse_tool_input_schema(&serde_json::json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"end": {
|
||||
"allOf": [{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"dateTime": { "type": "string" },
|
||||
"timeZone": { "type": "string" }
|
||||
},
|
||||
"required": ["dateTime", "timeZone"],
|
||||
"additionalProperties": false
|
||||
}]
|
||||
}
|
||||
},
|
||||
"required": ["end"],
|
||||
"additionalProperties": false
|
||||
}))
|
||||
.expect("parse schema");
|
||||
|
||||
validate_structured_outputs_schema(&schema).expect("schema should stay in supported subset");
|
||||
|
||||
assert_eq!(
|
||||
schema,
|
||||
JsonSchema::object(
|
||||
BTreeMap::from([(
|
||||
"end".to_string(),
|
||||
JsonSchema::object(
|
||||
BTreeMap::from([
|
||||
(
|
||||
"dateTime".to_string(),
|
||||
JsonSchema::string(/*description*/ None),
|
||||
),
|
||||
(
|
||||
"timeZone".to_string(),
|
||||
JsonSchema::string(/*description*/ None),
|
||||
),
|
||||
]),
|
||||
Some(vec!["dateTime".to_string(), "timeZone".to_string()]),
|
||||
Some(false.into()),
|
||||
),
|
||||
)]),
|
||||
Some(vec!["end".to_string()]),
|
||||
Some(false.into()),
|
||||
)
|
||||
);
|
||||
}
|
||||
@@ -452,6 +452,392 @@ fn parse_tool_input_schema_fills_default_items_for_nullable_array_union() {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_tool_input_schema_resolves_local_ref_objects() {
|
||||
let schema = parse_tool_input_schema(&serde_json::json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"start": { "$ref": "#/$defs/date_time_zone" }
|
||||
},
|
||||
"$defs": {
|
||||
"date_time_zone": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"dateTime": {
|
||||
"type": "string",
|
||||
"description": "RFC3339 timestamp"
|
||||
},
|
||||
"timeZone": {
|
||||
"type": "string",
|
||||
"description": "IANA time zone"
|
||||
}
|
||||
},
|
||||
"required": ["dateTime", "timeZone"]
|
||||
}
|
||||
}
|
||||
}))
|
||||
.expect("parse schema");
|
||||
|
||||
assert_eq!(
|
||||
schema,
|
||||
JsonSchema::object(
|
||||
BTreeMap::from([(
|
||||
"start".to_string(),
|
||||
JsonSchema::object(
|
||||
BTreeMap::from([
|
||||
(
|
||||
"dateTime".to_string(),
|
||||
JsonSchema::string(Some("RFC3339 timestamp".to_string())),
|
||||
),
|
||||
(
|
||||
"timeZone".to_string(),
|
||||
JsonSchema::string(Some("IANA time zone".to_string())),
|
||||
),
|
||||
]),
|
||||
Some(vec!["dateTime".to_string(), "timeZone".to_string()]),
|
||||
/*additional_properties*/ None,
|
||||
),
|
||||
)]),
|
||||
/*required*/ None,
|
||||
/*additional_properties*/ None
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_tool_input_schema_merges_local_ref_sibling_constraints() {
|
||||
let schema = parse_tool_input_schema(&serde_json::json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"start": {
|
||||
"$ref": "#/$defs/date_time_zone",
|
||||
"properties": {
|
||||
"timeZone": {
|
||||
"enum": ["UTC"]
|
||||
},
|
||||
"calendar": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": ["timeZone", "calendar"]
|
||||
}
|
||||
},
|
||||
"$defs": {
|
||||
"date_time_zone": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"dateTime": { "type": "string" },
|
||||
"timeZone": { "type": "string" }
|
||||
},
|
||||
"required": ["dateTime"]
|
||||
}
|
||||
}
|
||||
}))
|
||||
.expect("parse schema");
|
||||
|
||||
assert_eq!(
|
||||
schema,
|
||||
JsonSchema::object(
|
||||
BTreeMap::from([(
|
||||
"start".to_string(),
|
||||
JsonSchema::object(
|
||||
BTreeMap::from([
|
||||
(
|
||||
"calendar".to_string(),
|
||||
JsonSchema::string(/*description*/ None),
|
||||
),
|
||||
(
|
||||
"dateTime".to_string(),
|
||||
JsonSchema::string(/*description*/ None),
|
||||
),
|
||||
(
|
||||
"timeZone".to_string(),
|
||||
JsonSchema::string_enum(
|
||||
vec![serde_json::json!("UTC")],
|
||||
/*description*/ None,
|
||||
),
|
||||
),
|
||||
]),
|
||||
Some(vec![
|
||||
"dateTime".to_string(),
|
||||
"timeZone".to_string(),
|
||||
"calendar".to_string(),
|
||||
]),
|
||||
/*additional_properties*/ None,
|
||||
),
|
||||
)]),
|
||||
/*required*/ None,
|
||||
/*additional_properties*/ None
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_tool_input_schema_handles_cyclic_local_refs_without_recursing_forever() {
|
||||
let schema = parse_tool_input_schema(&serde_json::json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"node": { "$ref": "#/$defs/node" }
|
||||
},
|
||||
"$defs": {
|
||||
"node": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"next": { "$ref": "#/$defs/node" }
|
||||
},
|
||||
"required": ["next"]
|
||||
}
|
||||
}
|
||||
}))
|
||||
.expect("parse schema");
|
||||
|
||||
assert_eq!(
|
||||
schema,
|
||||
JsonSchema::object(
|
||||
BTreeMap::from([(
|
||||
"node".to_string(),
|
||||
JsonSchema::object(
|
||||
BTreeMap::from([(
|
||||
"next".to_string(),
|
||||
JsonSchema::object(
|
||||
BTreeMap::new(),
|
||||
/*required*/ None,
|
||||
/*additional_properties*/ None,
|
||||
),
|
||||
)]),
|
||||
Some(vec!["next".to_string()]),
|
||||
/*additional_properties*/ None,
|
||||
),
|
||||
)]),
|
||||
/*required*/ None,
|
||||
/*additional_properties*/ None
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_tool_input_schema_merges_outer_constraints_when_unwrapping_single_variant_combiners() {
|
||||
let schema = parse_tool_input_schema(&serde_json::json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"wrapped": {
|
||||
"allOf": [{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"a": { "type": "string" }
|
||||
},
|
||||
"required": ["a"]
|
||||
}],
|
||||
"properties": {
|
||||
"b": { "type": "string" }
|
||||
},
|
||||
"required": ["b"]
|
||||
}
|
||||
}
|
||||
}))
|
||||
.expect("parse schema");
|
||||
|
||||
assert_eq!(
|
||||
schema,
|
||||
JsonSchema::object(
|
||||
BTreeMap::from([(
|
||||
"wrapped".to_string(),
|
||||
JsonSchema::object(
|
||||
BTreeMap::from([
|
||||
("a".to_string(), JsonSchema::string(/*description*/ None)),
|
||||
("b".to_string(), JsonSchema::string(/*description*/ None)),
|
||||
]),
|
||||
Some(vec!["a".to_string(), "b".to_string()]),
|
||||
/*additional_properties*/ None,
|
||||
),
|
||||
)]),
|
||||
/*required*/ None,
|
||||
/*additional_properties*/ None
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_tool_input_schema_merges_outer_constraints_when_unwrapping_true_combiners() {
|
||||
let schema = parse_tool_input_schema(&serde_json::json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"wrapped": {
|
||||
"allOf": [true],
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"b": { "type": "string" }
|
||||
},
|
||||
"required": ["b"]
|
||||
}
|
||||
}
|
||||
}))
|
||||
.expect("parse schema");
|
||||
|
||||
assert_eq!(
|
||||
schema,
|
||||
JsonSchema::object(
|
||||
BTreeMap::from([(
|
||||
"wrapped".to_string(),
|
||||
JsonSchema::object(
|
||||
BTreeMap::from([("b".to_string(), JsonSchema::string(/*description*/ None),)]),
|
||||
Some(vec!["b".to_string()]),
|
||||
/*additional_properties*/ None,
|
||||
),
|
||||
)]),
|
||||
/*required*/ None,
|
||||
/*additional_properties*/ None
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_tool_input_schema_preserves_sibling_constraints_for_true_ref_targets() {
|
||||
let schema = parse_tool_input_schema(&serde_json::json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"config": {
|
||||
"$ref": "#/$defs/anything",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"dateTime": { "type": "string" }
|
||||
},
|
||||
"required": ["dateTime"]
|
||||
}
|
||||
},
|
||||
"$defs": {
|
||||
"anything": true
|
||||
}
|
||||
}))
|
||||
.expect("parse schema");
|
||||
|
||||
assert_eq!(
|
||||
schema,
|
||||
JsonSchema::object(
|
||||
BTreeMap::from([(
|
||||
"config".to_string(),
|
||||
JsonSchema::object(
|
||||
BTreeMap::from([(
|
||||
"dateTime".to_string(),
|
||||
JsonSchema::string(/*description*/ None),
|
||||
)]),
|
||||
Some(vec!["dateTime".to_string()]),
|
||||
/*additional_properties*/ None,
|
||||
),
|
||||
)]),
|
||||
/*required*/ None,
|
||||
/*additional_properties*/ None
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_tool_input_schema_intersects_enums_when_merging_ref_siblings() {
|
||||
let schema = parse_tool_input_schema(&serde_json::json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"timeZone": {
|
||||
"$ref": "#/$defs/supported_time_zone",
|
||||
"enum": ["UTC", "PST"]
|
||||
}
|
||||
},
|
||||
"$defs": {
|
||||
"supported_time_zone": {
|
||||
"enum": ["UTC", "EST"]
|
||||
}
|
||||
}
|
||||
}))
|
||||
.expect("parse schema");
|
||||
|
||||
assert_eq!(
|
||||
schema,
|
||||
JsonSchema::object(
|
||||
BTreeMap::from([(
|
||||
"timeZone".to_string(),
|
||||
JsonSchema::string_enum(vec![serde_json::json!("UTC")], /*description*/ None),
|
||||
)]),
|
||||
/*required*/ None,
|
||||
/*additional_properties*/ None
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_tool_input_schema_narrows_number_and_integer_type_intersections() {
|
||||
let schema = parse_tool_input_schema(&serde_json::json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"count": {
|
||||
"$ref": "#/$defs/number_like",
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"$defs": {
|
||||
"number_like": {
|
||||
"type": "number"
|
||||
}
|
||||
}
|
||||
}))
|
||||
.expect("parse schema");
|
||||
|
||||
assert_eq!(
|
||||
schema,
|
||||
JsonSchema::object(
|
||||
BTreeMap::from([(
|
||||
"count".to_string(),
|
||||
JsonSchema::integer(/*description*/ None),
|
||||
)]),
|
||||
/*required*/ None,
|
||||
/*additional_properties*/ None
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_tool_input_schema_unwraps_single_variant_all_of_objects() {
|
||||
let schema = parse_tool_input_schema(&serde_json::json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"end": {
|
||||
"allOf": [{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"dateTime": { "type": "string" },
|
||||
"timeZone": { "type": "string" }
|
||||
},
|
||||
"required": ["dateTime", "timeZone"]
|
||||
}]
|
||||
}
|
||||
}
|
||||
}))
|
||||
.expect("parse schema");
|
||||
|
||||
assert_eq!(
|
||||
schema,
|
||||
JsonSchema::object(
|
||||
BTreeMap::from([(
|
||||
"end".to_string(),
|
||||
JsonSchema::object(
|
||||
BTreeMap::from([
|
||||
(
|
||||
"dateTime".to_string(),
|
||||
JsonSchema::string(/*description*/ None),
|
||||
),
|
||||
(
|
||||
"timeZone".to_string(),
|
||||
JsonSchema::string(/*description*/ None),
|
||||
),
|
||||
]),
|
||||
Some(vec!["dateTime".to_string(), "timeZone".to_string()]),
|
||||
/*additional_properties*/ None,
|
||||
),
|
||||
)]),
|
||||
/*required*/ None,
|
||||
/*additional_properties*/ None
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
// Schemas that should be preserved for Responses API compatibility rather than
|
||||
// being rewritten into a different shape.
|
||||
|
||||
@@ -541,7 +927,7 @@ fn parse_tool_input_schema_preserves_nested_nullable_any_of_shape() {
|
||||
],
|
||||
/*description*/ None,
|
||||
),
|
||||
),]),
|
||||
)]),
|
||||
/*required*/ None,
|
||||
/*additional_properties*/ None
|
||||
)
|
||||
|
||||
@@ -25,6 +25,9 @@ mod tool_suggest;
|
||||
mod utility_tool;
|
||||
mod view_image;
|
||||
|
||||
#[cfg(test)]
|
||||
mod json_schema_structured_outputs;
|
||||
|
||||
pub use agent_job_tool::create_report_agent_job_result_tool;
|
||||
pub use agent_job_tool::create_spawn_agents_on_csv_tool;
|
||||
pub use agent_tool::SpawnAgentToolOptions;
|
||||
|
||||
Reference in New Issue
Block a user