Files
codex/codex-rs/utils/string/src/json.rs
Eric Traut 4241df4d79 Escape turn metadata headers as ASCII JSON (#19620)
## Why

`x-codex-turn-metadata` is sent as an HTTP/WebSocket header, but Codex
was serializing the metadata JSON with raw UTF-8 string contents. When a
workspace path contains non-ASCII characters, common HTTP stacks can
reject or corrupt that header before the request reaches the provider.

Fixes #17468. Also addresses the duplicate WebSocket report in #19581.

## What changed

- Added `codex_utils_string::to_ascii_json_string`, a shared helper that
serializes JSON normally while escaping non-ASCII string content as
`\uXXXX`.
- Switched turn metadata header serialization, including merged
Responses API client metadata, to use the ASCII-safe JSON helper.
- Added coverage for non-ASCII workspace paths and non-ASCII client
metadata while preserving the same parsed JSON values.

## Verification

- `cargo test -p codex-utils-string`
- `cargo test -p codex-core turn_metadata`
- `just bazel-lock-check`
2026-04-29 15:35:33 -07:00

123 lines
3.8 KiB
Rust
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
//! JSON serialization helpers for output that must remain parseable as JSON
//! while staying safe for ASCII-only transports.
use std::io;
use serde::Serialize;
struct AsciiJsonFormatter;
impl serde_json::ser::Formatter for AsciiJsonFormatter {
// serde_json has no ensure_ascii flag; this formatter keeps its serializer
// in charge and only escapes non-ASCII string fragments.
fn write_string_fragment<W>(&mut self, writer: &mut W, fragment: &str) -> io::Result<()>
where
W: ?Sized + io::Write,
{
let mut start = 0;
for (index, ch) in fragment.char_indices() {
if ch.is_ascii() {
continue;
}
if start < index {
writer.write_all(&fragment.as_bytes()[start..index])?;
}
let mut utf16 = [0; 2];
for code_unit in ch.encode_utf16(&mut utf16) {
write!(writer, "\\u{code_unit:04x}")?;
}
start = index + ch.len_utf8();
}
if start < fragment.len() {
writer.write_all(&fragment.as_bytes()[start..])?;
}
Ok(())
}
}
/// Serialize JSON while escaping non-ASCII string content as `\uXXXX`.
///
/// This is useful when JSON needs to remain parseable as JSON but must be
/// carried through ASCII-safe transports such as HTTP headers.
pub fn to_ascii_json_string<T>(value: &T) -> serde_json::Result<String>
where
T: Serialize + ?Sized,
{
let mut bytes = Vec::new();
let mut serializer = serde_json::Serializer::with_formatter(&mut bytes, AsciiJsonFormatter);
value.serialize(&mut serializer)?;
String::from_utf8(bytes)
.map_err(|err| serde_json::Error::io(io::Error::new(io::ErrorKind::InvalidData, err)))
}
#[cfg(test)]
mod tests {
use std::collections::BTreeMap;
use pretty_assertions::assert_eq;
use serde::Serialize;
use serde::ser::SerializeStruct;
use serde_json::Value;
use serde_json::json;
use super::to_ascii_json_string;
#[test]
fn to_ascii_json_string_escapes_non_ascii_strings() {
struct TestPayload;
impl Serialize for TestPayload {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
let workspaces = BTreeMap::from([("/tmp/東京", TestWorkspace)]);
let mut state = serializer.serialize_struct("TestPayload", 1)?;
state.serialize_field("workspaces", &workspaces)?;
state.end()
}
}
struct TestWorkspace;
impl Serialize for TestWorkspace {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
let mut state = serializer.serialize_struct("TestWorkspace", 2)?;
state.serialize_field("label", "Agentlarım")?;
state.serialize_field("emoji", "🚀")?;
state.end()
}
}
let value = TestPayload;
let expected_value = json!({
"workspaces": {
"/tmp/東京": {
"label": "Agentlarım",
"emoji": "🚀"
}
}
});
let serialized = to_ascii_json_string(&value).expect("serialize ascii json");
assert_eq!(
serialized,
r#"{"workspaces":{"/tmp/\u6771\u4eac":{"label":"Agentlar\u0131m","emoji":"\ud83d\ude80"}}}"#
);
assert!(serialized.is_ascii());
assert!(!serialized.contains("東京"));
assert!(!serialized.contains("Agentlarım"));
assert!(!serialized.contains("🚀"));
let parsed: Value = serde_json::from_str(&serialized).expect("serialized json");
assert_eq!(parsed, expected_value);
}
}