Escape turn metadata headers as ASCII JSON (#19620)

## Why

`x-codex-turn-metadata` is sent as an HTTP/WebSocket header, but Codex
was serializing the metadata JSON with raw UTF-8 string contents. When a
workspace path contains non-ASCII characters, common HTTP stacks can
reject or corrupt that header before the request reaches the provider.

Fixes #17468. Also addresses the duplicate WebSocket report in #19581.

## What changed

- Added `codex_utils_string::to_ascii_json_string`, a shared helper that
serializes JSON normally while escaping non-ASCII string content as
`\uXXXX`.
- Switched turn metadata header serialization, including merged
Responses API client metadata, to use the ASCII-safe JSON helper.
- Added coverage for non-ASCII workspace paths and non-ASCII client
metadata while preserving the same parsed JSON values.

## Verification

- `cargo test -p codex-utils-string`
- `cargo test -p codex-core turn_metadata`
- `just bazel-lock-check`
This commit is contained in:
Eric Traut
2026-04-29 15:35:33 -07:00
committed by GitHub
parent b1546008fc
commit 4241df4d79
6 changed files with 146 additions and 3 deletions

View File

@@ -0,0 +1,122 @@
//! JSON serialization helpers for output that must remain parseable as JSON
//! while staying safe for ASCII-only transports.
use std::io;
use serde::Serialize;
/// `serde_json` formatter whose string output is restricted to printable ASCII.
///
/// Only [`write_string_fragment`](serde_json::ser::Formatter::write_string_fragment)
/// is overridden here; all other formatting hooks use the trait's defaults.
struct AsciiJsonFormatter;

impl serde_json::ser::Formatter for AsciiJsonFormatter {
    // serde_json has no ensure_ascii flag; this formatter keeps its serializer
    // in charge and only rewrites the string fragments that are unsafe to send
    // raw over an ASCII-only transport.
    /// Writes `fragment` to `writer`, replacing every non-ASCII character and
    /// the DEL control character with JSON `\uXXXX` escapes.
    ///
    /// Characters outside the Basic Multilingual Plane are emitted as a UTF-16
    /// surrogate pair (two consecutive `\uXXXX` escapes), which is how JSON
    /// represents them. Runs of characters that need no escaping are copied
    /// through unchanged in a single write.
    ///
    /// # Errors
    ///
    /// Returns any I/O error reported by `writer`.
    fn write_string_fragment<W>(&mut self, writer: &mut W, fragment: &str) -> io::Result<()>
    where
        W: ?Sized + io::Write,
    {
        // Byte offset of the first character not yet written to `writer`.
        let mut start = 0;
        for (index, ch) in fragment.char_indices() {
            // DEL (U+007F) is ASCII, but it is a control character that HTTP
            // header values may not contain. It is escaped here as well because
            // this fragment would otherwise carry it through verbatim.
            if ch.is_ascii() && ch != '\u{7f}' {
                continue;
            }
            // Flush the pending run of safe characters that precedes `ch`.
            if start < index {
                writer.write_all(&fragment.as_bytes()[start..index])?;
            }
            // One code unit for BMP characters, two (a surrogate pair) otherwise.
            let mut utf16 = [0; 2];
            for code_unit in ch.encode_utf16(&mut utf16) {
                write!(writer, "\\u{code_unit:04x}")?;
            }
            start = index + ch.len_utf8();
        }
        // Flush whatever safe characters remain after the last escape.
        if start < fragment.len() {
            writer.write_all(&fragment.as_bytes()[start..])?;
        }
        Ok(())
    }
}
/// Serialize JSON while escaping non-ASCII string content as `\uXXXX`.
///
/// This is useful when JSON needs to remain parseable as JSON but must be
/// carried through ASCII-safe transports such as HTTP headers.
pub fn to_ascii_json_string<T>(value: &T) -> serde_json::Result<String>
where
T: Serialize + ?Sized,
{
let mut bytes = Vec::new();
let mut serializer = serde_json::Serializer::with_formatter(&mut bytes, AsciiJsonFormatter);
value.serialize(&mut serializer)?;
String::from_utf8(bytes)
.map_err(|err| serde_json::Error::io(io::Error::new(io::ErrorKind::InvalidData, err)))
}
#[cfg(test)]
mod tests {
    use std::collections::BTreeMap;

    use pretty_assertions::assert_eq;
    use serde::Serialize;
    use serde::ser::SerializeStruct;
    use serde_json::Value;
    use serde_json::json;

    use super::to_ascii_json_string;

    /// Non-ASCII text in both map keys and struct field values must come out
    /// as `\uXXXX` escapes, and the escaped JSON must parse back to the same
    /// values.
    #[test]
    fn to_ascii_json_string_escapes_non_ascii_strings() {
        // Inner fixture: a struct with two non-ASCII string fields, one of
        // which (the emoji) needs a surrogate pair.
        struct TestWorkspace;

        impl Serialize for TestWorkspace {
            fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
            where
                S: serde::Serializer,
            {
                let mut fields = serializer.serialize_struct("TestWorkspace", 2)?;
                fields.serialize_field("label", "Agentlarım")?;
                fields.serialize_field("emoji", "🚀")?;
                fields.end()
            }
        }

        // Outer fixture: a map keyed by a non-ASCII path.
        struct TestPayload;

        impl Serialize for TestPayload {
            fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
            where
                S: serde::Serializer,
            {
                let mut workspaces = BTreeMap::new();
                workspaces.insert("/tmp/東京", TestWorkspace);

                let mut fields = serializer.serialize_struct("TestPayload", 1)?;
                fields.serialize_field("workspaces", &workspaces)?;
                fields.end()
            }
        }

        let serialized = to_ascii_json_string(&TestPayload).expect("serialize ascii json");

        // Exact wire format: lowercase hex escapes, surrogate pair for the emoji.
        assert_eq!(
            serialized,
            r#"{"workspaces":{"/tmp/\u6771\u4eac":{"label":"Agentlar\u0131m","emoji":"\ud83d\ude80"}}}"#
        );
        assert!(serialized.is_ascii());
        for raw in ["東京", "Agentlarım", "🚀"] {
            assert!(!serialized.contains(raw));
        }

        // The escaped output must parse back to the original values.
        let parsed: Value = serde_json::from_str(&serialized).expect("serialized json");
        let expected_value = json!({
            "workspaces": {
                "/tmp/東京": {
                    "label": "Agentlarım",
                    "emoji": "🚀"
                }
            }
        });
        assert_eq!(parsed, expected_value);
    }
}

View File

@@ -1,5 +1,7 @@
mod json;
mod truncate;
pub use json::to_ascii_json_string;
pub use truncate::approx_bytes_for_tokens;
pub use truncate::approx_token_count;
pub use truncate::approx_tokens_from_byte_count;