fix: sanitize surrogates (#25934)

This commit is contained in:
Aiden Cline
2026-05-05 18:07:23 -05:00
committed by GitHub
parent 837cc92586
commit 6409aceb1a
2 changed files with 164 additions and 5 deletions

View File

@@ -1123,6 +1123,98 @@ describe("ProviderTransform.message - DeepSeek reasoning content", () => {
})
})
describe("ProviderTransform.message - surrogate sanitization", () => {
const model = {
id: "test/test-model",
providerID: "test",
api: {
id: "test-model",
url: "https://api.test.com",
npm: "@ai-sdk/openai-compatible",
},
name: "Test Model",
capabilities: {
temperature: true,
reasoning: true,
attachment: true,
toolcall: true,
input: { text: true, audio: false, image: true, video: false, pdf: false },
output: { text: true, audio: false, image: false, video: false, pdf: false },
interleaved: false,
},
cost: { input: 0.001, output: 0.002, cache: { read: 0.0001, write: 0.0002 } },
limit: { context: 128000, output: 8192 },
status: "active",
options: {},
headers: {},
} as any
test("replaces lone surrogates in model-visible text", () => {
const lone = "\uD83D"
const valid = "🚀"
const sanitized = "<22>"
const text = (label: string) => `${label} ${lone} and ${valid}`
const expected = (label: string) => `${label} ${sanitized} and ${valid}`
const msgs = [
{ role: "system", content: text("system") },
{ role: "user", content: text("user string") },
{
role: "user",
content: [
{ type: "text", text: text("user text") },
{ type: "image", image: "data:image/png;base64,abcd" },
],
},
{ role: "assistant", content: text("assistant string") },
{
role: "assistant",
content: [
{ type: "text", text: text("assistant text") },
{ type: "reasoning", text: text("assistant reasoning") },
{ type: "tool-call", toolCallId: "call-1", toolName: "Read", input: { filePath: ".opencode/tool/emoji.ts" } },
{ type: "tool-result", toolCallId: "call-2", toolName: "Read", output: { type: "text", value: text("assistant tool text") } },
{ type: "tool-result", toolCallId: "call-3", toolName: "Read", output: { type: "error-text", value: text("assistant tool error") } },
{
type: "tool-result",
toolCallId: "call-4",
toolName: "Read",
output: { type: "content", value: [{ type: "text", text: text("assistant tool content") }] },
},
],
},
{
role: "tool",
content: [
{ type: "tool-result", toolCallId: "call-5", toolName: "Read", output: { type: "text", value: text("tool text") } },
{ type: "tool-result", toolCallId: "call-6", toolName: "Read", output: { type: "error-text", value: text("tool error") } },
{
type: "tool-result",
toolCallId: "call-7",
toolName: "Read",
output: { type: "content", value: [{ type: "text", text: text("tool content") }] },
},
],
},
] as any[]
const result = ProviderTransform.message(msgs, model, {}) as any[]
expect(result[0].content).toBe(expected("system"))
expect(result[1].content).toBe(expected("user string"))
expect(result[2].content[0].text).toBe(expected("user text"))
expect(result[3].content).toBe(expected("assistant string"))
expect(result[4].content[0].text).toBe(expected("assistant text"))
expect(result[4].content[1].text).toBe(expected("assistant reasoning"))
expect(result[4].content[3].output.value).toBe(expected("assistant tool text"))
expect(result[4].content[4].output.value).toBe(expected("assistant tool error"))
expect(result[4].content[5].output.value[0].text).toBe(expected("assistant tool content"))
expect(result[5].content[0].output.value).toBe(expected("tool text"))
expect(result[5].content[1].output.value).toBe(expected("tool error"))
expect(result[5].content[2].output.value[0].text).toBe(expected("tool content"))
expect(result[2].content[1]).toEqual({ type: "image", image: "data:image/png;base64,abcd" })
})
})
describe("ProviderTransform.message - empty image handling", () => {
const mockModel = {
id: "anthropic/claude-3-5-sonnet",
@@ -1993,7 +2085,7 @@ describe("ProviderTransform.message - bedrock caching with non-bedrock providerI
const msgs = [
{
role: "system",
content: [{ type: "text", text: "You are a helpful assistant" }],
content: "You are a helpful assistant",
},
{
role: "user",
@@ -2007,7 +2099,7 @@ describe("ProviderTransform.message - bedrock caching with non-bedrock providerI
expect(result[0].providerOptions?.bedrock).toEqual({
cachePoint: { type: "default" },
})
expect(result[0].content[0].providerOptions?.bedrock).toBeUndefined()
expect(result[0].content).toBe("You are a helpful assistant")
})
})
@@ -2044,7 +2136,7 @@ describe("ProviderTransform.message - cache control on gateway", () => {
const msgs = [
{
role: "system",
content: [{ type: "text", text: "You are a helpful assistant" }],
content: "You are a helpful assistant",
},
{
role: "user",
@@ -2054,7 +2146,7 @@ describe("ProviderTransform.message - cache control on gateway", () => {
const result = ProviderTransform.message(msgs, model, {}) as any[]
expect(result[0].content[0].providerOptions).toBeUndefined()
expect(result[0].content).toBe("You are a helpful assistant")
expect(result[0].providerOptions).toBeUndefined()
})