mirror of
https://github.com/anomalyco/opencode.git
synced 2026-05-23 21:04:36 +00:00
fix(llm): emit structured image blocks for tool-result media in Anthropic Messages (#28755)
This commit is contained in:
@@ -14,6 +14,7 @@ import {
|
||||
type ProviderMetadata,
|
||||
type ToolCallPart,
|
||||
type ToolDefinition,
|
||||
type ToolResultContentPart,
|
||||
type ToolResultPart,
|
||||
} from "../schema"
|
||||
import { JsonObject, optionalArray, optionalNull, ProviderShared } from "./shared"
|
||||
@@ -96,10 +97,18 @@ const AnthropicServerToolResultBlock = Schema.Struct({
|
||||
})
|
||||
type AnthropicServerToolResultBlock = Schema.Schema.Type<typeof AnthropicServerToolResultBlock>
|
||||
|
||||
// Anthropic accepts either a plain string or an ordered array of text/image
// blocks inside `tool_result.content`. The array form is required when a tool
// returns image bytes (screenshot, image search, etc.) so they can be passed
// to the model as proper image inputs instead of being JSON-stringified into
// the prompt — which silently inflates context by megabytes and can push the
// conversation over the model's token limit.
|
||||
const AnthropicToolResultContent = Schema.Union([AnthropicTextBlock, AnthropicImageBlock])
|
||||
|
||||
/**
 * Wire schema for an Anthropic `tool_result` content block.
 *
 * `content` is declared exactly once: either a plain string or an array of
 * `AnthropicToolResultContent` (text/image) blocks. A second, string-only
 * `content` key would be a duplicate property in the struct literal.
 */
const AnthropicToolResultBlock = Schema.Struct({
  type: Schema.tag("tool_result"),
  tool_use_id: Schema.String,
  // String form or structured block array.
  content: Schema.Union([Schema.String, Schema.Array(AnthropicToolResultContent)]),
  is_error: Schema.optional(Schema.Boolean),
  cache_control: Schema.optional(AnthropicCacheControl),
})
|
||||
@@ -298,6 +307,31 @@ const lowerImage = Effect.fn("AnthropicMessages.lowerImage")(function* (part: Me
|
||||
} satisfies AnthropicImageBlock
|
||||
})
|
||||
|
||||
// Lowers one structured tool-result item into a provider-native block so that
// media is sent as an image block rather than as base64 text inside a string.
//
// - `text` items become a text block.
// - media items whose `mediaType` starts with `image/` become a base64 image block.
// - any other media type fails via `invalid(...)`.
const lowerToolResultContentItem = Effect.fn("AnthropicMessages.lowerToolResultContentItem")(function* (
  item: ToolResultContentPart,
) {
  if (item.type === "text") {
    const textBlock = { type: "text" as const, text: item.text } satisfies AnthropicTextBlock
    return textBlock
  }

  // Guard first: only image media can be lowered; everything else is rejected.
  const isImage = item.mediaType.startsWith("image/")
  if (!isImage) {
    return yield* invalid(`Anthropic Messages tool-result media content only supports images, got ${item.mediaType}`)
  }

  const imageBlock = {
    type: "image" as const,
    source: {
      type: "base64" as const,
      media_type: item.mediaType,
      data: ProviderShared.mediaBase64(item),
    },
  } satisfies AnthropicImageBlock
  return imageBlock
})
|
||||
|
||||
// Produces the value for `tool_result.content`.
//
// Results of type `content` are lowered item by item into structured blocks.
// Every other result type is rendered through `ProviderShared.toolResultText`
// and stays a string, for backward compatibility with existing cassettes and
// provider expectations.
const lowerToolResultContent = Effect.fn("AnthropicMessages.lowerToolResultContent")(function* (part: ToolResultPart) {
  const { result } = part
  if (result.type === "content") {
    return yield* Effect.forEach(result.value, lowerToolResultContentItem)
  }
  return ProviderShared.toolResultText(part)
})
|
||||
|
||||
const lowerMessages = Effect.fn("AnthropicMessages.lowerMessages")(function* (
|
||||
request: LLMRequest,
|
||||
breakpoints: Cache.Breakpoints,
|
||||
@@ -360,7 +394,7 @@ const lowerMessages = Effect.fn("AnthropicMessages.lowerMessages")(function* (
|
||||
content.push({
|
||||
type: "tool_result",
|
||||
tool_use_id: part.id,
|
||||
content: ProviderShared.toolResultText(part),
|
||||
content: yield* lowerToolResultContent(part),
|
||||
is_error: part.result.type === "error" ? true : undefined,
|
||||
cache_control: cacheControl(breakpoints, part.cache),
|
||||
})
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -24,6 +24,19 @@ const request = LLM.request({
|
||||
generation: { maxTokens: 20, temperature: 0 },
|
||||
})
|
||||
|
||||
// The `tool_result` member of the Anthropic message content-block union.
type AnthropicToolResult = Extract<
  AnthropicMessages.AnthropicMessagesBody["messages"][number]["content"][number],
  { readonly type: "tool_result" }
>

// Test helper: returns the first `tool_result` block found in the user
// messages of a prepared body, asserting that one exists.
const expectToolResult = (body: AnthropicMessages.AnthropicMessagesBody): AnthropicToolResult => {
  // Only user messages are searched; other roles contribute no blocks.
  const userBlocks = body.messages.flatMap((entry) => (entry.role === "user" ? entry.content : []))
  const found = userBlocks.find((candidate): candidate is AnthropicToolResult => candidate.type === "tool_result")
  expect(found).toBeDefined()
  return found!
}
|
||||
|
||||
describe("Anthropic Messages route", () => {
|
||||
it.effect("prepares Anthropic Messages target", () =>
|
||||
Effect.gen(function* () {
|
||||
@@ -71,6 +84,87 @@ describe("Anthropic Messages route", () => {
|
||||
}),
|
||||
)
|
||||
|
||||
// Regression: screenshot/read tool results must stay structured so base64
|
||||
// image data is not JSON-stringified into `tool_result.content`.
|
||||
it.effect("lowers image tool-result content as structured image blocks", () =>
|
||||
Effect.gen(function* () {
|
||||
const prepared = yield* LLMClient.prepare<AnthropicMessages.AnthropicMessagesBody>(
|
||||
LLM.request({
|
||||
id: "req_tool_result_image",
|
||||
model,
|
||||
messages: [
|
||||
Message.user("Show me the screenshot."),
|
||||
Message.assistant([ToolCallPart.make({ id: "call_1", name: "read", input: { filePath: "shot.png" } })]),
|
||||
Message.tool({
|
||||
id: "call_1",
|
||||
name: "read",
|
||||
resultType: "content",
|
||||
result: [
|
||||
{ type: "text", text: "Image read successfully" },
|
||||
{ type: "media", mediaType: "image/png", data: "AAECAw==" },
|
||||
],
|
||||
}),
|
||||
],
|
||||
cache: "none",
|
||||
}),
|
||||
)
|
||||
|
||||
expect(expectToolResult(prepared.body).content).toEqual([
|
||||
{ type: "text", text: "Image read successfully" },
|
||||
{ type: "image", source: { type: "base64", media_type: "image/png", data: "AAECAw==" } },
|
||||
])
|
||||
}),
|
||||
)
|
||||
|
||||
it.effect("lowers single-image tool-result content as a structured image block", () =>
|
||||
Effect.gen(function* () {
|
||||
const prepared = yield* LLMClient.prepare<AnthropicMessages.AnthropicMessagesBody>(
|
||||
LLM.request({
|
||||
id: "req_tool_result_image_only",
|
||||
model,
|
||||
messages: [
|
||||
Message.assistant([ToolCallPart.make({ id: "call_1", name: "screenshot", input: {} })]),
|
||||
Message.tool({
|
||||
id: "call_1",
|
||||
name: "screenshot",
|
||||
resultType: "content",
|
||||
result: [{ type: "media", mediaType: "image/jpeg", data: "/9j/AA==" }],
|
||||
}),
|
||||
],
|
||||
cache: "none",
|
||||
}),
|
||||
)
|
||||
|
||||
expect(expectToolResult(prepared.body).content).toEqual([
|
||||
{ type: "image", source: { type: "base64", media_type: "image/jpeg", data: "/9j/AA==" } },
|
||||
])
|
||||
}),
|
||||
)
|
||||
|
||||
it.effect("rejects non-image media in tool-result content with a clear error", () =>
|
||||
Effect.gen(function* () {
|
||||
const error = yield* LLMClient.prepare(
|
||||
LLM.request({
|
||||
id: "req_tool_result_unsupported_media",
|
||||
model,
|
||||
messages: [
|
||||
Message.assistant([ToolCallPart.make({ id: "call_1", name: "fetch", input: {} })]),
|
||||
Message.tool({
|
||||
id: "call_1",
|
||||
name: "fetch",
|
||||
resultType: "content",
|
||||
result: [{ type: "media", mediaType: "audio/mpeg", data: "AAECAw==" }],
|
||||
}),
|
||||
],
|
||||
cache: "none",
|
||||
}),
|
||||
).pipe(Effect.flip)
|
||||
|
||||
expect(error.message).toContain("Anthropic Messages")
|
||||
expect(error.message).toContain("audio/mpeg")
|
||||
}),
|
||||
)
|
||||
|
||||
it.effect("prepares the composed native continuation request", () =>
|
||||
Effect.gen(function* () {
|
||||
const prepared = yield* LLMClient.prepare<AnthropicMessages.AnthropicMessagesBody>(
|
||||
|
||||
@@ -113,7 +113,10 @@ describeRecordedGoldenScenarios([
|
||||
requires: ["ANTHROPIC_API_KEY"],
|
||||
tags: ["flagship"],
|
||||
options: { redactor: Redactor.defaults({ requestHeaders: { allow: ["content-type", "anthropic-version"] } }) },
|
||||
scenarios: [{ id: "tool-loop", temperature: false }],
|
||||
scenarios: [
|
||||
{ id: "tool-loop", temperature: false },
|
||||
{ id: "image-tool-result", temperature: false, maxTokens: 40 },
|
||||
],
|
||||
},
|
||||
{
|
||||
name: "Gemini 2.5 Flash",
|
||||
|
||||
Reference in New Issue
Block a user