Files
opencode/packages/llm/test/recorded-scenarios.ts
2026-05-12 16:16:58 -04:00

283 lines
9.1 KiB
TypeScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { expect } from "bun:test"
import { Effect, Schema, Stream } from "effect"
import { LLM, LLMEvent, LLMResponse, ToolChoice, ToolDefinition, type LLMRequest, type ModelRef } from "../src"
import { LLMClient } from "../src/route"
import { tool } from "../src/tool"
/** Name under which the weather tool is declared in `weatherTool` and registered in `runWeatherToolLoop`. */
export const weatherToolName = "get_weather"
// A deterministic system prompt long enough to clear every supported provider's
// minimum cacheable-prefix threshold (Anthropic Haiku 3.5: 2048 tokens; Anthropic
// Opus/Haiku 4.5: 4096 tokens; OpenAI/Gemini/Bedrock: lower). Built by repeating
// a fixed sentence — the cassette replays bit-for-bit, so the exact text matters
// only when re-recording with `RECORD=true`.
export const LARGE_CACHEABLE_SYSTEM = Array.from(
  // 250 copies of a ~100-character sentence give roughly 25,000 characters,
  // which is intended to sit comfortably above every provider's minimum
  // cacheable-prefix size.
  { length: 250 },
  () => "You are a concise, factual assistant. Answer precisely and avoid filler. Cite numbers when known. ",
).join("")
/**
 * Tool definition for the weather tool as it is attached to requests
 * (see `weatherToolRequest`). The input schema declares a single required
 * string property `city` and rejects any additional properties.
 */
export const weatherTool = ToolDefinition.make({
name: weatherToolName,
description: "Get current weather for a city.",
inputSchema: {
type: "object",
properties: { city: { type: "string" } },
required: ["city"],
additionalProperties: false,
},
})
/**
 * Executable counterpart of `weatherTool`, built with the `tool` helper.
 *
 * It reuses the definition's description, takes `{ city }` and succeeds with
 * `{ temperature, condition }`. The handler is deterministic: "Paris" yields
 * 22 / "sunny", every other city yields 0 / "unknown".
 */
export const weatherRuntimeTool = tool({
  description: weatherTool.description,
  parameters: Schema.Struct({ city: Schema.String }),
  success: Schema.Struct({ temperature: Schema.Number, condition: Schema.String }),
  execute: ({ city }) => {
    // Only Paris has a canned forecast; anything else gets the placeholder reading.
    if (city === "Paris") {
      return Effect.succeed({ temperature: 22, condition: "sunny" })
    }
    return Effect.succeed({ temperature: 0, condition: "unknown" })
  },
})
/**
 * Builds a plain text request with a fixed system prompt and `cache: "none"`.
 *
 * @param input.id          Request id, passed through unchanged.
 * @param input.model       Model reference, passed through unchanged.
 * @param input.prompt      User prompt; defaults to "Reply with exactly: Hello!".
 * @param input.maxTokens   Output token cap; defaults to 20.
 * @param input.temperature Sampling temperature; defaults to 0. Pass `false`
 *                          to leave `temperature` out of the generation options.
 * @returns The value produced by `LLM.request`.
 */
export const textRequest = (input: {
  readonly id: string
  readonly model: ModelRef
  readonly prompt?: string
  readonly maxTokens?: number
  readonly temperature?: number | false
}) => {
  const maxTokens = input.maxTokens ?? 20
  // `false` means "omit the temperature key", not "temperature zero".
  const generation =
    input.temperature === false ? { maxTokens } : { maxTokens, temperature: input.temperature ?? 0 }
  return LLM.request({
    id: input.id,
    model: input.model,
    system: "You are concise.",
    prompt: input.prompt ?? "Reply with exactly: Hello!",
    cache: "none",
    generation,
  })
}
/**
 * Builds a request that asks the model to call `get_weather` for Paris.
 *
 * The request carries `weatherTool` in `tools` and sets `toolChoice` to
 * `ToolChoice.make(weatherTool)`, with `cache: "none"`.
 *
 * @param input.id          Request id, passed through unchanged.
 * @param input.model       Model reference, passed through unchanged.
 * @param input.maxTokens   Output token cap; defaults to 80.
 * @param input.temperature Sampling temperature; defaults to 0. Pass `false`
 *                          to leave `temperature` out of the generation options.
 * @returns The value produced by `LLM.request`.
 */
export const weatherToolRequest = (input: {
  readonly id: string
  readonly model: ModelRef
  readonly maxTokens?: number
  readonly temperature?: number | false
}) => {
  const maxTokens = input.maxTokens ?? 80
  // `false` means "omit the temperature key", not "temperature zero".
  const generation =
    input.temperature === false ? { maxTokens } : { maxTokens, temperature: input.temperature ?? 0 }
  return LLM.request({
    id: input.id,
    model: input.model,
    system: "Call tools exactly as requested.",
    prompt: "Call get_weather with city exactly Paris.",
    tools: [weatherTool],
    toolChoice: ToolChoice.make(weatherTool),
    cache: "none",
    generation,
  })
}
/**
 * Builds the request for the multi-step weather scenario.
 *
 * No tools are attached to the request itself; `runWeatherToolLoop` supplies
 * the runtime tool when it streams the request.
 *
 * @param input.id          Request id, passed through unchanged.
 * @param input.model       Model reference, passed through unchanged.
 * @param input.system      System prompt; defaults to an instruction to use
 *                          `get_weather` and answer in one short sentence.
 * @param input.maxTokens   Output token cap; defaults to 80.
 * @param input.temperature Sampling temperature; defaults to 0. Pass `false`
 *                          to leave `temperature` out of the generation options.
 * @returns The value produced by `LLM.request`.
 */
export const weatherToolLoopRequest = (input: {
  readonly id: string
  readonly model: ModelRef
  readonly system?: string
  readonly maxTokens?: number
  readonly temperature?: number | false
}) => {
  const maxTokens = input.maxTokens ?? 80
  // `false` means "omit the temperature key", not "temperature zero".
  const generation =
    input.temperature === false ? { maxTokens } : { maxTokens, temperature: input.temperature ?? 0 }
  return LLM.request({
    id: input.id,
    model: input.model,
    system: input.system ?? "Use the get_weather tool, then answer in one short sentence.",
    prompt: "What is the weather in Paris?",
    cache: "none",
    generation,
  })
}
/**
 * Variant of `weatherToolLoopRequest` with a stricter system prompt that pins
 * the final answer to "Paris is sunny." so the golden scenario can match it.
 *
 * @param input Same id / model / maxTokens / temperature options as
 *              `weatherToolLoopRequest`, without the `system` override.
 * @returns The request built by `weatherToolLoopRequest`.
 */
export const goldenWeatherToolLoopRequest = (input: {
  readonly id: string
  readonly model: ModelRef
  readonly maxTokens?: number
  readonly temperature?: number | false
}) => {
  const system = "Use the get_weather tool exactly once. After the tool result, reply exactly: Paris is sunny."
  return weatherToolLoopRequest({ ...input, system })
}
/**
 * Streams `request` through `LLMClient.stream` with the runtime weather tool
 * registered under `weatherToolName`, and collects every emitted event into a
 * plain array.
 *
 * The stream is given `stopWhen: LLMClient.stepCountIs(10)`, presumably
 * capping the loop at ten steps (confirm against `LLMClient`).
 *
 * @param request The request to run, e.g. from `weatherToolLoopRequest`.
 * @returns An Effect yielding the collected events as an array.
 */
export const runWeatherToolLoop = (request: LLMRequest) => {
  const eventStream = LLMClient.stream({
    request,
    tools: { [weatherToolName]: weatherRuntimeTool },
    stopWhen: LLMClient.stepCountIs(10),
  })
  const collected = Stream.runCollect(eventStream)
  // Convert the collected container into a plain array for callers.
  return Effect.map(collected, (events) => Array.from(events))
}
/**
 * Asserts that the last event in `events` is a `finish` event with the given reason.
 *
 * @param events Events to inspect; only the final element is checked.
 * @param reason Expected finish reason.
 */
export const expectFinish = (
  events: ReadonlyArray<LLMEvent>,
  reason: Extract<LLMEvent, { readonly type: "finish" }>["reason"],
) => {
  // For an empty list this is `undefined`, which makes the match fail.
  const finalEvent = events[events.length - 1]
  return expect(finalEvent).toMatchObject({ type: "finish", reason })
}
/**
 * Asserts that `response.toolCalls` holds exactly one call to the weather
 * tool with input `{ city: "Paris" }` and a string id.
 *
 * @param response Response whose tool calls are checked.
 */
export const expectWeatherToolCall = (response: LLMResponse) => {
  const expectedCall = {
    type: "tool-call",
    id: expect.any(String),
    name: weatherToolName,
    input: { city: "Paris" },
  }
  return expect(response.toolCalls).toMatchObject([expectedCall])
}
/**
 * Asserts the event shape of a complete weather tool loop:
 * - exactly one `finish` event, with reason "stop";
 * - step finishes in the order ["tool-calls", "stop"];
 * - exactly one tool call to the weather tool with `{ city: "Paris" }`;
 * - exactly one tool result carrying the canned Paris reading;
 * - final text that mentions "Paris" and is non-empty.
 *
 * @param events All events collected from the tool loop.
 */
export const expectWeatherToolLoop = (events: ReadonlyArray<LLMEvent>) => {
  // Gather every event category first; the assertions below run in a fixed order.
  const finishEvents = events.filter(LLMEvent.is.finish)
  const stepFinishEvents = events.filter(LLMEvent.is.stepFinish)
  const toolCallEvents = events.filter(LLMEvent.is.toolCall)
  const toolResultEvents = events.filter(LLMEvent.is.toolResult)

  const [onlyFinish] = finishEvents
  expect(finishEvents).toHaveLength(1)
  expect(onlyFinish?.reason).toBe("stop")

  const stepReasons = stepFinishEvents.map((step) => step.reason)
  expect(stepReasons).toEqual(["tool-calls", "stop"])

  const [onlyCall] = toolCallEvents
  expect(toolCallEvents).toHaveLength(1)
  expect(onlyCall).toMatchObject({ type: "tool-call", name: weatherToolName, input: { city: "Paris" } })

  const [onlyResult] = toolResultEvents
  expect(toolResultEvents).toHaveLength(1)
  expect(onlyResult).toMatchObject({
    type: "tool-result",
    name: weatherToolName,
    result: { type: "json", value: { temperature: 22, condition: "sunny" } },
  })

  const finalText = LLMResponse.text({ events })
  expect(finalText).toContain("Paris")
  expect(finalText.trim().length).toBeGreaterThan(0)
}
/**
 * Runs every check in `expectWeatherToolLoop`, then additionally requires the
 * trimmed final text to be "Paris is sunny", with or without a trailing period.
 *
 * @param events All events collected from the golden tool loop.
 */
export const expectGoldenWeatherToolLoop = (events: ReadonlyArray<LLMEvent>) => {
  expectWeatherToolLoop(events)
  const answer = LLMResponse.text({ events }).trim()
  expect(answer).toMatch(/^Paris is sunny\.?$/)
}
/** Identifiers of the golden scenarios accepted by `goldenScenarioTags` and `runGoldenScenario`. */
export type GoldenScenarioID = "text" | "tool-call" | "tool-loop"
/**
 * Per-run inputs for `runGoldenScenario`. Every field is forwarded to the
 * request builder of the selected scenario.
 */
export interface GoldenScenarioContext {
// Request id forwarded to the request builder.
readonly id: string
// Model reference forwarded to the request builder.
readonly model: ModelRef
// Optional output token cap; `runGoldenScenario` falls back to 40 (text) or 80 (tool scenarios).
readonly maxTokens?: number
// Optional temperature; the request builders default it to 0 and omit it entirely when `false`.
readonly temperature?: number | false
}
// Local shorthand that forwards a request to `LLMClient.generate`; used by the
// "text" and "tool-call" branches of `runGoldenScenario`.
const generate = (request: LLMRequest) => LLMClient.generate(request)
/**
 * Returns the tag list for a golden scenario. Every list ends with "golden".
 * Any id other than "text" or "tool-call" gets the tool-loop tags.
 *
 * @param id Scenario identifier.
 * @returns A fresh array of tags on every call.
 */
export const goldenScenarioTags = (id: GoldenScenarioID) => {
  switch (id) {
    case "text":
      return ["text", "golden"]
    case "tool-call":
      return ["tool", "tool-call", "golden"]
    default:
      return ["tool", "tool-loop", "golden"]
  }
}
/**
 * Runs one golden scenario end to end and asserts on its outcome.
 *
 * - "text": generates a reply to "Reply exactly with: Hello!" (default 40
 *   max tokens) and expects "Hello" with an optional "!", finishing with "stop".
 * - "tool-call": generates the weather tool request (default 80 max tokens)
 *   and expects a single Paris tool call, finishing with "tool-calls".
 * - any other id ("tool-loop"): runs the golden weather tool loop (default 80
 *   max tokens) and applies `expectGoldenWeatherToolLoop`.
 *
 * @param id      Scenario to run.
 * @param context Request id, model and optional generation overrides.
 * @returns An Effect that completes with no value once the assertions pass.
 */
export const runGoldenScenario = (id: GoldenScenarioID, context: GoldenScenarioContext) =>
  Effect.gen(function* () {
    const { model, temperature } = context
    switch (id) {
      case "text": {
        const request = textRequest({
          id: context.id,
          model,
          prompt: "Reply exactly with: Hello!",
          maxTokens: context.maxTokens ?? 40,
          temperature,
        })
        const response = yield* generate(request)
        expect(response.text.trim()).toMatch(/^Hello!?$/)
        expectFinish(response.events, "stop")
        return
      }
      case "tool-call": {
        const request = weatherToolRequest({
          id: context.id,
          model,
          maxTokens: context.maxTokens ?? 80,
          temperature,
        })
        const response = yield* generate(request)
        expectWeatherToolCall(response)
        expectFinish(response.events, "tool-calls")
        return
      }
      default: {
        // Remaining case: the multi-step tool loop.
        const request = goldenWeatherToolLoopRequest({
          id: context.id,
          model,
          maxTokens: context.maxTokens ?? 80,
          temperature,
        })
        const events = yield* runWeatherToolLoop(request)
        expectGoldenWeatherToolLoop(events)
        return
      }
    }
  })
/**
 * Reduces a usage record to its defined token counters.
 *
 * @param usage Usage object from a response, or `undefined`.
 * @returns `undefined` when `usage` is falsy; otherwise an object holding only
 *          the counters that are not `undefined`, in a fixed key order.
 */
const usageSummary = (usage: LLMResponse["usage"] | undefined) => {
  if (!usage) return undefined
  // Fixed field order keeps the summary's key order stable.
  const counterNames = [
    "inputTokens",
    "outputTokens",
    "reasoningTokens",
    "cacheReadInputTokens",
    "cacheWriteInputTokens",
    "totalTokens",
  ] as const
  const counters: Record<string, unknown> = {}
  for (const counterName of counterNames) {
    const count = usage[counterName]
    if (count !== undefined) counters[counterName] = count
  }
  return counters
}
/**
 * Appends a text or reasoning fragment to `summary`, mutating it in place.
 * Consecutive fragments of the same `type` are concatenated into one entry.
 *
 * @param summary Summary entries built so far.
 * @param type    Kind of fragment being added.
 * @param value   Fragment text.
 */
const pushText = (summary: Array<Record<string, unknown>>, type: "text" | "reasoning", value: string) => {
  const previous = summary[summary.length - 1]
  // Start a new entry unless the previous one has the same kind.
  if (previous?.type !== type) {
    summary.push({ type, value })
    return
  }
  previous.value = `${previous.value ?? ""}${value}`
}
/**
 * Condenses an event list into a compact array of plain objects.
 *
 * - Adjacent `text-delta` events merge into one `{ type: "text", value }`
 *   entry; adjacent `reasoning-delta` events merge the same way.
 * - `tool-call`, `tool-result`, `tool-error` and `finish` events each become
 *   one entry with a few selected fields.
 * - Any other event type is ignored.
 * - Keys whose value is `undefined` are removed from every entry.
 *
 * @param events Events to summarize, in emission order.
 * @returns The summary entries, in the same order.
 */
export const eventSummary = (events: ReadonlyArray<LLMEvent>) => {
  const entries: Array<Record<string, unknown>> = []
  for (const event of events) {
    switch (event.type) {
      case "text-delta":
        pushText(entries, "text", event.text)
        break
      case "reasoning-delta":
        pushText(entries, "reasoning", event.text)
        break
      case "tool-call":
        entries.push({
          type: "tool-call",
          name: event.name,
          input: event.input,
          providerExecuted: event.providerExecuted,
        })
        break
      case "tool-result":
        entries.push({
          type: "tool-result",
          name: event.name,
          result: event.result,
          providerExecuted: event.providerExecuted,
        })
        break
      case "tool-error":
        entries.push({ type: "tool-error", name: event.name, message: event.message })
        break
      case "finish":
        entries.push({ type: "finish", reason: event.reason, usage: usageSummary(event.usage) })
        break
      default:
        // Other event kinds are not part of the summary.
        break
    }
  }
  // Remove keys that were set to `undefined` (e.g. an absent `providerExecuted`).
  return entries.map((entry) =>
    Object.fromEntries(Object.entries(entry).filter(([, fieldValue]) => fieldValue !== undefined)),
  )
}