mirror of
https://github.com/anomalyco/opencode.git
synced 2026-05-18 18:43:04 +00:00
283 lines
9.1 KiB
TypeScript
283 lines
9.1 KiB
TypeScript
import { expect } from "bun:test"
|
||
import { Effect, Schema, Stream } from "effect"
|
||
import { LLM, LLMEvent, LLMResponse, ToolChoice, ToolDefinition, type LLMRequest, type ModelRef } from "../src"
|
||
import { LLMClient } from "../src/route"
|
||
import { tool } from "../src/tool"
|
||
|
||
export const weatherToolName = "get_weather"
|
||
|
||
// A deterministic system prompt long enough to clear every supported provider's
|
||
// minimum cacheable-prefix threshold (Anthropic Haiku 3.5: 2048 tokens; Anthropic
|
||
// Opus/Haiku 4.5: 4096 tokens; OpenAI/Gemini/Bedrock: lower). Built by repeating
|
||
// a fixed sentence — the cassette replays bit-for-bit, so the exact text matters
|
||
// only when re-recording with `RECORD=true`.
|
||
export const LARGE_CACHEABLE_SYSTEM = (() => {
|
||
const sentence = "You are a concise, factual assistant. Answer precisely and avoid filler. Cite numbers when known. "
|
||
// ~100 chars per sentence × 250 repeats ≈ 25,000 chars ≈ 5k+ tokens, safely
|
||
// above every provider's threshold.
|
||
return sentence.repeat(250)
|
||
})()
|
||
|
||
export const weatherTool = ToolDefinition.make({
|
||
name: weatherToolName,
|
||
description: "Get current weather for a city.",
|
||
inputSchema: {
|
||
type: "object",
|
||
properties: { city: { type: "string" } },
|
||
required: ["city"],
|
||
additionalProperties: false,
|
||
},
|
||
})
|
||
|
||
export const weatherRuntimeTool = tool({
|
||
description: weatherTool.description,
|
||
parameters: Schema.Struct({ city: Schema.String }),
|
||
success: Schema.Struct({ temperature: Schema.Number, condition: Schema.String }),
|
||
execute: ({ city }) =>
|
||
Effect.succeed(
|
||
city === "Paris" ? { temperature: 22, condition: "sunny" } : { temperature: 0, condition: "unknown" },
|
||
),
|
||
})
|
||
|
||
export const textRequest = (input: {
|
||
readonly id: string
|
||
readonly model: ModelRef
|
||
readonly prompt?: string
|
||
readonly maxTokens?: number
|
||
readonly temperature?: number | false
|
||
}) =>
|
||
LLM.request({
|
||
id: input.id,
|
||
model: input.model,
|
||
system: "You are concise.",
|
||
prompt: input.prompt ?? "Reply with exactly: Hello!",
|
||
cache: "none",
|
||
generation:
|
||
input.temperature === false
|
||
? { maxTokens: input.maxTokens ?? 20 }
|
||
: { maxTokens: input.maxTokens ?? 20, temperature: input.temperature ?? 0 },
|
||
})
|
||
|
||
export const weatherToolRequest = (input: {
|
||
readonly id: string
|
||
readonly model: ModelRef
|
||
readonly maxTokens?: number
|
||
readonly temperature?: number | false
|
||
}) =>
|
||
LLM.request({
|
||
id: input.id,
|
||
model: input.model,
|
||
system: "Call tools exactly as requested.",
|
||
prompt: "Call get_weather with city exactly Paris.",
|
||
tools: [weatherTool],
|
||
toolChoice: ToolChoice.make(weatherTool),
|
||
cache: "none",
|
||
generation:
|
||
input.temperature === false
|
||
? { maxTokens: input.maxTokens ?? 80 }
|
||
: { maxTokens: input.maxTokens ?? 80, temperature: input.temperature ?? 0 },
|
||
})
|
||
|
||
export const weatherToolLoopRequest = (input: {
|
||
readonly id: string
|
||
readonly model: ModelRef
|
||
readonly system?: string
|
||
readonly maxTokens?: number
|
||
readonly temperature?: number | false
|
||
}) =>
|
||
LLM.request({
|
||
id: input.id,
|
||
model: input.model,
|
||
system: input.system ?? "Use the get_weather tool, then answer in one short sentence.",
|
||
prompt: "What is the weather in Paris?",
|
||
cache: "none",
|
||
generation:
|
||
input.temperature === false
|
||
? { maxTokens: input.maxTokens ?? 80 }
|
||
: { maxTokens: input.maxTokens ?? 80, temperature: input.temperature ?? 0 },
|
||
})
|
||
|
||
export const goldenWeatherToolLoopRequest = (input: {
|
||
readonly id: string
|
||
readonly model: ModelRef
|
||
readonly maxTokens?: number
|
||
readonly temperature?: number | false
|
||
}) =>
|
||
weatherToolLoopRequest({
|
||
...input,
|
||
system: "Use the get_weather tool exactly once. After the tool result, reply exactly: Paris is sunny.",
|
||
})
|
||
|
||
export const runWeatherToolLoop = (request: LLMRequest) =>
|
||
LLMClient.stream({
|
||
request,
|
||
tools: { [weatherToolName]: weatherRuntimeTool },
|
||
stopWhen: LLMClient.stepCountIs(10),
|
||
}).pipe(
|
||
Stream.runCollect,
|
||
Effect.map((events) => Array.from(events)),
|
||
)
|
||
|
||
export const expectFinish = (
|
||
events: ReadonlyArray<LLMEvent>,
|
||
reason: Extract<LLMEvent, { readonly type: "finish" }>["reason"],
|
||
) => expect(events.at(-1)).toMatchObject({ type: "finish", reason })
|
||
|
||
export const expectWeatherToolCall = (response: LLMResponse) =>
|
||
expect(response.toolCalls).toMatchObject([
|
||
{ type: "tool-call", id: expect.any(String), name: weatherToolName, input: { city: "Paris" } },
|
||
])
|
||
|
||
export const expectWeatherToolLoop = (events: ReadonlyArray<LLMEvent>) => {
|
||
const finishes = events.filter(LLMEvent.is.finish)
|
||
expect(finishes).toHaveLength(1)
|
||
expect(finishes[0]?.reason).toBe("stop")
|
||
|
||
const stepFinishes = events.filter(LLMEvent.is.stepFinish)
|
||
expect(stepFinishes.map((event) => event.reason)).toEqual(["tool-calls", "stop"])
|
||
|
||
const toolCalls = events.filter(LLMEvent.is.toolCall)
|
||
expect(toolCalls).toHaveLength(1)
|
||
expect(toolCalls[0]).toMatchObject({ type: "tool-call", name: weatherToolName, input: { city: "Paris" } })
|
||
|
||
const toolResults = events.filter(LLMEvent.is.toolResult)
|
||
expect(toolResults).toHaveLength(1)
|
||
expect(toolResults[0]).toMatchObject({
|
||
type: "tool-result",
|
||
name: weatherToolName,
|
||
result: { type: "json", value: { temperature: 22, condition: "sunny" } },
|
||
})
|
||
|
||
const output = LLMResponse.text({ events })
|
||
expect(output).toContain("Paris")
|
||
expect(output.trim().length).toBeGreaterThan(0)
|
||
}
|
||
|
||
export const expectGoldenWeatherToolLoop = (events: ReadonlyArray<LLMEvent>) => {
|
||
expectWeatherToolLoop(events)
|
||
expect(LLMResponse.text({ events }).trim()).toMatch(/^Paris is sunny\.?$/)
|
||
}
|
||
|
||
export type GoldenScenarioID = "text" | "tool-call" | "tool-loop"
|
||
|
||
export interface GoldenScenarioContext {
|
||
readonly id: string
|
||
readonly model: ModelRef
|
||
readonly maxTokens?: number
|
||
readonly temperature?: number | false
|
||
}
|
||
|
||
const generate = (request: LLMRequest) => LLMClient.generate(request)
|
||
|
||
export const goldenScenarioTags = (id: GoldenScenarioID) => {
|
||
if (id === "text") return ["text", "golden"]
|
||
if (id === "tool-call") return ["tool", "tool-call", "golden"]
|
||
return ["tool", "tool-loop", "golden"]
|
||
}
|
||
|
||
export const runGoldenScenario = (id: GoldenScenarioID, context: GoldenScenarioContext) =>
|
||
Effect.gen(function* () {
|
||
if (id === "text") {
|
||
const response = yield* generate(
|
||
textRequest({
|
||
id: context.id,
|
||
model: context.model,
|
||
prompt: "Reply exactly with: Hello!",
|
||
maxTokens: context.maxTokens ?? 40,
|
||
temperature: context.temperature,
|
||
}),
|
||
)
|
||
expect(response.text.trim()).toMatch(/^Hello!?$/)
|
||
expectFinish(response.events, "stop")
|
||
return
|
||
}
|
||
|
||
if (id === "tool-call") {
|
||
const response = yield* generate(
|
||
weatherToolRequest({
|
||
id: context.id,
|
||
model: context.model,
|
||
maxTokens: context.maxTokens ?? 80,
|
||
temperature: context.temperature,
|
||
}),
|
||
)
|
||
expectWeatherToolCall(response)
|
||
expectFinish(response.events, "tool-calls")
|
||
return
|
||
}
|
||
|
||
expectGoldenWeatherToolLoop(
|
||
yield* runWeatherToolLoop(
|
||
goldenWeatherToolLoopRequest({
|
||
id: context.id,
|
||
model: context.model,
|
||
maxTokens: context.maxTokens ?? 80,
|
||
temperature: context.temperature,
|
||
}),
|
||
),
|
||
)
|
||
})
|
||
|
||
const usageSummary = (usage: LLMResponse["usage"] | undefined) => {
|
||
if (!usage) return undefined
|
||
return Object.fromEntries(
|
||
[
|
||
["inputTokens", usage.inputTokens],
|
||
["outputTokens", usage.outputTokens],
|
||
["reasoningTokens", usage.reasoningTokens],
|
||
["cacheReadInputTokens", usage.cacheReadInputTokens],
|
||
["cacheWriteInputTokens", usage.cacheWriteInputTokens],
|
||
["totalTokens", usage.totalTokens],
|
||
].filter((entry) => entry[1] !== undefined),
|
||
)
|
||
}
|
||
|
||
const pushText = (summary: Array<Record<string, unknown>>, type: "text" | "reasoning", value: string) => {
|
||
const last = summary.at(-1)
|
||
if (last?.type === type) {
|
||
last.value = `${last.value ?? ""}${value}`
|
||
return
|
||
}
|
||
summary.push({ type, value })
|
||
}
|
||
|
||
export const eventSummary = (events: ReadonlyArray<LLMEvent>) => {
|
||
const summary: Array<Record<string, unknown>> = []
|
||
for (const event of events) {
|
||
if (event.type === "text-delta") {
|
||
pushText(summary, "text", event.text)
|
||
continue
|
||
}
|
||
if (event.type === "reasoning-delta") {
|
||
pushText(summary, "reasoning", event.text)
|
||
continue
|
||
}
|
||
if (event.type === "tool-call") {
|
||
summary.push({
|
||
type: "tool-call",
|
||
name: event.name,
|
||
input: event.input,
|
||
providerExecuted: event.providerExecuted,
|
||
})
|
||
continue
|
||
}
|
||
if (event.type === "tool-result") {
|
||
summary.push({
|
||
type: "tool-result",
|
||
name: event.name,
|
||
result: event.result,
|
||
providerExecuted: event.providerExecuted,
|
||
})
|
||
continue
|
||
}
|
||
if (event.type === "tool-error") {
|
||
summary.push({ type: "tool-error", name: event.name, message: event.message })
|
||
continue
|
||
}
|
||
if (event.type === "finish") {
|
||
summary.push({ type: "finish", reason: event.reason, usage: usageSummary(event.usage) })
|
||
}
|
||
}
|
||
return summary.map((item) => Object.fromEntries(Object.entries(item).filter((entry) => entry[1] !== undefined)))
|
||
}
|