test(app): add a golden path for mocked e2e prompts (#20593)

This commit is contained in:
Kit Langton
2026-04-02 14:17:28 -04:00
committed by GitHub
parent 363891126c
commit c3ef69c866
35 changed files with 2400 additions and 2071 deletions

View File

@@ -159,7 +159,17 @@ describe("cross-spawn spawner", () => {
fx.effect(
"captures both stdout and stderr",
Effect.gen(function* () {
const handle = yield* js('process.stdout.write("stdout\\n"); process.stderr.write("stderr\\n")')
const handle = yield* js(
[
"let pending = 2",
"const done = () => {",
" pending -= 1",
" if (pending === 0) setTimeout(() => process.exit(0), 0)",
"}",
'process.stdout.write("stdout\\n", done)',
'process.stderr.write("stderr\\n", done)',
].join("\n"),
)
const [stdout, stderr] = yield* Effect.all([decodeByteStream(handle.stdout), decodeByteStream(handle.stderr)])
expect(stdout).toBe("stdout")
expect(stderr).toBe("stderr")

View File

@@ -254,6 +254,16 @@ function responseToolArgs(id: string, text: string, seq: number) {
}
}
/**
 * Builds the SSE event that signals a streamed tool call's argument
 * payload has finished arriving (Responses API event shape).
 *
 * @param id item id the arguments belong to
 * @param args full JSON-encoded argument string
 * @param seq monotonically increasing sequence number for the stream
 */
function responseToolArgsDone(id: string, args: string, seq: number) {
  const event = {
    type: "response.function_call_arguments.done",
    item_id: id,
    arguments: args,
    output_index: 0,
    sequence_number: seq,
  }
  return event
}
function responseToolDone(tool: { id: string; item: string; name: string; args: string }, seq: number) {
return {
type: "response.output_item.done",
@@ -390,6 +400,8 @@ function responses(item: Sse, model: string) {
lines.push(responseReasonDone(reason, seq))
}
if (call && !item.hang && !item.error) {
seq += 1
lines.push(responseToolArgsDone(call.item, call.args, seq))
seq += 1
lines.push(responseToolDone(call, seq))
}
@@ -599,6 +611,11 @@ function isToolResultFollowUp(body: unknown): boolean {
return false
}
/**
 * Detects the automatic title-generation request by scanning the
 * serialized body for the canned title prompt. Non-object bodies
 * (including null) never match.
 */
function isTitleRequest(body: unknown): boolean {
  const isObject = typeof body === "object" && body !== null
  if (!isObject) return false
  const serialized = JSON.stringify(body)
  return serialized.includes("Generate a title for this conversation")
}
function requestSummary(body: unknown): string {
if (!body || typeof body !== "object") return "empty body"
if ("messages" in body && Array.isArray(body.messages)) {
@@ -623,6 +640,7 @@ namespace TestLLMServer {
readonly error: (status: number, body: unknown) => Effect.Effect<void>
readonly hang: Effect.Effect<void>
readonly hold: (value: string, wait: PromiseLike<unknown>) => Effect.Effect<void>
readonly reset: Effect.Effect<void>
readonly hits: Effect.Effect<Hit[]>
readonly calls: Effect.Effect<number>
readonly wait: (count: number) => Effect.Effect<void>
@@ -671,21 +689,20 @@ export class TestLLMServer extends ServiceMap.Service<TestLLMServer, TestLLMServ
const req = yield* HttpServerRequest.HttpServerRequest
const body = yield* req.json.pipe(Effect.orElseSucceed(() => ({})))
const current = hit(req.originalUrl, body)
if (isTitleRequest(body)) {
hits = [...hits, current]
yield* notify()
const auto: Sse = { type: "sse", head: [role()], tail: [textLine("E2E Title"), finishLine("stop")] }
if (mode === "responses") return send(responses(auto, modelFrom(body)))
return send(auto)
}
const next = pull(current)
if (!next) {
// Auto-acknowledge tool-result follow-ups so tests only need to
// queue one response per tool call instead of two.
if (isToolResultFollowUp(body)) {
hits = [...hits, current]
yield* notify()
const auto: Sse = { type: "sse", head: [role()], tail: [textLine("ok"), finishLine("stop")] }
if (mode === "responses") return send(responses(auto, modelFrom(body)))
return send(auto)
}
misses = [...misses, current]
const summary = requestSummary(body)
console.warn(`[TestLLMServer] unmatched request: ${req.originalUrl} (${summary}, pending=${list.length})`)
return HttpServerResponse.text(`unexpected request: ${summary}`, { status: 500 })
hits = [...hits, current]
yield* notify()
const auto: Sse = { type: "sse", head: [role()], tail: [textLine("ok"), finishLine("stop")] }
if (mode === "responses") return send(responses(auto, modelFrom(body)))
return send(auto)
}
hits = [...hits, current]
yield* notify()
@@ -755,6 +772,12 @@ export class TestLLMServer extends ServiceMap.Service<TestLLMServer, TestLLMServ
hold: Effect.fn("TestLLMServer.hold")(function* (value: string, wait: PromiseLike<unknown>) {
queue(reply().wait(wait).text(value).stop().item())
}),
reset: Effect.sync(() => {
hits = []
list = []
waits = []
misses = []
}),
hits: Effect.sync(() => [...hits]),
calls: Effect.sync(() => hits.length),
wait: Effect.fn("TestLLMServer.wait")(function* (count: number) {

View File

@@ -0,0 +1,314 @@
/**
* Reproduction test for e2e LLM URL routing.
*
* Tests whether OPENCODE_E2E_LLM_URL correctly routes LLM calls
* to the mock server when no explicit provider config is set.
* This mimics the e2e `project` fixture path (vs. withMockOpenAI).
*/
import { expect } from "bun:test"
import { Effect, Layer } from "effect"
import { Session } from "../../src/session"
import { SessionPrompt } from "../../src/session/prompt"
import { SessionSummary } from "../../src/session/summary"
import { Log } from "../../src/util/log"
import { provideTmpdirServer } from "../fixture/fixture"
import { testEffect } from "../lib/effect"
import { TestLLMServer } from "../lib/llm-server"
import { NodeFileSystem } from "@effect/platform-node"
import { Agent as AgentSvc } from "../../src/agent/agent"
import { Bus } from "../../src/bus"
import { Command } from "../../src/command"
import { Config } from "../../src/config/config"
import { FileTime } from "../../src/file/time"
import { LSP } from "../../src/lsp"
import { MCP } from "../../src/mcp"
import { Permission } from "../../src/permission"
import { Plugin } from "../../src/plugin"
import { Provider as ProviderSvc } from "../../src/provider/provider"
import { ModelID, ProviderID } from "../../src/provider/schema"
import { Server } from "../../src/server/server"
import { SessionCompaction } from "../../src/session/compaction"
import { Instruction } from "../../src/session/instruction"
import { SessionProcessor } from "../../src/session/processor"
import { SessionStatus } from "../../src/session/status"
import { LLM } from "../../src/session/llm"
import { Shell } from "../../src/shell/shell"
import { Snapshot } from "../../src/snapshot"
import { ToolRegistry } from "../../src/tool/registry"
import { Truncate } from "../../src/tool/truncate"
import { AppFileSystem } from "../../src/filesystem"
import * as CrossSpawnSpawner from "../../src/effect/cross-spawn-spawner"
Log.init({ print: false })
// Stub MCP layer: reports no clients/tools/prompts/resources so the agent
// loop sees an empty MCP surface. Any auth flow dies loudly — these tests
// must never exercise MCP authentication.
const mcp = Layer.succeed(
MCP.Service,
MCP.Service.of({
status: () => Effect.succeed({}),
clients: () => Effect.succeed({}),
tools: () => Effect.succeed({}),
prompts: () => Effect.succeed({}),
resources: () => Effect.succeed({}),
add: () => Effect.succeed({ status: { status: "disabled" as const } }),
connect: () => Effect.void,
disconnect: () => Effect.void,
getPrompt: () => Effect.succeed(undefined),
readResource: () => Effect.succeed(undefined),
// Defect (not a typed failure) on purpose: crash the test if auth is hit.
startAuth: () => Effect.die("unexpected MCP auth"),
authenticate: () => Effect.die("unexpected MCP auth"),
finishAuth: () => Effect.die("unexpected MCP auth"),
removeAuth: () => Effect.void,
supportsOAuth: () => Effect.succeed(false),
hasStoredTokens: () => Effect.succeed(false),
getAuthStatus: () => Effect.succeed("not_authenticated" as const),
}),
)
// Stub LSP layer: no language-server clients, empty diagnostics/symbols,
// so file edits made by the agent never trigger real LSP work in tests.
const lsp = Layer.succeed(
LSP.Service,
LSP.Service.of({
init: () => Effect.void,
status: () => Effect.succeed([]),
hasClients: () => Effect.succeed(false),
touchFile: () => Effect.void,
diagnostics: () => Effect.succeed({}),
hover: () => Effect.succeed(undefined),
definition: () => Effect.succeed([]),
references: () => Effect.succeed([]),
implementation: () => Effect.succeed([]),
documentSymbol: () => Effect.succeed([]),
workspaceSymbol: () => Effect.succeed([]),
prepareCallHierarchy: () => Effect.succeed([]),
incomingCalls: () => Effect.succeed([]),
outgoingCalls: () => Effect.succeed([]),
}),
)
// Stub FileTime layer: read/assert are no-ops and withLock simply runs the
// callback with no actual locking — mtime-based staleness checks are
// disabled for these tests.
const filetime = Layer.succeed(
FileTime.Service,
FileTime.Service.of({
read: () => Effect.void,
get: () => Effect.succeed(undefined),
assert: () => Effect.void,
withLock: (_filepath, fn) => Effect.promise(fn),
}),
)
// Session status tracking backed by the in-process event bus.
const status = SessionStatus.layer.pipe(Layer.provideMerge(Bus.layer))
// Real filesystem + process spawner — the only non-stubbed infrastructure.
const infra = Layer.mergeAll(NodeFileSystem.layer, CrossSpawnSpawner.defaultLayer)
// Model identity the tests prompt with; must match the config blocks below.
const patchModel = { providerID: ProviderID.make("openai"), modelID: ModelID.make("gpt-5.4") } as const
/**
 * Assembles the full layer stack for the prompt tests: real session/LLM/
 * provider services over stubbed MCP/LSP/FileTime, plus the mock LLM server.
 * NOTE(review): provideMerge ordering here appears load-bearing — deps must
 * be provided after the feature layers so shared services are memoized once.
 */
function makeHttp() {
const deps = Layer.mergeAll(
Session.defaultLayer,
Snapshot.defaultLayer,
LLM.defaultLayer,
AgentSvc.defaultLayer,
Command.defaultLayer,
Permission.layer,
Plugin.defaultLayer,
Config.defaultLayer,
ProviderSvc.defaultLayer,
filetime,
lsp,
mcp,
AppFileSystem.defaultLayer,
status,
).pipe(Layer.provideMerge(infra))
const registry = ToolRegistry.layer.pipe(Layer.provideMerge(deps))
const trunc = Truncate.layer.pipe(Layer.provideMerge(deps))
const proc = SessionProcessor.layer.pipe(Layer.provideMerge(deps))
// Compaction needs the processor in scope as well as the shared deps.
const compact = SessionCompaction.layer.pipe(Layer.provideMerge(proc), Layer.provideMerge(deps))
return Layer.mergeAll(
TestLLMServer.layer,
SessionPrompt.layer.pipe(
Layer.provideMerge(compact),
Layer.provideMerge(proc),
Layer.provideMerge(registry),
Layer.provideMerge(trunc),
// Instruction is provide (not provideMerge): it is internal to the
// prompt layer and intentionally not re-exported to the tests.
Layer.provide(Instruction.defaultLayer),
Layer.provideMerge(deps),
),
)
}
const it = testEffect(makeHttp())
// Golden path: OPENCODE_E2E_LLM_URL alone (no explicit provider baseURL)
// must route the prompt loop's LLM traffic to the mock server, execute the
// queued apply_patch tool call, and surface the resulting file diff.
it.live("e2eURL routes apply_patch through mock server", () =>
provideTmpdirServer(
Effect.fnUntraced(function* ({ dir, llm }) {
// Set the env var to route all LLM calls through the mock
const prev = process.env.OPENCODE_E2E_LLM_URL
process.env.OPENCODE_E2E_LLM_URL = llm.url
// Restore (or delete) the env var even if the test fails mid-way.
yield* Effect.addFinalizer(() =>
Effect.sync(() => {
if (prev === undefined) delete process.env.OPENCODE_E2E_LLM_URL
else process.env.OPENCODE_E2E_LLM_URL = prev
}),
)
const prompt = yield* SessionPrompt.Service
const sessions = yield* Session.Service
// Allow-all permissions so the apply_patch tool runs without prompting.
const session = yield* sessions.create({
title: "e2e url test",
permission: [{ permission: "*", pattern: "*", action: "allow" }],
})
const patch = ["*** Begin Patch", "*** Add File: e2e-test.txt", "+line 1", "+line 2", "*** End Patch"].join("\n")
// Queue mock response: match on system prompt, return apply_patch tool call
yield* llm.toolMatch(
(hit) => JSON.stringify(hit.body).includes("Your only valid response is one apply_patch tool call"),
"apply_patch",
{ patchText: patch },
)
// After tool execution, LLM gets called again with tool result — return "done"
yield* llm.text("done")
// Seed user message
// noReply: record the user turn without immediately invoking the model;
// the loop below drives the actual LLM round-trips.
yield* prompt.prompt({
sessionID: session.id,
agent: "build",
model: patchModel,
noReply: true,
system: [
"You are seeding deterministic e2e UI state.",
"Your only valid response is one apply_patch tool call.",
`Use this JSON input: ${JSON.stringify({ patchText: patch })}`,
"Do not call any other tools.",
"Do not output plain text.",
].join("\n"),
parts: [{ type: "text", text: "Apply the provided patch exactly once." }],
})
// Run the agent loop
const result = yield* prompt.loop({ sessionID: session.id })
expect(result.info.role).toBe("assistant")
// Exactly two mock calls: the tool-call turn plus the tool-result turn.
const calls = yield* llm.calls
expect(calls).toBe(2)
const missed = yield* llm.misses
expect(missed.length).toBe(0)
// The apply_patch tool must have materialized the file on disk.
const content = yield* Effect.promise(() =>
Bun.file(`${dir}/e2e-test.txt`)
.text()
.catch(() => "NOT FOUND"),
)
expect(content).toContain("line 1")
// Diff computation is asynchronous; poll up to ~2s for it to appear.
let diff: Awaited<ReturnType<typeof SessionSummary.diff>> = []
for (let i = 0; i < 20; i++) {
diff = yield* Effect.promise(() => SessionSummary.diff({ sessionID: session.id }))
if (diff.length > 0) break
yield* Effect.sleep("100 millis")
}
expect(diff.length).toBeGreaterThan(0)
}),
{
// git repo required so SessionSummary.diff has a baseline to diff against.
git: true,
config: () => ({
model: "openai/gpt-5.4",
agent: {
build: {
model: "openai/gpt-5.4",
},
},
// Dummy key only — the e2e URL env var supplies the actual endpoint.
provider: {
openai: {
options: {
apiKey: "test-openai-key",
},
},
},
}),
},
),
)
// Same golden path, but driven through the HTTP server's /message route
// (as the real e2e harness does) instead of calling SessionPrompt directly.
it.live("server message route produces diff through mock server", () =>
provideTmpdirServer(
Effect.fnUntraced(function* ({ dir, llm }) {
// Point LLM traffic at the mock server; finalizer restores the env var.
const prev = process.env.OPENCODE_E2E_LLM_URL
process.env.OPENCODE_E2E_LLM_URL = llm.url
yield* Effect.addFinalizer(() =>
Effect.sync(() => {
if (prev === undefined) delete process.env.OPENCODE_E2E_LLM_URL
else process.env.OPENCODE_E2E_LLM_URL = prev
}),
)
const sessions = yield* Session.Service
const session = yield* sessions.create({
title: "e2e route test",
permission: [{ permission: "*", pattern: "*", action: "allow" }],
})
// In-process server app; requests go straight to the handler, no socket.
const app = Server.Default()
const patch = ["*** Begin Patch", "*** Add File: route-test.txt", "+line 1", "+line 2", "*** End Patch"].join(
"\n",
)
// First mock turn: apply_patch tool call; second turn: plain "done".
yield* llm.toolMatch(
(hit) => JSON.stringify(hit.body).includes("Your only valid response is one apply_patch tool call"),
"apply_patch",
{ patchText: patch },
)
yield* llm.text("done")
const res = yield* Effect.promise(() =>
Promise.resolve(
app.request(`/session/${session.id}/message`, {
method: "POST",
headers: {
"content-type": "application/json",
// Scopes the request to the tmpdir project, like the real client.
"x-opencode-directory": dir,
},
body: JSON.stringify({
agent: "build",
system: [
"You are seeding deterministic e2e UI state.",
"Your only valid response is one apply_patch tool call.",
`Use this JSON input: ${JSON.stringify({ patchText: patch })}`,
"Do not call any other tools.",
"Do not output plain text.",
].join("\n"),
parts: [{ type: "text", text: "Apply the provided patch exactly once." }],
}),
}),
),
)
expect(res.status).toBe(200)
// Drain the response body; the payload itself is not asserted here.
yield* Effect.promise(() => res.json())
// Tool-call turn + tool-result turn = exactly two mock hits.
const calls = yield* llm.calls
expect(calls).toBe(2)
const content = yield* Effect.promise(() =>
Bun.file(`${dir}/route-test.txt`)
.text()
.catch(() => "NOT FOUND"),
)
expect(content).toContain("line 1")
// Poll up to ~3s for the async diff computation to land.
let diff: Awaited<ReturnType<typeof SessionSummary.diff>> = []
for (let i = 0; i < 30; i++) {
diff = yield* Effect.promise(() => SessionSummary.diff({ sessionID: session.id }))
if (diff.length > 0) break
yield* Effect.sleep("100 millis")
}
expect(diff.length).toBeGreaterThan(0)
}),
{
git: true,
config: () => ({
model: "openai/gpt-5.4",
agent: { build: { model: "openai/gpt-5.4" } },
provider: { openai: { options: { apiKey: "test-openai-key" } } },
}),
},
),
)