fix(test): auto-acknowledge tool-result follow-ups in mock LLM server (#20528)

2026-05-05 12:17:27 +00:00 · 2026-04-01 19:47:26 -04:00
parent 48db7cf07a
commit d9d4f895bc
13 changed files with 482 additions and 117 deletions
--- a/packages/opencode/test/lib/llm-server.ts
+++ b/packages/opencode/test/lib/llm-server.ts
@@ -584,6 +584,30 @@ function hit(url: string, body: unknown) {
  } satisfies Hit
 }

+/**
+ * True when the newest entry of a request body is a tool result, i.e. the request is the follow-up sent after a tool ran.
+ * Both wire formats look only at the last entry, so tool results earlier in the conversation do not count.
+ */
+function isToolResultFollowUp(body: unknown): boolean {
+  if (!body || typeof body !== "object") return false
+  // OpenAI chat format: last message has role "tool"
+  if ("messages" in body && Array.isArray(body.messages)) return body.messages[body.messages.length - 1]?.role === "tool"
+  // Responses API: last input item is a function_call_output
+  if ("input" in body && Array.isArray(body.input)) {
+    return body.input[body.input.length - 1]?.type === "function_call_output"
+  }
+  return false
+}
+
+/** One-line description of a request body for unmatched-request diagnostics: message roles if present, else top-level keys. */
+function requestSummary(body: unknown): string {
+  if (!body || typeof body !== "object") return "empty body"
+  if (!("messages" in body) || !Array.isArray(body.messages)) return `keys=[${Object.keys(body).join(",")}]`
+  // Entries can be null or undefined in a malformed request; optional chaining keeps this diagnostic path from throwing.
+  const roles = body.messages.map((entry: Record<string, unknown> | null | undefined) => entry?.role)
+  return `messages=[${roles.join(",")}]`
+}
+
 namespace TestLLMServer {
  export interface Service {
    readonly url: string
@@ -604,6 +628,7 @@ namespace TestLLMServer {
    readonly wait: (count: number) => Effect.Effect<void>
    readonly inputs: Effect.Effect<Record<string, unknown>[]>
    readonly pending: Effect.Effect<number>
+    readonly misses: Effect.Effect<Hit[]>
  }
 }

@@ -617,6 +642,7 @@ export class TestLLMServer extends ServiceMap.Service<TestLLMServer, TestLLMServ
      let hits: Hit[] = []
      let list: Queue[] = []
      let waits: Wait[] = []
+      let misses: Hit[] = []

      const queue = (...input: (Item | Reply)[]) => {
        list = [...list, ...input.map((value) => ({ item: item(value) }))]
@@ -646,7 +672,21 @@ export class TestLLMServer extends ServiceMap.Service<TestLLMServer, TestLLMServ
        const body = yield* req.json.pipe(Effect.orElseSucceed(() => ({})))
        const current = hit(req.originalUrl, body)
        const next = pull(current)
-        if (!next) return HttpServerResponse.text("unexpected request", { status: 500 })
+        if (!next) {
+          // Auto-acknowledge tool-result follow-ups so tests only need to
+          // queue one response per tool call instead of two.
+          if (isToolResultFollowUp(body)) {
+            hits = [...hits, current]
+            yield* notify()
+            const auto: Sse = { type: "sse", head: [role()], tail: [textLine("ok"), finishLine("stop")] }
+            if (mode === "responses") return send(responses(auto, modelFrom(body)))
+            return send(auto)
+          }
+          misses = [...misses, current]
+          const summary = requestSummary(body)
+          console.warn(`[TestLLMServer] unmatched request: ${req.originalUrl} (${summary}, pending=${list.length})`)
+          return HttpServerResponse.text(`unexpected request: ${summary}`, { status: 500 })
+        }
        hits = [...hits, current]
        yield* notify()
        if (next.type !== "sse") return fail(next)
@@ -725,6 +765,7 @@ export class TestLLMServer extends ServiceMap.Service<TestLLMServer, TestLLMServ
        }),
        inputs: Effect.sync(() => hits.map((hit) => hit.body)),
        pending: Effect.sync(() => list.length),
+        misses: Effect.sync(() => [...misses]),
      })
    }),
  ).pipe(Layer.provide(HttpRouter.layer), Layer.provide(NodeHttpServer.layer(() => Http.createServer(), { port: 0 })))
--- a/packages/opencode/test/session/snapshot-tool-race.test.ts
+++ b/packages/opencode/test/session/snapshot-tool-race.test.ts
@@ -0,0 +1,233 @@
+/**
+ * Reproducer for snapshot race condition with instant tool execution.
+ *
+ * When the mock LLM returns a tool call response instantly, the AI SDK
+ * processes the tool call and executes the tool (e.g. apply_patch) before
+ * the processor's start-step handler can capture a pre-tool snapshot.
+ * Both the "before" and "after" snapshots end up with the same git tree
+ * hash, so computeDiff returns empty and the session summary shows 0 files.
+ *
+ * This is a real bug: the snapshot system assumes it can capture state
+ * before tools run by hooking into start-step, but the AI SDK executes
+ * tools internally during multi-step processing before emitting events.
+ */
+import { expect } from "bun:test"
+import { Effect } from "effect"
+import fs from "fs/promises"
+import path from "path"
+import { Session } from "../../src/session"
+import { LLM } from "../../src/session/llm"
+import { SessionPrompt } from "../../src/session/prompt"
+import { SessionSummary } from "../../src/session/summary"
+import { MessageV2 } from "../../src/session/message-v2"
+import { Log } from "../../src/util/log"
+import { provideTmpdirServer } from "../fixture/fixture"
+import { testEffect } from "../lib/effect"
+import { TestLLMServer } from "../lib/llm-server"
+
+// Same layer setup as prompt-effect.test.ts
+import { NodeFileSystem } from "@effect/platform-node"
+import { Layer } from "effect"
+import { Agent as AgentSvc } from "../../src/agent/agent"
+import { Bus } from "../../src/bus"
+import { Command } from "../../src/command"
+import { Config } from "../../src/config/config"
+import { FileTime } from "../../src/file/time"
+import { LSP } from "../../src/lsp"
+import { MCP } from "../../src/mcp"
+import { Permission } from "../../src/permission"
+import { Plugin } from "../../src/plugin"
+import { Provider as ProviderSvc } from "../../src/provider/provider"
+import { SessionCompaction } from "../../src/session/compaction"
+import { SessionProcessor } from "../../src/session/processor"
+import { SessionStatus } from "../../src/session/status"
+import { Shell } from "../../src/shell/shell"
+import { Snapshot } from "../../src/snapshot"
+import { ToolRegistry } from "../../src/tool/registry"
+import { Truncate } from "../../src/tool/truncate"
+import { AppFileSystem } from "../../src/filesystem"
+import * as CrossSpawnSpawner from "../../src/effect/cross-spawn-spawner"
+
+Log.init({ print: false }) // NOTE(review): presumably keeps log output off the test console — confirm against Log.init
+
+const mcp = Layer.succeed( // Stub MCP layer: every lookup resolves to an empty object, undefined, or false
+  MCP.Service,
+  MCP.Service.of({
+    status: () => Effect.succeed({}),
+    clients: () => Effect.succeed({}),
+    tools: () => Effect.succeed({}),
+    prompts: () => Effect.succeed({}),
+    resources: () => Effect.succeed({}),
+    add: () => Effect.succeed({ status: { status: "disabled" as const } }),
+    connect: () => Effect.void,
+    disconnect: () => Effect.void,
+    getPrompt: () => Effect.succeed(undefined),
+    readResource: () => Effect.succeed(undefined),
+    startAuth: () => Effect.die("unexpected MCP auth"), // the three auth entry points die: this test is not expected to reach them
+    authenticate: () => Effect.die("unexpected MCP auth"),
+    finishAuth: () => Effect.die("unexpected MCP auth"),
+    removeAuth: () => Effect.void,
+    supportsOAuth: () => Effect.succeed(false),
+    hasStoredTokens: () => Effect.succeed(false),
+    getAuthStatus: () => Effect.succeed("not_authenticated" as const),
+  }),
+)
+
+const lsp = Layer.succeed( // Stub LSP layer: reports no clients; queries resolve to empty arrays/objects or undefined
+  LSP.Service,
+  LSP.Service.of({
+    init: () => Effect.void,
+    status: () => Effect.succeed([]),
+    hasClients: () => Effect.succeed(false),
+    touchFile: () => Effect.void,
+    diagnostics: () => Effect.succeed({}),
+    hover: () => Effect.succeed(undefined),
+    definition: () => Effect.succeed([]),
+    references: () => Effect.succeed([]),
+    implementation: () => Effect.succeed([]),
+    documentSymbol: () => Effect.succeed([]),
+    workspaceSymbol: () => Effect.succeed([]),
+    prepareCallHierarchy: () => Effect.succeed([]),
+    incomingCalls: () => Effect.succeed([]),
+    outgoingCalls: () => Effect.succeed([]),
+  }),
+)
+
+const filetime = Layer.succeed( // Stub FileTime layer: read/assert do nothing and get resolves to undefined
+  FileTime.Service,
+  FileTime.Service.of({
+    read: () => Effect.void,
+    get: () => Effect.succeed(undefined),
+    assert: () => Effect.void,
+    withLock: (_filepath, fn) => Effect.promise(fn), // the path argument is ignored; fn is simply run via Effect.promise
+  }),
+)
+
+const status = SessionStatus.layer.pipe(Layer.provideMerge(Bus.layer)) // SessionStatus with Bus.layer provided to it via provideMerge
+const infra = Layer.mergeAll(NodeFileSystem.layer, CrossSpawnSpawner.defaultLayer) // platform layers that makeHttp provides to its dependency set
+
+/** Builds the test layer: SessionPrompt over default service layers, the stubbed MCP/LSP/FileTime layers, and TestLLMServer. */
+function makeHttp() {
+  const base = Layer.mergeAll(
+    Session.defaultLayer,
+    Snapshot.defaultLayer,
+    LLM.defaultLayer,
+    AgentSvc.defaultLayer,
+    Command.defaultLayer,
+    Permission.layer,
+    Plugin.defaultLayer,
+    Config.defaultLayer,
+    ProviderSvc.defaultLayer,
+    filetime,
+    lsp,
+    mcp,
+    AppFileSystem.defaultLayer,
+    status,
+  ).pipe(Layer.provideMerge(infra))
+  const tools = ToolRegistry.layer.pipe(Layer.provideMerge(base))
+  const processor = SessionProcessor.layer.pipe(Layer.provideMerge(base))
+  const compaction = SessionCompaction.layer.pipe(Layer.provideMerge(processor), Layer.provideMerge(base))
+  return Layer.mergeAll(
+    TestLLMServer.layer,
+    SessionPrompt.layer.pipe(
+      Layer.provideMerge(compaction),
+      Layer.provideMerge(processor),
+      Layer.provideMerge(tools),
+      Layer.provideMerge(Truncate.layer.pipe(Layer.provideMerge(base))),
+      Layer.provideMerge(base),
+    ),
+  )
+}
+
+const it = testEffect(makeHttp()) // test helper bound to the layer stack built by makeHttp
+
+/** Config fragment declaring a single OpenAI-compatible provider ("test") with one model, pointed at the given base URL. */
+function providerCfg(url: string) {
+  return {
+    provider: {
+      test: {
+        name: "Test",
+        id: "test",
+        env: [],
+        npm: "@ai-sdk/openai-compatible",
+        models: {
+          "test-model": {
+            id: "test-model",
+            name: "Test Model",
+            attachment: false,
+            reasoning: false,
+            temperature: false,
+            tool_call: true,
+            release_date: "2025-01-01",
+            limit: { context: 100000, output: 10000 },
+            cost: { input: 0, output: 0 },
+            options: {},
+          },
+        },
+        options: { apiKey: "test-key", baseURL: url },
+      },
+    },
+  }
+}
+
+it.live("tool execution produces non-empty session diff (snapshot race)", () =>
+  provideTmpdirServer(
+    Effect.fnUntraced(function* ({ dir, llm }) {
+      const prompt = yield* SessionPrompt.Service
+      const sessions = yield* Session.Service
+
+      const session = yield* sessions.create({
+        title: "snapshot race test",
+        permission: [{ permission: "*", pattern: "*", action: "allow" }],
+      })
+
+      // Use bash tool (always registered) to create a file; the target is quoted so a tmpdir path with spaces still works
+      const filePath = path.join(dir, "race-test.txt")
+      const command = `echo 'snapshot race test content' > "${filePath}"`
+      yield* llm.toolMatch(
+        (hit) => JSON.stringify(hit.body).includes("create the file"),
+        "bash",
+        { command, description: "create test file" },
+      )
+      yield* llm.textMatch(
+        (hit) => JSON.stringify(hit.body).includes("bash"),
+        "done",
+      )
+
+      // Seed user message
+      yield* prompt.prompt({
+        sessionID: session.id,
+        agent: "build",
+        noReply: true,
+        parts: [{ type: "text", text: "create the file" }],
+      })
+
+      // Run the agent loop
+      const result = yield* prompt.loop({ sessionID: session.id })
+      expect(result.info.role).toBe("assistant")
+
+      // Verify the file was created (same path the bash command wrote to)
+      const fileExists = yield* Effect.promise(() =>
+        fs.access(filePath).then(() => true).catch(() => false),
+      )
+      expect(fileExists).toBe(true)
+
+      // Verify the tool call completed (in the first assistant message)
+      const allMsgs = yield* Effect.promise(() => MessageV2.filterCompacted(MessageV2.stream(session.id)))
+      const tool = allMsgs
+        .flatMap((m) => m.parts)
+        .find((p): p is MessageV2.ToolPart => p.type === "tool" && p.tool === "bash")
+      expect(tool?.state.status).toBe("completed")
+
+      // Poll for diff — summarize() is fire-and-forget
+      let diff: Awaited<ReturnType<typeof SessionSummary.diff>> = []
+      for (let i = 0; i < 50; i++) {
+        diff = yield* Effect.promise(() => SessionSummary.diff({ sessionID: session.id }))
+        if (diff.length > 0) break
+        yield* Effect.sleep("100 millis")
+      }
+      expect(diff.length).toBeGreaterThan(0)
+    }),
+    { git: true, config: providerCfg },
+  ),
+)