Compare commits

...

1 Commits

Author SHA1 Message Date
Kit Langton
91a3eef6d0 refactor(tool): convert webfetch to defineEffect with HttpClient
Replace raw fetch with Effect HttpClient service. Remove manual
AbortController/signal/clearTimeout plumbing — fiber interruption
and Effect.timeout handle cancellation natively. Update registry
to yield WebFetchTool and provide FetchHttpClient.layer. Fix tests
to provide FetchHttpClient.layer where needed.
2026-04-09 23:17:49 -04:00
6 changed files with 170 additions and 168 deletions

View File

@@ -28,6 +28,7 @@ import { Glob } from "../util/glob"
import path from "path"
import { pathToFileURL } from "url"
import { Effect, Layer, ServiceMap } from "effect"
import { FetchHttpClient, HttpClient } from "effect/unstable/http"
import { InstanceState } from "@/effect/instance-state"
import { makeRuntime } from "@/effect/run-service"
import { Env } from "../env"
@@ -80,6 +81,7 @@ export namespace ToolRegistry {
| FileTime.Service
| Instruction.Service
| AppFileSystem.Service
| HttpClient.HttpClient
> = Layer.effect(
Service,
Effect.gen(function* () {
@@ -92,6 +94,7 @@ export namespace ToolRegistry {
const read = yield* ReadTool
const question = yield* QuestionTool
const todo = yield* TodoWriteTool
const webfetch = yield* WebFetchTool
const state = yield* InstanceState.make<State>(
Effect.fn("ToolRegistry.state")(function* (ctx) {
@@ -157,7 +160,7 @@ export namespace ToolRegistry {
edit: Tool.init(EditTool),
write: Tool.init(WriteTool),
task: Tool.init(task),
fetch: Tool.init(WebFetchTool),
fetch: Tool.init(webfetch),
todo: Tool.init(todo),
search: Tool.init(WebSearchTool),
code: Tool.init(CodeSearchTool),
@@ -301,6 +304,7 @@ export namespace ToolRegistry {
Layer.provide(FileTime.defaultLayer),
Layer.provide(Instruction.defaultLayer),
Layer.provide(AppFileSystem.defaultLayer),
Layer.provide(FetchHttpClient.layer),
),
)

View File

@@ -1,178 +1,158 @@
import z from "zod"
import { Effect } from "effect"
import { HttpClient, HttpClientRequest } from "effect/unstable/http"
import { Tool } from "./tool"
import TurndownService from "turndown"
import DESCRIPTION from "./webfetch.txt"
import { abortAfterAny } from "../util/abort"
import { iife } from "@/util/iife"
const MAX_RESPONSE_SIZE = 5 * 1024 * 1024 // 5MB
const DEFAULT_TIMEOUT = 30 * 1000 // 30 seconds
const MAX_TIMEOUT = 120 * 1000 // 2 minutes
export const WebFetchTool = Tool.define("webfetch", {
description: DESCRIPTION,
parameters: z.object({
url: z.string().describe("The URL to fetch content from"),
format: z
.enum(["text", "markdown", "html"])
.default("markdown")
.describe("The format to return the content in (text, markdown, or html). Defaults to markdown."),
timeout: z.number().describe("Optional timeout in seconds (max 120)").optional(),
}),
async execute(params, ctx) {
// Validate URL
if (!params.url.startsWith("http://") && !params.url.startsWith("https://")) {
throw new Error("URL must start with http:// or https://")
}
await ctx.ask({
permission: "webfetch",
patterns: [params.url],
always: ["*"],
metadata: {
url: params.url,
format: params.format,
timeout: params.timeout,
},
})
const timeout = Math.min((params.timeout ?? DEFAULT_TIMEOUT / 1000) * 1000, MAX_TIMEOUT)
const { signal, clearTimeout } = abortAfterAny(timeout, ctx.abort)
// Build Accept header based on requested format with q parameters for fallbacks
let acceptHeader = "*/*"
switch (params.format) {
case "markdown":
acceptHeader = "text/markdown;q=1.0, text/x-markdown;q=0.9, text/plain;q=0.8, text/html;q=0.7, */*;q=0.1"
break
case "text":
acceptHeader = "text/plain;q=1.0, text/markdown;q=0.9, text/html;q=0.8, */*;q=0.1"
break
case "html":
acceptHeader = "text/html;q=1.0, application/xhtml+xml;q=0.9, text/plain;q=0.8, text/markdown;q=0.7, */*;q=0.1"
break
default:
acceptHeader =
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8"
}
const headers = {
"User-Agent":
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36",
Accept: acceptHeader,
"Accept-Language": "en-US,en;q=0.9",
}
const response = await iife(async () => {
try {
const initial = await fetch(params.url, { signal, headers })
// Retry with honest UA if blocked by Cloudflare bot detection (TLS fingerprint mismatch)
return initial.status === 403 && initial.headers.get("cf-mitigated") === "challenge"
? await fetch(params.url, { signal, headers: { ...headers, "User-Agent": "opencode" } })
: initial
} finally {
clearTimeout()
}
})
if (!response.ok) {
throw new Error(`Request failed with status code: ${response.status}`)
}
// Check content length
const contentLength = response.headers.get("content-length")
if (contentLength && parseInt(contentLength) > MAX_RESPONSE_SIZE) {
throw new Error("Response too large (exceeds 5MB limit)")
}
const arrayBuffer = await response.arrayBuffer()
if (arrayBuffer.byteLength > MAX_RESPONSE_SIZE) {
throw new Error("Response too large (exceeds 5MB limit)")
}
const contentType = response.headers.get("content-type") || ""
const mime = contentType.split(";")[0]?.trim().toLowerCase() || ""
const title = `${params.url} (${contentType})`
// Check if response is an image
const isImage = mime.startsWith("image/") && mime !== "image/svg+xml" && mime !== "image/vnd.fastbidsheet"
if (isImage) {
const base64Content = Buffer.from(arrayBuffer).toString("base64")
return {
title,
output: "Image fetched successfully",
metadata: {},
attachments: [
{
type: "file",
mime,
url: `data:${mime};base64,${base64Content}`,
},
],
}
}
const content = new TextDecoder().decode(arrayBuffer)
// Handle content based on requested format and actual content type
switch (params.format) {
case "markdown":
if (contentType.includes("text/html")) {
const markdown = convertHTMLToMarkdown(content)
return {
output: markdown,
title,
metadata: {},
}
}
return {
output: content,
title,
metadata: {},
}
case "text":
if (contentType.includes("text/html")) {
const text = await extractTextFromHTML(content)
return {
output: text,
title,
metadata: {},
}
}
return {
output: content,
title,
metadata: {},
}
case "html":
return {
output: content,
title,
metadata: {},
}
default:
return {
output: content,
title,
metadata: {},
}
}
},
/**
 * Input schema for the webfetch tool.
 *
 * - `url`: the address to fetch, accepted here as any string.
 * - `format`: one of "text", "markdown" or "html"; falls back to "markdown" when omitted.
 * - `timeout`: optional number, described to the caller as seconds with a maximum of 120.
 */
const parameters = z.object({
url: z.string().describe("The URL to fetch content from"),
format: z
.enum(["text", "markdown", "html"])
.default("markdown")
.describe("The format to return the content in (text, markdown, or html). Defaults to markdown."),
// The schema itself places no upper bound on this value; it is only marked optional here.
timeout: z.number().describe("Optional timeout in seconds (max 120)").optional(),
})
/**
 * The `webfetch` tool: fetches a URL through the Effect `HttpClient` service and
 * returns its body as text, markdown or raw HTML, or as a file attachment for images.
 *
 * Behavior of `execute`:
 * - Rejects URLs that do not start with `http://` or `https://`.
 * - Asks for the "webfetch" permission before any network access.
 * - Applies a timeout of `params.timeout` seconds (default `DEFAULT_TIMEOUT`, capped at `MAX_TIMEOUT`).
 * - Retries once with an honest User-Agent when the first response is a Cloudflare challenge.
 * - Fails on non-2xx responses and on bodies larger than `MAX_RESPONSE_SIZE`.
 * - Stops the in-flight request when `ctx.abort` fires.
 *
 * @throws Error when the URL scheme is invalid, the status is not 2xx, the body is too
 *   large, or the request times out.
 */
export const WebFetchTool = Tool.defineEffect(
  "webfetch",
  Effect.gen(function* () {
    const http = yield* HttpClient.HttpClient
    return {
      description: DESCRIPTION,
      parameters,
      async execute(params: z.infer<typeof parameters>, ctx: Tool.Context) {
        if (!params.url.startsWith("http://") && !params.url.startsWith("https://")) {
          throw new Error("URL must start with http:// or https://")
        }
        await ctx.ask({
          permission: "webfetch",
          patterns: [params.url],
          always: ["*"],
          metadata: {
            url: params.url,
            format: params.format,
            timeout: params.timeout,
          },
        })
        // params.timeout is in seconds; everything below works in milliseconds.
        const timeout = Math.min((params.timeout ?? DEFAULT_TIMEOUT / 1000) * 1000, MAX_TIMEOUT)
        // Build the Accept header for the requested format, with q-values as fallbacks.
        let accept = "*/*"
        switch (params.format) {
          case "markdown":
            accept = "text/markdown;q=1.0, text/x-markdown;q=0.9, text/plain;q=0.8, text/html;q=0.7, */*;q=0.1"
            break
          case "text":
            accept = "text/plain;q=1.0, text/markdown;q=0.9, text/html;q=0.8, */*;q=0.1"
            break
          case "html":
            accept = "text/html;q=1.0, application/xhtml+xml;q=0.9, text/plain;q=0.8, text/markdown;q=0.7, */*;q=0.1"
            break
          default:
            accept =
              "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8"
        }
        const headers = {
          "User-Agent":
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36",
          Accept: accept,
          "Accept-Language": "en-US,en;q=0.9",
        }
        const request = HttpClientRequest.get(params.url).pipe(HttpClientRequest.setHeaders(headers))
        const program = Effect.gen(function* () {
          const initial = yield* http.execute(request)
          // Retry with honest UA if blocked by Cloudflare bot detection (TLS fingerprint mismatch)
          const response =
            initial.status === 403 && initial.headers["cf-mitigated"] === "challenge"
              ? yield* http.execute(
                  HttpClientRequest.get(params.url).pipe(
                    HttpClientRequest.setHeaders({ ...headers, "User-Agent": "opencode" }),
                  ),
                )
              : initial
          if (response.status < 200 || response.status >= 300) {
            throw new Error(`Request failed with status code: ${response.status}`)
          }
          // Cheap pre-check on the declared size before the body is downloaded.
          const contentLength = response.headers["content-length"]
          if (contentLength && Number.parseInt(contentLength, 10) > MAX_RESPONSE_SIZE) {
            throw new Error("Response too large (exceeds 5MB limit)")
          }
          // The declared size can be missing or wrong, so check the actual byte count as well.
          const arrayBuffer = yield* response.arrayBuffer
          if (arrayBuffer.byteLength > MAX_RESPONSE_SIZE) {
            throw new Error("Response too large (exceeds 5MB limit)")
          }
          const contentType = response.headers["content-type"] || ""
          const mime = contentType.split(";")[0]?.trim().toLowerCase() || ""
          const title = `${params.url} (${contentType})`
          // These two image/* types are excluded here and fall through to the text path below.
          const isImage = mime.startsWith("image/") && mime !== "image/svg+xml" && mime !== "image/vnd.fastbidsheet"
          if (isImage) {
            return {
              title,
              output: "Image fetched successfully",
              metadata: {},
              attachments: [
                {
                  type: "file" as const,
                  mime,
                  url: `data:${mime};base64,${Buffer.from(arrayBuffer).toString("base64")}`,
                },
              ],
            }
          }
          const content = new TextDecoder().decode(arrayBuffer)
          // Convert only when the server actually returned HTML; otherwise pass the body through.
          switch (params.format) {
            case "markdown":
              if (contentType.includes("text/html")) {
                return { output: convertHTMLToMarkdown(content), title, metadata: {} }
              }
              return { output: content, title, metadata: {} }
            case "text":
              if (contentType.includes("text/html")) {
                return { output: yield* Effect.promise(() => extractTextFromHTML(content)), title, metadata: {} }
              }
              return { output: content, title, metadata: {} }
            case "html":
              return { output: content, title, metadata: {} }
            default:
              return { output: content, title, metadata: {} }
          }
        }).pipe(
          Effect.timeout(timeout),
          // Die with a real Error (not a bare string) so callers get a message and a stack.
          Effect.catchTag("TimeoutError", () => Effect.die(new Error("Request timed out"))),
        )
        // Tie the fiber to the caller's abort signal so cancelling the tool call
        // interrupts the request instead of letting it run until the timeout.
        return await Effect.runPromise(program, { signal: ctx.abort })
      },
    }
  }),
)
async function extractTextFromHTML(html: string) {
let text = ""
let skipContent = false
let skip = false
const rewriter = new HTMLRewriter()
.on("script, style, noscript, iframe, object, embed", {
element() {
skipContent = true
skip = true
},
text() {
// Skip text content inside these elements
@@ -180,13 +160,12 @@ async function extractTextFromHTML(html: string) {
})
.on("*", {
element(element) {
// Reset skip flag when entering other elements
if (!["script", "style", "noscript", "iframe", "object", "embed"].includes(element.tagName)) {
skipContent = false
skip = false
}
},
text(input) {
if (!skipContent) {
if (!skip) {
text += input.text
}
},

View File

@@ -1,5 +1,7 @@
import { describe, test, expect } from "bun:test"
import path from "path"
import { Effect } from "effect"
import { FetchHttpClient } from "effect/unstable/http"
import { Instance } from "../../src/project/instance"
import { WebFetchTool } from "../../src/tool/webfetch"
import { SessionID, MessageID } from "../../src/session/schema"
@@ -30,7 +32,11 @@ describe("memory: abort controller leak", () => {
await Instance.provide({
directory: projectRoot,
fn: async () => {
const tool = await WebFetchTool.init()
const tool = await WebFetchTool.pipe(
Effect.flatMap((info) => Effect.promise(() => info.init())),
Effect.provide(FetchHttpClient.layer),
Effect.runPromise,
)
// Warm up
await tool.execute({ url: "https://example.com", format: "text" }, ctx).catch(() => {})

View File

@@ -1,6 +1,7 @@
import { NodeFileSystem } from "@effect/platform-node"
import { expect } from "bun:test"
import { Cause, Effect, Exit, Fiber, Layer } from "effect"
import { FetchHttpClient } from "effect/unstable/http"
import path from "path"
import z from "zod"
import { Agent as AgentSvc } from "../../src/agent/agent"
@@ -169,6 +170,7 @@ function makeHttp() {
const todo = Todo.layer.pipe(Layer.provideMerge(deps))
const registry = ToolRegistry.layer.pipe(
Layer.provide(Skill.defaultLayer),
Layer.provide(FetchHttpClient.layer),
Layer.provideMerge(todo),
Layer.provideMerge(question),
Layer.provideMerge(deps),

View File

@@ -12,7 +12,8 @@
* tools internally during multi-step processing before emitting events.
*/
import { expect } from "bun:test"
import { Effect } from "effect"
import { Effect, Layer } from "effect"
import { FetchHttpClient } from "effect/unstable/http"
import fs from "fs/promises"
import path from "path"
import { Session } from "../../src/session"
@@ -28,7 +29,6 @@ import { TestLLMServer } from "../lib/llm-server"
// Same layer setup as prompt-effect.test.ts
import { NodeFileSystem } from "@effect/platform-node"
import { Layer } from "effect"
import { Agent as AgentSvc } from "../../src/agent/agent"
import { Bus } from "../../src/bus"
import { Command } from "../../src/command"
@@ -134,6 +134,7 @@ function makeHttp() {
const todo = Todo.layer.pipe(Layer.provideMerge(deps))
const registry = ToolRegistry.layer.pipe(
Layer.provide(Skill.defaultLayer),
Layer.provide(FetchHttpClient.layer),
Layer.provideMerge(todo),
Layer.provideMerge(question),
Layer.provideMerge(deps),

View File

@@ -1,5 +1,7 @@
import { describe, expect, test } from "bun:test"
import path from "path"
import { Effect } from "effect"
import { FetchHttpClient } from "effect/unstable/http"
import { Instance } from "../../src/project/instance"
import { WebFetchTool } from "../../src/tool/webfetch"
import { SessionID, MessageID } from "../../src/session/schema"
@@ -22,6 +24,14 @@ async function withFetch(fetch: (req: Request) => Response | Promise<Response>,
await fn(server.url)
}
/**
 * Test helper: resolves the `WebFetchTool` effect, calls `init()` on the resulting
 * tool info, and supplies `FetchHttpClient.layer` for its HttpClient requirement.
 *
 * @returns a promise of the initialized tool.
 */
function initTool() {
  const initialized = Effect.flatMap(WebFetchTool, (info) => Effect.promise(() => info.init()))
  const withHttpClient = Effect.provide(initialized, FetchHttpClient.layer)
  return Effect.runPromise(withHttpClient)
}
describe("tool.webfetch", () => {
test("returns image responses as file attachments", async () => {
const bytes = new Uint8Array([137, 80, 78, 71, 13, 10, 26, 10])
@@ -31,7 +41,7 @@ describe("tool.webfetch", () => {
await Instance.provide({
directory: projectRoot,
fn: async () => {
const webfetch = await WebFetchTool.init()
const webfetch = await initTool()
const result = await webfetch.execute(
{ url: new URL("/image.png", url).toString(), format: "markdown" },
ctx,
@@ -63,7 +73,7 @@ describe("tool.webfetch", () => {
await Instance.provide({
directory: projectRoot,
fn: async () => {
const webfetch = await WebFetchTool.init()
const webfetch = await initTool()
const result = await webfetch.execute({ url: new URL("/image.svg", url).toString(), format: "html" }, ctx)
expect(result.output).toContain("<svg")
expect(result.attachments).toBeUndefined()
@@ -84,7 +94,7 @@ describe("tool.webfetch", () => {
await Instance.provide({
directory: projectRoot,
fn: async () => {
const webfetch = await WebFetchTool.init()
const webfetch = await initTool()
const result = await webfetch.execute({ url: new URL("/file.txt", url).toString(), format: "text" }, ctx)
expect(result.output).toBe("hello from webfetch")
expect(result.attachments).toBeUndefined()