diff --git a/packages/llm/src/protocols/anthropic-messages.ts b/packages/llm/src/protocols/anthropic-messages.ts
index 740441f7b4..060e2e091a 100644
--- a/packages/llm/src/protocols/anthropic-messages.ts
+++ b/packages/llm/src/protocols/anthropic-messages.ts
@@ -206,7 +206,13 @@ const AnthropicEvent = Schema.Struct({
   content_block: Schema.optional(AnthropicStreamBlock),
   delta: Schema.optional(AnthropicStreamDelta),
   usage: Schema.optional(AnthropicUsage),
-  error: Schema.optional(Schema.Struct({ type: Schema.String, message: Schema.String })),
+  // `type` and `message` are both required per Anthropic's spec, but
+  // OpenAI-compatible proxies and gateway translations occasionally drop one
+  // or the other; mark them optional so a partial payload still parses and
+  // the parser can fall back to whichever field is populated.
+  error: Schema.optional(
+    Schema.Struct({ type: Schema.optional(Schema.String), message: Schema.optional(Schema.String) }),
+  ),
 })
 
 type AnthropicEvent = Schema.Schema.Type
@@ -701,9 +707,18 @@ const onMessageDelta = (state: ParserState, event: AnthropicEvent): StepResult =
   return [{ ...state, lifecycle, usage }, events]
 }
 
+// Prefix `error.type` so overloads, rate limits, and quota errors are visible
+// even when the provider message is generic or empty.
+const providerErrorMessage = (event: AnthropicEvent): string => {
+  const type = event.error?.type
+  const message = event.error?.message
+  if (type && message) return `${type}: ${message}`
+  return message || type || "Anthropic Messages stream error"
+}
+
 const onError = (state: ParserState, event: AnthropicEvent): StepResult => [
   state,
-  [LLMEvent.providerError({ message: event.error?.message ?? "Anthropic Messages stream error" })],
+  [LLMEvent.providerError({ message: providerErrorMessage(event) })],
 ]
 
 const step = (state: ParserState, event: AnthropicEvent) => {
diff --git a/packages/llm/src/protocols/openai-responses.ts b/packages/llm/src/protocols/openai-responses.ts
index 300f3f19ee..88c65d187c 100644
--- a/packages/llm/src/protocols/openai-responses.ts
+++ b/packages/llm/src/protocols/openai-responses.ts
@@ -178,6 +178,17 @@ const OpenAIResponsesStreamItem = Schema.Struct({
 })
 type OpenAIResponsesStreamItem = Schema.Schema.Type
 
+// OpenAI Responses surfaces provider failures in two related shapes. The
+// streaming `error` event carries the details at the top level
+// (`{ type: "error", code, message, param, sequence_number }`), while
+// `response.failed` carries them under `response.error`. We capture both so
+// the parser can surface a useful provider-error message in either path.
+const OpenAIResponsesErrorPayload = Schema.Struct({
+  code: optionalNull(Schema.String),
+  message: optionalNull(Schema.String),
+  param: optionalNull(Schema.String),
+})
+
 const OpenAIResponsesEvent = Schema.Struct({
   type: Schema.String,
   delta: Schema.optional(Schema.String),
@@ -190,12 +201,16 @@ const OpenAIResponsesEvent = Schema.Struct({
         service_tier: optionalNull(Schema.String),
         incomplete_details: optionalNull(Schema.Struct({ reason: Schema.String })),
         usage: optionalNull(OpenAIResponsesUsage),
+        error: optionalNull(OpenAIResponsesErrorPayload),
       }),
       [Schema.Record(Schema.String, Schema.Unknown)],
     ),
   ),
   code: Schema.optional(Schema.String),
   message: Schema.optional(Schema.String),
+  // OpenAI documents `param` as `string | null` on the streaming `error`
+  // event, so null must decode; a plain optional string would reject it.
+  param: optionalNull(Schema.String),
 })
 
 type OpenAIResponsesEvent = Schema.Schema.Type
@@ -633,14 +648,27 @@ const onResponseFinish = (state: ParserState, event: OpenAIResponsesEvent): Step
   return [{ ...state, lifecycle }, events]
 }
 
+// Build a single human-readable message from whatever the provider supplied.
+// When both code and message are present, prefix the code so consumers see
+// the failure mode (e.g. `rate_limit_exceeded: Slow down`) instead of just
+// the bare message — production rate limits and context-length failures used
+// to be indistinguishable from generic stream drops.
+const providerErrorMessage = (event: OpenAIResponsesEvent, fallback: string): string => {
+  const nested = event.response?.error ?? undefined
+  const message = event.message || nested?.message || undefined
+  const code = event.code || nested?.code || undefined
+  if (message && code) return `${code}: ${message}`
+  return message || code || fallback
+}
+
 const onResponseFailed = (state: ParserState, event: OpenAIResponsesEvent): StepResult => [
   state,
-  [LLMEvent.providerError({ message: event.message ?? event.code ?? "OpenAI Responses response failed" })],
+  [LLMEvent.providerError({ message: providerErrorMessage(event, "OpenAI Responses response failed") })],
 ]
 
 const onError = (state: ParserState, event: OpenAIResponsesEvent): StepResult => [
   state,
-  [LLMEvent.providerError({ message: event.message ?? event.code ?? "OpenAI Responses stream error" })],
+  [LLMEvent.providerError({ message: providerErrorMessage(event, "OpenAI Responses stream error") })],
 ]
 
 const step = (state: ParserState, event: OpenAIResponsesEvent) => {
diff --git a/packages/llm/test/provider/anthropic-messages.test.ts b/packages/llm/test/provider/anthropic-messages.test.ts
index 4cf1c3bee9..5198af9ab7 100644
--- a/packages/llm/test/provider/anthropic-messages.test.ts
+++ b/packages/llm/test/provider/anthropic-messages.test.ts
@@ -337,7 +337,29 @@ describe("Anthropic Messages route", () => {
         ),
       )
 
-      expect(response.events).toEqual([{ type: "provider-error", message: "Overloaded" }])
+      // Prefix the error type so consumers can distinguish overloads, rate
+      // limits, and quota errors without parsing the message string.
+      expect(response.events).toEqual([{ type: "provider-error", message: "overloaded_error: Overloaded" }])
+    }),
+  )
+
+  it.effect("falls back to error type when no message is present", () =>
+    Effect.gen(function* () {
+      const response = yield* LLMClient.generate(request).pipe(
+        Effect.provide(fixedResponse(sseEvents({ type: "error", error: { type: "overloaded_error", message: "" } }))),
+      )
+
+      expect(response.events).toEqual([{ type: "provider-error", message: "overloaded_error" }])
+    }),
+  )
+
+  it.effect("falls back to a stable default when error payload is absent", () =>
+    Effect.gen(function* () {
+      const response = yield* LLMClient.generate(request).pipe(
+        Effect.provide(fixedResponse(sseEvents({ type: "error" }))),
+      )
+
+      expect(response.events).toEqual([{ type: "provider-error", message: "Anthropic Messages stream error" }])
     }),
   )
 
diff --git a/packages/llm/test/provider/openai-responses.test.ts b/packages/llm/test/provider/openai-responses.test.ts
index 57cc7789a6..ec30d3e544 100644
--- a/packages/llm/test/provider/openai-responses.test.ts
+++ b/packages/llm/test/provider/openai-responses.test.ts
@@ -877,7 +877,11 @@ describe("OpenAI Responses route", () => {
         Effect.provide(fixedResponse(sseEvents({ type: "error", code: "rate_limit_exceeded", message: "Slow down" }))),
       )
 
-      expect(response.events).toEqual([{ type: "provider-error", message: "Slow down" }])
+      // Prefix the code so consumers see the failure mode, not just the
+      // sometimes-generic provider message. The bare message alone meant
+      // production errors like rate limits were indistinguishable from
+      // unrelated stream failures.
+      expect(response.events).toEqual([{ type: "provider-error", message: "rate_limit_exceeded: Slow down" }])
     }),
   )
 
@@ -891,6 +895,117 @@ describe("OpenAI Responses route", () => {
     }),
   )
 
+  it.effect("falls back to error code when message is empty", () =>
+    Effect.gen(function* () {
+      const response = yield* LLMClient.generate(request).pipe(
+        Effect.provide(fixedResponse(sseEvents({ type: "error", code: "internal_error", message: "" }))),
+      )
+
+      expect(response.events).toEqual([{ type: "provider-error", message: "internal_error" }])
+    }),
+  )
+
+  // Regression: the streaming `error` event is documented with a nullable
+  // `param`, so a null value must decode instead of failing schema parsing.
+  it.effect("accepts a null param on the error event", () =>
+    Effect.gen(function* () {
+      const response = yield* LLMClient.generate(request).pipe(
+        Effect.provide(
+          fixedResponse(
+            sseEvents({ type: "error", code: "rate_limit_exceeded", message: "Slow down", param: null }),
+          ),
+        ),
+      )
+
+      expect(response.events).toEqual([{ type: "provider-error", message: "rate_limit_exceeded: Slow down" }])
+    }),
+  )
+
+  // Regression: `response.failed` carries the failure details under
+  // `response.error`, not at the top level. The previous handler only
+  // checked top-level `message`/`code` and so always emitted the bare
+  // "OpenAI Responses response failed" string, hiding the real cause.
+  it.effect("surfaces response.failed details from response.error", () =>
+    Effect.gen(function* () {
+      const response = yield* LLMClient.generate(request).pipe(
+        Effect.provide(
+          fixedResponse(
+            sseEvents({
+              type: "response.failed",
+              response: {
+                id: "resp_failed_1",
+                error: { code: "server_error", message: "Upstream model unavailable" },
+              },
+            }),
+          ),
+        ),
+      )
+
+      expect(response.events).toEqual([
+        { type: "provider-error", message: "server_error: Upstream model unavailable" },
+      ])
+    }),
+  )
+
+  it.effect("surfaces response.failed code when no nested message is present", () =>
+    Effect.gen(function* () {
+      const response = yield* LLMClient.generate(request).pipe(
+        Effect.provide(
+          fixedResponse(
+            sseEvents({
+              type: "response.failed",
+              response: { id: "resp_failed_2", error: { code: "invalid_prompt" } },
+            }),
+          ),
+        ),
+      )
+
+      expect(response.events).toEqual([{ type: "provider-error", message: "invalid_prompt" }])
+    }),
+  )
+
+  it.effect("surfaces error event details even when they arrive nested under response.error", () =>
+    Effect.gen(function* () {
+      // Some OpenAI-compatible proxies and older SDK versions wrap the
+      // top-level error fields into a nested `response.error` payload
+      // when they bubble up an HTTP error as an SSE `error` event. Honour
+      // both shapes so the user still sees the underlying cause instead
+      // of the catch-all string.
+      const response = yield* LLMClient.generate(request).pipe(
+        Effect.provide(
+          fixedResponse(
+            sseEvents({
+              type: "error",
+              response: { error: { code: "context_length_exceeded", message: "prompt too long" } },
+            }),
+          ),
+        ),
+      )
+
+      expect(response.events).toEqual([
+        { type: "provider-error", message: "context_length_exceeded: prompt too long" },
+      ])
+    }),
+  )
+
+  it.effect("falls back to a stable default when both error and response are absent", () =>
+    Effect.gen(function* () {
+      const response = yield* LLMClient.generate(request).pipe(
+        Effect.provide(fixedResponse(sseEvents({ type: "error" }))),
+      )
+
+      expect(response.events).toEqual([{ type: "provider-error", message: "OpenAI Responses stream error" }])
+    }),
+  )
+
+  it.effect("falls back to a stable default when response.failed has no error payload", () =>
+    Effect.gen(function* () {
+      const response = yield* LLMClient.generate(request).pipe(
+        Effect.provide(fixedResponse(sseEvents({ type: "response.failed", response: { id: "resp_failed_3" } }))),
+      )
+
+      expect(response.events).toEqual([{ type: "provider-error", message: "OpenAI Responses response failed" }])
+    }),
+  )
+
   it.effect("fails HTTP provider errors before stream parsing", () =>
     Effect.gen(function* () {
       const error = yield* LLMClient.generate(request).pipe(