fix(llm): surface code, type, and nested fields on provider stream errors (#28757)

This commit is contained in:
Kit Langton
2026-05-22 12:37:55 -04:00
committed by GitHub
parent a3430db73a
commit d0cb58782f
4 changed files with 170 additions and 6 deletions

View File

@@ -206,7 +206,13 @@ const AnthropicEvent = Schema.Struct({
content_block: Schema.optional(AnthropicStreamBlock),
delta: Schema.optional(AnthropicStreamDelta),
usage: Schema.optional(AnthropicUsage),
error: Schema.optional(Schema.Struct({ type: Schema.String, message: Schema.String })),
// `type` and `message` are both required per Anthropic's spec, but
// OpenAI-compatible proxies and gateway translations occasionally drop one
// or the other; mark them optional so a partial payload still parses and
// the parser can fall back to whichever field is populated.
error: Schema.optional(
Schema.Struct({ type: Schema.optional(Schema.String), message: Schema.optional(Schema.String) }),
),
})
// Static TypeScript type derived from the `AnthropicEvent` schema value of the
// same name, so parser handlers can annotate their `event` parameter with it.
type AnthropicEvent = Schema.Schema.Type<typeof AnthropicEvent>
@@ -701,9 +707,18 @@ const onMessageDelta = (state: ParserState, event: AnthropicEvent): StepResult =
return [{ ...state, lifecycle, usage }, events]
}
/**
 * Builds the text carried by a provider-error event for an Anthropic stream
 * `error` payload.
 *
 * The error `type` is placed in front of the provider message so overloads,
 * rate limits, and quota errors stay recognisable even when the message itself
 * is generic. Empty or missing fields are dropped; when neither field is
 * usable a fixed default string is returned.
 *
 * @param event - Decoded stream event; `event.error` may be absent.
 * @returns `"<type>: <message>"`, whichever single field is populated, or the
 *   default text.
 */
const providerErrorMessage = (event: AnthropicEvent): string => {
  const details = event.error
  // Keep only the populated fields, in "type first, message second" order.
  const populated = [details?.type, details?.message].filter(Boolean)
  return populated.length > 0 ? populated.join(": ") : "Anthropic Messages stream error"
}
/**
 * Handles an Anthropic stream `error` event.
 *
 * The parser state is passed through unchanged. Exactly one provider-error
 * event is emitted, and its message comes from `providerErrorMessage` so the
 * error `type` is surfaced and an empty or missing provider message falls back
 * to the type or the default text instead of being emitted as-is.
 *
 * @param state - Current parser state, returned untouched.
 * @param event - Decoded `error` stream event.
 * @returns The `[state, events]` pair with a single provider-error event.
 */
const onError = (state: ParserState, event: AnthropicEvent): StepResult => [
  state,
  [LLMEvent.providerError({ message: providerErrorMessage(event) })],
]
const step = (state: ParserState, event: AnthropicEvent) => {

View File

@@ -178,6 +178,17 @@ const OpenAIResponsesStreamItem = Schema.Struct({
})
// Static TypeScript type derived from the `OpenAIResponsesStreamItem` schema
// value of the same name.
type OpenAIResponsesStreamItem = Schema.Schema.Type<typeof OpenAIResponsesStreamItem>
// OpenAI Responses surfaces provider failures in two related shapes. The
// streaming `error` event carries the details at the top level
// (`{ type: "error", code, message, param, sequence_number }`), while
// `response.failed` carries them under `response.error`. We capture both so
// the parser can surface a useful provider-error message in either path.
//
// This struct describes the nested `response.error` form; the top-level form
// is declared as separate fields on the event schema itself.
// NOTE(review): `optionalNull` is defined outside this view; by its name it
// presumably accepts a missing or `null` value. Confirm against its definition.
const OpenAIResponsesErrorPayload = Schema.Struct({
// Machine-readable failure identifier, e.g. `server_error` or `invalid_prompt`.
code: optionalNull(Schema.String),
// Human-readable description supplied by the provider.
message: optionalNull(Schema.String),
// Captured for completeness; the message builder in this file reads only
// `code` and `message`. NOTE(review): presumably names the offending request
// parameter. Verify against the provider documentation.
param: optionalNull(Schema.String),
})
const OpenAIResponsesEvent = Schema.Struct({
type: Schema.String,
delta: Schema.optional(Schema.String),
@@ -190,12 +201,14 @@ const OpenAIResponsesEvent = Schema.Struct({
service_tier: optionalNull(Schema.String),
incomplete_details: optionalNull(Schema.Struct({ reason: Schema.String })),
usage: optionalNull(OpenAIResponsesUsage),
error: optionalNull(OpenAIResponsesErrorPayload),
}),
[Schema.Record(Schema.String, Schema.Unknown)],
),
),
code: Schema.optional(Schema.String),
message: Schema.optional(Schema.String),
param: Schema.optional(Schema.String),
})
// Static TypeScript type derived from the `OpenAIResponsesEvent` schema value
// of the same name, so parser handlers can annotate their `event` parameter.
type OpenAIResponsesEvent = Schema.Schema.Type<typeof OpenAIResponsesEvent>
@@ -633,14 +646,27 @@ const onResponseFinish = (state: ParserState, event: OpenAIResponsesEvent): Step
return [{ ...state, lifecycle }, events]
}
/**
 * Collapses whatever error details the provider sent into one readable line.
 *
 * Details are looked up at the top level of the event first and, for each
 * field independently, under `response.error` when the top-level value is
 * missing or empty. When both a code and a message are found the code is put
 * in front (e.g. `rate_limit_exceeded: Slow down`) so the failure mode is
 * visible rather than just a bare, sometimes generic, message.
 *
 * @param event - Decoded stream event (`error` or `response.failed`).
 * @param fallback - Text returned when neither a code nor a message is usable.
 * @returns `"<code>: <message>"`, the single populated field, or `fallback`.
 */
const providerErrorMessage = (event: OpenAIResponsesEvent, fallback: string): string => {
  const wrapped = event.response?.error
  // Top-level value wins; an empty string counts as "not provided".
  const codeText = event.code || wrapped?.code
  const messageText = event.message || wrapped?.message

  if (!codeText) return messageText || fallback
  if (!messageText) return codeText
  return `${codeText}: ${messageText}`
}
/**
 * Handles a `response.failed` event.
 *
 * The parser state is passed through unchanged. Exactly one provider-error
 * event is emitted; its message is built by `providerErrorMessage`, which also
 * reads the details nested under `response.error`, so the real cause is
 * reported instead of only the generic default text.
 *
 * @param state - Current parser state, returned untouched.
 * @param event - Decoded `response.failed` stream event.
 * @returns The `[state, events]` pair with a single provider-error event.
 */
const onResponseFailed = (state: ParserState, event: OpenAIResponsesEvent): StepResult => [
  state,
  [LLMEvent.providerError({ message: providerErrorMessage(event, "OpenAI Responses response failed") })],
]
/**
 * Handles a streaming `error` event.
 *
 * The parser state is passed through unchanged. Exactly one provider-error
 * event is emitted; its message is built by `providerErrorMessage`, so an
 * empty top-level message falls back to the code (or the default text) and a
 * code, when present, is prefixed to the message.
 *
 * @param state - Current parser state, returned untouched.
 * @param event - Decoded `error` stream event.
 * @returns The `[state, events]` pair with a single provider-error event.
 */
const onError = (state: ParserState, event: OpenAIResponsesEvent): StepResult => [
  state,
  [LLMEvent.providerError({ message: providerErrorMessage(event, "OpenAI Responses stream error") })],
]
const step = (state: ParserState, event: OpenAIResponsesEvent) => {

View File

@@ -337,7 +337,29 @@ describe("Anthropic Messages route", () => {
),
)
expect(response.events).toEqual([{ type: "provider-error", message: "Overloaded" }])
// Prefix the error type so consumers can distinguish overloads, rate
// limits, and quota errors without parsing the message string.
expect(response.events).toEqual([{ type: "provider-error", message: "overloaded_error: Overloaded" }])
}),
)
// An empty provider message must not yield a blank error: the error type
// alone becomes the reported message.
it.effect("falls back to error type when no message is present", () =>
  Effect.gen(function* () {
    const pending = LLMClient.generate(request)
    const transport = fixedResponse(sseEvents({ type: "error", error: { type: "overloaded_error", message: "" } }))
    const { events } = yield* pending.pipe(Effect.provide(transport))

    expect(events).toEqual([{ type: "provider-error", message: "overloaded_error" }])
  }),
)
// A bare `error` event with no payload still produces a provider-error, using
// the fixed default text.
it.effect("falls back to a stable default when error payload is absent", () =>
  Effect.gen(function* () {
    const pending = LLMClient.generate(request)
    const transport = fixedResponse(sseEvents({ type: "error" }))
    const { events } = yield* pending.pipe(Effect.provide(transport))

    expect(events).toEqual([{ type: "provider-error", message: "Anthropic Messages stream error" }])
  }),
)

View File

@@ -877,7 +877,11 @@ describe("OpenAI Responses route", () => {
Effect.provide(fixedResponse(sseEvents({ type: "error", code: "rate_limit_exceeded", message: "Slow down" }))),
)
expect(response.events).toEqual([{ type: "provider-error", message: "Slow down" }])
// Prefix the code so consumers see the failure mode, not just the
// sometimes-generic provider message. The bare message alone meant
// production errors like rate limits were indistinguishable from
// unrelated stream failures.
expect(response.events).toEqual([{ type: "provider-error", message: "rate_limit_exceeded: Slow down" }])
}),
)
@@ -891,6 +895,103 @@ describe("OpenAI Responses route", () => {
}),
)
// An empty top-level message must not be reported as-is: the code alone
// becomes the message.
it.effect("falls back to error code when message is empty", () =>
  Effect.gen(function* () {
    const pending = LLMClient.generate(request)
    const transport = fixedResponse(sseEvents({ type: "error", code: "internal_error", message: "" }))
    const { events } = yield* pending.pipe(Effect.provide(transport))

    expect(events).toEqual([{ type: "provider-error", message: "internal_error" }])
  }),
)
// Regression guard: a `response.failed` event keeps its failure details under
// `response.error` rather than at the top level. A handler that looks only at
// top-level `message`/`code` would always report the generic
// "OpenAI Responses response failed" text and hide the real cause.
it.effect("surfaces response.failed details from response.error", () =>
  Effect.gen(function* () {
    const pending = LLMClient.generate(request)
    const transport = fixedResponse(
      sseEvents({
        type: "response.failed",
        response: {
          id: "resp_failed_1",
          error: { code: "server_error", message: "Upstream model unavailable" },
        },
      }),
    )
    const { events } = yield* pending.pipe(Effect.provide(transport))

    expect(events).toEqual([{ type: "provider-error", message: "server_error: Upstream model unavailable" }])
  }),
)
// With only a nested code and no nested message, the code alone is reported.
it.effect("surfaces response.failed code when no nested message is present", () =>
  Effect.gen(function* () {
    const pending = LLMClient.generate(request)
    const transport = fixedResponse(
      sseEvents({
        type: "response.failed",
        response: { id: "resp_failed_2", error: { code: "invalid_prompt" } },
      }),
    )
    const { events } = yield* pending.pipe(Effect.provide(transport))

    expect(events).toEqual([{ type: "provider-error", message: "invalid_prompt" }])
  }),
)
it.effect("surfaces error event details even when they arrive nested under response.error", () =>
  Effect.gen(function* () {
    // Certain OpenAI-compatible proxies and older SDK versions move the
    // top-level error fields into a nested `response.error` payload when they
    // relay an HTTP error as an SSE `error` event. Both shapes are honoured so
    // the underlying cause is shown rather than the catch-all string.
    const pending = LLMClient.generate(request)
    const transport = fixedResponse(
      sseEvents({
        type: "error",
        response: { error: { code: "context_length_exceeded", message: "prompt too long" } },
      }),
    )
    const { events } = yield* pending.pipe(Effect.provide(transport))

    expect(events).toEqual([{ type: "provider-error", message: "context_length_exceeded: prompt too long" }])
  }),
)
// A bare `error` event with neither top-level details nor a `response`
// object reports the fixed stream-error default.
it.effect("falls back to a stable default when both error and response are absent", () =>
  Effect.gen(function* () {
    const pending = LLMClient.generate(request)
    const transport = fixedResponse(sseEvents({ type: "error" }))
    const { events } = yield* pending.pipe(Effect.provide(transport))

    expect(events).toEqual([{ type: "provider-error", message: "OpenAI Responses stream error" }])
  }),
)
// A `response.failed` event whose `response` carries no `error` object reports
// the fixed response-failed default.
it.effect("falls back to a stable default when response.failed has no error payload", () =>
  Effect.gen(function* () {
    const pending = LLMClient.generate(request)
    const transport = fixedResponse(sseEvents({ type: "response.failed", response: { id: "resp_failed_3" } }))
    const { events } = yield* pending.pipe(Effect.provide(transport))

    expect(events).toEqual([{ type: "provider-error", message: "OpenAI Responses response failed" }])
  }),
)
it.effect("fails HTTP provider errors before stream parsing", () =>
Effect.gen(function* () {
const error = yield* LLMClient.generate(request).pipe(