This commit is contained in:
Aiden Cline
2026-02-10 00:12:30 -06:00
parent a794489b10
commit 3cf3325240
7 changed files with 14 additions and 159 deletions

View File

@@ -5,6 +5,7 @@ import type { JSONSchema } from "zod/v4/core"
import type { Provider } from "./provider"
import type { ModelsDev } from "./models"
import { iife } from "@/util/iife"
import { Flag } from "@/flag/flag"
type Modality = NonNullable<ModelsDev.Model["modalities"]>["input"][number]
@@ -17,6 +18,8 @@ function mimeToModality(mime: string): Modality | undefined {
}
export namespace ProviderTransform {
// Upper bound on output tokens used by this namespace. Takes the value of
// Flag.OPENCODE_EXPERIMENTAL_OUTPUT_TOKEN_MAX when that is truthy; any falsy
// value (including 0) falls back to 32_000 because of the `||`.
export const OUTPUT_TOKEN_MAX = Flag.OPENCODE_EXPERIMENTAL_OUTPUT_TOKEN_MAX || 32_000
// Maps npm package to the key the AI SDK expects for providerOptions
function sdkKey(npm: string): string | undefined {
switch (npm) {
@@ -723,29 +726,8 @@ export namespace ProviderTransform {
return { [key]: options }
}
export function maxOutputTokens(
npm: string,
options: Record<string, any>,
modelLimit: number,
globalLimit: number,
): number {
const modelCap = modelLimit || globalLimit
const standardLimit = Math.min(modelCap, globalLimit)
if (npm === "@ai-sdk/anthropic" || npm === "@ai-sdk/google-vertex/anthropic") {
const thinking = options?.["thinking"]
const budgetTokens = typeof thinking?.["budgetTokens"] === "number" ? thinking["budgetTokens"] : 0
const enabled = thinking?.["type"] === "enabled"
if (enabled && budgetTokens > 0) {
// Return text tokens so that text + thinking <= model cap, preferring 32k text when possible.
if (budgetTokens + standardLimit <= modelCap) {
return standardLimit
}
return modelCap - budgetTokens
}
}
return standardLimit
/**
 * Picks the output-token ceiling for a model.
 *
 * @param model - Model whose `limit.output` is compared against OUTPUT_TOKEN_MAX.
 * @returns The smaller of `model.limit.output` and OUTPUT_TOKEN_MAX. When that
 *   minimum is falsy (0, or NaN if the limit is not a number), OUTPUT_TOKEN_MAX
 *   is returned instead.
 */
export function maxOutputTokens(model: Provider.Model): number {
  const capped = Math.min(model.limit.output, OUTPUT_TOKEN_MAX)
  // A falsy minimum means there is no usable per-model limit; use the global cap.
  if (!capped) {
    return OUTPUT_TOKEN_MAX
  }
  return capped
}
export function schema(model: Provider.Model, schema: JSONSchema.BaseSchema | JSONSchema7): JSONSchema7 {

View File

@@ -6,7 +6,6 @@ import { Instance } from "../project/instance"
import { Provider } from "../provider/provider"
import { MessageV2 } from "./message-v2"
import z from "zod"
import { SessionPrompt } from "./prompt"
import { Token } from "../util/token"
import { Log } from "../util/log"
import { SessionProcessor } from "./processor"
@@ -14,6 +13,7 @@ import { fn } from "@/util/fn"
import { Agent } from "@/agent/agent"
import { Plugin } from "@/plugin"
import { Config } from "@/config/config"
import { ProviderTransform } from "@/provider/transform"
export namespace SessionCompaction {
const log = Log.create({ service: "session.compaction" })
@@ -37,7 +37,7 @@ export namespace SessionCompaction {
input.tokens.total ||
input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write
const output = Math.min(input.model.limit.output, SessionPrompt.OUTPUT_TOKEN_MAX) || SessionPrompt.OUTPUT_TOKEN_MAX
const output = ProviderTransform.maxOutputTokens(input.model)
const usable = input.model.limit.input || context - output
return count > usable
}

View File

@@ -4,7 +4,7 @@ import { BusEvent } from "@/bus/bus-event"
import { Bus } from "@/bus"
import { Decimal } from "decimal.js"
import z from "zod"
import { type LanguageModelUsage, type ProviderMetadata } from "ai"
import { type ProviderMetadata } from "ai"
import { Config } from "../config/config"
import { Flag } from "../flag/flag"
import { Identifier } from "../id/id"

View File

@@ -25,8 +25,7 @@ import { Auth } from "@/auth"
export namespace LLM {
const log = Log.create({ service: "llm" })
export const OUTPUT_TOKEN_MAX = Flag.OPENCODE_EXPERIMENTAL_OUTPUT_TOKEN_MAX || 32_000
// Mirrors ProviderTransform.OUTPUT_TOKEN_MAX so the same limit is also
// reachable through this namespace's exported OUTPUT_TOKEN_MAX.
export const OUTPUT_TOKEN_MAX = ProviderTransform.OUTPUT_TOKEN_MAX
export type StreamInput = {
user: MessageV2.User
@@ -149,14 +148,7 @@ export namespace LLM {
)
const maxOutputTokens =
isCodex || provider.id.includes("github-copilot")
? undefined
: ProviderTransform.maxOutputTokens(
input.model.api.npm,
params.options,
input.model.limit.output,
OUTPUT_TOKEN_MAX,
)
isCodex || provider.id.includes("github-copilot") ? undefined : ProviderTransform.maxOutputTokens(input.model)
const tools = await resolveTools(input)

View File

@@ -52,7 +52,6 @@ globalThis.AI_SDK_LOG_WARNINGS = false
export namespace SessionPrompt {
const log = Log.create({ service: "session.prompt" })
export const OUTPUT_TOKEN_MAX = Flag.OPENCODE_EXPERIMENTAL_OUTPUT_TOKEN_MAX || 32_000
const state = Instance.state(
() => {

View File

@@ -175,100 +175,6 @@ describe("ProviderTransform.options - gpt-5 textVerbosity", () => {
})
})
// Exercises ProviderTransform.maxOutputTokens through its four-argument call
// shape: (npm package name, provider options, model limit, global cap).
describe("ProviderTransform.maxOutputTokens", () => {
// Model limit above the global cap: the global cap is expected back.
test("returns 32k when modelLimit > 32k", () => {
const modelLimit = 100000
const result = ProviderTransform.maxOutputTokens("@ai-sdk/openai", {}, modelLimit, OUTPUT_TOKEN_MAX)
expect(result).toBe(OUTPUT_TOKEN_MAX)
})
// Model limit below the global cap: the model limit is expected back.
test("returns modelLimit when modelLimit < 32k", () => {
const modelLimit = 16000
const result = ProviderTransform.maxOutputTokens("@ai-sdk/openai", {}, modelLimit, OUTPUT_TOKEN_MAX)
expect(result).toBe(16000)
})
// Same two cases as above, with the "@ai-sdk/azure" package name.
describe("azure", () => {
test("returns 32k when modelLimit > 32k", () => {
const modelLimit = 100000
const result = ProviderTransform.maxOutputTokens("@ai-sdk/azure", {}, modelLimit, OUTPUT_TOKEN_MAX)
expect(result).toBe(OUTPUT_TOKEN_MAX)
})
test("returns modelLimit when modelLimit < 32k", () => {
const modelLimit = 16000
const result = ProviderTransform.maxOutputTokens("@ai-sdk/azure", {}, modelLimit, OUTPUT_TOKEN_MAX)
expect(result).toBe(16000)
})
})
// Same two cases, with the "@ai-sdk/amazon-bedrock" package name.
describe("bedrock", () => {
test("returns 32k when modelLimit > 32k", () => {
const modelLimit = 100000
const result = ProviderTransform.maxOutputTokens("@ai-sdk/amazon-bedrock", {}, modelLimit, OUTPUT_TOKEN_MAX)
expect(result).toBe(OUTPUT_TOKEN_MAX)
})
test("returns modelLimit when modelLimit < 32k", () => {
const modelLimit = 16000
const result = ProviderTransform.maxOutputTokens("@ai-sdk/amazon-bedrock", {}, modelLimit, OUTPUT_TOKEN_MAX)
expect(result).toBe(16000)
})
})
// "@ai-sdk/anthropic" with empty options: expectations match the other packages.
describe("anthropic without thinking options", () => {
test("returns 32k when modelLimit > 32k", () => {
const modelLimit = 100000
const result = ProviderTransform.maxOutputTokens("@ai-sdk/anthropic", {}, modelLimit, OUTPUT_TOKEN_MAX)
expect(result).toBe(OUTPUT_TOKEN_MAX)
})
test("returns modelLimit when modelLimit < 32k", () => {
const modelLimit = 16000
const result = ProviderTransform.maxOutputTokens("@ai-sdk/anthropic", {}, modelLimit, OUTPUT_TOKEN_MAX)
expect(result).toBe(16000)
})
})
// "@ai-sdk/anthropic" with a `thinking` option carrying `type` and `budgetTokens`.
describe("anthropic with thinking options", () => {
// Thinking enabled with a 10000 budget and a 100000 model limit: the global cap is expected.
test("returns 32k when budgetTokens + 32k <= modelLimit", () => {
const modelLimit = 100000
const options = {
thinking: {
type: "enabled",
budgetTokens: 10000,
},
}
const result = ProviderTransform.maxOutputTokens("@ai-sdk/anthropic", options, modelLimit, OUTPUT_TOKEN_MAX)
expect(result).toBe(OUTPUT_TOKEN_MAX)
})
// Thinking enabled with a 30000 budget and a 50000 model limit: expects 50000 - 30000 = 20000.
test("returns modelLimit - budgetTokens when budgetTokens + 32k > modelLimit", () => {
const modelLimit = 50000
const options = {
thinking: {
type: "enabled",
budgetTokens: 30000,
},
}
const result = ProviderTransform.maxOutputTokens("@ai-sdk/anthropic", options, modelLimit, OUTPUT_TOKEN_MAX)
expect(result).toBe(20000)
})
// Thinking type "disabled": the budget is expected to have no effect on the result.
test("returns 32k when thinking type is not enabled", () => {
const modelLimit = 100000
const options = {
thinking: {
type: "disabled",
budgetTokens: 10000,
},
}
const result = ProviderTransform.maxOutputTokens("@ai-sdk/anthropic", options, modelLimit, OUTPUT_TOKEN_MAX)
expect(result).toBe(OUTPUT_TOKEN_MAX)
})
})
})
describe("ProviderTransform.schema - gemini array items", () => {
test("adds missing items for array properties", () => {
const geminiModel = {

View File

@@ -314,12 +314,7 @@ describe("session.llm.stream", () => {
expect(body.stream).toBe(true)
const maxTokens = (body.max_tokens as number | undefined) ?? (body.max_output_tokens as number | undefined)
const expectedMaxTokens = ProviderTransform.maxOutputTokens(
resolved.api.npm,
ProviderTransform.options({ model: resolved, sessionID }),
resolved.limit.output,
LLM.OUTPUT_TOKEN_MAX,
)
const expectedMaxTokens = ProviderTransform.maxOutputTokens(resolved)
expect(maxTokens).toBe(expectedMaxTokens)
const reasoning = (body.reasoningEffort as string | undefined) ?? (body.reasoning_effort as string | undefined)
@@ -442,12 +437,7 @@ describe("session.llm.stream", () => {
expect((body.reasoning as { effort?: string } | undefined)?.effort).toBe("high")
const maxTokens = body.max_output_tokens as number | undefined
const expectedMaxTokens = ProviderTransform.maxOutputTokens(
resolved.api.npm,
ProviderTransform.options({ model: resolved, sessionID }),
resolved.limit.output,
LLM.OUTPUT_TOKEN_MAX,
)
const expectedMaxTokens = ProviderTransform.maxOutputTokens(resolved)
expect(maxTokens).toBe(expectedMaxTokens)
},
})
@@ -565,14 +555,7 @@ describe("session.llm.stream", () => {
expect(capture.url.pathname.endsWith("/messages")).toBe(true)
expect(body.model).toBe(resolved.api.id)
expect(body.max_tokens).toBe(
ProviderTransform.maxOutputTokens(
resolved.api.npm,
ProviderTransform.options({ model: resolved, sessionID }),
resolved.limit.output,
LLM.OUTPUT_TOKEN_MAX,
),
)
expect(body.max_tokens).toBe(ProviderTransform.maxOutputTokens(resolved))
expect(body.temperature).toBe(0.4)
expect(body.top_p).toBe(0.9)
},
@@ -677,14 +660,7 @@ describe("session.llm.stream", () => {
expect(capture.url.pathname).toBe(pathSuffix)
expect(config?.temperature).toBe(0.3)
expect(config?.topP).toBe(0.8)
expect(config?.maxOutputTokens).toBe(
ProviderTransform.maxOutputTokens(
resolved.api.npm,
ProviderTransform.options({ model: resolved, sessionID }),
resolved.limit.output,
LLM.OUTPUT_TOKEN_MAX,
),
)
expect(config?.maxOutputTokens).toBe(ProviderTransform.maxOutputTokens(resolved))
},
})
})