This commit is contained in:
Eason Goodale
2025-04-20 01:28:42 -07:00
parent 0613fd35e2
commit ada5e2249a
3 changed files with 230 additions and 68 deletions

View File

@@ -1,6 +1,7 @@
import type { ReviewDecision } from "./review.js";
import type { ApplyPatchCommand, ApprovalPolicy } from "../../approvals.js";
import type { AppConfig } from "../config.js";
import type { UsageBreakdown } from "../estimate-cost.js";
import type {
ResponseFunctionToolCall,
ResponseInputItem,
@@ -805,20 +806,9 @@ export class AgentLoop {
try {
const usage = (event as MaybeUsageEvent).response?.usage;
if (usage && typeof usage === "object") {
const u = usage as {
total_tokens?: number;
input_tokens?: number;
output_tokens?: number;
};
const tokens =
u.total_tokens ??
(typeof u.input_tokens === "number" &&
typeof u.output_tokens === "number"
? u.input_tokens + u.output_tokens
: undefined);
if (typeof tokens === "number" && tokens > 0) {
ensureSessionTracker(this.model).addTokens(tokens);
}
ensureSessionTracker(this.model).addUsage(
usage as unknown as UsageBreakdown,
);
}
} catch {
/* best-effort only */

View File

@@ -1,81 +1,214 @@
/* eslint-disable no-irregular-whitespace */
/**
 * Cost-estimation helpers for OpenAI responses.
 *
 * The implementation now distinguishes between *input*, *cached input* and
 * *output* tokens, reflecting OpenAI's 2025-04 pricing scheme. For models
 * where we only have a single blended rate we gracefully fall back to the
 * legacy logic so existing call sites continue to work.
 */
import type { ResponseItem } from "openai/resources/responses/responses.mjs";
import { approximateTokensUsed } from "./approximate-tokens-used.js";
// ────────────────────────────────────────────────────────────────────────────
// Pricing tables
// ────────────────────────────────────────────────────────────────────────────
/** Breakdown of per-token prices (in USD). */
type TokenRates = {
  /** Price for *non-cached* input prompt tokens. */
  input: number;
  /** Preferential price for *cached* input tokens. */
  cachedInput: number;
  /** Price for completion / output tokens. */
  output: number;
};
/**
* Approximate pertoken pricing (in USD) for common OpenAI models.
*
* The list is intentionally *nonexhaustive*: OpenAI regularly introduces new
* variants. Unknown model names simply result in a `null` cost estimate so
* that callers can gracefully fall back (e.g. by omitting cost figures from
* uservisible summaries).
* Pricing table (exact model name -> pertoken rates).
* All keys must be lowercase.
*/
const priceMap: Array<{ pattern: RegExp; pricePerThousandTokens: number }> = [
const detailedPriceMap: Record<string, TokenRates> = {
// OpenAI “oseries” experimental
"o3": {
input: 10 / 1_000_000,
cachedInput: 2.5 / 1_000_000,
output: 40 / 1_000_000,
},
"o4-mini": {
input: 1.1 / 1_000_000,
cachedInput: 0.275 / 1_000_000,
output: 4.4 / 1_000_000,
},
// GPT4.1 family
"gpt-4.1-nano": {
input: 0.1 / 1_000_000,
cachedInput: 0.025 / 1_000_000,
output: 0.4 / 1_000_000,
},
"gpt-4.1-mini": {
input: 0.4 / 1_000_000,
cachedInput: 0.1 / 1_000_000,
output: 1.6 / 1_000_000,
},
"gpt-4.1": {
input: 2 / 1_000_000,
cachedInput: 0.5 / 1_000_000,
output: 8 / 1_000_000,
},
// GPT4o family
{ pattern: /gpt-4o-search-preview/i, pricePerThousandTokens: 0.0025 },
{ pattern: /gpt-4o-mini-search-preview/i, pricePerThousandTokens: 0.00015 },
{ pattern: /gpt-4o-realtime-preview/i, pricePerThousandTokens: 0.005 },
{ pattern: /gpt-4o-audio-preview/i, pricePerThousandTokens: 0.0025 },
{ pattern: /gpt-4o-mini-audio-preview/i, pricePerThousandTokens: 0.00015 },
{ pattern: /gpt-4o-mini-realtime-preview/i, pricePerThousandTokens: 0.0006 },
{ pattern: /gpt-4o-mini/i, pricePerThousandTokens: 0.00015 },
{ pattern: /gpt-4o/i, pricePerThousandTokens: 0.0025 },
// GPT4.1 / 4.5
{ pattern: /gpt-4\.1-nano/i, pricePerThousandTokens: 0.0001 },
{ pattern: /gpt-4\.1-mini/i, pricePerThousandTokens: 0.0004 },
{ pattern: /gpt-4\.1/i, pricePerThousandTokens: 0.002 },
{ pattern: /gpt-4\.5-preview/i, pricePerThousandTokens: 0.075 },
{ pattern: /gpt-4\.5/i, pricePerThousandTokens: 0.075 },
// “oseries” experimental
{ pattern: /o4-mini/i, pricePerThousandTokens: 0.0011 },
{ pattern: /o3-mini/i, pricePerThousandTokens: 0.0011 },
{ pattern: /o1-mini/i, pricePerThousandTokens: 0.0011 },
{ pattern: /\bo3\b/i, pricePerThousandTokens: 0.015 },
{ pattern: /o1[- ]?pro/i, pricePerThousandTokens: 0.15 },
{ pattern: /\bo1\b/i, pricePerThousandTokens: 0.015 },
// Misc
{ pattern: /computer-use-preview/i, pricePerThousandTokens: 0.003 },
"gpt-4o-mini": {
input: 0.6 / 1_000_000,
cachedInput: 0.3 / 1_000_000,
output: 2.4 / 1_000_000,
},
"gpt-4o": {
input: 5 / 1_000_000,
cachedInput: 2.5 / 1_000_000,
output: 20 / 1_000_000,
},
};
/**
* Legacy singlerate pricing entries (per *thousand* tokens). These are kept
* to provide sensible fallbacks for models that do not yet expose a detailed
* breakdown or where we have no published split pricing. The figures stem
* from older OpenAI announcements and are only meant for *approximation*
* callers that rely on exact accounting should upgrade to models covered by
* {@link detailedPriceMap}.
*/
const blendedPriceMap: Record<string, number> = {
// GPT4 Turbo (Apr 2024)
{ pattern: /gpt-4-turbo/i, pricePerThousandTokens: 0.01 },
"gpt-4-turbo": 0.01,
// Legacy GPT4 8k / 32k context models
{ pattern: /gpt-4\b/i, pricePerThousandTokens: 0.03 },
"gpt-4": 0.03,
// GPT3.5Turbo family
{ pattern: /gpt-3\.5-turbo/i, pricePerThousandTokens: 0.0005 },
];
"gpt-3.5-turbo": 0.0005,
// Remaining preview variants (exact names)
"gpt-4o-search-preview": 0.0025,
"gpt-4o-mini-search-preview": 0.00015,
"gpt-4o-realtime-preview": 0.005,
"gpt-4o-audio-preview": 0.0025,
"gpt-4o-mini-audio-preview": 0.00015,
"gpt-4o-mini-realtime-preview": 0.0006,
"gpt-4o-mini": 0.00015,
// Older experimental oseries rates
"o3-mini": 0.0011,
"o1-mini": 0.0011,
"o1-pro": 0.15,
"o1": 0.015,
// Additional internal preview models
"computer-use-preview": 0.003,
};
// ────────────────────────────────────────────────────────────────────────────
// Public helpers
// ────────────────────────────────────────────────────────────────────────────
/**
 * Return the per-token input/cached/output rates for the supplied model, or
 * `null` when no detailed pricing is available.
 */
/**
 * Canonicalise a model identifier for table lookup: lowercase it and drop a
 * trailing date/version suffix such as "-2025-04-14".
 */
function normalize(model: string): string {
  const DATE_SUFFIX = /-\d{4}-\d{2}-\d{2}$/;
  return model.toLowerCase().replace(DATE_SUFFIX, "");
}
export function priceRates(model: string): TokenRates | null {
  const key = normalize(model);
  const rates = detailedPriceMap[key];
  return rates === undefined ? null : rates;
}
/**
* Fallback that returns a *single* blended pertoken rate when no detailed
* split is available. This mirrors the behaviour of the pre2025 version so
* that existing callers keep working unmodified.
*/
export function pricePerToken(model: string): number | null {
const entry = priceMap.find(({ pattern }) => pattern.test(model));
if (!entry) {
// Prefer an *average* of the detailed rates when we have them this avoids
// surprises where callers mix `pricePerToken()` with the new detailed
// helpers.
const rates = priceRates(model);
if (rates) {
return (rates.input + rates.output) / 2; // simple average heuristic
}
const entry = blendedPriceMap[normalize(model)];
if (entry == null) {
return null;
}
return entry.pricePerThousandTokens / 1000;
return entry / 1000;
}
// ────────────────────────────────────────────────────────────────────────────
// Cost estimation
// ────────────────────────────────────────────────────────────────────────────
/** Shape of the `usage` object returned by OpenAI's Responses API. */
export type UsageBreakdown = {
  /** Total prompt tokens (cached + non-cached). */
  input_tokens?: number;
  /** Nested detail; `cached_tokens` counts prompt tokens served from cache. */
  input_tokens_details?: { cached_tokens?: number } | null;
  /** Completion tokens. */
  output_tokens?: number;
  /** Convenience total; when absent we sum input and output tokens. */
  total_tokens?: number;
};
/**
 * Calculate the exact cost (in USD) for a single usage breakdown. Returns
 * `null` when the model is unknown.
 */
export function estimateCostFromUsage(
  usage: UsageBreakdown,
  model: string,
): number | null {
  const rates = priceRates(model);
  if (rates) {
    // Detailed path: price cached and non-cached prompt tokens separately.
    const inputTokens = usage.input_tokens ?? 0;
    const cachedTokens = usage.input_tokens_details?.cached_tokens ?? 0;
    const outputTokens = usage.output_tokens ?? 0;
    // Guard against a malformed payload where cached > input.
    const freshInput = Math.max(0, inputTokens - cachedTokens);
    return (
      freshInput * rates.input +
      cachedTokens * rates.cachedInput +
      outputTokens * rates.output
    );
  }
  // No detailed split available — fall back to the blended per-token rate.
  const blended = pricePerToken(model);
  if (blended == null) {
    return null;
  }
  const totalTokens =
    usage.total_tokens ??
    (usage.input_tokens ?? 0) + (usage.output_tokens ?? 0);
  return totalTokens * blended;
}
/**
 * Rough cost estimate (USD) for a series of {@link ResponseItem}s when using
 * the specified model. Returns `null` when the model is unknown. When no
 * detailed usage object is available we fall back to estimating token counts
 * based on the message contents.
 */
export function estimateCostUSD(
items: Array<ResponseItem>,
model: string,
): number | null {
const perToken = pricePerToken(model);
if (perToken == null) {
const per = pricePerToken(model);
if (per == null) {
return null;
}
const tokens = approximateTokensUsed(items);
return tokens * perToken;
return approximateTokensUsed(items) * per;
}

View File

@@ -1,7 +1,11 @@
import type { ResponseItem } from "openai/resources/responses/responses.mjs";
import { approximateTokensUsed } from "./approximate-tokens-used.js";
import { pricePerToken } from "./estimate-cost.js";
import {
estimateCostFromUsage,
pricePerToken,
type UsageBreakdown,
} from "./estimate-cost.js";
/**
* Simple accumulator for {@link ResponseItem}s that exposes aggregate token
@@ -10,7 +14,15 @@ import { pricePerToken } from "./estimate-cost.js";
export class SessionCostTracker {
private readonly model: string;
private readonly items: Array<ResponseItem> = [];
private tokensUsed: number | null = null;
private tokensUsedPrecise: number | null = null;
/**
* Aggregated exact cost when we have detailed `usage` information from the
* OpenAI API. Falls back to `null` when we only have the rough estimate
* path available.
*/
private costPrecise: number | null = null;
constructor(model: string) {
this.model = model;
@@ -21,23 +33,50 @@ export class SessionCostTracker {
this.items.push(...items);
}
/** Add the exact number of tokens returned by the API usage object. */
/**
* Add a full usage breakdown as returned by the Responses API. This gives
* us exact token counts and allows truetospec cost accounting that
* factors in cached tokens.
*/
addUsage(usage: UsageBreakdown): void {
const tokens =
usage.total_tokens ??
(usage.input_tokens ?? 0) + (usage.output_tokens ?? 0);
if (Number.isFinite(tokens) && tokens > 0) {
this.tokensUsedPrecise = (this.tokensUsedPrecise ?? 0) + tokens;
}
const cost = estimateCostFromUsage(usage, this.model);
if (cost != null) {
this.costPrecise = (this.costPrecise ?? 0) + cost;
}
}
/** Legacy helper for callers that only know the total token count. */
addTokens(count: number): void {
if (Number.isFinite(count) && count > 0) {
this.tokensUsed = (this.tokensUsed ?? 0) + count;
this.tokensUsedPrecise = (this.tokensUsedPrecise ?? 0) + count;
// We deliberately do *not* update costPrecise here without a detailed
// breakdown we cannot know whether tokens were input/output/cached. We
// therefore fall back to the blended rate during `getCostUSD()`.
}
}
/** Approximate total token count so far. */
getTokensUsed(): number {
if (this.tokensUsed != null) {
return this.tokensUsed;
if (this.tokensUsedPrecise != null) {
return this.tokensUsedPrecise;
}
return approximateTokensUsed(this.items);
}
/** Besteffort USD cost estimate. Returns `null` when the model is unknown. */
getCostUSD(): number | null {
if (this.costPrecise != null) {
return this.costPrecise;
}
const per = pricePerToken(this.model);
if (per == null) {
return null;