mirror of
https://github.com/openai/codex.git
synced 2026-04-24 14:45:27 +00:00
format
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
import type { ReviewDecision } from "./review.js";
|
||||
import type { ApplyPatchCommand, ApprovalPolicy } from "../../approvals.js";
|
||||
import type { AppConfig } from "../config.js";
|
||||
import type { UsageBreakdown } from "../estimate-cost.js";
|
||||
import type {
|
||||
ResponseFunctionToolCall,
|
||||
ResponseInputItem,
|
||||
@@ -805,20 +806,9 @@ export class AgentLoop {
|
||||
try {
|
||||
const usage = (event as MaybeUsageEvent).response?.usage;
|
||||
if (usage && typeof usage === "object") {
|
||||
const u = usage as {
|
||||
total_tokens?: number;
|
||||
input_tokens?: number;
|
||||
output_tokens?: number;
|
||||
};
|
||||
const tokens =
|
||||
u.total_tokens ??
|
||||
(typeof u.input_tokens === "number" &&
|
||||
typeof u.output_tokens === "number"
|
||||
? u.input_tokens + u.output_tokens
|
||||
: undefined);
|
||||
if (typeof tokens === "number" && tokens > 0) {
|
||||
ensureSessionTracker(this.model).addTokens(tokens);
|
||||
}
|
||||
ensureSessionTracker(this.model).addUsage(
|
||||
usage as unknown as UsageBreakdown,
|
||||
);
|
||||
}
|
||||
} catch {
|
||||
/* best‑effort only */
|
||||
|
||||
@@ -1,81 +1,214 @@
|
||||
/* eslint-disable no-irregular-whitespace */
|
||||
|
||||
/**
|
||||
* Cost‑estimation helpers for OpenAI responses.
|
||||
*
|
||||
* The implementation now distinguishes between *input*, *cached input* and
|
||||
* *output* tokens, reflecting OpenAI’s 2025‑04 pricing scheme. For models
|
||||
* where we only have a single blended rate we gracefully fall back to the
|
||||
* legacy logic so existing call‑sites continue to work.
|
||||
*/
|
||||
|
||||
import type { ResponseItem } from "openai/resources/responses/responses.mjs";
|
||||
|
||||
import { approximateTokensUsed } from "./approximate-tokens-used.js";
|
||||
|
||||
// ────────────────────────────────────────────────────────────────────────────
|
||||
// Pricing tables
|
||||
// ────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Breakdown of per‑token prices (in USD). */
|
||||
type TokenRates = {
|
||||
/** Price for *non‑cached* input prompt tokens. */
|
||||
input: number;
|
||||
/** Preferential price for *cached* input tokens. */
|
||||
cachedInput: number;
|
||||
/** Price for completion / output tokens. */
|
||||
output: number;
|
||||
};
|
||||
|
||||
/**
|
||||
* Approximate per‑token pricing (in USD) for common OpenAI models.
|
||||
*
|
||||
* The list is intentionally *non‑exhaustive*: OpenAI regularly introduces new
|
||||
* variants. Unknown model names simply result in a `null` cost estimate so
|
||||
* that callers can gracefully fall back (e.g. by omitting cost figures from
|
||||
* user‑visible summaries).
|
||||
* Pricing table (exact model name -> per‑token rates).
|
||||
* All keys must be lower‑case.
|
||||
*/
|
||||
const priceMap: Array<{ pattern: RegExp; pricePerThousandTokens: number }> = [
|
||||
const detailedPriceMap: Record<string, TokenRates> = {
|
||||
// –––––––––––––– OpenAI “o‑series” experimental ––––––––––––––
|
||||
"o3": {
|
||||
input: 10 / 1_000_000,
|
||||
cachedInput: 2.5 / 1_000_000,
|
||||
output: 40 / 1_000_000,
|
||||
},
|
||||
"o4-mini": {
|
||||
input: 1.1 / 1_000_000,
|
||||
cachedInput: 0.275 / 1_000_000,
|
||||
output: 4.4 / 1_000_000,
|
||||
},
|
||||
|
||||
// –––––––––––––– GPT‑4.1 family ––––––––––––––
|
||||
"gpt-4.1-nano": {
|
||||
input: 0.1 / 1_000_000,
|
||||
cachedInput: 0.025 / 1_000_000,
|
||||
output: 0.4 / 1_000_000,
|
||||
},
|
||||
"gpt-4.1-mini": {
|
||||
input: 0.4 / 1_000_000,
|
||||
cachedInput: 0.1 / 1_000_000,
|
||||
output: 1.6 / 1_000_000,
|
||||
},
|
||||
"gpt-4.1": {
|
||||
input: 2 / 1_000_000,
|
||||
cachedInput: 0.5 / 1_000_000,
|
||||
output: 8 / 1_000_000,
|
||||
},
|
||||
|
||||
// –––––––––––––– GPT‑4o family ––––––––––––––
|
||||
{ pattern: /gpt-4o-search-preview/i, pricePerThousandTokens: 0.0025 },
|
||||
{ pattern: /gpt-4o-mini-search-preview/i, pricePerThousandTokens: 0.00015 },
|
||||
{ pattern: /gpt-4o-realtime-preview/i, pricePerThousandTokens: 0.005 },
|
||||
{ pattern: /gpt-4o-audio-preview/i, pricePerThousandTokens: 0.0025 },
|
||||
{ pattern: /gpt-4o-mini-audio-preview/i, pricePerThousandTokens: 0.00015 },
|
||||
{ pattern: /gpt-4o-mini-realtime-preview/i, pricePerThousandTokens: 0.0006 },
|
||||
{ pattern: /gpt-4o-mini/i, pricePerThousandTokens: 0.00015 },
|
||||
{ pattern: /gpt-4o/i, pricePerThousandTokens: 0.0025 },
|
||||
|
||||
// –––––––––––––– GPT‑4.1 / 4.5 ––––––––––––––
|
||||
{ pattern: /gpt-4\.1-nano/i, pricePerThousandTokens: 0.0001 },
|
||||
{ pattern: /gpt-4\.1-mini/i, pricePerThousandTokens: 0.0004 },
|
||||
{ pattern: /gpt-4\.1/i, pricePerThousandTokens: 0.002 },
|
||||
|
||||
{ pattern: /gpt-4\.5-preview/i, pricePerThousandTokens: 0.075 },
|
||||
{ pattern: /gpt-4\.5/i, pricePerThousandTokens: 0.075 },
|
||||
|
||||
// –––––––––––––– “o‑series” experimental ––––––––––––––
|
||||
{ pattern: /o4-mini/i, pricePerThousandTokens: 0.0011 },
|
||||
{ pattern: /o3-mini/i, pricePerThousandTokens: 0.0011 },
|
||||
{ pattern: /o1-mini/i, pricePerThousandTokens: 0.0011 },
|
||||
{ pattern: /\bo3\b/i, pricePerThousandTokens: 0.015 },
|
||||
{ pattern: /o1[- ]?pro/i, pricePerThousandTokens: 0.15 },
|
||||
{ pattern: /\bo1\b/i, pricePerThousandTokens: 0.015 },
|
||||
|
||||
// –––––––––––––– Misc ––––––––––––––
|
||||
{ pattern: /computer-use-preview/i, pricePerThousandTokens: 0.003 },
|
||||
"gpt-4o-mini": {
|
||||
input: 0.6 / 1_000_000,
|
||||
cachedInput: 0.3 / 1_000_000,
|
||||
output: 2.4 / 1_000_000,
|
||||
},
|
||||
"gpt-4o": {
|
||||
input: 5 / 1_000_000,
|
||||
cachedInput: 2.5 / 1_000_000,
|
||||
output: 20 / 1_000_000,
|
||||
},
|
||||
};
|
||||
|
||||
/**
|
||||
* Legacy single‑rate pricing entries (per *thousand* tokens). These are kept
|
||||
* to provide sensible fall‑backs for models that do not yet expose a detailed
|
||||
* breakdown or where we have no published split pricing. The figures stem
|
||||
* from older OpenAI announcements and are only meant for *approximation* –
|
||||
* callers that rely on exact accounting should upgrade to models covered by
|
||||
* {@link detailedPriceMap}.
|
||||
*/
|
||||
const blendedPriceMap: Record<string, number> = {
|
||||
// GPT‑4 Turbo (Apr 2024)
|
||||
{ pattern: /gpt-4-turbo/i, pricePerThousandTokens: 0.01 },
|
||||
"gpt-4-turbo": 0.01,
|
||||
|
||||
// Legacy GPT‑4 8k / 32k context models
|
||||
{ pattern: /gpt-4\b/i, pricePerThousandTokens: 0.03 },
|
||||
"gpt-4": 0.03,
|
||||
|
||||
// GPT‑3.5‑Turbo family
|
||||
{ pattern: /gpt-3\.5-turbo/i, pricePerThousandTokens: 0.0005 },
|
||||
];
|
||||
"gpt-3.5-turbo": 0.0005,
|
||||
|
||||
// Remaining preview variants (exact names)
|
||||
"gpt-4o-search-preview": 0.0025,
|
||||
"gpt-4o-mini-search-preview": 0.00015,
|
||||
"gpt-4o-realtime-preview": 0.005,
|
||||
"gpt-4o-audio-preview": 0.0025,
|
||||
"gpt-4o-mini-audio-preview": 0.00015,
|
||||
"gpt-4o-mini-realtime-preview": 0.0006,
|
||||
"gpt-4o-mini": 0.00015,
|
||||
|
||||
// Older experimental o‑series rates
|
||||
"o3-mini": 0.0011,
|
||||
"o1-mini": 0.0011,
|
||||
"o1-pro": 0.15,
|
||||
"o1": 0.015,
|
||||
|
||||
// Additional internal preview models
|
||||
"computer-use-preview": 0.003,
|
||||
};
|
||||
|
||||
// ────────────────────────────────────────────────────────────────────────────
|
||||
// Public helpers
|
||||
// ────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Convert the *per‑thousand‑tokens* price entry to a *per‑token* figure. If
|
||||
* the model is unrecognised we return `null` so that callers can fall back.
|
||||
* Return the per‑token input/cached/output rates for the supplied model, or
|
||||
* `null` when no detailed pricing is available.
|
||||
*/
|
||||
function normalize(model: string): string {
|
||||
// Lower‑case and strip date/version suffixes like “‑2025‑04‑14”.
|
||||
const lower = model.toLowerCase();
|
||||
const dateSuffix = /-\d{4}-\d{2}-\d{2}$/;
|
||||
return lower.replace(dateSuffix, "");
|
||||
}
|
||||
|
||||
export function priceRates(model: string): TokenRates | null {
|
||||
return detailedPriceMap[normalize(model)] ?? null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fallback that returns a *single* blended per‑token rate when no detailed
|
||||
* split is available. This mirrors the behaviour of the pre‑2025 version so
|
||||
* that existing callers keep working unmodified.
|
||||
*/
|
||||
export function pricePerToken(model: string): number | null {
|
||||
const entry = priceMap.find(({ pattern }) => pattern.test(model));
|
||||
if (!entry) {
|
||||
// Prefer an *average* of the detailed rates when we have them – this avoids
|
||||
// surprises where callers mix `pricePerToken()` with the new detailed
|
||||
// helpers.
|
||||
const rates = priceRates(model);
|
||||
if (rates) {
|
||||
return (rates.input + rates.output) / 2; // simple average heuristic
|
||||
}
|
||||
|
||||
const entry = blendedPriceMap[normalize(model)];
|
||||
if (entry == null) {
|
||||
return null;
|
||||
}
|
||||
return entry.pricePerThousandTokens / 1000;
|
||||
return entry / 1000;
|
||||
}
|
||||
|
||||
// ────────────────────────────────────────────────────────────────────────────
|
||||
// Cost estimation
|
||||
// ────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Shape of the `usage` object returned by OpenAI’s Responses API. */
|
||||
export type UsageBreakdown = {
|
||||
input_tokens?: number;
|
||||
input_tokens_details?: { cached_tokens?: number } | null;
|
||||
output_tokens?: number;
|
||||
total_tokens?: number;
|
||||
};
|
||||
|
||||
/**
|
||||
* Calculate the exact cost (in USD) for a single usage breakdown. Returns
|
||||
* `null` when the model is unknown.
|
||||
*/
|
||||
export function estimateCostFromUsage(
|
||||
usage: UsageBreakdown,
|
||||
model: string,
|
||||
): number | null {
|
||||
const rates = priceRates(model);
|
||||
if (!rates) {
|
||||
// fall back to blended pricing
|
||||
const per = pricePerToken(model);
|
||||
if (per == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const tokens =
|
||||
usage.total_tokens ??
|
||||
(usage.input_tokens ?? 0) + (usage.output_tokens ?? 0);
|
||||
return tokens * per;
|
||||
}
|
||||
|
||||
const input = usage.input_tokens ?? 0;
|
||||
const cached = usage.input_tokens_details?.cached_tokens ?? 0;
|
||||
const nonCachedInput = Math.max(0, input - cached);
|
||||
const output = usage.output_tokens ?? 0;
|
||||
|
||||
return (
|
||||
nonCachedInput * rates.input +
|
||||
cached * rates.cachedInput +
|
||||
output * rates.output
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Rough cost estimate (USD) for a series of {@link ResponseItem}s when using
|
||||
* the specified model. Returns `null` when the model is unknown.
|
||||
* the specified model. When no detailed usage object is available we fall
|
||||
* back to estimating token counts based on the message contents.
|
||||
*/
|
||||
export function estimateCostUSD(
|
||||
items: Array<ResponseItem>,
|
||||
model: string,
|
||||
): number | null {
|
||||
const perToken = pricePerToken(model);
|
||||
if (perToken == null) {
|
||||
const per = pricePerToken(model);
|
||||
if (per == null) {
|
||||
return null;
|
||||
}
|
||||
const tokens = approximateTokensUsed(items);
|
||||
return tokens * perToken;
|
||||
return approximateTokensUsed(items) * per;
|
||||
}
|
||||
|
||||
@@ -1,7 +1,11 @@
|
||||
import type { ResponseItem } from "openai/resources/responses/responses.mjs";
|
||||
|
||||
import { approximateTokensUsed } from "./approximate-tokens-used.js";
|
||||
import { pricePerToken } from "./estimate-cost.js";
|
||||
import {
|
||||
estimateCostFromUsage,
|
||||
pricePerToken,
|
||||
type UsageBreakdown,
|
||||
} from "./estimate-cost.js";
|
||||
|
||||
/**
|
||||
* Simple accumulator for {@link ResponseItem}s that exposes aggregate token
|
||||
@@ -10,7 +14,15 @@ import { pricePerToken } from "./estimate-cost.js";
|
||||
export class SessionCostTracker {
|
||||
private readonly model: string;
|
||||
private readonly items: Array<ResponseItem> = [];
|
||||
private tokensUsed: number | null = null;
|
||||
|
||||
private tokensUsedPrecise: number | null = null;
|
||||
|
||||
/**
|
||||
* Aggregated exact cost when we have detailed `usage` information from the
|
||||
* OpenAI API. Falls back to `null` when we only have the rough estimate
|
||||
* path available.
|
||||
*/
|
||||
private costPrecise: number | null = null;
|
||||
|
||||
constructor(model: string) {
|
||||
this.model = model;
|
||||
@@ -21,23 +33,50 @@ export class SessionCostTracker {
|
||||
this.items.push(...items);
|
||||
}
|
||||
|
||||
/** Add the exact number of tokens returned by the API usage object. */
|
||||
/**
|
||||
* Add a full usage breakdown as returned by the Responses API. This gives
|
||||
* us exact token counts and allows true‑to‑spec cost accounting that
|
||||
* factors in cached tokens.
|
||||
*/
|
||||
addUsage(usage: UsageBreakdown): void {
|
||||
const tokens =
|
||||
usage.total_tokens ??
|
||||
(usage.input_tokens ?? 0) + (usage.output_tokens ?? 0);
|
||||
|
||||
if (Number.isFinite(tokens) && tokens > 0) {
|
||||
this.tokensUsedPrecise = (this.tokensUsedPrecise ?? 0) + tokens;
|
||||
}
|
||||
|
||||
const cost = estimateCostFromUsage(usage, this.model);
|
||||
if (cost != null) {
|
||||
this.costPrecise = (this.costPrecise ?? 0) + cost;
|
||||
}
|
||||
}
|
||||
|
||||
/** Legacy helper for callers that only know the total token count. */
|
||||
addTokens(count: number): void {
|
||||
if (Number.isFinite(count) && count > 0) {
|
||||
this.tokensUsed = (this.tokensUsed ?? 0) + count;
|
||||
this.tokensUsedPrecise = (this.tokensUsedPrecise ?? 0) + count;
|
||||
// We deliberately do *not* update costPrecise here – without a detailed
|
||||
// breakdown we cannot know whether tokens were input/output/cached. We
|
||||
// therefore fall back to the blended rate during `getCostUSD()`.
|
||||
}
|
||||
}
|
||||
|
||||
/** Approximate total token count so far. */
getTokensUsed(): number {
  // Prefer the exact count accumulated from API usage events; otherwise
  // approximate from the recorded response items.
  if (this.tokensUsedPrecise != null) {
    return this.tokensUsedPrecise;
  }
  return approximateTokensUsed(this.items);
}
|
||||
|
||||
/** Best‑effort USD cost estimate. Returns `null` when the model is unknown. */
|
||||
getCostUSD(): number | null {
|
||||
if (this.costPrecise != null) {
|
||||
return this.costPrecise;
|
||||
}
|
||||
|
||||
const per = pricePerToken(this.model);
|
||||
if (per == null) {
|
||||
return null;
|
||||
|
||||
Reference in New Issue
Block a user