This commit is contained in:
Eason Goodale
2025-04-20 01:28:42 -07:00
parent 0613fd35e2
commit ada5e2249a
3 changed files with 230 additions and 68 deletions

View File

@@ -1,6 +1,7 @@
import type { ReviewDecision } from "./review.js";
import type { ApplyPatchCommand, ApprovalPolicy } from "../../approvals.js";
import type { AppConfig } from "../config.js";
import type { UsageBreakdown } from "../estimate-cost.js";
import type {
ResponseFunctionToolCall,
ResponseInputItem,
@@ -805,20 +806,9 @@ export class AgentLoop {
try {
const usage = (event as MaybeUsageEvent).response?.usage;
if (usage && typeof usage === "object") {
const u = usage as {
total_tokens?: number;
input_tokens?: number;
output_tokens?: number;
};
const tokens =
u.total_tokens ??
(typeof u.input_tokens === "number" &&
typeof u.output_tokens === "number"
? u.input_tokens + u.output_tokens
: undefined);
if (typeof tokens === "number" && tokens > 0) {
ensureSessionTracker(this.model).addTokens(tokens);
}
ensureSessionTracker(this.model).addUsage(
usage as unknown as UsageBreakdown,
);
}
} catch {
/* best-effort only */

View File

@@ -1,81 +1,214 @@
/* eslint-disable no-irregular-whitespace */
/**
 * Cost-estimation helpers for OpenAI responses.
 *
 * The implementation now distinguishes between *input*, *cached input* and
 * *output* tokens, reflecting OpenAI's 2025-04 pricing scheme. For models
 * where we only have a single blended rate we gracefully fall back to the
 * legacy logic so existing call sites continue to work.
 */
import type { ResponseItem } from "openai/resources/responses/responses.mjs";
import { approximateTokensUsed } from "./approximate-tokens-used.js";
// ────────────────────────────────────────────────────────────────────────────
// Pricing tables
// ────────────────────────────────────────────────────────────────────────────
/** Breakdown of per-token prices (in USD). */
type TokenRates = {
  /** Price for *non-cached* input prompt tokens. */
  input: number;
  /** Preferential price for *cached* input tokens. */
  cachedInput: number;
  /** Price for completion / output tokens. */
  output: number;
};
/**
* Approximate pertoken pricing (in USD) for common OpenAI models.
*
* The list is intentionally *nonexhaustive*: OpenAI regularly introduces new
* variants. Unknown model names simply result in a `null` cost estimate so
* that callers can gracefully fall back (e.g. by omitting cost figures from
* uservisible summaries).
* Pricing table (exact model name -> pertoken rates).
* All keys must be lowercase.
*/
const priceMap: Array<{ pattern: RegExp; pricePerThousandTokens: number }> = [
const detailedPriceMap: Record<string, TokenRates> = {
// OpenAI “oseries” experimental
"o3": {
input: 10 / 1_000_000,
cachedInput: 2.5 / 1_000_000,
output: 40 / 1_000_000,
},
"o4-mini": {
input: 1.1 / 1_000_000,
cachedInput: 0.275 / 1_000_000,
output: 4.4 / 1_000_000,
},
// GPT4.1 family
"gpt-4.1-nano": {
input: 0.1 / 1_000_000,
cachedInput: 0.025 / 1_000_000,
output: 0.4 / 1_000_000,
},
"gpt-4.1-mini": {
input: 0.4 / 1_000_000,
cachedInput: 0.1 / 1_000_000,
output: 1.6 / 1_000_000,
},
"gpt-4.1": {
input: 2 / 1_000_000,
cachedInput: 0.5 / 1_000_000,
output: 8 / 1_000_000,
},
// GPT4o family
{ pattern: /gpt-4o-search-preview/i, pricePerThousandTokens: 0.0025 },
{ pattern: /gpt-4o-mini-search-preview/i, pricePerThousandTokens: 0.00015 },
{ pattern: /gpt-4o-realtime-preview/i, pricePerThousandTokens: 0.005 },
{ pattern: /gpt-4o-audio-preview/i, pricePerThousandTokens: 0.0025 },
{ pattern: /gpt-4o-mini-audio-preview/i, pricePerThousandTokens: 0.00015 },
{ pattern: /gpt-4o-mini-realtime-preview/i, pricePerThousandTokens: 0.0006 },
{ pattern: /gpt-4o-mini/i, pricePerThousandTokens: 0.00015 },
{ pattern: /gpt-4o/i, pricePerThousandTokens: 0.0025 },
// GPT4.1 / 4.5
{ pattern: /gpt-4\.1-nano/i, pricePerThousandTokens: 0.0001 },
{ pattern: /gpt-4\.1-mini/i, pricePerThousandTokens: 0.0004 },
{ pattern: /gpt-4\.1/i, pricePerThousandTokens: 0.002 },
{ pattern: /gpt-4\.5-preview/i, pricePerThousandTokens: 0.075 },
{ pattern: /gpt-4\.5/i, pricePerThousandTokens: 0.075 },
// “oseries” experimental
{ pattern: /o4-mini/i, pricePerThousandTokens: 0.0011 },
{ pattern: /o3-mini/i, pricePerThousandTokens: 0.0011 },
{ pattern: /o1-mini/i, pricePerThousandTokens: 0.0011 },
{ pattern: /\bo3\b/i, pricePerThousandTokens: 0.015 },
{ pattern: /o1[- ]?pro/i, pricePerThousandTokens: 0.15 },
{ pattern: /\bo1\b/i, pricePerThousandTokens: 0.015 },
// Misc
{ pattern: /computer-use-preview/i, pricePerThousandTokens: 0.003 },
"gpt-4o-mini": {
input: 0.6 / 1_000_000,
cachedInput: 0.3 / 1_000_000,
output: 2.4 / 1_000_000,
},
"gpt-4o": {
input: 5 / 1_000_000,
cachedInput: 2.5 / 1_000_000,
output: 20 / 1_000_000,
},
};
/**
* Legacy singlerate pricing entries (per *thousand* tokens). These are kept
* to provide sensible fallbacks for models that do not yet expose a detailed
* breakdown or where we have no published split pricing. The figures stem
* from older OpenAI announcements and are only meant for *approximation*
* callers that rely on exact accounting should upgrade to models covered by
* {@link detailedPriceMap}.
*/
const blendedPriceMap: Record<string, number> = {
// GPT4 Turbo (Apr 2024)
{ pattern: /gpt-4-turbo/i, pricePerThousandTokens: 0.01 },
"gpt-4-turbo": 0.01,
// Legacy GPT4 8k / 32k context models
{ pattern: /gpt-4\b/i, pricePerThousandTokens: 0.03 },
"gpt-4": 0.03,
// GPT3.5Turbo family
{ pattern: /gpt-3\.5-turbo/i, pricePerThousandTokens: 0.0005 },
];
"gpt-3.5-turbo": 0.0005,
// Remaining preview variants (exact names)
"gpt-4o-search-preview": 0.0025,
"gpt-4o-mini-search-preview": 0.00015,
"gpt-4o-realtime-preview": 0.005,
"gpt-4o-audio-preview": 0.0025,
"gpt-4o-mini-audio-preview": 0.00015,
"gpt-4o-mini-realtime-preview": 0.0006,
"gpt-4o-mini": 0.00015,
// Older experimental oseries rates
"o3-mini": 0.0011,
"o1-mini": 0.0011,
"o1-pro": 0.15,
"o1": 0.015,
// Additional internal preview models
"computer-use-preview": 0.003,
};
// ────────────────────────────────────────────────────────────────────────────
// Public helpers
// ────────────────────────────────────────────────────────────────────────────
/**
 * Return the per-token input/cached/output rates for the supplied model, or
 * `null` when no detailed pricing is available.
 */
/**
 * Canonicalise a model identifier for table lookup: lowercase it and drop a
 * trailing date/version suffix such as "-2025-04-14".
 */
function normalize(model: string): string {
  const DATE_SUFFIX = /-\d{4}-\d{2}-\d{2}$/;
  return model.toLowerCase().replace(DATE_SUFFIX, "");
}
export function priceRates(model: string): TokenRates | null {
  const key = normalize(model);
  const rates = detailedPriceMap[key];
  return rates === undefined ? null : rates;
}
/**
* Fallback that returns a *single* blended pertoken rate when no detailed
* split is available. This mirrors the behaviour of the pre2025 version so
* that existing callers keep working unmodified.
*/
export function pricePerToken(model: string): number | null {
const entry = priceMap.find(({ pattern }) => pattern.test(model));
if (!entry) {
// Prefer an *average* of the detailed rates when we have them this avoids
// surprises where callers mix `pricePerToken()` with the new detailed
// helpers.
const rates = priceRates(model);
if (rates) {
return (rates.input + rates.output) / 2; // simple average heuristic
}
const entry = blendedPriceMap[normalize(model)];
if (entry == null) {
return null;
}
return entry.pricePerThousandTokens / 1000;
return entry / 1000;
}
// ────────────────────────────────────────────────────────────────────────────
// Cost estimation
// ────────────────────────────────────────────────────────────────────────────
/** Shape of the `usage` object returned by OpenAI's Responses API. */
export type UsageBreakdown = {
  /** Total prompt tokens (cached + non-cached). */
  input_tokens?: number;
  /** Nested detail; `cached_tokens` counts prompt tokens served from cache. */
  input_tokens_details?: { cached_tokens?: number } | null;
  /** Completion tokens. */
  output_tokens?: number;
  /** Convenience total; when absent we sum input and output tokens. */
  total_tokens?: number;
};
/**
 * Calculate the exact cost (in USD) for a single usage breakdown. Returns
 * `null` when the model is unknown.
 */
export function estimateCostFromUsage(
  usage: UsageBreakdown,
  model: string,
): number | null {
  const rates = priceRates(model);
  if (rates) {
    // Detailed path: price cached and non-cached prompt tokens separately.
    const inputTokens = usage.input_tokens ?? 0;
    const cachedTokens = usage.input_tokens_details?.cached_tokens ?? 0;
    const outputTokens = usage.output_tokens ?? 0;
    // Guard against a malformed payload where cached > input.
    const freshInput = Math.max(0, inputTokens - cachedTokens);
    return (
      freshInput * rates.input +
      cachedTokens * rates.cachedInput +
      outputTokens * rates.output
    );
  }
  // No detailed split available — fall back to the blended per-token rate.
  const blended = pricePerToken(model);
  if (blended == null) {
    return null;
  }
  const totalTokens =
    usage.total_tokens ??
    (usage.input_tokens ?? 0) + (usage.output_tokens ?? 0);
  return totalTokens * blended;
}
/**
 * Rough cost estimate (USD) for a series of {@link ResponseItem}s when using
 * the specified model. Returns `null` when the model is unknown. When no
 * detailed usage object is available we fall back to estimating token counts
 * based on the message contents.
 */
export function estimateCostUSD(
items: Array<ResponseItem>,
model: string,
): number | null {
const perToken = pricePerToken(model);
if (perToken == null) {
const per = pricePerToken(model);
if (per == null) {
return null;
}
const tokens = approximateTokensUsed(items);
return tokens * perToken;
return approximateTokensUsed(items) * per;
}

View File

@@ -1,7 +1,11 @@
import type { ResponseItem } from "openai/resources/responses/responses.mjs";
import { approximateTokensUsed } from "./approximate-tokens-used.js";
import { pricePerToken } from "./estimate-cost.js";
import {
estimateCostFromUsage,
pricePerToken,
type UsageBreakdown,
} from "./estimate-cost.js";
/**
* Simple accumulator for {@link ResponseItem}s that exposes aggregate token
@@ -10,7 +14,15 @@ import { pricePerToken } from "./estimate-cost.js";
export class SessionCostTracker {
private readonly model: string;
private readonly items: Array<ResponseItem> = [];
private tokensUsed: number | null = null;
private tokensUsedPrecise: number | null = null;
/**
* Aggregated exact cost when we have detailed `usage` information from the
* OpenAI API. Falls back to `null` when we only have the rough estimate
* path available.
*/
private costPrecise: number | null = null;
constructor(model: string) {
this.model = model;
@@ -21,23 +33,50 @@ export class SessionCostTracker {
this.items.push(...items);
}
/** Add the exact number of tokens returned by the API usage object. */
/**
* Add a full usage breakdown as returned by the Responses API. This gives
* us exact token counts and allows truetospec cost accounting that
* factors in cached tokens.
*/
addUsage(usage: UsageBreakdown): void {
const tokens =
usage.total_tokens ??
(usage.input_tokens ?? 0) + (usage.output_tokens ?? 0);
if (Number.isFinite(tokens) && tokens > 0) {
this.tokensUsedPrecise = (this.tokensUsedPrecise ?? 0) + tokens;
}
const cost = estimateCostFromUsage(usage, this.model);
if (cost != null) {
this.costPrecise = (this.costPrecise ?? 0) + cost;
}
}
/** Legacy helper for callers that only know the total token count. */
addTokens(count: number): void {
if (Number.isFinite(count) && count > 0) {
this.tokensUsed = (this.tokensUsed ?? 0) + count;
this.tokensUsedPrecise = (this.tokensUsedPrecise ?? 0) + count;
// We deliberately do *not* update costPrecise here without a detailed
// breakdown we cannot know whether tokens were input/output/cached. We
// therefore fall back to the blended rate during `getCostUSD()`.
}
}
/** Approximate total token count so far. */
getTokensUsed(): number {
if (this.tokensUsed != null) {
return this.tokensUsed;
if (this.tokensUsedPrecise != null) {
return this.tokensUsedPrecise;
}
return approximateTokensUsed(this.items);
}
/** Besteffort USD cost estimate. Returns `null` when the model is unknown. */
getCostUSD(): number | null {
if (this.costPrecise != null) {
return this.costPrecise;
}
const per = pricePerToken(this.model);
if (per == null) {
return null;