initial cost tracking

Signed-off-by: Eason Goodale <easong@openai.com>
This commit is contained in:
Eason Goodale
2025-04-18 03:10:54 -07:00
parent 0d6a98f9af
commit cdc0897a25
11 changed files with 421 additions and 4 deletions

View File

@@ -15,6 +15,7 @@ import {
addToHistory,
} from "../../utils/storage/command-history.js";
import { clearTerminal, onExit } from "../../utils/terminal.js";
import { printAndResetSessionSummary } from "../../utils/session-cost.js";
import Spinner from "../vendor/ink-spinner.js";
import TextInput from "../vendor/ink-text-input.js";
import { Box, Text, useApp, useInput, useStdin } from "ink";
@@ -199,8 +200,14 @@ export default function TerminalChatInput({
setInput("");
setSessionId("");
setLastResponseId("");
// Clear the terminal first so the summary is printed on a fresh
// screen before the new session starts.
clearTerminal();
// Show the token/cost summary for the session that just ended.
printAndResetSessionSummary();
// Emit a system message to confirm the clear action. We *append*
// it so Ink's <Static> treats it as new output and actually renders it.
setItems((prev) => [

View File

@@ -17,6 +17,7 @@ import {
addToHistory,
} from "../../utils/storage/command-history.js";
import { clearTerminal, onExit } from "../../utils/terminal.js";
import { printAndResetSessionSummary } from "../../utils/session-cost.js";
import Spinner from "../vendor/ink-spinner.js";
import { Box, Text, useApp, useInput, useStdin } from "ink";
import { fileURLToPath } from "node:url";
@@ -286,8 +287,12 @@ export default function TerminalChatInput({
setInput("");
setSessionId("");
setLastResponseId("");
// Clear screen then display session summary so the user sees it.
clearTerminal();
printAndResetSessionSummary();
// Emit a system message to confirm the clear action. We *append*
// it so Ink's <Static> treats it as new output and actually renders it.
setItems((prev) => [

View File

@@ -24,6 +24,25 @@ function isUserMessage(
*/
export function maxTokensForModel(model: string): number {
const lower = model.toLowerCase();
// Heuristics for common context window sizes. Keep the checks loosely
// ordered from *largest* to *smallest* so that more specific long-context
// models are detected before their shorter generic counterparts.
// Special-case for the 1,047,576-token demo model (gpt-4-long). We match either
// the literal number or "gpt-4.1" variants we occasionally encounter.
if (lower.includes("1,047,576") || /gpt-4\.1/i.test(lower)) {
return 1047576;
}
if (lower.includes("128k") || /gpt-4\.5|gpt-4o-mini|gpt-4o\b/i.test(lower)) {
return 128000;
}
// Experimental o-series advertised at ~200k context
if (/\bo[134]\b|o[134]-mini|o1[- ]?pro/i.test(lower)) {
return 200000;
}
if (lower.includes("32k")) {
return 32000;
}
@@ -46,8 +65,11 @@ export function maxTokensForModel(model: string): number {
export function calculateContextPercentRemaining(
items: Array<ResponseItem>,
model: string,
extraContextChars = 0,
): number {
const used = approximateTokensUsed(items);
const tokensFromItems = approximateTokensUsed(items);
const extraTokens = Math.ceil(extraContextChars / 4);
const used = tokensFromItems + extraTokens;
const max = maxTokensForModel(model);
const remaining = Math.max(0, max - used);
return (remaining / max) * 100;

View File

@@ -427,8 +427,14 @@ export default function TerminalChat({
).length;
const contextLeftPercent = useMemo(
() => calculateContextPercentRemaining(items, model),
[items, model],
() =>
calculateContextPercentRemaining(
items,
model,
// static system instructions count towards the context budget too
config.instructions?.length ?? 0,
),
[items, model, config.instructions],
);
return (

View File

@@ -11,6 +11,7 @@ import type { Reasoning } from "openai/resources.mjs";
import { log, isLoggingEnabled } from "./log.js";
import { OPENAI_BASE_URL, OPENAI_TIMEOUT_MS } from "../config.js";
import { parseToolCallArguments } from "../parsers.js";
import { ensureSessionTracker } from "../session-cost.js";
import {
ORIGIN,
CLI_VERSION,
@@ -235,7 +236,18 @@ export class AgentLoop {
instructions: instructions ?? "",
} as AppConfig);
this.additionalWritableRoots = additionalWritableRoots;
this.onItem = onItem;
// Capture usage for cost-tracking before delegating to the caller-supplied
// callback. Wrapping here avoids repeating the bookkeeping logic across
// every UI surface.
this.onItem = (item: ResponseItem) => {
try {
ensureSessionTracker(this.model).addItems([item]);
} catch {
/* best-effort — never block user-visible updates */
}
onItem(item);
};
this.onLoading = onLoading;
this.getCommandConfirmation = getCommandConfirmation;
this.onLastResponseId = onLastResponseId;
@@ -778,6 +790,27 @@ export class AgentLoop {
}
lastResponseId = event.response.id;
this.onLastResponseId(event.response.id);
// Capture exact token usage for cost tracking when provided by
// the API. `responses.completed` events include a `usage` field
// with {input_tokens, output_tokens, total_tokens}. We record
// the total (or fallback to summing the parts if needed).
try {
const usage: unknown = (event as any).response?.usage;
if (usage && typeof usage === "object") {
const u = usage as { total_tokens?: number; input_tokens?: number; output_tokens?: number };
const tokens =
u.total_tokens ??
(typeof u.input_tokens === "number" && typeof u.output_tokens === "number"
? u.input_tokens + u.output_tokens
: undefined);
if (typeof tokens === "number" && tokens > 0) {
ensureSessionTracker(this.model).addTokens(tokens);
}
}
} catch {
/* best-effort only */
}
}
}
} catch (err: unknown) {

View File

@@ -0,0 +1,79 @@
import type { ResponseItem } from "openai/resources/responses/responses.mjs";
import { approximateTokensUsed } from "./approximate-tokens-used.js";
/**
* Approximate pertoken pricing (in USD) for common OpenAI models.
*
* The list is intentionally *nonexhaustive*: OpenAI regularly introduces new
* variants. Unknown model names simply result in a `null` cost estimate so
* that callers can gracefully fall back (e.g. by omitting cost figures from
* uservisible summaries).
*/
const priceMap: Array<{ pattern: RegExp; pricePerThousandTokens: number }> = [
// GPT4o family
{ pattern: /gpt-4o-search-preview/i, pricePerThousandTokens: 0.0025 },
{ pattern: /gpt-4o-mini-search-preview/i, pricePerThousandTokens: 0.00015 },
{ pattern: /gpt-4o-realtime-preview/i, pricePerThousandTokens: 0.005 },
{ pattern: /gpt-4o-audio-preview/i, pricePerThousandTokens: 0.0025 },
{ pattern: /gpt-4o-mini-audio-preview/i, pricePerThousandTokens: 0.00015 },
{ pattern: /gpt-4o-mini-realtime-preview/i, pricePerThousandTokens: 0.0006 },
{ pattern: /gpt-4o-mini/i, pricePerThousandTokens: 0.00015 },
{ pattern: /gpt-4o/i, pricePerThousandTokens: 0.0025 },
// GPT4.1 / 4.5
{ pattern: /gpt-4\.1-nano/i, pricePerThousandTokens: 0.0001 },
{ pattern: /gpt-4\.1-mini/i, pricePerThousandTokens: 0.0004 },
{ pattern: /gpt-4\.1/i, pricePerThousandTokens: 0.002 },
{ pattern: /gpt-4\.5-preview/i, pricePerThousandTokens: 0.075 },
{ pattern: /gpt-4\.5/i, pricePerThousandTokens: 0.075 },
// “oseries” experimental
{ pattern: /o4-mini/i, pricePerThousandTokens: 0.0011 },
{ pattern: /o3-mini/i, pricePerThousandTokens: 0.0011 },
{ pattern: /o1-mini/i, pricePerThousandTokens: 0.0011 },
{ pattern: /\bo3\b/i, pricePerThousandTokens: 0.015 },
{ pattern: /o1[- ]?pro/i, pricePerThousandTokens: 0.15 },
{ pattern: /\bo1\b/i, pricePerThousandTokens: 0.015 },
// Misc
{ pattern: /computer-use-preview/i, pricePerThousandTokens: 0.003 },
// GPT4 Turbo (Apr 2024)
{ pattern: /gpt-4-turbo/i, pricePerThousandTokens: 0.01 },
// Legacy GPT4 8k / 32k context models
{ pattern: /gpt-4\b/i, pricePerThousandTokens: 0.03 },
// GPT3.5Turbo family
{ pattern: /gpt-3\.5-turbo/i, pricePerThousandTokens: 0.0005 },
];
/**
* Convert the *perthousandtokens* price entry to a *pertoken* figure. If
* the model is unrecognised we return `null` so that callers can fall back.
*/
export function pricePerToken(model: string): number | null {
const entry = priceMap.find(({ pattern }) => pattern.test(model));
if (!entry) {
return null;
}
return entry.pricePerThousandTokens / 1000;
}
/**
* Rough cost estimate (USD) for a series of {@link ResponseItem}s when using
* the specified model. Returns `null` when the model is unknown.
*/
export function estimateCostUSD(
items: Array<ResponseItem>,
model: string,
): number | null {
const perToken = pricePerToken(model);
if (perToken == null) {
return null;
}
const tokens = approximateTokensUsed(items);
return tokens * perToken;
}

View File

@@ -0,0 +1,97 @@
import type { ResponseItem } from "openai/resources/responses/responses.mjs";
import { approximateTokensUsed } from "./approximate-tokens-used.js";
import { pricePerToken } from "./estimate-cost.js";
/**
* Simple accumulator for {@link ResponseItem}s that exposes aggregate token
* and (approximate) dollarcost statistics for the current conversation.
*/
export class SessionCostTracker {
private readonly model: string;
private readonly items: Array<ResponseItem> = [];
private tokensUsed: number | null = null;
constructor(model: string) {
this.model = model;
}
/** Append newlyreceived items to the internal history. */
addItems(items: Array<ResponseItem>): void {
this.items.push(...items);
}
/** Add the exact number of tokens returned by the API usage object. */
addTokens(count: number): void {
if (Number.isFinite(count) && count > 0) {
this.tokensUsed = (this.tokensUsed ?? 0) + count;
}
}
/** Approximate total token count so far. */
getTokensUsed(): number {
if (this.tokensUsed != null) {
return this.tokensUsed;
}
return approximateTokensUsed(this.items);
}
/** Besteffort USD cost estimate. Returns `null` when the model is unknown. */
getCostUSD(): number | null {
const per = pricePerToken(this.model);
if (per == null) {
return null;
}
return this.getTokensUsed() * per;
}
/**
* Humanreadable oneliner suitable for printing at session end (e.g. on
* CtrlC or `/clear`).
*/
summary(): string {
const tokens = this.getTokensUsed();
const cost = this.getCostUSD();
if (cost == null) {
return `Session complete approx. ${tokens} tokens used.`;
}
return `Session complete approx. ${tokens} tokens, $${cost.toFixed(4)} USD.`;
}
}
// ────────────────────────────────────────────────────────────────────────────
// Global helpers so disparate parts of the codebase can share a single
// tracker instance without threading it through countless function calls.
// ────────────────────────────────────────────────────────────────────────────
let globalTracker: SessionCostTracker | null = null;
export function getSessionTracker(): SessionCostTracker | null {
return globalTracker;
}
export function ensureSessionTracker(model: string): SessionCostTracker {
if (!globalTracker) {
globalTracker = new SessionCostTracker(model);
}
return globalTracker;
}
export function resetSessionTracker(): void {
globalTracker = null;
}
/**
* Convenience helper that prints the session summary (if any) and resets the
* global tracker so that the next conversation starts with a clean slate.
*/
export function printAndResetSessionSummary(): void {
if (!globalTracker) {
return; // nothing to do
}
// eslint-disable-next-line no-console -- explicit, uservisible log
console.log("\n" + globalTracker.summary() + "\n");
resetSessionTracker();
}

View File

@@ -1,6 +1,9 @@
import type { Instance } from "ink";
import type React from "react";
// Cost-tracking
import { printAndResetSessionSummary } from "./session-cost.js";
let inkRenderer: Instance | null = null;
// Track whether the cleanup routine has already executed so repeat calls are
@@ -79,4 +82,12 @@ export function onExit(): void {
/* besteffort continue even if Ink throws */
}
}
// Finally, print a brief token/cost summary for the session — best effort
// only, errors are swallowed so that shutdown always succeeds.
try {
printAndResetSessionSummary();
} catch {
/* ignore */
}
}

View File

@@ -0,0 +1,28 @@
import { describe, expect, it } from "vitest";
import type { ResponseItem } from "openai/resources/responses/responses.mjs";
import { calculateContextPercentRemaining } from "../src/components/chat/terminal-chat-utils.js";
function makeUserMessage(id: string, text: string): ResponseItem {
return {
id,
type: "message",
role: "user",
content: [{ type: "input_text", text }],
} as ResponseItem;
}
describe("calculateContextPercentRemaining", () => {
it("includes extra context characters in calculation", () => {
const msgText = "a".repeat(40); // 40 chars → 10 tokens
const items = [makeUserMessage("1", msgText)];
const model = "gpt-4-16k";
const base = calculateContextPercentRemaining(items, model);
const withExtra = calculateContextPercentRemaining(items, model, 8); // +8 chars → +2 tokens
expect(withExtra).toBeLessThan(base);
});
});

View File

@@ -0,0 +1,50 @@
import { describe, expect, test } from "vitest";
import { estimateCostUSD } from "../src/utils/estimate-cost.js";
import { SessionCostTracker } from "../src/utils/session-cost.js";
import type { ResponseItem } from "openai/resources/responses/responses.mjs";
// Helper to craft a minimal ResponseItem for tests
function makeMessage(
id: string,
role: "user" | "assistant",
text: string,
): ResponseItem {
return {
id,
type: "message",
role,
content: [{ type: role === "user" ? "input_text" : "output_text", text }],
} as ResponseItem;
}
describe("estimateCostUSD", () => {
test("returns a proportional, positive estimate for known models", () => {
const items: Array<ResponseItem> = [
makeMessage("1", "user", "hello world"),
makeMessage("2", "assistant", "hi there"),
];
const cost = estimateCostUSD(items, "gpt-3.5-turbo");
expect(cost).not.toBeNull();
expect(cost!).toBeGreaterThan(0);
// Adding another token should increase the estimate
const cost2 = estimateCostUSD(
items.concat([makeMessage("3", "user", "extra")]),
"gpt-3.5-turbo",
);
expect(cost2!).toBeGreaterThan(cost!);
});
});
describe("SessionCostTracker", () => {
test("accumulates items and reports tokens & cost", () => {
const tracker = new SessionCostTracker("gpt-3.5-turbo");
tracker.addItems([makeMessage("1", "user", "foo")]);
tracker.addItems([makeMessage("2", "assistant", "bar baz")]);
expect(tracker.getTokensUsed()).toBeGreaterThan(0);
expect(tracker.getCostUSD()!).toBeGreaterThan(0);
});
});

View File

@@ -0,0 +1,79 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import type { ResponseItem } from "openai/resources/responses/responses.mjs";
import {
ensureSessionTracker,
getSessionTracker,
printAndResetSessionSummary,
} from "../src/utils/session-cost.js";
function makeMessage(id: string, role: "user" | "assistant", text: string): ResponseItem {
return {
id,
type: "message",
role,
content: [{ type: role === "user" ? "input_text" : "output_text", text }],
} as ResponseItem;
}
describe("printAndResetSessionSummary", () => {
afterEach(() => {
vi.restoreAllMocks();
});
it("/clear resets tracker so successive conversations start fresh", () => {
const spy = vi.spyOn(console, "log").mockImplementation(() => {});
const perSessionTokens: Array<number> = [];
for (let i = 1; i <= 3; i++) {
const tracker = ensureSessionTracker("gpt-3.5-turbo");
tracker.addTokens(i * 10); // 10, 20, 30
perSessionTokens.push(tracker.getTokensUsed());
// Simulate user typing /clear which prints & resets
printAndResetSessionSummary();
expect(getSessionTracker()).toBeNull();
}
expect(perSessionTokens).toEqual([10, 20, 30]);
spy.mockRestore();
});
it("prints a summary and resets the global tracker", () => {
const spy = vi.spyOn(console, "log").mockImplementation(() => {});
const tracker = ensureSessionTracker("gpt-3.5-turbo");
tracker.addItems([
makeMessage("1", "user", "hello"),
makeMessage("2", "assistant", "hi"),
]);
printAndResetSessionSummary();
expect(spy).toHaveBeenCalled();
expect(getSessionTracker()).toBeNull();
});
it("prefers exact token counts added via addTokens() over heuristic", () => {
const tracker = ensureSessionTracker("gpt-3.5-turbo");
// Add a long message (heuristic would count >1 token)
tracker.addItems([
makeMessage("x", "user", "a".repeat(400)), // ~100 tokens
]);
const heuristicTokens = tracker.getTokensUsed();
expect(heuristicTokens).toBeGreaterThan(50);
// Now inject an exact low token count and ensure it overrides
tracker.addTokens(10);
expect(tracker.getTokensUsed()).toBe(heuristicTokens + (10 - heuristicTokens));
const cost = tracker.getCostUSD();
expect(cost).not.toBeNull();
});
});