Compare commits

...

5 Commits

Author SHA1 Message Date
Eason Goodale
ba45d2f601 test cache discount 2025-04-26 12:15:34 -07:00
Eason Goodale
b051fcb804 whitespace 2025-04-20 04:04:12 -07:00
Eason Goodale
ada5e2249a format 2025-04-20 01:28:42 -07:00
Eason Goodale
0613fd35e2 lint, formatting 2025-04-18 03:20:10 -07:00
Eason Goodale
cdc0897a25 initial cost tracking 2025-04-18 03:10:54 -07:00
Signed-off-by: Eason Goodale <easong@openai.com>
11 changed files with 622 additions and 4 deletions

View File

@@ -9,6 +9,7 @@ import { TerminalChatCommandReview } from "./terminal-chat-command-review.js";
import { log, isLoggingEnabled } from "../../utils/agent/log.js";
import { loadConfig } from "../../utils/config.js";
import { createInputItem } from "../../utils/input-utils.js";
import { printAndResetSessionSummary } from "../../utils/session-cost.js";
import { setSessionId } from "../../utils/session.js";
import {
loadCommandHistory,
@@ -199,8 +200,14 @@ export default function TerminalChatInput({
setInput("");
setSessionId("");
setLastResponseId("");
// Clear the terminal first so the summary is printed on a fresh
// screen before the new session starts.
clearTerminal();
// Show the token/cost summary for the session that just ended.
printAndResetSessionSummary();
// Emit a system message to confirm the clear action. We *append*
// it so Ink's <Static> treats it as new output and actually renders it.
setItems((prev) => [

View File

@@ -11,6 +11,7 @@ import { TerminalChatCommandReview } from "./terminal-chat-command-review.js";
import { log, isLoggingEnabled } from "../../utils/agent/log.js";
import { loadConfig } from "../../utils/config.js";
import { createInputItem } from "../../utils/input-utils.js";
import { printAndResetSessionSummary } from "../../utils/session-cost.js";
import { setSessionId } from "../../utils/session.js";
import {
loadCommandHistory,
@@ -286,8 +287,12 @@ export default function TerminalChatInput({
setInput("");
setSessionId("");
setLastResponseId("");
// Clear screen then display session summary so the user sees it.
clearTerminal();
printAndResetSessionSummary();
// Emit a system message to confirm the clear action. We *append*
// it so Ink's <Static> treats it as new output and actually renders it.
setItems((prev) => [

View File

@@ -24,6 +24,25 @@ function isUserMessage(
*/
export function maxTokensForModel(model: string): number {
const lower = model.toLowerCase();
// Heuristics for common context window sizes. Keep the checks loosely
// ordered from *largest* to *smallest* so that more specific long-context
// models are detected before their shorter generic counterparts.
// Special-case for the 1,047,576-token demo model (gpt-4-long). We match either
// the literal number or "gpt-4.1" variants we occasionally encounter.
if (lower.includes("1,047,576") || /gpt-4\.1/i.test(lower)) {
return 1047576;
}
if (lower.includes("128k") || /gpt-4\.5|gpt-4o-mini|gpt-4o\b/i.test(lower)) {
return 128000;
}
// Experimental o-series advertised at ~200k context
if (/\bo[134]\b|o[134]-mini|o1[- ]?pro/i.test(lower)) {
return 200000;
}
if (lower.includes("32k")) {
return 32000;
}
@@ -46,8 +65,11 @@ export function maxTokensForModel(model: string): number {
export function calculateContextPercentRemaining(
items: Array<ResponseItem>,
model: string,
extraContextChars = 0,
): number {
const used = approximateTokensUsed(items);
const tokensFromItems = approximateTokensUsed(items);
const extraTokens = Math.ceil(extraContextChars / 4);
const used = tokensFromItems + extraTokens;
const max = maxTokensForModel(model);
const remaining = Math.max(0, max - used);
return (remaining / max) * 100;
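
The new extraContextChars parameter folds static instructions into the context budget using the same rough 4-characters-per-token heuristic the rest of the file uses. A minimal sketch of how a caller might use it (the instructions string, model name, and import path are made up for illustration):

import type { ResponseItem } from "openai/resources/responses/responses.mjs";
import { calculateContextPercentRemaining } from "./terminal-chat-utils.js";

// Hypothetical static system instructions: ~2,000 chars count as ~500 extra tokens.
const instructions = "Always answer concisely. ".repeat(80);
const items: Array<ResponseItem> = []; // conversation history accumulated so far

// "gpt-4o" resolves to a 128k window via maxTokensForModel(); the extra
// characters are converted with Math.ceil(chars / 4) and added to the token usage.
const percentLeft = calculateContextPercentRemaining(items, "gpt-4o", instructions.length);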

View File

@@ -427,8 +427,14 @@ export default function TerminalChat({
).length;
const contextLeftPercent = useMemo(
() => calculateContextPercentRemaining(items, model),
[items, model],
() =>
calculateContextPercentRemaining(
items,
model,
// static system instructions count towards the context budget too
config.instructions?.length ?? 0,
),
[items, model, config.instructions],
);
return (

View File

@@ -1,6 +1,7 @@
import type { ReviewDecision } from "./review.js";
import type { ApplyPatchCommand, ApprovalPolicy } from "../../approvals.js";
import type { AppConfig } from "../config.js";
import type { UsageBreakdown } from "../estimate-cost.js";
import type {
ResponseFunctionToolCall,
ResponseInputItem,
@@ -11,6 +12,7 @@ import type { Reasoning } from "openai/resources.mjs";
import { log, isLoggingEnabled } from "./log.js";
import { OPENAI_BASE_URL, OPENAI_TIMEOUT_MS } from "../config.js";
import { parseToolCallArguments } from "../parsers.js";
import { ensureSessionTracker } from "../session-cost.js";
import {
ORIGIN,
CLI_VERSION,
@@ -56,6 +58,13 @@ type AgentLoopParams = {
onLastResponseId: (lastResponseId: string) => void;
};
type Usage = {
total_tokens?: number;
input_tokens?: number;
output_tokens?: number;
};
type MaybeUsageEvent = { response?: { usage?: Usage } };
export class AgentLoop {
private model: string;
private instructions?: string;
@@ -235,7 +244,18 @@ export class AgentLoop {
instructions: instructions ?? "",
} as AppConfig);
this.additionalWritableRoots = additionalWritableRoots;
this.onItem = onItem;
// Capture usage for cost-tracking before delegating to the caller-supplied
// callback. Wrapping here avoids repeating the bookkeeping logic across
// every UI surface.
this.onItem = (item: ResponseItem) => {
try {
ensureSessionTracker(this.model).addItems([item]);
} catch {
/* best-effort: never block user-visible updates */
}
onItem(item);
};
this.onLoading = onLoading;
this.getCommandConfirmation = getCommandConfirmation;
this.onLastResponseId = onLastResponseId;
@@ -778,6 +798,21 @@ export class AgentLoop {
}
lastResponseId = event.response.id;
this.onLastResponseId(event.response.id);
// Capture exact token usage for cost tracking when provided by
// the API. `responses.completed` events include a `usage` field
// with {input_tokens, output_tokens, total_tokens}. We record
// the total (or fall back to summing the parts if needed).
try {
const usage = (event as MaybeUsageEvent).response?.usage;
if (usage && typeof usage === "object") {
ensureSessionTracker(this.model).addUsage(
usage as unknown as UsageBreakdown,
);
}
} catch {
/* best-effort only */
}
}
}
} catch (err: unknown) {
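
For reference, the usage object the loop reads off a `response.completed` event has roughly the shape below; the event envelope is condensed and the numbers are made up, but the field names match the UsageBreakdown type used by the cost tracker.

// Condensed sketch of a `response.completed` streaming event (illustrative values).
const event = {
  type: "response.completed",
  response: {
    id: "resp_abc123", // hypothetical response id
    usage: {
      input_tokens: 1_000,
      input_tokens_details: { cached_tokens: 600 },
      output_tokens: 500,
      total_tokens: 1_500,
    },
  },
};

// What the wrapped handler effectively does with it:
ensureSessionTracker("gpt-4.1").addUsage(event.response.usage);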

View File

@@ -0,0 +1,212 @@
/**
* Cost-estimation helpers for OpenAI responses.
*
* The implementation now distinguishes between *input*, *cached input* and
* *output* tokens, reflecting OpenAI's 2025-04 pricing scheme. For models
* where we only have a single blended rate we gracefully fall back to the
* legacy logic so existing call sites continue to work.
*/
import type { ResponseItem } from "openai/resources/responses/responses.mjs";
import { approximateTokensUsed } from "./approximate-tokens-used.js";
// ────────────────────────────────────────────────────────────────────────────
// Pricing tables
// ────────────────────────────────────────────────────────────────────────────
/** Breakdown of per-token prices (in USD). */
type TokenRates = {
/** Price for *non-cached* input prompt tokens. */
input: number;
/** Preferential price for *cached* input tokens. */
cachedInput: number;
/** Price for completion / output tokens. */
output: number;
};
/**
* Pricing table (exact model name -> per-token rates).
* All keys must be lowercase.
*/
const detailedPriceMap: Record<string, TokenRates> = {
// OpenAI “o-series” experimental
"o3": {
input: 10 / 1_000_000,
cachedInput: 2.5 / 1_000_000,
output: 40 / 1_000_000,
},
"o4-mini": {
input: 1.1 / 1_000_000,
cachedInput: 0.275 / 1_000_000,
output: 4.4 / 1_000_000,
},
// GPT-4.1 family
"gpt-4.1-nano": {
input: 0.1 / 1_000_000,
cachedInput: 0.025 / 1_000_000,
output: 0.4 / 1_000_000,
},
"gpt-4.1-mini": {
input: 0.4 / 1_000_000,
cachedInput: 0.1 / 1_000_000,
output: 1.6 / 1_000_000,
},
"gpt-4.1": {
input: 2 / 1_000_000,
cachedInput: 0.5 / 1_000_000,
output: 8 / 1_000_000,
},
// GPT-4o family
"gpt-4o-mini": {
input: 0.6 / 1_000_000,
cachedInput: 0.3 / 1_000_000,
output: 2.4 / 1_000_000,
},
"gpt-4o": {
input: 5 / 1_000_000,
cachedInput: 2.5 / 1_000_000,
output: 20 / 1_000_000,
},
};
/**
* Legacy singlerate pricing entries (per *thousand* tokens). These are kept
* to provide sensible fallbacks for models that do not yet expose a detailed
* breakdown or where we have no published split pricing. The figures stem
* from older OpenAI announcements and are only meant for *approximation*;
* callers that rely on exact accounting should upgrade to models covered by
* {@link detailedPriceMap}.
*/
const blendedPriceMap: Record<string, number> = {
// GPT-4 Turbo (Apr 2024)
"gpt-4-turbo": 0.01,
// Legacy GPT-4 8k / 32k context models
"gpt-4": 0.03,
// GPT-3.5 Turbo family
"gpt-3.5-turbo": 0.0005,
// Remaining preview variants (exact names)
"gpt-4o-search-preview": 0.0025,
"gpt-4o-mini-search-preview": 0.00015,
"gpt-4o-realtime-preview": 0.005,
"gpt-4o-audio-preview": 0.0025,
"gpt-4o-mini-audio-preview": 0.00015,
"gpt-4o-mini-realtime-preview": 0.0006,
"gpt-4o-mini": 0.00015,
// Older experimental o-series rates
"o3-mini": 0.0011,
"o1-mini": 0.0011,
"o1-pro": 0.15,
"o1": 0.015,
// Additional internal preview models
"computer-use-preview": 0.003,
};
// ────────────────────────────────────────────────────────────────────────────
// Public helpers
// ────────────────────────────────────────────────────────────────────────────
/**
* Return the per-token input/cached/output rates for the supplied model, or
* `null` when no detailed pricing is available.
*/
function normalize(model: string): string {
// Lowercase and strip date/version suffixes like “2025-04-14”.
const lower = model.toLowerCase();
const dateSuffix = /-\d{4}-\d{2}-\d{2}$/;
return lower.replace(dateSuffix, "");
}
export function priceRates(model: string): TokenRates | null {
return detailedPriceMap[normalize(model)] ?? null;
}
/**
* Fallback that returns a *single* blended per-token rate when no detailed
* split is available. This mirrors the behaviour of the pre-2025 version so
* that existing callers keep working unmodified.
*/
export function pricePerToken(model: string): number | null {
// Prefer an *average* of the detailed rates when we have them; this avoids
// surprises where callers mix `pricePerToken()` with the new detailed
// helpers.
const rates = priceRates(model);
if (rates) {
return (rates.input + rates.output) / 2; // simple average heuristic
}
const entry = blendedPriceMap[normalize(model)];
if (entry == null) {
return null;
}
return entry / 1000;
}
// ────────────────────────────────────────────────────────────────────────────
// Cost estimation
// ────────────────────────────────────────────────────────────────────────────
/** Shape of the `usage` object returned by OpenAI's Responses API. */
export type UsageBreakdown = {
input_tokens?: number;
input_tokens_details?: { cached_tokens?: number } | null;
output_tokens?: number;
total_tokens?: number;
};
/**
* Calculate the exact cost (in USD) for a single usage breakdown. Returns
* `null` when the model is unknown.
*/
export function estimateCostFromUsage(
usage: UsageBreakdown,
model: string,
): number | null {
const rates = priceRates(model);
if (!rates) {
// fall back to blended pricing
const per = pricePerToken(model);
if (per == null) {
return null;
}
const tokens =
usage.total_tokens ??
(usage.input_tokens ?? 0) + (usage.output_tokens ?? 0);
return tokens * per;
}
const input = usage.input_tokens ?? 0;
const cached = usage.input_tokens_details?.cached_tokens ?? 0;
const nonCachedInput = Math.max(0, input - cached);
const output = usage.output_tokens ?? 0;
return (
nonCachedInput * rates.input +
cached * rates.cachedInput +
output * rates.output
);
}
/**
* Rough cost estimate (USD) for a series of {@link ResponseItem}s when using
* the specified model. When no detailed usage object is available we fall
* back to estimating token counts based on the message contents.
*/
export function estimateCostUSD(
items: Array<ResponseItem>,
model: string,
): number | null {
const per = pricePerToken(model);
if (per == null) {
return null;
}
return approximateTokensUsed(items) * per;
}
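
As a worked example, a single gpt-4.1 turn with partially cached input prices out like this under the table above (the same figures are asserted in the cached-discount test further down):

import { estimateCostFromUsage } from "./estimate-cost.js";

const cost = estimateCostFromUsage(
  {
    input_tokens: 1000,
    input_tokens_details: { cached_tokens: 600 },
    output_tokens: 500,
    total_tokens: 1500,
  },
  "gpt-4.1",
);
// (1000 - 600) * $2/1M + 600 * $0.50/1M + 500 * $8/1M
//   = $0.0008 + $0.0003 + $0.0040 = $0.0051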

View File

@@ -0,0 +1,138 @@
import type { ResponseItem } from "openai/resources/responses/responses.mjs";
import { approximateTokensUsed } from "./approximate-tokens-used.js";
import {
estimateCostFromUsage,
pricePerToken,
type UsageBreakdown,
} from "./estimate-cost.js";
/**
* Simple accumulator for {@link ResponseItem}s that exposes aggregate token
* and (approximate) dollar-cost statistics for the current conversation.
*/
export class SessionCostTracker {
private readonly model: string;
private readonly items: Array<ResponseItem> = [];
private tokensUsedPrecise: number | null = null;
/**
* Aggregated exact cost when we have detailed `usage` information from the
* OpenAI API. Falls back to `null` when we only have the rough estimate
* path available.
*/
private costPrecise: number | null = null;
constructor(model: string) {
this.model = model;
}
/** Append newly-received items to the internal history. */
addItems(items: Array<ResponseItem>): void {
this.items.push(...items);
}
/**
* Add a full usage breakdown as returned by the Responses API. This gives
* us exact token counts and allows true-to-spec cost accounting that
* factors in cached tokens.
*/
addUsage(usage: UsageBreakdown): void {
const tokens =
usage.total_tokens ??
(usage.input_tokens ?? 0) + (usage.output_tokens ?? 0);
if (Number.isFinite(tokens) && tokens > 0) {
this.tokensUsedPrecise = (this.tokensUsedPrecise ?? 0) + tokens;
}
const cost = estimateCostFromUsage(usage, this.model);
if (cost != null) {
this.costPrecise = (this.costPrecise ?? 0) + cost;
}
}
/** Legacy helper for callers that only know the total token count. */
addTokens(count: number): void {
if (Number.isFinite(count) && count > 0) {
this.tokensUsedPrecise = (this.tokensUsedPrecise ?? 0) + count;
// We deliberately do *not* update costPrecise here: without a detailed
// breakdown we cannot know whether tokens were input/output/cached. We
// therefore fall back to the blended rate during `getCostUSD()`.
}
}
/** Approximate total token count so far. */
getTokensUsed(): number {
if (this.tokensUsedPrecise != null) {
return this.tokensUsedPrecise;
}
return approximateTokensUsed(this.items);
}
/** Best-effort USD cost estimate. Returns `null` when the model is unknown. */
getCostUSD(): number | null {
if (this.costPrecise != null) {
return this.costPrecise;
}
const per = pricePerToken(this.model);
if (per == null) {
return null;
}
return this.getTokensUsed() * per;
}
/**
* Human-readable one-liner suitable for printing at session end (e.g. on
* Ctrl-C or `/clear`).
*/
summary(): string {
const tokens = this.getTokensUsed();
const cost = this.getCostUSD();
if (cost == null) {
return `Session complete: approx. ${tokens} tokens used.`;
}
return `Session complete: approx. ${tokens} tokens, $${cost.toFixed(
4,
)} USD.`;
}
}
// ────────────────────────────────────────────────────────────────────────────
// Global helpers so disparate parts of the codebase can share a single
// tracker instance without threading it through countless function calls.
// ────────────────────────────────────────────────────────────────────────────
let globalTracker: SessionCostTracker | null = null;
export function getSessionTracker(): SessionCostTracker | null {
return globalTracker;
}
export function ensureSessionTracker(model: string): SessionCostTracker {
if (!globalTracker) {
globalTracker = new SessionCostTracker(model);
}
return globalTracker;
}
export function resetSessionTracker(): void {
globalTracker = null;
}
/**
* Convenience helper that prints the session summary (if any) and resets the
* global tracker so that the next conversation starts with a clean slate.
*/
export function printAndResetSessionSummary(): void {
if (!globalTracker) {
return; // nothing to do
}
// eslint-disable-next-line no-console -- explicit, user-visible log
console.log("\n" + globalTracker.summary() + "\n");
resetSessionTracker();
}
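
A condensed sketch of the tracker's lifecycle as the rest of the changeset wires it up: the agent loop feeds usage into the shared instance, and the /clear handler (or the exit hook) prints and resets it. The numbers below are illustrative only.

import {
  ensureSessionTracker,
  printAndResetSessionSummary,
} from "./session-cost.js";

// During the conversation the agent loop records exact usage when the API provides it.
ensureSessionTracker("gpt-4.1").addUsage({
  input_tokens: 1200,
  input_tokens_details: { cached_tokens: 200 },
  output_tokens: 300,
  total_tokens: 1500,
});

// On /clear or Ctrl-C the accumulated summary is printed, e.g.
// "Session complete: approx. 1500 tokens, $0.0045 USD.", and the tracker is reset.
printAndResetSessionSummary();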

View File

@@ -1,6 +1,9 @@
import type { Instance } from "ink";
import type React from "react";
// Cost-tracking
import { printAndResetSessionSummary } from "./session-cost.js";
let inkRenderer: Instance | null = null;
// Track whether the cleanup routine has already executed so repeat calls are
@@ -79,4 +82,12 @@ export function onExit(): void {
/* best-effort: continue even if Ink throws */
}
}
// Finally, print a brief token/cost summary for the session; best-effort
// only, errors are swallowed so that shutdown always succeeds.
try {
printAndResetSessionSummary();
} catch {
/* ignore */
}
}

View File

@@ -0,0 +1,28 @@
import { describe, expect, it } from "vitest";
import type { ResponseItem } from "openai/resources/responses/responses.mjs";
import { calculateContextPercentRemaining } from "../src/components/chat/terminal-chat-utils.js";
function makeUserMessage(id: string, text: string): ResponseItem {
return {
id,
type: "message",
role: "user",
content: [{ type: "input_text", text }],
} as ResponseItem;
}
describe("calculateContextPercentRemaining", () => {
it("includes extra context characters in calculation", () => {
const msgText = "a".repeat(40); // 40 chars → 10 tokens
const items = [makeUserMessage("1", msgText)];
const model = "gpt-4-16k";
const base = calculateContextPercentRemaining(items, model);
const withExtra = calculateContextPercentRemaining(items, model, 8); // +8 chars → +2 tokens
expect(withExtra).toBeLessThan(base);
});
});

View File

@@ -0,0 +1,69 @@
import { describe, expect, test } from "vitest";
import {
estimateCostUSD,
estimateCostFromUsage,
} from "../src/utils/estimate-cost.js";
import { SessionCostTracker } from "../src/utils/session-cost.js";
import type { ResponseItem } from "openai/resources/responses/responses.mjs";
// Helper to craft a minimal ResponseItem for tests
function makeMessage(
id: string,
role: "user" | "assistant",
text: string,
): ResponseItem {
return {
id,
type: "message",
role,
content: [{ type: role === "user" ? "input_text" : "output_text", text }],
} as ResponseItem;
}
describe("estimateCostUSD", () => {
test("returns a proportional, positive estimate for known models", () => {
const items: Array<ResponseItem> = [
makeMessage("1", "user", "hello world"),
makeMessage("2", "assistant", "hi there"),
];
const cost = estimateCostUSD(items, "gpt-3.5-turbo");
expect(cost).not.toBeNull();
expect(cost!).toBeGreaterThan(0);
// Adding another token should increase the estimate
const cost2 = estimateCostUSD(
items.concat([makeMessage("3", "user", "extra")]),
"gpt-3.5-turbo",
);
expect(cost2!).toBeGreaterThan(cost!);
});
test("cost calculation honours cached input token discount", () => {
const usage = {
input_tokens: 1000,
input_tokens_details: { cached_tokens: 600 },
output_tokens: 500,
total_tokens: 1500,
} as any; // simple literal structure for test
const cost = estimateCostFromUsage(usage, "gpt-4.1");
// Expected: (1000-600)*0.000002 + 600*0.0000005 + 500*0.000008
const expected = 400 * 0.000002 + 600 * 0.0000005 + 500 * 0.000008;
expect(cost).not.toBeNull();
expect(cost!).toBeCloseTo(expected, 8);
});
});
describe("SessionCostTracker", () => {
test("accumulates items and reports tokens & cost", () => {
const tracker = new SessionCostTracker("gpt-3.5-turbo");
tracker.addItems([makeMessage("1", "user", "foo")]);
tracker.addItems([makeMessage("2", "assistant", "bar baz")]);
expect(tracker.getTokensUsed()).toBeGreaterThan(0);
expect(tracker.getCostUSD()!).toBeGreaterThan(0);
});
});

View File

@@ -0,0 +1,85 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import type { ResponseItem } from "openai/resources/responses/responses.mjs";
import {
ensureSessionTracker,
getSessionTracker,
printAndResetSessionSummary,
} from "../src/utils/session-cost.js";
function makeMessage(
id: string,
role: "user" | "assistant",
text: string,
): ResponseItem {
return {
id,
type: "message",
role,
content: [{ type: role === "user" ? "input_text" : "output_text", text }],
} as ResponseItem;
}
describe("printAndResetSessionSummary", () => {
afterEach(() => {
vi.restoreAllMocks();
});
it("/clear resets tracker so successive conversations start fresh", () => {
const spy = vi.spyOn(console, "log").mockImplementation(() => {});
const perSessionTokens: Array<number> = [];
for (let i = 1; i <= 3; i++) {
const tracker = ensureSessionTracker("gpt-3.5-turbo");
tracker.addTokens(i * 10); // 10, 20, 30
perSessionTokens.push(tracker.getTokensUsed());
// Simulate user typing /clear which prints & resets
printAndResetSessionSummary();
expect(getSessionTracker()).toBeNull();
}
expect(perSessionTokens).toEqual([10, 20, 30]);
spy.mockRestore();
});
it("prints a summary and resets the global tracker", () => {
const spy = vi.spyOn(console, "log").mockImplementation(() => {});
const tracker = ensureSessionTracker("gpt-3.5-turbo");
tracker.addItems([
makeMessage("1", "user", "hello"),
makeMessage("2", "assistant", "hi"),
]);
printAndResetSessionSummary();
expect(spy).toHaveBeenCalled();
expect(getSessionTracker()).toBeNull();
});
it("prefers exact token counts added via addTokens() over heuristic", () => {
const tracker = ensureSessionTracker("gpt-3.5-turbo");
// Add a long message (heuristic would count >1 token)
tracker.addItems([
makeMessage("x", "user", "a".repeat(400)), // ~100 tokens
]);
const heuristicTokens = tracker.getTokensUsed();
expect(heuristicTokens).toBeGreaterThan(50);
// Now inject an exact low token count and ensure it overrides
tracker.addTokens(10);
expect(tracker.getTokensUsed()).toBe(10);
const cost = tracker.getCostUSD();
expect(cost).not.toBeNull();
});
});