initial cost tracking

Signed-off-by: Eason Goodale <easong@openai.com>
This commit is contained in:
Eason Goodale
2025-04-18 03:10:54 -07:00
parent 0d6a98f9af
commit cdc0897a25
11 changed files with 421 additions and 4 deletions

View File

@@ -15,6 +15,7 @@ import {
addToHistory,
} from "../../utils/storage/command-history.js";
import { clearTerminal, onExit } from "../../utils/terminal.js";
import { printAndResetSessionSummary } from "../../utils/session-cost.js";
import Spinner from "../vendor/ink-spinner.js";
import TextInput from "../vendor/ink-text-input.js";
import { Box, Text, useApp, useInput, useStdin } from "ink";
@@ -199,8 +200,14 @@ export default function TerminalChatInput({
setInput("");
setSessionId("");
setLastResponseId("");
// Clear the terminal first so the summary is printed on a fresh
// screen before the new session starts.
clearTerminal();
// Show the token/cost summary for the session that just ended.
printAndResetSessionSummary();
// Emit a system message to confirm the clear action. We *append*
// it so Ink's <Static> treats it as new output and actually renders it.
setItems((prev) => [

View File

@@ -17,6 +17,7 @@ import {
addToHistory,
} from "../../utils/storage/command-history.js";
import { clearTerminal, onExit } from "../../utils/terminal.js";
import { printAndResetSessionSummary } from "../../utils/session-cost.js";
import Spinner from "../vendor/ink-spinner.js";
import { Box, Text, useApp, useInput, useStdin } from "ink";
import { fileURLToPath } from "node:url";
@@ -286,8 +287,12 @@ export default function TerminalChatInput({
setInput("");
setSessionId("");
setLastResponseId("");
// Clear screen then display session summary so the user sees it.
clearTerminal();
printAndResetSessionSummary();
// Emit a system message to confirm the clear action. We *append*
// it so Ink's <Static> treats it as new output and actually renders it.
setItems((prev) => [

View File

@@ -24,6 +24,25 @@ function isUserMessage(
*/
export function maxTokensForModel(model: string): number {
const lower = model.toLowerCase();
// Heuristics for common context window sizes. Keep the checks loosely
// ordered from *largest* to *smallest* so that more specific long-context
// models are detected before their shorter generic counterparts.
// Special-case for the 1,047,576-token demo model (gpt-4-long). We match either
// the literal number or "gpt-4.1" variants we occasionally encounter.
if (lower.includes("1,047,576") || /gpt-4\.1/i.test(lower)) {
return 1047576;
}
if (lower.includes("128k") || /gpt-4\.5|gpt-4o-mini|gpt-4o\b/i.test(lower)) {
return 128000;
}
// Experimental o-series advertised at ~200k context
if (/\bo[134]\b|o[134]-mini|o1[- ]?pro/i.test(lower)) {
return 200000;
}
if (lower.includes("32k")) {
return 32000;
}
@@ -46,8 +65,11 @@ export function maxTokensForModel(model: string): number {
export function calculateContextPercentRemaining(
items: Array<ResponseItem>,
model: string,
extraContextChars = 0,
): number {
const used = approximateTokensUsed(items);
const tokensFromItems = approximateTokensUsed(items);
const extraTokens = Math.ceil(extraContextChars / 4);
const used = tokensFromItems + extraTokens;
const max = maxTokensForModel(model);
const remaining = Math.max(0, max - used);
return (remaining / max) * 100;

View File

@@ -427,8 +427,14 @@ export default function TerminalChat({
).length;
const contextLeftPercent = useMemo(
() => calculateContextPercentRemaining(items, model),
[items, model],
() =>
calculateContextPercentRemaining(
items,
model,
// static system instructions count towards the context budget too
config.instructions?.length ?? 0,
),
[items, model, config.instructions],
);
return (

View File

@@ -11,6 +11,7 @@ import type { Reasoning } from "openai/resources.mjs";
import { log, isLoggingEnabled } from "./log.js";
import { OPENAI_BASE_URL, OPENAI_TIMEOUT_MS } from "../config.js";
import { parseToolCallArguments } from "../parsers.js";
import { ensureSessionTracker } from "../session-cost.js";
import {
ORIGIN,
CLI_VERSION,
@@ -235,7 +236,18 @@ export class AgentLoop {
instructions: instructions ?? "",
} as AppConfig);
this.additionalWritableRoots = additionalWritableRoots;
this.onItem = onItem;
// Capture usage for cost-tracking before delegating to the caller-supplied
// callback. Wrapping here avoids repeating the bookkeeping logic across
// every UI surface.
this.onItem = (item: ResponseItem) => {
try {
ensureSessionTracker(this.model).addItems([item]);
} catch {
/* best-effort — never block user-visible updates */
}
onItem(item);
};
this.onLoading = onLoading;
this.getCommandConfirmation = getCommandConfirmation;
this.onLastResponseId = onLastResponseId;
@@ -778,6 +790,27 @@ export class AgentLoop {
}
lastResponseId = event.response.id;
this.onLastResponseId(event.response.id);
// Capture exact token usage for cost tracking when provided by
// the API. `responses.completed` events include a `usage` field
// with {input_tokens, output_tokens, total_tokens}. We record
// the total (or fallback to summing the parts if needed).
try {
const usage: unknown = (event as any).response?.usage;
if (usage && typeof usage === "object") {
const u = usage as { total_tokens?: number; input_tokens?: number; output_tokens?: number };
const tokens =
u.total_tokens ??
(typeof u.input_tokens === "number" && typeof u.output_tokens === "number"
? u.input_tokens + u.output_tokens
: undefined);
if (typeof tokens === "number" && tokens > 0) {
ensureSessionTracker(this.model).addTokens(tokens);
}
}
} catch {
/* best-effort only */
}
}
}
} catch (err: unknown) {

View File

@@ -0,0 +1,79 @@
import type { ResponseItem } from "openai/resources/responses/responses.mjs";
import { approximateTokensUsed } from "./approximate-tokens-used.js";
/**
* Approximate pertoken pricing (in USD) for common OpenAI models.
*
* The list is intentionally *nonexhaustive*: OpenAI regularly introduces new
* variants. Unknown model names simply result in a `null` cost estimate so
* that callers can gracefully fall back (e.g. by omitting cost figures from
* uservisible summaries).
*/
const priceMap: Array<{ pattern: RegExp; pricePerThousandTokens: number }> = [
// GPT4o family
{ pattern: /gpt-4o-search-preview/i, pricePerThousandTokens: 0.0025 },
{ pattern: /gpt-4o-mini-search-preview/i, pricePerThousandTokens: 0.00015 },
{ pattern: /gpt-4o-realtime-preview/i, pricePerThousandTokens: 0.005 },
{ pattern: /gpt-4o-audio-preview/i, pricePerThousandTokens: 0.0025 },
{ pattern: /gpt-4o-mini-audio-preview/i, pricePerThousandTokens: 0.00015 },
{ pattern: /gpt-4o-mini-realtime-preview/i, pricePerThousandTokens: 0.0006 },
{ pattern: /gpt-4o-mini/i, pricePerThousandTokens: 0.00015 },
{ pattern: /gpt-4o/i, pricePerThousandTokens: 0.0025 },
// GPT4.1 / 4.5
{ pattern: /gpt-4\.1-nano/i, pricePerThousandTokens: 0.0001 },
{ pattern: /gpt-4\.1-mini/i, pricePerThousandTokens: 0.0004 },
{ pattern: /gpt-4\.1/i, pricePerThousandTokens: 0.002 },
{ pattern: /gpt-4\.5-preview/i, pricePerThousandTokens: 0.075 },
{ pattern: /gpt-4\.5/i, pricePerThousandTokens: 0.075 },
// “oseries” experimental
{ pattern: /o4-mini/i, pricePerThousandTokens: 0.0011 },
{ pattern: /o3-mini/i, pricePerThousandTokens: 0.0011 },
{ pattern: /o1-mini/i, pricePerThousandTokens: 0.0011 },
{ pattern: /\bo3\b/i, pricePerThousandTokens: 0.015 },
{ pattern: /o1[- ]?pro/i, pricePerThousandTokens: 0.15 },
{ pattern: /\bo1\b/i, pricePerThousandTokens: 0.015 },
// Misc
{ pattern: /computer-use-preview/i, pricePerThousandTokens: 0.003 },
// GPT4 Turbo (Apr 2024)
{ pattern: /gpt-4-turbo/i, pricePerThousandTokens: 0.01 },
// Legacy GPT4 8k / 32k context models
{ pattern: /gpt-4\b/i, pricePerThousandTokens: 0.03 },
// GPT3.5Turbo family
{ pattern: /gpt-3\.5-turbo/i, pricePerThousandTokens: 0.0005 },
];
/**
* Convert the *perthousandtokens* price entry to a *pertoken* figure. If
* the model is unrecognised we return `null` so that callers can fall back.
*/
export function pricePerToken(model: string): number | null {
const entry = priceMap.find(({ pattern }) => pattern.test(model));
if (!entry) {
return null;
}
return entry.pricePerThousandTokens / 1000;
}
/**
* Rough cost estimate (USD) for a series of {@link ResponseItem}s when using
* the specified model. Returns `null` when the model is unknown.
*/
export function estimateCostUSD(
items: Array<ResponseItem>,
model: string,
): number | null {
const perToken = pricePerToken(model);
if (perToken == null) {
return null;
}
const tokens = approximateTokensUsed(items);
return tokens * perToken;
}

View File

@@ -0,0 +1,97 @@
import type { ResponseItem } from "openai/resources/responses/responses.mjs";
import { approximateTokensUsed } from "./approximate-tokens-used.js";
import { pricePerToken } from "./estimate-cost.js";
/**
* Simple accumulator for {@link ResponseItem}s that exposes aggregate token
* and (approximate) dollarcost statistics for the current conversation.
*/
export class SessionCostTracker {
private readonly model: string;
private readonly items: Array<ResponseItem> = [];
private tokensUsed: number | null = null;
constructor(model: string) {
this.model = model;
}
/** Append newlyreceived items to the internal history. */
addItems(items: Array<ResponseItem>): void {
this.items.push(...items);
}
/** Add the exact number of tokens returned by the API usage object. */
addTokens(count: number): void {
if (Number.isFinite(count) && count > 0) {
this.tokensUsed = (this.tokensUsed ?? 0) + count;
}
}
/** Approximate total token count so far. */
getTokensUsed(): number {
if (this.tokensUsed != null) {
return this.tokensUsed;
}
return approximateTokensUsed(this.items);
}
/** Besteffort USD cost estimate. Returns `null` when the model is unknown. */
getCostUSD(): number | null {
const per = pricePerToken(this.model);
if (per == null) {
return null;
}
return this.getTokensUsed() * per;
}
/**
* Humanreadable oneliner suitable for printing at session end (e.g. on
* CtrlC or `/clear`).
*/
summary(): string {
const tokens = this.getTokensUsed();
const cost = this.getCostUSD();
if (cost == null) {
return `Session complete approx. ${tokens} tokens used.`;
}
return `Session complete approx. ${tokens} tokens, $${cost.toFixed(4)} USD.`;
}
}
// ────────────────────────────────────────────────────────────────────────────
// Global helpers so disparate parts of the codebase can share a single
// tracker instance without threading it through countless function calls.
// ────────────────────────────────────────────────────────────────────────────
let globalTracker: SessionCostTracker | null = null;
export function getSessionTracker(): SessionCostTracker | null {
return globalTracker;
}
export function ensureSessionTracker(model: string): SessionCostTracker {
if (!globalTracker) {
globalTracker = new SessionCostTracker(model);
}
return globalTracker;
}
export function resetSessionTracker(): void {
globalTracker = null;
}
/**
* Convenience helper that prints the session summary (if any) and resets the
* global tracker so that the next conversation starts with a clean slate.
*/
export function printAndResetSessionSummary(): void {
if (!globalTracker) {
return; // nothing to do
}
// eslint-disable-next-line no-console -- explicit, uservisible log
console.log("\n" + globalTracker.summary() + "\n");
resetSessionTracker();
}

View File

@@ -1,6 +1,9 @@
import type { Instance } from "ink";
import type React from "react";
// Cost-tracking
import { printAndResetSessionSummary } from "./session-cost.js";
let inkRenderer: Instance | null = null;
// Track whether the cleanup routine has already executed so repeat calls are
@@ -79,4 +82,12 @@ export function onExit(): void {
/* besteffort continue even if Ink throws */
}
}
// Finally, print a brief token/cost summary for the session — best effort
// only, errors are swallowed so that shutdown always succeeds.
try {
printAndResetSessionSummary();
} catch {
/* ignore */
}
}

View File

@@ -0,0 +1,28 @@
import { describe, expect, it } from "vitest";
import type { ResponseItem } from "openai/resources/responses/responses.mjs";
import { calculateContextPercentRemaining } from "../src/components/chat/terminal-chat-utils.js";
function makeUserMessage(id: string, text: string): ResponseItem {
return {
id,
type: "message",
role: "user",
content: [{ type: "input_text", text }],
} as ResponseItem;
}
describe("calculateContextPercentRemaining", () => {
it("includes extra context characters in calculation", () => {
const msgText = "a".repeat(40); // 40 chars → 10 tokens
const items = [makeUserMessage("1", msgText)];
const model = "gpt-4-16k";
const base = calculateContextPercentRemaining(items, model);
const withExtra = calculateContextPercentRemaining(items, model, 8); // +8 chars → +2 tokens
expect(withExtra).toBeLessThan(base);
});
});

View File

@@ -0,0 +1,50 @@
import { describe, expect, test } from "vitest";
import { estimateCostUSD } from "../src/utils/estimate-cost.js";
import { SessionCostTracker } from "../src/utils/session-cost.js";
import type { ResponseItem } from "openai/resources/responses/responses.mjs";
// Helper to craft a minimal ResponseItem for tests
function makeMessage(
id: string,
role: "user" | "assistant",
text: string,
): ResponseItem {
return {
id,
type: "message",
role,
content: [{ type: role === "user" ? "input_text" : "output_text", text }],
} as ResponseItem;
}
describe("estimateCostUSD", () => {
test("returns a proportional, positive estimate for known models", () => {
const items: Array<ResponseItem> = [
makeMessage("1", "user", "hello world"),
makeMessage("2", "assistant", "hi there"),
];
const cost = estimateCostUSD(items, "gpt-3.5-turbo");
expect(cost).not.toBeNull();
expect(cost!).toBeGreaterThan(0);
// Adding another token should increase the estimate
const cost2 = estimateCostUSD(
items.concat([makeMessage("3", "user", "extra")]),
"gpt-3.5-turbo",
);
expect(cost2!).toBeGreaterThan(cost!);
});
});
describe("SessionCostTracker", () => {
test("accumulates items and reports tokens & cost", () => {
const tracker = new SessionCostTracker("gpt-3.5-turbo");
tracker.addItems([makeMessage("1", "user", "foo")]);
tracker.addItems([makeMessage("2", "assistant", "bar baz")]);
expect(tracker.getTokensUsed()).toBeGreaterThan(0);
expect(tracker.getCostUSD()!).toBeGreaterThan(0);
});
});

View File

@@ -0,0 +1,79 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import type { ResponseItem } from "openai/resources/responses/responses.mjs";
import {
ensureSessionTracker,
getSessionTracker,
printAndResetSessionSummary,
} from "../src/utils/session-cost.js";
function makeMessage(id: string, role: "user" | "assistant", text: string): ResponseItem {
return {
id,
type: "message",
role,
content: [{ type: role === "user" ? "input_text" : "output_text", text }],
} as ResponseItem;
}
describe("printAndResetSessionSummary", () => {
afterEach(() => {
vi.restoreAllMocks();
});
it("/clear resets tracker so successive conversations start fresh", () => {
const spy = vi.spyOn(console, "log").mockImplementation(() => {});
const perSessionTokens: Array<number> = [];
for (let i = 1; i <= 3; i++) {
const tracker = ensureSessionTracker("gpt-3.5-turbo");
tracker.addTokens(i * 10); // 10, 20, 30
perSessionTokens.push(tracker.getTokensUsed());
// Simulate user typing /clear which prints & resets
printAndResetSessionSummary();
expect(getSessionTracker()).toBeNull();
}
expect(perSessionTokens).toEqual([10, 20, 30]);
spy.mockRestore();
});
it("prints a summary and resets the global tracker", () => {
const spy = vi.spyOn(console, "log").mockImplementation(() => {});
const tracker = ensureSessionTracker("gpt-3.5-turbo");
tracker.addItems([
makeMessage("1", "user", "hello"),
makeMessage("2", "assistant", "hi"),
]);
printAndResetSessionSummary();
expect(spy).toHaveBeenCalled();
expect(getSessionTracker()).toBeNull();
});
it("prefers exact token counts added via addTokens() over heuristic", () => {
const tracker = ensureSessionTracker("gpt-3.5-turbo");
// Add a long message (heuristic would count >1 token)
tracker.addItems([
makeMessage("x", "user", "a".repeat(400)), // ~100 tokens
]);
const heuristicTokens = tracker.getTokensUsed();
expect(heuristicTokens).toBeGreaterThan(50);
// Now inject an exact low token count and ensure it overrides
tracker.addTokens(10);
expect(tracker.getTokensUsed()).toBe(heuristicTokens + (10 - heuristicTokens));
const cost = tracker.getCostUSD();
expect(cost).not.toBeNull();
});
});