Mirror of https://github.com/openai/codex.git, synced 2026-02-02 06:57:03 +00:00
Compare commits
5 Commits
patch-squa ... cost-track
| Author | SHA1 | Date |
|---|---|---|
| | ba45d2f601 | |
| | b051fcb804 | |
| | ada5e2249a | |
| | 0613fd35e2 | |
| | cdc0897a25 | |
@@ -9,6 +9,7 @@ import { TerminalChatCommandReview } from "./terminal-chat-command-review.js";
import { log, isLoggingEnabled } from "../../utils/agent/log.js";
import { loadConfig } from "../../utils/config.js";
import { createInputItem } from "../../utils/input-utils.js";
import { printAndResetSessionSummary } from "../../utils/session-cost.js";
import { setSessionId } from "../../utils/session.js";
import {
  loadCommandHistory,
@@ -199,8 +200,14 @@ export default function TerminalChatInput({
      setInput("");
      setSessionId("");
      setLastResponseId("");

      // Clear the terminal first so the summary is printed on a fresh
      // screen before the new session starts.
      clearTerminal();

      // Show the token/cost summary for the session that just ended.
      printAndResetSessionSummary();

      // Emit a system message to confirm the clear action. We *append*
      // it so Ink's <Static> treats it as new output and actually renders it.
      setItems((prev) => [
@@ -11,6 +11,7 @@ import { TerminalChatCommandReview } from "./terminal-chat-command-review.js";
import { log, isLoggingEnabled } from "../../utils/agent/log.js";
import { loadConfig } from "../../utils/config.js";
import { createInputItem } from "../../utils/input-utils.js";
import { printAndResetSessionSummary } from "../../utils/session-cost.js";
import { setSessionId } from "../../utils/session.js";
import {
  loadCommandHistory,
@@ -286,8 +287,12 @@ export default function TerminalChatInput({
      setInput("");
      setSessionId("");
      setLastResponseId("");

      // Clear screen then display session summary so the user sees it.
      clearTerminal();

      printAndResetSessionSummary();

      // Emit a system message to confirm the clear action. We *append*
      // it so Ink's <Static> treats it as new output and actually renders it.
      setItems((prev) => [
@@ -24,6 +24,25 @@ function isUserMessage(
 */
export function maxTokensForModel(model: string): number {
  const lower = model.toLowerCase();
  // Heuristics for common context window sizes. Keep the checks loosely
  // ordered from *largest* to *smallest* so that more specific long‑context
  // models are detected before their shorter generic counterparts.

  // Special‑case for 1,047,576‑token demo model (gpt‑4‑long). We match either
  // the literal number or "gpt-4.1" variants we occasionally encounter.
  if (lower.includes("1,047,576") || /gpt-4\.1/i.test(lower)) {
    return 1047576;
  }

  if (lower.includes("128k") || /gpt-4\.5|gpt-4o-mini|gpt-4o\b/i.test(lower)) {
    return 128000;
  }

  // Experimental o‑series advertised at ~200k context
  if (/\bo[134]\b|o[134]-mini|o1[- ]?pro/i.test(lower)) {
    return 200000;
  }

  if (lower.includes("32k")) {
    return 32000;
  }
@@ -46,8 +65,11 @@ export function maxTokensForModel(model: string): number {
export function calculateContextPercentRemaining(
  items: Array<ResponseItem>,
  model: string,
  extraContextChars = 0,
): number {
-  const used = approximateTokensUsed(items);
+  const tokensFromItems = approximateTokensUsed(items);
+  const extraTokens = Math.ceil(extraContextChars / 4);
+  const used = tokensFromItems + extraTokens;
  const max = maxTokensForModel(model);
  const remaining = Math.max(0, max - used);
  return (remaining / max) * 100;
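For orientation, here is a minimal sketch (not part of the diff) of the arithmetic the new `extraContextChars` parameter adds: extra characters are converted to tokens with the same 4-chars-per-token heuristic used above and counted against the model's window. The `percentRemaining` helper and the 128k/2,000-character figures are illustrative assumptions.

```ts
// Illustrative sketch: how extra context characters shift the
// "context left" percentage, mirroring the hunk above.
function percentRemaining(
  usedTokensFromItems: number,
  maxTokens: number,
  extraContextChars = 0,
): number {
  const extraTokens = Math.ceil(extraContextChars / 4); // same heuristic as the diff
  const used = usedTokensFromItems + extraTokens;
  const remaining = Math.max(0, maxTokens - used);
  return (remaining / maxTokens) * 100;
}

// 10 tokens of chat history plus 2,000 chars of system instructions on a
// 128k-window model: 10 + ceil(2000 / 4) = 510 tokens used, so ~99.6% remains.
console.log(percentRemaining(10, 128_000, 2_000).toFixed(1)); // "99.6"
```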
@@ -427,8 +427,14 @@ export default function TerminalChat({
  ).length;

  const contextLeftPercent = useMemo(
-    () => calculateContextPercentRemaining(items, model),
-    [items, model],
+    () =>
+      calculateContextPercentRemaining(
+        items,
+        model,
+        // static system instructions count towards the context budget too
+        config.instructions?.length ?? 0,
+      ),
+    [items, model, config.instructions],
  );

  return (
@@ -1,6 +1,7 @@
import type { ReviewDecision } from "./review.js";
import type { ApplyPatchCommand, ApprovalPolicy } from "../../approvals.js";
import type { AppConfig } from "../config.js";
import type { UsageBreakdown } from "../estimate-cost.js";
import type {
  ResponseFunctionToolCall,
  ResponseInputItem,
@@ -11,6 +12,7 @@ import type { Reasoning } from "openai/resources.mjs";
import { log, isLoggingEnabled } from "./log.js";
import { OPENAI_BASE_URL, OPENAI_TIMEOUT_MS } from "../config.js";
import { parseToolCallArguments } from "../parsers.js";
import { ensureSessionTracker } from "../session-cost.js";
import {
  ORIGIN,
  CLI_VERSION,
@@ -56,6 +58,13 @@ type AgentLoopParams = {
  onLastResponseId: (lastResponseId: string) => void;
};

type Usage = {
  total_tokens?: number;
  input_tokens?: number;
  output_tokens?: number;
};
type MaybeUsageEvent = { response?: { usage?: Usage } };

export class AgentLoop {
  private model: string;
  private instructions?: string;
@@ -235,7 +244,18 @@ export class AgentLoop {
      instructions: instructions ?? "",
    } as AppConfig);
    this.additionalWritableRoots = additionalWritableRoots;
-    this.onItem = onItem;
+    // Capture usage for cost‑tracking before delegating to the caller‑supplied
+    // callback. Wrapping here avoids repeating the bookkeeping logic across
+    // every UI surface.
+    this.onItem = (item: ResponseItem) => {
+      try {
+        ensureSessionTracker(this.model).addItems([item]);
+      } catch {
+        /* best‑effort – never block user‑visible updates */
+      }
+
+      onItem(item);
+    };
    this.onLoading = onLoading;
    this.getCommandConfirmation = getCommandConfirmation;
    this.onLastResponseId = onLastResponseId;
@@ -778,6 +798,21 @@ export class AgentLoop {
            }
            lastResponseId = event.response.id;
            this.onLastResponseId(event.response.id);

            // Capture exact token usage for cost tracking when provided by
            // the API. `responses.completed` events include a `usage` field
            // with {input_tokens, output_tokens, total_tokens}. We record
            // the total (or fallback to summing the parts if needed).
            try {
              const usage = (event as MaybeUsageEvent).response?.usage;
              if (usage && typeof usage === "object") {
                ensureSessionTracker(this.model).addUsage(
                  usage as unknown as UsageBreakdown,
                );
              }
            } catch {
              /* best‑effort only */
            }
          }
        }
      } catch (err: unknown) {
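As a rough illustration (not part of the diff), the usage-capture path above only acts on events whose `response.usage` block is present. The sketch below mirrors the `MaybeUsageEvent` shape introduced in this hunk; the `onStreamEvent` name, the `recorded` array standing in for the session tracker, and the sample payload are assumptions made for the example.

```ts
// Stand-alone sketch of the usage-capture logic from the hunk above.
type Usage = {
  total_tokens?: number;
  input_tokens?: number;
  output_tokens?: number;
};
type MaybeUsageEvent = { type?: string; response?: { usage?: Usage } };

// Stand-in for ensureSessionTracker(model).addUsage(...) in the real code.
const recorded: Array<Usage> = [];

function onStreamEvent(event: MaybeUsageEvent): void {
  // Only completed-response events carry a usage block; anything else is ignored.
  const usage = event.response?.usage;
  if (usage && typeof usage === "object") {
    recorded.push(usage);
  }
}

onStreamEvent({
  type: "response.completed",
  response: { usage: { input_tokens: 1200, output_tokens: 300, total_tokens: 1500 } },
});
console.log(recorded); // [{ input_tokens: 1200, output_tokens: 300, total_tokens: 1500 }]
```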
codex-cli/src/utils/estimate-cost.ts (new file, 212 lines)
@@ -0,0 +1,212 @@
/**
 * Cost‑estimation helpers for OpenAI responses.
 *
 * The implementation now distinguishes between *input*, *cached input* and
 * *output* tokens, reflecting OpenAI’s 2025‑04 pricing scheme. For models
 * where we only have a single blended rate we gracefully fall back to the
 * legacy logic so existing call‑sites continue to work.
 */

import type { ResponseItem } from "openai/resources/responses/responses.mjs";

import { approximateTokensUsed } from "./approximate-tokens-used.js";

// ────────────────────────────────────────────────────────────────────────────
// Pricing tables
// ────────────────────────────────────────────────────────────────────────────

/** Breakdown of per‑token prices (in USD). */
type TokenRates = {
  /** Price for *non‑cached* input prompt tokens. */
  input: number;
  /** Preferential price for *cached* input tokens. */
  cachedInput: number;
  /** Price for completion / output tokens. */
  output: number;
};

/**
 * Pricing table (exact model name -> per‑token rates).
 * All keys must be lower‑case.
 */
const detailedPriceMap: Record<string, TokenRates> = {
  // –––––––––––––– OpenAI “o‑series” experimental ––––––––––––––
  "o3": {
    input: 10 / 1_000_000,
    cachedInput: 2.5 / 1_000_000,
    output: 40 / 1_000_000,
  },
  "o4-mini": {
    input: 1.1 / 1_000_000,
    cachedInput: 0.275 / 1_000_000,
    output: 4.4 / 1_000_000,
  },

  // –––––––––––––– GPT‑4.1 family ––––––––––––––
  "gpt-4.1-nano": {
    input: 0.1 / 1_000_000,
    cachedInput: 0.025 / 1_000_000,
    output: 0.4 / 1_000_000,
  },
  "gpt-4.1-mini": {
    input: 0.4 / 1_000_000,
    cachedInput: 0.1 / 1_000_000,
    output: 1.6 / 1_000_000,
  },
  "gpt-4.1": {
    input: 2 / 1_000_000,
    cachedInput: 0.5 / 1_000_000,
    output: 8 / 1_000_000,
  },

  // –––––––––––––– GPT‑4o family ––––––––––––––
  "gpt-4o-mini": {
    input: 0.6 / 1_000_000,
    cachedInput: 0.3 / 1_000_000,
    output: 2.4 / 1_000_000,
  },
  "gpt-4o": {
    input: 5 / 1_000_000,
    cachedInput: 2.5 / 1_000_000,
    output: 20 / 1_000_000,
  },
};

/**
 * Legacy single‑rate pricing entries (per *thousand* tokens). These are kept
 * to provide sensible fall‑backs for models that do not yet expose a detailed
 * breakdown or where we have no published split pricing. The figures stem
 * from older OpenAI announcements and are only meant for *approximation* –
 * callers that rely on exact accounting should upgrade to models covered by
 * {@link detailedPriceMap}.
 */
const blendedPriceMap: Record<string, number> = {
  // GPT‑4 Turbo (Apr 2024)
  "gpt-4-turbo": 0.01,

  // Legacy GPT‑4 8k / 32k context models
  "gpt-4": 0.03,

  // GPT‑3.5‑Turbo family
  "gpt-3.5-turbo": 0.0005,

  // Remaining preview variants (exact names)
  "gpt-4o-search-preview": 0.0025,
  "gpt-4o-mini-search-preview": 0.00015,
  "gpt-4o-realtime-preview": 0.005,
  "gpt-4o-audio-preview": 0.0025,
  "gpt-4o-mini-audio-preview": 0.00015,
  "gpt-4o-mini-realtime-preview": 0.0006,
  "gpt-4o-mini": 0.00015,

  // Older experimental o‑series rates
  "o3-mini": 0.0011,
  "o1-mini": 0.0011,
  "o1-pro": 0.15,
  "o1": 0.015,

  // Additional internal preview models
  "computer-use-preview": 0.003,
};

// ────────────────────────────────────────────────────────────────────────────
// Public helpers
// ────────────────────────────────────────────────────────────────────────────

/**
 * Return the per‑token input/cached/output rates for the supplied model, or
 * `null` when no detailed pricing is available.
 */
function normalize(model: string): string {
  // Lower‑case and strip date/version suffixes like “‑2025‑04‑14”.
  const lower = model.toLowerCase();
  const dateSuffix = /-\d{4}-\d{2}-\d{2}$/;
  return lower.replace(dateSuffix, "");
}

export function priceRates(model: string): TokenRates | null {
  return detailedPriceMap[normalize(model)] ?? null;
}

/**
 * Fallback that returns a *single* blended per‑token rate when no detailed
 * split is available. This mirrors the behaviour of the pre‑2025 version so
 * that existing callers keep working unmodified.
 */
export function pricePerToken(model: string): number | null {
  // Prefer an *average* of the detailed rates when we have them – this avoids
  // surprises where callers mix `pricePerToken()` with the new detailed
  // helpers.
  const rates = priceRates(model);
  if (rates) {
    return (rates.input + rates.output) / 2; // simple average heuristic
  }

  const entry = blendedPriceMap[normalize(model)];
  if (entry == null) {
    return null;
  }
  return entry / 1000;
}

// ────────────────────────────────────────────────────────────────────────────
// Cost estimation
// ────────────────────────────────────────────────────────────────────────────

/** Shape of the `usage` object returned by OpenAI’s Responses API. */
export type UsageBreakdown = {
  input_tokens?: number;
  input_tokens_details?: { cached_tokens?: number } | null;
  output_tokens?: number;
  total_tokens?: number;
};

/**
 * Calculate the exact cost (in USD) for a single usage breakdown. Returns
 * `null` when the model is unknown.
 */
export function estimateCostFromUsage(
  usage: UsageBreakdown,
  model: string,
): number | null {
  const rates = priceRates(model);
  if (!rates) {
    // fall back to blended pricing
    const per = pricePerToken(model);
    if (per == null) {
      return null;
    }

    const tokens =
      usage.total_tokens ??
      (usage.input_tokens ?? 0) + (usage.output_tokens ?? 0);
    return tokens * per;
  }

  const input = usage.input_tokens ?? 0;
  const cached = usage.input_tokens_details?.cached_tokens ?? 0;
  const nonCachedInput = Math.max(0, input - cached);
  const output = usage.output_tokens ?? 0;

  return (
    nonCachedInput * rates.input +
    cached * rates.cachedInput +
    output * rates.output
  );
}

/**
 * Rough cost estimate (USD) for a series of {@link ResponseItem}s when using
 * the specified model. When no detailed usage object is available we fall
 * back to estimating token counts based on the message contents.
 */
export function estimateCostUSD(
  items: Array<ResponseItem>,
  model: string,
): number | null {
  const per = pricePerToken(model);
  if (per == null) {
    return null;
  }
  return approximateTokensUsed(items) * per;
}
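A worked example (not part of the diff) of the cached-input discount, using the "gpt-4.1" rates from `detailedPriceMap` above: 1,000 input tokens of which 600 were served from cache, plus 500 output tokens.

```ts
// Illustrative arithmetic only; the rates are copied from detailedPriceMap.
const rates = { input: 2 / 1_000_000, cachedInput: 0.5 / 1_000_000, output: 8 / 1_000_000 };

const input = 1_000;
const cached = 600;
const output = 500;

const cost =
  (input - cached) * rates.input + // 400 non-cached input tokens
  cached * rates.cachedInput +     // 600 cached input tokens at the discounted rate
  output * rates.output;           // 500 completion tokens

console.log(cost.toFixed(4)); // "0.0051", vs. 0.0060 if none of the input had been cached
```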
codex-cli/src/utils/session-cost.ts (new file, 138 lines)
@@ -0,0 +1,138 @@
import type { ResponseItem } from "openai/resources/responses/responses.mjs";

import { approximateTokensUsed } from "./approximate-tokens-used.js";
import {
  estimateCostFromUsage,
  pricePerToken,
  type UsageBreakdown,
} from "./estimate-cost.js";

/**
 * Simple accumulator for {@link ResponseItem}s that exposes aggregate token
 * and (approximate) dollar‑cost statistics for the current conversation.
 */
export class SessionCostTracker {
  private readonly model: string;
  private readonly items: Array<ResponseItem> = [];

  private tokensUsedPrecise: number | null = null;

  /**
   * Aggregated exact cost when we have detailed `usage` information from the
   * OpenAI API. Falls back to `null` when we only have the rough estimate
   * path available.
   */
  private costPrecise: number | null = null;

  constructor(model: string) {
    this.model = model;
  }

  /** Append newly‑received items to the internal history. */
  addItems(items: Array<ResponseItem>): void {
    this.items.push(...items);
  }

  /**
   * Add a full usage breakdown as returned by the Responses API. This gives
   * us exact token counts and allows true‑to‑spec cost accounting that
   * factors in cached tokens.
   */
  addUsage(usage: UsageBreakdown): void {
    const tokens =
      usage.total_tokens ??
      (usage.input_tokens ?? 0) + (usage.output_tokens ?? 0);

    if (Number.isFinite(tokens) && tokens > 0) {
      this.tokensUsedPrecise = (this.tokensUsedPrecise ?? 0) + tokens;
    }

    const cost = estimateCostFromUsage(usage, this.model);
    if (cost != null) {
      this.costPrecise = (this.costPrecise ?? 0) + cost;
    }
  }

  /** Legacy helper for callers that only know the total token count. */
  addTokens(count: number): void {
    if (Number.isFinite(count) && count > 0) {
      this.tokensUsedPrecise = (this.tokensUsedPrecise ?? 0) + count;
      // We deliberately do *not* update costPrecise here – without a detailed
      // breakdown we cannot know whether tokens were input/output/cached. We
      // therefore fall back to the blended rate during `getCostUSD()`.
    }
  }

  /** Approximate total token count so far. */
  getTokensUsed(): number {
    if (this.tokensUsedPrecise != null) {
      return this.tokensUsedPrecise;
    }
    return approximateTokensUsed(this.items);
  }

  /** Best‑effort USD cost estimate. Returns `null` when the model is unknown. */
  getCostUSD(): number | null {
    if (this.costPrecise != null) {
      return this.costPrecise;
    }

    const per = pricePerToken(this.model);
    if (per == null) {
      return null;
    }
    return this.getTokensUsed() * per;
  }

  /**
   * Human‑readable one‑liner suitable for printing at session end (e.g. on
   * Ctrl‑C or `/clear`).
   */
  summary(): string {
    const tokens = this.getTokensUsed();
    const cost = this.getCostUSD();
    if (cost == null) {
      return `Session complete – approx. ${tokens} tokens used.`;
    }
    return `Session complete – approx. ${tokens} tokens, $${cost.toFixed(
      4,
    )} USD.`;
  }
}

// ────────────────────────────────────────────────────────────────────────────
// Global helpers so disparate parts of the codebase can share a single
// tracker instance without threading it through countless function calls.
// ────────────────────────────────────────────────────────────────────────────

let globalTracker: SessionCostTracker | null = null;

export function getSessionTracker(): SessionCostTracker | null {
  return globalTracker;
}

export function ensureSessionTracker(model: string): SessionCostTracker {
  if (!globalTracker) {
    globalTracker = new SessionCostTracker(model);
  }
  return globalTracker;
}

export function resetSessionTracker(): void {
  globalTracker = null;
}

/**
 * Convenience helper that prints the session summary (if any) and resets the
 * global tracker so that the next conversation starts with a clean slate.
 */
export function printAndResetSessionSummary(): void {
  if (!globalTracker) {
    return; // nothing to do
  }

  // eslint-disable-next-line no-console -- explicit, user‑visible log
  console.log("\n" + globalTracker.summary() + "\n");

  resetSessionTracker();
}
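A usage sketch (not part of the diff) of the intended tracker lifecycle across one conversation, using the helpers exported above. The import path assumes a caller sitting next to the module inside codex-cli/src/utils; the model name and token figures are illustrative.

```ts
import {
  ensureSessionTracker,
  printAndResetSessionSummary,
} from "./session-cost.js";

// 1. The agent loop lazily creates the shared tracker for the active model ...
const tracker = ensureSessionTracker("gpt-4.1-mini");

// 2. ... and feeds it exact usage from completed responses.
tracker.addUsage({ input_tokens: 1_200, output_tokens: 300, total_tokens: 1_500 });

// 3. On /clear or process exit the summary is printed and the tracker reset,
//    so the next conversation starts from zero.
printAndResetSessionSummary();
// e.g. "Session complete – approx. 1500 tokens, $0.0010 USD."
```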
@@ -1,6 +1,9 @@
import type { Instance } from "ink";
import type React from "react";

// Cost‑tracking
import { printAndResetSessionSummary } from "./session-cost.js";

let inkRenderer: Instance | null = null;

// Track whether the clean‑up routine has already executed so repeat calls are
@@ -79,4 +82,12 @@ export function onExit(): void {
      /* best‑effort – continue even if Ink throws */
    }
  }

  // Finally, print a brief token/cost summary for the session – best effort
  // only, errors are swallowed so that shutdown always succeeds.
  try {
    printAndResetSessionSummary();
  } catch {
    /* ignore */
  }
}
codex-cli/tests/context-percent.test.ts (new file, 28 lines)
@@ -0,0 +1,28 @@
import { describe, expect, it } from "vitest";

import type { ResponseItem } from "openai/resources/responses/responses.mjs";

import { calculateContextPercentRemaining } from "../src/components/chat/terminal-chat-utils.js";

function makeUserMessage(id: string, text: string): ResponseItem {
  return {
    id,
    type: "message",
    role: "user",
    content: [{ type: "input_text", text }],
  } as ResponseItem;
}

describe("calculateContextPercentRemaining", () => {
  it("includes extra context characters in calculation", () => {
    const msgText = "a".repeat(40); // 40 chars → 10 tokens
    const items = [makeUserMessage("1", msgText)];

    const model = "gpt-4-16k";

    const base = calculateContextPercentRemaining(items, model);
    const withExtra = calculateContextPercentRemaining(items, model, 8); // +8 chars → +2 tokens

    expect(withExtra).toBeLessThan(base);
  });
});
codex-cli/tests/estimate-cost.test.ts (new file, 69 lines)
@@ -0,0 +1,69 @@
import { describe, expect, test } from "vitest";

import {
  estimateCostUSD,
  estimateCostFromUsage,
} from "../src/utils/estimate-cost.js";
import { SessionCostTracker } from "../src/utils/session-cost.js";
import type { ResponseItem } from "openai/resources/responses/responses.mjs";

// Helper to craft a minimal ResponseItem for tests
function makeMessage(
  id: string,
  role: "user" | "assistant",
  text: string,
): ResponseItem {
  return {
    id,
    type: "message",
    role,
    content: [{ type: role === "user" ? "input_text" : "output_text", text }],
  } as ResponseItem;
}

describe("estimateCostUSD", () => {
  test("returns a proportional, positive estimate for known models", () => {
    const items: Array<ResponseItem> = [
      makeMessage("1", "user", "hello world"),
      makeMessage("2", "assistant", "hi there"),
    ];

    const cost = estimateCostUSD(items, "gpt-3.5-turbo");
    expect(cost).not.toBeNull();
    expect(cost!).toBeGreaterThan(0);

    // Adding another token should increase the estimate
    const cost2 = estimateCostUSD(
      items.concat([makeMessage("3", "user", "extra")]),
      "gpt-3.5-turbo",
    );
    expect(cost2!).toBeGreaterThan(cost!);
  });

  test("cost calculation honours cached input token discount", () => {
    const usage = {
      input_tokens: 1000,
      input_tokens_details: { cached_tokens: 600 },
      output_tokens: 500,
      total_tokens: 1500,
    } as any; // simple literal structure for test

    const cost = estimateCostFromUsage(usage, "gpt-4.1");

    // Expected: (1000-600)*0.000002 + 600*0.0000005 + 500*0.000008
    const expected = 400 * 0.000002 + 600 * 0.0000005 + 500 * 0.000008;
    expect(cost).not.toBeNull();
    expect(cost!).toBeCloseTo(expected, 8);
  });
});

describe("SessionCostTracker", () => {
  test("accumulates items and reports tokens & cost", () => {
    const tracker = new SessionCostTracker("gpt-3.5-turbo");
    tracker.addItems([makeMessage("1", "user", "foo")]);
    tracker.addItems([makeMessage("2", "assistant", "bar baz")]);

    expect(tracker.getTokensUsed()).toBeGreaterThan(0);
    expect(tracker.getCostUSD()!).toBeGreaterThan(0);
  });
});
codex-cli/tests/session-cost.test.ts (new file, 85 lines)
@@ -0,0 +1,85 @@
import { afterEach, describe, expect, it, vi } from "vitest";

import type { ResponseItem } from "openai/resources/responses/responses.mjs";

import {
  ensureSessionTracker,
  getSessionTracker,
  printAndResetSessionSummary,
} from "../src/utils/session-cost.js";

function makeMessage(
  id: string,
  role: "user" | "assistant",
  text: string,
): ResponseItem {
  return {
    id,
    type: "message",
    role,
    content: [{ type: role === "user" ? "input_text" : "output_text", text }],
  } as ResponseItem;
}

describe("printAndResetSessionSummary", () => {
  afterEach(() => {
    vi.restoreAllMocks();
  });

  it("/clear resets tracker so successive conversations start fresh", () => {
    const spy = vi.spyOn(console, "log").mockImplementation(() => {});

    const perSessionTokens: Array<number> = [];

    for (let i = 1; i <= 3; i++) {
      const tracker = ensureSessionTracker("gpt-3.5-turbo");
      tracker.addTokens(i * 10); // 10, 20, 30
      perSessionTokens.push(tracker.getTokensUsed());

      // Simulate user typing /clear which prints & resets
      printAndResetSessionSummary();

      expect(getSessionTracker()).toBeNull();
    }

    expect(perSessionTokens).toEqual([10, 20, 30]);

    spy.mockRestore();
  });

  it("prints a summary and resets the global tracker", () => {
    const spy = vi.spyOn(console, "log").mockImplementation(() => {});

    const tracker = ensureSessionTracker("gpt-3.5-turbo");
    tracker.addItems([
      makeMessage("1", "user", "hello"),
      makeMessage("2", "assistant", "hi"),
    ]);

    printAndResetSessionSummary();

    expect(spy).toHaveBeenCalled();
    expect(getSessionTracker()).toBeNull();
  });

  it("prefers exact token counts added via addTokens() over heuristic", () => {
    const tracker = ensureSessionTracker("gpt-3.5-turbo");

    // Add a long message (heuristic would count >1 token)
    tracker.addItems([
      makeMessage("x", "user", "a".repeat(400)), // ~100 tokens
    ]);

    const heuristicTokens = tracker.getTokensUsed();
    expect(heuristicTokens).toBeGreaterThan(50);

    // Now inject an exact low token count and ensure it overrides
    tracker.addTokens(10);
    expect(tracker.getTokensUsed()).toBe(
      heuristicTokens + (10 - heuristicTokens),
    );

    const cost = tracker.getCostUSD();
    expect(cost).not.toBeNull();
  });
});