Compare commits

...

1 Commits

Author SHA1 Message Date
Dax Raad
184b6d0117 tui: show token generation speed (tok/s) in assistant messages
Users can now see how fast the model generates tokens alongside the total response time, making it easier to compare model performance and identify slower responses.
2026-02-20 23:49:32 -05:00
4 changed files with 48 additions and 9 deletions

View File

@@ -1101,6 +1101,7 @@ export function Session() {
</Match>
<Match when={message.role === "assistant"}>
<AssistantMessage
index={index()}
last={lastAssistant()?.id === message.id}
message={message as AssistantMessage}
parts={sync.data.part[message.id] ?? []}
@@ -1269,7 +1270,7 @@ function UserMessage(props: {
)
}
function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; last: boolean }) {
function AssistantMessage(props: { index: number; message: AssistantMessage; parts: Part[]; last: boolean }) {
const local = useLocal()
const { theme } = useTheme()
const sync = useSync()
@@ -1279,12 +1280,32 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
return props.message.finish && !["tool-calls", "unknown"].includes(props.message.finish)
})
const duration = createMemo(() => {
if (!final()) return 0
if (!props.message.time.completed) return 0
const user = messages().find((x) => x.role === "user" && x.id === props.message.parentID)
if (!user || !user.time) return 0
return props.message.time.completed - user.time.created
// Timing summary for the turn that ends with this assistant message.
//
// Walks backwards from this message's position in the session list to the user
// message it answers (`parentID`), accumulating the output tokens and the
// started -> streamed time of every assistant message on the way.
//
// Returns `{ total, tps }` where `total` is the elapsed time in milliseconds from
// the user message's creation to this message's completion, and `tps` is the
// output tokens per second of streaming time (null when it cannot be computed).
// Returns null when the message is not final/completed, when the parent user
// message is not found, or when the elapsed time is not positive.
const stats = createMemo(() => {
  if (!final() || !props.message.time.completed) return null
  const list = messages()
  let tokens = 0
  let active = 0
  for (let i = props.index; i >= 0; i--) {
    const msg = list[i]
    // The index may not match the list (e.g. out of range); skip holes instead of throwing.
    if (!msg) continue
    if (msg.role === "assistant") {
      tokens += msg.tokens?.output || 0
      if (msg.time.started && msg.time.streamed) {
        const delta = msg.time.streamed - msg.time.started
        if (delta > 0) active += delta
      }
    }
    if (msg.role === "user" && msg.id === props.message.parentID) {
      if (!msg.time?.created) return null
      const total = props.message.time.completed - msg.time.created
      if (total <= 0) return null
      // Missing streaming timestamps (they are optional) only disable the tok/s
      // figure; the total duration is still reported.
      const tps = tokens > 0 && active > 0 ? tokens / (active / 1000) : null
      return { total, tps }
    }
  }
  return null
})
return (
@@ -1334,8 +1355,14 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
</span>{" "}
<span style={{ fg: theme.text }}>{Locale.titlecase(props.message.mode)}</span>
<span style={{ fg: theme.textMuted }}> · {props.message.modelID}</span>
<Show when={duration()}>
<span style={{ fg: theme.textMuted }}> · {Locale.duration(duration())}</span>
<Show when={stats()}>
{(s) => (
<span style={{ fg: theme.textMuted }}>
{" "}
· {Locale.duration(s().total)}
<Show when={s().tps !== null}> · {s().tps!.toFixed(1)} tok/s</Show>
</span>
)}
</Show>
<Show when={props.message.error?.name === "MessageAbortedError"}>
<span style={{ fg: theme.textMuted }}> · interrupted</span>

View File

@@ -392,7 +392,9 @@ export namespace MessageV2 {
role: z.literal("assistant"),
time: z.object({
created: z.number(),
started: z.number().optional(),
completed: z.number().optional(),
streamed: z.number().optional(),
}),
error: z
.discriminatedUnion("name", [

View File

@@ -137,7 +137,9 @@ export namespace Message {
.object({
time: z.object({
created: z.number(),
started: z.number().optional(),
completed: z.number().optional(),
streamed: z.number().optional(),
}),
error: z
.discriminatedUnion("name", [AuthError.Schema, NamedError.Unknown.Schema, OutputLengthError.Schema])

View File

@@ -57,6 +57,10 @@ export namespace SessionProcessor {
switch (value.type) {
case "start":
SessionStatus.set(input.sessionID, { type: "busy" })
if (!input.assistantMessage.time.started) {
input.assistantMessage.time.started = Date.now()
await Session.updateMessage(input.assistantMessage)
}
break
case "reasoning-start":
@@ -337,6 +341,10 @@ export namespace SessionProcessor {
break
case "finish":
if (!input.assistantMessage.time.streamed) {
input.assistantMessage.time.streamed = Date.now()
await Session.updateMessage(input.assistantMessage)
}
break
default: