Apply PR #21822: refactor: improve compaction to retain recent conversation context

This commit is contained in:
opencode-agent[bot]
2026-04-10 23:27:34 +00:00
6 changed files with 610 additions and 16 deletions

View File

@@ -1,6 +1,7 @@
You are a helpful AI assistant tasked with summarizing conversations.
When asked to summarize, provide a detailed but concise summary of the conversation.
When asked to summarize, provide a detailed but concise summary of the older conversation history.
The most recent turns may be preserved verbatim outside your summary, so focus on information that would still be needed to continue the work with that recent context available.
Focus on information that would be helpful for continuing the conversation, including:
- What was done
- What is currently being worked on

View File

@@ -1074,6 +1074,18 @@ export namespace Config {
.object({
auto: z.boolean().optional().describe("Enable automatic compaction when context is full (default: true)"),
prune: z.boolean().optional().describe("Enable pruning of old tool outputs (default: true)"),
tail_turns: z
.number()
.int()
.min(0)
.optional()
.describe("Number of recent real user turns to keep verbatim during compaction (default: 2)"),
tail_tokens: z
.number()
.int()
.min(0)
.optional()
.describe("Token budget for retained recent turns during compaction"),
reserved: z
.number()
.int()

View File

@@ -4,6 +4,7 @@ import { Session } from "."
import { SessionID, MessageID, PartID } from "./schema"
import { Instance } from "../project/instance"
import { Provider } from "../provider/provider"
import { ProviderTransform } from "../provider/transform"
import { MessageV2 } from "./message-v2"
import z from "zod"
import { Token } from "../util/token"
@@ -35,6 +36,46 @@ export namespace SessionCompaction {
export const PRUNE_MINIMUM = 20_000
export const PRUNE_PROTECT = 40_000
const PRUNE_PROTECTED_TOOLS = ["skill"]
const DEFAULT_TAIL_TURNS = 2
const MIN_TAIL_TOKENS = 2_000
const MAX_TAIL_TOKENS = 8_000
type Turn = {
start: number
end: number
id: MessageID
}
function usable(input: { cfg: Config.Info; model: Provider.Model }) {
const reserved = input.cfg.compaction?.reserved ?? Math.min(20_000, ProviderTransform.maxOutputTokens(input.model))
return input.model.limit.input
? Math.max(0, input.model.limit.input - reserved)
: Math.max(0, input.model.limit.context - ProviderTransform.maxOutputTokens(input.model))
}
function tailBudget(input: { cfg: Config.Info; model: Provider.Model }) {
return (
input.cfg.compaction?.tail_tokens ??
Math.min(MAX_TAIL_TOKENS, Math.max(MIN_TAIL_TOKENS, Math.floor(usable(input) * 0.25)))
)
}
function turns(messages: MessageV2.WithParts[]) {
const result: Turn[] = []
for (let i = 0; i < messages.length; i++) {
const msg = messages[i]
if (msg.info.role !== "user") continue
if (msg.parts.some((part) => part.type === "compaction")) continue
result.push({
start: i,
end: messages.length,
id: msg.info.id,
})
}
for (let i = 0; i < result.length - 1; i++) {
result[i].end = result[i + 1].start
}
return result
}
export interface Interface {
readonly isOverflow: (input: {
@@ -88,11 +129,60 @@ export namespace SessionCompaction {
return overflow({ cfg: yield* config.get(), tokens: input.tokens, model: input.model })
})
const estimate = Effect.fn("SessionCompaction.estimate")(function* (input: {
messages: MessageV2.WithParts[]
model: Provider.Model
}) {
const msgs = yield* MessageV2.toModelMessagesEffect(input.messages, input.model, { stripMedia: true })
return Token.estimate(JSON.stringify(msgs))
})
const select = Effect.fn("SessionCompaction.select")(function* (input: {
messages: MessageV2.WithParts[]
cfg: Config.Info
model: Provider.Model
}) {
const limit = input.cfg.compaction?.tail_turns ?? DEFAULT_TAIL_TURNS
if (limit <= 0) return { head: input.messages, tail_start_id: undefined }
const budget = tailBudget({ cfg: input.cfg, model: input.model })
const all = turns(input.messages)
if (!all.length) return { head: input.messages, tail_start_id: undefined }
const recent = all.slice(-limit)
const sizes = yield* Effect.forEach(
recent,
(turn) =>
estimate({
messages: input.messages.slice(turn.start, turn.end),
model: input.model,
}),
{ concurrency: 1 },
)
if (sizes.at(-1)! > budget) {
log.info("tail fallback", { budget, size: sizes.at(-1) })
return { head: input.messages, tail_start_id: undefined }
}
let total = 0
let keep: Turn | undefined
for (let i = recent.length - 1; i >= 0; i--) {
const size = sizes[i]
if (total + size > budget) break
total += size
keep = recent[i]
}
if (!keep || keep.start === 0) return { head: input.messages, tail_start_id: undefined }
return {
head: input.messages.slice(0, keep.start),
tail_start_id: keep.id,
}
})
// goes backwards through parts until there are PRUNE_PROTECT tokens worth of tool
// calls, then erases output of older tool calls to free context space
const prune = Effect.fn("SessionCompaction.prune")(function* (input: { sessionID: SessionID }) {
const cfg = yield* config.get()
if (cfg.compaction?.prune === false) return
if (cfg.compaction?.prune !== true) return
log.info("pruning")
const msgs = yield* session
@@ -150,6 +240,7 @@ export namespace SessionCompaction {
throw new Error(`Compaction parent must be a user message: ${input.parentID}`)
}
const userMessage = parent.info
const compactionPart = parent.parts.find((part): part is MessageV2.CompactionPart => part.type === "compaction")
let messages = input.messages
let replay:
@@ -180,14 +271,22 @@ export namespace SessionCompaction {
const model = agent.model
? yield* provider.getModel(agent.model.providerID, agent.model.modelID)
: yield* provider.getModel(userMessage.model.providerID, userMessage.model.modelID)
const cfg = yield* config.get()
const history = compactionPart && messages.at(-1)?.info.id === input.parentID ? messages.slice(0, -1) : messages
const selected = yield* select({
messages: history,
cfg,
model,
})
// Allow plugins to inject context or replace compaction prompt.
const compacting = yield* plugin.trigger(
"experimental.session.compacting",
{ sessionID: input.sessionID },
{ context: [], prompt: undefined },
)
const defaultPrompt = `Provide a detailed prompt for continuing our conversation above.
Focus on information that would be helpful for continuing the conversation, including what we did, what we're doing, which files we're working on, and what we're going to do next.
const defaultPrompt = `Summarize the older conversation history so another agent can continue the work with the retained recent turns.
The most recent conversation turns will remain verbatim outside this summary, so focus on older context that is still needed to understand and continue the work.
Include what we did, what we're doing, which files we're working on, and what we're going to do next.
The summary that you construct will be used so that another agent can read it and continue the work.
Do not call any tools. Respond only with the summary text.
Respond in the same language as the user's messages in the conversation.
@@ -217,7 +316,7 @@ When constructing the summary, try to stick to this template:
---`
const prompt = compacting.prompt ?? [defaultPrompt, ...compacting.context].join("\n\n")
const msgs = structuredClone(messages)
const msgs = structuredClone(selected.head)
yield* plugin.trigger("experimental.chat.messages.transform", {}, { messages: msgs })
const modelMessages = yield* MessageV2.toModelMessagesEffect(msgs, model, { stripMedia: true })
const ctx = yield* InstanceState.context
@@ -280,6 +379,13 @@ When constructing the summary, try to stick to this template:
return "stop"
}
if (compactionPart && selected.tail_start_id && compactionPart.tail_start_id !== selected.tail_start_id) {
yield* session.updatePart({
...compactionPart,
tail_start_id: selected.tail_start_id,
})
}
if (result === "continue" && input.auto) {
if (replay) {
const original = replay.info

View File

@@ -208,6 +208,7 @@ export namespace MessageV2 {
type: z.literal("compaction"),
auto: z.boolean(),
overflow: z.boolean().optional(),
tail_start_id: MessageID.zod.optional(),
}).meta({
ref: "CompactionPart",
})
@@ -926,14 +927,21 @@ export namespace MessageV2 {
export function filterCompacted(msgs: Iterable<MessageV2.WithParts>) {
const result = [] as MessageV2.WithParts[]
const completed = new Set<string>()
let retain: MessageID | undefined
for (const msg of msgs) {
result.push(msg)
if (
msg.info.role === "user" &&
completed.has(msg.info.id) &&
msg.parts.some((part) => part.type === "compaction")
)
break
if (retain) {
if (msg.info.id === retain) break
continue
}
if (msg.info.role === "user" && completed.has(msg.info.id)) {
const part = msg.parts.find((item): item is MessageV2.CompactionPart => item.type === "compaction")
if (!part) continue
if (!part.tail_start_id) break
retain = part.tail_start_id
if (msg.info.id === retain) break
continue
}
if (msg.info.role === "assistant" && msg.info.summary && msg.info.finish && !msg.info.error)
completed.add(msg.info.parentID)
}

View File

@@ -154,7 +154,19 @@ function layer(result: "continue" | "compact") {
)
}
function runtime(result: "continue" | "compact", plugin = Plugin.defaultLayer, provider = ProviderTest.fake()) {
function cfg(compaction?: Config.Info["compaction"]) {
const base = Config.Info.parse({})
return Layer.mock(Config.Service)({
get: () => Effect.succeed({ ...base, compaction }),
})
}
function runtime(
result: "continue" | "compact",
plugin = Plugin.defaultLayer,
provider = ProviderTest.fake(),
config = Config.defaultLayer,
) {
const bus = Bus.layer
return ManagedRuntime.make(
Layer.mergeAll(SessionCompaction.layer, bus).pipe(
@@ -164,7 +176,7 @@ function runtime(result: "continue" | "compact", plugin = Plugin.defaultLayer, p
Layer.provide(Agent.defaultLayer),
Layer.provide(plugin),
Layer.provide(bus),
Layer.provide(Config.defaultLayer),
Layer.provide(config),
),
)
}
@@ -191,7 +203,7 @@ function llm() {
}
}
function liveRuntime(layer: Layer.Layer<LLM.Service>, provider = ProviderTest.fake()) {
function liveRuntime(layer: Layer.Layer<LLM.Service>, provider = ProviderTest.fake(), config = Config.defaultLayer) {
const bus = Bus.layer
const status = SessionStatus.layer.pipe(Layer.provide(bus))
const processor = SessionProcessorModule.SessionProcessor.layer
@@ -206,11 +218,66 @@ function liveRuntime(layer: Layer.Layer<LLM.Service>, provider = ProviderTest.fa
Layer.provide(Plugin.defaultLayer),
Layer.provide(status),
Layer.provide(bus),
Layer.provide(Config.defaultLayer),
Layer.provide(config),
),
)
}
function reply(
text: string,
capture?: (input: LLM.StreamInput) => void,
): (input: LLM.StreamInput) => Stream.Stream<LLM.Event, unknown> {
return (input) => {
capture?.(input)
return Stream.make(
{ type: "start" } satisfies LLM.Event,
{ type: "text-start", id: "txt-0" } satisfies LLM.Event,
{ type: "text-delta", id: "txt-0", delta: text, text } as LLM.Event,
{ type: "text-end", id: "txt-0" } satisfies LLM.Event,
{
type: "finish-step",
finishReason: "stop",
rawFinishReason: "stop",
response: { id: "res", modelId: "test-model", timestamp: new Date() },
providerMetadata: undefined,
usage: {
inputTokens: 1,
outputTokens: 1,
totalTokens: 2,
inputTokenDetails: {
noCacheTokens: undefined,
cacheReadTokens: undefined,
cacheWriteTokens: undefined,
},
outputTokenDetails: {
textTokens: undefined,
reasoningTokens: undefined,
},
},
} satisfies LLM.Event,
{
type: "finish",
finishReason: "stop",
rawFinishReason: "stop",
totalUsage: {
inputTokens: 1,
outputTokens: 1,
totalTokens: 2,
inputTokenDetails: {
noCacheTokens: undefined,
cacheReadTokens: undefined,
cacheWriteTokens: undefined,
},
outputTokenDetails: {
textTokens: undefined,
reasoningTokens: undefined,
},
},
} satisfies LLM.Event,
)
}
}
function wait(ms = 50) {
return new Promise((resolve) => setTimeout(resolve, ms))
}
@@ -661,6 +728,148 @@ describe("session.compaction.process", () => {
})
})
test("persists tail_start_id for retained recent turns", async () => {
await using tmp = await tmpdir()
await Instance.provide({
directory: tmp.path,
fn: async () => {
const session = await Session.create({})
await user(session.id, "first")
const keep = await user(session.id, "second")
await user(session.id, "third")
await SessionCompaction.create({
sessionID: session.id,
agent: "build",
model: ref,
auto: false,
})
const rt = runtime("continue", Plugin.defaultLayer, wide(), cfg({ tail_turns: 2, tail_tokens: 10_000 }))
try {
const msgs = await Session.messages({ sessionID: session.id })
const parent = msgs.at(-1)?.info.id
expect(parent).toBeTruthy()
await rt.runPromise(
SessionCompaction.Service.use((svc) =>
svc.process({
parentID: parent!,
messages: msgs,
sessionID: session.id,
auto: false,
}),
),
)
const part = (await Session.messages({ sessionID: session.id })).at(-2)?.parts.find(
(item) => item.type === "compaction",
)
expect(part?.type).toBe("compaction")
if (part?.type === "compaction") expect(part.tail_start_id).toBe(keep.id)
} finally {
await rt.dispose()
}
},
})
})
test("shrinks retained tail to fit tail token budget", async () => {
await using tmp = await tmpdir()
await Instance.provide({
directory: tmp.path,
fn: async () => {
const session = await Session.create({})
await user(session.id, "first")
await user(session.id, "x".repeat(2_000))
const keep = await user(session.id, "tiny")
await SessionCompaction.create({
sessionID: session.id,
agent: "build",
model: ref,
auto: false,
})
const rt = runtime("continue", Plugin.defaultLayer, wide(), cfg({ tail_turns: 2, tail_tokens: 100 }))
try {
const msgs = await Session.messages({ sessionID: session.id })
const parent = msgs.at(-1)?.info.id
expect(parent).toBeTruthy()
await rt.runPromise(
SessionCompaction.Service.use((svc) =>
svc.process({
parentID: parent!,
messages: msgs,
sessionID: session.id,
auto: false,
}),
),
)
const part = (await Session.messages({ sessionID: session.id })).at(-2)?.parts.find(
(item) => item.type === "compaction",
)
expect(part?.type).toBe("compaction")
if (part?.type === "compaction") expect(part.tail_start_id).toBe(keep.id)
} finally {
await rt.dispose()
}
},
})
})
test("falls back to full summary when even one recent turn exceeds tail budget", async () => {
await using tmp = await tmpdir({ git: true })
const stub = llm()
let captured = ""
stub.push(
reply("summary", (input) => {
captured = JSON.stringify(input.messages)
}),
)
await Instance.provide({
directory: tmp.path,
fn: async () => {
const session = await Session.create({})
await user(session.id, "first")
await user(session.id, "y".repeat(2_000))
await SessionCompaction.create({
sessionID: session.id,
agent: "build",
model: ref,
auto: false,
})
const rt = liveRuntime(stub.layer, wide(), cfg({ tail_turns: 1, tail_tokens: 20 }))
try {
const msgs = await Session.messages({ sessionID: session.id })
const parent = msgs.at(-1)?.info.id
expect(parent).toBeTruthy()
await rt.runPromise(
SessionCompaction.Service.use((svc) =>
svc.process({
parentID: parent!,
messages: msgs,
sessionID: session.id,
auto: false,
}),
),
)
const part = (await Session.messages({ sessionID: session.id })).at(-2)?.parts.find(
(item) => item.type === "compaction",
)
expect(part?.type).toBe("compaction")
if (part?.type === "compaction") expect(part.tail_start_id).toBeUndefined()
expect(captured).toContain("yyyy")
} finally {
await rt.dispose()
}
},
})
})
test("replays the prior user turn on overflow when earlier context exists", async () => {
await using tmp = await tmpdir()
await Instance.provide({
@@ -978,6 +1187,130 @@ describe("session.compaction.process", () => {
},
})
})
test("summarizes only the head while keeping recent tail out of summary input", async () => {
const stub = llm()
let captured = ""
stub.push(
reply("summary", (input) => {
captured = JSON.stringify(input.messages)
}),
)
await using tmp = await tmpdir({ git: true })
await Instance.provide({
directory: tmp.path,
fn: async () => {
const session = await Session.create({})
await user(session.id, "older context")
await user(session.id, "keep this turn")
await user(session.id, "and this one too")
await SessionCompaction.create({
sessionID: session.id,
agent: "build",
model: ref,
auto: false,
})
const rt = liveRuntime(stub.layer, wide())
try {
const msgs = await Session.messages({ sessionID: session.id })
const parent = msgs.at(-1)?.info.id
expect(parent).toBeTruthy()
await rt.runPromise(
SessionCompaction.Service.use((svc) =>
svc.process({
parentID: parent!,
messages: msgs,
sessionID: session.id,
auto: false,
}),
),
)
expect(captured).toContain("older context")
expect(captured).not.toContain("keep this turn")
expect(captured).not.toContain("and this one too")
expect(captured).not.toContain("What did we do so far?")
} finally {
await rt.dispose()
}
},
})
})
test("keeps recent pre-compaction turns across repeated compactions", async () => {
const stub = llm()
stub.push(reply("summary one"))
stub.push(reply("summary two"))
await using tmp = await tmpdir()
await Instance.provide({
directory: tmp.path,
fn: async () => {
const session = await Session.create({})
const u1 = await user(session.id, "one")
const u2 = await user(session.id, "two")
const u3 = await user(session.id, "three")
await SessionCompaction.create({
sessionID: session.id,
agent: "build",
model: ref,
auto: false,
})
const rt = liveRuntime(stub.layer, wide(), cfg({ tail_turns: 2, tail_tokens: 10_000 }))
try {
let msgs = await Session.messages({ sessionID: session.id })
let parent = msgs.at(-1)?.info.id
expect(parent).toBeTruthy()
await rt.runPromise(
SessionCompaction.Service.use((svc) =>
svc.process({
parentID: parent!,
messages: msgs,
sessionID: session.id,
auto: false,
}),
),
)
const u4 = await user(session.id, "four")
await SessionCompaction.create({
sessionID: session.id,
agent: "build",
model: ref,
auto: false,
})
msgs = MessageV2.filterCompacted(MessageV2.stream(session.id))
parent = msgs.at(-1)?.info.id
expect(parent).toBeTruthy()
await rt.runPromise(
SessionCompaction.Service.use((svc) =>
svc.process({
parentID: parent!,
messages: msgs,
sessionID: session.id,
auto: false,
}),
),
)
const filtered = MessageV2.filterCompacted(MessageV2.stream(session.id))
const ids = filtered.map((msg) => msg.info.id)
expect(ids).not.toContain(u1.id)
expect(ids).not.toContain(u2.id)
expect(ids).toContain(u3.id)
expect(ids).toContain(u4.id)
expect(filtered.some((msg) => msg.info.role === "assistant" && msg.info.summary)).toBe(true)
expect(filtered.some((msg) => msg.info.role === "user" && msg.parts.some((part) => part.type === "compaction"))).toBe(true)
} finally {
await rt.dispose()
}
},
})
})
})
describe("util.token.estimate", () => {

View File

@@ -86,13 +86,14 @@ async function addAssistant(
return id
}
async function addCompactionPart(sessionID: SessionID, messageID: MessageID) {
async function addCompactionPart(sessionID: SessionID, messageID: MessageID, tailStartID?: MessageID) {
await Session.updatePart({
id: PartID.ascending(),
sessionID,
messageID,
type: "compaction",
auto: true,
tail_start_id: tailStartID,
} as any)
}
@@ -759,6 +760,139 @@ describe("MessageV2.filterCompacted", () => {
})
})
test("retains original tail when compaction stores tail_start_id", async () => {
await Instance.provide({
directory: root,
fn: async () => {
const session = await Session.create({})
const u1 = await addUser(session.id, "first")
const a1 = await addAssistant(session.id, u1, { finish: "end_turn" })
await Session.updatePart({
id: PartID.ascending(),
sessionID: session.id,
messageID: a1,
type: "text",
text: "first reply",
})
const u2 = await addUser(session.id, "second")
const a2 = await addAssistant(session.id, u2, { finish: "end_turn" })
await Session.updatePart({
id: PartID.ascending(),
sessionID: session.id,
messageID: a2,
type: "text",
text: "second reply",
})
const c1 = await addUser(session.id)
await addCompactionPart(session.id, c1, u2)
const s1 = await addAssistant(session.id, c1, { summary: true, finish: "end_turn" })
await Session.updatePart({
id: PartID.ascending(),
sessionID: session.id,
messageID: s1,
type: "text",
text: "summary",
})
const u3 = await addUser(session.id, "third")
const a3 = await addAssistant(session.id, u3, { finish: "end_turn" })
await Session.updatePart({
id: PartID.ascending(),
sessionID: session.id,
messageID: a3,
type: "text",
text: "third reply",
})
const result = MessageV2.filterCompacted(MessageV2.stream(session.id))
expect(result.map((item) => item.info.id)).toEqual([u2, a2, c1, s1, u3, a3])
await Session.remove(session.id)
},
})
})
test("prefers latest compaction boundary when repeated compactions exist", async () => {
await Instance.provide({
directory: root,
fn: async () => {
const session = await Session.create({})
const u1 = await addUser(session.id, "first")
const a1 = await addAssistant(session.id, u1, { finish: "end_turn" })
await Session.updatePart({
id: PartID.ascending(),
sessionID: session.id,
messageID: a1,
type: "text",
text: "first reply",
})
const u2 = await addUser(session.id, "second")
const a2 = await addAssistant(session.id, u2, { finish: "end_turn" })
await Session.updatePart({
id: PartID.ascending(),
sessionID: session.id,
messageID: a2,
type: "text",
text: "second reply",
})
const c1 = await addUser(session.id)
await addCompactionPart(session.id, c1, u2)
const s1 = await addAssistant(session.id, c1, { summary: true, finish: "end_turn" })
await Session.updatePart({
id: PartID.ascending(),
sessionID: session.id,
messageID: s1,
type: "text",
text: "summary one",
})
const u3 = await addUser(session.id, "third")
const a3 = await addAssistant(session.id, u3, { finish: "end_turn" })
await Session.updatePart({
id: PartID.ascending(),
sessionID: session.id,
messageID: a3,
type: "text",
text: "third reply",
})
const c2 = await addUser(session.id)
await addCompactionPart(session.id, c2, u3)
const s2 = await addAssistant(session.id, c2, { summary: true, finish: "end_turn" })
await Session.updatePart({
id: PartID.ascending(),
sessionID: session.id,
messageID: s2,
type: "text",
text: "summary two",
})
const u4 = await addUser(session.id, "fourth")
const a4 = await addAssistant(session.id, u4, { finish: "end_turn" })
await Session.updatePart({
id: PartID.ascending(),
sessionID: session.id,
messageID: a4,
type: "text",
text: "fourth reply",
})
const result = MessageV2.filterCompacted(MessageV2.stream(session.id))
expect(result.map((item) => item.info.id)).toEqual([u3, a3, c2, s2, u4, a4])
await Session.remove(session.id)
},
})
})
test("works with array input", () => {
// filterCompacted accepts any Iterable, not just generators
const id = MessageID.ascending()