late append

This commit is contained in:
Your Name
2026-05-20 16:37:22 +00:00
parent 89cc0fc678
commit 604b2dcb6d
5 changed files with 233 additions and 43 deletions

View File

@@ -125,4 +125,27 @@ describe('ContextManager', () => {
);
expect(passedNodeIds.has(passedNodes[0].id)).toBe(true);
});
it('renderHistory should exclude pendingRequest from the result (late binding)', async () => {
  // The in-flight user turn that is handed to renderHistory as the pending request.
  const activeTurn: HistoryTurn = {
    id: 'pending-turn-1',
    content: { role: 'user', parts: [{ text: 'Active prompt' }] },
  };

  const manager = new ContextManager(
    mockSidecar,
    mockEnv,
    mockTracer,
    mockOrchestrator,
    mockChatHistory,
    mockAdvancedTokenCalculator,
  );

  const rendered = await manager.renderHistory(activeTurn);

  // mockChatHistory has no historical turns, so neither view may contain
  // anything -- in particular not the pending request itself.
  expect(rendered.history).toHaveLength(0);
  expect(rendered.apiHistory).toHaveLength(0);
});
});

View File

@@ -44,6 +44,7 @@ export class ContextManager {
result: {
history: HistoryTurn[];
apiHistory: Content[];
pendingApiHistory: Content[];
didApplyManagement: boolean;
baseUnits: number;
processedNodes: readonly ConcreteNode[];
@@ -294,6 +295,7 @@ export class ContextManager {
): Promise<{
history: HistoryTurn[];
apiHistory: Content[];
pendingApiHistory: Content[];
didApplyManagement: boolean;
baseUnits: number;
processedNodes: readonly ConcreteNode[];
@@ -373,12 +375,16 @@ export class ContextManager {
this.tracer,
this.env,
this.advancedTokenCalculator,
protectionReasons,
header,
{
protectionReasons,
header,
lateBindPrompt: !!pendingRequest,
},
);
const {
history: renderedHistory,
pendingHistory,
didApplyManagement,
baseUnits,
processedNodes,
@@ -400,11 +406,19 @@ export class ContextManager {
this.tracer.logEvent('ContextManager', 'Finished rendering');
const hardenedHistory = hardenHistory([...renderedHistory], {
const allHistory = [...renderedHistory, ...pendingHistory];
const hardenedAllHistory = hardenHistory(allHistory, {
sentinels: this.sidecar.sentinels,
});
const apiHistory = hardenedHistory.map((h) => h.content);
const apiHistory = hardenedAllHistory
.slice(0, renderedHistory.length)
.map((h) => h.content);
const pendingApiHistory = hardenedAllHistory
.slice(renderedHistory.length)
.map((h) => h.content);
if (header) {
apiHistory.unshift(header);
}
@@ -412,6 +426,7 @@ export class ContextManager {
const result = {
history: renderedHistory,
apiHistory,
pendingApiHistory,
didApplyManagement,
baseUnits,
processedNodes,

View File

@@ -69,8 +69,10 @@ describe('render', () => {
tracer,
env,
mockAdvancedTokenCalculator as unknown as AdvancedTokenCalculator,
new Map(),
undefined,
{
protectionReasons: new Map(),
header: undefined,
},
);
expect(result.history).toEqual([
@@ -172,8 +174,10 @@ describe('render', () => {
tracer,
env,
mockAdvancedTokenCalculator as unknown as AdvancedTokenCalculator,
new Map(),
undefined,
{
protectionReasons: new Map(),
header: undefined,
},
);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
@@ -269,8 +273,10 @@ describe('render', () => {
tracer,
env,
mockAdvancedTokenCalculator as unknown as AdvancedTokenCalculator,
new Map(),
undefined,
{
protectionReasons: new Map(),
header: undefined,
},
);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
@@ -279,4 +285,66 @@ describe('render', () => {
expect(surviving).toEqual(['B', 'C']); // A is dropped
expect(result.baseUnits).toBe(160000);
});
it('should exclude the last turn when lateBindPrompt is true', async () => {
  // Two nodes on two different turns; 'turn-2' belongs to the last node and is
  // therefore the turn expected to be split off as pending.
  const graphNodes: ConcreteNode[] = [
    {
      id: '1',
      type: NodeType.USER_PROMPT,
      turnId: 'turn-1',
      payload: {} as Part,
    } as unknown as ConcreteNode,
    {
      id: '2',
      type: NodeType.AGENT_THOUGHT,
      turnId: 'turn-2',
      payload: {} as Part,
    } as unknown as ConcreteNode,
  ];

  // Tracer stub: events are recorded but otherwise ignored.
  const tracerStub = {
    logEvent: vi.fn(),
  } as unknown as ContextTracer;

  // Environment stub whose graph mapper turns each node into `{ text: <id> }`,
  // which makes it easy to see which nodes ended up in which list.
  const envStub = {
    tokenCalculator: {
      calculateConcreteListTokens: vi.fn().mockReturnValue(100),
      calculateTokenBreakdown: vi.fn().mockReturnValue({}),
    },
    graphMapper: {
      fromGraph: vi.fn((mapped: readonly ConcreteNode[]) =>
        mapped.map((node) => ({ text: node.id })),
      ),
    },
  } as unknown as ContextEnvironment;

  // Token calculator stub with fixed return values.
  const calculatorStub = {
    calculateTokensAndBaseUnits: vi.fn().mockReturnValue({
      tokens: 100,
      baseUnits: 100,
    }),
    getRawBaseUnits: vi.fn().mockReturnValue(50),
    calculateConcreteListTokens: vi.fn().mockReturnValue(100),
    getRawBaseUnitsForContent: vi.fn().mockReturnValue(0),
  } as unknown as AdvancedTokenCalculator;

  const profile = { config: {} } as ContextProfile; // No budget

  // Orchestrator stub that hands the nodes back unchanged.
  const orchestratorStub = {
    executeTriggerSync: vi.fn(async (trigger, nodes) => nodes),
  } as unknown as PipelineOrchestrator;

  const { history, pendingHistory, baseUnits } = await render(
    graphNodes,
    orchestratorStub,
    profile,
    tracerStub,
    envStub,
    calculatorStub,
    {
      lateBindPrompt: true,
    },
  );

  // Turn 2 (node 2) is excluded from the managed history ...
  expect(history).toEqual([{ text: '1' }]);
  // ... and is reported as pending instead.
  expect(pendingHistory).toEqual([{ text: '2' }]);
  // Matches the stubbed getRawBaseUnits value; no header is supplied.
  expect(baseUnits).toBe(50);
});
});

View File

@@ -15,6 +15,17 @@ import { performCalibration } from '../utils/tokenCalibration.js';
import type { AdvancedTokenCalculator } from '../utils/contextTokenCalculator.js';
import type { HistoryTurn } from '../../core/agentChatHistory.js';
/**
 * Optional settings accepted as the trailing `options` argument of `render`.
 * Every field may be omitted; `render` defaults the whole object to `{}`.
 */
export interface RenderOptions {
/**
 * String-to-string map of protection reasons. `render` substitutes an empty
 * Map when this is omitted.
 * NOTE(review): presumably keyed by node id -- confirm against the caller.
 */
protectionReasons?: Map<string, string>;
/**
 * Optional header content. When present, `render` computes its cost and adds
 * the header's base units to the `baseUnits` value it returns.
 */
header?: Content;
/**
 * If true, the most recent turn in the graph will not be considered for
 * consolidation (snapshots) or included in the returned history.
 * This is used for "late-binding" the prompt.
 *
 * The "most recent turn" is the `turnId` of the last node passed to `render`;
 * nodes carrying that `turnId` are returned in `pendingHistory` rather than
 * in `history`.
 */
lateBindPrompt?: boolean;
}
/**
* Maps the Episodic Context Graph back into a list of HistoryTurns for transmission.
* It applies synchronous context management (GC backstop) if the budget is exceeded.
@@ -26,14 +37,15 @@ export async function render(
tracer: ContextTracer,
env: ContextEnvironment,
advancedTokenCalculator: AdvancedTokenCalculator,
protectionReasons: Map<string, string> = new Map(),
header?: Content,
options: RenderOptions = {},
): Promise<{
history: HistoryTurn[];
pendingHistory: HistoryTurn[];
didApplyManagement: boolean;
baseUnits: number;
processedNodes: readonly ConcreteNode[];
}> {
const { protectionReasons = new Map(), header, lateBindPrompt } = options;
let headerTokens = 0;
let headerBaseUnits = 0;
if (header) {
@@ -43,18 +55,36 @@ export async function render(
headerBaseUnits = costs.baseUnits;
}
const lastTurnId = nodes[nodes.length - 1]?.turnId;
if (!sidecar.config.budget) {
const contents = env.graphMapper.fromGraph(nodes);
const allVisibleNodes = nodes;
const managedNodes =
lateBindPrompt && lastTurnId
? allVisibleNodes.filter((n) => n.turnId !== lastTurnId)
: allVisibleNodes;
const pendingNodes =
lateBindPrompt && lastTurnId
? allVisibleNodes.filter((n) => n.turnId === lastTurnId)
: [];
const history = env.graphMapper.fromGraph(managedNodes);
const pendingHistory = env.graphMapper.fromGraph(pendingNodes);
tracer.logEvent('Render', 'Render Context to LLM (No Budget)', {
renderedContext: contents,
renderedContext: history,
pendingContext: pendingHistory,
});
// In all cases, retrieve raw base units from the token calculator interface
const baseUnits =
advancedTokenCalculator.getRawBaseUnits(nodes) + headerBaseUnits;
advancedTokenCalculator.getRawBaseUnits(allVisibleNodes) +
headerBaseUnits;
return {
history: contents,
history,
pendingHistory,
didApplyManagement: false,
baseUnits,
processedNodes: nodes,
@@ -63,7 +93,7 @@ export async function render(
const maxTokens = sidecar.config.budget.maxTokens;
const { tokens: graphTokens, baseUnits: graphBaseUnits } =
const { tokens: graphTokens } =
advancedTokenCalculator.calculateTokensAndBaseUnits(nodes);
const currentTokens = graphTokens + headerTokens;
@@ -93,19 +123,39 @@ export async function render(
'Render',
`View is within maxTokens (${currentTokens} <= ${maxTokens}). Returning view.`,
);
const contents = env.graphMapper.fromGraph(nodes);
const allVisibleNodes = nodes;
const managedNodes =
lateBindPrompt && lastTurnId
? allVisibleNodes.filter((n) => n.turnId !== lastTurnId)
: allVisibleNodes;
const pendingNodes =
lateBindPrompt && lastTurnId
? allVisibleNodes.filter((n) => n.turnId === lastTurnId)
: [];
const history = env.graphMapper.fromGraph(managedNodes);
const pendingHistory = env.graphMapper.fromGraph(pendingNodes);
tracer.logEvent('Render', 'Render Context for LLM', {
renderedContext: contents,
renderedContext: history,
pendingContext: pendingHistory,
});
performCalibration(
env,
nodes,
contents.map((h) => h.content),
);
performCalibration(env, allVisibleNodes, [
...history.map((h) => h.content),
...pendingHistory.map((h) => h.content),
]);
return {
history: contents,
history,
pendingHistory,
didApplyManagement: false,
baseUnits: graphBaseUnits + headerBaseUnits,
baseUnits:
advancedTokenCalculator.getRawBaseUnits(allVisibleNodes) +
headerBaseUnits,
processedNodes: nodes,
};
}
@@ -119,22 +169,27 @@ export async function render(
`Context Manager Synchronous Barrier triggered: View at ${currentTokens} tokens (limit: ${maxTokens}).`,
);
// Calculate exactly which nodes aged out of the retainedTokens budget to form our target delta
const agedOutNodes = new Set<string>();
let rollingTokens = 0;
// Start from newest and count backwards
for (let i = nodes.length - 1; i >= 0; i--) {
const node = nodes[i];
const priorTokens = rollingTokens;
const nodeTokens = env.tokenCalculator.calculateConcreteListTokens([node]);
rollingTokens += nodeTokens;
// Loose Boundary Policy: Keep the node that crosses the boundary
if (priorTokens > sidecar.config.budget.retainedTokens) {
agedOutNodes.add(node.id);
}
}
if (lateBindPrompt && lastTurnId) {
for (const node of nodes) {
if (node.turnId === lastTurnId) {
agedOutNodes.delete(node.id);
}
}
}
const processedNodes = await orchestrator.executeTriggerSync(
'gc_backstop',
nodes,
@@ -142,7 +197,6 @@ export async function render(
protectedIds,
);
// Apply skipList logic to abstract over summarized nodes
const skipList = new Set<string>();
for (const node of processedNodes) {
if (node.abstractsIds) {
@@ -150,27 +204,43 @@ export async function render(
}
}
const visibleNodes = processedNodes.filter((n) => !skipList.has(n.id));
const allVisibleNodes = processedNodes.filter((n) => !skipList.has(n.id));
const managedNodes =
lateBindPrompt && lastTurnId
? allVisibleNodes.filter((n) => n.turnId !== lastTurnId)
: allVisibleNodes;
const pendingNodes =
lateBindPrompt && lastTurnId
? allVisibleNodes.filter((n) => n.turnId === lastTurnId)
: [];
const history = env.graphMapper.fromGraph(managedNodes);
const pendingHistory = env.graphMapper.fromGraph(pendingNodes);
const contents = env.graphMapper.fromGraph(visibleNodes);
const finalTokens =
advancedTokenCalculator.calculateConcreteListTokens(visibleNodes);
advancedTokenCalculator.calculateConcreteListTokens(allVisibleNodes);
tracer.logEvent('Render', 'Render Sanitized Context for LLM', {
renderedContextSanitized: contents,
renderedContextSanitized: history,
pendingContextSanitized: pendingHistory,
});
debugLogger.log(
`Context Manager finished. Final actual token count: ${finalTokens}.`,
);
performCalibration(
env,
visibleNodes,
contents.map((h) => h.content),
);
performCalibration(env, allVisibleNodes, [
...history.map((h) => h.content),
...pendingHistory.map((h) => h.content),
]);
return {
history: contents,
history,
pendingHistory,
didApplyManagement: true,
baseUnits:
advancedTokenCalculator.getRawBaseUnits(visibleNodes) + headerBaseUnits,
advancedTokenCalculator.getRawBaseUnits(allVisibleNodes) +
headerBaseUnits,
processedNodes,
};
}

View File

@@ -654,6 +654,7 @@ export class GeminiClient {
const {
history: newHistory,
apiHistory,
pendingApiHistory,
baseUnits,
} = await this.contextManager.renderHistory(
pendingRequest,
@@ -662,9 +663,22 @@ export class GeminiClient {
);
currentBaseUnits = baseUnits;
apiHistoryOverride = apiHistory;
// Use the PROCESSED pending content if available (e.g. if cleaned or distilled)
const finalPendingContent =
pendingApiHistory.length > 0
? pendingApiHistory[0]
: rawPendingRequest;
// Late-bind the prompt: Append the active request to the managed history
// only for the purpose of the upcoming API call.
apiHistoryOverride = [...apiHistory, finalPendingContent];
this.getChat().setHistory(newHistory, { silent: true });
// Update the request for turn.run so that the final history record
// matches the processed/cleaned content sent to the API.
request = finalPendingContent.parts || [];
} else {
const newHistory = await this.agentHistoryProvider.manageHistory(
this.getHistory(),