From 604b2dcb6d6e1053c6fa4bea9f4a3a5cc10ea8f5 Mon Sep 17 00:00:00 2001 From: Your Name Date: Wed, 20 May 2026 16:37:22 +0000 Subject: [PATCH] late append --- .../core/src/context/contextManager.test.ts | 23 +++ packages/core/src/context/contextManager.ts | 23 ++- .../core/src/context/graph/render.test.ts | 80 ++++++++++- packages/core/src/context/graph/render.ts | 134 +++++++++++++----- packages/core/src/core/client.ts | 16 ++- 5 files changed, 233 insertions(+), 43 deletions(-) diff --git a/packages/core/src/context/contextManager.test.ts b/packages/core/src/context/contextManager.test.ts index 55fb865dc6..5d00ad54b8 100644 --- a/packages/core/src/context/contextManager.test.ts +++ b/packages/core/src/context/contextManager.test.ts @@ -125,4 +125,27 @@ describe('ContextManager', () => { ); expect(passedNodeIds.has(passedNodes[0].id)).toBe(true); }); + + it('renderHistory should exclude pendingRequest from the result (late binding)', async () => { + const contextManager = new ContextManager( + mockSidecar, + mockEnv, + mockTracer, + mockOrchestrator, + mockChatHistory, + mockAdvancedTokenCalculator, + ); + + const pendingRequest: HistoryTurn = { + id: 'pending-turn-1', + content: { role: 'user', parts: [{ text: 'Active prompt' }] }, + }; + + const { history, apiHistory } = + await contextManager.renderHistory(pendingRequest); + + // Should be empty because mockChatHistory has no historical turns + expect(history).toHaveLength(0); + expect(apiHistory).toHaveLength(0); + }); }); diff --git a/packages/core/src/context/contextManager.ts b/packages/core/src/context/contextManager.ts index 182c629a02..b26aff77a9 100644 --- a/packages/core/src/context/contextManager.ts +++ b/packages/core/src/context/contextManager.ts @@ -44,6 +44,7 @@ export class ContextManager { result: { history: HistoryTurn[]; apiHistory: Content[]; + pendingApiHistory: Content[]; didApplyManagement: boolean; baseUnits: number; processedNodes: readonly ConcreteNode[]; @@ -294,6 +295,7 @@ export class ContextManager { ): Promise<{ history: HistoryTurn[]; apiHistory: Content[]; + pendingApiHistory: Content[]; didApplyManagement: boolean; baseUnits: number; processedNodes: readonly ConcreteNode[]; @@ -373,12 +375,16 @@ export class ContextManager { this.tracer, this.env, this.advancedTokenCalculator, - protectionReasons, - header, + { + protectionReasons, + header, + lateBindPrompt: !!pendingRequest, + }, ); const { history: renderedHistory, + pendingHistory, didApplyManagement, baseUnits, processedNodes, @@ -400,11 +406,19 @@ export class ContextManager { this.tracer.logEvent('ContextManager', 'Finished rendering'); - const hardenedHistory = hardenHistory([...renderedHistory], { + const allHistory = [...renderedHistory, ...pendingHistory]; + const hardenedAllHistory = hardenHistory(allHistory, { sentinels: this.sidecar.sentinels, }); - const apiHistory = hardenedHistory.map((h) => h.content); + const apiHistory = hardenedAllHistory + .slice(0, renderedHistory.length) + .map((h) => h.content); + + const pendingApiHistory = hardenedAllHistory + .slice(renderedHistory.length) + .map((h) => h.content); + if (header) { apiHistory.unshift(header); } @@ -412,6 +426,7 @@ export class ContextManager { const result = { history: renderedHistory, apiHistory, + pendingApiHistory, didApplyManagement, baseUnits, processedNodes, diff --git a/packages/core/src/context/graph/render.test.ts b/packages/core/src/context/graph/render.test.ts index b6572cef6d..1dc99f09cc 100644 --- a/packages/core/src/context/graph/render.test.ts +++ b/packages/core/src/context/graph/render.test.ts @@ -69,8 +69,10 @@ describe('render', () => { tracer, env, mockAdvancedTokenCalculator as unknown as AdvancedTokenCalculator, - new Map(), - undefined, + { + protectionReasons: new Map(), + header: undefined, + }, ); expect(result.history).toEqual([ @@ -172,8 +174,10 @@ describe('render', () => { tracer, env, mockAdvancedTokenCalculator as unknown as AdvancedTokenCalculator, - new Map(), - undefined, + { + protectionReasons: new Map(), + header: undefined, + }, ); // eslint-disable-next-line @typescript-eslint/no-explicit-any @@ -269,8 +273,10 @@ describe('render', () => { tracer, env, mockAdvancedTokenCalculator as unknown as AdvancedTokenCalculator, - new Map(), - undefined, + { + protectionReasons: new Map(), + header: undefined, + }, ); // eslint-disable-next-line @typescript-eslint/no-explicit-any @@ -279,4 +285,66 @@ describe('render', () => { expect(surviving).toEqual(['B', 'C']); // A is dropped expect(result.baseUnits).toBe(160000); }); + + it('should exclude the last turn when lateBindPrompt is true', async () => { + const mockNodes: ConcreteNode[] = [ + { + id: '1', + type: NodeType.USER_PROMPT, + turnId: 'turn-1', + payload: {} as Part, + } as unknown as ConcreteNode, + { + id: '2', + type: NodeType.AGENT_THOUGHT, + turnId: 'turn-2', + payload: {} as Part, + } as unknown as ConcreteNode, + ]; + + const orchestrator = { + executeTriggerSync: vi.fn(async (trigger, nodes) => nodes), + } as unknown as PipelineOrchestrator; + const sidecar = { config: {} } as ContextProfile; // No budget + const mockAdvancedTokenCalculator = { + calculateTokensAndBaseUnits: vi.fn().mockReturnValue({ + tokens: 100, + baseUnits: 100, + }), + getRawBaseUnits: vi.fn().mockReturnValue(50), + calculateConcreteListTokens: vi.fn().mockReturnValue(100), + getRawBaseUnitsForContent: vi.fn().mockReturnValue(0), + }; + + const env = { + tokenCalculator: { + calculateConcreteListTokens: vi.fn().mockReturnValue(100), + calculateTokenBreakdown: vi.fn().mockReturnValue({}), + }, + graphMapper: { + fromGraph: vi.fn((nodes: readonly ConcreteNode[]) => + nodes.map((n) => ({ text: n.id })), + ), + }, + } as unknown as ContextEnvironment; + const tracer = { + logEvent: vi.fn(), + } as unknown as ContextTracer; + + const result = await render( + mockNodes, + orchestrator, + sidecar, + tracer, + env, + mockAdvancedTokenCalculator as unknown as AdvancedTokenCalculator, + { + lateBindPrompt: true, + }, + ); + + expect(result.history).toEqual([{ text: '1' }]); // Turn 2 (node 2) is excluded + expect(result.pendingHistory).toEqual([{ text: '2' }]); // Turn 2 is included here + expect(result.baseUnits).toBe(50); + }); }); diff --git a/packages/core/src/context/graph/render.ts b/packages/core/src/context/graph/render.ts index 0cb803171b..b4e2dc0bdb 100644 --- a/packages/core/src/context/graph/render.ts +++ b/packages/core/src/context/graph/render.ts @@ -15,6 +15,17 @@ import { performCalibration } from '../utils/tokenCalibration.js'; import type { AdvancedTokenCalculator } from '../utils/contextTokenCalculator.js'; import type { HistoryTurn } from '../../core/agentChatHistory.js'; +export interface RenderOptions { + protectionReasons?: Map; + header?: Content; + /** + * If true, the most recent turn in the graph will not be considered for + * consolidation (snapshots) or included in the returned history. + * This is used for "late-binding" the prompt. + */ + lateBindPrompt?: boolean; +} + /** * Maps the Episodic Context Graph back into a list of HistoryTurns for transmission. * It applies synchronous context management (GC backstop) if the budget is exceeded. @@ -26,14 +37,15 @@ export async function render( tracer: ContextTracer, env: ContextEnvironment, advancedTokenCalculator: AdvancedTokenCalculator, - protectionReasons: Map = new Map(), - header?: Content, + options: RenderOptions = {}, ): Promise<{ history: HistoryTurn[]; + pendingHistory: HistoryTurn[]; didApplyManagement: boolean; baseUnits: number; processedNodes: readonly ConcreteNode[]; }> { + const { protectionReasons = new Map(), header, lateBindPrompt } = options; let headerTokens = 0; let headerBaseUnits = 0; if (header) { @@ -43,18 +55,36 @@ export async function render( headerBaseUnits = costs.baseUnits; } + const lastTurnId = nodes[nodes.length - 1]?.turnId; + if (!sidecar.config.budget) { - const contents = env.graphMapper.fromGraph(nodes); + const allVisibleNodes = nodes; + + const managedNodes = + lateBindPrompt && lastTurnId + ? allVisibleNodes.filter((n) => n.turnId !== lastTurnId) + : allVisibleNodes; + + const pendingNodes = + lateBindPrompt && lastTurnId + ? allVisibleNodes.filter((n) => n.turnId === lastTurnId) + : []; + + const history = env.graphMapper.fromGraph(managedNodes); + const pendingHistory = env.graphMapper.fromGraph(pendingNodes); + tracer.logEvent('Render', 'Render Context to LLM (No Budget)', { - renderedContext: contents, + renderedContext: history, + pendingContext: pendingHistory, }); - // In all cases, retrieve raw base units from the token calculator interface const baseUnits = - advancedTokenCalculator.getRawBaseUnits(nodes) + headerBaseUnits; + advancedTokenCalculator.getRawBaseUnits(allVisibleNodes) + + headerBaseUnits; return { - history: contents, + history, + pendingHistory, didApplyManagement: false, baseUnits, processedNodes: nodes, @@ -63,7 +93,7 @@ export async function render( const maxTokens = sidecar.config.budget.maxTokens; - const { tokens: graphTokens, baseUnits: graphBaseUnits } = + const { tokens: graphTokens } = advancedTokenCalculator.calculateTokensAndBaseUnits(nodes); const currentTokens = graphTokens + headerTokens; @@ -93,19 +123,39 @@ export async function render( 'Render', `View is within maxTokens (${currentTokens} <= ${maxTokens}). Returning view.`, ); - const contents = env.graphMapper.fromGraph(nodes); + + const allVisibleNodes = nodes; + + const managedNodes = + lateBindPrompt && lastTurnId + ? allVisibleNodes.filter((n) => n.turnId !== lastTurnId) + : allVisibleNodes; + + const pendingNodes = + lateBindPrompt && lastTurnId + ? allVisibleNodes.filter((n) => n.turnId === lastTurnId) + : []; + + const history = env.graphMapper.fromGraph(managedNodes); + const pendingHistory = env.graphMapper.fromGraph(pendingNodes); + tracer.logEvent('Render', 'Render Context for LLM', { - renderedContext: contents, + renderedContext: history, + pendingContext: pendingHistory, }); - performCalibration( - env, - nodes, - contents.map((h) => h.content), - ); + + performCalibration(env, allVisibleNodes, [ + ...history.map((h) => h.content), + ...pendingHistory.map((h) => h.content), + ]); + return { - history: contents, + history, + pendingHistory, didApplyManagement: false, - baseUnits: graphBaseUnits + headerBaseUnits, + baseUnits: + advancedTokenCalculator.getRawBaseUnits(allVisibleNodes) + + headerBaseUnits, processedNodes: nodes, }; } @@ -119,22 +169,27 @@ export async function render( `Context Manager Synchronous Barrier triggered: View at ${currentTokens} tokens (limit: ${maxTokens}).`, ); - // Calculate exactly which nodes aged out of the retainedTokens budget to form our target delta const agedOutNodes = new Set(); let rollingTokens = 0; - // Start from newest and count backwards for (let i = nodes.length - 1; i >= 0; i--) { const node = nodes[i]; const priorTokens = rollingTokens; const nodeTokens = env.tokenCalculator.calculateConcreteListTokens([node]); rollingTokens += nodeTokens; - // Loose Boundary Policy: Keep the node that crosses the boundary if (priorTokens > sidecar.config.budget.retainedTokens) { agedOutNodes.add(node.id); } } + if (lateBindPrompt && lastTurnId) { + for (const node of nodes) { + if (node.turnId === lastTurnId) { + agedOutNodes.delete(node.id); + } + } + } + const processedNodes = await orchestrator.executeTriggerSync( 'gc_backstop', nodes, @@ -142,7 +197,6 @@ export async function render( protectedIds, ); - // Apply skipList logic to abstract over summarized nodes const skipList = new Set(); for (const node of processedNodes) { if (node.abstractsIds) { @@ -150,27 +204,43 @@ export async function render( } } - const visibleNodes = processedNodes.filter((n) => !skipList.has(n.id)); + const allVisibleNodes = processedNodes.filter((n) => !skipList.has(n.id)); + + const managedNodes = + lateBindPrompt && lastTurnId + ? allVisibleNodes.filter((n) => n.turnId !== lastTurnId) + : allVisibleNodes; + + const pendingNodes = + lateBindPrompt && lastTurnId + ? allVisibleNodes.filter((n) => n.turnId === lastTurnId) + : []; + + const history = env.graphMapper.fromGraph(managedNodes); + const pendingHistory = env.graphMapper.fromGraph(pendingNodes); - const contents = env.graphMapper.fromGraph(visibleNodes); const finalTokens = - advancedTokenCalculator.calculateConcreteListTokens(visibleNodes); + advancedTokenCalculator.calculateConcreteListTokens(allVisibleNodes); tracer.logEvent('Render', 'Render Sanitized Context for LLM', { - renderedContextSanitized: contents, + renderedContextSanitized: history, + pendingContextSanitized: pendingHistory, }); debugLogger.log( `Context Manager finished. Final actual token count: ${finalTokens}.`, ); - performCalibration( - env, - visibleNodes, - contents.map((h) => h.content), - ); + + performCalibration(env, allVisibleNodes, [ + ...history.map((h) => h.content), + ...pendingHistory.map((h) => h.content), + ]); + return { - history: contents, + history, + pendingHistory, didApplyManagement: true, baseUnits: - advancedTokenCalculator.getRawBaseUnits(visibleNodes) + headerBaseUnits, + advancedTokenCalculator.getRawBaseUnits(allVisibleNodes) + + headerBaseUnits, processedNodes, }; } diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index 3994a21a32..a495cdf25f 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -654,6 +654,7 @@ export class GeminiClient { const { history: newHistory, apiHistory, + pendingApiHistory, baseUnits, } = await this.contextManager.renderHistory( pendingRequest, @@ -662,9 +663,22 @@ export class GeminiClient { ); currentBaseUnits = baseUnits; - apiHistoryOverride = apiHistory; + + // Use the PROCESSED pending content if available (e.g. if cleaned or distilled) + const finalPendingContent = + pendingApiHistory.length > 0 + ? pendingApiHistory[0] + : rawPendingRequest; + + // Late-bind the prompt: Append the active request to the managed history + // only for the purpose of the upcoming API call. + apiHistoryOverride = [...apiHistory, finalPendingContent]; this.getChat().setHistory(newHistory, { silent: true }); + + // Update the request for turn.run so that the final history record + // matches the processed/cleaned content sent to the API. + request = finalPendingContent.parts || []; } else { const newHistory = await this.agentHistoryProvider.manageHistory( this.getHistory(),