mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-24 05:04:28 +00:00
late append
This commit is contained in:
@@ -125,4 +125,27 @@ describe('ContextManager', () => {
|
||||
);
|
||||
expect(passedNodeIds.has(passedNodes[0].id)).toBe(true);
|
||||
});
|
||||
|
||||
it('renderHistory should exclude pendingRequest from the result (late binding)', async () => {
|
||||
const contextManager = new ContextManager(
|
||||
mockSidecar,
|
||||
mockEnv,
|
||||
mockTracer,
|
||||
mockOrchestrator,
|
||||
mockChatHistory,
|
||||
mockAdvancedTokenCalculator,
|
||||
);
|
||||
|
||||
const pendingRequest: HistoryTurn = {
|
||||
id: 'pending-turn-1',
|
||||
content: { role: 'user', parts: [{ text: 'Active prompt' }] },
|
||||
};
|
||||
|
||||
const { history, apiHistory } =
|
||||
await contextManager.renderHistory(pendingRequest);
|
||||
|
||||
// Should be empty because mockChatHistory has no historical turns
|
||||
expect(history).toHaveLength(0);
|
||||
expect(apiHistory).toHaveLength(0);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -44,6 +44,7 @@ export class ContextManager {
|
||||
result: {
|
||||
history: HistoryTurn[];
|
||||
apiHistory: Content[];
|
||||
pendingApiHistory: Content[];
|
||||
didApplyManagement: boolean;
|
||||
baseUnits: number;
|
||||
processedNodes: readonly ConcreteNode[];
|
||||
@@ -294,6 +295,7 @@ export class ContextManager {
|
||||
): Promise<{
|
||||
history: HistoryTurn[];
|
||||
apiHistory: Content[];
|
||||
pendingApiHistory: Content[];
|
||||
didApplyManagement: boolean;
|
||||
baseUnits: number;
|
||||
processedNodes: readonly ConcreteNode[];
|
||||
@@ -373,12 +375,16 @@ export class ContextManager {
|
||||
this.tracer,
|
||||
this.env,
|
||||
this.advancedTokenCalculator,
|
||||
protectionReasons,
|
||||
header,
|
||||
{
|
||||
protectionReasons,
|
||||
header,
|
||||
lateBindPrompt: !!pendingRequest,
|
||||
},
|
||||
);
|
||||
|
||||
const {
|
||||
history: renderedHistory,
|
||||
pendingHistory,
|
||||
didApplyManagement,
|
||||
baseUnits,
|
||||
processedNodes,
|
||||
@@ -400,11 +406,19 @@ export class ContextManager {
|
||||
|
||||
this.tracer.logEvent('ContextManager', 'Finished rendering');
|
||||
|
||||
const hardenedHistory = hardenHistory([...renderedHistory], {
|
||||
const allHistory = [...renderedHistory, ...pendingHistory];
|
||||
const hardenedAllHistory = hardenHistory(allHistory, {
|
||||
sentinels: this.sidecar.sentinels,
|
||||
});
|
||||
|
||||
const apiHistory = hardenedHistory.map((h) => h.content);
|
||||
const apiHistory = hardenedAllHistory
|
||||
.slice(0, renderedHistory.length)
|
||||
.map((h) => h.content);
|
||||
|
||||
const pendingApiHistory = hardenedAllHistory
|
||||
.slice(renderedHistory.length)
|
||||
.map((h) => h.content);
|
||||
|
||||
if (header) {
|
||||
apiHistory.unshift(header);
|
||||
}
|
||||
@@ -412,6 +426,7 @@ export class ContextManager {
|
||||
const result = {
|
||||
history: renderedHistory,
|
||||
apiHistory,
|
||||
pendingApiHistory,
|
||||
didApplyManagement,
|
||||
baseUnits,
|
||||
processedNodes,
|
||||
|
||||
@@ -69,8 +69,10 @@ describe('render', () => {
|
||||
tracer,
|
||||
env,
|
||||
mockAdvancedTokenCalculator as unknown as AdvancedTokenCalculator,
|
||||
new Map(),
|
||||
undefined,
|
||||
{
|
||||
protectionReasons: new Map(),
|
||||
header: undefined,
|
||||
},
|
||||
);
|
||||
|
||||
expect(result.history).toEqual([
|
||||
@@ -172,8 +174,10 @@ describe('render', () => {
|
||||
tracer,
|
||||
env,
|
||||
mockAdvancedTokenCalculator as unknown as AdvancedTokenCalculator,
|
||||
new Map(),
|
||||
undefined,
|
||||
{
|
||||
protectionReasons: new Map(),
|
||||
header: undefined,
|
||||
},
|
||||
);
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
@@ -269,8 +273,10 @@ describe('render', () => {
|
||||
tracer,
|
||||
env,
|
||||
mockAdvancedTokenCalculator as unknown as AdvancedTokenCalculator,
|
||||
new Map(),
|
||||
undefined,
|
||||
{
|
||||
protectionReasons: new Map(),
|
||||
header: undefined,
|
||||
},
|
||||
);
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
@@ -279,4 +285,66 @@ describe('render', () => {
|
||||
expect(surviving).toEqual(['B', 'C']); // A is dropped
|
||||
expect(result.baseUnits).toBe(160000);
|
||||
});
|
||||
|
||||
it('should exclude the last turn when lateBindPrompt is true', async () => {
|
||||
const mockNodes: ConcreteNode[] = [
|
||||
{
|
||||
id: '1',
|
||||
type: NodeType.USER_PROMPT,
|
||||
turnId: 'turn-1',
|
||||
payload: {} as Part,
|
||||
} as unknown as ConcreteNode,
|
||||
{
|
||||
id: '2',
|
||||
type: NodeType.AGENT_THOUGHT,
|
||||
turnId: 'turn-2',
|
||||
payload: {} as Part,
|
||||
} as unknown as ConcreteNode,
|
||||
];
|
||||
|
||||
const orchestrator = {
|
||||
executeTriggerSync: vi.fn(async (trigger, nodes) => nodes),
|
||||
} as unknown as PipelineOrchestrator;
|
||||
const sidecar = { config: {} } as ContextProfile; // No budget
|
||||
const mockAdvancedTokenCalculator = {
|
||||
calculateTokensAndBaseUnits: vi.fn().mockReturnValue({
|
||||
tokens: 100,
|
||||
baseUnits: 100,
|
||||
}),
|
||||
getRawBaseUnits: vi.fn().mockReturnValue(50),
|
||||
calculateConcreteListTokens: vi.fn().mockReturnValue(100),
|
||||
getRawBaseUnitsForContent: vi.fn().mockReturnValue(0),
|
||||
};
|
||||
|
||||
const env = {
|
||||
tokenCalculator: {
|
||||
calculateConcreteListTokens: vi.fn().mockReturnValue(100),
|
||||
calculateTokenBreakdown: vi.fn().mockReturnValue({}),
|
||||
},
|
||||
graphMapper: {
|
||||
fromGraph: vi.fn((nodes: readonly ConcreteNode[]) =>
|
||||
nodes.map((n) => ({ text: n.id })),
|
||||
),
|
||||
},
|
||||
} as unknown as ContextEnvironment;
|
||||
const tracer = {
|
||||
logEvent: vi.fn(),
|
||||
} as unknown as ContextTracer;
|
||||
|
||||
const result = await render(
|
||||
mockNodes,
|
||||
orchestrator,
|
||||
sidecar,
|
||||
tracer,
|
||||
env,
|
||||
mockAdvancedTokenCalculator as unknown as AdvancedTokenCalculator,
|
||||
{
|
||||
lateBindPrompt: true,
|
||||
},
|
||||
);
|
||||
|
||||
expect(result.history).toEqual([{ text: '1' }]); // Turn 2 (node 2) is excluded
|
||||
expect(result.pendingHistory).toEqual([{ text: '2' }]); // Turn 2 is included here
|
||||
expect(result.baseUnits).toBe(50);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -15,6 +15,17 @@ import { performCalibration } from '../utils/tokenCalibration.js';
|
||||
import type { AdvancedTokenCalculator } from '../utils/contextTokenCalculator.js';
|
||||
import type { HistoryTurn } from '../../core/agentChatHistory.js';
|
||||
|
||||
export interface RenderOptions {
|
||||
protectionReasons?: Map<string, string>;
|
||||
header?: Content;
|
||||
/**
|
||||
* If true, the most recent turn in the graph will not be considered for
|
||||
* consolidation (snapshots) or included in the returned history.
|
||||
* This is used for "late-binding" the prompt.
|
||||
*/
|
||||
lateBindPrompt?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Maps the Episodic Context Graph back into a list of HistoryTurns for transmission.
|
||||
* It applies synchronous context management (GC backstop) if the budget is exceeded.
|
||||
@@ -26,14 +37,15 @@ export async function render(
|
||||
tracer: ContextTracer,
|
||||
env: ContextEnvironment,
|
||||
advancedTokenCalculator: AdvancedTokenCalculator,
|
||||
protectionReasons: Map<string, string> = new Map(),
|
||||
header?: Content,
|
||||
options: RenderOptions = {},
|
||||
): Promise<{
|
||||
history: HistoryTurn[];
|
||||
pendingHistory: HistoryTurn[];
|
||||
didApplyManagement: boolean;
|
||||
baseUnits: number;
|
||||
processedNodes: readonly ConcreteNode[];
|
||||
}> {
|
||||
const { protectionReasons = new Map(), header, lateBindPrompt } = options;
|
||||
let headerTokens = 0;
|
||||
let headerBaseUnits = 0;
|
||||
if (header) {
|
||||
@@ -43,18 +55,36 @@ export async function render(
|
||||
headerBaseUnits = costs.baseUnits;
|
||||
}
|
||||
|
||||
const lastTurnId = nodes[nodes.length - 1]?.turnId;
|
||||
|
||||
if (!sidecar.config.budget) {
|
||||
const contents = env.graphMapper.fromGraph(nodes);
|
||||
const allVisibleNodes = nodes;
|
||||
|
||||
const managedNodes =
|
||||
lateBindPrompt && lastTurnId
|
||||
? allVisibleNodes.filter((n) => n.turnId !== lastTurnId)
|
||||
: allVisibleNodes;
|
||||
|
||||
const pendingNodes =
|
||||
lateBindPrompt && lastTurnId
|
||||
? allVisibleNodes.filter((n) => n.turnId === lastTurnId)
|
||||
: [];
|
||||
|
||||
const history = env.graphMapper.fromGraph(managedNodes);
|
||||
const pendingHistory = env.graphMapper.fromGraph(pendingNodes);
|
||||
|
||||
tracer.logEvent('Render', 'Render Context to LLM (No Budget)', {
|
||||
renderedContext: contents,
|
||||
renderedContext: history,
|
||||
pendingContext: pendingHistory,
|
||||
});
|
||||
|
||||
// In all cases, retrieve raw base units from the token calculator interface
|
||||
const baseUnits =
|
||||
advancedTokenCalculator.getRawBaseUnits(nodes) + headerBaseUnits;
|
||||
advancedTokenCalculator.getRawBaseUnits(allVisibleNodes) +
|
||||
headerBaseUnits;
|
||||
|
||||
return {
|
||||
history: contents,
|
||||
history,
|
||||
pendingHistory,
|
||||
didApplyManagement: false,
|
||||
baseUnits,
|
||||
processedNodes: nodes,
|
||||
@@ -63,7 +93,7 @@ export async function render(
|
||||
|
||||
const maxTokens = sidecar.config.budget.maxTokens;
|
||||
|
||||
const { tokens: graphTokens, baseUnits: graphBaseUnits } =
|
||||
const { tokens: graphTokens } =
|
||||
advancedTokenCalculator.calculateTokensAndBaseUnits(nodes);
|
||||
|
||||
const currentTokens = graphTokens + headerTokens;
|
||||
@@ -93,19 +123,39 @@ export async function render(
|
||||
'Render',
|
||||
`View is within maxTokens (${currentTokens} <= ${maxTokens}). Returning view.`,
|
||||
);
|
||||
const contents = env.graphMapper.fromGraph(nodes);
|
||||
|
||||
const allVisibleNodes = nodes;
|
||||
|
||||
const managedNodes =
|
||||
lateBindPrompt && lastTurnId
|
||||
? allVisibleNodes.filter((n) => n.turnId !== lastTurnId)
|
||||
: allVisibleNodes;
|
||||
|
||||
const pendingNodes =
|
||||
lateBindPrompt && lastTurnId
|
||||
? allVisibleNodes.filter((n) => n.turnId === lastTurnId)
|
||||
: [];
|
||||
|
||||
const history = env.graphMapper.fromGraph(managedNodes);
|
||||
const pendingHistory = env.graphMapper.fromGraph(pendingNodes);
|
||||
|
||||
tracer.logEvent('Render', 'Render Context for LLM', {
|
||||
renderedContext: contents,
|
||||
renderedContext: history,
|
||||
pendingContext: pendingHistory,
|
||||
});
|
||||
performCalibration(
|
||||
env,
|
||||
nodes,
|
||||
contents.map((h) => h.content),
|
||||
);
|
||||
|
||||
performCalibration(env, allVisibleNodes, [
|
||||
...history.map((h) => h.content),
|
||||
...pendingHistory.map((h) => h.content),
|
||||
]);
|
||||
|
||||
return {
|
||||
history: contents,
|
||||
history,
|
||||
pendingHistory,
|
||||
didApplyManagement: false,
|
||||
baseUnits: graphBaseUnits + headerBaseUnits,
|
||||
baseUnits:
|
||||
advancedTokenCalculator.getRawBaseUnits(allVisibleNodes) +
|
||||
headerBaseUnits,
|
||||
processedNodes: nodes,
|
||||
};
|
||||
}
|
||||
@@ -119,22 +169,27 @@ export async function render(
|
||||
`Context Manager Synchronous Barrier triggered: View at ${currentTokens} tokens (limit: ${maxTokens}).`,
|
||||
);
|
||||
|
||||
// Calculate exactly which nodes aged out of the retainedTokens budget to form our target delta
|
||||
const agedOutNodes = new Set<string>();
|
||||
let rollingTokens = 0;
|
||||
// Start from newest and count backwards
|
||||
for (let i = nodes.length - 1; i >= 0; i--) {
|
||||
const node = nodes[i];
|
||||
const priorTokens = rollingTokens;
|
||||
const nodeTokens = env.tokenCalculator.calculateConcreteListTokens([node]);
|
||||
rollingTokens += nodeTokens;
|
||||
|
||||
// Loose Boundary Policy: Keep the node that crosses the boundary
|
||||
if (priorTokens > sidecar.config.budget.retainedTokens) {
|
||||
agedOutNodes.add(node.id);
|
||||
}
|
||||
}
|
||||
|
||||
if (lateBindPrompt && lastTurnId) {
|
||||
for (const node of nodes) {
|
||||
if (node.turnId === lastTurnId) {
|
||||
agedOutNodes.delete(node.id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const processedNodes = await orchestrator.executeTriggerSync(
|
||||
'gc_backstop',
|
||||
nodes,
|
||||
@@ -142,7 +197,6 @@ export async function render(
|
||||
protectedIds,
|
||||
);
|
||||
|
||||
// Apply skipList logic to abstract over summarized nodes
|
||||
const skipList = new Set<string>();
|
||||
for (const node of processedNodes) {
|
||||
if (node.abstractsIds) {
|
||||
@@ -150,27 +204,43 @@ export async function render(
|
||||
}
|
||||
}
|
||||
|
||||
const visibleNodes = processedNodes.filter((n) => !skipList.has(n.id));
|
||||
const allVisibleNodes = processedNodes.filter((n) => !skipList.has(n.id));
|
||||
|
||||
const managedNodes =
|
||||
lateBindPrompt && lastTurnId
|
||||
? allVisibleNodes.filter((n) => n.turnId !== lastTurnId)
|
||||
: allVisibleNodes;
|
||||
|
||||
const pendingNodes =
|
||||
lateBindPrompt && lastTurnId
|
||||
? allVisibleNodes.filter((n) => n.turnId === lastTurnId)
|
||||
: [];
|
||||
|
||||
const history = env.graphMapper.fromGraph(managedNodes);
|
||||
const pendingHistory = env.graphMapper.fromGraph(pendingNodes);
|
||||
|
||||
const contents = env.graphMapper.fromGraph(visibleNodes);
|
||||
const finalTokens =
|
||||
advancedTokenCalculator.calculateConcreteListTokens(visibleNodes);
|
||||
advancedTokenCalculator.calculateConcreteListTokens(allVisibleNodes);
|
||||
tracer.logEvent('Render', 'Render Sanitized Context for LLM', {
|
||||
renderedContextSanitized: contents,
|
||||
renderedContextSanitized: history,
|
||||
pendingContextSanitized: pendingHistory,
|
||||
});
|
||||
debugLogger.log(
|
||||
`Context Manager finished. Final actual token count: ${finalTokens}.`,
|
||||
);
|
||||
performCalibration(
|
||||
env,
|
||||
visibleNodes,
|
||||
contents.map((h) => h.content),
|
||||
);
|
||||
|
||||
performCalibration(env, allVisibleNodes, [
|
||||
...history.map((h) => h.content),
|
||||
...pendingHistory.map((h) => h.content),
|
||||
]);
|
||||
|
||||
return {
|
||||
history: contents,
|
||||
history,
|
||||
pendingHistory,
|
||||
didApplyManagement: true,
|
||||
baseUnits:
|
||||
advancedTokenCalculator.getRawBaseUnits(visibleNodes) + headerBaseUnits,
|
||||
advancedTokenCalculator.getRawBaseUnits(allVisibleNodes) +
|
||||
headerBaseUnits,
|
||||
processedNodes,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -654,6 +654,7 @@ export class GeminiClient {
|
||||
const {
|
||||
history: newHistory,
|
||||
apiHistory,
|
||||
pendingApiHistory,
|
||||
baseUnits,
|
||||
} = await this.contextManager.renderHistory(
|
||||
pendingRequest,
|
||||
@@ -662,9 +663,22 @@ export class GeminiClient {
|
||||
);
|
||||
|
||||
currentBaseUnits = baseUnits;
|
||||
apiHistoryOverride = apiHistory;
|
||||
|
||||
// Use the PROCESSED pending content if available (e.g. if cleaned or distilled)
|
||||
const finalPendingContent =
|
||||
pendingApiHistory.length > 0
|
||||
? pendingApiHistory[0]
|
||||
: rawPendingRequest;
|
||||
|
||||
// Late-bind the prompt: Append the active request to the managed history
|
||||
// only for the purpose of the upcoming API call.
|
||||
apiHistoryOverride = [...apiHistory, finalPendingContent];
|
||||
|
||||
this.getChat().setHistory(newHistory, { silent: true });
|
||||
|
||||
// Update the request for turn.run so that the final history record
|
||||
// matches the processed/cleaned content sent to the API.
|
||||
request = finalPendingContent.parts || [];
|
||||
} else {
|
||||
const newHistory = await this.agentHistoryProvider.manageHistory(
|
||||
this.getHistory(),
|
||||
|
||||
Reference in New Issue
Block a user