Refactor file context for better testability; start fixing tests

This commit is contained in:
Keir Mierle
2025-06-12 15:12:01 -07:00
parent 99751c847c
commit 820275b63f
5 changed files with 481 additions and 37 deletions

View File

@@ -122,7 +122,19 @@ vi.mock('@gemini-cli/core', async (importOriginal) => {
getShowMemoryUsage: vi.fn(() => opts.showMemoryUsage ?? false),
getAccessibility: vi.fn(() => opts.accessibility ?? {}),
getProjectRoot: vi.fn(() => opts.projectRoot),
getGeminiClient: vi.fn(() => ({})),
getGeminiClient: vi.fn(() => ({
getTokenLimit: vi.fn(() => 8192),
getTokenBreakdown: vi.fn().mockResolvedValue({
chatTokens: 0,
conventionsTokens: 0,
systemTokens: 0,
}),
})),
getFileContextService: vi.fn(() => ({
on: vi.fn(),
off: vi.fn(),
getTrackedFilesWithTokenCounts: vi.fn().mockResolvedValue([]),
})),
getCheckpointEnabled: vi.fn(() => opts.checkpoint ?? true),
};
});

View File

@@ -0,0 +1,143 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { Content, Models, GenerateContentConfig } from '@google/genai';
import { GeminiChat } from './geminiChat.js';
import { Config } from '../config/config.js';
// Mocks
const mockModelsModule = {
generateContent: vi.fn(),
generateContentStream: vi.fn(),
countTokens: vi.fn(),
embedContent: vi.fn(),
batchEmbedContents: vi.fn(),
} as unknown as Models;
const mockConfig = {
getSessionId: () => 'test-session-id',
} as unknown as Config;
import { createFileContextPart } from '../utils/fileContextUtils.js';
describe('GeminiChat - Tracked File History Sanitization', () => {
let chat: GeminiChat;
const model = 'gemini-pro';
const config: GenerateContentConfig = {};
beforeEach(() => {
vi.clearAllMocks();
chat = new GeminiChat(mockConfig, mockModelsModule, model, config, []);
});
afterEach(() => {
vi.restoreAllMocks();
});
it('should replace file content with a summary in the history', () => {
const userInput: Content = {
role: 'user',
parts: [
{ text: 'User input' },
createFileContextPart('/test/file.txt', 'This is file content.'),
],
};
const modelOutput: Content[] = [
{ role: 'model', parts: [{ text: 'Model output' }] },
];
// @ts-expect-error - private method
chat.recordHistory(userInput, modelOutput);
const history = chat.getHistory();
expect(history).toHaveLength(2);
const userHistory = history[0];
expect(userHistory.parts).toBeDefined();
expect(userHistory.parts).toHaveLength(2);
expect(userHistory.parts![0].text).toBe('User input');
expect(userHistory.parts![1].text).toBe(
'[CONTEXT] File: /test/file.txt'
);
});
it('should update the history summary as files are added and removed over time', () => {
const modelOutput: Content[] = [
{ role: 'model', parts: [{ text: 'OK' }] },
];
// 1. Track file1
const userInput1: Content = {
role: 'user',
parts: [
{ text: 'Message 1' },
createFileContextPart('/test/file1.txt', 'Content 1'),
],
};
// @ts-expect-error - private method
chat.recordHistory(userInput1, modelOutput);
let history = chat.getHistory();
expect(history[0].parts).toBeDefined();
expect(history[0].parts).toHaveLength(2);
expect(history[0].parts![0].text).toBe('Message 1');
expect(history[0].parts![1].text).toBe('[CONTEXT] File: /test/file1.txt');
// 2. Track file2 (file1 is still tracked)
const userInput2: Content = {
role: 'user',
parts: [
{ text: 'Message 2' },
createFileContextPart('/test/file1.txt', 'Content 1'),
createFileContextPart('/test/file2.txt', 'Content 2'),
],
};
// @ts-expect-error - private method
chat.recordHistory(userInput2, modelOutput);
history = chat.getHistory();
expect(history[2].parts).toBeDefined();
expect(history[2].parts).toHaveLength(3);
expect(history[2].parts![0].text).toBe('Message 2');
expect(history[2].parts![1].text).toBe('[CONTEXT] File: /test/file1.txt');
expect(history[2].parts![2].text).toBe('[CONTEXT] File: /test/file2.txt');
// 3. Untrack file1
const userInput3: Content = {
role: 'user',
parts: [
{ text: 'Message 3' },
createFileContextPart('/test/file2.txt', 'Content 2'),
],
};
// @ts-expect-error - private method
chat.recordHistory(userInput3, modelOutput);
history = chat.getHistory();
expect(history[4].parts).toBeDefined();
expect(history[4].parts).toHaveLength(2);
expect(history[4].parts![0].text).toBe('Message 3');
expect(history[4].parts![1].text).toBe(
'[CONTEXT] File: /test/file2.txt'
);
});
it('should not alter history if no tracked files are present', () => {
const userInput: Content = {
role: 'user',
parts: [{ text: 'A normal message' }],
};
const modelOutput: Content[] = [
{ role: 'model', parts: [{ text: 'A normal response' }] },
];
// @ts-expect-error - private method
chat.recordHistory(userInput, modelOutput);
const history = chat.getHistory();
expect(history).toHaveLength(2);
expect(history[0]).toEqual(userInput);
expect(history[1]).toEqual(modelOutput[0]);
});
});

View File

@@ -17,6 +17,7 @@ import {
} from '@google/genai';
import { retryWithBackoff } from '../utils/retry.js';
import { isFunctionResponse } from '../utils/messageInspectors.js';
import { sanitizeUserContent } from '../utils/fileContextUtils.js';
import { ContentGenerator } from './contentGenerator.js';
import { Config } from '../config/config.js';
@@ -355,7 +356,7 @@ export class GeminiChat {
...extractCuratedHistory(automaticFunctionCallingHistory!),
);
} else {
this.history.push(this.getSanitizedUserInput(userInput));
this.history.push(sanitizeUserContent(userInput));
}
// Consolidate adjacent model roles in outputContents
@@ -405,41 +406,6 @@ export class GeminiChat {
}
}
private getSanitizedUserInput(userInput: Content): Content {
if (!userInput.parts) {
return userInput;
}
const markerPrefix = 'GEMINI_TRACKED_FILE_V1:';
const trackedFilePaths: string[] = [];
const sanitizedParts: Part[] = [];
for (const part of userInput.parts) {
if (part.text?.startsWith(markerPrefix)) {
try {
const data = JSON.parse(part.text.substring(markerPrefix.length));
if (data.relativePath) {
trackedFilePaths.push(data.relativePath);
}
} catch (e) {
// It's not a valid marker, so keep the part as is.
sanitizedParts.push(part);
}
} else {
sanitizedParts.push(part);
}
}
if (trackedFilePaths.length > 0) {
const fileListText = `The following files were provided as context:\n- ${trackedFilePaths.join('\n- ')}`;
sanitizedParts.unshift({ text: fileListText });
}
return {
role: userInput.role,
parts: sanitizedParts,
};
}
private isTextContent(
content: Content | undefined,
): content is Content & { parts: [{ text: string }, ...Part[]] } {

View File

@@ -0,0 +1,198 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect } from 'vitest';
import { Content, Part } from '@google/genai';
import {
createFileContextPart,
sanitizeUserContent,
isFileContextPart,
summarizeFileContext,
summarizePartsFileContext,
} from './fileContextUtils.js';
// The test needs access to this, so we'll define it here.
// In the implementation it is not exported.
const GEMINI_TRACKED_FILE_MARKER = 'GEMINI_TRACKED_FILE_V1:';
describe('File Context Utils', () => {
describe('createFileContextPart', () => {
it('should create a correctly formatted file context part', () => {
const part = createFileContextPart(
'test/file.txt',
'This is the content.',
);
const expectedPayload = JSON.stringify({
relativePath: 'test/file.txt',
absolutePath: undefined,
});
expect(part.text).toBe(
`${GEMINI_TRACKED_FILE_MARKER}${expectedPayload}\nThis is the content.`,
);
});
it('should create a correctly formatted file context part with absolute path', () => {
const part = createFileContextPart(
'test/file.txt',
'This is the content.',
'/abs/test/file.txt',
);
const expectedPayload = JSON.stringify({
relativePath: 'test/file.txt',
absolutePath: '/abs/test/file.txt',
});
expect(part.text).toBe(
`${GEMINI_TRACKED_FILE_MARKER}${expectedPayload}\nThis is the content.`,
);
});
});
describe('isFileContextPart', () => {
it('should return true and the path for a valid file context part', () => {
const part = createFileContextPart('a/b.txt', 'content', '/abs/a/b.txt');
const result = isFileContextPart(part);
expect(result.isFile).toBe(true);
expect(result.relativePath).toBe('a/b.txt');
expect(result.absolutePath).toBe('/abs/a/b.txt');
});
it('should return true and the relative path for a valid file context part without absolute path', () => {
const part = createFileContextPart('a/b.txt', 'content');
const result = isFileContextPart(part);
expect(result.isFile).toBe(true);
expect(result.relativePath).toBe('a/b.txt');
expect(result.absolutePath).toBeUndefined();
});
it('should return false for a regular text part', () => {
const part: Part = { text: 'hello world' };
const result = isFileContextPart(part);
expect(result.isFile).toBe(false);
expect(result.relativePath).toBeUndefined();
});
it('should return false for a part with a malformed marker', () => {
const part: Part = { text: `${GEMINI_TRACKED_FILE_MARKER}{not-json}` };
const result = isFileContextPart(part);
expect(result.isFile).toBe(false);
expect(result.relativePath).toBeUndefined();
});
it('should return false for a part with a marker but no relativePath', () => {
const part: Part = {
text: `${GEMINI_TRACKED_FILE_MARKER}${'{"foo":"bar"}'}`,
};
const result = isFileContextPart(part);
expect(result.isFile).toBe(false);
expect(result.relativePath).toBeUndefined();
});
});
describe('summarizeFileContext', () => {
it('should summarize a file context part', () => {
const part = createFileContextPart('a/b.txt', 'content');
const summaryPart = summarizeFileContext(part);
expect(summaryPart.text).toBe('[CONTEXT] File: a/b.txt');
});
it('should summarize a file context part with absolute path', () => {
const part = createFileContextPart('a/b.txt', 'content', '/abs/a/b.txt');
const summaryPart = summarizeFileContext(part);
expect(summaryPart.text).toBe('[CONTEXT] File: a/b.txt (/abs/a/b.txt)');
});
it('should return the original part if it is not a file context part', () => {
const part: Part = { text: 'hello world' };
const summaryPart = summarizeFileContext(part);
expect(summaryPart).toBe(part);
});
});
describe('summarizePartsFileContext', () => {
it('should summarize multiple file context parts in an array', () => {
const parts: Part[] = [
{ text: 'Here are files:' },
createFileContextPart('a/b.txt', 'content1'),
createFileContextPart('c/d.txt', 'content2', '/abs/c/d.txt'),
];
const summarizedParts = summarizePartsFileContext(parts);
expect(summarizedParts).toEqual([
{ text: 'Here are files:' },
{ text: '[CONTEXT] File: a/b.txt' },
{ text: '[CONTEXT] File: c/d.txt (/abs/c/d.txt)' },
]);
});
it('should return a new array with the same parts if no file context parts are present', () => {
const parts: Part[] = [{ text: 'Hello' }, { text: 'World' }];
const summarizedParts = summarizePartsFileContext(parts);
// It should return a new array with the same parts.
expect(summarizedParts).toEqual(parts);
// But not the same array instance because of map.
expect(summarizedParts).not.toBe(parts);
});
});
describe('sanitizeUserContent', () => {
it('should replace file content with a summary', () => {
const userInput: Content = {
role: 'user',
parts: [
{ text: 'User input' },
createFileContextPart(
'/test/file.txt',
'This is file content.',
'/abs/test/file.txt',
),
],
};
const sanitized = sanitizeUserContent(userInput);
expect(sanitized.parts).toEqual([
{ text: 'User input' },
{ text: '[CONTEXT] File: /test/file.txt (/abs/test/file.txt)' },
]);
});
it('should not alter content if no tracked files are present', () => {
const userInput: Content = {
role: 'user',
parts: [{ text: 'A normal message' }],
};
const sanitized = sanitizeUserContent(userInput);
expect(sanitized).toBe(userInput); // Should return original object
});
it('should handle multiple file contexts', () => {
const userInput: Content = {
role: 'user',
parts: [
{ text: 'Here are two files' },
createFileContextPart('file1.txt', 'content1'),
createFileContextPart('file2.txt', 'content2', '/abs/file2.txt'),
],
};
const sanitized = sanitizeUserContent(userInput);
expect(sanitized.parts).toEqual([
{ text: 'Here are two files' },
{ text: '[CONTEXT] File: file1.txt' },
{ text: '[CONTEXT] File: file2.txt (/abs/file2.txt)' },
]);
});
it('should handle content with no parts', () => {
const userInput: Content = { role: 'user', parts: [] };
const sanitized = sanitizeUserContent(userInput);
expect(sanitized).toBe(userInput);
});
it('should handle content where parts is undefined', () => {
const userInput: Content = { role: 'user', parts: undefined as any };
const sanitized = sanitizeUserContent(userInput);
expect(sanitized).toBe(userInput);
});
});
});

View File

@@ -0,0 +1,125 @@
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { Content, Part } from '@google/genai';
// This is not exported, as per instructions.
const GEMINI_TRACKED_FILE_MARKER = 'GEMINI_TRACKED_FILE_V1:';
interface FileContextInfo {
isFile: boolean;
relativePath?: string;
absolutePath?: string;
}
/**
* Creates a `Part` object containing the specially formatted marker
* for a tracked file, to be sent to the model.
*
* @param relativePath The relative path of the file.
* @param content The full content of the file.
* @param absolutePath The absolute path of the file.
* @returns A `Part` object with the marked-up text.
*/
export function createFileContextPart(
relativePath: string,
content: string,
absolutePath?: string,
): Part {
const markerPayload = JSON.stringify({ relativePath, absolutePath });
return { text: `${GEMINI_TRACKED_FILE_MARKER}${markerPayload}\n${content}` };
}
/**
* Detects if a Part is a file context part.
* @param part The Part to check.
* @returns Information about the file context if it is one.
*/
export function isFileContextPart(part: Part): FileContextInfo {
if (part.text?.startsWith(GEMINI_TRACKED_FILE_MARKER)) {
try {
const endOfMarker = part.text.indexOf('\n');
const markerJson =
endOfMarker === -1
? part.text.substring(GEMINI_TRACKED_FILE_MARKER.length)
: part.text.substring(
GEMINI_TRACKED_FILE_MARKER.length,
endOfMarker,
);
const data = JSON.parse(markerJson);
if (data.relativePath) {
return {
isFile: true,
relativePath: data.relativePath,
absolutePath: data.absolutePath,
};
}
} catch (e) {
// Malformed, treat as not a file context part.
}
}
return { isFile: false };
}
/**
* If a part is a file context, returns a new one with the summarized version.
* @param part The part to summarize.
* @returns A summarized part if it was a file context part, otherwise the original part.
*/
export function summarizeFileContext(part: Part): Part {
const fileInfo = isFileContextPart(part);
if (fileInfo.isFile && fileInfo.relativePath) {
let text = `[CONTEXT] File: ${fileInfo.relativePath}`;
if (fileInfo.absolutePath) {
text += ` (${fileInfo.absolutePath})`;
}
return {
text,
};
}
return part;
}
/**
* Replaces any parts that are full file contexts with just a summary.
* @param parts The parts to summarize.
* @returns A new array of parts with file contexts summarized.
*/
export function summarizePartsFileContext(parts: Part[]): Part[] {
return parts.map(summarizeFileContext);
}
/**
* Sanitizes a user-provided Content object by replacing any parts containing
* tracked file content with a simple summary part.
*
* @param userInput The original user Content object.
* @returns A new Content object with file content replaced by a summary.
* Returns the original object if no sanitization is needed.
*/
export function sanitizeUserContent(userInput: Content): Content {
if (!userInput.parts) {
return userInput;
}
const partsWithFileContextsSummarized = summarizePartsFileContext(
userInput.parts,
);
// Avoid creating a new object if no parts were changed.
const hasChanged = userInput.parts.some(
(part, i) => part !== partsWithFileContextsSummarized[i],
);
if (!hasChanged) {
return userInput;
}
return {
role: userInput.role,
parts: partsWithFileContextsSummarized,
};
}