diff --git a/packages/cli/src/ui/App.test.tsx b/packages/cli/src/ui/App.test.tsx index bfd2efaf80..8562a2cc09 100644 --- a/packages/cli/src/ui/App.test.tsx +++ b/packages/cli/src/ui/App.test.tsx @@ -122,7 +122,19 @@ vi.mock('@gemini-cli/core', async (importOriginal) => { getShowMemoryUsage: vi.fn(() => opts.showMemoryUsage ?? false), getAccessibility: vi.fn(() => opts.accessibility ?? {}), getProjectRoot: vi.fn(() => opts.projectRoot), - getGeminiClient: vi.fn(() => ({})), + getGeminiClient: vi.fn(() => ({ + getTokenLimit: vi.fn(() => 8192), + getTokenBreakdown: vi.fn().mockResolvedValue({ + chatTokens: 0, + conventionsTokens: 0, + systemTokens: 0, + }), + })), + getFileContextService: vi.fn(() => ({ + on: vi.fn(), + off: vi.fn(), + getTrackedFilesWithTokenCounts: vi.fn().mockResolvedValue([]), + })), getCheckpointEnabled: vi.fn(() => opts.checkpoint ?? true), }; }); diff --git a/packages/core/src/core/geminiChat.tracked_files.test.ts b/packages/core/src/core/geminiChat.tracked_files.test.ts new file mode 100644 index 0000000000..d2adb62a3c --- /dev/null +++ b/packages/core/src/core/geminiChat.tracked_files.test.ts @@ -0,0 +1,143 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'; +import { Content, Models, GenerateContentConfig } from '@google/genai'; +import { GeminiChat } from './geminiChat.js'; +import { Config } from '../config/config.js'; + +// Mocks +const mockModelsModule = { + generateContent: vi.fn(), + generateContentStream: vi.fn(), + countTokens: vi.fn(), + embedContent: vi.fn(), + batchEmbedContents: vi.fn(), +} as unknown as Models; + +const mockConfig = { + getSessionId: () => 'test-session-id', +} as unknown as Config; + +import { createFileContextPart } from '../utils/fileContextUtils.js'; + +describe('GeminiChat - Tracked File History Sanitization', () => { + let chat: GeminiChat; + const model = 'gemini-pro'; + const config: GenerateContentConfig = {}; + + beforeEach(() => { + vi.clearAllMocks(); + chat = new GeminiChat(mockConfig, mockModelsModule, model, config, []); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + it('should replace file content with a summary in the history', () => { + const userInput: Content = { + role: 'user', + parts: [ + { text: 'User input' }, + createFileContextPart('/test/file.txt', 'This is file content.'), + ], + }; + + const modelOutput: Content[] = [ + { role: 'model', parts: [{ text: 'Model output' }] }, + ]; + + // @ts-expect-error - private method + chat.recordHistory(userInput, modelOutput); + + const history = chat.getHistory(); + expect(history).toHaveLength(2); + const userHistory = history[0]; + expect(userHistory.parts).toBeDefined(); + expect(userHistory.parts).toHaveLength(2); + expect(userHistory.parts![0].text).toBe('User input'); + expect(userHistory.parts![1].text).toBe( + '[CONTEXT] File: /test/file.txt' + ); + }); + + it('should update the history summary as files are added and removed over time', () => { + const modelOutput: Content[] = [ + { role: 'model', parts: [{ text: 'OK' }] }, + ]; + + // 1. Track file1 + const userInput1: Content = { + role: 'user', + parts: [ + { text: 'Message 1' }, + createFileContextPart('/test/file1.txt', 'Content 1'), + ], + }; + // @ts-expect-error - private method + chat.recordHistory(userInput1, modelOutput); + let history = chat.getHistory(); + expect(history[0].parts).toBeDefined(); + expect(history[0].parts).toHaveLength(2); + expect(history[0].parts![0].text).toBe('Message 1'); + expect(history[0].parts![1].text).toBe('[CONTEXT] File: /test/file1.txt'); + + // 2. Track file2 (file1 is still tracked) + const userInput2: Content = { + role: 'user', + parts: [ + { text: 'Message 2' }, + createFileContextPart('/test/file1.txt', 'Content 1'), + createFileContextPart('/test/file2.txt', 'Content 2'), + ], + }; + // @ts-expect-error - private method + chat.recordHistory(userInput2, modelOutput); + history = chat.getHistory(); + expect(history[2].parts).toBeDefined(); + expect(history[2].parts).toHaveLength(3); + expect(history[2].parts![0].text).toBe('Message 2'); + expect(history[2].parts![1].text).toBe('[CONTEXT] File: /test/file1.txt'); + expect(history[2].parts![2].text).toBe('[CONTEXT] File: /test/file2.txt'); + + // 3. Untrack file1 + const userInput3: Content = { + role: 'user', + parts: [ + { text: 'Message 3' }, + createFileContextPart('/test/file2.txt', 'Content 2'), + ], + }; + // @ts-expect-error - private method + chat.recordHistory(userInput3, modelOutput); + history = chat.getHistory(); + expect(history[4].parts).toBeDefined(); + expect(history[4].parts).toHaveLength(2); + expect(history[4].parts![0].text).toBe('Message 3'); + expect(history[4].parts![1].text).toBe( + '[CONTEXT] File: /test/file2.txt' + ); + }); + + it('should not alter history if no tracked files are present', () => { + const userInput: Content = { + role: 'user', + parts: [{ text: 'A normal message' }], + }; + const modelOutput: Content[] = [ + { role: 'model', parts: [{ text: 'A normal response' }] }, + ]; + + // @ts-expect-error - private method + chat.recordHistory(userInput, modelOutput); + + const history = chat.getHistory(); + expect(history).toHaveLength(2); + expect(history[0]).toEqual(userInput); + expect(history[1]).toEqual(modelOutput[0]); + }); +}); \ No newline at end of file diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts index dac9277f47..d35793e47d 100644 --- a/packages/core/src/core/geminiChat.ts +++ b/packages/core/src/core/geminiChat.ts @@ -17,6 +17,7 @@ import { } from '@google/genai'; import { retryWithBackoff } from '../utils/retry.js'; import { isFunctionResponse } from '../utils/messageInspectors.js'; +import { sanitizeUserContent } from '../utils/fileContextUtils.js'; import { ContentGenerator } from './contentGenerator.js'; import { Config } from '../config/config.js'; @@ -355,7 +356,7 @@ export class GeminiChat { ...extractCuratedHistory(automaticFunctionCallingHistory!), ); } else { - this.history.push(this.getSanitizedUserInput(userInput)); + this.history.push(sanitizeUserContent(userInput)); } // Consolidate adjacent model roles in outputContents @@ -405,41 +406,6 @@ export class GeminiChat { } } - private getSanitizedUserInput(userInput: Content): Content { - if (!userInput.parts) { - return userInput; - } - const markerPrefix = 'GEMINI_TRACKED_FILE_V1:'; - const trackedFilePaths: string[] = []; - const sanitizedParts: Part[] = []; - - for (const part of userInput.parts) { - if (part.text?.startsWith(markerPrefix)) { - try { - const data = JSON.parse(part.text.substring(markerPrefix.length)); - if (data.relativePath) { - trackedFilePaths.push(data.relativePath); - } - } catch (e) { - // It's not a valid marker, so keep the part as is. - sanitizedParts.push(part); - } - } else { - sanitizedParts.push(part); - } - } - - if (trackedFilePaths.length > 0) { - const fileListText = `The following files were provided as context:\n- ${trackedFilePaths.join('\n- ')}`; - sanitizedParts.unshift({ text: fileListText }); - } - - return { - role: userInput.role, - parts: sanitizedParts, - }; - } - private isTextContent( content: Content | undefined, ): content is Content & { parts: [{ text: string }, ...Part[]] } { diff --git a/packages/core/src/utils/fileContextUtils.test.ts b/packages/core/src/utils/fileContextUtils.test.ts new file mode 100644 index 0000000000..ce45849fec --- /dev/null +++ b/packages/core/src/utils/fileContextUtils.test.ts @@ -0,0 +1,198 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect } from 'vitest'; +import { Content, Part } from '@google/genai'; +import { + createFileContextPart, + sanitizeUserContent, + isFileContextPart, + summarizeFileContext, + summarizePartsFileContext, +} from './fileContextUtils.js'; + +// The test needs access to this, so we'll define it here. +// In the implementation it is not exported. +const GEMINI_TRACKED_FILE_MARKER = 'GEMINI_TRACKED_FILE_V1:'; + +describe('File Context Utils', () => { + describe('createFileContextPart', () => { + it('should create a correctly formatted file context part', () => { + const part = createFileContextPart( + 'test/file.txt', + 'This is the content.', + ); + const expectedPayload = JSON.stringify({ + relativePath: 'test/file.txt', + absolutePath: undefined, + }); + expect(part.text).toBe( + `${GEMINI_TRACKED_FILE_MARKER}${expectedPayload}\nThis is the content.`, + ); + }); + + it('should create a correctly formatted file context part with absolute path', () => { + const part = createFileContextPart( + 'test/file.txt', + 'This is the content.', + '/abs/test/file.txt', + ); + const expectedPayload = JSON.stringify({ + relativePath: 'test/file.txt', + absolutePath: '/abs/test/file.txt', + }); + expect(part.text).toBe( + `${GEMINI_TRACKED_FILE_MARKER}${expectedPayload}\nThis is the content.`, + ); + }); + }); + + describe('isFileContextPart', () => { + it('should return true and the path for a valid file context part', () => { + const part = createFileContextPart('a/b.txt', 'content', '/abs/a/b.txt'); + const result = isFileContextPart(part); + expect(result.isFile).toBe(true); + expect(result.relativePath).toBe('a/b.txt'); + expect(result.absolutePath).toBe('/abs/a/b.txt'); + }); + + it('should return true and the relative path for a valid file context part without absolute path', () => { + const part = createFileContextPart('a/b.txt', 'content'); + const result = isFileContextPart(part); + expect(result.isFile).toBe(true); + expect(result.relativePath).toBe('a/b.txt'); + expect(result.absolutePath).toBeUndefined(); + }); + + it('should return false for a regular text part', () => { + const part: Part = { text: 'hello world' }; + const result = isFileContextPart(part); + expect(result.isFile).toBe(false); + expect(result.relativePath).toBeUndefined(); + }); + + it('should return false for a part with a malformed marker', () => { + const part: Part = { text: `${GEMINI_TRACKED_FILE_MARKER}{not-json}` }; + const result = isFileContextPart(part); + expect(result.isFile).toBe(false); + expect(result.relativePath).toBeUndefined(); + }); + + it('should return false for a part with a marker but no relativePath', () => { + const part: Part = { + text: `${GEMINI_TRACKED_FILE_MARKER}${'{"foo":"bar"}'}`, + }; + const result = isFileContextPart(part); + expect(result.isFile).toBe(false); + expect(result.relativePath).toBeUndefined(); + }); + }); + + describe('summarizeFileContext', () => { + it('should summarize a file context part', () => { + const part = createFileContextPart('a/b.txt', 'content'); + const summaryPart = summarizeFileContext(part); + expect(summaryPart.text).toBe('[CONTEXT] File: a/b.txt'); + }); + + it('should summarize a file context part with absolute path', () => { + const part = createFileContextPart('a/b.txt', 'content', '/abs/a/b.txt'); + const summaryPart = summarizeFileContext(part); + expect(summaryPart.text).toBe('[CONTEXT] File: a/b.txt (/abs/a/b.txt)'); + }); + + it('should return the original part if it is not a file context part', () => { + const part: Part = { text: 'hello world' }; + const summaryPart = summarizeFileContext(part); + expect(summaryPart).toBe(part); + }); + }); + + describe('summarizePartsFileContext', () => { + it('should summarize multiple file context parts in an array', () => { + const parts: Part[] = [ + { text: 'Here are files:' }, + createFileContextPart('a/b.txt', 'content1'), + createFileContextPart('c/d.txt', 'content2', '/abs/c/d.txt'), + ]; + const summarizedParts = summarizePartsFileContext(parts); + expect(summarizedParts).toEqual([ + { text: 'Here are files:' }, + { text: '[CONTEXT] File: a/b.txt' }, + { text: '[CONTEXT] File: c/d.txt (/abs/c/d.txt)' }, + ]); + }); + + it('should return a new array with the same parts if no file context parts are present', () => { + const parts: Part[] = [{ text: 'Hello' }, { text: 'World' }]; + const summarizedParts = summarizePartsFileContext(parts); + // It should return a new array with the same parts. + expect(summarizedParts).toEqual(parts); + // But not the same array instance because of map. + expect(summarizedParts).not.toBe(parts); + }); + }); + + describe('sanitizeUserContent', () => { + it('should replace file content with a summary', () => { + const userInput: Content = { + role: 'user', + parts: [ + { text: 'User input' }, + createFileContextPart( + '/test/file.txt', + 'This is file content.', + '/abs/test/file.txt', + ), + ], + }; + + const sanitized = sanitizeUserContent(userInput); + expect(sanitized.parts).toEqual([ + { text: 'User input' }, + { text: '[CONTEXT] File: /test/file.txt (/abs/test/file.txt)' }, + ]); + }); + + it('should not alter content if no tracked files are present', () => { + const userInput: Content = { + role: 'user', + parts: [{ text: 'A normal message' }], + }; + const sanitized = sanitizeUserContent(userInput); + expect(sanitized).toBe(userInput); // Should return original object + }); + + it('should handle multiple file contexts', () => { + const userInput: Content = { + role: 'user', + parts: [ + { text: 'Here are two files' }, + createFileContextPart('file1.txt', 'content1'), + createFileContextPart('file2.txt', 'content2', '/abs/file2.txt'), + ], + }; + const sanitized = sanitizeUserContent(userInput); + expect(sanitized.parts).toEqual([ + { text: 'Here are two files' }, + { text: '[CONTEXT] File: file1.txt' }, + { text: '[CONTEXT] File: file2.txt (/abs/file2.txt)' }, + ]); + }); + + it('should handle content with no parts', () => { + const userInput: Content = { role: 'user', parts: [] }; + const sanitized = sanitizeUserContent(userInput); + expect(sanitized).toBe(userInput); + }); + + it('should handle content where parts is undefined', () => { + const userInput: Content = { role: 'user', parts: undefined as any }; + const sanitized = sanitizeUserContent(userInput); + expect(sanitized).toBe(userInput); + }); + }); +}); diff --git a/packages/core/src/utils/fileContextUtils.ts b/packages/core/src/utils/fileContextUtils.ts new file mode 100644 index 0000000000..3a7b6c8bb6 --- /dev/null +++ b/packages/core/src/utils/fileContextUtils.ts @@ -0,0 +1,125 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { Content, Part } from '@google/genai'; + +// This is not exported, as per instructions. +const GEMINI_TRACKED_FILE_MARKER = 'GEMINI_TRACKED_FILE_V1:'; + +interface FileContextInfo { + isFile: boolean; + relativePath?: string; + absolutePath?: string; +} + +/** + * Creates a `Part` object containing the specially formatted marker + * for a tracked file, to be sent to the model. + * + * @param relativePath The relative path of the file. + * @param content The full content of the file. + * @param absolutePath The absolute path of the file. + * @returns A `Part` object with the marked-up text. + */ +export function createFileContextPart( + relativePath: string, + content: string, + absolutePath?: string, +): Part { + const markerPayload = JSON.stringify({ relativePath, absolutePath }); + return { text: `${GEMINI_TRACKED_FILE_MARKER}${markerPayload}\n${content}` }; +} + +/** + * Detects if a Part is a file context part. + * @param part The Part to check. + * @returns Information about the file context if it is one. + */ +export function isFileContextPart(part: Part): FileContextInfo { + if (part.text?.startsWith(GEMINI_TRACKED_FILE_MARKER)) { + try { + const endOfMarker = part.text.indexOf('\n'); + const markerJson = + endOfMarker === -1 + ? part.text.substring(GEMINI_TRACKED_FILE_MARKER.length) + : part.text.substring( + GEMINI_TRACKED_FILE_MARKER.length, + endOfMarker, + ); + const data = JSON.parse(markerJson); + if (data.relativePath) { + return { + isFile: true, + relativePath: data.relativePath, + absolutePath: data.absolutePath, + }; + } + } catch (e) { + // Malformed, treat as not a file context part. + } + } + return { isFile: false }; +} + +/** + * If a part is a file context, returns a new one with the summarized version. + * @param part The part to summarize. + * @returns A summarized part if it was a file context part, otherwise the original part. + */ +export function summarizeFileContext(part: Part): Part { + const fileInfo = isFileContextPart(part); + if (fileInfo.isFile && fileInfo.relativePath) { + let text = `[CONTEXT] File: ${fileInfo.relativePath}`; + if (fileInfo.absolutePath) { + text += ` (${fileInfo.absolutePath})`; + } + return { + text, + }; + } + return part; +} + +/** + * Replaces any parts that are full file contexts with just a summary. + * @param parts The parts to summarize. + * @returns A new array of parts with file contexts summarized. + */ +export function summarizePartsFileContext(parts: Part[]): Part[] { + return parts.map(summarizeFileContext); +} + +/** + * Sanitizes a user-provided Content object by replacing any parts containing + * tracked file content with a simple summary part. + * + * @param userInput The original user Content object. + * @returns A new Content object with file content replaced by a summary. + * Returns the original object if no sanitization is needed. + */ +export function sanitizeUserContent(userInput: Content): Content { + if (!userInput.parts) { + return userInput; + } + + const partsWithFileContextsSummarized = summarizePartsFileContext( + userInput.parts, + ); + + // Avoid creating a new object if no parts were changed. + const hasChanged = userInput.parts.some( + (part, i) => part !== partsWithFileContextsSummarized[i], + ); + + if (!hasChanged) { + return userInput; + } + + return { + role: userInput.role, + parts: partsWithFileContextsSummarized, + }; +}