Refactor file context for better testability; start fixing tests

2026-02-01 22:48:03 +00:00 · 2025-06-12 15:12:01 -07:00
parent 99751c847c
commit 820275b63f
5 changed files with 481 additions and 37 deletions
--- a/packages/cli/src/ui/App.test.tsx
+++ b/packages/cli/src/ui/App.test.tsx
@@ -122,7 +122,19 @@ vi.mock('@gemini-cli/core', async (importOriginal) => {
        getShowMemoryUsage: vi.fn(() => opts.showMemoryUsage ?? false),
        getAccessibility: vi.fn(() => opts.accessibility ?? {}),
        getProjectRoot: vi.fn(() => opts.projectRoot),
-        getGeminiClient: vi.fn(() => ({})),
+        getGeminiClient: vi.fn(() => ({
+          getTokenLimit: vi.fn(() => 8192),
+          getTokenBreakdown: vi.fn().mockResolvedValue({
+            chatTokens: 0,
+            conventionsTokens: 0,
+            systemTokens: 0,
+          }),
+        })),
+        getFileContextService: vi.fn(() => ({
+          on: vi.fn(),
+          off: vi.fn(),
+          getTrackedFilesWithTokenCounts: vi.fn().mockResolvedValue([]),
+        })),
        getCheckpointEnabled: vi.fn(() => opts.checkpoint ?? true),
      };
    });
--- a/packages/core/src/core/geminiChat.tracked_files.test.ts
+++ b/packages/core/src/core/geminiChat.tracked_files.test.ts
@@ -0,0 +1,143 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { Content, Models, GenerateContentConfig } from '@google/genai';
+import { GeminiChat } from './geminiChat.js';
+import { Config } from '../config/config.js';
+
+// Mocks
+const mockModelsModule = {
+  generateContent: vi.fn(),
+  generateContentStream: vi.fn(),
+  countTokens: vi.fn(),
+  embedContent: vi.fn(),
+  batchEmbedContents: vi.fn(),
+} as unknown as Models;
+
+const mockConfig = {
+  getSessionId: () => 'test-session-id',
+} as unknown as Config;
+
+import { createFileContextPart } from '../utils/fileContextUtils.js';
+
+describe('GeminiChat - Tracked File History Sanitization', () => {
+  let chat: GeminiChat;
+  const model = 'gemini-pro';
+  const config: GenerateContentConfig = {};
+
+  beforeEach(() => {
+    vi.clearAllMocks();
+    chat = new GeminiChat(mockConfig, mockModelsModule, model, config, []);
+  });
+
+  afterEach(() => {
+    vi.restoreAllMocks();
+  });
+
+  it('should replace file content with a summary in the history', () => {
+    const userInput: Content = {
+      role: 'user',
+      parts: [
+        { text: 'User input' },
+        createFileContextPart('/test/file.txt', 'This is file content.'),
+      ],
+    };
+
+    const modelOutput: Content[] = [
+      { role: 'model', parts: [{ text: 'Model output' }] },
+    ];
+
+    // @ts-expect-error - private method
+    chat.recordHistory(userInput, modelOutput);
+
+    const history = chat.getHistory();
+    expect(history).toHaveLength(2);
+    const userHistory = history[0];
+    expect(userHistory.parts).toBeDefined();
+    expect(userHistory.parts).toHaveLength(2);
+    expect(userHistory.parts![0].text).toBe('User input');
+    expect(userHistory.parts![1].text).toBe(
+      '[CONTEXT] File: /test/file.txt'
+    );
+  });
+
+  it('should update the history summary as files are added and removed over time', () => {
+    const modelOutput: Content[] = [
+      { role: 'model', parts: [{ text: 'OK' }] },
+    ];
+
+    // 1. Track file1
+    const userInput1: Content = {
+      role: 'user',
+      parts: [
+        { text: 'Message 1' },
+        createFileContextPart('/test/file1.txt', 'Content 1'),
+      ],
+    };
+    // @ts-expect-error - private method
+    chat.recordHistory(userInput1, modelOutput);
+    let history = chat.getHistory();
+    expect(history[0].parts).toBeDefined();
+    expect(history[0].parts).toHaveLength(2);
+    expect(history[0].parts![0].text).toBe('Message 1');
+    expect(history[0].parts![1].text).toBe('[CONTEXT] File: /test/file1.txt');
+
+    // 2. Track file2 (file1 is still tracked)
+    const userInput2: Content = {
+      role: 'user',
+      parts: [
+        { text: 'Message 2' },
+        createFileContextPart('/test/file1.txt', 'Content 1'),
+        createFileContextPart('/test/file2.txt', 'Content 2'),
+      ],
+    };
+    // @ts-expect-error - private method
+    chat.recordHistory(userInput2, modelOutput);
+    history = chat.getHistory();
+    expect(history[2].parts).toBeDefined();
+    expect(history[2].parts).toHaveLength(3);
+    expect(history[2].parts![0].text).toBe('Message 2');
+    expect(history[2].parts![1].text).toBe('[CONTEXT] File: /test/file1.txt');
+    expect(history[2].parts![2].text).toBe('[CONTEXT] File: /test/file2.txt');
+
+    // 3. Untrack file1
+    const userInput3: Content = {
+      role: 'user',
+      parts: [
+        { text: 'Message 3' },
+        createFileContextPart('/test/file2.txt', 'Content 2'),
+      ],
+    };
+    // @ts-expect-error - private method
+    chat.recordHistory(userInput3, modelOutput);
+    history = chat.getHistory();
+    expect(history[4].parts).toBeDefined();
+    expect(history[4].parts).toHaveLength(2);
+    expect(history[4].parts![0].text).toBe('Message 3');
+    expect(history[4].parts![1].text).toBe(
+      '[CONTEXT] File: /test/file2.txt'
+    );
+  });
+
+  it('should not alter history if no tracked files are present', () => {
+    const userInput: Content = {
+      role: 'user',
+      parts: [{ text: 'A normal message' }],
+    };
+    const modelOutput: Content[] = [
+      { role: 'model', parts: [{ text: 'A normal response' }] },
+    ];
+
+    // @ts-expect-error - private method
+    chat.recordHistory(userInput, modelOutput);
+
+    const history = chat.getHistory();
+    expect(history).toHaveLength(2);
+    expect(history[0]).toEqual(userInput);
+    expect(history[1]).toEqual(modelOutput[0]);
+  });
+});
--- a/packages/core/src/core/geminiChat.ts
+++ b/packages/core/src/core/geminiChat.ts
@@ -17,6 +17,7 @@ import {
 } from '@google/genai';
 import { retryWithBackoff } from '../utils/retry.js';
 import { isFunctionResponse } from '../utils/messageInspectors.js';
+import { sanitizeUserContent } from '../utils/fileContextUtils.js';
 import { ContentGenerator } from './contentGenerator.js';
 import { Config } from '../config/config.js';

@@ -355,7 +356,7 @@ export class GeminiChat {
        ...extractCuratedHistory(automaticFunctionCallingHistory!),
      );
    } else {
-      this.history.push(this.getSanitizedUserInput(userInput));
+      this.history.push(sanitizeUserContent(userInput));
    }

    // Consolidate adjacent model roles in outputContents
@@ -405,41 +406,6 @@ export class GeminiChat {
    }
  }

-  private getSanitizedUserInput(userInput: Content): Content {
-    if (!userInput.parts) {
-      return userInput;
-    }
-    const markerPrefix = 'GEMINI_TRACKED_FILE_V1:';
-    const trackedFilePaths: string[] = [];
-    const sanitizedParts: Part[] = [];
-
-    for (const part of userInput.parts) {
-      if (part.text?.startsWith(markerPrefix)) {
-        try {
-          const data = JSON.parse(part.text.substring(markerPrefix.length));
-          if (data.relativePath) {
-            trackedFilePaths.push(data.relativePath);
-          }
-        } catch (e) {
-          // It's not a valid marker, so keep the part as is.
-          sanitizedParts.push(part);
-        }
-      } else {
-        sanitizedParts.push(part);
-      }
-    }
-
-    if (trackedFilePaths.length > 0) {
-      const fileListText = `The following files were provided as context:\n- ${trackedFilePaths.join('\n- ')}`;
-      sanitizedParts.unshift({ text: fileListText });
-    }
-
-    return {
-      role: userInput.role,
-      parts: sanitizedParts,
-    };
-  }
-
  private isTextContent(
    content: Content | undefined,
  ): content is Content & { parts: [{ text: string }, ...Part[]] } {
--- a/packages/core/src/utils/fileContextUtils.test.ts
+++ b/packages/core/src/utils/fileContextUtils.test.ts
@@ -0,0 +1,198 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { describe, it, expect } from 'vitest';
+import { Content, Part } from '@google/genai';
+import {
+  createFileContextPart,
+  sanitizeUserContent,
+  isFileContextPart,
+  summarizeFileContext,
+  summarizePartsFileContext,
+} from './fileContextUtils.js';
+
+// The test needs access to this, so we'll define it here.
+// In the implementation it is not exported.
+const GEMINI_TRACKED_FILE_MARKER = 'GEMINI_TRACKED_FILE_V1:';
+
+describe('File Context Utils', () => {
+  describe('createFileContextPart', () => {
+    it('should create a correctly formatted file context part', () => {
+      const part = createFileContextPart(
+        'test/file.txt',
+        'This is the content.',
+      );
+      const expectedPayload = JSON.stringify({
+        relativePath: 'test/file.txt',
+        absolutePath: undefined,
+      });
+      expect(part.text).toBe(
+        `${GEMINI_TRACKED_FILE_MARKER}${expectedPayload}\nThis is the content.`,
+      );
+    });
+
+    it('should create a correctly formatted file context part with absolute path', () => {
+      const part = createFileContextPart(
+        'test/file.txt',
+        'This is the content.',
+        '/abs/test/file.txt',
+      );
+      const expectedPayload = JSON.stringify({
+        relativePath: 'test/file.txt',
+        absolutePath: '/abs/test/file.txt',
+      });
+      expect(part.text).toBe(
+        `${GEMINI_TRACKED_FILE_MARKER}${expectedPayload}\nThis is the content.`,
+      );
+    });
+  });
+
+  describe('isFileContextPart', () => {
+    it('should return true and the path for a valid file context part', () => {
+      const part = createFileContextPart('a/b.txt', 'content', '/abs/a/b.txt');
+      const result = isFileContextPart(part);
+      expect(result.isFile).toBe(true);
+      expect(result.relativePath).toBe('a/b.txt');
+      expect(result.absolutePath).toBe('/abs/a/b.txt');
+    });
+
+    it('should return true and the relative path for a valid file context part without absolute path', () => {
+      const part = createFileContextPart('a/b.txt', 'content');
+      const result = isFileContextPart(part);
+      expect(result.isFile).toBe(true);
+      expect(result.relativePath).toBe('a/b.txt');
+      expect(result.absolutePath).toBeUndefined();
+    });
+
+    it('should return false for a regular text part', () => {
+      const part: Part = { text: 'hello world' };
+      const result = isFileContextPart(part);
+      expect(result.isFile).toBe(false);
+      expect(result.relativePath).toBeUndefined();
+    });
+
+    it('should return false for a part with a malformed marker', () => {
+      const part: Part = { text: `${GEMINI_TRACKED_FILE_MARKER}{not-json}` };
+      const result = isFileContextPart(part);
+      expect(result.isFile).toBe(false);
+      expect(result.relativePath).toBeUndefined();
+    });
+
+    it('should return false for a part with a marker but no relativePath', () => {
+      const part: Part = {
+        text: `${GEMINI_TRACKED_FILE_MARKER}${'{"foo":"bar"}'}`,
+      };
+      const result = isFileContextPart(part);
+      expect(result.isFile).toBe(false);
+      expect(result.relativePath).toBeUndefined();
+    });
+  });
+
+  describe('summarizeFileContext', () => {
+    it('should summarize a file context part', () => {
+      const part = createFileContextPart('a/b.txt', 'content');
+      const summaryPart = summarizeFileContext(part);
+      expect(summaryPart.text).toBe('[CONTEXT] File: a/b.txt');
+    });
+
+    it('should summarize a file context part with absolute path', () => {
+      const part = createFileContextPart('a/b.txt', 'content', '/abs/a/b.txt');
+      const summaryPart = summarizeFileContext(part);
+      expect(summaryPart.text).toBe('[CONTEXT] File: a/b.txt (/abs/a/b.txt)');
+    });
+
+    it('should return the original part if it is not a file context part', () => {
+      const part: Part = { text: 'hello world' };
+      const summaryPart = summarizeFileContext(part);
+      expect(summaryPart).toBe(part);
+    });
+  });
+
+  describe('summarizePartsFileContext', () => {
+    it('should summarize multiple file context parts in an array', () => {
+      const parts: Part[] = [
+        { text: 'Here are files:' },
+        createFileContextPart('a/b.txt', 'content1'),
+        createFileContextPart('c/d.txt', 'content2', '/abs/c/d.txt'),
+      ];
+      const summarizedParts = summarizePartsFileContext(parts);
+      expect(summarizedParts).toEqual([
+        { text: 'Here are files:' },
+        { text: '[CONTEXT] File: a/b.txt' },
+        { text: '[CONTEXT] File: c/d.txt (/abs/c/d.txt)' },
+      ]);
+    });
+
+    it('should return a new array with the same parts if no file context parts are present', () => {
+      const parts: Part[] = [{ text: 'Hello' }, { text: 'World' }];
+      const summarizedParts = summarizePartsFileContext(parts);
+      // It should return a new array with the same parts.
+      expect(summarizedParts).toEqual(parts);
+      // But not the same array instance because of map.
+      expect(summarizedParts).not.toBe(parts);
+    });
+  });
+
+  describe('sanitizeUserContent', () => {
+    it('should replace file content with a summary', () => {
+      const userInput: Content = {
+        role: 'user',
+        parts: [
+          { text: 'User input' },
+          createFileContextPart(
+            '/test/file.txt',
+            'This is file content.',
+            '/abs/test/file.txt',
+          ),
+        ],
+      };
+
+      const sanitized = sanitizeUserContent(userInput);
+      expect(sanitized.parts).toEqual([
+        { text: 'User input' },
+        { text: '[CONTEXT] File: /test/file.txt (/abs/test/file.txt)' },
+      ]);
+    });
+
+    it('should not alter content if no tracked files are present', () => {
+      const userInput: Content = {
+        role: 'user',
+        parts: [{ text: 'A normal message' }],
+      };
+      const sanitized = sanitizeUserContent(userInput);
+      expect(sanitized).toBe(userInput); // Should return original object
+    });
+
+    it('should handle multiple file contexts', () => {
+      const userInput: Content = {
+        role: 'user',
+        parts: [
+          { text: 'Here are two files' },
+          createFileContextPart('file1.txt', 'content1'),
+          createFileContextPart('file2.txt', 'content2', '/abs/file2.txt'),
+        ],
+      };
+      const sanitized = sanitizeUserContent(userInput);
+      expect(sanitized.parts).toEqual([
+        { text: 'Here are two files' },
+        { text: '[CONTEXT] File: file1.txt' },
+        { text: '[CONTEXT] File: file2.txt (/abs/file2.txt)' },
+      ]);
+    });
+
+    it('should handle content with no parts', () => {
+      const userInput: Content = { role: 'user', parts: [] };
+      const sanitized = sanitizeUserContent(userInput);
+      expect(sanitized).toBe(userInput);
+    });
+
+    it('should handle content where parts is undefined', () => {
+      const userInput: Content = { role: 'user', parts: undefined as any };
+      const sanitized = sanitizeUserContent(userInput);
+      expect(sanitized).toBe(userInput);
+    });
+  });
+});
--- a/packages/core/src/utils/fileContextUtils.ts
+++ b/packages/core/src/utils/fileContextUtils.ts
@@ -0,0 +1,125 @@
+/**
+ * @license
+ * Copyright 2025 Google LLC
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { Content, Part } from '@google/genai';
+
+// This is not exported, as per instructions.
+const GEMINI_TRACKED_FILE_MARKER = 'GEMINI_TRACKED_FILE_V1:';
+
+interface FileContextInfo {
+  isFile: boolean;
+  relativePath?: string;
+  absolutePath?: string;
+}
+
+/**
+ * Creates a `Part` object containing the specially formatted marker
+ * for a tracked file, to be sent to the model.
+ *
+ * @param relativePath The relative path of the file.
+ * @param content The full content of the file.
+ * @param absolutePath The absolute path of the file.
+ * @returns A `Part` object with the marked-up text.
+ */
+export function createFileContextPart(
+  relativePath: string,
+  content: string,
+  absolutePath?: string,
+): Part {
+  const markerPayload = JSON.stringify({ relativePath, absolutePath });
+  return { text: `${GEMINI_TRACKED_FILE_MARKER}${markerPayload}\n${content}` };
+}
+
+/**
+ * Detects if a Part is a file context part.
+ * @param part The Part to check.
+ * @returns Information about the file context if it is one.
+ */
+export function isFileContextPart(part: Part): FileContextInfo {
+  if (part.text?.startsWith(GEMINI_TRACKED_FILE_MARKER)) {
+    try {
+      const endOfMarker = part.text.indexOf('\n');
+      const markerJson =
+        endOfMarker === -1
+          ? part.text.substring(GEMINI_TRACKED_FILE_MARKER.length)
+          : part.text.substring(
+              GEMINI_TRACKED_FILE_MARKER.length,
+              endOfMarker,
+            );
+      const data = JSON.parse(markerJson);
+      if (data.relativePath) {
+        return {
+          isFile: true,
+          relativePath: data.relativePath,
+          absolutePath: data.absolutePath,
+        };
+      }
+    } catch (e) {
+      // Malformed, treat as not a file context part.
+    }
+  }
+  return { isFile: false };
+}
+
+/**
+ * If a part is a file context, returns a new one with the summarized version.
+ * @param part The part to summarize.
+ * @returns A summarized part if it was a file context part, otherwise the original part.
+ */
+export function summarizeFileContext(part: Part): Part {
+  const fileInfo = isFileContextPart(part);
+  if (fileInfo.isFile && fileInfo.relativePath) {
+    let text = `[CONTEXT] File: ${fileInfo.relativePath}`;
+    if (fileInfo.absolutePath) {
+      text += ` (${fileInfo.absolutePath})`;
+    }
+    return {
+      text,
+    };
+  }
+  return part;
+}
+
+/**
+ * Replaces any parts that are full file contexts with just a summary.
+ * @param parts The parts to summarize.
+ * @returns A new array of parts with file contexts summarized.
+ */
+export function summarizePartsFileContext(parts: Part[]): Part[] {
+  return parts.map(summarizeFileContext);
+}
+
+/**
+ * Sanitizes a user-provided Content object by replacing any parts containing
+ * tracked file content with a simple summary part.
+ *
+ * @param userInput The original user Content object.
+ * @returns A new Content object with file content replaced by a summary.
+ *          Returns the original object if no sanitization is needed.
+ */
+export function sanitizeUserContent(userInput: Content): Content {
+  if (!userInput.parts) {
+    return userInput;
+  }
+
+  const partsWithFileContextsSummarized = summarizePartsFileContext(
+    userInput.parts,
+  );
+
+  // Avoid creating a new object if no parts were changed.
+  const hasChanged = userInput.parts.some(
+    (part, i) => part !== partsWithFileContextsSummarized[i],
+  );
+
+  if (!hasChanged) {
+    return userInput;
+  }
+
+  return {
+    role: userInput.role,
+    parts: partsWithFileContextsSummarized,
+  };
+}