First take at mocking out gemini cli responses in integration tests (#11156)

2026-06-01 19:03:42 +00:00 · 2025-10-23 16:10:43 -07:00
parent 6b4e7c6590
commit 039db4a0f3
12 changed files with 507 additions and 25 deletions
--- a/integration-tests/context-compress-interactive.compress-empty.json
+++ b/integration-tests/context-compress-interactive.compress-empty.json
@@ -0,0 +1,18 @@
+{
+  "generateContent": [
+    {
+      "candidates": [
+        {
+          "content": {
+            "role": "model",
+            "parts": [
+              {
+                "text": "This is more than the 5 tokens we return below which will trigger an error"
+              }
+            ]
+          }
+        }
+      ]
+    }
+  ]
+}
--- a/integration-tests/context-compress-interactive.compress-failure.json
+++ b/integration-tests/context-compress-interactive.compress-failure.json
@@ -0,0 +1,40 @@
+{
+  "generateContent": [
+    {
+      "candidates": [
+        {
+          "content": {
+            "role": "model",
+            "parts": [
+              {
+                "text": "This is more than the 5 tokens we return below which will trigger an error"
+              }
+            ]
+          }
+        }
+      ]
+    }
+  ],
+  "generateContentStream": [
+    [
+      {
+        "candidates": [
+          {
+            "content": {
+              "role": "model",
+              "parts": [
+                {
+                  "text": "The initial response from the model"
+                }
+              ]
+            },
+            "finishReason": "STOP"
+          }
+        ],
+        "usageMetadata": {
+          "promptTokenCount": 5
+        }
+      }
+    ]
+  ]
+}
--- a/integration-tests/context-compress-interactive.compress.json
+++ b/integration-tests/context-compress-interactive.compress.json
@@ -0,0 +1,40 @@
+{
+  "generateContent": [
+    {
+      "candidates": [
+        {
+          "content": {
+            "role": "model",
+            "parts": [
+              {
+                "text": "A summary of the conversation."
+              }
+            ]
+          }
+        }
+      ]
+    }
+  ],
+  "generateContentStream": [
+    [
+      {
+        "candidates": [
+          {
+            "content": {
+              "role": "model",
+              "parts": [
+                {
+                  "text": "The initial response from the model"
+                }
+              ]
+            },
+            "finishReason": "STOP"
+          }
+        ],
+        "usageMetadata": {
+          "promptTokenCount": 100000
+        }
+      }
+    ]
+  ]
+}
--- a/integration-tests/context-compress-interactive.test.ts
+++ b/integration-tests/context-compress-interactive.test.ts
@@ -6,6 +6,7 @@

 import { expect, describe, it, beforeEach, afterEach } from 'vitest';
 import { TestRig } from './test-helper.js';
+import { join } from 'node:path';

 describe('Interactive Mode', () => {
  let rig: TestRig;
@@ -18,50 +19,78 @@ describe('Interactive Mode', () => {
    await rig.cleanup();
  });

-  // TODO(#11062): Make this test reliable by not using the actual Gemini model
-  // We could not rely on the following mechanisms that have already shown to be
-  // flakey:
-  //    1. Asking a prompt like "Output 1000 tokens and the inventor of the lightbulb"
-  //        --> This was b/c the model occasionally did not output einstein and
-  //            we are not able to trigger the compression piece
-  //    2. Asking it to out a specific output and waiting for that.
-  //       --> The expect catches the input and thinks that is the output so the
-  //            /compress gets called too early
-  it.skip('should trigger chat compression with /compress command', async () => {
-    rig.setup('interactive-compress-success');
+  it('should trigger chat compression with /compress command', async () => {
+    await rig.setup('interactive-compress-test', {
+      fakeResponsesPath: join(
+        import.meta.dirname,
+        'context-compress-interactive.compress.json',
+      ),
+    });

    const run = await rig.runInteractive();

-    // Generate a long context to make compression viable.
-    const longPrompt =
-      'Write a 200 word story about a robot. The story MUST end with the following output: THE_END';
+    await run.type('Initial prompt');
+    await run.type('\r');

-    await run.sendKeys(longPrompt);
-    await run.sendKeys('\r');
-
-    // Wait for the specific end marker.
-    await run.expectText('THE_END', 30000);
+    await run.expectText('The initial response from the model', 5000);

    await run.type('/compress');
-    await run.sendKeys('\r');
+    await run.type('\r');

    const foundEvent = await rig.waitForTelemetryEvent(
      'chat_compression',
-      90000,
+      5000,
    );
    expect(foundEvent, 'chat_compression telemetry event was not found').toBe(
      true,
    );
+
+    await run.expectText('Chat history compressed', 5000);
  });

-  it('should handle /compress command on empty history', async () => {
-    rig.setup('interactive-compress-empty');
+  it('should handle compression failure on token inflation', async () => {
+    await rig.setup('interactive-compress-failure', {
+      fakeResponsesPath: join(
+        import.meta.dirname,
+        'context-compress-interactive.compress-failure.json',
+      ),
+    });

    const run = await rig.runInteractive();

+    await run.type('Initial prompt');
+    await run.type('\r');
+
+    await run.expectText('The initial response from the model', 25000);
+
    await run.type('/compress');
    await run.type('\r');
-    await run.expectText('Nothing to compress.', 25000);
+    await run.expectText('compression was not beneficial', 5000);
+
+    // Verify a chat_compression telemetry event is still logged when compression fails
+    const foundEvent = await rig.waitForTelemetryEvent(
+      'chat_compression',
+      5000,
+    );
+    expect(
+      foundEvent,
+      'chat_compression telemetry event should be found for failures',
+    ).toBe(true);
+  });
+
+  it('should handle /compress command on empty history', async () => {
+    rig.setup('interactive-compress-empty', {
+      fakeResponsesPath: join(
+        import.meta.dirname,
+        'context-compress-interactive.compress-empty.json',
+      ),
+    });
+
+    const run = await rig.runInteractive();
+    await run.type('/compress');
+    await run.type('\r');
+
+    await run.expectText('Nothing to compress.', 5000);

    // Verify no telemetry event is logged for NOOP
    const foundEvent = await rig.waitForTelemetryEvent(
--- a/integration-tests/test-helper.ts
+++ b/integration-tests/test-helper.ts
@@ -255,6 +255,7 @@ export class TestRig {
  testDir: string | null;
  testName?: string;
  _lastRunStdout?: string;
+  fakeResponsesPath?: string;

  constructor() {
    this.bundlePath = join(__dirname, '..', 'bundle/gemini.js');
@@ -263,12 +264,19 @@ export class TestRig {

  setup(
    testName: string,
-    options: { settings?: Record<string, unknown> } = {},
+    options: {
+      settings?: Record<string, unknown>;
+      fakeResponsesPath?: string;
+    } = {},
  ) {
    this.testName = testName;
    const sanitizedName = sanitizeTestName(testName);
    this.testDir = join(env['INTEGRATION_TEST_FILE_DIR']!, sanitizedName);
    mkdirSync(this.testDir, { recursive: true });
+    if (options.fakeResponsesPath) {
+      this.fakeResponsesPath = join(this.testDir, 'fake-responses.json');
+      fs.copyFileSync(options.fakeResponsesPath, this.fakeResponsesPath);
+    }

    // Create a settings file to point the CLI to the local collector
    const geminiDir = join(this.testDir, GEMINI_DIR);
@@ -335,6 +343,9 @@ export class TestRig {
    const initialArgs = isNpmReleaseTest
      ? extraInitialArgs
      : [this.bundlePath, ...extraInitialArgs];
+    if (this.fakeResponsesPath) {
+      initialArgs.push('--fake-responses', this.fakeResponsesPath);
+    }
    return { command, initialArgs };
  }