First take at mocking out gemini cli responses in integration tests (#11156)

This commit is contained in:
Jacob MacDonald
2025-10-23 16:10:43 -07:00
committed by GitHub
parent 6b4e7c6590
commit 039db4a0f3
12 changed files with 507 additions and 25 deletions

View File

@@ -0,0 +1,18 @@
{
"generateContent": [
{
"candidates": [
{
"content": {
"role": "model",
"parts": [
{
"text": "This is more than the 5 tokens we return below which will trigger an error"
}
]
}
}
]
}
]
}

View File

@@ -0,0 +1,40 @@
{
"generateContent": [
{
"candidates": [
{
"content": {
"role": "model",
"parts": [
{
"text": "This is more than the 5 tokens we return below which will trigger an error"
}
]
}
}
]
}
],
"generateContentStream": [
[
{
"candidates": [
{
"content": {
"role": "model",
"parts": [
{
"text": "The initial response from the model"
}
]
},
"finishReason": "STOP"
}
],
"usageMetadata": {
"promptTokenCount": 5
}
}
]
]
}

View File

@@ -0,0 +1,40 @@
{
"generateContent": [
{
"candidates": [
{
"content": {
"role": "model",
"parts": [
{
"text": "A summary of the conversation."
}
]
}
}
]
}
],
"generateContentStream": [
[
{
"candidates": [
{
"content": {
"role": "model",
"parts": [
{
"text": "The initial response from the model"
}
]
},
"finishReason": "STOP"
}
],
"usageMetadata": {
"promptTokenCount": 100000
}
}
]
]
}

View File

@@ -6,6 +6,7 @@
import { expect, describe, it, beforeEach, afterEach } from 'vitest';
import { TestRig } from './test-helper.js';
import { join } from 'node:path';
describe('Interactive Mode', () => {
let rig: TestRig;
@@ -18,50 +19,78 @@ describe('Interactive Mode', () => {
await rig.cleanup();
});
// TODO(#11062): Make this test reliable by not using the actual Gemini model
// We cannot rely on the following mechanisms, which have already been shown to
// be flaky:
// 1. Asking a prompt like "Output 1000 tokens and the inventor of the lightbulb"
// --> This was because the model occasionally did not output einstein, so
// we were not able to trigger the compression piece
// 2. Asking it to output a specific string and waiting for that.
// --> The expect catches the input and thinks that is the output, so
// /compress gets called too early
it.skip('should trigger chat compression with /compress command', async () => {
rig.setup('interactive-compress-success');
it('should trigger chat compression with /compress command', async () => {
await rig.setup('interactive-compress-test', {
fakeResponsesPath: join(
import.meta.dirname,
'context-compress-interactive.compress.json',
),
});
const run = await rig.runInteractive();
// Generate a long context to make compression viable.
const longPrompt =
'Write a 200 word story about a robot. The story MUST end with the following output: THE_END';
await run.type('Initial prompt');
await run.type('\r');
await run.sendKeys(longPrompt);
await run.sendKeys('\r');
// Wait for the specific end marker.
await run.expectText('THE_END', 30000);
await run.expectText('The initial response from the model', 5000);
await run.type('/compress');
await run.sendKeys('\r');
await run.type('\r');
const foundEvent = await rig.waitForTelemetryEvent(
'chat_compression',
90000,
5000,
);
expect(foundEvent, 'chat_compression telemetry event was not found').toBe(
true,
);
await run.expectText('Chat history compressed', 5000);
});
it('should handle /compress command on empty history', async () => {
rig.setup('interactive-compress-empty');
it('should handle compression failure on token inflation', async () => {
await rig.setup('interactive-compress-failure', {
fakeResponsesPath: join(
import.meta.dirname,
'context-compress-interactive.compress-failure.json',
),
});
const run = await rig.runInteractive();
await run.type('Initial prompt');
await run.type('\r');
await run.expectText('The initial response from the model', 25000);
await run.type('/compress');
await run.type('\r');
await run.expectText('Nothing to compress.', 25000);
await run.expectText('compression was not beneficial', 5000);
// Verify a telemetry event is still logged when compression fails
const foundEvent = await rig.waitForTelemetryEvent(
'chat_compression',
5000,
);
expect(
foundEvent,
'chat_compression telemetry event should be found for failures',
).toBe(true);
});
it('should handle /compress command on empty history', async () => {
rig.setup('interactive-compress-empty', {
fakeResponsesPath: join(
import.meta.dirname,
'context-compress-interactive.compress-empty.json',
),
});
const run = await rig.runInteractive();
await run.type('/compress');
await run.type('\r');
await run.expectText('Nothing to compress.', 5000);
// Verify no telemetry event is logged for NOOP
const foundEvent = await rig.waitForTelemetryEvent(

View File

@@ -255,6 +255,7 @@ export class TestRig {
testDir: string | null;
testName?: string;
_lastRunStdout?: string;
fakeResponsesPath?: string;
constructor() {
this.bundlePath = join(__dirname, '..', 'bundle/gemini.js');
@@ -263,12 +264,19 @@ export class TestRig {
setup(
testName: string,
options: { settings?: Record<string, unknown> } = {},
options: {
settings?: Record<string, unknown>;
fakeResponsesPath?: string;
} = {},
) {
this.testName = testName;
const sanitizedName = sanitizeTestName(testName);
this.testDir = join(env['INTEGRATION_TEST_FILE_DIR']!, sanitizedName);
mkdirSync(this.testDir, { recursive: true });
if (options.fakeResponsesPath) {
this.fakeResponsesPath = join(this.testDir, 'fake-responses.json');
fs.copyFileSync(options.fakeResponsesPath, this.fakeResponsesPath);
}
// Create a settings file to point the CLI to the local collector
const geminiDir = join(this.testDir, GEMINI_DIR);
@@ -335,6 +343,9 @@ export class TestRig {
const initialArgs = isNpmReleaseTest
? extraInitialArgs
: [this.bundlePath, ...extraInitialArgs];
if (this.fakeResponsesPath) {
initialArgs.push('--fake-responses', this.fakeResponsesPath);
}
return { command, initialArgs };
}