Enhance TestRig with process management and timeouts (#15908)

This commit is contained in:
N. Taylor Mullen
2026-01-06 23:58:13 -08:00
committed by GitHub
parent 5fe5d1da46
commit 416d243027
4 changed files with 205 additions and 111 deletions

View File

@@ -14,7 +14,6 @@ describe('JSON output', () => {
beforeEach(async () => {
rig = new TestRig();
await rig.setup('json-output-test');
});
afterEach(async () => {
@@ -22,6 +21,7 @@ describe('JSON output', () => {
});
it('should return a valid JSON with response and stats', async () => {
await rig.setup('json-output-response-stats');
const result = await rig.run({
args: ['What is the capital of France?', '--output-format', 'json'],
});
@@ -36,6 +36,7 @@ describe('JSON output', () => {
});
it('should return a valid JSON with a session ID', async () => {
await rig.setup('json-output-session-id');
const result = await rig.run({
args: ['Hello', '--output-format', 'json'],
});
@@ -47,7 +48,6 @@ describe('JSON output', () => {
});
it('should return a JSON error for sd auth mismatch before running', async () => {
process.env['GOOGLE_GENAI_USE_GCA'] = 'true';
await rig.setup('json-output-auth-mismatch', {
settings: {
security: {
@@ -58,12 +58,13 @@ describe('JSON output', () => {
let thrown: Error | undefined;
try {
await rig.run({ args: ['Hello', '--output-format', 'json'] });
await rig.run({
args: ['Hello', '--output-format', 'json'],
env: { GOOGLE_GENAI_USE_GCA: 'true' },
});
expect.fail('Expected process to exit with error');
} catch (e) {
thrown = e as Error;
} finally {
delete process.env['GOOGLE_GENAI_USE_GCA'];
}
expect(thrown).toBeDefined();

View File

@@ -5,11 +5,12 @@
*/
import { expect } from 'vitest';
import { execSync, spawn } from 'node:child_process';
import { execSync, spawn, type ChildProcess } from 'node:child_process';
import { mkdirSync, writeFileSync, readFileSync } from 'node:fs';
import { join, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { env } from 'node:process';
import { setTimeout as sleep } from 'node:timers/promises';
import { DEFAULT_GEMINI_MODEL } from '../packages/core/src/config/models.js';
import fs from 'node:fs';
import * as pty from '@lydell/node-pty';
@@ -45,7 +46,7 @@ export async function poll(
if (result) {
return true;
}
await new Promise((resolve) => setTimeout(resolve, interval));
await sleep(interval);
}
if (env['VERBOSE'] === 'true') {
console.log(`Poll timed out after ${attempts} attempts`);
@@ -212,7 +213,7 @@ export class InteractiveRun {
if (char === '\r') {
// wait >30ms before `enter` to avoid fast return conversion
// from bufferFastReturn() in KeypressContent.tsx
await new Promise((resolve) => setTimeout(resolve, 50));
await sleep(50);
}
this.ptyProcess.write(char);
@@ -239,7 +240,7 @@ export class InteractiveRun {
// but may run into paste detection issues for larger strings.
async sendText(text: string) {
this.ptyProcess.write(text);
await new Promise((resolve) => setTimeout(resolve, 5));
await sleep(5);
}
// Simulates typing a string one character at a time to avoid paste detection.
@@ -247,7 +248,7 @@ export class InteractiveRun {
const delay = 5;
for (const char of text) {
this.ptyProcess.write(char);
await new Promise((resolve) => setTimeout(resolve, delay));
await sleep(delay);
}
}
@@ -282,6 +283,7 @@ export class TestRig {
// Original fake responses file path for rewriting goldens in record mode.
originalFakeResponsesPath?: string;
private _interactiveRuns: InteractiveRun[] = [];
private _spawnedProcesses: ChildProcess[] = [];
setup(
testName: string,
@@ -307,15 +309,16 @@ export class TestRig {
}
// Create a settings file to point the CLI to the local collector
const projectGeminiDir = join(this.testDir, GEMINI_DIR);
this._createSettingsFile(options.settings);
}
private _createSettingsFile(overrideSettings?: Record<string, unknown>) {
const projectGeminiDir = join(this.testDir!, GEMINI_DIR);
mkdirSync(projectGeminiDir, { recursive: true });
// In sandbox mode, use an absolute path for telemetry inside the container
// The container mounts the test directory at the same path as the host
const telemetryPath = join(this.homeDir, 'telemetry.log'); // Always use home directory for telemetry
// Ensure the CLI uses our separate home directory for global state
process.env['GEMINI_CLI_HOME'] = this.homeDir;
const telemetryPath = join(this.homeDir!, 'telemetry.log'); // Always use home directory for telemetry
const settings = {
general: {
@@ -343,7 +346,7 @@ export class TestRig {
env['GEMINI_SANDBOX'] !== 'false' ? env['GEMINI_SANDBOX'] : false,
// Don't show the IDE connection dialog when running from VsCode
ide: { enabled: false, hasSeenNudge: true },
...options.settings, // Allow tests to override/add settings
...overrideSettings, // Allow tests to override/add settings
};
writeFileSync(
join(projectGeminiDir, 'settings.json'),
@@ -362,6 +365,7 @@ export class TestRig {
}
sync() {
  // Flush pending filesystem writes so a subsequently spawned CLI process
  // sees every file the test just created. `sync` is a POSIX utility, so
  // this is a deliberate no-op on Windows.
  if (os.platform() !== 'win32') {
    execSync('sync', { cwd: this.testDir! });
  }
}
@@ -396,6 +400,8 @@ export class TestRig {
stdin?: string;
stdinDoesNotEnd?: boolean;
yolo?: boolean;
timeout?: number;
env?: Record<string, string | undefined>;
}): Promise<string> {
const yolo = options.yolo !== false;
const { command, initialArgs } = this._getCommandAndArgs(
@@ -426,8 +432,13 @@ export class TestRig {
const child = spawn(command, commandArgs, {
cwd: this.testDir!,
stdio: 'pipe',
env: env,
env: {
...process.env,
GEMINI_CLI_HOME: this.homeDir!,
...options.env,
},
});
this._spawnedProcesses.push(child);
let stdout = '';
let stderr = '';
@@ -441,63 +452,46 @@ export class TestRig {
child.stdin!.end();
}
child.stdout!.on('data', (data: Buffer) => {
child.stdout!.setEncoding('utf8');
child.stdout!.on('data', (data: string) => {
stdout += data;
if (env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true') {
process.stdout.write(data);
}
});
child.stderr!.on('data', (data: Buffer) => {
child.stderr!.setEncoding('utf8');
child.stderr!.on('data', (data: string) => {
stderr += data;
if (env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true') {
process.stderr.write(data);
}
});
const timeout = options.timeout ?? 120000;
const promise = new Promise<string>((resolve, reject) => {
const timer = setTimeout(() => {
child.kill('SIGKILL');
reject(
new Error(
`Process timed out after ${timeout}ms.\nStdout:\n${stdout}\nStderr:\n${stderr}`,
),
);
}, timeout);
child.on('error', (err) => {
clearTimeout(timer);
reject(err);
});
child.on('close', (code: number) => {
clearTimeout(timer);
if (code === 0) {
// Store the raw stdout for Podman telemetry parsing
this._lastRunStdout = stdout;
// Filter out telemetry output when running with Podman
// Podman seems to output telemetry to stdout even when writing to file
let result = stdout;
if (env['GEMINI_SANDBOX'] === 'podman') {
// Remove telemetry JSON objects from output
// They are multi-line JSON objects that start with { and contain telemetry fields
const lines = result.split(os.EOL);
const filteredLines = [];
let inTelemetryObject = false;
let braceDepth = 0;
for (const line of lines) {
if (!inTelemetryObject && line.trim() === '{') {
// Check if this might be start of telemetry object
inTelemetryObject = true;
braceDepth = 1;
} else if (inTelemetryObject) {
// Count braces to track nesting
for (const char of line) {
if (char === '{') braceDepth++;
else if (char === '}') braceDepth--;
}
// Check if we've closed all braces
if (braceDepth === 0) {
inTelemetryObject = false;
// Skip this line (the closing brace)
continue;
}
} else {
// Not in telemetry object, keep the line
filteredLines.push(line);
}
}
result = filteredLines.join('\n');
}
const result = this._filterPodmanTelemetry(stdout);
// Check if this is a JSON output test - if so, don't include stderr
// as it would corrupt the JSON
@@ -506,11 +500,12 @@ export class TestRig {
commandArgs.includes('json');
// If we have stderr output and it's not a JSON test, include that also
if (stderr && !isJsonOutput) {
result += `\n\nStdErr:\n${stderr}`;
}
const finalResult =
stderr && !isJsonOutput
? `${result}\n\nStdErr:\n${stderr}`
: result;
resolve(result);
resolve(finalResult);
} else {
reject(new Error(`Process exited with code ${code}:\n${stderr}`));
}
@@ -520,9 +515,52 @@ export class TestRig {
return promise;
}
private _filterPodmanTelemetry(stdout: string): string {
  // Only the Podman sandbox leaks telemetry into stdout; pass everything
  // else through untouched.
  if (env['GEMINI_SANDBOX'] !== 'podman') {
    return stdout;
  }

  // Podman emits multi-line telemetry JSON objects to stdout even when
  // telemetry is written to a file. Strip any top-level `{ ... }` object by
  // tracking brace nesting across lines.
  // NOTE(review): input is split on os.EOL but re-joined with '\n' — on
  // Windows this would silently normalize CRLF to LF; confirm intended.
  const kept: string[] = [];
  let insideObject = false;
  let depth = 0;

  for (const line of stdout.split(os.EOL)) {
    if (insideObject) {
      // Update nesting depth using every brace on this line.
      for (const ch of line) {
        if (ch === '{') {
          depth++;
        } else if (ch === '}') {
          depth--;
        }
      }
      // Once all braces are balanced the object has ended; this closing
      // line (like every line inside the object) is dropped.
      if (depth === 0) {
        insideObject = false;
      }
    } else if (line.trim() === '{') {
      // A bare `{` on its own line is treated as the start of a telemetry
      // object; the opening line itself is also dropped.
      insideObject = true;
      depth = 1;
    } else {
      kept.push(line);
    }
  }

  return kept.join('\n');
}
runCommand(
args: string[],
options: { stdin?: string } = {},
options: {
stdin?: string;
timeout?: number;
env?: Record<string, string | undefined>;
} = {},
): Promise<string> {
const { command, initialArgs } = this._getCommandAndArgs();
const commandArgs = [...initialArgs, ...args];
@@ -530,7 +568,13 @@ export class TestRig {
const child = spawn(command, commandArgs, {
cwd: this.testDir!,
stdio: 'pipe',
env: {
...process.env,
GEMINI_CLI_HOME: this.homeDir!,
...options.env,
},
});
this._spawnedProcesses.push(child);
let stdout = '';
let stderr = '';
@@ -540,29 +584,55 @@ export class TestRig {
child.stdin!.end();
}
child.stdout!.on('data', (data: Buffer) => {
child.stdout!.setEncoding('utf8');
child.stdout!.on('data', (data: string) => {
stdout += data;
if (env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true') {
process.stdout.write(data);
}
});
child.stderr!.on('data', (data: Buffer) => {
child.stderr!.setEncoding('utf8');
child.stderr!.on('data', (data: string) => {
stderr += data;
if (env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true') {
process.stderr.write(data);
}
});
const timeout = options.timeout ?? 120000;
const promise = new Promise<string>((resolve, reject) => {
const timer = setTimeout(() => {
child.kill('SIGKILL');
reject(
new Error(
`Process timed out after ${timeout}ms.\nStdout:\n${stdout}\nStderr:\n${stderr}`,
),
);
}, timeout);
child.on('error', (err) => {
clearTimeout(timer);
reject(err);
});
child.on('close', (code: number) => {
clearTimeout(timer);
if (code === 0) {
this._lastRunStdout = stdout;
let result = stdout;
if (stderr) {
result += `\n\nStdErr:\n${stderr}`;
}
resolve(result);
const result = this._filterPodmanTelemetry(stdout);
// Check if this is a JSON output test - if so, don't include stderr
// as it would corrupt the JSON
const isJsonOutput =
commandArgs.includes('--output-format') &&
commandArgs.includes('json');
const finalResult =
stderr && !isJsonOutput
? `${result}\n\nStdErr:\n${stderr}`
: result;
resolve(finalResult);
} else {
reject(new Error(`Process exited with code ${code}:\n${stderr}`));
}
@@ -596,6 +666,23 @@ export class TestRig {
}
this._interactiveRuns = [];
// Kill any other spawned processes that are still running
for (const child of this._spawnedProcesses) {
if (child.exitCode === null && child.signalCode === null) {
try {
child.kill('SIGKILL');
} catch (error) {
if (env['VERBOSE'] === 'true') {
console.warn(
'Failed to kill spawned process during cleanup:',
error,
);
}
}
}
}
this._spawnedProcesses = [];
if (
process.env['REGENERATE_MODEL_GOLDENS'] === 'true' &&
this.fakeResponsesPath
@@ -732,7 +819,6 @@ export class TestRig {
}
async waitForAnyToolCall(toolNames: string[], timeout?: number) {
// Use environment-specific timeout
if (!timeout) {
timeout = getDefaultTimeout();
}
@@ -980,7 +1066,7 @@ export class TestRig {
const apiRequests = logs.filter(
(logData) =>
logData.attributes &&
logData.attributes['event.name'] === 'gemini_cli.api_request',
logData.attributes['event.name'] === `gemini_cli.api_request`,
);
return apiRequests;
}
@@ -990,7 +1076,7 @@ export class TestRig {
const apiRequests = logs.filter(
(logData) =>
logData.attributes &&
logData.attributes['event.name'] === 'gemini_cli.api_request',
logData.attributes['event.name'] === `gemini_cli.api_request`,
);
return apiRequests.pop() || null;
}
@@ -1042,6 +1128,7 @@ export class TestRig {
async runInteractive(options?: {
args?: string | string[];
yolo?: boolean;
env?: Record<string, string | undefined>;
}): Promise<InteractiveRun> {
const yolo = options?.yolo !== false;
const { command, initialArgs } = this._getCommandAndArgs(
@@ -1049,13 +1136,11 @@ export class TestRig {
);
const commandArgs = [...initialArgs];
if (options?.args) {
if (Array.isArray(options.args)) {
commandArgs.push(...options.args);
} else {
commandArgs.push(options.args);
}
}
const envVars = {
...process.env,
GEMINI_CLI_HOME: this.homeDir!,
...options?.env,
};
const ptyOptions: pty.IPtyForkOptions = {
name: 'xterm-color',
@@ -1063,7 +1148,7 @@ export class TestRig {
rows: 80,
cwd: this.testDir!,
env: Object.fromEntries(
Object.entries(env).filter(([, v]) => v !== undefined),
Object.entries(envVars).filter(([, v]) => v !== undefined),
) as { [key: string]: string },
};
@@ -1130,11 +1215,11 @@ export class TestRig {
while (Date.now() - startTime < timeout) {
await commandFn();
// Give it a moment to process
await new Promise((resolve) => setTimeout(resolve, 500));
await sleep(500);
if (predicateFn()) {
return;
}
await new Promise((resolve) => setTimeout(resolve, interval));
await sleep(interval);
}
throw new Error(`pollCommand timed out after ${timeout}ms`);
}

View File

@@ -12,6 +12,8 @@ vi.mock('@google/gemini-cli-core', () => ({
Storage: vi.fn().mockImplementation(() => ({
getProjectTempDir: vi.fn().mockReturnValue('/tmp/project'),
})),
shutdownTelemetry: vi.fn(),
isTelemetrySdkInitialized: vi.fn().mockReturnValue(false),
}));
vi.mock('node:fs', () => ({
@@ -20,61 +22,61 @@ vi.mock('node:fs', () => ({
},
}));
import {
registerCleanup,
runExitCleanup,
registerSyncCleanup,
runSyncCleanup,
cleanupCheckpoints,
resetCleanupForTesting,
} from './cleanup.js';
describe('cleanup', () => {
beforeEach(async () => {
vi.resetModules();
vi.clearAllMocks();
// No need to re-assign, we can use the imported functions directly
// because we are using vi.resetModules() and re-importing if necessary,
// but actually, since we are mocking dependencies, we might not need to re-import cleanup.js
// unless it has internal state that needs resetting. It does (cleanupFunctions array).
// So we DO need to re-import it to get fresh state.
resetCleanupForTesting();
});
it('should run a registered synchronous function', async () => {
const cleanupModule = await import('./cleanup.js');
const cleanupFn = vi.fn();
cleanupModule.registerCleanup(cleanupFn);
registerCleanup(cleanupFn);
await cleanupModule.runExitCleanup();
await runExitCleanup();
expect(cleanupFn).toHaveBeenCalledTimes(1);
});
it('should run a registered asynchronous function', async () => {
const cleanupModule = await import('./cleanup.js');
const cleanupFn = vi.fn().mockResolvedValue(undefined);
cleanupModule.registerCleanup(cleanupFn);
registerCleanup(cleanupFn);
await cleanupModule.runExitCleanup();
await runExitCleanup();
expect(cleanupFn).toHaveBeenCalledTimes(1);
});
it('should run multiple registered functions', async () => {
const cleanupModule = await import('./cleanup.js');
const syncFn = vi.fn();
const asyncFn = vi.fn().mockResolvedValue(undefined);
cleanupModule.registerCleanup(syncFn);
cleanupModule.registerCleanup(asyncFn);
registerCleanup(syncFn);
registerCleanup(asyncFn);
await cleanupModule.runExitCleanup();
await runExitCleanup();
expect(syncFn).toHaveBeenCalledTimes(1);
expect(asyncFn).toHaveBeenCalledTimes(1);
});
it('should continue running cleanup functions even if one throws an error', async () => {
const cleanupModule = await import('./cleanup.js');
const errorFn = vi.fn().mockImplementation(() => {
throw new Error('test error');
});
const successFn = vi.fn();
cleanupModule.registerCleanup(errorFn);
cleanupModule.registerCleanup(successFn);
registerCleanup(errorFn);
registerCleanup(successFn);
await expect(cleanupModule.runExitCleanup()).resolves.not.toThrow();
await expect(runExitCleanup()).resolves.not.toThrow();
expect(errorFn).toHaveBeenCalledTimes(1);
expect(successFn).toHaveBeenCalledTimes(1);
@@ -82,23 +84,21 @@ describe('cleanup', () => {
describe('sync cleanup', () => {
it('should run registered sync functions', async () => {
const cleanupModule = await import('./cleanup.js');
const syncFn = vi.fn();
cleanupModule.registerSyncCleanup(syncFn);
cleanupModule.runSyncCleanup();
registerSyncCleanup(syncFn);
runSyncCleanup();
expect(syncFn).toHaveBeenCalledTimes(1);
});
it('should continue running sync cleanup functions even if one throws', async () => {
const cleanupModule = await import('./cleanup.js');
const errorFn = vi.fn().mockImplementation(() => {
throw new Error('test error');
});
const successFn = vi.fn();
cleanupModule.registerSyncCleanup(errorFn);
cleanupModule.registerSyncCleanup(successFn);
registerSyncCleanup(errorFn);
registerSyncCleanup(successFn);
expect(() => cleanupModule.runSyncCleanup()).not.toThrow();
expect(() => runSyncCleanup()).not.toThrow();
expect(errorFn).toHaveBeenCalledTimes(1);
expect(successFn).toHaveBeenCalledTimes(1);
});
@@ -106,8 +106,7 @@ describe('cleanup', () => {
describe('cleanupCheckpoints', () => {
it('should remove checkpoints directory', async () => {
const cleanupModule = await import('./cleanup.js');
await cleanupModule.cleanupCheckpoints();
await cleanupCheckpoints();
expect(fs.rm).toHaveBeenCalledWith(
path.join('/tmp/project', 'checkpoints'),
{
@@ -118,9 +117,8 @@ describe('cleanup', () => {
});
it('should ignore errors during checkpoint removal', async () => {
const cleanupModule = await import('./cleanup.js');
vi.mocked(fs.rm).mockRejectedValue(new Error('Failed to remove'));
await expect(cleanupModule.cleanupCheckpoints()).resolves.not.toThrow();
await expect(cleanupCheckpoints()).resolves.not.toThrow();
});
});
});

View File

@@ -25,6 +25,16 @@ export function registerSyncCleanup(fn: () => void) {
syncCleanupFunctions.push(fn);
}
/**
 * Clears every registered cleanup callback (async and sync) and the cached
 * telemetry config.
 *
 * Intended for tests only: lets each test start from a clean slate without
 * resorting to `vi.resetModules()`.
 */
export function resetCleanupForTesting() {
  cleanupFunctions.splice(0, cleanupFunctions.length);
  syncCleanupFunctions.splice(0, syncCleanupFunctions.length);
  configForTelemetry = null;
}
export function runSyncCleanup() {
for (const fn of syncCleanupFunctions) {
try {