From 416d243027d975c284a8ca939e2ebcbefdf5c07f Mon Sep 17 00:00:00 2001 From: "N. Taylor Mullen" Date: Tue, 6 Jan 2026 23:58:13 -0800 Subject: [PATCH] Enhance TestRig with process management and timeouts (#15908) --- integration-tests/json-output.test.ts | 11 +- integration-tests/test-helper.ts | 235 +++++++++++++++++-------- packages/cli/src/utils/cleanup.test.ts | 60 +++---- packages/cli/src/utils/cleanup.ts | 10 ++ 4 files changed, 205 insertions(+), 111 deletions(-) diff --git a/integration-tests/json-output.test.ts b/integration-tests/json-output.test.ts index 7d364d8c6c..4d3bdb6a18 100644 --- a/integration-tests/json-output.test.ts +++ b/integration-tests/json-output.test.ts @@ -14,7 +14,6 @@ describe('JSON output', () => { beforeEach(async () => { rig = new TestRig(); - await rig.setup('json-output-test'); }); afterEach(async () => { @@ -22,6 +21,7 @@ describe('JSON output', () => { }); it('should return a valid JSON with response and stats', async () => { + await rig.setup('json-output-response-stats'); const result = await rig.run({ args: ['What is the capital of France?', '--output-format', 'json'], }); @@ -36,6 +36,7 @@ describe('JSON output', () => { }); it('should return a valid JSON with a session ID', async () => { + await rig.setup('json-output-session-id'); const result = await rig.run({ args: ['Hello', '--output-format', 'json'], }); @@ -47,7 +48,6 @@ describe('JSON output', () => { }); it('should return a JSON error for sd auth mismatch before running', async () => { - process.env['GOOGLE_GENAI_USE_GCA'] = 'true'; await rig.setup('json-output-auth-mismatch', { settings: { security: { @@ -58,12 +58,13 @@ describe('JSON output', () => { let thrown: Error | undefined; try { - await rig.run({ args: ['Hello', '--output-format', 'json'] }); + await rig.run({ + args: ['Hello', '--output-format', 'json'], + env: { GOOGLE_GENAI_USE_GCA: 'true' }, + }); expect.fail('Expected process to exit with error'); } catch (e) { thrown = e as Error; - } finally { - delete process.env['GOOGLE_GENAI_USE_GCA']; } expect(thrown).toBeDefined(); diff --git a/integration-tests/test-helper.ts b/integration-tests/test-helper.ts index 818951afd9..53b409b733 100644 --- a/integration-tests/test-helper.ts +++ b/integration-tests/test-helper.ts @@ -5,11 +5,12 @@ */ import { expect } from 'vitest'; -import { execSync, spawn } from 'node:child_process'; +import { execSync, spawn, type ChildProcess } from 'node:child_process'; import { mkdirSync, writeFileSync, readFileSync } from 'node:fs'; import { join, dirname } from 'node:path'; import { fileURLToPath } from 'node:url'; import { env } from 'node:process'; +import { setTimeout as sleep } from 'node:timers/promises'; import { DEFAULT_GEMINI_MODEL } from '../packages/core/src/config/models.js'; import fs from 'node:fs'; import * as pty from '@lydell/node-pty'; @@ -45,7 +46,7 @@ export async function poll( if (result) { return true; } - await new Promise((resolve) => setTimeout(resolve, interval)); + await sleep(interval); } if (env['VERBOSE'] === 'true') { console.log(`Poll timed out after ${attempts} attempts`); @@ -212,7 +213,7 @@ export class InteractiveRun { if (char === '\r') { // wait >30ms before `enter` to avoid fast return conversion // from bufferFastReturn() in KeypressContent.tsx - await new Promise((resolve) => setTimeout(resolve, 50)); + await sleep(50); } this.ptyProcess.write(char); @@ -239,7 +240,7 @@ export class InteractiveRun { // but may run into paste detection issues for larger strings. async sendText(text: string) { this.ptyProcess.write(text); - await new Promise((resolve) => setTimeout(resolve, 5)); + await sleep(5); } // Simulates typing a string one character at a time to avoid paste detection. @@ -247,7 +248,7 @@ export class InteractiveRun { const delay = 5; for (const char of text) { this.ptyProcess.write(char); - await new Promise((resolve) => setTimeout(resolve, delay)); + await sleep(delay); } } @@ -282,6 +283,7 @@ export class TestRig { // Original fake responses file path for rewriting goldens in record mode. originalFakeResponsesPath?: string; private _interactiveRuns: InteractiveRun[] = []; + private _spawnedProcesses: ChildProcess[] = []; setup( testName: string, @@ -307,15 +309,16 @@ export class TestRig { } // Create a settings file to point the CLI to the local collector - const projectGeminiDir = join(this.testDir, GEMINI_DIR); + this._createSettingsFile(options.settings); + } + + private _createSettingsFile(overrideSettings?: Record) { + const projectGeminiDir = join(this.testDir!, GEMINI_DIR); mkdirSync(projectGeminiDir, { recursive: true }); // In sandbox mode, use an absolute path for telemetry inside the container // The container mounts the test directory at the same path as the host - const telemetryPath = join(this.homeDir, 'telemetry.log'); // Always use home directory for telemetry - - // Ensure the CLI uses our separate home directory for global state - process.env['GEMINI_CLI_HOME'] = this.homeDir; + const telemetryPath = join(this.homeDir!, 'telemetry.log'); // Always use home directory for telemetry const settings = { general: { @@ -343,7 +346,7 @@ export class TestRig { env['GEMINI_SANDBOX'] !== 'false' ? env['GEMINI_SANDBOX'] : false, // Don't show the IDE connection dialog when running from VsCode ide: { enabled: false, hasSeenNudge: true }, - ...options.settings, // Allow tests to override/add settings + ...overrideSettings, // Allow tests to override/add settings }; writeFileSync( join(projectGeminiDir, 'settings.json'), @@ -362,6 +365,7 @@ export class TestRig { } sync() { + if (os.platform() === 'win32') return; // ensure file system is done before spawning execSync('sync', { cwd: this.testDir! }); } @@ -396,6 +400,8 @@ export class TestRig { stdin?: string; stdinDoesNotEnd?: boolean; yolo?: boolean; + timeout?: number; + env?: Record; }): Promise { const yolo = options.yolo !== false; const { command, initialArgs } = this._getCommandAndArgs( @@ -426,8 +432,13 @@ export class TestRig { const child = spawn(command, commandArgs, { cwd: this.testDir!, stdio: 'pipe', - env: env, + env: { + ...process.env, + GEMINI_CLI_HOME: this.homeDir!, + ...options.env, + }, }); + this._spawnedProcesses.push(child); let stdout = ''; let stderr = ''; @@ -441,63 +452,46 @@ export class TestRig { child.stdin!.end(); } - child.stdout!.on('data', (data: Buffer) => { + child.stdout!.setEncoding('utf8'); + child.stdout!.on('data', (data: string) => { stdout += data; if (env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true') { process.stdout.write(data); } }); - child.stderr!.on('data', (data: Buffer) => { + child.stderr!.setEncoding('utf8'); + child.stderr!.on('data', (data: string) => { stderr += data; if (env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true') { process.stderr.write(data); } }); + const timeout = options.timeout ?? 120000; const promise = new Promise((resolve, reject) => { + const timer = setTimeout(() => { + child.kill('SIGKILL'); + reject( + new Error( + `Process timed out after ${timeout}ms.\nStdout:\n${stdout}\nStderr:\n${stderr}`, + ), + ); + }, timeout); + + child.on('error', (err) => { + clearTimeout(timer); + reject(err); + }); + child.on('close', (code: number) => { + clearTimeout(timer); if (code === 0) { // Store the raw stdout for Podman telemetry parsing this._lastRunStdout = stdout; // Filter out telemetry output when running with Podman - // Podman seems to output telemetry to stdout even when writing to file - let result = stdout; - if (env['GEMINI_SANDBOX'] === 'podman') { - // Remove telemetry JSON objects from output - // They are multi-line JSON objects that start with { and contain telemetry fields - const lines = result.split(os.EOL); - const filteredLines = []; - let inTelemetryObject = false; - let braceDepth = 0; - - for (const line of lines) { - if (!inTelemetryObject && line.trim() === '{') { - // Check if this might be start of telemetry object - inTelemetryObject = true; - braceDepth = 1; - } else if (inTelemetryObject) { - // Count braces to track nesting - for (const char of line) { - if (char === '{') braceDepth++; - else if (char === '}') braceDepth--; - } - - // Check if we've closed all braces - if (braceDepth === 0) { - inTelemetryObject = false; - // Skip this line (the closing brace) - continue; - } - } else { - // Not in telemetry object, keep the line - filteredLines.push(line); - } - } - - result = filteredLines.join('\n'); - } + const result = this._filterPodmanTelemetry(stdout); // Check if this is a JSON output test - if so, don't include stderr // as it would corrupt the JSON @@ -506,11 +500,12 @@ export class TestRig { commandArgs.includes('json'); // If we have stderr output and it's not a JSON test, include that also - if (stderr && !isJsonOutput) { - result += `\n\nStdErr:\n${stderr}`; - } + const finalResult = + stderr && !isJsonOutput + ? `${result}\n\nStdErr:\n${stderr}` + : result; - resolve(result); + resolve(finalResult); } else { reject(new Error(`Process exited with code ${code}:\n${stderr}`)); } @@ -520,9 +515,52 @@ export class TestRig { return promise; } + private _filterPodmanTelemetry(stdout: string): string { + if (env['GEMINI_SANDBOX'] !== 'podman') { + return stdout; + } + + // Remove telemetry JSON objects from output + // They are multi-line JSON objects that start with { and contain telemetry fields + const lines = stdout.split(os.EOL); + const filteredLines = []; + let inTelemetryObject = false; + let braceDepth = 0; + + for (const line of lines) { + if (!inTelemetryObject && line.trim() === '{') { + // Check if this might be start of telemetry object + inTelemetryObject = true; + braceDepth = 1; + } else if (inTelemetryObject) { + // Count braces to track nesting + for (const char of line) { + if (char === '{') braceDepth++; + else if (char === '}') braceDepth--; + } + + // Check if we've closed all braces + if (braceDepth === 0) { + inTelemetryObject = false; + // Skip this line (the closing brace) + continue; + } + } else { + // Not in telemetry object, keep the line + filteredLines.push(line); + } + } + + return filteredLines.join('\n'); + } + runCommand( args: string[], - options: { stdin?: string } = {}, + options: { + stdin?: string; + timeout?: number; + env?: Record; + } = {}, ): Promise { const { command, initialArgs } = this._getCommandAndArgs(); const commandArgs = [...initialArgs, ...args]; @@ -530,7 +568,13 @@ export class TestRig { const child = spawn(command, commandArgs, { cwd: this.testDir!, stdio: 'pipe', + env: { + ...process.env, + GEMINI_CLI_HOME: this.homeDir!, + ...options.env, + }, }); + this._spawnedProcesses.push(child); let stdout = ''; let stderr = ''; @@ -540,29 +584,55 @@ export class TestRig { child.stdin!.end(); } - child.stdout!.on('data', (data: Buffer) => { + child.stdout!.setEncoding('utf8'); + child.stdout!.on('data', (data: string) => { stdout += data; if (env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true') { process.stdout.write(data); } }); - child.stderr!.on('data', (data: Buffer) => { + child.stderr!.setEncoding('utf8'); + child.stderr!.on('data', (data: string) => { stderr += data; if (env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true') { process.stderr.write(data); } }); + const timeout = options.timeout ?? 120000; const promise = new Promise((resolve, reject) => { + const timer = setTimeout(() => { + child.kill('SIGKILL'); + reject( + new Error( + `Process timed out after ${timeout}ms.\nStdout:\n${stdout}\nStderr:\n${stderr}`, + ), + ); + }, timeout); + + child.on('error', (err) => { + clearTimeout(timer); + reject(err); + }); + child.on('close', (code: number) => { + clearTimeout(timer); if (code === 0) { this._lastRunStdout = stdout; - let result = stdout; - if (stderr) { - result += `\n\nStdErr:\n${stderr}`; - } - resolve(result); + const result = this._filterPodmanTelemetry(stdout); + + // Check if this is a JSON output test - if so, don't include stderr + // as it would corrupt the JSON + const isJsonOutput = + commandArgs.includes('--output-format') && + commandArgs.includes('json'); + + const finalResult = + stderr && !isJsonOutput + ? `${result}\n\nStdErr:\n${stderr}` + : result; + resolve(finalResult); } else { reject(new Error(`Process exited with code ${code}:\n${stderr}`)); } @@ -596,6 +666,23 @@ export class TestRig { } this._interactiveRuns = []; + // Kill any other spawned processes that are still running + for (const child of this._spawnedProcesses) { + if (child.exitCode === null && child.signalCode === null) { + try { + child.kill('SIGKILL'); + } catch (error) { + if (env['VERBOSE'] === 'true') { + console.warn( + 'Failed to kill spawned process during cleanup:', + error, + ); + } + } + } + } + this._spawnedProcesses = []; + if ( process.env['REGENERATE_MODEL_GOLDENS'] === 'true' && this.fakeResponsesPath @@ -732,7 +819,6 @@ export class TestRig { } async waitForAnyToolCall(toolNames: string[], timeout?: number) { - // Use environment-specific timeout if (!timeout) { timeout = getDefaultTimeout(); } @@ -980,7 +1066,7 @@ export class TestRig { const apiRequests = logs.filter( (logData) => logData.attributes && - logData.attributes['event.name'] === 'gemini_cli.api_request', + logData.attributes['event.name'] === `gemini_cli.api_request`, ); return apiRequests; } @@ -990,7 +1076,7 @@ export class TestRig { const apiRequests = logs.filter( (logData) => logData.attributes && - logData.attributes['event.name'] === 'gemini_cli.api_request', + logData.attributes['event.name'] === `gemini_cli.api_request`, ); return apiRequests.pop() || null; } @@ -1042,6 +1128,7 @@ export class TestRig { async runInteractive(options?: { args?: string | string[]; yolo?: boolean; + env?: Record; }): Promise { const yolo = options?.yolo !== false; const { command, initialArgs } = this._getCommandAndArgs( @@ -1049,13 +1136,11 @@ export class TestRig { ); const commandArgs = [...initialArgs]; - if (options?.args) { - if (Array.isArray(options.args)) { - commandArgs.push(...options.args); - } else { - commandArgs.push(options.args); - } - } + const envVars = { + ...process.env, + GEMINI_CLI_HOME: this.homeDir!, + ...options?.env, + }; const ptyOptions: pty.IPtyForkOptions = { name: 'xterm-color', @@ -1063,7 +1148,7 @@ export class TestRig { rows: 80, cwd: this.testDir!, env: Object.fromEntries( - Object.entries(env).filter(([, v]) => v !== undefined), + Object.entries(envVars).filter(([, v]) => v !== undefined), ) as { [key: string]: string }, }; @@ -1130,11 +1215,11 @@ export class TestRig { while (Date.now() - startTime < timeout) { await commandFn(); // Give it a moment to process - await new Promise((resolve) => setTimeout(resolve, 500)); + await sleep(500); if (predicateFn()) { return; } - await new Promise((resolve) => setTimeout(resolve, interval)); + await sleep(interval); } throw new Error(`pollCommand timed out after ${timeout}ms`); } diff --git a/packages/cli/src/utils/cleanup.test.ts b/packages/cli/src/utils/cleanup.test.ts index 64935d838d..3bc38e9110 100644 --- a/packages/cli/src/utils/cleanup.test.ts +++ b/packages/cli/src/utils/cleanup.test.ts @@ -12,6 +12,8 @@ vi.mock('@google/gemini-cli-core', () => ({ Storage: vi.fn().mockImplementation(() => ({ getProjectTempDir: vi.fn().mockReturnValue('/tmp/project'), })), + shutdownTelemetry: vi.fn(), + isTelemetrySdkInitialized: vi.fn().mockReturnValue(false), })); vi.mock('node:fs', () => ({ @@ -20,61 +22,61 @@ vi.mock('node:fs', () => ({ }, })); +import { + registerCleanup, + runExitCleanup, + registerSyncCleanup, + runSyncCleanup, + cleanupCheckpoints, + resetCleanupForTesting, +} from './cleanup.js'; + describe('cleanup', () => { beforeEach(async () => { - vi.resetModules(); vi.clearAllMocks(); - // No need to re-assign, we can use the imported functions directly - // because we are using vi.resetModules() and re-importing if necessary, - // but actually, since we are mocking dependencies, we might not need to re-import cleanup.js - // unless it has internal state that needs resetting. It does (cleanupFunctions array). - // So we DO need to re-import it to get fresh state. + resetCleanupForTesting(); }); it('should run a registered synchronous function', async () => { - const cleanupModule = await import('./cleanup.js'); const cleanupFn = vi.fn(); - cleanupModule.registerCleanup(cleanupFn); + registerCleanup(cleanupFn); - await cleanupModule.runExitCleanup(); + await runExitCleanup(); expect(cleanupFn).toHaveBeenCalledTimes(1); }); it('should run a registered asynchronous function', async () => { - const cleanupModule = await import('./cleanup.js'); const cleanupFn = vi.fn().mockResolvedValue(undefined); - cleanupModule.registerCleanup(cleanupFn); + registerCleanup(cleanupFn); - await cleanupModule.runExitCleanup(); + await runExitCleanup(); expect(cleanupFn).toHaveBeenCalledTimes(1); }); it('should run multiple registered functions', async () => { - const cleanupModule = await import('./cleanup.js'); const syncFn = vi.fn(); const asyncFn = vi.fn().mockResolvedValue(undefined); - cleanupModule.registerCleanup(syncFn); - cleanupModule.registerCleanup(asyncFn); + registerCleanup(syncFn); + registerCleanup(asyncFn); - await cleanupModule.runExitCleanup(); + await runExitCleanup(); expect(syncFn).toHaveBeenCalledTimes(1); expect(asyncFn).toHaveBeenCalledTimes(1); }); it('should continue running cleanup functions even if one throws an error', async () => { - const cleanupModule = await import('./cleanup.js'); const errorFn = vi.fn().mockImplementation(() => { throw new Error('test error'); }); const successFn = vi.fn(); - cleanupModule.registerCleanup(errorFn); - cleanupModule.registerCleanup(successFn); + registerCleanup(errorFn); + registerCleanup(successFn); - await expect(cleanupModule.runExitCleanup()).resolves.not.toThrow(); + await expect(runExitCleanup()).resolves.not.toThrow(); expect(errorFn).toHaveBeenCalledTimes(1); expect(successFn).toHaveBeenCalledTimes(1); @@ -82,23 +84,21 @@ describe('cleanup', () => { describe('sync cleanup', () => { it('should run registered sync functions', async () => { - const cleanupModule = await import('./cleanup.js'); const syncFn = vi.fn(); - cleanupModule.registerSyncCleanup(syncFn); - cleanupModule.runSyncCleanup(); + registerSyncCleanup(syncFn); + runSyncCleanup(); expect(syncFn).toHaveBeenCalledTimes(1); }); it('should continue running sync cleanup functions even if one throws', async () => { - const cleanupModule = await import('./cleanup.js'); const errorFn = vi.fn().mockImplementation(() => { throw new Error('test error'); }); const successFn = vi.fn(); - cleanupModule.registerSyncCleanup(errorFn); - cleanupModule.registerSyncCleanup(successFn); + registerSyncCleanup(errorFn); + registerSyncCleanup(successFn); - expect(() => cleanupModule.runSyncCleanup()).not.toThrow(); + expect(() => runSyncCleanup()).not.toThrow(); expect(errorFn).toHaveBeenCalledTimes(1); expect(successFn).toHaveBeenCalledTimes(1); }); @@ -106,8 +106,7 @@ describe('cleanup', () => { describe('cleanupCheckpoints', () => { it('should remove checkpoints directory', async () => { - const cleanupModule = await import('./cleanup.js'); - await cleanupModule.cleanupCheckpoints(); + await cleanupCheckpoints(); expect(fs.rm).toHaveBeenCalledWith( path.join('/tmp/project', 'checkpoints'), { @@ -118,9 +117,8 @@ describe('cleanup', () => { }); it('should ignore errors during checkpoint removal', async () => { - const cleanupModule = await import('./cleanup.js'); vi.mocked(fs.rm).mockRejectedValue(new Error('Failed to remove')); - await expect(cleanupModule.cleanupCheckpoints()).resolves.not.toThrow(); + await expect(cleanupCheckpoints()).resolves.not.toThrow(); }); }); }); diff --git a/packages/cli/src/utils/cleanup.ts b/packages/cli/src/utils/cleanup.ts index 5d063927a2..ccb467572b 100644 --- a/packages/cli/src/utils/cleanup.ts +++ b/packages/cli/src/utils/cleanup.ts @@ -25,6 +25,16 @@ export function registerSyncCleanup(fn: () => void) { syncCleanupFunctions.push(fn); } +/** + * Resets the internal cleanup state for testing purposes. + * This allows tests to run in isolation without vi.resetModules(). + */ +export function resetCleanupForTesting() { + cleanupFunctions.length = 0; + syncCleanupFunctions.length = 0; + configForTelemetry = null; +} + export function runSyncCleanup() { for (const fn of syncCleanupFunctions) { try {