From 416d243027d975c284a8ca939e2ebcbefdf5c07f Mon Sep 17 00:00:00 2001
From: "N. Taylor Mullen" <ntaylormullen@google.com>
Date: Tue, 6 Jan 2026 23:58:13 -0800
Subject: [PATCH] Enhance TestRig with process management and timeouts (#15908)

---
 integration-tests/json-output.test.ts  |  11 +-
 integration-tests/test-helper.ts       | 235 +++++++++++++++++--------
 packages/cli/src/utils/cleanup.test.ts |  60 +++----
 packages/cli/src/utils/cleanup.ts      |  10 ++
 4 files changed, 205 insertions(+), 111 deletions(-)

diff --git a/integration-tests/json-output.test.ts b/integration-tests/json-output.test.ts
index 7d364d8c6c..4d3bdb6a18 100644
--- a/integration-tests/json-output.test.ts
+++ b/integration-tests/json-output.test.ts
@@ -14,7 +14,6 @@ describe('JSON output', () => {
 
   beforeEach(async () => {
     rig = new TestRig();
-    await rig.setup('json-output-test');
   });
 
   afterEach(async () => {
@@ -22,6 +21,7 @@ describe('JSON output', () => {
   });
 
   it('should return a valid JSON with response and stats', async () => {
+    await rig.setup('json-output-response-stats');
     const result = await rig.run({
       args: ['What is the capital of France?', '--output-format', 'json'],
     });
@@ -36,6 +36,7 @@ describe('JSON output', () => {
   });
 
   it('should return a valid JSON with a session ID', async () => {
+    await rig.setup('json-output-session-id');
     const result = await rig.run({
       args: ['Hello', '--output-format', 'json'],
     });
@@ -47,7 +48,6 @@ describe('JSON output', () => {
   });
 
   it('should return a JSON error for sd auth mismatch before running', async () => {
-    process.env['GOOGLE_GENAI_USE_GCA'] = 'true';
     await rig.setup('json-output-auth-mismatch', {
       settings: {
         security: {
@@ -58,12 +58,13 @@ describe('JSON output', () => {
 
     let thrown: Error | undefined;
     try {
-      await rig.run({ args: ['Hello', '--output-format', 'json'] });
+      await rig.run({
+        args: ['Hello', '--output-format', 'json'],
+        env: { GOOGLE_GENAI_USE_GCA: 'true' },
+      });
       expect.fail('Expected process to exit with error');
     } catch (e) {
       thrown = e as Error;
-    } finally {
-      delete process.env['GOOGLE_GENAI_USE_GCA'];
     }
 
     expect(thrown).toBeDefined();
diff --git a/integration-tests/test-helper.ts b/integration-tests/test-helper.ts
index 818951afd9..53b409b733 100644
--- a/integration-tests/test-helper.ts
+++ b/integration-tests/test-helper.ts
@@ -5,11 +5,12 @@
  */
 
 import { expect } from 'vitest';
-import { execSync, spawn } from 'node:child_process';
+import { execSync, spawn, type ChildProcess } from 'node:child_process';
 import { mkdirSync, writeFileSync, readFileSync } from 'node:fs';
 import { join, dirname } from 'node:path';
 import { fileURLToPath } from 'node:url';
 import { env } from 'node:process';
+import { setTimeout as sleep } from 'node:timers/promises';
 import { DEFAULT_GEMINI_MODEL } from '../packages/core/src/config/models.js';
 import fs from 'node:fs';
 import * as pty from '@lydell/node-pty';
@@ -45,7 +46,7 @@ export async function poll(
     if (result) {
       return true;
     }
-    await new Promise((resolve) => setTimeout(resolve, interval));
+    await sleep(interval);
   }
   if (env['VERBOSE'] === 'true') {
     console.log(`Poll timed out after ${attempts} attempts`);
@@ -212,7 +213,7 @@ export class InteractiveRun {
       if (char === '\r') {
         // wait >30ms before `enter` to avoid fast return conversion
         // from bufferFastReturn() in KeypressContent.tsx
-        await new Promise((resolve) => setTimeout(resolve, 50));
+        await sleep(50);
       }
 
       this.ptyProcess.write(char);
@@ -239,7 +240,7 @@ export class InteractiveRun {
   // but may run into paste detection issues for larger strings.
   async sendText(text: string) {
     this.ptyProcess.write(text);
-    await new Promise((resolve) => setTimeout(resolve, 5));
+    await sleep(5);
   }
 
   // Simulates typing a string one character at a time to avoid paste detection.
@@ -247,7 +248,7 @@ export class InteractiveRun {
     const delay = 5;
     for (const char of text) {
       this.ptyProcess.write(char);
-      await new Promise((resolve) => setTimeout(resolve, delay));
+      await sleep(delay);
     }
   }
 
@@ -282,6 +283,7 @@ export class TestRig {
   // Original fake responses file path for rewriting goldens in record mode.
   originalFakeResponsesPath?: string;
   private _interactiveRuns: InteractiveRun[] = [];
+  private _spawnedProcesses: ChildProcess[] = [];
 
   setup(
     testName: string,
@@ -307,15 +309,16 @@ export class TestRig {
     }
 
     // Create a settings file to point the CLI to the local collector
-    const projectGeminiDir = join(this.testDir, GEMINI_DIR);
+    this._createSettingsFile(options.settings);
+  }
+
+  private _createSettingsFile(overrideSettings?: Record<string, unknown>) {
+    const projectGeminiDir = join(this.testDir!, GEMINI_DIR);
     mkdirSync(projectGeminiDir, { recursive: true });
 
     // In sandbox mode, use an absolute path for telemetry inside the container
     // The container mounts the test directory at the same path as the host
-    const telemetryPath = join(this.homeDir, 'telemetry.log'); // Always use home directory for telemetry
-
-    // Ensure the CLI uses our separate home directory for global state
-    process.env['GEMINI_CLI_HOME'] = this.homeDir;
+    const telemetryPath = join(this.homeDir!, 'telemetry.log'); // Always use home directory for telemetry
 
     const settings = {
       general: {
@@ -343,7 +346,7 @@ export class TestRig {
         env['GEMINI_SANDBOX'] !== 'false' ? env['GEMINI_SANDBOX'] : false,
       // Don't show the IDE connection dialog when running from VsCode
       ide: { enabled: false, hasSeenNudge: true },
-      ...options.settings, // Allow tests to override/add settings
+      ...overrideSettings, // Allow tests to override/add settings
     };
     writeFileSync(
       join(projectGeminiDir, 'settings.json'),
@@ -362,6 +365,7 @@ export class TestRig {
   }
 
   sync() {
+    if (os.platform() === 'win32') return;
     // ensure file system is done before spawning
     execSync('sync', { cwd: this.testDir! });
   }
@@ -396,6 +400,8 @@ export class TestRig {
     stdin?: string;
     stdinDoesNotEnd?: boolean;
     yolo?: boolean;
+    timeout?: number;
+    env?: Record<string, string | undefined>;
   }): Promise<string> {
     const yolo = options.yolo !== false;
     const { command, initialArgs } = this._getCommandAndArgs(
@@ -426,8 +432,13 @@ export class TestRig {
     const child = spawn(command, commandArgs, {
       cwd: this.testDir!,
       stdio: 'pipe',
-      env: env,
+      env: {
+        ...process.env,
+        GEMINI_CLI_HOME: this.homeDir!,
+        ...options.env,
+      },
     });
+    this._spawnedProcesses.push(child);
 
     let stdout = '';
     let stderr = '';
@@ -441,63 +452,46 @@ export class TestRig {
       child.stdin!.end();
     }
 
-    child.stdout!.on('data', (data: Buffer) => {
+    child.stdout!.setEncoding('utf8');
+    child.stdout!.on('data', (data: string) => {
       stdout += data;
       if (env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true') {
         process.stdout.write(data);
       }
     });
 
-    child.stderr!.on('data', (data: Buffer) => {
+    child.stderr!.setEncoding('utf8');
+    child.stderr!.on('data', (data: string) => {
       stderr += data;
       if (env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true') {
         process.stderr.write(data);
       }
     });
 
+    const timeout = options.timeout ?? 120000;
     const promise = new Promise<string>((resolve, reject) => {
+      const timer = setTimeout(() => {
+        child.kill('SIGKILL');
+        reject(
+          new Error(
+            `Process timed out after ${timeout}ms.\nStdout:\n${stdout}\nStderr:\n${stderr}`,
+          ),
+        );
+      }, timeout);
+
+      child.on('error', (err) => {
+        clearTimeout(timer);
+        reject(err);
+      });
+
       child.on('close', (code: number) => {
+        clearTimeout(timer);
         if (code === 0) {
           // Store the raw stdout for Podman telemetry parsing
           this._lastRunStdout = stdout;
 
           // Filter out telemetry output when running with Podman
-          // Podman seems to output telemetry to stdout even when writing to file
-          let result = stdout;
-          if (env['GEMINI_SANDBOX'] === 'podman') {
-            // Remove telemetry JSON objects from output
-            // They are multi-line JSON objects that start with { and contain telemetry fields
-            const lines = result.split(os.EOL);
-            const filteredLines = [];
-            let inTelemetryObject = false;
-            let braceDepth = 0;
-
-            for (const line of lines) {
-              if (!inTelemetryObject && line.trim() === '{') {
-                // Check if this might be start of telemetry object
-                inTelemetryObject = true;
-                braceDepth = 1;
-              } else if (inTelemetryObject) {
-                // Count braces to track nesting
-                for (const char of line) {
-                  if (char === '{') braceDepth++;
-                  else if (char === '}') braceDepth--;
-                }
-
-                // Check if we've closed all braces
-                if (braceDepth === 0) {
-                  inTelemetryObject = false;
-                  // Skip this line (the closing brace)
-                  continue;
-                }
-              } else {
-                // Not in telemetry object, keep the line
-                filteredLines.push(line);
-              }
-            }
-
-            result = filteredLines.join('\n');
-          }
+          const result = this._filterPodmanTelemetry(stdout);
 
           // Check if this is a JSON output test - if so, don't include stderr
           // as it would corrupt the JSON
@@ -506,11 +500,12 @@ export class TestRig {
             commandArgs.includes('json');
 
           // If we have stderr output and it's not a JSON test, include that also
-          if (stderr && !isJsonOutput) {
-            result += `\n\nStdErr:\n${stderr}`;
-          }
+          const finalResult =
+            stderr && !isJsonOutput
+              ? `${result}\n\nStdErr:\n${stderr}`
+              : result;
 
-          resolve(result);
+          resolve(finalResult);
         } else {
           reject(new Error(`Process exited with code ${code}:\n${stderr}`));
         }
@@ -520,9 +515,52 @@ export class TestRig {
     return promise;
   }
 
+  private _filterPodmanTelemetry(stdout: string): string {
+    if (env['GEMINI_SANDBOX'] !== 'podman') {
+      return stdout;
+    }
+
+    // Remove telemetry JSON objects from output
+    // They are multi-line JSON objects that start with { and contain telemetry fields
+    const lines = stdout.split(os.EOL);
+    const filteredLines = [];
+    let inTelemetryObject = false;
+    let braceDepth = 0;
+
+    for (const line of lines) {
+      if (!inTelemetryObject && line.trim() === '{') {
+        // Check if this might be start of telemetry object
+        inTelemetryObject = true;
+        braceDepth = 1;
+      } else if (inTelemetryObject) {
+        // Count braces to track nesting
+        for (const char of line) {
+          if (char === '{') braceDepth++;
+          else if (char === '}') braceDepth--;
+        }
+
+        // Check if we've closed all braces
+        if (braceDepth === 0) {
+          inTelemetryObject = false;
+          // Skip this line (the closing brace)
+          continue;
+        }
+      } else {
+        // Not in telemetry object, keep the line
+        filteredLines.push(line);
+      }
+    }
+
+    return filteredLines.join('\n');
+  }
+
   runCommand(
     args: string[],
-    options: { stdin?: string } = {},
+    options: {
+      stdin?: string;
+      timeout?: number;
+      env?: Record<string, string | undefined>;
+    } = {},
   ): Promise<string> {
     const { command, initialArgs } = this._getCommandAndArgs();
     const commandArgs = [...initialArgs, ...args];
@@ -530,7 +568,13 @@ export class TestRig {
     const child = spawn(command, commandArgs, {
       cwd: this.testDir!,
       stdio: 'pipe',
+      env: {
+        ...process.env,
+        GEMINI_CLI_HOME: this.homeDir!,
+        ...options.env,
+      },
     });
+    this._spawnedProcesses.push(child);
 
     let stdout = '';
     let stderr = '';
@@ -540,29 +584,55 @@ export class TestRig {
       child.stdin!.end();
     }
 
-    child.stdout!.on('data', (data: Buffer) => {
+    child.stdout!.setEncoding('utf8');
+    child.stdout!.on('data', (data: string) => {
       stdout += data;
       if (env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true') {
         process.stdout.write(data);
       }
     });
 
-    child.stderr!.on('data', (data: Buffer) => {
+    child.stderr!.setEncoding('utf8');
+    child.stderr!.on('data', (data: string) => {
       stderr += data;
       if (env['KEEP_OUTPUT'] === 'true' || env['VERBOSE'] === 'true') {
         process.stderr.write(data);
       }
     });
 
+    const timeout = options.timeout ?? 120000;
     const promise = new Promise<string>((resolve, reject) => {
+      const timer = setTimeout(() => {
+        child.kill('SIGKILL');
+        reject(
+          new Error(
+            `Process timed out after ${timeout}ms.\nStdout:\n${stdout}\nStderr:\n${stderr}`,
+          ),
+        );
+      }, timeout);
+
+      child.on('error', (err) => {
+        clearTimeout(timer);
+        reject(err);
+      });
+
       child.on('close', (code: number) => {
+        clearTimeout(timer);
         if (code === 0) {
           this._lastRunStdout = stdout;
-          let result = stdout;
-          if (stderr) {
-            result += `\n\nStdErr:\n${stderr}`;
-          }
-          resolve(result);
+          const result = this._filterPodmanTelemetry(stdout);
+
+          // Check if this is a JSON output test - if so, don't include stderr
+          // as it would corrupt the JSON
+          const isJsonOutput =
+            commandArgs.includes('--output-format') &&
+            commandArgs.includes('json');
+
+          const finalResult =
+            stderr && !isJsonOutput
+              ? `${result}\n\nStdErr:\n${stderr}`
+              : result;
+          resolve(finalResult);
         } else {
           reject(new Error(`Process exited with code ${code}:\n${stderr}`));
         }
@@ -596,6 +666,23 @@ export class TestRig {
     }
     this._interactiveRuns = [];
 
+    // Kill any other spawned processes that are still running
+    for (const child of this._spawnedProcesses) {
+      if (child.exitCode === null && child.signalCode === null) {
+        try {
+          child.kill('SIGKILL');
+        } catch (error) {
+          if (env['VERBOSE'] === 'true') {
+            console.warn(
+              'Failed to kill spawned process during cleanup:',
+              error,
+            );
+          }
+        }
+      }
+    }
+    this._spawnedProcesses = [];
+
     if (
       process.env['REGENERATE_MODEL_GOLDENS'] === 'true' &&
       this.fakeResponsesPath
@@ -732,7 +819,6 @@ export class TestRig {
   }
 
   async waitForAnyToolCall(toolNames: string[], timeout?: number) {
-    // Use environment-specific timeout
     if (!timeout) {
       timeout = getDefaultTimeout();
     }
@@ -980,7 +1066,7 @@ export class TestRig {
     const apiRequests = logs.filter(
       (logData) =>
         logData.attributes &&
-        logData.attributes['event.name'] === 'gemini_cli.api_request',
+        logData.attributes['event.name'] === `gemini_cli.api_request`,
     );
     return apiRequests;
   }
@@ -990,7 +1076,7 @@ export class TestRig {
     const apiRequests = logs.filter(
       (logData) =>
         logData.attributes &&
-        logData.attributes['event.name'] === 'gemini_cli.api_request',
+        logData.attributes['event.name'] === `gemini_cli.api_request`,
     );
     return apiRequests.pop() || null;
   }
@@ -1042,6 +1128,7 @@ export class TestRig {
   async runInteractive(options?: {
     args?: string | string[];
     yolo?: boolean;
+    env?: Record<string, string | undefined>;
   }): Promise<InteractiveRun> {
     const yolo = options?.yolo !== false;
     const { command, initialArgs } = this._getCommandAndArgs(
@@ -1049,13 +1136,11 @@ export class TestRig {
     );
     const commandArgs = [...initialArgs];
 
-    if (options?.args) {
-      if (Array.isArray(options.args)) {
-        commandArgs.push(...options.args);
-      } else {
-        commandArgs.push(options.args);
-      }
-    }
+    const envVars = {
+      ...process.env,
+      GEMINI_CLI_HOME: this.homeDir!,
+      ...options?.env,
+    };
 
     const ptyOptions: pty.IPtyForkOptions = {
       name: 'xterm-color',
@@ -1063,7 +1148,7 @@ export class TestRig {
       rows: 80,
       cwd: this.testDir!,
       env: Object.fromEntries(
-        Object.entries(env).filter(([, v]) => v !== undefined),
+        Object.entries(envVars).filter(([, v]) => v !== undefined),
       ) as { [key: string]: string },
     };
 
@@ -1130,11 +1215,11 @@ export class TestRig {
     while (Date.now() - startTime < timeout) {
       await commandFn();
       // Give it a moment to process
-      await new Promise((resolve) => setTimeout(resolve, 500));
+      await sleep(500);
       if (predicateFn()) {
         return;
       }
-      await new Promise((resolve) => setTimeout(resolve, interval));
+      await sleep(interval);
     }
     throw new Error(`pollCommand timed out after ${timeout}ms`);
   }
diff --git a/packages/cli/src/utils/cleanup.test.ts b/packages/cli/src/utils/cleanup.test.ts
index 64935d838d..3bc38e9110 100644
--- a/packages/cli/src/utils/cleanup.test.ts
+++ b/packages/cli/src/utils/cleanup.test.ts
@@ -12,6 +12,8 @@ vi.mock('@google/gemini-cli-core', () => ({
   Storage: vi.fn().mockImplementation(() => ({
     getProjectTempDir: vi.fn().mockReturnValue('/tmp/project'),
   })),
+  shutdownTelemetry: vi.fn(),
+  isTelemetrySdkInitialized: vi.fn().mockReturnValue(false),
 }));
 
 vi.mock('node:fs', () => ({
@@ -20,61 +22,61 @@ vi.mock('node:fs', () => ({
   },
 }));
 
+import {
+  registerCleanup,
+  runExitCleanup,
+  registerSyncCleanup,
+  runSyncCleanup,
+  cleanupCheckpoints,
+  resetCleanupForTesting,
+} from './cleanup.js';
+
 describe('cleanup', () => {
   beforeEach(async () => {
-    vi.resetModules();
     vi.clearAllMocks();
-    // No need to re-assign, we can use the imported functions directly
-    // because we are using vi.resetModules() and re-importing if necessary,
-    // but actually, since we are mocking dependencies, we might not need to re-import cleanup.js
-    // unless it has internal state that needs resetting. It does (cleanupFunctions array).
-    // So we DO need to re-import it to get fresh state.
+    resetCleanupForTesting();
   });
 
   it('should run a registered synchronous function', async () => {
-    const cleanupModule = await import('./cleanup.js');
     const cleanupFn = vi.fn();
-    cleanupModule.registerCleanup(cleanupFn);
+    registerCleanup(cleanupFn);
 
-    await cleanupModule.runExitCleanup();
+    await runExitCleanup();
 
     expect(cleanupFn).toHaveBeenCalledTimes(1);
   });
 
   it('should run a registered asynchronous function', async () => {
-    const cleanupModule = await import('./cleanup.js');
     const cleanupFn = vi.fn().mockResolvedValue(undefined);
-    cleanupModule.registerCleanup(cleanupFn);
+    registerCleanup(cleanupFn);
 
-    await cleanupModule.runExitCleanup();
+    await runExitCleanup();
 
     expect(cleanupFn).toHaveBeenCalledTimes(1);
   });
 
   it('should run multiple registered functions', async () => {
-    const cleanupModule = await import('./cleanup.js');
     const syncFn = vi.fn();
     const asyncFn = vi.fn().mockResolvedValue(undefined);
 
-    cleanupModule.registerCleanup(syncFn);
-    cleanupModule.registerCleanup(asyncFn);
+    registerCleanup(syncFn);
+    registerCleanup(asyncFn);
 
-    await cleanupModule.runExitCleanup();
+    await runExitCleanup();
 
     expect(syncFn).toHaveBeenCalledTimes(1);
     expect(asyncFn).toHaveBeenCalledTimes(1);
   });
 
   it('should continue running cleanup functions even if one throws an error', async () => {
-    const cleanupModule = await import('./cleanup.js');
     const errorFn = vi.fn().mockImplementation(() => {
       throw new Error('test error');
     });
     const successFn = vi.fn();
-    cleanupModule.registerCleanup(errorFn);
-    cleanupModule.registerCleanup(successFn);
+    registerCleanup(errorFn);
+    registerCleanup(successFn);
 
-    await expect(cleanupModule.runExitCleanup()).resolves.not.toThrow();
+    await expect(runExitCleanup()).resolves.not.toThrow();
 
     expect(errorFn).toHaveBeenCalledTimes(1);
     expect(successFn).toHaveBeenCalledTimes(1);
@@ -82,23 +84,21 @@ describe('cleanup', () => {
 
   describe('sync cleanup', () => {
     it('should run registered sync functions', async () => {
-      const cleanupModule = await import('./cleanup.js');
       const syncFn = vi.fn();
-      cleanupModule.registerSyncCleanup(syncFn);
-      cleanupModule.runSyncCleanup();
+      registerSyncCleanup(syncFn);
+      runSyncCleanup();
       expect(syncFn).toHaveBeenCalledTimes(1);
     });
 
     it('should continue running sync cleanup functions even if one throws', async () => {
-      const cleanupModule = await import('./cleanup.js');
       const errorFn = vi.fn().mockImplementation(() => {
         throw new Error('test error');
       });
       const successFn = vi.fn();
-      cleanupModule.registerSyncCleanup(errorFn);
-      cleanupModule.registerSyncCleanup(successFn);
+      registerSyncCleanup(errorFn);
+      registerSyncCleanup(successFn);
 
-      expect(() => cleanupModule.runSyncCleanup()).not.toThrow();
+      expect(() => runSyncCleanup()).not.toThrow();
       expect(errorFn).toHaveBeenCalledTimes(1);
       expect(successFn).toHaveBeenCalledTimes(1);
     });
@@ -106,8 +106,7 @@ describe('cleanup', () => {
 
   describe('cleanupCheckpoints', () => {
     it('should remove checkpoints directory', async () => {
-      const cleanupModule = await import('./cleanup.js');
-      await cleanupModule.cleanupCheckpoints();
+      await cleanupCheckpoints();
       expect(fs.rm).toHaveBeenCalledWith(
         path.join('/tmp/project', 'checkpoints'),
         {
@@ -118,9 +117,8 @@ describe('cleanup', () => {
     });
 
     it('should ignore errors during checkpoint removal', async () => {
-      const cleanupModule = await import('./cleanup.js');
       vi.mocked(fs.rm).mockRejectedValue(new Error('Failed to remove'));
-      await expect(cleanupModule.cleanupCheckpoints()).resolves.not.toThrow();
+      await expect(cleanupCheckpoints()).resolves.not.toThrow();
     });
   });
 });
diff --git a/packages/cli/src/utils/cleanup.ts b/packages/cli/src/utils/cleanup.ts
index 5d063927a2..ccb467572b 100644
--- a/packages/cli/src/utils/cleanup.ts
+++ b/packages/cli/src/utils/cleanup.ts
@@ -25,6 +25,16 @@ export function registerSyncCleanup(fn: () => void) {
   syncCleanupFunctions.push(fn);
 }
 
+/**
+ * Resets the internal cleanup state for testing purposes.
+ * This allows tests to run in isolation without vi.resetModules().
+ */
+export function resetCleanupForTesting() {
+  cleanupFunctions.length = 0;
+  syncCleanupFunctions.length = 0;
+  configForTelemetry = null;
+}
+
 export function runSyncCleanup() {
   for (const fn of syncCleanupFunctions) {
     try {