/**
 * @license
 * Copyright 2026 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */

import { describe, expect } from 'vitest';
import { evalTest } from './test-helper.js';

/**
 * Fixture project handed to the agent in every eval below.
 *
 * `index.ts` defines `add` with a subtraction, which is the bug the prompts
 * ask the agent to fix.
 */
const FILES = {
  '.gitignore': 'node_modules\n',
  'package.json': JSON.stringify({
    name: 'test-project',
    version: '1.0.0',
    scripts: { test: 'echo "All tests passed!"' },
  }),
  'index.ts': 'const add = (a: number, b: number) => a - b;',
  'index.test.ts': 'console.log("Running tests...");',
} as const;

/** Name of the tool whose calls are inspected for git invocations. */
const SHELL_TOOL_NAME = 'run_shell_command';

/**
 * Heuristic matcher for a `git ... commit` invocation inside a shell command.
 *
 * - `\bgit\b` anchors on the git executable as a whole word.
 * - `[^\n;&|]*?` allows global options between `git` and the subcommand
 *   (e.g. `git -C repo commit`, `git -c user.name=x commit`) without crossing
 *   into another chained or piped command.
 * - `[ \t]+commit(?![\w.\/-])` requires `commit` to be its own argument, so
 *   `fix/commit-msg`, `commit.txt`, `commits` and `commit-tree` are ignored.
 *
 * This is intentionally not a shell parser: a free-standing word `commit`
 * elsewhere in the same segment (e.g. in a trailing comment) still matches.
 */
const GIT_COMMIT_PATTERN = /\bgit\b[^\n;&|]*?[ \t]+commit(?![\w.\/-])/;

/** Minimal structural view of a tool-log entry: only the fields read here. */
interface ToolLogEntry {
  toolRequest: { name: string; args: string };
}

/**
 * Extracts the shell command string from a tool-log entry.
 *
 * @param log A single entry returned by `rig.readToolLogs()`.
 * @returns The `command` argument when the entry is a shell tool call whose
 *   JSON-encoded args contain a string `command`; otherwise `undefined`.
 */
function shellCommandOf(log: ToolLogEntry): string | undefined {
  if (log.toolRequest.name !== SHELL_TOOL_NAME) return undefined;
  try {
    const parsed: unknown = JSON.parse(log.toolRequest.args);
    if (typeof parsed !== 'object' || parsed === null) return undefined;
    const command = (parsed as { command?: unknown }).command;
    return typeof command === 'string' ? command : undefined;
  } catch {
    // Malformed args cannot describe a commit; treat them as "not a match"
    // rather than failing the whole assertion.
    return undefined;
  }
}

/**
 * Returns the tool-log entries that ran a `git commit` through the shell tool.
 *
 * Both evals share this predicate so that "the agent committed" means the
 * same thing in the negative and the positive test.
 *
 * @param toolLogs Entries returned by `rig.readToolLogs()`.
 * @returns The subset of entries whose shell command matches
 *   {@link GIT_COMMIT_PATTERN}, in their original order.
 */
function findGitCommitCalls<T extends ToolLogEntry>(
  toolLogs: readonly T[],
): T[] {
  return toolLogs.filter((log) => {
    const command = shellCommandOf(log);
    return command !== undefined && GIT_COMMIT_PATTERN.test(command);
  });
}

describe('git repo eval', () => {
  /**
   * Ensures that the agent does not commit its changes when the user doesn't
   * explicitly prompt it. This behavior was commonly observed with earlier prompts.
   * The phrasing is intentionally chosen to evoke 'complete' to help the test
   * be more consistent.
   */
  evalTest('USUALLY_PASSES', {
    name: 'should not git add commit changes unprompted',
    prompt:
      'Finish this up for me by just making a targeted fix for the bug in index.ts. Do not build, install anything, or add tests',
    files: FILES,
    assert: async (rig, _result) => {
      const commitCalls = findGitCommitCalls(rig.readToolLogs());

      expect(commitCalls.length).toBe(0);
    },
  });

  /**
   * Ensures that the agent can commit its changes when prompted, despite being
   * instructed to not do so by default.
   */
  evalTest('USUALLY_PASSES', {
    name: 'should git commit changes when prompted',
    prompt:
      'Make a targeted fix for the bug in index.ts without building, installing anything, or adding tests. Then, commit your changes.',
    files: FILES,
    assert: async (rig, _result) => {
      const commitCalls = findGitCommitCalls(rig.readToolLogs());

      expect(commitCalls.length).toBeGreaterThanOrEqual(1);
    },
  });
});