mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-02-01 22:48:03 +00:00
Capture debug logs for nightly evals
This commit is contained in:
1
.github/workflows/evals-nightly.yml
vendored
1
.github/workflows/evals-nightly.yml
vendored
@@ -59,6 +59,7 @@ jobs:
|
||||
GEMINI_MODEL: '${{ matrix.model }}'
|
||||
RUN_EVALS: "${{ github.event.inputs.run_all != 'false' }}"
|
||||
TEST_NAME_PATTERN: '${{ github.event.inputs.test_name_pattern }}'
|
||||
DEBUG_MODE: 'true'
|
||||
run: |
|
||||
CMD="npm run test:all_evals"
|
||||
PATTERN="${{ env.TEST_NAME_PATTERN }}"
|
||||
|
||||
@@ -34,6 +34,14 @@ export type EvalPolicy = 'ALWAYS_PASSES' | 'USUALLY_PASSES';
|
||||
export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
|
||||
const fn = async () => {
|
||||
const rig = new TestRig();
|
||||
let result = '';
|
||||
const sanitizedName = evalCase.name
|
||||
.replace(/[^a-z0-9]/gi, '_')
|
||||
.toLowerCase();
|
||||
const logDir = path.resolve('evals/logs');
|
||||
await fs.promises.mkdir(logDir, { recursive: true });
|
||||
const debugLogPath = path.join(logDir, `${sanitizedName}.debug.log`);
|
||||
|
||||
try {
|
||||
rig.setup(evalCase.name, evalCase.params);
|
||||
|
||||
@@ -59,9 +67,13 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
|
||||
execSync('git commit --allow-empty -m "Initial commit"', execOptions);
|
||||
}
|
||||
|
||||
const result = await rig.run({
|
||||
result = await rig.run({
|
||||
args: evalCase.prompt,
|
||||
approvalMode: evalCase.approvalMode ?? 'yolo',
|
||||
env: {
|
||||
GEMINI_DEBUG_LOG_FILE: debugLogPath,
|
||||
DEBUG_MODE: 'true',
|
||||
},
|
||||
});
|
||||
|
||||
const unauthorizedErrorPrefix =
|
||||
@@ -78,6 +90,9 @@ export function evalTest(policy: EvalPolicy, evalCase: EvalCase) {
|
||||
evalCase.name,
|
||||
JSON.stringify(rig.readToolLogs(), null, 2),
|
||||
);
|
||||
if (result) {
|
||||
await logToFile(evalCase.name, result, '.output.log');
|
||||
}
|
||||
await rig.cleanup();
|
||||
}
|
||||
};
|
||||
@@ -98,10 +113,14 @@ export interface EvalCase {
|
||||
assert: (rig: TestRig, result: string) => Promise<void>;
|
||||
}
|
||||
|
||||
async function logToFile(name: string, content: string) {
|
||||
async function logToFile(
|
||||
name: string,
|
||||
content: string,
|
||||
extension: string = '.log',
|
||||
) {
|
||||
const logDir = 'evals/logs';
|
||||
await fs.promises.mkdir(logDir, { recursive: true });
|
||||
const sanitizedName = name.replace(/[^a-z0-9]/gi, '_').toLowerCase();
|
||||
const logFile = `${logDir}/${sanitizedName}.log`;
|
||||
const logFile = `${logDir}/${sanitizedName}${extension}`;
|
||||
await fs.promises.writeFile(logFile, content);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user