mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-05-16 01:12:49 +00:00
fix(simulator): improve PTY stabilization and stall recovery
This commit is contained in:
@@ -190,42 +190,42 @@ describe('UserSimulator', () => {
|
||||
vi.useRealTimers();
|
||||
});
|
||||
|
||||
it('should re-evaluate if internal tool state changes even if screen content is static', async () => {
|
||||
it('should terminate if terminal state does not change after 3 consecutive inputs', async () => {
|
||||
const exitSpy = vi.spyOn(process, 'exit').mockImplementation(() => {
|
||||
return undefined as never;
|
||||
});
|
||||
const simulator = new UserSimulator(
|
||||
mockConfig,
|
||||
mockGetScreen,
|
||||
mockStdinBuffer,
|
||||
);
|
||||
mockGetScreen.mockReturnValue('Responding...');
|
||||
mockGetScreen.mockReturnValue('Static Screen');
|
||||
mockContentGenerator.generateContent.mockResolvedValue({
|
||||
text: JSON.stringify({ action: 'y\r' }),
|
||||
});
|
||||
|
||||
vi.useFakeTimers();
|
||||
simulator.start();
|
||||
|
||||
// Trigger first tick
|
||||
// Tick 1: Action sent, state recorded
|
||||
await vi.advanceTimersByTimeAsync(2000);
|
||||
expect(mockContentGenerator.generateContent).toHaveBeenCalledTimes(1);
|
||||
|
||||
// Trigger second tick with same screen - should skip
|
||||
await vi.advanceTimersByTimeAsync(2000);
|
||||
expect(mockContentGenerator.generateContent).toHaveBeenCalledTimes(1);
|
||||
|
||||
// Simulate tool call update
|
||||
const handler = mockMessageBus.subscribe.mock.calls[0][1];
|
||||
handler({
|
||||
type: MessageBusType.TOOL_CALLS_UPDATE,
|
||||
toolCalls: [
|
||||
{
|
||||
status: CoreToolCallStatus.AwaitingApproval,
|
||||
request: { callId: '123', name: 'test_tool' },
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
// Trigger third tick with same screen but new tool state - should NOT skip
|
||||
// Tick 2: Same screen, action sent, stall count = 1
|
||||
await vi.advanceTimersByTimeAsync(2000);
|
||||
expect(mockContentGenerator.generateContent).toHaveBeenCalledTimes(2);
|
||||
|
||||
// Tick 3: Same screen, action sent, stall count = 2
|
||||
await vi.advanceTimersByTimeAsync(2000);
|
||||
expect(mockContentGenerator.generateContent).toHaveBeenCalledTimes(3);
|
||||
|
||||
// Tick 4: Same screen, should trigger termination
|
||||
await vi.advanceTimersByTimeAsync(2000);
|
||||
|
||||
expect(exitSpy).toHaveBeenCalledWith(1);
|
||||
|
||||
simulator.stop();
|
||||
exitSpy.mockRestore();
|
||||
vi.useRealTimers();
|
||||
});
|
||||
|
||||
@@ -304,9 +304,11 @@ describe('UserSimulator', () => {
|
||||
(call) => call[1] === 'simulator-compression',
|
||||
);
|
||||
expect(compressionCall).toBeDefined();
|
||||
expect(compressionCall[0].contents[0].parts[0].text).toContain(
|
||||
'Summarize the following chronological session notes',
|
||||
);
|
||||
if (compressionCall) {
|
||||
expect(compressionCall[0].contents[0].parts[0].text).toContain(
|
||||
'Summarize the following chronological session notes',
|
||||
);
|
||||
}
|
||||
|
||||
// Wait for the compression to finish and merge.
|
||||
// We need to resolve the promise for the compression call.
|
||||
@@ -332,10 +334,11 @@ describe('UserSimulator', () => {
|
||||
call[1] === 'simulator-prompt',
|
||||
);
|
||||
|
||||
const finalPrompt = finalCall[0].contents[0].parts[0].text;
|
||||
expect(finalPrompt).toContain('1. Compressed Summary');
|
||||
// Note 5 (the one added during or after compression trigger) might be there too
|
||||
// depending on timing, but 'Compressed Summary' must be there.
|
||||
expect(finalCall).toBeDefined();
|
||||
if (finalCall) {
|
||||
const finalPrompt = finalCall[0].contents[0].parts[0].text;
|
||||
expect(finalPrompt).toContain('1. Compressed Summary');
|
||||
}
|
||||
|
||||
simulator.stop();
|
||||
vi.useRealTimers();
|
||||
|
||||
@@ -32,6 +32,7 @@ export class UserSimulator {
|
||||
private lastStateKey = '';
|
||||
private isProcessing = false;
|
||||
private isCompressingMemory = false;
|
||||
private consecutiveStallCount = 0;
|
||||
private staleCycleCount = 0;
|
||||
private interactionsFile: string | null = null;
|
||||
|
||||
@@ -116,6 +117,12 @@ export class UserSimulator {
|
||||
|
||||
try {
|
||||
this.isProcessing = true;
|
||||
|
||||
// Stabilization delay: Wait for the terminal UI to finish rendering
|
||||
// (e.g. ANSI clear/repaint sequences) before looking at the screen.
|
||||
// Increased to 1s to handle high-latency PTYs in Docker.
|
||||
await new Promise((resolve) => setTimeout(resolve, 1000));
|
||||
|
||||
const screen = this.getScreen();
|
||||
if (!screen) return;
|
||||
|
||||
@@ -128,10 +135,12 @@ export class UserSimulator {
|
||||
.replace(/\n([ \t]*\n)+/g, '\n\n');
|
||||
|
||||
const normalizedScreen = strippedScreen
|
||||
.replace(/[\u2800-\u28FF]/g, '')
|
||||
.replace(/[|/-\\]/g, '')
|
||||
.replace(/\b\d+(\.\d+)?s\b/g, '')
|
||||
.replace(/\b\d+m(\s+\d+s)?\b/g, '')
|
||||
.replace(/[\u2800-\u28FF]/g, '') // Braille patterns
|
||||
.replace(/[|/-\\]/g, '') // Spinners
|
||||
.replace(/\b\d+(\.\d+)?s\b/g, '') // Timers (seconds)
|
||||
.replace(/\b\d+m(\s+\d+s)?\b/g, '') // Timers (minutes)
|
||||
.replace(/\b\d+%\b/g, '') // Percentages
|
||||
.replace(/\b\d+\/\d+\b/g, '') // Progress ratios (e.g. 1/10)
|
||||
.replace(/\(\s*\)/g, '')
|
||||
.trim();
|
||||
|
||||
@@ -142,16 +151,46 @@ export class UserSimulator {
|
||||
const currentStateKey = `${normalizedScreen}::${pendingIds}`;
|
||||
|
||||
if (currentStateKey === this.lastStateKey) {
|
||||
if (this.pendingToolCalls.length > 0) {
|
||||
this.staleCycleCount++;
|
||||
// Every 10 ticks (10s) on a static screen while blocked, we try a prompt
|
||||
if (this.staleCycleCount % 10 !== 0) {
|
||||
return;
|
||||
const lastAction = this.actionHistory[this.actionHistory.length - 1];
|
||||
if (lastAction && lastAction !== '<WAIT>') {
|
||||
this.consecutiveStallCount++;
|
||||
|
||||
// Increased limit to 10 for high-load environments.
|
||||
if (this.consecutiveStallCount >= 10) {
|
||||
const errorMsg =
|
||||
`[SIMULATOR] CRITICAL STALL DETECTED: Terminal state has not changed after ${this.consecutiveStallCount} consecutive inputs. Terminating to prevent loop.`;
|
||||
debugLogger.error(errorMsg);
|
||||
if (this.interactionsFile) {
|
||||
fs.appendFileSync(
|
||||
this.interactionsFile,
|
||||
`[ERROR] ${errorMsg}\n\n`,
|
||||
);
|
||||
}
|
||||
// eslint-disable-next-line no-console
|
||||
console.error(`\n${errorMsg}`);
|
||||
this.stop();
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// RECOVERY: If screen is blank and we are stalled, try a terminal refresh.
|
||||
if (normalizedScreen.length === 0 && this.pendingToolCalls.length > 0) {
|
||||
debugLogger.log('[SIMULATOR] Screen is blank but system is BLOCKED. Sending refresh carriage return.');
|
||||
this.stdinBuffer.write('\r');
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
return;
|
||||
// If it was a <WAIT> action or no action yet, we still want the 10s fallback for internal state sync
|
||||
if (this.pendingToolCalls.length > 0) {
|
||||
this.staleCycleCount++;
|
||||
if (this.staleCycleCount % 10 !== 0) {
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
this.consecutiveStallCount = 0;
|
||||
this.staleCycleCount = 0;
|
||||
}
|
||||
this.lastStateKey = currentStateKey;
|
||||
@@ -277,7 +316,7 @@ ${strippedScreen}
|
||||
if (startIdx !== -1 && endIdx !== -1 && endIdx > startIdx) {
|
||||
cleanJson = cleanJson.substring(startIdx, endIdx + 1);
|
||||
} else {
|
||||
cleanJson = cleanJson.replace(/^```json\s*|\s*```$/gm, '').trim();
|
||||
cleanJson = cleanJson.replace(/^\`\`\`json\s*|\s*\`\`\`$/gm, '').trim();
|
||||
}
|
||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||
parsedJson = JSON.parse(cleanJson) as SimulatorResponse;
|
||||
@@ -301,7 +340,7 @@ ${strippedScreen}
|
||||
/^\d+\\r$/.test(text) ||
|
||||
text === '\\r'
|
||||
) {
|
||||
responseText = text.replace(/^[`"']+|[`"']+$/g, '');
|
||||
responseText = text.replace(/^[\`\"']+|[\`\"']+$/g, '');
|
||||
} else {
|
||||
responseText = ''; // Prevent typing broken JSON string
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user