diff --git a/packages/cli/src/services/UserSimulator.test.ts b/packages/cli/src/services/UserSimulator.test.ts index 4442a77b41..7b1128d313 100644 --- a/packages/cli/src/services/UserSimulator.test.ts +++ b/packages/cli/src/services/UserSimulator.test.ts @@ -190,42 +190,42 @@ describe('UserSimulator', () => { vi.useRealTimers(); }); - it('should re-evaluate if internal tool state changes even if screen content is static', async () => { + it('should terminate if terminal state does not change after 3 consecutive inputs', async () => { + const exitSpy = vi.spyOn(process, 'exit').mockImplementation(() => { + return undefined as never; + }); const simulator = new UserSimulator( mockConfig, mockGetScreen, mockStdinBuffer, ); - mockGetScreen.mockReturnValue('Responding...'); + mockGetScreen.mockReturnValue('Static Screen'); + mockContentGenerator.generateContent.mockResolvedValue({ + text: JSON.stringify({ action: 'y\r' }), + }); vi.useFakeTimers(); simulator.start(); - // Trigger first tick + // Tick 1: Action sent, state recorded await vi.advanceTimersByTimeAsync(2000); expect(mockContentGenerator.generateContent).toHaveBeenCalledTimes(1); - // Trigger second tick with same screen - should skip - await vi.advanceTimersByTimeAsync(2000); - expect(mockContentGenerator.generateContent).toHaveBeenCalledTimes(1); - - // Simulate tool call update - const handler = mockMessageBus.subscribe.mock.calls[0][1]; - handler({ - type: MessageBusType.TOOL_CALLS_UPDATE, - toolCalls: [ - { - status: CoreToolCallStatus.AwaitingApproval, - request: { callId: '123', name: 'test_tool' }, - }, - ], - }); - - // Trigger third tick with same screen but new tool state - should NOT skip + // Tick 2: Same screen, action sent, stall count = 1 await vi.advanceTimersByTimeAsync(2000); expect(mockContentGenerator.generateContent).toHaveBeenCalledTimes(2); + // Tick 3: Same screen, action sent, stall count = 2 + await vi.advanceTimersByTimeAsync(2000); + 
expect(mockContentGenerator.generateContent).toHaveBeenCalledTimes(3); + + // Tick 4: Same screen, should trigger termination + await vi.advanceTimersByTimeAsync(2000); + + expect(exitSpy).toHaveBeenCalledWith(1); + simulator.stop(); + exitSpy.mockRestore(); vi.useRealTimers(); }); @@ -304,9 +304,11 @@ describe('UserSimulator', () => { (call) => call[1] === 'simulator-compression', ); expect(compressionCall).toBeDefined(); - expect(compressionCall[0].contents[0].parts[0].text).toContain( - 'Summarize the following chronological session notes', - ); + if (compressionCall) { + expect(compressionCall[0].contents[0].parts[0].text).toContain( + 'Summarize the following chronological session notes', + ); + } // Wait for the compression to finish and merge. // We need to resolve the promise for the compression call. @@ -332,10 +334,11 @@ describe('UserSimulator', () => { call[1] === 'simulator-prompt', ); - const finalPrompt = finalCall[0].contents[0].parts[0].text; - expect(finalPrompt).toContain('1. Compressed Summary'); - // Note 5 (the one added during or after compression trigger) might be there too - // depending on timing, but 'Compressed Summary' must be there. + expect(finalCall).toBeDefined(); + if (finalCall) { + const finalPrompt = finalCall[0].contents[0].parts[0].text; + expect(finalPrompt).toContain('1. 
Compressed Summary'); + } simulator.stop(); vi.useRealTimers(); diff --git a/packages/cli/src/services/UserSimulator.ts b/packages/cli/src/services/UserSimulator.ts index ecb928dccf..b2ad9b609e 100644 --- a/packages/cli/src/services/UserSimulator.ts +++ b/packages/cli/src/services/UserSimulator.ts @@ -32,6 +32,7 @@ export class UserSimulator { private lastStateKey = ''; private isProcessing = false; private isCompressingMemory = false; + private consecutiveStallCount = 0; private staleCycleCount = 0; private interactionsFile: string | null = null; @@ -116,6 +117,12 @@ export class UserSimulator { try { this.isProcessing = true; + + // Stabilization delay: Wait for the terminal UI to finish rendering + // (e.g. ANSI clear/repaint sequences) before looking at the screen. + // Increased to 1s to handle high-latency PTYs in Docker. + await new Promise((resolve) => setTimeout(resolve, 1000)); + const screen = this.getScreen(); if (!screen) return; @@ -128,10 +135,12 @@ export class UserSimulator { .replace(/\n([ \t]*\n)+/g, '\n\n'); const normalizedScreen = strippedScreen - .replace(/[\u2800-\u28FF]/g, '') - .replace(/[|/-\\]/g, '') - .replace(/\b\d+(\.\d+)?s\b/g, '') - .replace(/\b\d+m(\s+\d+s)?\b/g, '') + .replace(/[\u2800-\u28FF]/g, '') // Braille patterns + .replace(/[|/-\\]/g, '') // Spinners + .replace(/\b\d+(\.\d+)?s\b/g, '') // Timers (seconds) + .replace(/\b\d+m(\s+\d+s)?\b/g, '') // Timers (minutes) + .replace(/\b\d+%\b/g, '') // Percentages + .replace(/\b\d+\/\d+\b/g, '') // Progress ratios (e.g. 
1/10) .replace(/\(\s*\)/g, '') .trim(); @@ -142,16 +151,46 @@ export class UserSimulator { const currentStateKey = `${normalizedScreen}::${pendingIds}`; if (currentStateKey === this.lastStateKey) { - if (this.pendingToolCalls.length > 0) { - this.staleCycleCount++; - // Every 10 ticks (10s) on a static screen while blocked, we try a prompt - if (this.staleCycleCount % 10 !== 0) { - return; + const lastAction = this.actionHistory[this.actionHistory.length - 1]; + if (lastAction && lastAction !== '') { + this.consecutiveStallCount++; + + // Increased limit to 10 for high-load environments. + if (this.consecutiveStallCount >= 10) { + const errorMsg = + `[SIMULATOR] CRITICAL STALL DETECTED: Terminal state has not changed after ${this.consecutiveStallCount} consecutive inputs. Terminating to prevent loop.`; + debugLogger.error(errorMsg); + if (this.interactionsFile) { + fs.appendFileSync( + this.interactionsFile, + `[ERROR] ${errorMsg}\n\n`, + ); + } + // eslint-disable-next-line no-console + console.error(`\n${errorMsg}`); + this.stop(); + process.exit(1); + } + + // RECOVERY: If screen is blank and we are stalled, try a terminal refresh. + if (normalizedScreen.length === 0 && this.pendingToolCalls.length > 0) { + debugLogger.log('[SIMULATOR] Screen is blank but system is BLOCKED. 
Sending refresh carriage return.'); + this.stdinBuffer.write('\r'); + return; } } else { - return; + // If it was an empty action or no action yet, we still want the 10s fallback for internal state sync + if (this.pendingToolCalls.length > 0) { + this.staleCycleCount++; + if (this.staleCycleCount % 10 !== 0) { + return; + } + } else { + return; + } } } else { + this.consecutiveStallCount = 0; this.staleCycleCount = 0; } this.lastStateKey = currentStateKey; @@ -277,7 +316,7 @@ ${strippedScreen} if (startIdx !== -1 && endIdx !== -1 && endIdx > startIdx) { cleanJson = cleanJson.substring(startIdx, endIdx + 1); } else { - cleanJson = cleanJson.replace(/^```json\s*|\s*```$/gm, '').trim(); + cleanJson = cleanJson.replace(/^\`\`\`json\s*|\s*\`\`\`$/gm, '').trim(); } // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion parsedJson = JSON.parse(cleanJson) as SimulatorResponse; @@ -301,7 +340,7 @@ ${strippedScreen} /^\d+\\r$/.test(text) || text === '\\r' ) { - responseText = text.replace(/^[`"']+|[`"']+$/g, ''); + responseText = text.replace(/^[\`\"']+|[\`\"']+$/g, ''); } else { responseText = ''; // Prevent typing broken JSON string }