Feat/retry fetch notifications (#21813)

2026-06-01 19:03:42 +00:00 · 2026-03-10 23:33:50 -04:00
parent 8b09ccc288
commit f8ad3a200a
24 changed files with 165 additions and 92 deletions
--- a/packages/core/src/config/config.ts
+++ b/packages/core/src/config/config.ts
@@ -1024,7 +1024,7 @@ export class Config implements McpContext, AgentLoopContext {
          params.gemmaModelRouter?.classifier?.model ?? 'gemma3-1b-gpu-custom',
      },
    };
-    this.retryFetchErrors = params.retryFetchErrors ?? false;
+    this.retryFetchErrors = params.retryFetchErrors ?? true;
    this.maxAttempts = Math.min(
      params.maxAttempts ?? DEFAULT_MAX_ATTEMPTS,
      DEFAULT_MAX_ATTEMPTS,
--- a/packages/core/src/core/baseLlmClient.test.ts
+++ b/packages/core/src/core/baseLlmClient.test.ts
@@ -118,6 +118,8 @@ describe('BaseLlmClient', () => {
        .mockReturnValue(createAvailabilityServiceMock()),
      setActiveModel: vi.fn(),
      getUserTier: vi.fn().mockReturnValue(undefined),
+      getRetryFetchErrors: vi.fn().mockReturnValue(true),
+      getMaxAttempts: vi.fn().mockReturnValue(3),
      getModel: vi.fn().mockReturnValue('test-model'),
      getActiveModel: vi.fn().mockReturnValue('test-model'),
    } as unknown as Mocked<Config>;
--- a/packages/core/src/core/baseLlmClient.ts
+++ b/packages/core/src/core/baseLlmClient.ts
@@ -21,6 +21,8 @@ import { getErrorMessage } from '../utils/errors.js';
 import { logMalformedJsonResponse } from '../telemetry/loggers.js';
 import { MalformedJsonResponseEvent, LlmRole } from '../telemetry/types.js';
 import { retryWithBackoff } from '../utils/retry.js';
+import { coreEvents } from '../utils/events.js';
+import { getDisplayString } from '../config/models.js';
 import type { ModelConfigKey } from '../services/modelConfigService.js';
 import {
  applyModelSelection,
@@ -327,6 +329,17 @@ export class BaseLlmClient {
          : undefined,
        authType:
          this.authType ?? this.config.getContentGeneratorConfig()?.authType,
+        retryFetchErrors: this.config.getRetryFetchErrors(),
+        onRetry: (attempt, error, delayMs) => {
+          coreEvents.emitRetryAttempt({
+            attempt,
+            maxAttempts:
+              availabilityMaxAttempts ?? maxAttempts ?? DEFAULT_MAX_ATTEMPTS,
+            delayMs,
+            error: error instanceof Error ? error.message : String(error),
+            model: getDisplayString(currentModel),
+          });
+        },
      });
    } catch (error) {
      if (abortSignal?.aborted) {
--- a/packages/core/src/core/client.test.ts
+++ b/packages/core/src/core/client.test.ts
@@ -235,6 +235,8 @@ describe('Gemini Client (client.ts)', () => {
        getDirectories: vi.fn().mockReturnValue(['/test/dir']),
      }),
      getGeminiClient: vi.fn(),
+      getRetryFetchErrors: vi.fn().mockReturnValue(true),
+      getMaxAttempts: vi.fn().mockReturnValue(3),
      getModelRouterService: vi
        .fn()
        .mockReturnValue(mockRouterService as unknown as ModelRouterService),
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -30,6 +30,12 @@ import { getCoreSystemPrompt } from './prompts.js';
 import { checkNextSpeaker } from '../utils/nextSpeakerChecker.js';
 import { reportError } from '../utils/errorReporting.js';
 import { GeminiChat } from './geminiChat.js';
+import { coreEvents, CoreEvent } from '../utils/events.js';
+import {
+  getDisplayString,
+  resolveModel,
+  isGemini2Model,
+} from '../config/models.js';
 import {
  retryWithBackoff,
  type RetryAvailabilityContext,
@@ -70,9 +76,7 @@ import {
  applyModelSelection,
  createAvailabilityContextProvider,
 } from '../availability/policyHelpers.js';
-import { resolveModel, isGemini2Model } from '../config/models.js';
 import { partToString } from '../utils/partUtils.js';
-import { coreEvents, CoreEvent } from '../utils/events.js';

 const MAX_TURNS = 100;

@@ -1093,7 +1097,18 @@ export class GeminiClient {
        onValidationRequired: onValidationRequiredCallback,
        authType: this.config.getContentGeneratorConfig()?.authType,
        maxAttempts: availabilityMaxAttempts,
+        retryFetchErrors: this.config.getRetryFetchErrors(),
        getAvailabilityContext,
+        onRetry: (attempt, error, delayMs) => {
+          coreEvents.emitRetryAttempt({
+            attempt,
+            maxAttempts:
+              availabilityMaxAttempts ?? this.config.getMaxAttempts(),
+            delayMs,
+            error: error instanceof Error ? error.message : String(error),
+            model: getDisplayString(currentAttemptModel),
+          });
+        },
      });

      return result;
--- a/packages/core/src/tools/web-fetch.test.ts
+++ b/packages/core/src/tools/web-fetch.test.ts
@@ -248,6 +248,7 @@ describe('WebFetchTool', () => {
      getProxy: vi.fn(),
      getGeminiClient: mockGetGeminiClient,
      getRetryFetchErrors: vi.fn().mockReturnValue(false),
+      getMaxAttempts: vi.fn().mockReturnValue(3),
      getDirectWebFetch: vi.fn().mockReturnValue(false),
      modelConfigService: {
        getResolvedConfig: vi.fn().mockImplementation(({ model }) => ({
--- a/packages/core/src/tools/web-fetch.ts
+++ b/packages/core/src/tools/web-fetch.ts
@@ -31,6 +31,7 @@ import {
 import { LlmRole } from '../telemetry/llmRole.js';
 import { WEB_FETCH_TOOL_NAME } from './tool-names.js';
 import { debugLogger } from '../utils/debugLogger.js';
+import { coreEvents } from '../utils/events.js';
 import { retryWithBackoff } from '../utils/retry.js';
 import { WEB_FETCH_DEFINITION } from './definitions/coreTools.js';
 import { resolveToolDeclaration } from './definitions/resolver.js';
@@ -186,6 +187,16 @@ class WebFetchToolInvocation extends BaseToolInvocation<
    super(params, messageBus, _toolName, _toolDisplayName);
  }

+  private handleRetry(attempt: number, error: unknown, delayMs: number): void {
+    coreEvents.emitRetryAttempt({
+      attempt,
+      maxAttempts: this.config.getMaxAttempts(),
+      delayMs,
+      error: error instanceof Error ? error.message : String(error),
+      model: 'Web Fetch',
+    });
+  }
+
  private async executeFallback(signal: AbortSignal): Promise<ToolResult> {
    const { validUrls: urls } = parsePrompt(this.params.prompt!);
    // For now, we only support one URL for fallback
@@ -214,6 +225,8 @@ class WebFetchToolInvocation extends BaseToolInvocation<
        },
        {
          retryFetchErrors: this.config.getRetryFetchErrors(),
+          onRetry: (attempt, error, delayMs) =>
+            this.handleRetry(attempt, error, delayMs),
        },
      );

@@ -423,6 +436,8 @@ ${textContent}
        },
        {
          retryFetchErrors: this.config.getRetryFetchErrors(),
+          onRetry: (attempt, error, delayMs) =>
+            this.handleRetry(attempt, error, delayMs),
        },
      );

--- a/packages/core/src/utils/retry.test.ts
+++ b/packages/core/src/utils/retry.test.ts
@@ -350,6 +350,25 @@ describe('retryWithBackoff', () => {
      expect(mockFn).toHaveBeenCalledTimes(2);
    });

+    it("should retry on 'Incomplete JSON segment' when retryFetchErrors is true", async () => {
+      const mockFn = vi.fn();
+      mockFn.mockRejectedValueOnce(
+        new Error('Incomplete JSON segment at the end'),
+      );
+      mockFn.mockResolvedValueOnce('success');
+
+      const promise = retryWithBackoff(mockFn, {
+        retryFetchErrors: true,
+        initialDelayMs: 10,
+      });
+
+      await vi.runAllTimersAsync();
+
+      const result = await promise;
+      expect(result).toBe('success');
+      expect(mockFn).toHaveBeenCalledTimes(2);
+    });
+
    it('should retry on common network error codes (ECONNRESET)', async () => {
      const mockFn = vi.fn();
      const error = new Error('read ECONNRESET');
--- a/packages/core/src/utils/retry.ts
+++ b/packages/core/src/utils/retry.ts
@@ -100,6 +100,7 @@ function getNetworkErrorCode(error: unknown): string | undefined {
 }

 const FETCH_FAILED_MESSAGE = 'fetch failed';
+const INCOMPLETE_JSON_MESSAGE = 'incomplete json segment';

 /**
 * Default predicate function to determine if a retry should be attempted.
@@ -119,8 +120,12 @@ export function isRetryableError(
  }

  if (retryFetchErrors && error instanceof Error) {
-    // Check for generic fetch failed message (case-insensitive)
-    if (error.message.toLowerCase().includes(FETCH_FAILED_MESSAGE)) {
+    const lowerMessage = error.message.toLowerCase();
+    // Check for generic fetch failed message or incomplete JSON segment (common stream error)
+    if (
+      lowerMessage.includes(FETCH_FAILED_MESSAGE) ||
+      lowerMessage.includes(INCOMPLETE_JSON_MESSAGE)
+    ) {
      return true;
    }
  }