From a16d5983cf08965de261ddeaeedb12bd3ef695d3 Mon Sep 17 00:00:00 2001 From: Adam Weidman <65992621+adamfweidman@users.noreply.github.com> Date: Tue, 20 Jan 2026 14:43:43 -0500 Subject: [PATCH] feat: Add flash lite utility fallback chain (#17056) --- docs/cli/model-routing.md | 5 ++ docs/core/index.md | 4 ++ .../core/src/availability/policyCatalog.ts | 28 +++++++++++ .../src/availability/policyHelpers.test.ts | 47 ++++++++++++++++++- .../core/src/availability/policyHelpers.ts | 28 +++++++---- 5 files changed, 101 insertions(+), 11 deletions(-) diff --git a/docs/cli/model-routing.md b/docs/cli/model-routing.md index 1f833d3f6e..1f7ba5da09 100644 --- a/docs/cli/model-routing.md +++ b/docs/cli/model-routing.md @@ -17,6 +17,11 @@ policies. may prompt you to switch to a fallback model (by default always prompts you). + Some internal utility calls (such as prompt completion and classification) + use a silent fallback chain for `gemini-2.5-flash-lite` and will fall back + to `gemini-2.5-flash` and `gemini-2.5-pro` without prompting or changing the + configured model. + 3. **Model switch:** If approved, or if the policy allows for silent fallback, the CLI will use an available fallback model for the current turn or the remainder of the session. diff --git a/docs/core/index.md b/docs/core/index.md index dc6237fff5..e16af9af77 100644 --- a/docs/core/index.md +++ b/docs/core/index.md @@ -68,6 +68,10 @@ If you are using the default "pro" model and the CLI detects that you are being rate-limited, it automatically switches to the "flash" model for the current session. This allows you to continue working without interruption. +Internal utility calls that use `gemini-2.5-flash-lite` (for example, prompt +completion and classification) silently fall back to `gemini-2.5-flash` and +`gemini-2.5-pro` when quota is exhausted, without changing the configured model. 
+ ## File discovery service The file discovery service is responsible for finding files in the project that diff --git a/packages/core/src/availability/policyCatalog.ts b/packages/core/src/availability/policyCatalog.ts index 98f8fddc8a..48713621cf 100644 --- a/packages/core/src/availability/policyCatalog.ts +++ b/packages/core/src/availability/policyCatalog.ts @@ -11,6 +11,7 @@ import type { ModelPolicyStateMap, } from './modelPolicy.js'; import { + DEFAULT_GEMINI_FLASH_LITE_MODEL, DEFAULT_GEMINI_FLASH_MODEL, DEFAULT_GEMINI_MODEL, PREVIEW_GEMINI_FLASH_MODEL, @@ -36,6 +37,13 @@ const DEFAULT_ACTIONS: ModelPolicyActionMap = { unknown: 'prompt', }; +const SILENT_ACTIONS: ModelPolicyActionMap = { + terminal: 'silent', + transient: 'silent', + not_found: 'silent', + unknown: 'silent', +}; + const DEFAULT_STATE: ModelPolicyStateMap = { terminal: 'terminal', transient: 'terminal', @@ -53,6 +61,22 @@ const PREVIEW_CHAIN: ModelPolicyChain = [ definePolicy({ model: PREVIEW_GEMINI_FLASH_MODEL, isLastResort: true }), ]; +const FLASH_LITE_CHAIN: ModelPolicyChain = [ + definePolicy({ + model: DEFAULT_GEMINI_FLASH_LITE_MODEL, + actions: SILENT_ACTIONS, + }), + definePolicy({ + model: DEFAULT_GEMINI_FLASH_MODEL, + actions: SILENT_ACTIONS, + }), + definePolicy({ + model: DEFAULT_GEMINI_MODEL, + isLastResort: true, + actions: SILENT_ACTIONS, + }), +]; + /** * Returns the default ordered model policy chain for the user. */ @@ -70,6 +94,10 @@ export function createSingleModelChain(model: string): ModelPolicyChain { return [definePolicy({ model, isLastResort: true })]; } +export function getFlashLitePolicyChain(): ModelPolicyChain { + return cloneChain(FLASH_LITE_CHAIN); +} + /** * Provides a default policy scaffold for models not present in the catalog. 
 */ diff --git a/packages/core/src/availability/policyHelpers.test.ts b/packages/core/src/availability/policyHelpers.test.ts index 026d09f123..bc64ba419b 100644 --- a/packages/core/src/availability/policyHelpers.test.ts +++ b/packages/core/src/availability/policyHelpers.test.ts @@ -12,7 +12,10 @@ import { } from './policyHelpers.js'; import { createDefaultPolicy } from './policyCatalog.js'; import type { Config } from '../config/config.js'; -import { DEFAULT_GEMINI_MODEL_AUTO } from '../config/models.js'; +import { + DEFAULT_GEMINI_FLASH_LITE_MODEL, + DEFAULT_GEMINI_MODEL_AUTO, +} from '../config/models.js'; const createMockConfig = (overrides: Partial<Config> = {}): Config => ({ @@ -53,6 +56,26 @@ describe('policyHelpers', () => { expect(chain[1]?.model).toBe('gemini-2.5-flash'); }); + it('uses auto chain when preferred model is auto', () => { + const config = createMockConfig({ + getModel: () => 'gemini-2.5-pro', + }); + const chain = resolvePolicyChain(config, DEFAULT_GEMINI_MODEL_AUTO); + expect(chain).toHaveLength(2); + expect(chain[0]?.model).toBe('gemini-2.5-pro'); + expect(chain[1]?.model).toBe('gemini-2.5-flash'); + }); + + it('uses auto chain when configured model is auto even if preferred is concrete', () => { + const config = createMockConfig({ + getModel: () => DEFAULT_GEMINI_MODEL_AUTO, + }); + const chain = resolvePolicyChain(config, 'gemini-2.5-pro'); + expect(chain).toHaveLength(2); + expect(chain[0]?.model).toBe('gemini-2.5-pro'); + expect(chain[1]?.model).toBe('gemini-2.5-flash'); + }); + it('starts chain from preferredModel when model is "auto"', () => { const config = createMockConfig({ getModel: () => DEFAULT_GEMINI_MODEL_AUTO, @@ -62,6 +85,28 @@ describe('policyHelpers', () => { expect(chain[0]?.model).toBe('gemini-2.5-flash'); }); + it('returns flash-lite chain when preferred model is flash-lite', () => { + const config = createMockConfig({ + getModel: () => DEFAULT_GEMINI_MODEL_AUTO, + }); + const chain = resolvePolicyChain(config, 
DEFAULT_GEMINI_FLASH_LITE_MODEL); + expect(chain).toHaveLength(3); + expect(chain[0]?.model).toBe('gemini-2.5-flash-lite'); + expect(chain[1]?.model).toBe('gemini-2.5-flash'); + expect(chain[2]?.model).toBe('gemini-2.5-pro'); + }); + + it('returns flash-lite chain when configured model is flash-lite', () => { + const config = createMockConfig({ + getModel: () => DEFAULT_GEMINI_FLASH_LITE_MODEL, + }); + const chain = resolvePolicyChain(config); + expect(chain).toHaveLength(3); + expect(chain[0]?.model).toBe('gemini-2.5-flash-lite'); + expect(chain[1]?.model).toBe('gemini-2.5-flash'); + expect(chain[2]?.model).toBe('gemini-2.5-pro'); + }); + it('wraps around the chain when wrapsAround is true', () => { const config = createMockConfig({ getModel: () => DEFAULT_GEMINI_MODEL_AUTO, diff --git a/packages/core/src/availability/policyHelpers.ts b/packages/core/src/availability/policyHelpers.ts index 1864f0c00e..4d65b84d77 100644 --- a/packages/core/src/availability/policyHelpers.ts +++ b/packages/core/src/availability/policyHelpers.ts @@ -17,11 +17,13 @@ import { createDefaultPolicy, createSingleModelChain, getModelPolicyChain, + getFlashLitePolicyChain, } from './policyCatalog.js'; import { + DEFAULT_GEMINI_FLASH_LITE_MODEL, DEFAULT_GEMINI_MODEL, - DEFAULT_GEMINI_MODEL_AUTO, PREVIEW_GEMINI_MODEL_AUTO, + isAutoModel, resolveModel, } from '../config/models.js'; import type { ModelSelectionResult } from './modelAvailabilityService.js'; @@ -38,24 +40,30 @@ export function resolvePolicyChain( ): ModelPolicyChain { const modelFromConfig = preferredModel ?? config.getActiveModel?.() ?? config.getModel(); + const configuredModel = config.getModel(); let chain; + const resolvedModel = resolveModel(modelFromConfig); + const isAutoPreferred = preferredModel ? 
isAutoModel(preferredModel) : false; + const isAutoConfigured = isAutoModel(configuredModel); - if ( - config.getModel() === PREVIEW_GEMINI_MODEL_AUTO || - config.getModel() === DEFAULT_GEMINI_MODEL_AUTO - ) { + if (resolvedModel === DEFAULT_GEMINI_FLASH_LITE_MODEL) { + chain = getFlashLitePolicyChain(); + } else if (isAutoPreferred || isAutoConfigured) { + const previewEnabled = + preferredModel === PREVIEW_GEMINI_MODEL_AUTO || + configuredModel === PREVIEW_GEMINI_MODEL_AUTO; chain = getModelPolicyChain({ - previewEnabled: config.getModel() === PREVIEW_GEMINI_MODEL_AUTO, + previewEnabled, userTier: config.getUserTier(), }); } else { chain = createSingleModelChain(modelFromConfig); } - const activeModel = resolveModel(modelFromConfig); - - const activeIndex = chain.findIndex((policy) => policy.model === activeModel); + const activeIndex = chain.findIndex( + (policy) => policy.model === resolvedModel, + ); if (activeIndex !== -1) { return wrapsAround ? [...chain.slice(activeIndex), ...chain.slice(0, activeIndex)] @@ -64,7 +72,7 @@ export function resolvePolicyChain( // If the user specified a model not in the default chain, we assume they want // *only* that model. We do not fallback to the default chain. - return [createDefaultPolicy(activeModel, { isLastResort: true })]; + return [createDefaultPolicy(resolvedModel, { isLastResort: true })]; } /**