feat: launch Gemini 3 Flash in Gemini CLI ⚡️⚡️⚡️ (#15196)

Co-authored-by: gemini-cli-robot <gemini-cli-robot@google.com>
Co-authored-by: joshualitt <joshualitt@google.com>
Co-authored-by: Sehoon Shon <sshon@google.com>
Co-authored-by: Adam Weidman <65992621+adamfweidman@users.noreply.github.com>
Co-authored-by: Adib234 <30782825+Adib234@users.noreply.github.com>
Co-authored-by: Jenna Inouye <jinouye@google.com>
2026-06-01 19:03:42 +00:00 · 2025-12-17 09:43:21 -08:00
parent 6f4168d3f2
commit 1082081ff0
65 changed files with 1898 additions and 2060 deletions
--- a/packages/core/index.ts
+++ b/packages/core/index.ts
@@ -12,9 +12,6 @@ export {
  DEFAULT_GEMINI_FLASH_MODEL,
  DEFAULT_GEMINI_FLASH_LITE_MODEL,
  DEFAULT_GEMINI_EMBEDDING_MODEL,
-  GEMINI_MODEL_ALIAS_PRO,
-  GEMINI_MODEL_ALIAS_FLASH,
-  GEMINI_MODEL_ALIAS_FLASH_LITE,
 } from './src/config/models.js';
 export {
  serializeTerminalToObject,
--- a/packages/core/src/agents/codebase-investigator.test.ts
+++ b/packages/core/src/agents/codebase-investigator.test.ts
@@ -12,7 +12,7 @@ import {
  LS_TOOL_NAME,
  READ_FILE_TOOL_NAME,
 } from '../tools/tool-names.js';
-import { GEMINI_MODEL_ALIAS_PRO } from '../config/models.js';
+import { DEFAULT_GEMINI_MODEL } from '../config/models.js';

 describe('CodebaseInvestigatorAgent', () => {
  it('should have the correct agent definition', () => {
@@ -26,7 +26,7 @@ describe('CodebaseInvestigatorAgent', () => {
    ).toBe(true);
    expect(CodebaseInvestigatorAgent.outputConfig?.outputName).toBe('report');
    expect(CodebaseInvestigatorAgent.modelConfig?.model).toBe(
-      GEMINI_MODEL_ALIAS_PRO,
+      DEFAULT_GEMINI_MODEL,
    );
    expect(CodebaseInvestigatorAgent.toolConfig?.tools).toEqual([
      LS_TOOL_NAME,
--- a/packages/core/src/agents/codebase-investigator.ts
+++ b/packages/core/src/agents/codebase-investigator.ts
@@ -11,7 +11,7 @@ import {
  LS_TOOL_NAME,
  READ_FILE_TOOL_NAME,
 } from '../tools/tool-names.js';
-import { GEMINI_MODEL_ALIAS_PRO } from '../config/models.js';
+import { DEFAULT_GEMINI_MODEL } from '../config/models.js';
 import { z } from 'zod';

 // Define a type that matches the outputConfig schema for type safety.
@@ -70,7 +70,7 @@ export const CodebaseInvestigatorAgent: LocalAgentDefinition<
  processOutput: (output) => JSON.stringify(output, null, 2),

  modelConfig: {
-    model: GEMINI_MODEL_ALIAS_PRO,
+    model: DEFAULT_GEMINI_MODEL,
    temp: 0.1,
    top_p: 0.95,
    thinkingBudget: -1,
--- a/packages/core/src/agents/registry.test.ts
+++ b/packages/core/src/agents/registry.test.ts
@@ -10,6 +10,13 @@ import { makeFakeConfig } from '../test-utils/config.js';
 import type { AgentDefinition, LocalAgentDefinition } from './types.js';
 import type { Config } from '../config/config.js';
 import { debugLogger } from '../utils/debugLogger.js';
+import {
+  DEFAULT_GEMINI_FLASH_LITE_MODEL,
+  GEMINI_MODEL_ALIAS_AUTO,
+  PREVIEW_GEMINI_FLASH_MODEL,
+  PREVIEW_GEMINI_MODEL,
+  PREVIEW_GEMINI_MODEL_AUTO,
+} from '../config/models.js';

 // A test-only subclass to expose the protected `registerAgent` method.
 class TestableAgentRegistry extends AgentRegistry {
@@ -74,12 +81,12 @@ describe('AgentRegistry', () => {
      );
    });

-    it('should use preview model for codebase investigator if main model is preview', async () => {
+    it('should use preview flash model for codebase investigator if main model is preview pro', async () => {
      const previewConfig = makeFakeConfig({
-        model: 'gemini-3-pro-preview',
+        model: PREVIEW_GEMINI_MODEL,
        codebaseInvestigatorSettings: {
          enabled: true,
-          model: 'pro',
+          model: GEMINI_MODEL_ALIAS_AUTO,
        },
      });
      const previewRegistry = new TestableAgentRegistry(previewConfig);
@@ -88,10 +95,52 @@ describe('AgentRegistry', () => {

      const investigatorDef = previewRegistry.getDefinition(
        'codebase_investigator',
-      );
+      ) as LocalAgentDefinition;
      expect(investigatorDef).toBeDefined();
-      expect((investigatorDef as LocalAgentDefinition).modelConfig.model).toBe(
-        'gemini-3-pro-preview',
+      expect(investigatorDef?.modelConfig.model).toBe(
+        PREVIEW_GEMINI_FLASH_MODEL,
+      );
+    });
+
+    it('should use preview flash model for codebase investigator if main model is preview auto', async () => {
+      const previewConfig = makeFakeConfig({
+        model: PREVIEW_GEMINI_MODEL_AUTO,
+        codebaseInvestigatorSettings: {
+          enabled: true,
+          model: GEMINI_MODEL_ALIAS_AUTO,
+        },
+      });
+      const previewRegistry = new TestableAgentRegistry(previewConfig);
+
+      await previewRegistry.initialize();
+
+      const investigatorDef = previewRegistry.getDefinition(
+        'codebase_investigator',
+      ) as LocalAgentDefinition;
+      expect(investigatorDef).toBeDefined();
+      expect(investigatorDef?.modelConfig.model).toBe(
+        PREVIEW_GEMINI_FLASH_MODEL,
+      );
+    });
+
+    it('should use the model from the investigator settings', async () => {
+      const previewConfig = makeFakeConfig({
+        model: PREVIEW_GEMINI_MODEL,
+        codebaseInvestigatorSettings: {
+          enabled: true,
+          model: DEFAULT_GEMINI_FLASH_LITE_MODEL,
+        },
+      });
+      const previewRegistry = new TestableAgentRegistry(previewConfig);
+
+      await previewRegistry.initialize();
+
+      const investigatorDef = previewRegistry.getDefinition(
+        'codebase_investigator',
+      ) as LocalAgentDefinition;
+      expect(investigatorDef).toBeDefined();
+      expect(investigatorDef?.modelConfig.model).toBe(
+        DEFAULT_GEMINI_FLASH_LITE_MODEL,
      );
    });
  });
--- a/packages/core/src/agents/registry.ts
+++ b/packages/core/src/agents/registry.ts
@@ -10,11 +10,13 @@ import { CodebaseInvestigatorAgent } from './codebase-investigator.js';
 import { type z } from 'zod';
 import { debugLogger } from '../utils/debugLogger.js';
 import {
-  DEFAULT_GEMINI_MODEL_AUTO,
-  GEMINI_MODEL_ALIAS_PRO,
-  PREVIEW_GEMINI_MODEL,
+  DEFAULT_GEMINI_MODEL,
+  GEMINI_MODEL_ALIAS_AUTO,
+  PREVIEW_GEMINI_FLASH_MODEL,
+  isPreviewModel,
 } from '../config/models.js';
 import type { ModelConfigAlias } from '../services/modelConfigService.js';
+import { coreEvents, CoreEvent } from '../utils/events.js';

 /**
 * Returns the model config alias for a given agent definition.
@@ -41,6 +43,10 @@ export class AgentRegistry {
  async initialize(): Promise<void> {
    this.loadBuiltInAgents();

+    coreEvents.on(CoreEvent.ModelChanged, () => {
+      this.loadBuiltInAgents();
+    });
+
    if (this.config.getDebugMode()) {
      debugLogger.log(
        `[AgentRegistry] Initialized with ${this.agents.size} agents.`,
@@ -53,19 +59,17 @@ export class AgentRegistry {

    // Only register the agent if it's enabled in the settings.
    if (investigatorSettings?.enabled) {
-      let model =
-        investigatorSettings.model ??
-        CodebaseInvestigatorAgent.modelConfig.model;
-
-      // If the user is using the preview model for the main agent, force the sub-agent to use it too
-      // if it's configured to use 'pro' or 'auto'.
-      if (this.config.getModel() === PREVIEW_GEMINI_MODEL) {
-        if (
-          model === GEMINI_MODEL_ALIAS_PRO ||
-          model === DEFAULT_GEMINI_MODEL_AUTO
-        ) {
-          model = PREVIEW_GEMINI_MODEL;
-        }
+      let model;
+      const settingsModel = investigatorSettings.model;
+      // Check if the user explicitly set a model in the settings.
+      if (settingsModel && settingsModel !== GEMINI_MODEL_ALIAS_AUTO) {
+        model = settingsModel;
+      } else {
+        // Use the Preview Flash model if the main model is a preview model;
+        // otherwise, fall back to the default Pro model.
+        model = isPreviewModel(this.config.getModel())
+          ? PREVIEW_GEMINI_FLASH_MODEL
+          : DEFAULT_GEMINI_MODEL;
      }

      const agentDef = {
--- a/packages/core/src/availability/policyCatalog.test.ts
+++ b/packages/core/src/availability/policyCatalog.test.ts
@@ -19,7 +19,7 @@ describe('policyCatalog', () => {
  it('returns preview chain when preview enabled', () => {
    const chain = getModelPolicyChain({ previewEnabled: true });
    expect(chain[0]?.model).toBe(PREVIEW_GEMINI_MODEL);
-    expect(chain).toHaveLength(3);
+    expect(chain).toHaveLength(2);
  });

  it('returns default chain when preview disabled', () => {
@@ -31,7 +31,7 @@ describe('policyCatalog', () => {
  it('marks preview transients as sticky retries', () => {
    const [previewPolicy] = getModelPolicyChain({ previewEnabled: true });
    expect(previewPolicy.model).toBe(PREVIEW_GEMINI_MODEL);
-    expect(previewPolicy.stateTransitions.transient).toBe('sticky_retry');
+    expect(previewPolicy.stateTransitions.transient).toBe('terminal');
  });

  it('applies default actions and state transitions for unspecified kinds', () => {
--- a/packages/core/src/availability/policyCatalog.ts
+++ b/packages/core/src/availability/policyCatalog.ts
@@ -13,6 +13,7 @@ import type {
 import {
  DEFAULT_GEMINI_FLASH_MODEL,
  DEFAULT_GEMINI_MODEL,
+  PREVIEW_GEMINI_FLASH_MODEL,
  PREVIEW_GEMINI_MODEL,
 } from '../config/models.js';
 import type { UserTierId } from '../code_assist/types.js';
@@ -48,13 +49,8 @@ const DEFAULT_CHAIN: ModelPolicyChain = [
 ];

 const PREVIEW_CHAIN: ModelPolicyChain = [
-  definePolicy({
-    model: PREVIEW_GEMINI_MODEL,
-    stateTransitions: { transient: 'sticky_retry' },
-    actions: { transient: 'silent' },
-  }),
-  definePolicy({ model: DEFAULT_GEMINI_MODEL }),
-  definePolicy({ model: DEFAULT_GEMINI_FLASH_MODEL, isLastResort: true }),
+  definePolicy({ model: PREVIEW_GEMINI_MODEL }),
+  definePolicy({ model: PREVIEW_GEMINI_FLASH_MODEL, isLastResort: true }),
 ];

 /**
@@ -70,6 +66,10 @@ export function getModelPolicyChain(
  return cloneChain(DEFAULT_CHAIN);
 }

+export function createSingleModelChain(model: string): ModelPolicyChain {
+  return [definePolicy({ model, isLastResort: true })];
+}
+
 /**
 * Provides a default policy scaffold for models not present in the catalog.
 */
--- a/packages/core/src/availability/policyHelpers.test.ts
+++ b/packages/core/src/availability/policyHelpers.test.ts
@@ -12,6 +12,7 @@ import {
 } from './policyHelpers.js';
 import { createDefaultPolicy } from './policyCatalog.js';
 import type { Config } from '../config/config.js';
+import { DEFAULT_GEMINI_MODEL_AUTO } from '../config/models.js';

 const createMockConfig = (overrides: Partial<Config> = {}): Config =>
  ({
@@ -24,7 +25,7 @@ const createMockConfig = (overrides: Partial<Config> = {}): Config =>

 describe('policyHelpers', () => {
  describe('resolvePolicyChain', () => {
-    it('inserts the active model when missing from the catalog', () => {
+    it('returns a single-model chain for a custom model', () => {
      const config = createMockConfig({
        getModel: () => 'custom-model',
      });
@@ -43,7 +44,7 @@ describe('policyHelpers', () => {

    it('returns the default chain when active model is "auto"', () => {
      const config = createMockConfig({
-        getModel: () => 'auto',
+        getModel: () => DEFAULT_GEMINI_MODEL_AUTO,
      });
      const chain = resolvePolicyChain(config);

@@ -52,6 +53,25 @@ describe('policyHelpers', () => {
      expect(chain[0]?.model).toBe('gemini-2.5-pro');
      expect(chain[1]?.model).toBe('gemini-2.5-flash');
    });
+
+    it('starts chain from preferredModel when model is "auto"', () => {
+      const config = createMockConfig({
+        getModel: () => DEFAULT_GEMINI_MODEL_AUTO,
+      });
+      const chain = resolvePolicyChain(config, 'gemini-2.5-flash');
+      expect(chain).toHaveLength(1);
+      expect(chain[0]?.model).toBe('gemini-2.5-flash');
+    });
+
+    it('wraps around the chain when wrapsAround is true', () => {
+      const config = createMockConfig({
+        getModel: () => DEFAULT_GEMINI_MODEL_AUTO,
+      });
+      const chain = resolvePolicyChain(config, 'gemini-2.5-flash', true);
+      expect(chain).toHaveLength(2);
+      expect(chain[0]?.model).toBe('gemini-2.5-flash');
+      expect(chain[1]?.model).toBe('gemini-2.5-pro');
+    });
  });

  describe('buildFallbackPolicyContext', () => {
@@ -63,6 +83,17 @@ describe('policyHelpers', () => {
      ];
      const context = buildFallbackPolicyContext(chain, 'b');
      expect(context.failedPolicy?.model).toBe('b');
+      expect(context.candidates.map((p) => p.model)).toEqual(['c']);
+    });
+
+    it('wraps around when building fallback context if wrapsAround is true', () => {
+      const chain = [
+        createDefaultPolicy('a'),
+        createDefaultPolicy('b'),
+        createDefaultPolicy('c'),
+      ];
+      const context = buildFallbackPolicyContext(chain, 'b', true);
+      expect(context.failedPolicy?.model).toBe('b');
      expect(context.candidates.map((p) => p.model)).toEqual(['c', 'a']);
    });

--- a/packages/core/src/availability/policyHelpers.ts
+++ b/packages/core/src/availability/policyHelpers.ts
@@ -13,8 +13,17 @@ import type {
  ModelPolicyChain,
  RetryAvailabilityContext,
 } from './modelPolicy.js';
-import { createDefaultPolicy, getModelPolicyChain } from './policyCatalog.js';
-import { DEFAULT_GEMINI_MODEL, getEffectiveModel } from '../config/models.js';
+import {
+  createDefaultPolicy,
+  createSingleModelChain,
+  getModelPolicyChain,
+} from './policyCatalog.js';
+import {
+  DEFAULT_GEMINI_MODEL,
+  DEFAULT_GEMINI_MODEL_AUTO,
+  PREVIEW_GEMINI_MODEL_AUTO,
+  resolveModel,
+} from '../config/models.js';
 import type { ModelSelectionResult } from './modelAvailabilityService.js';

 /**
@@ -24,27 +33,34 @@ import type { ModelSelectionResult } from './modelAvailabilityService.js';
 export function resolvePolicyChain(
  config: Config,
  preferredModel?: string,
+  wrapsAround: boolean = false,
 ): ModelPolicyChain {
-  const chain = getModelPolicyChain({
-    previewEnabled: !!config.getPreviewFeatures(),
-    userTier: config.getUserTier(),
-  });
-  // TODO: This will be replaced when we get rid of Fallback Modes.
-  // Switch to getActiveModel()
-  const activeModel =
-    preferredModel ??
-    getEffectiveModel(
-      config.isInFallbackMode(),
-      config.getModel(),
-      config.getPreviewFeatures(),
-    );
+  // Availability uses the active/requested model directly. Legacy fallback logic
+  // (getEffectiveModel) only applies when availability is disabled.
+  const modelFromConfig =
+    preferredModel ?? config.getActiveModel?.() ?? config.getModel();

-  if (activeModel === 'auto') {
-    return [...chain];
+  let chain;
+
+  if (
+    config.getModel() === PREVIEW_GEMINI_MODEL_AUTO ||
+    config.getModel() === DEFAULT_GEMINI_MODEL_AUTO
+  ) {
+    chain = getModelPolicyChain({
+      previewEnabled: config.getModel() === PREVIEW_GEMINI_MODEL_AUTO,
+      userTier: config.getUserTier(),
+    });
+  } else {
+    chain = createSingleModelChain(modelFromConfig);
  }

-  if (chain.some((policy) => policy.model === activeModel)) {
-    return [...chain];
+  const activeModel = resolveModel(modelFromConfig);
+
+  const activeIndex = chain.findIndex((policy) => policy.model === activeModel);
+  if (activeIndex !== -1) {
+    return wrapsAround
+      ? [...chain.slice(activeIndex), ...chain.slice(0, activeIndex)]
+      : [...chain.slice(activeIndex)];
  }

  // If the user specified a model not in the default chain, we assume they want
@@ -55,10 +71,14 @@ export function resolvePolicyChain(
 /**
 * Produces the failed policy (if it exists in the chain) and the list of
 * fallback candidates that follow it.
+ * @param chain - The ordered list of available model policies.
+ * @param failedModel - The identifier of the model that failed.
+ * @param wrapsAround - If true, treats the chain as a circular buffer.
 */
 export function buildFallbackPolicyContext(
  chain: ModelPolicyChain,
  failedModel: string,
+  wrapsAround: boolean = false,
 ): {
  failedPolicy?: ModelPolicy;
  candidates: ModelPolicy[];
@@ -69,9 +89,12 @@ export function buildFallbackPolicyContext(
  }
  // Return [candidates_after, candidates_before] to prioritize downgrades
  // (continuing the chain) before wrapping around to upgrades.
+  const candidates = wrapsAround
+    ? [...chain.slice(index + 1), ...chain.slice(0, index)]
+    : [...chain.slice(index + 1)];
  return {
    failedPolicy: chain[index],
-    candidates: [...chain.slice(index + 1), ...chain.slice(0, index)],
+    candidates,
  };
 }

--- a/packages/core/src/config/config.test.ts
+++ b/packages/core/src/config/config.test.ts
@@ -34,6 +34,11 @@ import { logRipgrepFallback } from '../telemetry/loggers.js';
 import { RipgrepFallbackEvent } from '../telemetry/types.js';
 import { ToolRegistry } from '../tools/tool-registry.js';
 import { DEFAULT_MODEL_CONFIGS } from './defaultModelConfigs.js';
+import {
+  DEFAULT_GEMINI_MODEL,
+  DEFAULT_GEMINI_MODEL_AUTO,
+  PREVIEW_GEMINI_MODEL,
+} from './models.js';

 vi.mock('fs', async (importOriginal) => {
  const actual = await importOriginal<typeof import('fs')>();
@@ -177,7 +182,7 @@ vi.mock('../code_assist/codeAssist.js');
 vi.mock('../code_assist/experiments/experiments.js');

 describe('Server Config (config.ts)', () => {
-  const MODEL = 'gemini-pro';
+  const MODEL = DEFAULT_GEMINI_MODEL;
  const SANDBOX: SandboxConfig = {
    command: 'docker',
    image: 'gemini-cli-sandbox',
@@ -769,6 +774,40 @@ describe('Server Config (config.ts)', () => {
    });
  });

+  describe('UseWriteTodos Configuration', () => {
+    it('should default useWriteTodos to true when not provided', () => {
+      const config = new Config(baseParams);
+      expect(config.getUseWriteTodos()).toBe(true);
+    });
+
+    it('should set useWriteTodos to false when provided as false', () => {
+      const params: ConfigParameters = {
+        ...baseParams,
+        useWriteTodos: false,
+      };
+      const config = new Config(params);
+      expect(config.getUseWriteTodos()).toBe(false);
+    });
+
+    it('should disable useWriteTodos for preview models', () => {
+      const params: ConfigParameters = {
+        ...baseParams,
+        model: 'gemini-3-pro-preview',
+      };
+      const config = new Config(params);
+      expect(config.getUseWriteTodos()).toBe(false);
+    });
+
+    it('should NOT disable useWriteTodos for non-preview models', () => {
+      const params: ConfigParameters = {
+        ...baseParams,
+        model: 'gemini-2.5-pro',
+      };
+      const config = new Config(params);
+      expect(config.getUseWriteTodos()).toBe(true);
+    });
+  });
+
  describe('Shell Tool Inactivity Timeout', () => {
    it('should default to 300000ms (300 seconds) when not provided', () => {
      const config = new Config(baseParams);
@@ -1703,18 +1742,16 @@ describe('Availability Service Integration', () => {
    cwd: '.',
  };

-  it('setActiveModel updates active model and emits event', async () => {
+  it('setActiveModel updates active model', async () => {
    const config = new Config(baseParams);
    const model1 = 'model1';
    const model2 = 'model2';

    config.setActiveModel(model1);
    expect(config.getActiveModel()).toBe(model1);
-    expect(mockCoreEvents.emitModelChanged).toHaveBeenCalledWith(model1);

    config.setActiveModel(model2);
    expect(config.getActiveModel()).toBe(model2);
-    expect(mockCoreEvents.emitModelChanged).toHaveBeenCalledWith(model2);
  });

  it('getActiveModel defaults to configured model if not set', () => {
@@ -1731,3 +1768,107 @@ describe('Availability Service Integration', () => {
    expect(spy).toHaveBeenCalled();
  });
 });
+
+describe('Config Quota & Preview Model Access', () => {
+  let config: Config;
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  let mockCodeAssistServer: any;
+
+  const baseParams: ConfigParameters = {
+    cwd: '/tmp',
+    targetDir: '/tmp',
+    debugMode: false,
+    sessionId: 'test-session',
+    model: 'gemini-pro',
+    usageStatisticsEnabled: false,
+    embeddingModel: 'gemini-embedding', // required by the ConfigParameters type
+    sandbox: {
+      command: 'docker',
+      image: 'gemini-cli-sandbox',
+    },
+  };
+
+  beforeEach(() => {
+    vi.clearAllMocks();
+    mockCodeAssistServer = {
+      projectId: 'test-project',
+      retrieveUserQuota: vi.fn(),
+    };
+    vi.mocked(getCodeAssistServer).mockReturnValue(mockCodeAssistServer);
+    config = new Config(baseParams);
+  });
+
+  describe('refreshUserQuota', () => {
+    it('should update hasAccessToPreviewModel to true if quota includes preview model', async () => {
+      mockCodeAssistServer.retrieveUserQuota.mockResolvedValue({
+        buckets: [{ modelId: PREVIEW_GEMINI_MODEL }],
+      });
+
+      await config.refreshUserQuota();
+      expect(config.getHasAccessToPreviewModel()).toBe(true);
+    });
+
+    it('should update hasAccessToPreviewModel to false if quota does not include preview model', async () => {
+      mockCodeAssistServer.retrieveUserQuota.mockResolvedValue({
+        buckets: [{ modelId: 'some-other-model' }],
+      });
+
+      await config.refreshUserQuota();
+      expect(config.getHasAccessToPreviewModel()).toBe(false);
+    });
+
+    it('should update hasAccessToPreviewModel to false if buckets are undefined', async () => {
+      mockCodeAssistServer.retrieveUserQuota.mockResolvedValue({});
+
+      await config.refreshUserQuota();
+      expect(config.getHasAccessToPreviewModel()).toBe(false);
+    });
+
+    it('should return undefined and not update if codeAssistServer is missing', async () => {
+      vi.mocked(getCodeAssistServer).mockReturnValue(undefined);
+      const result = await config.refreshUserQuota();
+      expect(result).toBeUndefined();
+      expect(config.getHasAccessToPreviewModel()).toBe(false);
+    });
+
+    it('should return undefined if retrieveUserQuota fails', async () => {
+      mockCodeAssistServer.retrieveUserQuota.mockRejectedValue(
+        new Error('Network error'),
+      );
+      const result = await config.refreshUserQuota();
+      expect(result).toBeUndefined();
+      // Should remain default (false)
+      expect(config.getHasAccessToPreviewModel()).toBe(false);
+    });
+  });
+
+  describe('setPreviewFeatures', () => {
+    it('should reset model to default auto if disabling preview features while using a preview model', () => {
+      config.setPreviewFeatures(true);
+      config.setModel(PREVIEW_GEMINI_MODEL);
+
+      config.setPreviewFeatures(false);
+
+      expect(config.getModel()).toBe(DEFAULT_GEMINI_MODEL_AUTO);
+    });
+
+    it('should NOT reset model if disabling preview features while NOT using a preview model', () => {
+      config.setPreviewFeatures(true);
+      const nonPreviewModel = 'gemini-1.5-pro';
+      config.setModel(nonPreviewModel);
+
+      config.setPreviewFeatures(false);
+
+      expect(config.getModel()).toBe(nonPreviewModel);
+    });
+
+    it('should NOT reset model if enabling preview features', () => {
+      config.setPreviewFeatures(false);
+      config.setModel(PREVIEW_GEMINI_MODEL); // Just pretending it was set somehow
+
+      config.setPreviewFeatures(true);
+
+      expect(config.getModel()).toBe(PREVIEW_GEMINI_MODEL);
+    });
+  });
+});
--- a/packages/core/src/config/config.ts
+++ b/packages/core/src/config/config.ts
@@ -48,7 +48,10 @@ import { tokenLimit } from '../core/tokenLimits.js';
 import {
  DEFAULT_GEMINI_EMBEDDING_MODEL,
  DEFAULT_GEMINI_FLASH_MODEL,
+  DEFAULT_GEMINI_MODEL_AUTO,
  DEFAULT_THINKING_MODE,
+  isPreviewModel,
+  PREVIEW_GEMINI_MODEL,
 } from './models.js';
 import { shouldAttemptBrowserLaunch } from '../utils/browser.js';
 import type { MCPOAuthConfig } from '../mcp/oauth-provider.js';
@@ -80,6 +83,7 @@ import { PolicyEngine } from '../policy/policy-engine.js';
 import type { PolicyEngineConfig } from '../policy/types.js';
 import { HookSystem } from '../hooks/index.js';
 import type { UserTierId } from '../code_assist/types.js';
+import type { RetrieveUserQuotaResponse } from '../code_assist/types.js';
 import { getCodeAssistServer } from '../code_assist/codeAssist.js';
 import type { Experiments } from '../code_assist/experiments/experiments.js';
 import { AgentRegistry } from '../agents/registry.js';
@@ -379,6 +383,7 @@ export class Config {
  private readonly bugCommand: BugCommandSettings | undefined;
  private model: string;
  private previewFeatures: boolean | undefined;
+  private hasAccessToPreviewModel: boolean = false;
  private readonly noBrowser: boolean;
  private readonly folderTrust: boolean;
  private ideMode: boolean;
@@ -508,8 +513,7 @@ export class Config {
    this.bugCommand = params.bugCommand;
    this.model = params.model;
    this._activeModel = params.model;
-    this.enableModelAvailabilityService =
-      params.enableModelAvailabilityService ?? false;
+    this.enableModelAvailabilityService = true;
    this.enableAgents = params.enableAgents ?? false;
    this.experimentalJitContext = params.experimentalJitContext ?? false;
    this.modelAvailabilityService = new ModelAvailabilityService();
@@ -551,7 +555,10 @@ export class Config {
      params.truncateToolOutputLines ?? DEFAULT_TRUNCATE_TOOL_OUTPUT_LINES;
    this.enableToolOutputTruncation = params.enableToolOutputTruncation ?? true;
    this.useSmartEdit = params.useSmartEdit ?? true;
-    this.useWriteTodos = params.useWriteTodos ?? true;
+    // TODO(joshualitt): Re-evaluate the todo tool for the Gemini 3 family.
+    this.useWriteTodos = isPreviewModel(this.model)
+      ? false
+      : (params.useWriteTodos ?? true);
    this.enableHooks = params.enableHooks ?? false;
    this.disabledHooks =
      (params.hooks && 'disabled' in params.hooks
@@ -716,6 +723,9 @@ export class Config {
      this.geminiClient.stripThoughtsFromHistory();
    }

+    // Reset availability status when switching auth (e.g. from limited key to OAuth)
+    this.modelAvailabilityService.reset();
+
    const newContentGeneratorConfig = await createContentGeneratorConfig(
      this,
      authMethod,
@@ -735,6 +745,10 @@ export class Config {

    const codeAssistServer = getCodeAssistServer(this);
    if (codeAssistServer) {
+      if (codeAssistServer.projectId) {
+        await this.refreshUserQuota();
+      }
+
      this.experimentsPromise = getExperiments(codeAssistServer)
        .then((experiments) => {
          this.setExperiments(experiments);
@@ -756,8 +770,21 @@ export class Config {
      this.experimentsPromise = undefined;
    }

+    const authType = this.contentGeneratorConfig.authType;
+    if (
+      authType === AuthType.USE_GEMINI ||
+      authType === AuthType.USE_VERTEX_AI
+    ) {
+      this.setHasAccessToPreviewModel(true);
+    }
+
    // Reset the session flag since we're explicitly changing auth and using default model
    this.inFallbackMode = false;
+
+    // Update model if user no longer has access to the preview model
+    if (!this.hasAccessToPreviewModel && isPreviewModel(this.model)) {
+      this.setModel(DEFAULT_GEMINI_MODEL_AUTO);
+    }
  }

  async getExperimentsAsync(): Promise<Experiments | undefined> {
@@ -841,7 +868,6 @@ export class Config {
  setActiveModel(model: string): void {
    if (this._activeModel !== model) {
      this._activeModel = model;
-      coreEvents.emitModelChanged(model);
    }
  }

@@ -952,9 +978,43 @@ export class Config {
  }

  setPreviewFeatures(previewFeatures: boolean) {
+    // If preview features are being turned off while a preview model is
+    // selected, switch the model to the default auto model.
+    if (this.previewFeatures && !previewFeatures) {
+      if (isPreviewModel(this.getModel())) {
+        this.setModel(DEFAULT_GEMINI_MODEL_AUTO);
+      }
+    }
    this.previewFeatures = previewFeatures;
  }

+  getHasAccessToPreviewModel(): boolean {
+    return this.hasAccessToPreviewModel;
+  }
+
+  setHasAccessToPreviewModel(hasAccess: boolean): void {
+    this.hasAccessToPreviewModel = hasAccess;
+  }
+
+  async refreshUserQuota(): Promise<RetrieveUserQuotaResponse | undefined> {
+    const codeAssistServer = getCodeAssistServer(this);
+    if (!codeAssistServer || !codeAssistServer.projectId) {
+      return undefined;
+    }
+    try {
+      const quota = await codeAssistServer.retrieveUserQuota({
+        project: codeAssistServer.projectId,
+      });
+      const hasAccess =
+        quota.buckets?.some((b) => b.modelId === PREVIEW_GEMINI_MODEL) ?? false;
+      this.setHasAccessToPreviewModel(hasAccess);
+      return quota;
+    } catch (e) {
+      debugLogger.debug('Failed to retrieve user quota', e);
+      return undefined;
+    }
+  }
+
  getCoreTools(): string[] | undefined {
    return this.coreTools;
  }
--- a/packages/core/src/config/defaultModelConfigs.ts
+++ b/packages/core/src/config/defaultModelConfigs.ts
@@ -65,6 +65,12 @@ export const DEFAULT_MODEL_CONFIGS: ModelConfigServiceConfig = {
        model: 'gemini-3-pro-preview',
      },
    },
+    'gemini-3-flash-preview': {
+      extends: 'chat-base-3',
+      modelConfig: {
+        model: 'gemini-3-flash-preview',
+      },
+    },
    'gemini-2.5-pro': {
      extends: 'chat-base-2.5',
      modelConfig: {
@@ -188,6 +194,11 @@ export const DEFAULT_MODEL_CONFIGS: ModelConfigServiceConfig = {
        model: 'gemini-3-pro-preview',
      },
    },
+    'chat-compression-3-flash': {
+      modelConfig: {
+        model: 'gemini-3-flash-preview',
+      },
+    },
    'chat-compression-2.5-pro': {
      modelConfig: {
        model: 'gemini-2.5-pro',
--- a/packages/core/src/config/models.test.ts
+++ b/packages/core/src/config/models.test.ts
@@ -7,14 +7,19 @@
 import { describe, it, expect } from 'vitest';
 import {
  getEffectiveModel,
+  resolveClassifierModel,
+  isGemini2Model,
  DEFAULT_GEMINI_MODEL,
  PREVIEW_GEMINI_MODEL,
  DEFAULT_GEMINI_FLASH_MODEL,
  DEFAULT_GEMINI_FLASH_LITE_MODEL,
+  supportsMultimodalFunctionResponse,
  GEMINI_MODEL_ALIAS_PRO,
  GEMINI_MODEL_ALIAS_FLASH,
  GEMINI_MODEL_ALIAS_FLASH_LITE,
-  supportsMultimodalFunctionResponse,
+  PREVIEW_GEMINI_FLASH_MODEL,
+  PREVIEW_GEMINI_MODEL_AUTO,
+  DEFAULT_GEMINI_MODEL_AUTO,
 } from './models.js';

 describe('supportsMultimodalFunctionResponse', () => {
@@ -34,210 +39,136 @@ describe('supportsMultimodalFunctionResponse', () => {
 });

 describe('getEffectiveModel', () => {
-  describe('When NOT in fallback mode', () => {
-    const isInFallbackMode = false;
+  describe('delegation to resolveModel', () => {
+    it('should return the Preview Pro model when auto-gemini-3 is requested', () => {
+      const model = getEffectiveModel(PREVIEW_GEMINI_MODEL_AUTO, false);
+      expect(model).toBe(PREVIEW_GEMINI_MODEL);
+    });

-    it('should return the Pro model when Pro is requested', () => {
-      const model = getEffectiveModel(
-        isInFallbackMode,
-        DEFAULT_GEMINI_MODEL,
-        false,
-      );
+    it('should return the Default Pro model when auto-gemini-2.5 is requested', () => {
+      const model = getEffectiveModel(DEFAULT_GEMINI_MODEL_AUTO, false);
      expect(model).toBe(DEFAULT_GEMINI_MODEL);
    });

-    it('should return the Flash model when Flash is requested', () => {
-      const model = getEffectiveModel(
-        isInFallbackMode,
+    it('should return the requested model as-is for explicit specific models', () => {
+      expect(getEffectiveModel(DEFAULT_GEMINI_MODEL, false)).toBe(
+        DEFAULT_GEMINI_MODEL,
+      );
+      expect(getEffectiveModel(DEFAULT_GEMINI_FLASH_MODEL, false)).toBe(
        DEFAULT_GEMINI_FLASH_MODEL,
-        false,
      );
-      expect(model).toBe(DEFAULT_GEMINI_FLASH_MODEL);
-    });
-
-    it('should return the Lite model when Lite is requested', () => {
-      const model = getEffectiveModel(
-        isInFallbackMode,
+      expect(getEffectiveModel(DEFAULT_GEMINI_FLASH_LITE_MODEL, false)).toBe(
        DEFAULT_GEMINI_FLASH_LITE_MODEL,
-        false,
      );
-      expect(model).toBe(DEFAULT_GEMINI_FLASH_LITE_MODEL);
    });

    it('should return a custom model name when requested', () => {
      const customModel = 'custom-model-v1';
-      const model = getEffectiveModel(isInFallbackMode, customModel, false);
+      const model = getEffectiveModel(customModel, false);
      expect(model).toBe(customModel);
    });

    describe('with preview features', () => {
      it('should return the preview model when pro alias is requested', () => {
-        const model = getEffectiveModel(
-          isInFallbackMode,
-          GEMINI_MODEL_ALIAS_PRO,
-          true,
-        );
+        const model = getEffectiveModel(GEMINI_MODEL_ALIAS_PRO, true);
        expect(model).toBe(PREVIEW_GEMINI_MODEL);
      });

      it('should return the default pro model when pro alias is requested and preview is off', () => {
-        const model = getEffectiveModel(
-          isInFallbackMode,
-          GEMINI_MODEL_ALIAS_PRO,
-          false,
-        );
+        const model = getEffectiveModel(GEMINI_MODEL_ALIAS_PRO, false);
        expect(model).toBe(DEFAULT_GEMINI_MODEL);
      });

      it('should return the flash model when flash is requested and preview is on', () => {
-        const model = getEffectiveModel(
-          isInFallbackMode,
-          GEMINI_MODEL_ALIAS_FLASH,
-          true,
-        );
-        expect(model).toBe(DEFAULT_GEMINI_FLASH_MODEL);
+        const model = getEffectiveModel(GEMINI_MODEL_ALIAS_FLASH, true);
+        expect(model).toBe(PREVIEW_GEMINI_FLASH_MODEL);
      });

      it('should return the flash model when lite is requested and preview is on', () => {
-        const model = getEffectiveModel(
-          isInFallbackMode,
-          GEMINI_MODEL_ALIAS_FLASH_LITE,
-          true,
-        );
+        const model = getEffectiveModel(GEMINI_MODEL_ALIAS_FLASH_LITE, true);
        expect(model).toBe(DEFAULT_GEMINI_FLASH_LITE_MODEL);
      });

      it('should return the flash model when the flash model name is explicitly requested and preview is on', () => {
-        const model = getEffectiveModel(
-          isInFallbackMode,
-          DEFAULT_GEMINI_FLASH_MODEL,
-          true,
-        );
+        const model = getEffectiveModel(DEFAULT_GEMINI_FLASH_MODEL, true);
        expect(model).toBe(DEFAULT_GEMINI_FLASH_MODEL);
      });

      it('should return the lite model when the lite model name is requested and preview is on', () => {
-        const model = getEffectiveModel(
-          isInFallbackMode,
-          DEFAULT_GEMINI_FLASH_LITE_MODEL,
-          true,
-        );
+        const model = getEffectiveModel(DEFAULT_GEMINI_FLASH_LITE_MODEL, true);
        expect(model).toBe(DEFAULT_GEMINI_FLASH_LITE_MODEL);
      });

      it('should return the default gemini model when the model is explicitly set and preview is on', () => {
-        const model = getEffectiveModel(
-          isInFallbackMode,
-          DEFAULT_GEMINI_MODEL,
-          true,
-        );
+        const model = getEffectiveModel(DEFAULT_GEMINI_MODEL, true);
        expect(model).toBe(DEFAULT_GEMINI_MODEL);
      });
    });
  });
+});

-  describe('When IN fallback mode', () => {
-    const isInFallbackMode = true;
+describe('isGemini2Model', () => {
+  it('should return true for gemini-2.5-pro', () => {
+    expect(isGemini2Model('gemini-2.5-pro')).toBe(true);
+  });

-    it('should downgrade the Pro model to the Flash model', () => {
-      const model = getEffectiveModel(
-        isInFallbackMode,
-        DEFAULT_GEMINI_MODEL,
-        false,
-      );
-      expect(model).toBe(DEFAULT_GEMINI_FLASH_MODEL);
-    });
+  it('should return true for gemini-2.5-flash', () => {
+    expect(isGemini2Model('gemini-2.5-flash')).toBe(true);
+  });

-    it('should return the Flash model when Flash is requested', () => {
-      const model = getEffectiveModel(
-        isInFallbackMode,
-        DEFAULT_GEMINI_FLASH_MODEL,
-        false,
-      );
-      expect(model).toBe(DEFAULT_GEMINI_FLASH_MODEL);
-    });
+  it('should return true for gemini-2.0-flash', () => {
+    expect(isGemini2Model('gemini-2.0-flash')).toBe(true);
+  });

-    it('should HONOR the Lite model when Lite is requested', () => {
-      const model = getEffectiveModel(
-        isInFallbackMode,
-        DEFAULT_GEMINI_FLASH_LITE_MODEL,
-        false,
-      );
-      expect(model).toBe(DEFAULT_GEMINI_FLASH_LITE_MODEL);
-    });
+  it('should return false for gemini-1.5-pro', () => {
+    expect(isGemini2Model('gemini-1.5-pro')).toBe(false);
+  });

-    it('should HONOR any model with "lite" in its name', () => {
-      const customLiteModel = 'gemini-2.5-custom-lite-vNext';
-      const model = getEffectiveModel(isInFallbackMode, customLiteModel, false);
-      expect(model).toBe(customLiteModel);
-    });
+  it('should return false for gemini-3-pro', () => {
+    expect(isGemini2Model('gemini-3-pro')).toBe(false);
+  });

-    it('should downgrade any other custom model to the Flash model', () => {
-      const customModel = 'custom-model-v1-unlisted';
-      const model = getEffectiveModel(isInFallbackMode, customModel, false);
-      expect(model).toBe(DEFAULT_GEMINI_FLASH_MODEL);
-    });
-
-    describe('with preview features', () => {
-      it('should downgrade the Pro alias to the Flash model', () => {
-        const model = getEffectiveModel(
-          isInFallbackMode,
-          GEMINI_MODEL_ALIAS_PRO,
-          true,
-        );
-        expect(model).toBe(DEFAULT_GEMINI_FLASH_MODEL);
-      });
-
-      it('should return the Flash alias when requested', () => {
-        const model = getEffectiveModel(
-          isInFallbackMode,
-          GEMINI_MODEL_ALIAS_FLASH,
-          true,
-        );
-        expect(model).toBe(DEFAULT_GEMINI_FLASH_MODEL);
-      });
-
-      it('should return the Lite alias when requested', () => {
-        const model = getEffectiveModel(
-          isInFallbackMode,
-          GEMINI_MODEL_ALIAS_FLASH_LITE,
-          true,
-        );
-        expect(model).toBe(DEFAULT_GEMINI_FLASH_LITE_MODEL);
-      });
-
-      it('should downgrade the default Gemini model to the Flash model', () => {
-        const model = getEffectiveModel(
-          isInFallbackMode,
-          DEFAULT_GEMINI_MODEL,
-          true,
-        );
-        expect(model).toBe(DEFAULT_GEMINI_FLASH_MODEL);
-      });
-
-      it('should return the default Flash model when requested', () => {
-        const model = getEffectiveModel(
-          isInFallbackMode,
-          DEFAULT_GEMINI_FLASH_MODEL,
-          true,
-        );
-        expect(model).toBe(DEFAULT_GEMINI_FLASH_MODEL);
-      });
-
-      it('should return the default Lite model when requested', () => {
-        const model = getEffectiveModel(
-          isInFallbackMode,
-          DEFAULT_GEMINI_FLASH_LITE_MODEL,
-          true,
-        );
-        expect(model).toBe(DEFAULT_GEMINI_FLASH_LITE_MODEL);
-      });
-
-      it('should downgrade any other custom model to the Flash model', () => {
-        const customModel = 'custom-model-v1-unlisted';
-        const model = getEffectiveModel(isInFallbackMode, customModel, true);
-        expect(model).toBe(DEFAULT_GEMINI_FLASH_MODEL);
-      });
-    });
+  it('should return false for arbitrary strings', () => {
+    expect(isGemini2Model('gpt-4')).toBe(false);
+  });
+});
+
+describe('resolveClassifierModel', () => {
+  it('should return flash model when alias is flash', () => {
+    expect(
+      resolveClassifierModel(
+        DEFAULT_GEMINI_MODEL_AUTO,
+        GEMINI_MODEL_ALIAS_FLASH,
+      ),
+    ).toBe(DEFAULT_GEMINI_FLASH_MODEL);
+    expect(
+      resolveClassifierModel(
+        PREVIEW_GEMINI_MODEL_AUTO,
+        GEMINI_MODEL_ALIAS_FLASH,
+      ),
+    ).toBe(PREVIEW_GEMINI_FLASH_MODEL);
+  });
+
+  it('should return pro model when alias is pro', () => {
+    expect(
+      resolveClassifierModel(DEFAULT_GEMINI_MODEL_AUTO, GEMINI_MODEL_ALIAS_PRO),
+    ).toBe(DEFAULT_GEMINI_MODEL);
+    expect(
+      resolveClassifierModel(PREVIEW_GEMINI_MODEL_AUTO, GEMINI_MODEL_ALIAS_PRO),
+    ).toBe(PREVIEW_GEMINI_MODEL);
+  });
+
+  it('should handle preview features being enabled', () => {
+    // With preview features enabled, resolving the bare 'flash' alias on its own
+    // would select the preview flash model. An explicit auto model pins its own
+    // family instead: DEFAULT_GEMINI_MODEL_AUTO ("auto-gemini-2.5") always maps to
+    // DEFAULT_GEMINI_FLASH_MODEL, because the user explicitly requested 2.5.
+    expect(
+      resolveClassifierModel(
+        DEFAULT_GEMINI_MODEL_AUTO,
+        GEMINI_MODEL_ALIAS_FLASH,
+        true,
+      ),
+    ).toBe(DEFAULT_GEMINI_FLASH_MODEL);
  });
 });
--- a/packages/core/src/config/models.ts
+++ b/packages/core/src/config/models.ts
@@ -5,20 +5,24 @@
 */

 export const PREVIEW_GEMINI_MODEL = 'gemini-3-pro-preview';
+export const PREVIEW_GEMINI_FLASH_MODEL = 'gemini-3-flash-preview';
 export const DEFAULT_GEMINI_MODEL = 'gemini-2.5-pro';
 export const DEFAULT_GEMINI_FLASH_MODEL = 'gemini-2.5-flash';
 export const DEFAULT_GEMINI_FLASH_LITE_MODEL = 'gemini-2.5-flash-lite';

 export const VALID_GEMINI_MODELS = new Set([
  PREVIEW_GEMINI_MODEL,
+  PREVIEW_GEMINI_FLASH_MODEL,
  DEFAULT_GEMINI_MODEL,
  DEFAULT_GEMINI_FLASH_MODEL,
  DEFAULT_GEMINI_FLASH_LITE_MODEL,
 ]);

-export const DEFAULT_GEMINI_MODEL_AUTO = 'auto';
+export const PREVIEW_GEMINI_MODEL_AUTO = 'auto-gemini-3';
+export const DEFAULT_GEMINI_MODEL_AUTO = 'auto-gemini-2.5';

 // Model aliases for user convenience.
+export const GEMINI_MODEL_ALIAS_AUTO = 'auto';
 export const GEMINI_MODEL_ALIAS_PRO = 'pro';
 export const GEMINI_MODEL_ALIAS_FLASH = 'flash';
 export const GEMINI_MODEL_ALIAS_FLASH_LITE = 'flash-lite';
@@ -38,17 +42,24 @@ export const DEFAULT_THINKING_MODE = 8192;
 */
 export function resolveModel(
  requestedModel: string,
-  previewFeaturesEnabled: boolean | undefined,
+  previewFeaturesEnabled: boolean = false,
 ): string {
  switch (requestedModel) {
-    case DEFAULT_GEMINI_MODEL_AUTO:
+    case PREVIEW_GEMINI_MODEL_AUTO: {
+      return PREVIEW_GEMINI_MODEL;
+    }
+    case DEFAULT_GEMINI_MODEL_AUTO: {
+      return DEFAULT_GEMINI_MODEL;
+    }
    case GEMINI_MODEL_ALIAS_PRO: {
      return previewFeaturesEnabled
        ? PREVIEW_GEMINI_MODEL
        : DEFAULT_GEMINI_MODEL;
    }
    case GEMINI_MODEL_ALIAS_FLASH: {
-      return DEFAULT_GEMINI_FLASH_MODEL;
+      return previewFeaturesEnabled
+        ? PREVIEW_GEMINI_FLASH_MODEL
+        : DEFAULT_GEMINI_FLASH_MODEL;
    }
    case GEMINI_MODEL_ALIAS_FLASH_LITE: {
      return DEFAULT_GEMINI_FLASH_LITE_MODEL;
@@ -60,39 +71,86 @@ export function resolveModel(
 }

 /**
- * Determines the effective model to use, applying fallback logic if necessary.
+ * Resolves the appropriate model based on the classifier's decision.
 *
- * When fallback mode is active, this function enforces the use of the standard
- * fallback model. However, it makes an exception for "lite" models (any model
- * with "lite" in its name), allowing them to be used to preserve cost savings.
- * This ensures that "pro" models are always downgraded, while "lite" model
- * requests are honored.
+ * @param requestedModel The current requested model (e.g. auto-gemini-2.5).
+ * @param modelAlias The alias selected by the classifier ('flash' or 'pro').
+ * @param previewFeaturesEnabled Whether preview features are enabled.
+ * @returns The resolved concrete model name.
+ */
+export function resolveClassifierModel(
+  requestedModel: string,
+  modelAlias: string,
+  previewFeaturesEnabled: boolean = false,
+): string {
+  if (modelAlias === GEMINI_MODEL_ALIAS_FLASH) {
+    if (
+      requestedModel === DEFAULT_GEMINI_MODEL_AUTO ||
+      requestedModel === DEFAULT_GEMINI_MODEL
+    ) {
+      return DEFAULT_GEMINI_FLASH_MODEL;
+    }
+    if (
+      requestedModel === PREVIEW_GEMINI_MODEL_AUTO ||
+      requestedModel === PREVIEW_GEMINI_MODEL
+    ) {
+      return PREVIEW_GEMINI_FLASH_MODEL;
+    }
+    return resolveModel(GEMINI_MODEL_ALIAS_FLASH, previewFeaturesEnabled);
+  }
+  return resolveModel(requestedModel, previewFeaturesEnabled);
+}
+
+/**
+ * Determines the effective model to use by delegating to resolveModel.
 *
- * @param isInFallbackMode Whether the application is in fallback mode.
 * @param requestedModel The model that was originally requested.
 * @param previewFeaturesEnabled A boolean indicating if preview features are enabled.
 * @returns The effective model name.
 */
 export function getEffectiveModel(
-  isInFallbackMode: boolean,
  requestedModel: string,
  previewFeaturesEnabled: boolean | undefined,
 ): string {
-  const resolvedModel = resolveModel(requestedModel, previewFeaturesEnabled);
+  return resolveModel(requestedModel, previewFeaturesEnabled);
+}

-  // If we are not in fallback mode, simply use the resolved model.
-  if (!isInFallbackMode) {
-    return resolvedModel;
+export function getDisplayString(
+  model: string,
+  previewFeaturesEnabled: boolean = false,
+) {
+  switch (model) {
+    case PREVIEW_GEMINI_MODEL_AUTO:
+      return 'Auto (Gemini 3)';
+    case DEFAULT_GEMINI_MODEL_AUTO:
+      return 'Auto (Gemini 2.5)';
+    case GEMINI_MODEL_ALIAS_PRO:
+      return `Manual (${
+        previewFeaturesEnabled ? PREVIEW_GEMINI_MODEL : DEFAULT_GEMINI_MODEL
+      })`;
+    case GEMINI_MODEL_ALIAS_FLASH:
+      return `Manual (${
+        previewFeaturesEnabled
+          ? PREVIEW_GEMINI_FLASH_MODEL
+          : DEFAULT_GEMINI_FLASH_MODEL
+      })`;
+    default:
+      return `Manual (${model})`;
  }
+}

-  // If a "lite" model is requested, honor it. This allows for variations of
-  // lite models without needing to list them all as constants.
-  if (resolvedModel.includes('lite')) {
-    return resolvedModel;
-  }
-
-  // Default fallback for Gemini CLI.
-  return DEFAULT_GEMINI_FLASH_MODEL;
+/**
+ * Checks if the model is a preview model (preview pro, preview flash, or the preview auto selector).
+ *
+ * @param model The model name to check.
+ * @returns True if the model is a preview model.
+ */
+export function isPreviewModel(model: string): boolean {
+  return (
+    model === PREVIEW_GEMINI_MODEL ||
+    model === PREVIEW_GEMINI_FLASH_MODEL ||
+    model === PREVIEW_GEMINI_MODEL_AUTO
+  );
 }

 /**
--- a/packages/core/src/core/snapshots/prompts.test.ts.snap
+++ b/packages/core/src/core/snapshots/prompts.test.ts.snap
@@ -21,11 +21,10 @@ Mock Agent Directory

 ## Software Engineering Tasks
 When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence:
-1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. 
+1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions.
 Use 'read_file' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to 'read_file'.
 2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution.
-3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core 
-Mandates').
+3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates').
 4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands.
 5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to.
 6. **Finalize:** After all verification passes, consider the task complete. Do not remove or revert any changes or created files (like tests). Await the user's next instruction.
@@ -125,11 +124,10 @@ Mock Agent Directory

 ## Software Engineering Tasks
 When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence:
-1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. 
+1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions.
 Use 'read_file' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to 'read_file'.
 2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution.
-3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core 
-Mandates').
+3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates').
 4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands.
 5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to.
 6. **Finalize:** After all verification passes, consider the task complete. Do not remove or revert any changes or created files (like tests). Await the user's next instruction.
@@ -224,11 +222,10 @@ Mock Agent Directory

 ## Software Engineering Tasks
 When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence:
-1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. 
+1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions.
 Use 'read_file' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to 'read_file'.
 2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution.
-3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core 
-Mandates').
+3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates').
 4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands.
 5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to.
 6. **Finalize:** After all verification passes, consider the task complete. Do not remove or revert any changes or created files (like tests). Await the user's next instruction.
@@ -338,11 +335,10 @@ Mock Agent Directory

 ## Software Engineering Tasks
 When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence:
-1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. 
+1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions.
 Use 'read_file' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to 'read_file'.
 2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution.
-3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core 
-Mandates').
+3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates').
 4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands.
 5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to.
 6. **Finalize:** After all verification passes, consider the task complete. Do not remove or revert any changes or created files (like tests). Await the user's next instruction.
@@ -437,11 +433,10 @@ Mock Agent Directory

 ## Software Engineering Tasks
 When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence:
-1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. 
+1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions.
 Use 'read_file' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to 'read_file'.
 2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution.
-3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core 
-Mandates').
+3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates').
 4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands.
 5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to.
 6. **Finalize:** After all verification passes, consider the task complete. Do not remove or revert any changes or created files (like tests). Await the user's next instruction.
@@ -536,11 +531,10 @@ Mock Agent Directory

 ## Software Engineering Tasks
 When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence:
-1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. 
+1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions.
 Use 'read_file' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to 'read_file'.
 2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution.
-3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core 
-Mandates').
+3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates').
 4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands.
 5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to.
 6. **Finalize:** After all verification passes, consider the task complete. Do not remove or revert any changes or created files (like tests). Await the user's next instruction.
@@ -635,11 +629,10 @@ Mock Agent Directory

 ## Software Engineering Tasks
 When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence:
-1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. 
+1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions.
 Use 'read_file' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to 'read_file'.
 2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution.
-3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core 
-Mandates').
+3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates').
 4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands.
 5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to.
 6. **Finalize:** After all verification passes, consider the task complete. Do not remove or revert any changes or created files (like tests). Await the user's next instruction.
@@ -734,11 +727,10 @@ Mock Agent Directory

 ## Software Engineering Tasks
 When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence:
-1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. 
+1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions.
 Use 'read_file' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to 'read_file'.
 2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution.
-3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core 
-Mandates').
+3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates').
 4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands.
 5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to.
 6. **Finalize:** After all verification passes, consider the task complete. Do not remove or revert any changes or created files (like tests). Await the user's next instruction.
@@ -833,11 +825,10 @@ Mock Agent Directory

 ## Software Engineering Tasks
 When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence:
-1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. 
+1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions.
 Use 'read_file' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to 'read_file'.
 2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution.
-3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core 
-Mandates').
+3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates').
 4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands.
 5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to.
 6. **Finalize:** After all verification passes, consider the task complete. Do not remove or revert any changes or created files (like tests). Await the user's next instruction.
@@ -933,11 +924,10 @@ Mock Agent Directory

 ## Software Engineering Tasks
 When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence:
-1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. 
+1. **Understand:** Think about the user's request and the relevant codebase context. Use 'search_file_content' and 'glob' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions.
 Use 'read_file' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to 'read_file'.
 2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution.
-3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core 
-Mandates').
+3. **Implement:** Use the available tools (e.g., 'replace', 'write_file' 'run_shell_command' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates').
 4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands.
 5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards. If unsure about these commands, you can ask the user if they'd like you to run them and if so how to.
 6. **Finalize:** After all verification passes, consider the task complete. Do not remove or revert any changes or created files (like tests). Await the user's next instruction.
--- a/packages/core/src/core/baseLlmClient.ts
+++ b/packages/core/src/core/baseLlmClient.ts
@@ -13,6 +13,8 @@ import type {
 } from '@google/genai';
 import type { Config } from '../config/config.js';
 import type { ContentGenerator } from './contentGenerator.js';
+import type { AuthType } from './contentGenerator.js';
+import { handleFallback } from '../fallback/handler.js';
 import { getResponseText } from '../utils/partUtils.js';
 import { reportError } from '../utils/errorReporting.js';
 import { getErrorMessage } from '../utils/errors.js';
@@ -86,6 +88,7 @@ export class BaseLlmClient {
  constructor(
    private readonly contentGenerator: ContentGenerator,
    private readonly config: Config,
+    private readonly authType?: AuthType,
  ) {}

  async generateJson(
@@ -286,6 +289,12 @@ export class BaseLlmClient {
        maxAttempts:
          availabilityMaxAttempts ?? maxAttempts ?? DEFAULT_MAX_ATTEMPTS,
        getAvailabilityContext,
+        onPersistent429: this.config.isInteractive()
+          ? (authType, error) =>
+              handleFallback(this.config, requestParams.model, authType, error)
+          : undefined,
+        authType:
+          this.authType ?? this.config.getContentGeneratorConfig()?.authType,
      });
    } catch (error) {
      if (abortSignal?.aborted) {
--- a/packages/core/src/core/client.test.ts
+++ b/packages/core/src/core/client.test.ts
@@ -30,7 +30,10 @@ import {
  type ChatCompressionInfo,
 } from './turn.js';
 import { getCoreSystemPrompt } from './prompts.js';
-import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
+import {
+  DEFAULT_GEMINI_FLASH_MODEL,
+  DEFAULT_GEMINI_MODEL_AUTO,
+} from '../config/models.js';
 import { FileDiscoveryService } from '../services/fileDiscoveryService.js';
 import { setSimulate429 } from '../utils/testUtils.js';
 import { tokenLimit } from './tokenLimits.js';
@@ -2044,7 +2047,9 @@ ${JSON.stringify(
            skipped: [],
          },
        );
-
+        vi.mocked(mockConfig.getModel).mockReturnValue(
+          DEFAULT_GEMINI_MODEL_AUTO,
+        );
        const stream = client.sendMessageStream(
          [{ text: 'Hi' }],
          new AbortController().signal,
@@ -2074,7 +2079,9 @@ ${JSON.stringify(
            skipped: [],
          },
        );
-
+        vi.mocked(mockConfig.getModel).mockReturnValue(
+          DEFAULT_GEMINI_MODEL_AUTO,
+        );
        const stream = client.sendMessageStream(
          [{ text: 'Hi' }],
          new AbortController().signal,
--- a/packages/core/src/core/client.ts
+++ b/packages/core/src/core/client.ts
@@ -31,10 +31,7 @@ import type {
  ResumedSessionData,
 } from '../services/chatRecordingService.js';
 import type { ContentGenerator } from './contentGenerator.js';
-import {
-  DEFAULT_GEMINI_FLASH_MODEL,
-  getEffectiveModel,
-} from '../config/models.js';
+import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
 import { LoopDetectionService } from '../services/loopDetectionService.js';
 import { ChatCompressionService } from '../services/chatCompressionService.js';
 import { ideContextStore } from '../ide/ideContext.js';
@@ -395,12 +392,9 @@ export class GeminiClient {
      return this.currentSequenceModel;
    }

-    const configModel = this.config.getModel();
-    return getEffectiveModel(
-      this.config.isInFallbackMode(),
-      configModel,
-      this.config.getPreviewFeatures(),
-    );
+    // Availability logic: The configured model is the source of truth,
+    // including any permanent fallbacks (config.setModel) or manual overrides.
+    return this.config.getActiveModel();
  }

  async *sendMessageStream(
--- a/packages/core/src/core/contentGenerator.ts
+++ b/packages/core/src/core/contentGenerator.ts
@@ -118,7 +118,6 @@ export async function createContentGenerator(
    }
    const version = await getVersion();
    const model = getEffectiveModel(
-      gcConfig.isInFallbackMode(),
      gcConfig.getModel(),
      gcConfig.getPreviewFeatures(),
    );
--- a/packages/core/src/core/geminiChat.test.ts
+++ b/packages/core/src/core/geminiChat.test.ts
@@ -19,13 +19,13 @@ import type { Config } from '../config/config.js';
 import { setSimulate429 } from '../utils/testUtils.js';
 import {
  DEFAULT_GEMINI_FLASH_MODEL,
-  DEFAULT_GEMINI_MODEL,
  DEFAULT_THINKING_MODE,
  PREVIEW_GEMINI_MODEL,
+  PREVIEW_GEMINI_FLASH_MODEL,
 } from '../config/models.js';
 import { AuthType } from './contentGenerator.js';
 import { TerminalQuotaError } from '../utils/googleQuotaErrors.js';
-import { retryWithBackoff, type RetryOptions } from '../utils/retry.js';
+import { type RetryOptions } from '../utils/retry.js';
 import { uiTelemetryService } from '../telemetry/uiTelemetry.js';
 import { HookSystem } from '../hooks/hookSystem.js';
 import { createMockMessageBus } from '../test-utils/mock-message-bus.js';
@@ -127,18 +127,25 @@ describe('GeminiChat', () => {
      }
      return result;
    });
+    let currentModel = 'gemini-pro';
+    let currentActiveModel = 'gemini-pro';
+
    mockConfig = {
      getSessionId: () => 'test-session-id',
      getTelemetryLogPromptsEnabled: () => true,
      getUsageStatisticsEnabled: () => true,
      getDebugMode: () => false,
      getPreviewFeatures: () => false,
-      getContentGeneratorConfig: vi.fn().mockReturnValue({
-        authType: 'oauth-personal', // Ensure this is set for fallback tests
-        model: 'test-model',
+      getContentGeneratorConfig: vi.fn().mockImplementation(() => ({
+        authType: 'oauth-personal',
+        model: currentModel,
+      })),
+      getModel: vi.fn().mockImplementation(() => currentModel),
+      setModel: vi.fn().mockImplementation((m: string) => {
+        currentModel = m;
+        // When model is explicitly set, active model usually resets or updates to it
+        currentActiveModel = m;
      }),
-      getModel: vi.fn().mockReturnValue('gemini-pro'),
-      setModel: vi.fn(),
      isInFallbackMode: vi.fn().mockReturnValue(false),
      getQuotaErrorOccurred: vi.fn().mockReturnValue(false),
      setQuotaErrorOccurred: vi.fn(),
@@ -155,7 +162,8 @@ describe('GeminiChat', () => {
      getUserTier: vi.fn().mockReturnValue(undefined),
      modelConfigService: {
        getResolvedConfig: vi.fn().mockImplementation((modelConfigKey) => {
-          const thinkingConfig = modelConfigKey.model.startsWith('gemini-3')
+          const model = modelConfigKey.model ?? mockConfig.getModel();
+          const thinkingConfig = model.startsWith('gemini-3')
            ? {
                thinkingLevel: ThinkingLevel.HIGH,
              }
@@ -163,7 +171,7 @@ describe('GeminiChat', () => {
                thinkingBudget: DEFAULT_THINKING_MODE,
              };
          return {
-            model: modelConfigKey.model,
+            model,
            generateContentConfig: {
              temperature: 0,
              thinkingConfig,
@@ -178,8 +186,10 @@ describe('GeminiChat', () => {
      isInteractive: vi.fn().mockReturnValue(false),
      getEnableHooks: vi.fn().mockReturnValue(false),
      isModelAvailabilityServiceEnabled: vi.fn().mockReturnValue(false),
-      getActiveModel: vi.fn().mockReturnValue('gemini-pro'),
-      setActiveModel: vi.fn(),
+      getActiveModel: vi.fn().mockImplementation(() => currentActiveModel),
+      setActiveModel: vi
+        .fn()
+        .mockImplementation((m: string) => (currentActiveModel = m)),
      getModelAvailabilityService: vi.fn(),
    } as unknown as Config;

@@ -570,6 +580,39 @@ describe('GeminiChat', () => {
      );
    });

+    it('should use maxAttempts=1 for retryWithBackoff when in Preview Model Fallback Mode (Flash)', async () => {
+      vi.mocked(mockConfig.isPreviewModelFallbackMode).mockReturnValue(true);
+      vi.mocked(mockContentGenerator.generateContentStream).mockResolvedValue(
+        (async function* () {
+          yield {
+            candidates: [
+              {
+                content: { parts: [{ text: 'Success' }] },
+                finishReason: 'STOP',
+              },
+            ],
+          } as unknown as GenerateContentResponse;
+        })(),
+      );
+
+      const stream = await chat.sendMessageStream(
+        { model: PREVIEW_GEMINI_FLASH_MODEL },
+        'test',
+        'prompt-id-fast-retry-flash',
+        new AbortController().signal,
+      );
+      for await (const _ of stream) {
+        // consume stream
+      }
+
+      expect(mockRetryWithBackoff).toHaveBeenCalledWith(
+        expect.any(Function),
+        expect.objectContaining({
+          maxAttempts: 1,
+        }),
+      );
+    });
+
    it('should NOT use maxAttempts=1 for other models even in Preview Model Fallback Mode', async () => {
      vi.mocked(mockConfig.isPreviewModelFallbackMode).mockReturnValue(true);
      vi.mocked(mockContentGenerator.generateContentStream).mockResolvedValue(
@@ -603,62 +646,6 @@ describe('GeminiChat', () => {
      );
    });

-    it('should pass DEFAULT_GEMINI_MODEL to handleFallback when Preview Model is bypassed (downgraded)', async () => {
-      // ARRANGE
-      vi.mocked(mockConfig.isPreviewModelBypassMode).mockReturnValue(true);
-      // Mock retryWithBackoff to simulate catching the error and calling onPersistent429
-      vi.mocked(retryWithBackoff).mockImplementation(
-        async (apiCall, options) => {
-          const onPersistent429 = options?.onPersistent429;
-          try {
-            await apiCall();
-          } catch (error) {
-            if (onPersistent429) {
-              await onPersistent429(AuthType.LOGIN_WITH_GOOGLE, error);
-            }
-            throw error;
-          }
-        },
-      );
-
-      // We need the API call to fail so retryWithBackoff calls the callback.
-      vi.mocked(mockContentGenerator.generateContentStream).mockRejectedValue(
-        new TerminalQuotaError('Simulated Quota Error', {
-          code: 429,
-          message: 'Simulated Quota Error',
-          details: [],
-        }),
-      );
-
-      // ACT
-      const consumeStream = async () => {
-        const stream = await chat.sendMessageStream(
-          { model: PREVIEW_GEMINI_MODEL },
-          'test',
-          'prompt-id-bypass',
-          new AbortController().signal,
-        );
-        // Consume the stream to trigger execution
-        for await (const _ of stream) {
-          // do nothing
-        }
-      };
-
-      await expect(consumeStream()).rejects.toThrow('Simulated Quota Error');
-
-      expect(retryWithBackoff).toHaveBeenCalled();
-
-      // ASSERT
-      // handleFallback is called via onPersistent429Callback
-      // We verify it was called with DEFAULT_GEMINI_MODEL
-      expect(mockHandleFallback).toHaveBeenCalledWith(
-        expect.anything(),
-        DEFAULT_GEMINI_MODEL, // This is the key assertion
-        expect.anything(),
-        expect.anything(),
-      );
-    });
-
    it('should throw an error when a tool call is followed by an empty stream response', async () => {
      // 1. Setup: A history where the model has just made a function call.
      const initialHistory: Content[] = [
@@ -1848,45 +1835,6 @@ describe('GeminiChat', () => {
    expect(turn4.parts[0].text).toBe('second response');
  });

-  describe('Model Resolution', () => {
-    const mockResponse = {
-      candidates: [
-        {
-          content: { parts: [{ text: 'response' }], role: 'model' },
-          finishReason: 'STOP',
-        },
-      ],
-    } as unknown as GenerateContentResponse;
-
-    it('should use the FLASH model when in fallback mode (sendMessageStream)', async () => {
-      vi.mocked(mockConfig.getModel).mockReturnValue('gemini-pro');
-      vi.mocked(mockConfig.isInFallbackMode).mockReturnValue(true);
-      vi.mocked(mockContentGenerator.generateContentStream).mockImplementation(
-        async () =>
-          (async function* () {
-            yield mockResponse;
-          })(),
-      );
-
-      const stream = await chat.sendMessageStream(
-        { model: 'test-model' },
-        'test message',
-        'prompt-id-res3',
-        new AbortController().signal,
-      );
-      for await (const _ of stream) {
-        // consume stream
-      }
-
-      expect(mockContentGenerator.generateContentStream).toHaveBeenCalledWith(
-        expect.objectContaining({
-          model: DEFAULT_GEMINI_FLASH_MODEL,
-        }),
-        'prompt-id-res3',
-      );
-    });
-  });
-
  describe('Fallback Integration (Retries)', () => {
    const error429 = new ApiError({
      message: 'API Error 429: Quota exceeded',
@@ -1983,92 +1931,6 @@ describe('GeminiChat', () => {
      expect(modelTurn.parts![0].text).toBe('Success on retry');
    });

-    it('should switch to DEFAULT_GEMINI_FLASH_MODEL and use thinkingBudget when falling back from a gemini-3 model', async () => {
-      // ARRANGE
-      const authType = AuthType.LOGIN_WITH_GOOGLE;
-      vi.mocked(mockConfig.getContentGeneratorConfig).mockReturnValue({
-        authType,
-      });
-
-      // Initial state: Not in fallback mode
-      const isInFallbackModeSpy = vi.spyOn(mockConfig, 'isInFallbackMode');
-      isInFallbackModeSpy.mockReturnValue(false);
-
-      // Mock API calls:
-      // 1. Fails with 429 (simulating gemini-3 failure)
-      // 2. Succeeds (simulating fallback success)
-      vi.mocked(mockContentGenerator.generateContentStream)
-        .mockRejectedValueOnce(error429)
-        .mockResolvedValueOnce(
-          (async function* () {
-            yield {
-              candidates: [
-                {
-                  content: { parts: [{ text: 'Fallback success' }] },
-                  finishReason: 'STOP',
-                },
-              ],
-            } as unknown as GenerateContentResponse;
-          })(),
-        );
-
-      // Mock handleFallback to enable fallback mode and signal retry
-      mockHandleFallback.mockImplementation(async () => {
-        isInFallbackModeSpy.mockReturnValue(true); // Next call will see fallback mode = true
-        return true;
-      });
-
-      // ACT
-      const stream = await chat.sendMessageStream(
-        { model: 'gemini-3-test-model' }, // Start with a gemini-3 model
-        'test fallback thinking',
-        'prompt-id-fb3',
-        new AbortController().signal,
-      );
-      for await (const _ of stream) {
-        // consume stream
-      }
-
-      // ASSERT
-      expect(mockContentGenerator.generateContentStream).toHaveBeenCalledTimes(
-        2,
-      );
-
-      // First call: gemini-3 model, thinkingLevel set
-      expect(
-        mockContentGenerator.generateContentStream,
-      ).toHaveBeenNthCalledWith(
-        1,
-        expect.objectContaining({
-          model: 'gemini-3-test-model',
-          config: expect.objectContaining({
-            thinkingConfig: {
-              thinkingBudget: undefined,
-              thinkingLevel: ThinkingLevel.HIGH,
-            },
-          }),
-        }),
-        'prompt-id-fb3',
-      );
-
-      // Second call: DEFAULT_GEMINI_FLASH_MODEL (due to fallback), thinkingBudget set (due to fix)
-      expect(
-        mockContentGenerator.generateContentStream,
-      ).toHaveBeenNthCalledWith(
-        2,
-        expect.objectContaining({
-          model: DEFAULT_GEMINI_FLASH_MODEL,
-          config: expect.objectContaining({
-            thinkingConfig: {
-              thinkingBudget: DEFAULT_THINKING_MODE,
-              thinkingLevel: undefined,
-            },
-          }),
-        }),
-        'prompt-id-fb3',
-      );
-    });
-
    it('should stop retrying if handleFallback returns false (e.g., auth intent)', async () => {
      vi.mocked(mockConfig.getModel).mockReturnValue('gemini-pro');
      vi.mocked(mockContentGenerator.generateContentStream).mockRejectedValue(
@@ -2200,92 +2062,6 @@ describe('GeminiChat', () => {
    });
  });

-  describe('Preview Model Fallback Logic', () => {
-    it('should reset previewModelBypassMode to false at the start of sendMessageStream', async () => {
-      const stream = (async function* () {
-        yield {
-          candidates: [
-            {
-              content: { role: 'model', parts: [{ text: 'Success' }] },
-              finishReason: 'STOP',
-            },
-          ],
-        } as unknown as GenerateContentResponse;
-      })();
-      vi.mocked(mockContentGenerator.generateContentStream).mockResolvedValue(
-        stream,
-      );
-
-      await chat.sendMessageStream(
-        { model: 'test-model' },
-        'test',
-        'prompt-id-preview-model-reset',
-        new AbortController().signal,
-      );
-
-      expect(mockConfig.setPreviewModelBypassMode).toHaveBeenCalledWith(false);
-    });
-
-    it('should reset previewModelFallbackMode to false upon successful Preview Model usage', async () => {
-      const stream = (async function* () {
-        yield {
-          candidates: [
-            {
-              content: { role: 'model', parts: [{ text: 'Success' }] },
-              finishReason: 'STOP',
-            },
-          ],
-        } as unknown as GenerateContentResponse;
-      })();
-      vi.mocked(mockContentGenerator.generateContentStream).mockResolvedValue(
-        stream,
-      );
-
-      const resultStream = await chat.sendMessageStream(
-        { model: PREVIEW_GEMINI_MODEL },
-        'test',
-        'prompt-id-preview-model-healing',
-        new AbortController().signal,
-      );
-      for await (const _ of resultStream) {
-        // consume stream
-      }
-
-      expect(mockConfig.setPreviewModelFallbackMode).toHaveBeenCalledWith(
-        false,
-      );
-    });
-    it('should NOT reset previewModelFallbackMode if Preview Model was bypassed (downgraded)', async () => {
-      const stream = (async function* () {
-        yield {
-          candidates: [
-            {
-              content: { role: 'model', parts: [{ text: 'Success' }] },
-              finishReason: 'STOP',
-            },
-          ],
-        } as unknown as GenerateContentResponse;
-      })();
-      vi.mocked(mockContentGenerator.generateContentStream).mockResolvedValue(
-        stream,
-      );
-      // Simulate bypass mode being active (downgrade happened)
-      vi.mocked(mockConfig.isPreviewModelBypassMode).mockReturnValue(true);
-
-      const resultStream = await chat.sendMessageStream(
-        { model: PREVIEW_GEMINI_MODEL },
-        'test',
-        'prompt-id-bypass-no-healing',
-        new AbortController().signal,
-      );
-      for await (const _ of resultStream) {
-        // consume stream
-      }
-
-      expect(mockConfig.setPreviewModelFallbackMode).not.toHaveBeenCalled();
-    });
-  });
-
  describe('ensureActiveLoopHasThoughtSignatures', () => {
    it('should add thoughtSignature to the first functionCall in each model turn of the active loop', () => {
      const chat = new GeminiChat(mockConfig, '', [], []);
--- a/packages/core/src/core/geminiChat.ts
+++ b/packages/core/src/core/geminiChat.ts
@@ -22,11 +22,10 @@ import { createUserContent, FinishReason } from '@google/genai';
 import { retryWithBackoff, isRetryableError } from '../utils/retry.js';
 import type { Config } from '../config/config.js';
 import {
-  DEFAULT_GEMINI_MODEL,
  DEFAULT_THINKING_MODE,
-  PREVIEW_GEMINI_MODEL,
-  getEffectiveModel,
+  resolveModel,
  isGemini2Model,
+  isPreviewModel,
 } from '../config/models.js';
 import { hasCycleInSchema } from '../tools/tools.js';
 import type { StructuredError } from './turn.js';
@@ -306,10 +305,7 @@ export class GeminiChat {
        let maxAttempts = INVALID_CONTENT_RETRY_OPTIONS.maxAttempts;
        // If we are in Preview Model Fallback Mode, we want to fail fast (1 attempt)
        // when probing the Preview Model.
-        if (
-          this.config.isPreviewModelFallbackMode() &&
-          model === PREVIEW_GEMINI_MODEL
-        ) {
+        if (this.config.isPreviewModelFallbackMode() && isPreviewModel(model)) {
          maxAttempts = 1;
        }

@@ -388,7 +384,7 @@ export class GeminiChat {
          // Preview Model successfully used, disable fallback mode.
          // We only do this if we didn't bypass Preview Model (i.e. we actually used it).
          if (
-            model === PREVIEW_GEMINI_MODEL &&
+            isPreviewModel(model) &&
            !this.config.isPreviewModelBypassMode()
          ) {
            this.config.setPreviewModelFallbackMode(false);
@@ -435,11 +431,24 @@ export class GeminiChat {
      this.config,
      () => lastModelToUse,
    );
-    const apiCall = async () => {
-      let modelToUse: string;
+    // Track initial active model to detect fallback changes
+    const initialActiveModel = this.config.getActiveModel();
+
+    const apiCall = async () => {
+      // Default to the last used model (which respects arguments/availability selection)
+      let modelToUse = resolveModel(
+        lastModelToUse,
+        this.config.getPreviewFeatures(),
+      );
+
+      // If the active model has changed (e.g. due to a fallback updating the config),
+      // we switch to the new active model.
+      if (this.config.getActiveModel() !== initialActiveModel) {
+        modelToUse = resolveModel(
+          this.config.getActiveModel(),
+          this.config.getPreviewFeatures(),
+        );

-      if (this.config.isModelAvailabilityServiceEnabled()) {
-        modelToUse = this.config.getActiveModel();
        if (modelToUse !== lastModelToUse) {
          const { generateContentConfig: newConfig } =
            this.config.modelConfigService.getResolvedConfig({
@@ -453,24 +462,6 @@ export class GeminiChat {
            currentGenerateContentConfig.abortSignal = abortSignal;
          }
        }
-      } else {
-        modelToUse = getEffectiveModel(
-          this.config.isInFallbackMode(),
-          model,
-          this.config.getPreviewFeatures(),
-        );
-
-        // Preview Model Bypass Logic:
-        // If we are in "Preview Model Bypass Mode" (transient failure), we force downgrade to 2.5 Pro
-        // IF the effective model is currently Preview Model.
-        // Note: In availability mode, this should ideally be handled by policy, but preserving
-        // bypass logic for now as it handles specific transient behavior.
-        if (
-          this.config.isPreviewModelBypassMode() &&
-          modelToUse === PREVIEW_GEMINI_MODEL
-        ) {
-          modelToUse = DEFAULT_GEMINI_MODEL;
-        }
      }

      lastModelToUse = modelToUse;
@@ -498,10 +489,9 @@ export class GeminiChat {
        };
        delete config.thinkingConfig?.thinkingLevel;
      }
-      let contentsToUse =
-        modelToUse === PREVIEW_GEMINI_MODEL
-          ? contentsForPreviewModel
-          : requestContents;
+      let contentsToUse = isPreviewModel(modelToUse)
+        ? contentsForPreviewModel
+        : requestContents;

      // Fire BeforeModel and BeforeToolSelection hooks if enabled
      const hooksEnabled = this.config.getEnableHooks();
@@ -589,8 +579,7 @@ export class GeminiChat {
      signal: generateContentConfig.abortSignal,
      maxAttempts:
        availabilityMaxAttempts ??
-        (this.config.isPreviewModelFallbackMode() &&
-        model === PREVIEW_GEMINI_MODEL
+        (this.config.isPreviewModelFallbackMode() && isPreviewModel(model)
          ? 1
          : undefined),
      getAvailabilityContext,
--- a/packages/core/src/core/geminiChat_network_retry.test.ts
+++ b/packages/core/src/core/geminiChat_network_retry.test.ts
@@ -78,6 +78,8 @@ describe('GeminiChat Network Retries', () => {
        model: 'test-model',
      }),
      getModel: vi.fn().mockReturnValue('gemini-pro'),
+      getActiveModel: vi.fn().mockReturnValue('gemini-pro'),
+      setActiveModel: vi.fn(),
      isInFallbackMode: vi.fn().mockReturnValue(false),
      getQuotaErrorOccurred: vi.fn().mockReturnValue(false),
      getProjectRoot: vi.fn().mockReturnValue('/test/project/root'),
--- a/packages/core/src/core/prompts.test.ts
+++ b/packages/core/src/core/prompts.test.ts
@@ -15,9 +15,10 @@ import { CodebaseInvestigatorAgent } from '../agents/codebase-investigator.js';
 import { GEMINI_DIR } from '../utils/paths.js';
 import { debugLogger } from '../utils/debugLogger.js';
 import {
-  DEFAULT_GEMINI_MODEL,
-  getEffectiveModel,
  PREVIEW_GEMINI_MODEL,
+  PREVIEW_GEMINI_FLASH_MODEL,
+  DEFAULT_GEMINI_MODEL_AUTO,
+  DEFAULT_GEMINI_MODEL,
 } from '../config/models.js';

 // Mock tool names if they are dynamically generated or complex
@@ -43,10 +44,9 @@ vi.mock('../utils/gitUtils', () => ({
 }));
 vi.mock('node:fs');
 vi.mock('../config/models.js', async (importOriginal) => {
-  const actual = await importOriginal<typeof import('../config/models.js')>();
+  const actual = await importOriginal();
  return {
-    ...actual,
-    getEffectiveModel: vi.fn(),
+    ...(actual as object),
  };
 });

@@ -66,24 +66,32 @@ describe('Core System Prompt (prompts.ts)', () => {
      },
      isInteractive: vi.fn().mockReturnValue(true),
      isInteractiveShellEnabled: vi.fn().mockReturnValue(true),
-      getModel: vi.fn().mockReturnValue('auto'),
+      getModel: vi.fn().mockReturnValue(DEFAULT_GEMINI_MODEL_AUTO),
+      getActiveModel: vi.fn().mockReturnValue(DEFAULT_GEMINI_MODEL),
      getPreviewFeatures: vi.fn().mockReturnValue(false),
      isInFallbackMode: vi.fn().mockReturnValue(false),
      getAgentRegistry: vi.fn().mockReturnValue({
        getDirectoryContext: vi.fn().mockReturnValue('Mock Agent Directory'),
      }),
    } as unknown as Config;
-    vi.mocked(getEffectiveModel).mockReturnValue(DEFAULT_GEMINI_MODEL);
  });

  it('should use chatty system prompt for preview model', () => {
-    vi.mocked(getEffectiveModel).mockReturnValue(PREVIEW_GEMINI_MODEL);
+    vi.mocked(mockConfig.getActiveModel).mockReturnValue(PREVIEW_GEMINI_MODEL);
    const prompt = getCoreSystemPrompt(mockConfig);
    expect(prompt).toContain('You are an interactive CLI agent'); // Check for core content
    expect(prompt).not.toContain('No Chitchat:');
    expect(prompt).toMatchSnapshot();
  });

+  it('should use chatty system prompt for preview flash model', () => {
+    vi.mocked(mockConfig.getActiveModel).mockReturnValue(
+      PREVIEW_GEMINI_FLASH_MODEL,
+    );
+    const prompt = getCoreSystemPrompt(mockConfig);
+    expect(prompt).toContain('Do not call tools in silence');
+  });
+
  it.each([
    ['empty string', ''],
    ['whitespace only', '   \n  \t '],
@@ -163,6 +171,7 @@ describe('Core System Prompt (prompts.ts)', () => {
        isInteractive: vi.fn().mockReturnValue(false),
        isInteractiveShellEnabled: vi.fn().mockReturnValue(false),
        getModel: vi.fn().mockReturnValue('auto'),
+        getActiveModel: vi.fn().mockReturnValue(DEFAULT_GEMINI_MODEL),
        getPreviewFeatures: vi.fn().mockReturnValue(false),
        isInFallbackMode: vi.fn().mockReturnValue(false),
        getAgentRegistry: vi.fn().mockReturnValue({
--- a/packages/core/src/core/prompts.ts
+++ b/packages/core/src/core/prompts.ts
@@ -25,7 +25,7 @@ import type { Config } from '../config/config.js';
 import { GEMINI_DIR } from '../utils/paths.js';
 import { debugLogger } from '../utils/debugLogger.js';
 import { WriteTodosTool } from '../tools/write-todos.js';
-import { getEffectiveModel, PREVIEW_GEMINI_MODEL } from '../config/models.js';
+import { resolveModel, isPreviewModel } from '../config/models.js';

 export function resolvePathFromEnv(envVar?: string): {
  isSwitch: boolean;
@@ -106,13 +106,12 @@ export function getCoreSystemPrompt(
  }

  // TODO(joshualitt): Replace with system instructions on model configs.
-  const desiredModel = getEffectiveModel(
-    config.isInFallbackMode(),
-    config.getModel(),
+  const desiredModel = resolveModel(
+    config.getActiveModel(),
    config.getPreviewFeatures(),
  );

-  const isGemini3 = desiredModel === PREVIEW_GEMINI_MODEL;
+  const isGemini3 = isPreviewModel(desiredModel);

  const mandatesVariant = isGemini3
    ? `
@@ -161,7 +160,7 @@ ${config.getAgentRegistry().getDirectoryContext()}`,

 ## Software Engineering Tasks
 When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence:
-1. **Understand:** Think about the user's request and the relevant codebase context. Use '${GREP_TOOL_NAME}' and '${GLOB_TOOL_NAME}' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. 
+1. **Understand:** Think about the user's request and the relevant codebase context. Use '${GREP_TOOL_NAME}' and '${GLOB_TOOL_NAME}' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions.
 Use '${READ_FILE_TOOL_NAME}' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to '${READ_FILE_TOOL_NAME}'.
 2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution.`,

@@ -188,8 +187,7 @@ When requested to perform tasks like fixing bugs, adding features, refactoring,
 When requested to perform tasks like fixing bugs, adding features, refactoring, or explaining code, follow this sequence:
 1. **Understand:** Think about the user's request and the relevant codebase context. Use '${GREP_TOOL_NAME}' and '${GLOB_TOOL_NAME}' search tools extensively (in parallel if independent) to understand file structures, existing code patterns, and conventions. Use '${READ_FILE_TOOL_NAME}' to understand context and validate any assumptions you may have. If you need to read multiple files, you should make multiple parallel calls to '${READ_FILE_TOOL_NAME}'.
 2. **Plan:** Build a coherent and grounded (based on the understanding in step 1) plan for how you intend to resolve the user's task. For complex tasks, break them down into smaller, manageable subtasks and use the \`${WRITE_TODOS_TOOL_NAME}\` tool to track your progress. Share an extremely concise yet clear plan with the user if it would help the user understand your thought process. As part of the plan, you should use an iterative development process that includes writing unit tests to verify your changes. Use output logs or debug statements as part of this process to arrive at a solution.`,
-      primaryWorkflows_suffix: `3. **Implement:** Use the available tools (e.g., '${EDIT_TOOL_NAME}', '${WRITE_FILE_TOOL_NAME}' '${SHELL_TOOL_NAME}' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core 
-Mandates').
+      primaryWorkflows_suffix: `3. **Implement:** Use the available tools (e.g., '${EDIT_TOOL_NAME}', '${WRITE_FILE_TOOL_NAME}' '${SHELL_TOOL_NAME}' ...) to act on the plan, strictly adhering to the project's established conventions (detailed under 'Core Mandates').
 4. **Verify (Tests):** If applicable and feasible, verify the changes using the project's testing procedures. Identify the correct test commands and frameworks by examining 'README' files, build/package configuration (e.g., 'package.json'), or existing test execution patterns. NEVER assume standard test commands.
 5. **Verify (Standards):** VERY IMPORTANT: After making code changes, execute the project-specific build, linting and type-checking commands (e.g., 'tsc', 'npm run lint', 'ruff check .') that you have identified for this project (or obtained from the user). This ensures code quality and adherence to standards.${interactiveMode ? " If unsure about these commands, you can ask the user if they'd like you to run them and if so how to." : ''}
 6. **Finalize:** After all verification passes, consider the task complete. Do not remove or revert any changes or created files (like tests). Await the user's next instruction.
@@ -416,7 +414,7 @@ The structure MUST be as follows:
         - Build Command: \`npm run build\`
         - Testing: Tests are run with \`npm test\`. Test files must end in \`.test.ts\`.
         - API Endpoint: The primary API endpoint is \`https://api.example.com/v2\`.
-         
+
        -->
    </key_knowledge>

--- a/packages/core/src/fallback/handler.test.ts
+++ b/packages/core/src/fallback/handler.test.ts
@@ -22,11 +22,12 @@ import { AuthType } from '../core/contentGenerator.js';
 import {
  DEFAULT_GEMINI_FLASH_MODEL,
  DEFAULT_GEMINI_MODEL,
+  DEFAULT_GEMINI_MODEL_AUTO,
+  PREVIEW_GEMINI_FLASH_MODEL,
  PREVIEW_GEMINI_MODEL,
+  PREVIEW_GEMINI_MODEL_AUTO,
 } from '../config/models.js';
-import { logFlashFallback } from '../telemetry/index.js';
 import type { FallbackModelHandler } from './types.js';
-import { ModelNotFoundError } from '../utils/httpErrors.js';
 import { openBrowserSecurely } from '../utils/secure-browser-launcher.js';
 import { coreEvents } from '../utils/events.js';
 import { debugLogger } from '../utils/debugLogger.js';
@@ -64,7 +65,7 @@ const createMockConfig = (overrides: Partial<Config> = {}): Config =>
  ({
    isInFallbackMode: vi.fn(() => false),
    setFallbackMode: vi.fn(),
-    isModelAvailabilityServiceEnabled: vi.fn(() => false),
+    isModelAvailabilityServiceEnabled: vi.fn(() => true),
    isPreviewModelFallbackMode: vi.fn(() => false),
    setPreviewModelFallbackMode: vi.fn(),
    isPreviewModelBypassMode: vi.fn(() => false),
@@ -78,6 +79,7 @@ const createMockConfig = (overrides: Partial<Config> = {}): Config =>
        skipped: [],
      }),
    ),
+    getActiveModel: vi.fn(() => MOCK_PRO_MODEL),
    getModel: vi.fn(() => MOCK_PRO_MODEL),
    getPreviewFeatures: vi.fn(() => false),
    getUserTier: vi.fn(() => undefined),
@@ -113,430 +115,6 @@ describe('handleFallback', () => {
    fallbackEventSpy.mockRestore();
  });

-  it('should return null immediately if authType is not OAuth', async () => {
-    const result = await handleFallback(
-      mockConfig,
-      MOCK_PRO_MODEL,
-      AUTH_API_KEY,
-    );
-    expect(result).toBeNull();
-    expect(mockHandler).not.toHaveBeenCalled();
-    expect(mockConfig.setFallbackMode).not.toHaveBeenCalled();
-  });
-
-  it('should still consult the handler if the failed model is the fallback model', async () => {
-    mockHandler.mockResolvedValue('stop');
-    const result = await handleFallback(
-      mockConfig,
-      FALLBACK_MODEL, // Failed model is Flash
-      AUTH_OAUTH,
-    );
-    expect(result).toBe(false);
-    expect(mockHandler).toHaveBeenCalled();
-  });
-
-  it('should return null if no fallbackHandler is injected in config', async () => {
-    const configWithoutHandler = createMockConfig({
-      fallbackModelHandler: undefined,
-    });
-    const result = await handleFallback(
-      configWithoutHandler,
-      MOCK_PRO_MODEL,
-      AUTH_OAUTH,
-    );
-    expect(result).toBeNull();
-  });
-
-  describe('when handler returns "retry_always"', () => {
-    it('should activate fallback mode, log telemetry, and return true', async () => {
-      mockHandler.mockResolvedValue('retry_always');
-
-      const result = await handleFallback(
-        mockConfig,
-        MOCK_PRO_MODEL,
-        AUTH_OAUTH,
-      );
-
-      expect(result).toBe(true);
-      expect(mockConfig.setFallbackMode).toHaveBeenCalledWith(true);
-      expect(logFlashFallback).toHaveBeenCalled();
-    });
-  });
-
-  describe('when handler returns "stop"', () => {
-    it('should activate fallback mode, log telemetry, and return false', async () => {
-      mockHandler.mockResolvedValue('stop');
-
-      const result = await handleFallback(
-        mockConfig,
-        MOCK_PRO_MODEL,
-        AUTH_OAUTH,
-      );
-
-      expect(result).toBe(false);
-      expect(mockConfig.setFallbackMode).toHaveBeenCalledWith(true);
-      expect(logFlashFallback).toHaveBeenCalled();
-    });
-  });
-
-  it('should return false without toggling fallback when handler returns "retry_later"', async () => {
-    mockHandler.mockResolvedValue('retry_later');
-
-    const result = await handleFallback(mockConfig, MOCK_PRO_MODEL, AUTH_OAUTH);
-
-    expect(result).toBe(false);
-    expect(mockConfig.setFallbackMode).not.toHaveBeenCalled();
-    expect(logFlashFallback).not.toHaveBeenCalled();
-    expect(fallbackEventSpy).not.toHaveBeenCalled();
-  });
-
-  it('should launch upgrade flow and avoid fallback mode when handler returns "upgrade"', async () => {
-    mockHandler.mockResolvedValue('upgrade');
-    vi.mocked(openBrowserSecurely).mockResolvedValue(undefined);
-
-    const result = await handleFallback(mockConfig, MOCK_PRO_MODEL, AUTH_OAUTH);
-
-    expect(result).toBe(false);
-    expect(openBrowserSecurely).toHaveBeenCalledWith(
-      'https://goo.gle/set-up-gemini-code-assist',
-    );
-    expect(mockConfig.setFallbackMode).not.toHaveBeenCalled();
-    expect(logFlashFallback).not.toHaveBeenCalled();
-    expect(fallbackEventSpy).not.toHaveBeenCalled();
-  });
-
-  it('should log a warning and continue when upgrade flow fails to open a browser', async () => {
-    mockHandler.mockResolvedValue('upgrade');
-    const debugWarnSpy = vi.spyOn(debugLogger, 'warn');
-    const consoleWarnSpy = vi
-      .spyOn(console, 'warn')
-      .mockImplementation(() => {});
-    vi.mocked(openBrowserSecurely).mockRejectedValue(new Error('blocked'));
-
-    const result = await handleFallback(mockConfig, MOCK_PRO_MODEL, AUTH_OAUTH);
-
-    expect(result).toBe(false);
-    expect(debugWarnSpy).toHaveBeenCalledWith(
-      'Failed to open browser automatically:',
-      'blocked',
-    );
-    expect(mockConfig.setFallbackMode).not.toHaveBeenCalled();
-    expect(fallbackEventSpy).not.toHaveBeenCalled();
-    debugWarnSpy.mockRestore();
-    consoleWarnSpy.mockRestore();
-  });
-
-  describe('when handler returns an unexpected value', () => {
-    it('should log an error and return null', async () => {
-      mockHandler.mockResolvedValue(null);
-
-      const result = await handleFallback(
-        mockConfig,
-        MOCK_PRO_MODEL,
-        AUTH_OAUTH,
-      );
-
-      expect(result).toBeNull();
-      expect(consoleErrorSpy).toHaveBeenCalledWith(
-        'Fallback UI handler failed:',
-        new Error(
-          'Unexpected fallback intent received from fallbackModelHandler: "null"',
-        ),
-      );
-      expect(mockConfig.setFallbackMode).not.toHaveBeenCalled();
-    });
-  });
-
-  it('should pass the correct context (failedModel, fallbackModel, error) to the handler', async () => {
-    const mockError = new Error('Quota Exceeded');
-    mockHandler.mockResolvedValue('retry_always');
-
-    await handleFallback(mockConfig, MOCK_PRO_MODEL, AUTH_OAUTH, mockError);
-
-    expect(mockHandler).toHaveBeenCalledWith(
-      MOCK_PRO_MODEL,
-      FALLBACK_MODEL,
-      mockError,
-    );
-  });
-
-  it('should not call setFallbackMode or log telemetry if already in fallback mode', async () => {
-    // Setup config where fallback mode is already active
-    const activeFallbackConfig = createMockConfig({
-      fallbackModelHandler: mockHandler,
-      isInFallbackMode: vi.fn(() => true), // Already active
-      setFallbackMode: vi.fn(),
-    });
-
-    mockHandler.mockResolvedValue('retry_always');
-
-    const result = await handleFallback(
-      activeFallbackConfig,
-      MOCK_PRO_MODEL,
-      AUTH_OAUTH,
-    );
-
-    // Should still return true to allow the retry (which will use the active fallback mode)
-    expect(result).toBe(true);
-    // Should still consult the handler
-    expect(mockHandler).toHaveBeenCalled();
-    // But should not mutate state or log telemetry again
-    expect(activeFallbackConfig.setFallbackMode).not.toHaveBeenCalled();
-    expect(logFlashFallback).not.toHaveBeenCalled();
-  });
-
-  it('should catch errors from the handler, log an error, and return null', async () => {
-    const handlerError = new Error('UI interaction failed');
-    mockHandler.mockRejectedValue(handlerError);
-
-    const result = await handleFallback(mockConfig, MOCK_PRO_MODEL, AUTH_OAUTH);
-
-    expect(result).toBeNull();
-    expect(consoleErrorSpy).toHaveBeenCalledWith(
-      'Fallback UI handler failed:',
-      handlerError,
-    );
-    expect(mockConfig.setFallbackMode).not.toHaveBeenCalled();
-  });
-
-  describe('Preview Model Fallback Logic', () => {
-    const previewModel = PREVIEW_GEMINI_MODEL;
-
-    it('should only set Preview Model bypass mode on retryable quota failure', async () => {
-      const mockGoogleApiError = {
-        code: 429,
-        message: 'mock error',
-        details: [],
-      };
-      const retryableQuotaError = new RetryableQuotaError(
-        'Capacity error',
-        mockGoogleApiError,
-        5,
-      );
-      await handleFallback(
-        mockConfig,
-        previewModel,
-        AUTH_OAUTH,
-        retryableQuotaError,
-      );
-      expect(mockConfig.setPreviewModelBypassMode).toHaveBeenCalledWith(true);
-    });
-
-    it('should not set Preview Model bypass mode on non-retryable quota failure', async () => {
-      const mockGoogleApiError = {
-        code: 429,
-        message: 'mock error',
-        details: [],
-      };
-      const terminalQuotaError = new TerminalQuotaError(
-        'quota error',
-        mockGoogleApiError,
-        5,
-      );
-      await handleFallback(
-        mockConfig,
-        previewModel,
-        AUTH_OAUTH,
-        terminalQuotaError,
-      );
-
-      expect(mockConfig.setPreviewModelBypassMode).not.toHaveBeenCalled();
-    });
-
-    it('should silently retry if Preview Model fallback mode is already active and error is retryable error', async () => {
-      vi.spyOn(mockConfig, 'isPreviewModelFallbackMode').mockReturnValue(true);
-      const mockGoogleApiError = {
-        code: 429,
-        message: 'mock error',
-        details: [],
-      };
-      const retryableQuotaError = new RetryableQuotaError(
-        'Capacity error',
-        mockGoogleApiError,
-        5,
-      );
-      const result = await handleFallback(
-        mockConfig,
-        previewModel,
-        AUTH_OAUTH,
-        retryableQuotaError,
-      );
-
-      expect(result).toBe(true);
-      expect(mockHandler).not.toHaveBeenCalled();
-    });
-
-    it('should activate Preview Model fallback mode when handler returns "retry_always" and is RetryableQuotaError', async () => {
-      mockHandler.mockResolvedValue('retry_always');
-      const mockGoogleApiError = {
-        code: 429,
-        message: 'mock error',
-        details: [],
-      };
-      const retryableQuotaError = new RetryableQuotaError(
-        'Capacity error',
-        mockGoogleApiError,
-        5,
-      );
-      const result = await handleFallback(
-        mockConfig,
-        previewModel,
-        AUTH_OAUTH,
-        retryableQuotaError,
-      );
-
-      expect(result).toBe(true);
-      expect(mockConfig.setPreviewModelBypassMode).toHaveBeenCalledWith(true);
-      expect(mockConfig.setPreviewModelFallbackMode).toHaveBeenCalledWith(true);
-    });
-
-    it('should activate regular fallback when handler returns "retry_always" and is TerminalQuotaError', async () => {
-      mockHandler.mockResolvedValue('retry_always');
-      const mockGoogleApiError = {
-        code: 503,
-        message: 'mock error',
-        details: [],
-      };
-      const terminalError = new TerminalQuotaError(
-        'Quota error',
-        mockGoogleApiError,
-        5,
-      );
-      const result = await handleFallback(
-        mockConfig,
-        previewModel,
-        AUTH_OAUTH,
-        terminalError,
-      );
-
-      expect(result).toBe(true);
-      expect(mockConfig.setPreviewModelFallbackMode).not.toBeCalled();
-      expect(mockConfig.setFallbackMode).toHaveBeenCalledWith(true);
-    });
-
-    it('should NOT set fallback mode if user chooses "retry_once"', async () => {
-      const mockGoogleApiError = {
-        code: 429,
-        message: 'mock error',
-        details: [],
-      };
-      const terminalQuotaError = new TerminalQuotaError(
-        'quota error',
-        mockGoogleApiError,
-        5,
-      );
-      mockHandler.mockResolvedValue('retry_once');
-
-      const result = await handleFallback(
-        mockConfig,
-        PREVIEW_GEMINI_MODEL,
-        AuthType.LOGIN_WITH_GOOGLE,
-        terminalQuotaError,
-      );
-
-      expect(result).toBe(true);
-      expect(mockConfig.setPreviewModelBypassMode).not.toHaveBeenCalled();
-      expect(mockConfig.setPreviewModelFallbackMode).not.toHaveBeenCalled();
-      expect(mockConfig.setFallbackMode).not.toHaveBeenCalled();
-    });
-
-    it('should pass DEFAULT_GEMINI_MODEL as fallback when Preview Model fails with Retryable Error', async () => {
-      const mockFallbackHandler = vi.fn().mockResolvedValue('stop');
-      vi.mocked(mockConfig.fallbackModelHandler!).mockImplementation(
-        mockFallbackHandler,
-      );
-      const mockGoogleApiError = {
-        code: 429,
-        message: 'mock error',
-        details: [],
-      };
-      const retryableQuotaError = new RetryableQuotaError(
-        'Capacity error',
-        mockGoogleApiError,
-        5,
-      );
-
-      await handleFallback(
-        mockConfig,
-        PREVIEW_GEMINI_MODEL,
-        AuthType.LOGIN_WITH_GOOGLE,
-        retryableQuotaError,
-      );
-
-      expect(mockConfig.fallbackModelHandler).toHaveBeenCalledWith(
-        PREVIEW_GEMINI_MODEL,
-        DEFAULT_GEMINI_MODEL,
-        retryableQuotaError,
-      );
-    });
-
-    it('should pass DEFAULT_GEMINI_MODEL as fallback when Preview Model fails with other error', async () => {
-      await handleFallback(
-        mockConfig,
-        PREVIEW_GEMINI_MODEL,
-        AuthType.LOGIN_WITH_GOOGLE,
-      );
-
-      expect(mockConfig.fallbackModelHandler).toHaveBeenCalledWith(
-        PREVIEW_GEMINI_MODEL,
-        DEFAULT_GEMINI_MODEL,
-        undefined,
-      );
-    });
-
-    it('should pass DEFAULT_GEMINI_FLASH_MODEL as fallback when Preview Model fails with other error', async () => {
-      const mockGoogleApiError = {
-        code: 429,
-        message: 'mock error',
-        details: [],
-      };
-      const terminalQuotaError = new TerminalQuotaError(
-        'quota error',
-        mockGoogleApiError,
-        5,
-      );
-      await handleFallback(
-        mockConfig,
-        PREVIEW_GEMINI_MODEL,
-        AuthType.LOGIN_WITH_GOOGLE,
-        terminalQuotaError,
-      );
-
-      expect(mockConfig.fallbackModelHandler).toHaveBeenCalledWith(
-        PREVIEW_GEMINI_MODEL,
-        DEFAULT_GEMINI_FLASH_MODEL,
-        terminalQuotaError,
-      );
-    });
-  });
-
-  it('should return null if ModelNotFoundError occurs for a non-preview model', async () => {
-    const modelNotFoundError = new ModelNotFoundError('Not found');
-    const result = await handleFallback(
-      mockConfig,
-      DEFAULT_GEMINI_MODEL, // Not preview model
-      AUTH_OAUTH,
-      modelNotFoundError,
-    );
-    expect(result).toBeNull();
-    expect(mockHandler).not.toHaveBeenCalled();
-  });
-
-  it('should consult handler if ModelNotFoundError occurs for preview model', async () => {
-    const modelNotFoundError = new ModelNotFoundError('Not found');
-    mockHandler.mockResolvedValue('retry_always');
-
-    const result = await handleFallback(
-      mockConfig,
-      PREVIEW_GEMINI_MODEL,
-      AUTH_OAUTH,
-      modelNotFoundError,
-    );
-
-    expect(result).toBe(true);
-    expect(mockHandler).toHaveBeenCalled();
-  });
-
  describe('policy-driven flow', () => {
    let policyConfig: Config;
    let availability: ModelAvailabilityService;
@@ -550,31 +128,47 @@ describe('handleFallback', () => {
      });
      policyHandler = vi.fn().mockResolvedValue('retry_once');
      policyConfig = createMockConfig();
-      vi.spyOn(
-        policyConfig,
-        'isModelAvailabilityServiceEnabled',
-      ).mockReturnValue(true);
-      vi.spyOn(policyConfig, 'getModelAvailabilityService').mockReturnValue(
+
+      // Ensure we test the availability path
+      vi.mocked(policyConfig.isModelAvailabilityServiceEnabled).mockReturnValue(
+        true,
+      );
+      vi.mocked(policyConfig.getModelAvailabilityService).mockReturnValue(
        availability,
      );
-      vi.spyOn(policyConfig, 'getFallbackModelHandler').mockReturnValue(
+      vi.mocked(policyConfig.getFallbackModelHandler).mockReturnValue(
        policyHandler,
      );
    });

+    it('should return null immediately if authType is not OAuth', async () => {
+      const result = await handleFallback(
+        policyConfig,
+        MOCK_PRO_MODEL,
+        AUTH_API_KEY,
+      );
+      expect(result).toBeNull();
+      expect(policyHandler).not.toHaveBeenCalled();
+    });
+
    it('uses availability selection with correct candidates when enabled', async () => {
-      vi.spyOn(policyConfig, 'getPreviewFeatures').mockReturnValue(true);
-      vi.spyOn(policyConfig, 'getModel').mockReturnValue(DEFAULT_GEMINI_MODEL);
+      // Direct mock manipulation since it's already a vi.fn()
+      vi.mocked(policyConfig.getPreviewFeatures).mockReturnValue(true);
+      vi.mocked(policyConfig.getModel).mockReturnValue(
+        DEFAULT_GEMINI_MODEL_AUTO,
+      );

      await handleFallback(policyConfig, DEFAULT_GEMINI_MODEL, AUTH_OAUTH);

      expect(availability.selectFirstAvailable).toHaveBeenCalledWith([
        DEFAULT_GEMINI_FLASH_MODEL,
-        PREVIEW_GEMINI_MODEL,
      ]);
    });

    it('falls back to last resort when availability returns null', async () => {
+      vi.mocked(policyConfig.getModel).mockReturnValue(
+        DEFAULT_GEMINI_MODEL_AUTO,
+      );
      availability.selectFirstAvailable = vi
        .fn()
        .mockReturnValue({ selectedModel: null, skipped: [] });
@@ -634,9 +228,12 @@ describe('handleFallback', () => {
      }
    });

-    it('wraps around to upgrade candidates if the current model was selected mid-chain (e.g. by router)', async () => {
+    it('does not wrap around to upgrade candidates if the current model was selected at the end (e.g. by router)', async () => {
      // Last-resort failure (Flash) in [Preview, Pro, Flash] checks Preview then Pro (all upstream).
-      vi.spyOn(policyConfig, 'getPreviewFeatures').mockReturnValue(true);
+      vi.mocked(policyConfig.getPreviewFeatures).mockReturnValue(true);
+      vi.mocked(policyConfig.getModel).mockReturnValue(
+        DEFAULT_GEMINI_MODEL_AUTO,
+      );

      availability.selectFirstAvailable = vi.fn().mockReturnValue({
        selectedModel: MOCK_PRO_MODEL,
@@ -650,43 +247,27 @@ describe('handleFallback', () => {
        AUTH_OAUTH,
      );

-      expect(availability.selectFirstAvailable).toHaveBeenCalledWith([
-        PREVIEW_GEMINI_MODEL,
-        MOCK_PRO_MODEL,
-      ]);
+      expect(availability.selectFirstAvailable).not.toHaveBeenCalled();
      expect(policyHandler).toHaveBeenCalledWith(
        DEFAULT_GEMINI_FLASH_MODEL,
-        MOCK_PRO_MODEL,
+        DEFAULT_GEMINI_FLASH_MODEL,
        undefined,
      );
    });

-    it('logs and returns null when handler resolves to null', async () => {
-      policyHandler.mockResolvedValue(null);
-      const debugLoggerErrorSpy = vi.spyOn(debugLogger, 'error');
-      const result = await handleFallback(
-        policyConfig,
-        MOCK_PRO_MODEL,
-        AUTH_OAUTH,
-      );
-
-      expect(result).toBeNull();
-      expect(debugLoggerErrorSpy).toHaveBeenCalledWith(
-        'Fallback handler failed:',
-        new Error(
-          'Unexpected fallback intent received from fallbackModelHandler: "null"',
-        ),
-      );
-      debugLoggerErrorSpy.mockRestore();
-    });
-
    it('successfully follows expected availability response for Preview Chain', async () => {
-      availability.selectFirstAvailable = vi
-        .fn()
-        .mockReturnValue({ selectedModel: DEFAULT_GEMINI_MODEL, skipped: [] });
+      availability.selectFirstAvailable = vi.fn().mockReturnValue({
+        selectedModel: PREVIEW_GEMINI_FLASH_MODEL,
+        skipped: [],
+      });
      policyHandler.mockResolvedValue('retry_once');
-      vi.spyOn(policyConfig, 'getPreviewFeatures').mockReturnValue(true);
-      vi.spyOn(policyConfig, 'getModel').mockReturnValue(PREVIEW_GEMINI_MODEL);
+      vi.mocked(policyConfig.getPreviewFeatures).mockReturnValue(true);
+      vi.mocked(policyConfig.getActiveModel).mockReturnValue(
+        PREVIEW_GEMINI_MODEL,
+      );
+      vi.mocked(policyConfig.getModel).mockReturnValue(
+        PREVIEW_GEMINI_MODEL_AUTO,
+      );

      const result = await handleFallback(
        policyConfig,
@@ -696,21 +277,112 @@ describe('handleFallback', () => {

      expect(result).toBe(true);
      expect(availability.selectFirstAvailable).toHaveBeenCalledWith([
-        DEFAULT_GEMINI_MODEL,
-        DEFAULT_GEMINI_FLASH_MODEL,
+        PREVIEW_GEMINI_FLASH_MODEL,
      ]);
-      expect(policyHandler).toHaveBeenCalledWith(
-        PREVIEW_GEMINI_MODEL,
-        DEFAULT_GEMINI_MODEL,
-        undefined,
+    });
+
+    it('should launch upgrade flow and avoid fallback mode when handler returns "upgrade"', async () => {
+      policyHandler.mockResolvedValue('upgrade');
+      vi.mocked(openBrowserSecurely).mockResolvedValue(undefined);
+
+      const result = await handleFallback(
+        policyConfig,
+        MOCK_PRO_MODEL,
+        AUTH_OAUTH,
+      );
+
+      expect(result).toBe(false);
+      expect(openBrowserSecurely).toHaveBeenCalledWith(
+        'https://goo.gle/set-up-gemini-code-assist',
+      );
+      expect(policyConfig.setActiveModel).not.toHaveBeenCalled();
+    });
+
+    it('should catch errors from the handler, log an error, and return null', async () => {
+      const handlerError = new Error('UI interaction failed');
+      policyHandler.mockRejectedValue(handlerError);
+
+      const result = await handleFallback(
+        policyConfig,
+        MOCK_PRO_MODEL,
+        AUTH_OAUTH,
+      );
+
+      expect(result).toBeNull();
+      expect(debugLogger.error).toHaveBeenCalledWith(
+        'Fallback handler failed:',
+        handlerError,
      );
    });

-    it('short-circuits when the failed model is the last-resort policy AND candidates are unavailable', async () => {
+    it('should pass TerminalQuotaError (429) correctly to the handler', async () => {
+      const mockGoogleApiError = {
+        code: 429,
+        message: 'mock error',
+        details: [],
+      };
+      const terminalError = new TerminalQuotaError(
+        'Quota error',
+        mockGoogleApiError,
+        5,
+      );
+      policyHandler.mockResolvedValue('retry_always');
+      vi.mocked(policyConfig.getModel).mockReturnValue(
+        DEFAULT_GEMINI_MODEL_AUTO,
+      );
+
+      await handleFallback(
+        policyConfig,
+        MOCK_PRO_MODEL,
+        AUTH_OAUTH,
+        terminalError,
+      );
+
+      expect(policyHandler).toHaveBeenCalledWith(
+        MOCK_PRO_MODEL,
+        DEFAULT_GEMINI_FLASH_MODEL,
+        terminalError,
+      );
+    });
+
+    it('should pass RetryableQuotaError correctly to the handler', async () => {
+      const mockGoogleApiError = {
+        code: 503,
+        message: 'mock error',
+        details: [],
+      };
+      const retryableError = new RetryableQuotaError(
+        'Service unavailable',
+        mockGoogleApiError,
+        1000,
+      );
+      policyHandler.mockResolvedValue('retry_once');
+      vi.mocked(policyConfig.getModel).mockReturnValue(
+        DEFAULT_GEMINI_MODEL_AUTO,
+      );
+
+      await handleFallback(
+        policyConfig,
+        MOCK_PRO_MODEL,
+        AUTH_OAUTH,
+        retryableError,
+      );
+
+      expect(policyHandler).toHaveBeenCalledWith(
+        MOCK_PRO_MODEL,
+        DEFAULT_GEMINI_FLASH_MODEL,
+        retryableError,
+      );
+    });
+
+    it('Call the handler with fallback model same as the failed model when the failed model is the last-resort policy', async () => {
      // Ensure short-circuit when wrapping to an unavailable upstream model.
      availability.selectFirstAvailable = vi
        .fn()
        .mockReturnValue({ selectedModel: null, skipped: [] });
+      vi.mocked(policyConfig.getModel).mockReturnValue(
+        DEFAULT_GEMINI_MODEL_AUTO,
+      );

      const result = await handleFallback(
        policyConfig,
@@ -718,14 +390,21 @@ describe('handleFallback', () => {
        AUTH_OAUTH,
      );

-      expect(result).toBeNull();
-      // Service called to check upstream; no UI handler since nothing selected.
-      expect(policyConfig.getModelAvailabilityService).toHaveBeenCalled();
-      expect(policyConfig.getFallbackModelHandler).not.toHaveBeenCalled();
+      policyHandler.mockResolvedValue('retry_once');
+
+      expect(result).not.toBeNull();
+      expect(policyHandler).toHaveBeenCalledWith(
+        DEFAULT_GEMINI_FLASH_MODEL,
+        DEFAULT_GEMINI_FLASH_MODEL,
+        undefined,
+      );
    });

    it('calls setActiveModel and logs telemetry when handler returns "retry_always"', async () => {
      policyHandler.mockResolvedValue('retry_always');
+      vi.mocked(policyConfig.getModel).mockReturnValue(
+        DEFAULT_GEMINI_MODEL_AUTO,
+      );

      const result = await handleFallback(
        policyConfig,
@@ -739,7 +418,7 @@ describe('handleFallback', () => {
      // TODO: add logging expect statement
    });

-    it('calls setActiveModel when handler returns "stop"', async () => {
+    it('does NOT call setActiveModel when handler returns "stop"', async () => {
      policyHandler.mockResolvedValue('stop');

      const result = await handleFallback(
@@ -749,8 +428,21 @@ describe('handleFallback', () => {
      );

      expect(result).toBe(false);
-      expect(policyConfig.setActiveModel).toHaveBeenCalledWith(FALLBACK_MODEL);
+      expect(policyConfig.setActiveModel).not.toHaveBeenCalled();
      // TODO: add logging expect statement
    });
+
+    it('does NOT call setActiveModel when handler returns "retry_once"', async () => {
+      policyHandler.mockResolvedValue('retry_once');
+
+      const result = await handleFallback(
+        policyConfig,
+        MOCK_PRO_MODEL,
+        AUTH_OAUTH,
+      );
+
+      expect(result).toBe(true);
+      expect(policyConfig.setActiveModel).not.toHaveBeenCalled();
+    });
  });
 });
--- a/packages/core/src/fallback/handler.ts
+++ b/packages/core/src/fallback/handler.ts
@@ -6,24 +6,16 @@

 import type { Config } from '../config/config.js';
 import { AuthType } from '../core/contentGenerator.js';
-import {
-  DEFAULT_GEMINI_FLASH_MODEL,
-  DEFAULT_GEMINI_MODEL,
-  PREVIEW_GEMINI_MODEL,
-} from '../config/models.js';
-import { logFlashFallback, FlashFallbackEvent } from '../telemetry/index.js';
 import { openBrowserSecurely } from '../utils/secure-browser-launcher.js';
 import { debugLogger } from '../utils/debugLogger.js';
 import { getErrorMessage } from '../utils/errors.js';
-import { ModelNotFoundError } from '../utils/httpErrors.js';
-import { TerminalQuotaError } from '../utils/googleQuotaErrors.js';
-import { coreEvents } from '../utils/events.js';
 import type { FallbackIntent, FallbackRecommendation } from './types.js';
 import { classifyFailureKind } from '../availability/errorClassification.js';
 import {
  buildFallbackPolicyContext,
  resolvePolicyChain,
  resolvePolicyAction,
+  applyAvailabilityTransition,
 } from '../availability/policyHelpers.js';

 const UPGRADE_URL_PAGE = 'https://goo.gle/set-up-gemini-code-assist';
@@ -34,75 +26,7 @@ export async function handleFallback(
  authType?: string,
  error?: unknown,
 ): Promise<string | boolean | null> {
-  if (config.isModelAvailabilityServiceEnabled()) {
-    return handlePolicyDrivenFallback(config, failedModel, authType, error);
-  }
-  return legacyHandleFallback(config, failedModel, authType, error);
-}
-
-/**
- * Old fallback logic relying on hard coded strings
- */
-async function legacyHandleFallback(
-  config: Config,
-  failedModel: string,
-  authType?: string,
-  error?: unknown,
-): Promise<string | boolean | null> {
-  if (authType !== AuthType.LOGIN_WITH_GOOGLE) return null;
-
-  // Guardrail: If it's a ModelNotFoundError but NOT the preview model, do not handle it.
-  if (
-    error instanceof ModelNotFoundError &&
-    failedModel !== PREVIEW_GEMINI_MODEL
-  ) {
-    return null;
-  }
-  const shouldActivatePreviewFallback =
-    failedModel === PREVIEW_GEMINI_MODEL &&
-    !(error instanceof TerminalQuotaError);
-  // Preview Model Specific Logic
-  if (shouldActivatePreviewFallback) {
-    // Always set bypass mode for the immediate retry, for non-TerminalQuotaErrors.
-    // This ensures the next attempt uses 2.5 Pro.
-    config.setPreviewModelBypassMode(true);
-
-    // If we are already in Preview Model fallback mode (user previously said "Always"),
-    // we silently retry (which will use 2.5 Pro due to bypass mode).
-    if (config.isPreviewModelFallbackMode()) {
-      return true;
-    }
-  }
-
-  const fallbackModel = shouldActivatePreviewFallback
-    ? DEFAULT_GEMINI_MODEL
-    : DEFAULT_GEMINI_FLASH_MODEL;
-
-  // Consult UI Handler for Intent
-  const fallbackModelHandler = config.fallbackModelHandler;
-  if (typeof fallbackModelHandler !== 'function') return null;
-
-  try {
-    // Pass the specific failed model to the UI handler.
-    const intent = await fallbackModelHandler(
-      failedModel,
-      fallbackModel,
-      error,
-    );
-
-    // Process Intent and Update State
-    return await processIntent(
-      config,
-      intent,
-      failedModel,
-      fallbackModel,
-      authType,
-      error,
-    );
-  } catch (handlerError) {
-    console.error('Fallback UI handler failed:', handlerError);
-    return null;
-  }
+  return handlePolicyDrivenFallback(config, failedModel, authType, error);
 }

 /**
@@ -125,50 +49,56 @@ async function handlePolicyDrivenFallback(
  );

  const failureKind = classifyFailureKind(error);
-
-  if (!candidates.length) {
-    return null;
-  }
-
  const availability = config.getModelAvailabilityService();
-  const selection = availability.selectFirstAvailable(
-    candidates.map((policy) => policy.model),
-  );
-
-  const lastResortPolicy = candidates.find((policy) => policy.isLastResort);
-  const fallbackModel = selection.selectedModel ?? lastResortPolicy?.model;
-  const selectedPolicy = candidates.find(
-    (policy) => policy.model === fallbackModel,
-  );
-
-  if (!fallbackModel || fallbackModel === failedModel || !selectedPolicy) {
-    return null;
-  }
-
-  // failureKind is already declared and calculated above
-  const action = resolvePolicyAction(failureKind, selectedPolicy);
-
-  if (action === 'silent') {
-    return processIntent(
-      config,
-      'retry_always',
-      failedModel,
-      fallbackModel,
-      authType,
-      error,
-    );
-  }
-
-  // This will be used in the future when FallbackRecommendation is passed through UI
-  const recommendation: FallbackRecommendation = {
-    ...selection,
-    selectedModel: fallbackModel,
-    action,
-    failureKind,
-    failedPolicy,
-    selectedPolicy,
+  const getAvailabilityContext = () => {
+    if (!failedPolicy) return undefined;
+    return { service: availability, policy: failedPolicy };
  };
-  void recommendation;
+
+  let fallbackModel: string;
+  if (!candidates.length) {
+    fallbackModel = failedModel;
+  } else {
+    const selection = availability.selectFirstAvailable(
+      candidates.map((policy) => policy.model),
+    );
+
+    const lastResortPolicy = candidates.find((policy) => policy.isLastResort);
+    const selectedFallbackModel =
+      selection.selectedModel ?? lastResortPolicy?.model;
+    const selectedPolicy = candidates.find(
+      (policy) => policy.model === selectedFallbackModel,
+    );
+
+    if (
+      !selectedFallbackModel ||
+      selectedFallbackModel === failedModel ||
+      !selectedPolicy
+    ) {
+      return null;
+    }
+
+    fallbackModel = selectedFallbackModel;
+
+    // failureKind is already declared and calculated above
+    const action = resolvePolicyAction(failureKind, selectedPolicy);
+
+    if (action === 'silent') {
+      applyAvailabilityTransition(getAvailabilityContext, failureKind);
+      return processIntent(config, 'retry_always', fallbackModel);
+    }
+
+    // This will be used in the future when FallbackRecommendation is passed through UI
+    const recommendation: FallbackRecommendation = {
+      ...selection,
+      selectedModel: fallbackModel,
+      action,
+      failureKind,
+      failedPolicy,
+      selectedPolicy,
+    };
+    void recommendation;
+  }

  const handler = config.getFallbackModelHandler();
  if (typeof handler !== 'function') {
@@ -177,14 +107,16 @@ async function handlePolicyDrivenFallback(

  try {
    const intent = await handler(failedModel, fallbackModel, error);
-    return await processIntent(
-      config,
-      intent,
-      failedModel,
-      fallbackModel,
-      authType,
-      error, // Pass the error so processIntent can handle preview-specific logic
-    );
+
+    // If the user chose to switch/retry, we apply the availability transition
+    // to the failed model (e.g. marking it terminal if it had a quota error).
+    // We DO NOT apply it for any other intent (e.g. 'stop' or 'retry_later'),
+    // allowing them to try again later with the same model state.
+    if (intent === 'retry_always' || intent === 'retry_once') {
+      applyAvailabilityTransition(getAvailabilityContext, failureKind);
+    }
+
+    return await processIntent(config, intent, fallbackModel);
  } catch (handlerError) {
    debugLogger.error('Fallback handler failed:', handlerError);
    return null;
@@ -205,47 +137,23 @@ async function handleUpgrade() {
 async function processIntent(
  config: Config,
  intent: FallbackIntent | null,
-  failedModel: string,
  fallbackModel: string,
-  authType?: string,
-  error?: unknown,
 ): Promise<boolean> {
-  const isAvailabilityEnabled = config.isModelAvailabilityServiceEnabled();
-
  switch (intent) {
    case 'retry_always':
-      if (isAvailabilityEnabled) {
-        // TODO(telemetry): Implement generic fallback event logging. Existing
-        // logFlashFallback is specific to a single Model.
-        config.setActiveModel(fallbackModel);
-      } else {
-        // If the error is non-retryable, e.g. TerminalQuota Error, trigger a regular fallback to flash.
-        // For all other errors, activate previewModel fallback.
-        if (
-          failedModel === PREVIEW_GEMINI_MODEL &&
-          !(error instanceof TerminalQuotaError)
-        ) {
-          activatePreviewModelFallbackMode(config);
-        } else {
-          activateFallbackMode(config, authType);
-        }
-      }
+      // TODO(telemetry): Implement generic fallback event logging. Existing
+      // logFlashFallback is specific to a single Model.
+      config.setActiveModel(fallbackModel);
      return true;

    case 'retry_once':
-      if (isAvailabilityEnabled) {
-        config.setActiveModel(fallbackModel);
-      }
+      // For a one-time retry (retry_once), we do NOT set the active model permanently.
+      // The FallbackStrategy will handle routing to the available model for this turn
+      // based on the availability service state (which is updated before this).
      return true;

    case 'stop':
-      if (isAvailabilityEnabled) {
-        // TODO(telemetry): Implement generic fallback event logging. Existing
-        // logFlashFallback is specific to a single Model.
-        config.setActiveModel(fallbackModel);
-      } else {
-        activateFallbackMode(config, authType);
-      }
+      // Do not switch model on stop. User wants to stay on current model (and stop).
      return false;

    case 'retry_later':
@@ -261,20 +169,3 @@ async function processIntent(
      );
  }
 }
-
-function activateFallbackMode(config: Config, authType: string | undefined) {
-  if (!config.isInFallbackMode()) {
-    config.setFallbackMode(true);
-    coreEvents.emitFallbackModeChanged(true);
-    if (authType) {
-      logFlashFallback(config, new FlashFallbackEvent(authType));
-    }
-  }
-}
-
-function activatePreviewModelFallbackMode(config: Config) {
-  if (!config.isPreviewModelFallbackMode()) {
-    config.setPreviewModelFallbackMode(true);
-    // We might want a specific event for Preview Model fallback, but for now we just set the mode.
-  }
-}
--- a/packages/core/src/routing/modelRouterService.test.ts
+++ b/packages/core/src/routing/modelRouterService.test.ts
@@ -7,10 +7,7 @@
 import { describe, it, expect, vi, beforeEach } from 'vitest';
 import { ModelRouterService } from './modelRouterService.js';
 import { Config } from '../config/config.js';
-import {
-  PREVIEW_GEMINI_MODEL,
-  DEFAULT_GEMINI_MODEL,
-} from '../config/models.js';
+
 import type { BaseLlmClient } from '../core/baseLlmClient.js';
 import type { RoutingContext, RoutingDecision } from './routingStrategy.js';
 import { DefaultStrategy } from './strategies/defaultStrategy.js';
@@ -151,81 +148,5 @@ describe('ModelRouterService', () => {
        expect.any(ModelRoutingEvent),
      );
    });
-
-    it('should upgrade to Preview Model when preview features are enabled and model is 2.5 Pro', async () => {
-      vi.spyOn(mockCompositeStrategy, 'route').mockResolvedValue({
-        model: DEFAULT_GEMINI_MODEL,
-        metadata: { source: 'test', latencyMs: 0, reasoning: 'test' },
-      });
-      vi.spyOn(mockConfig, 'getPreviewFeatures').mockReturnValue(true);
-      vi.spyOn(mockConfig, 'isPreviewModelFallbackMode').mockReturnValue(false);
-
-      const decision = await service.route(mockContext);
-
-      expect(decision.model).toBe(PREVIEW_GEMINI_MODEL);
-    });
-
-    it('should NOT upgrade to Preview Model when preview features are disabled', async () => {
-      vi.spyOn(mockCompositeStrategy, 'route').mockResolvedValue({
-        model: DEFAULT_GEMINI_MODEL,
-        metadata: { source: 'test', latencyMs: 0, reasoning: 'test' },
-      });
-      vi.spyOn(mockConfig, 'getPreviewFeatures').mockReturnValue(false);
-
-      const decision = await service.route(mockContext);
-
-      expect(decision.model).toBe(DEFAULT_GEMINI_MODEL);
-    });
-
-    it('should upgrade to Preview Model when preview features are enabled and model is explicitly set to Pro', async () => {
-      // Simulate OverrideStrategy returning Preview Model (as resolveModel would do for "pro")
-      vi.spyOn(mockCompositeStrategy, 'route').mockResolvedValue({
-        model: PREVIEW_GEMINI_MODEL,
-        metadata: {
-          source: 'override',
-          latencyMs: 0,
-          reasoning: 'User selected',
-        },
-      });
-      vi.spyOn(mockConfig, 'getPreviewFeatures').mockReturnValue(true);
-      vi.spyOn(mockConfig, 'isPreviewModelFallbackMode').mockReturnValue(false);
-
-      const decision = await service.route(mockContext);
-
-      expect(decision.model).toBe(PREVIEW_GEMINI_MODEL);
-    });
-
-    it('should NOT upgrade to Preview Model when preview features are enabled and model is explicitly set to a specific string', async () => {
-      // Simulate OverrideStrategy returning a specific model (e.g. "gemini-2.5-pro")
-      // This happens when user explicitly sets model to "gemini-2.5-pro" instead of "pro"
-      vi.spyOn(mockCompositeStrategy, 'route').mockResolvedValue({
-        model: DEFAULT_GEMINI_MODEL,
-        metadata: {
-          source: 'override',
-          latencyMs: 0,
-          reasoning: 'User selected',
-        },
-      });
-      vi.spyOn(mockConfig, 'getPreviewFeatures').mockReturnValue(true);
-      vi.spyOn(mockConfig, 'isPreviewModelFallbackMode').mockReturnValue(false);
-
-      const decision = await service.route(mockContext);
-
-      // Should NOT upgrade to Preview Model because source is 'override' and model is specific
-      expect(decision.model).toBe(DEFAULT_GEMINI_MODEL);
-    });
-
-    it('should upgrade to Preview Model even if fallback mode is active (probing behavior)', async () => {
-      vi.spyOn(mockCompositeStrategy, 'route').mockResolvedValue({
-        model: DEFAULT_GEMINI_MODEL,
-        metadata: { source: 'default', latencyMs: 0, reasoning: 'Default' },
-      });
-      vi.spyOn(mockConfig, 'getPreviewFeatures').mockReturnValue(true);
-      vi.spyOn(mockConfig, 'isPreviewModelFallbackMode').mockReturnValue(true);
-
-      const decision = await service.route(mockContext);
-
-      expect(decision.model).toBe(PREVIEW_GEMINI_MODEL);
-    });
  });
 });
--- a/packages/core/src/routing/modelRouterService.ts
+++ b/packages/core/src/routing/modelRouterService.ts
@@ -5,10 +5,6 @@
 */

 import type { Config } from '../config/config.js';
-import {
-  PREVIEW_GEMINI_MODEL,
-  DEFAULT_GEMINI_MODEL,
-} from '../config/models.js';
 import type {
  RoutingContext,
  RoutingDecision,
@@ -66,23 +62,6 @@ export class ModelRouterService {
        this.config.getBaseLlmClient(),
      );

-      // Unified Preview Model Logic:
-      // If the decision is to use 'gemini-2.5-pro' and preview features are enabled,
-      // we attempt to upgrade to 'gemini-3.0-pro' (Preview Model).
-      if (
-        decision.model === DEFAULT_GEMINI_MODEL &&
-        this.config.getPreviewFeatures() &&
-        !decision.metadata.source.includes('override')
-      ) {
-        // We ALWAYS attempt to upgrade to Preview Model here.
-        // If we are in fallback mode, the 'previewModelBypassMode' flag (handled in handler.ts/geminiChat.ts)
-        // will ensure we downgrade to 2.5 Pro for the actual API call if needed.
-        // This allows us to "probe" Preview Model periodically (i.e., every new request tries Preview Model first).
-        decision.model = PREVIEW_GEMINI_MODEL;
-        decision.metadata.source += ' (Preview Model)';
-        decision.metadata.reasoning += ' (Upgraded to Preview Model)';
-      }
-
      const event = new ModelRoutingEvent(
        decision.model,
        decision.metadata.source,
--- a/packages/core/src/routing/strategies/classifierStrategy.test.ts
+++ b/packages/core/src/routing/strategies/classifierStrategy.test.ts
@@ -16,6 +16,7 @@ import {
 import {
  DEFAULT_GEMINI_FLASH_MODEL,
  DEFAULT_GEMINI_MODEL,
+  DEFAULT_GEMINI_MODEL_AUTO,
 } from '../../config/models.js';
 import { promptIdContext } from '../../utils/promptIdContext.js';
 import type { Content } from '@google/genai';
@@ -50,6 +51,7 @@ describe('ClassifierStrategy', () => {
      modelConfigService: {
        getResolvedConfig: vi.fn().mockReturnValue(mockResolvedConfig),
      },
+      getModel: () => DEFAULT_GEMINI_MODEL_AUTO,
      getPreviewFeatures: () => false,
    } as unknown as Config;
    mockBaseLlmClient = {
--- a/packages/core/src/routing/strategies/classifierStrategy.ts
+++ b/packages/core/src/routing/strategies/classifierStrategy.ts
@@ -12,11 +12,7 @@ import type {
  RoutingDecision,
  RoutingStrategy,
 } from '../routingStrategy.js';
-import {
-  GEMINI_MODEL_ALIAS_FLASH,
-  GEMINI_MODEL_ALIAS_PRO,
-  resolveModel,
-} from '../../config/models.js';
+import { resolveClassifierModel } from '../../config/models.js';
 import { createUserContent, Type } from '@google/genai';
 import type { Config } from '../../config/config.js';
 import {
@@ -171,32 +167,20 @@ export class ClassifierStrategy implements RoutingStrategy {

      const reasoning = routerResponse.reasoning;
      const latencyMs = Date.now() - startTime;
+      const selectedModel = resolveClassifierModel(
+        config.getModel(),
+        routerResponse.model_choice,
+        config.getPreviewFeatures(),
+      );

-      if (routerResponse.model_choice === FLASH_MODEL) {
-        return {
-          model: resolveModel(
-            GEMINI_MODEL_ALIAS_FLASH,
-            config.getPreviewFeatures(),
-          ),
-          metadata: {
-            source: 'Classifier',
-            latencyMs,
-            reasoning,
-          },
-        };
-      } else {
-        return {
-          model: resolveModel(
-            GEMINI_MODEL_ALIAS_PRO,
-            config.getPreviewFeatures(),
-          ),
-          metadata: {
-            source: 'Classifier',
-            reasoning,
-            latencyMs,
-          },
-        };
-      }
+      return {
+        model: selectedModel,
+        metadata: {
+          source: 'Classifier',
+          latencyMs,
+          reasoning,
+        },
+      };
    } catch (error) {
      // If the classifier fails for any reason (API error, parsing error, etc.),
      // we log it and return null to allow the composite strategy to proceed.
--- a/packages/core/src/routing/strategies/fallbackStrategy.test.ts
+++ b/packages/core/src/routing/strategies/fallbackStrategy.test.ts
@@ -4,87 +4,118 @@
 * SPDX-License-Identifier: Apache-2.0
 */

-import { describe, it, expect } from 'vitest';
+import { describe, it, expect, vi, beforeEach } from 'vitest';
 import { FallbackStrategy } from './fallbackStrategy.js';
 import type { RoutingContext } from '../routingStrategy.js';
 import type { BaseLlmClient } from '../../core/baseLlmClient.js';
 import type { Config } from '../../config/config.js';
+import type { ModelAvailabilityService } from '../../availability/modelAvailabilityService.js';
 import {
  DEFAULT_GEMINI_MODEL,
  DEFAULT_GEMINI_FLASH_MODEL,
-  DEFAULT_GEMINI_FLASH_LITE_MODEL,
+  DEFAULT_GEMINI_MODEL_AUTO,
 } from '../../config/models.js';
+import { selectModelForAvailability } from '../../availability/policyHelpers.js';
+
+vi.mock('../../availability/policyHelpers.js', () => ({
+  selectModelForAvailability: vi.fn(),
+}));
+
+const createMockConfig = (overrides: Partial<Config> = {}): Config =>
+  ({
+    isModelAvailabilityServiceEnabled: vi.fn().mockReturnValue(true),
+    getModelAvailabilityService: vi.fn(),
+    getModel: vi.fn().mockReturnValue(DEFAULT_GEMINI_MODEL),
+    getPreviewFeatures: vi.fn().mockReturnValue(false),
+    ...overrides,
+  }) as unknown as Config;

 describe('FallbackStrategy', () => {
  const strategy = new FallbackStrategy();
  const mockContext = {} as RoutingContext;
  const mockClient = {} as BaseLlmClient;
+  let mockService: ModelAvailabilityService;
+  let mockConfig: Config;

-  it('should return null when not in fallback mode', async () => {
-    const mockConfig = {
-      isInFallbackMode: () => false,
-      getModel: () => DEFAULT_GEMINI_MODEL,
-      getPreviewFeatures: () => false,
-    } as Config;
+  beforeEach(() => {
+    vi.resetAllMocks();
+
+    mockService = {
+      snapshot: vi.fn(),
+    } as unknown as ModelAvailabilityService;
+
+    mockConfig = createMockConfig({
+      getModelAvailabilityService: vi.fn().mockReturnValue(mockService),
+    });
+  });
+
+  it('should return null if service is disabled', async () => {
+    vi.mocked(mockConfig.isModelAvailabilityServiceEnabled).mockReturnValue(
+      false,
+    );

    const decision = await strategy.route(mockContext, mockConfig, mockClient);
    expect(decision).toBeNull();
  });

-  describe('when in fallback mode', () => {
-    it('should downgrade a pro model to the flash model', async () => {
-      const mockConfig = {
-        isInFallbackMode: () => true,
-        getModel: () => DEFAULT_GEMINI_MODEL,
-        getPreviewFeatures: () => false,
-      } as Config;
+  it('should return null if the requested model is available', async () => {
+    // Mock snapshot to return available
+    vi.mocked(mockService.snapshot).mockReturnValue({ available: true });

-      const decision = await strategy.route(
-        mockContext,
-        mockConfig,
-        mockClient,
-      );
+    const decision = await strategy.route(mockContext, mockConfig, mockClient);
+    expect(decision).toBeNull();
+    // Should check availability of the resolved model (DEFAULT_GEMINI_MODEL)
+    expect(mockService.snapshot).toHaveBeenCalledWith(DEFAULT_GEMINI_MODEL);
+  });

-      expect(decision).not.toBeNull();
-      expect(decision?.model).toBe(DEFAULT_GEMINI_FLASH_MODEL);
-      expect(decision?.metadata.source).toBe('fallback');
-      expect(decision?.metadata.reasoning).toContain('In fallback mode');
+  it('should return null if fallback selection is same as requested model', async () => {
+    // Mock snapshot to return unavailable
+    vi.mocked(mockService.snapshot).mockReturnValue({
+      available: false,
+      reason: 'quota',
+    });
+    // Mock selectModelForAvailability to return the SAME model (no fallback found)
+    vi.mocked(selectModelForAvailability).mockReturnValue({
+      selectedModel: DEFAULT_GEMINI_MODEL,
+      skipped: [],
    });

-    it('should honor a lite model request', async () => {
-      const mockConfig = {
-        isInFallbackMode: () => true,
-        getModel: () => DEFAULT_GEMINI_FLASH_LITE_MODEL,
-        getPreviewFeatures: () => false,
-      } as Config;
+    const decision = await strategy.route(mockContext, mockConfig, mockClient);
+    expect(decision).toBeNull();
+  });

-      const decision = await strategy.route(
-        mockContext,
-        mockConfig,
-        mockClient,
-      );
-
-      expect(decision).not.toBeNull();
-      expect(decision?.model).toBe(DEFAULT_GEMINI_FLASH_LITE_MODEL);
-      expect(decision?.metadata.source).toBe('fallback');
+  it('should return fallback decision if model is unavailable and fallback found', async () => {
+    // Mock snapshot to return unavailable
+    vi.mocked(mockService.snapshot).mockReturnValue({
+      available: false,
+      reason: 'quota',
    });

-    it('should use the flash model if flash is requested', async () => {
-      const mockConfig = {
-        isInFallbackMode: () => true,
-        getModel: () => DEFAULT_GEMINI_FLASH_MODEL,
-        getPreviewFeatures: () => false,
-      } as Config;
-
-      const decision = await strategy.route(
-        mockContext,
-        mockConfig,
-        mockClient,
-      );
-
-      expect(decision).not.toBeNull();
-      expect(decision?.model).toBe(DEFAULT_GEMINI_FLASH_MODEL);
-      expect(decision?.metadata.source).toBe('fallback');
+    // Mock selectModelForAvailability to find a fallback (Flash)
+    vi.mocked(selectModelForAvailability).mockReturnValue({
+      selectedModel: DEFAULT_GEMINI_FLASH_MODEL,
+      skipped: [{ model: DEFAULT_GEMINI_MODEL, reason: 'quota' }],
    });
+
+    const decision = await strategy.route(mockContext, mockConfig, mockClient);
+
+    expect(decision).not.toBeNull();
+    expect(decision?.model).toBe(DEFAULT_GEMINI_FLASH_MODEL);
+    expect(decision?.metadata.source).toBe('fallback');
+    expect(decision?.metadata.reasoning).toContain(
+      `Model ${DEFAULT_GEMINI_MODEL} is unavailable`,
+    );
+  });
+
+  it('should correctly handle "auto" alias by resolving it before checking availability', async () => {
+    // Mock snapshot to return available for the RESOLVED model
+    vi.mocked(mockService.snapshot).mockReturnValue({ available: true });
+    vi.mocked(mockConfig.getModel).mockReturnValue(DEFAULT_GEMINI_MODEL_AUTO);
+
+    const decision = await strategy.route(mockContext, mockConfig, mockClient);
+
+    expect(decision).toBeNull();
+    // Important: check that it queried snapshot with the RESOLVED model, not 'auto'
+    expect(mockService.snapshot).toHaveBeenCalledWith(DEFAULT_GEMINI_MODEL);
  });
 });
--- a/packages/core/src/routing/strategies/fallbackStrategy.ts
+++ b/packages/core/src/routing/strategies/fallbackStrategy.ts
@@ -4,8 +4,9 @@
 * SPDX-License-Identifier: Apache-2.0
 */

+import { selectModelForAvailability } from '../../availability/policyHelpers.js';
 import type { Config } from '../../config/config.js';
-import { getEffectiveModel } from '../../config/models.js';
+import { resolveModel } from '../../config/models.js';
 import type { BaseLlmClient } from '../../core/baseLlmClient.js';
 import type {
  RoutingContext,
@@ -21,24 +22,38 @@ export class FallbackStrategy implements RoutingStrategy {
    config: Config,
    _baseLlmClient: BaseLlmClient,
  ): Promise<RoutingDecision | null> {
-    const isInFallbackMode: boolean = config.isInFallbackMode();
-
-    if (!isInFallbackMode) {
+    if (!config.isModelAvailabilityServiceEnabled()) {
      return null;
    }

-    const effectiveModel = getEffectiveModel(
-      isInFallbackMode,
-      config.getModel(),
+    const requestedModel = config.getModel();
+    const resolvedModel = resolveModel(
+      requestedModel,
      config.getPreviewFeatures(),
    );
-    return {
-      model: effectiveModel,
-      metadata: {
-        source: this.name,
-        latencyMs: 0,
-        reasoning: `In fallback mode. Using: ${effectiveModel}`,
-      },
-    };
+    const service = config.getModelAvailabilityService();
+    const snapshot = service.snapshot(resolvedModel);
+
+    if (snapshot.available) {
+      return null;
+    }
+
+    const selection = selectModelForAvailability(config, requestedModel);
+
+    if (
+      selection?.selectedModel &&
+      selection.selectedModel !== requestedModel
+    ) {
+      return {
+        model: selection.selectedModel,
+        metadata: {
+          source: this.name,
+          latencyMs: 0,
+          reasoning: `Model ${requestedModel} is unavailable (${snapshot.reason}). Using fallback: ${selection.selectedModel}`,
+        },
+      };
+    }
+
+    return null;
  }
 }
--- a/packages/core/src/routing/strategies/overrideStrategy.ts
+++ b/packages/core/src/routing/strategies/overrideStrategy.ts
@@ -7,7 +7,8 @@
 import type { Config } from '../../config/config.js';
 import {
  DEFAULT_GEMINI_MODEL_AUTO,
-  resolveModel,
+  getEffectiveModel,
+  PREVIEW_GEMINI_MODEL_AUTO,
 } from '../../config/models.js';
 import type { BaseLlmClient } from '../../core/baseLlmClient.js';
 import type {
@@ -30,11 +31,15 @@ export class OverrideStrategy implements RoutingStrategy {
    const overrideModel = config.getModel();

    // If the model is 'auto' we should pass to the next strategy.
-    if (overrideModel === DEFAULT_GEMINI_MODEL_AUTO) return null;
+    if (
+      overrideModel === DEFAULT_GEMINI_MODEL_AUTO ||
+      overrideModel === PREVIEW_GEMINI_MODEL_AUTO
+    )
+      return null;

    // Return the overridden model name.
    return {
-      model: resolveModel(overrideModel, config.getPreviewFeatures()),
+      model: getEffectiveModel(overrideModel, config.getPreviewFeatures()),
      metadata: {
        source: this.name,
        latencyMs: 0,
--- a/packages/core/src/services/chatCompressionService.ts
+++ b/packages/core/src/services/chatCompressionService.ts
@@ -20,6 +20,7 @@ import {
  DEFAULT_GEMINI_FLASH_MODEL,
  DEFAULT_GEMINI_MODEL,
  PREVIEW_GEMINI_MODEL,
+  PREVIEW_GEMINI_FLASH_MODEL,
 } from '../config/models.js';
 import { firePreCompressHook } from '../core/sessionHookTriggers.js';
 import { PreCompressTrigger } from '../hooks/types.js';
@@ -88,6 +89,8 @@ export function modelStringToModelConfigAlias(model: string): string {
  switch (model) {
    case PREVIEW_GEMINI_MODEL:
      return 'chat-compression-3-pro';
+    case PREVIEW_GEMINI_FLASH_MODEL:
+      return 'chat-compression-3-flash';
    case DEFAULT_GEMINI_MODEL:
      return 'chat-compression-2.5-pro';
    case DEFAULT_GEMINI_FLASH_MODEL:
--- a/packages/core/src/services/test-data/resolved-aliases-retry.golden.json
+++ b/packages/core/src/services/test-data/resolved-aliases-retry.golden.json
@@ -49,6 +49,18 @@
      "topK": 64
    }
  },
+  "gemini-3-flash-preview": {
+    "model": "gemini-3-flash-preview",
+    "generateContentConfig": {
+      "temperature": 1,
+      "topP": 0.95,
+      "thinkingConfig": {
+        "includeThoughts": true,
+        "thinkingLevel": "HIGH"
+      },
+      "topK": 64
+    }
+  },
  "gemini-2.5-pro": {
    "model": "gemini-2.5-pro",
    "generateContentConfig": {
@@ -203,6 +215,10 @@
    "model": "gemini-3-pro-preview",
    "generateContentConfig": {}
  },
+  "chat-compression-3-flash": {
+    "model": "gemini-3-flash-preview",
+    "generateContentConfig": {}
+  },
  "chat-compression-2.5-pro": {
    "model": "gemini-2.5-pro",
    "generateContentConfig": {}
--- a/packages/core/src/services/test-data/resolved-aliases.golden.json
+++ b/packages/core/src/services/test-data/resolved-aliases.golden.json
@@ -49,6 +49,18 @@
      "topK": 64
    }
  },
+  "gemini-3-flash-preview": {
+    "model": "gemini-3-flash-preview",
+    "generateContentConfig": {
+      "temperature": 1,
+      "topP": 0.95,
+      "thinkingConfig": {
+        "includeThoughts": true,
+        "thinkingLevel": "HIGH"
+      },
+      "topK": 64
+    }
+  },
  "gemini-2.5-pro": {
    "model": "gemini-2.5-pro",
    "generateContentConfig": {
@@ -203,6 +215,10 @@
    "model": "gemini-3-pro-preview",
    "generateContentConfig": {}
  },
+  "chat-compression-3-flash": {
+    "model": "gemini-3-flash-preview",
+    "generateContentConfig": {}
+  },
  "chat-compression-2.5-pro": {
    "model": "gemini-2.5-pro",
    "generateContentConfig": {}
--- a/packages/core/src/utils/flashFallback.test.ts
+++ b/packages/core/src/utils/flashFallback.test.ts
@@ -124,7 +124,7 @@ describe('Retry Utility Fallback Integration', () => {
    });

    await expect(promise).rejects.toThrow('Daily limit');
-    expect(fallbackCallback).not.toHaveBeenCalled();
+    expect(fallbackCallback).toHaveBeenCalledTimes(1);
    expect(mockApiCall).toHaveBeenCalledTimes(1);
  });

--- a/packages/core/src/utils/retry.test.ts
+++ b/packages/core/src/utils/retry.test.ts
@@ -464,7 +464,7 @@ describe('retryWithBackoff', () => {
    });

    it.each([[AuthType.USE_GEMINI], [AuthType.USE_VERTEX_AI], [undefined]])(
-      'should not trigger fallback for non-Google auth users (authType: %s) on TerminalQuotaError',
+      'should invoke onPersistent429 callback (delegating decision) for non-Google auth users (authType: %s) on TerminalQuotaError',
      async (authType) => {
        const fallbackCallback = vi.fn();
        const mockFn = vi.fn().mockImplementation(async () => {
@@ -478,7 +478,7 @@ describe('retryWithBackoff', () => {
        });

        await expect(promise).rejects.toThrow('Daily limit reached');
-        expect(fallbackCallback).not.toHaveBeenCalled();
+        expect(fallbackCallback).toHaveBeenCalled();
        expect(mockFn).toHaveBeenCalledTimes(1);
      },
    );
@@ -629,20 +629,7 @@ describe('retryWithBackoff', () => {
       ).rejects.toThrow(TerminalQuotaError);

       // Verify failures
-      expect(mockService.markTerminal).toHaveBeenCalledWith('model-1', 'quota');
-      expect(mockService.markTerminal).toHaveBeenCalledWith('model-2', 'quota');
-
-      // Verify sequences
-      expect(mockService.markTerminal).toHaveBeenNthCalledWith(
-        1,
-        'model-1',
-        'quota',
-      );
-      expect(mockService.markTerminal).toHaveBeenNthCalledWith(
-        2,
-        'model-2',
-        'quota',
-      );
+      expect(mockService.markTerminal).not.toHaveBeenCalled();
     });

    it('marks sticky_retry after retries are exhausted for transient failures', async () => {
@@ -671,8 +661,8 @@ describe('retryWithBackoff', () => {
      expect(result).toBe(transientError);

      expect(fn).toHaveBeenCalledTimes(3);
-      expect(mockService.markRetryOncePerTurn).toHaveBeenCalledWith('model-1');
-      expect(mockService.markRetryOncePerTurn).toHaveBeenCalledTimes(1);
+      expect(mockService.markRetryOncePerTurn).not.toHaveBeenCalled();
+      expect(mockService.markRetryOncePerTurn).not.toHaveBeenCalled();
      expect(mockService.markTerminal).not.toHaveBeenCalled();
    });

@@ -710,29 +700,7 @@ describe('retryWithBackoff', () => {
        maxAttempts: 1,
        getAvailabilityContext: getContext,
      }).catch(() => {});
-      expect(mockService.markTerminal).toHaveBeenCalledWith('model-1', 'quota');
-
-      // Run for notFoundError
-      await retryWithBackoff(fn, {
-        maxAttempts: 1,
-        getAvailabilityContext: getContext,
-      }).catch(() => {});
-      expect(mockService.markTerminal).toHaveBeenCalledWith(
-        'model-1',
-        'capacity',
-      );
-
-      // Run for genericError
-      await retryWithBackoff(fn, {
-        maxAttempts: 1,
-        getAvailabilityContext: getContext,
-      }).catch(() => {});
-      expect(mockService.markTerminal).toHaveBeenCalledWith(
-        'model-1',
-        'capacity',
-      );
-
-      expect(mockService.markTerminal).toHaveBeenCalledTimes(3);
+      expect(mockService.markTerminal).not.toHaveBeenCalled();
    });
  });
 });
--- a/packages/core/src/utils/retry.ts
+++ b/packages/core/src/utils/retry.ts
@@ -6,7 +6,6 @@

 import type { GenerateContentResponse } from '@google/genai';
 import { ApiError } from '@google/genai';
-import { AuthType } from '../core/contentGenerator.js';
 import {
  TerminalQuotaError,
  RetryableQuotaError,
@@ -16,8 +15,6 @@ import { delay, createAbortError } from './delay.js';
 import { debugLogger } from './debugLogger.js';
 import { getErrorStatus, ModelNotFoundError } from './httpErrors.js';
 import type { RetryAvailabilityContext } from '../availability/modelPolicy.js';
-import { classifyFailureKind } from '../availability/errorClassification.js';
-import { applyAvailabilityTransition } from '../availability/policyHelpers.js';

 export type { RetryAvailabilityContext };

@@ -192,12 +189,6 @@ export async function retryWithBackoff<T>(
      }

      const classifiedError = classifyGoogleError(error);
-      const failureKind = classifyFailureKind(classifiedError);
-      const appliedImmediate =
-        failureKind === 'terminal' || failureKind === 'not_found';
-      if (appliedImmediate) {
-        applyAvailabilityTransition(getAvailabilityContext, failureKind);
-      }

      const errorCode = getErrorStatus(error);

@@ -205,7 +196,7 @@ export async function retryWithBackoff<T>(
        classifiedError instanceof TerminalQuotaError ||
        classifiedError instanceof ModelNotFoundError
      ) {
-        if (onPersistent429 && authType === AuthType.LOGIN_WITH_GOOGLE) {
+        if (onPersistent429) {
          try {
            const fallbackModel = await onPersistent429(
              authType,
@@ -229,7 +220,7 @@ export async function retryWithBackoff<T>(

      if (classifiedError instanceof RetryableQuotaError || is500) {
        if (attempt >= maxAttempts) {
-          if (onPersistent429 && authType === AuthType.LOGIN_WITH_GOOGLE) {
+          if (onPersistent429) {
            try {
              const fallbackModel = await onPersistent429(
                authType,
@@ -244,9 +235,6 @@ export async function retryWithBackoff<T>(
              console.warn('Model fallback failed:', fallbackError);
            }
          }
-          if (!appliedImmediate) {
-            applyAvailabilityTransition(getAvailabilityContext, failureKind);
-          }
          throw classifiedError instanceof RetryableQuotaError
            ? classifiedError
            : error;
@@ -276,9 +264,6 @@ export async function retryWithBackoff<T>(
        attempt >= maxAttempts ||
        !shouldRetryOnError(error as Error, retryFetchErrors)
      ) {
-        if (!appliedImmediate) {
-          applyAvailabilityTransition(getAvailabilityContext, failureKind);
-        }
        throw error;
      }