/**
 * Unified LLM Call Layer
 *
 * All LLM interactions should go through callLLM / streamLLM.
 */
import { generateText, streamText } from 'ai';
import type { GenerateTextResult, StreamTextResult } from 'ai';
import { createLogger } from '@/lib/logger';
import { PROVIDERS } from './providers';
import { thinkingContext } from './thinking-context';
import { getModelMetadataKey } from './model-metadata';
import type { ThinkingCapability, ThinkingConfig } from '@/lib/types/provider';
import {
  getThinkingMode,
  pickThinkingBudget,
  pickThinkingEffort,
  pickThinkingLevel,
} from '@/lib/ai/thinking-config';

const log = createLogger('LLM');

// Re-export for external use
export type { ThinkingConfig } from '@/lib/types/provider';

// Parameter types accepted by the AI SDK's generateText / streamText
type GenerateTextParams = Parameters<typeof generateText>[0];
type StreamTextParams = Parameters<typeof streamText>[0];

function _extractRequestInfo(params: GenerateTextParams | StreamTextParams) {
  const tools = params.tools ? Object.keys(params.tools as Record<string, unknown>) : undefined;
  const p = params as Record<string, unknown>;
  return {
    system: p.system as string | undefined,
    prompt: p.prompt as string | undefined,
    messages: p.messages as unknown[] | undefined,
    tools,
    maxOutputTokens: p.maxOutputTokens as number | undefined,
  };
}

function getModelId(params: GenerateTextParams | StreamTextParams): string {
  const m = params.model;
  if (typeof m === 'string') return m;
  if (m && typeof m === 'object' && 'modelId' in m) return (m as { modelId: string }).modelId;
  return 'unknown';
}

// ---------------------------------------------------------------------------
// Thinking / Reasoning Adapter
//
// Builds a lookup table from PROVIDERS at module load time, then uses it to
// map a unified ThinkingConfig into provider-specific providerOptions.
// Native providers (OpenAI/Anthropic/Google) are mapped to providerOptions.
// OpenAI-compatible providers are injected by the providers.ts fetch wrapper.
// ---------------------------------------------------------------------------

interface ModelThinkingInfo {
  thinking?: ThinkingCapability;
}

/** Provider/model → thinking capability (built once at module load) */
const MODEL_THINKING_MAP: Map<string, ModelThinkingInfo> = (() => {
  const map = new Map<string, ModelThinkingInfo>();
  for (const provider of Object.values(PROVIDERS)) {
    for (const model of provider.models) {
      map.set(getModelMetadataKey(provider.id, model.id), {
        thinking: model.capabilities?.thinking,
      });
    }
  }
  return map;
})();
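// Illustrative entry (a sketch: the model ID and the key format produced by
// getModelMetadataKey are assumptions, not confirmed by this file):
//
//   // assuming getModelMetadataKey returns `${providerId}:${modelId}`
//   MODEL_THINKING_MAP.get('anthropic:claude-sonnet-4');
//   // => { thinking: { requestAdapter: 'anthropic', control: 'toggle-budget', ... } }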
/** Model ID → thinking capability for IDs that are unique across providers. */
const UNIQUE_MODEL_THINKING_MAP: Map<string, ModelThinkingInfo> = (() => {
  const counts = new Map<string, number>();
  for (const provider of Object.values(PROVIDERS)) {
    for (const model of provider.models) {
      counts.set(model.id, (counts.get(model.id) ?? 0) + 1);
    }
  }
  const map = new Map<string, ModelThinkingInfo>();
  for (const provider of Object.values(PROVIDERS)) {
    for (const model of provider.models) {
      if (counts.get(model.id) === 1) {
        map.set(model.id, {
          thinking: model.capabilities?.thinking,
        });
      }
    }
  }
  return map;
})();

/** Global thinking override from environment variable */
function getGlobalThinkingConfig(): ThinkingConfig | undefined {
  if (import.meta.env?.VITE_LLM_THINKING_DISABLED === 'true') {
    return { mode: 'disabled', enabled: false };
  }
  return undefined;
}

type ProviderOptions = Record<string, Record<string, unknown>>;

function getAnthropicEffort(
  thinking: ThinkingCapability,
  config: ThinkingConfig,
): 'low' | 'medium' | 'high' | 'xhigh' | 'max' | undefined {
  const effort = pickThinkingEffort(thinking, config);
  if (!effort || effort === 'none' || effort === 'minimal') return undefined;
  return effort;
}

function getModelProviderId(params: GenerateTextParams | StreamTextParams): string | undefined {
  const m = params.model;
  if (!m || typeof m !== 'object' || !('provider' in m)) return undefined;
  const provider = (m as { provider?: string }).provider;
  if (!provider) return undefined;
  if (provider in PROVIDERS) return provider;
  const prefix = provider.split('.')[0];
  return prefix in PROVIDERS ? prefix : undefined;
}

/**
 * Map a unified ThinkingConfig to provider-specific providerOptions.
 */
function buildThinkingProviderOptions(
  providerId: string | undefined,
  modelId: string,
  config: ThinkingConfig,
): ProviderOptions | undefined {
  const info = providerId
    ? MODEL_THINKING_MAP.get(getModelMetadataKey(providerId, modelId))
    : UNIQUE_MODEL_THINKING_MAP.get(modelId);
  if (!info?.thinking) return undefined; // model has no thinking capability

  const thinking = info.thinking;
  if (thinking.control === 'none') return undefined;

  const mode = getThinkingMode(config);

  switch (thinking.requestAdapter) {
    case 'openai': {
      const effort = pickThinkingEffort(thinking, config);
      return effort ? { openai: { reasoningEffort: effort } } : undefined;
    }
    case 'anthropic': {
      if (mode === 'disabled') return { anthropic: { thinking: { type: 'disabled' } } };
      if (thinking.control === 'toggle-budget' || thinking.control === 'budget-only') {
        const budget = pickThinkingBudget(thinking, config);
        return budget === undefined
          ? undefined
          : { anthropic: { thinking: { type: 'enabled', budgetTokens: budget } } };
      }
      const effort = getAnthropicEffort(thinking, config);
      if (!effort) return undefined;
      if (thinking.anthropicThinking?.type === 'adaptive') {
        return {
          anthropic: {
            thinking: { type: 'adaptive' },
            effort,
          },
        };
      }
      const manualEffort = effort === 'xhigh' ? 'max' : effort;
      const budget = thinking.anthropicThinking?.budgetByEffort?.[manualEffort];
      if (!budget) return undefined;
      return {
        anthropic: {
          thinking: { type: 'enabled', budgetTokens: budget },
          effort: manualEffort,
        },
      };
    }
    case 'google': {
      if (thinking.control === 'level') {
        const level = pickThinkingLevel(thinking, config);
        return level ? { google: { thinkingConfig: { thinkingLevel: level } } } : undefined;
      }
      const budget = pickThinkingBudget(thinking, config);
      if (budget === undefined) return undefined;
      return { google: { thinkingConfig: { thinkingBudget: budget } } };
    }
    default:
      // OpenAI-compatible providers are injected in providers.ts fetch wrapper.
      return undefined;
  }
}
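// Example mappings (a sketch: model IDs are placeholders and the resolved
// effort/budget values depend on the pick* helpers and the model's capability
// metadata):
//
//   // OpenAI adapter: effort resolves to a reasoningEffort provider option
//   buildThinkingProviderOptions('openai', 'o3', config);
//   // => { openai: { reasoningEffort: 'high' } }
//
//   // Google adapter with budget control
//   buildThinkingProviderOptions('google', 'gemini-2.5-flash', config);
//   // => { google: { thinkingConfig: { thinkingBudget: 8192 } } }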
/**
 * Inject provider-specific thinking options into LLM call params.
 *
 * For native providers (OpenAI/Anthropic/Google), this sets providerOptions.
 * For OpenAI-compatible providers, providerOptions won't work (stripped by the
 * zod schema) — those are handled by the custom fetch wrapper via thinkingContext.
 *
 * Priority: caller's providerOptions > ThinkingConfig
 */
function injectProviderOptions<T extends GenerateTextParams | StreamTextParams>(
  params: T,
  thinking?: ThinkingConfig,
): T {
  // Caller explicitly set providerOptions; leave them untouched.
  if ((params as Record<string, unknown>).providerOptions) return params;
  const modelId = getModelId(params);
  const providerId = getModelProviderId(params);
  if (thinking) {
    const opts = buildThinkingProviderOptions(providerId, modelId, thinking);
    if (opts) return { ...params, providerOptions: opts };
  }
  return params;
}

/**
 * Options for LLM call retry on validation failure.
 * This is separate from the AI SDK's built-in maxRetries (which handles network/5xx errors).
 */
export interface LLMRetryOptions {
  /** Max retry attempts when validate() fails or the response is empty (default: 0 = no retry) */
  retries?: number;
  /**
   * Custom validation function. Return true to accept the result, false to retry.
   * Default: checks that response text is non-empty.
   */
  validate?: (text: string) => boolean;
}

const DEFAULT_VALIDATE = (text: string) => text.trim().length > 0;

/**
 * Unified wrapper around `generateText`.
 *
 * @param params - Same parameters as AI SDK's `generateText`
 * @param source - A short label for log grouping (e.g. 'scene-stream', 'pbl-chat')
 * @param retryOptions - Optional retry-on-validation-failure settings
 * @param thinking - Optional per-call thinking config (takes precedence over the
 *   global VITE_LLM_THINKING_DISABLED env flag)
 */
export async function callLLM<T extends GenerateTextParams>(
  params: T,
  source: string,
  retryOptions?: LLMRetryOptions,
  thinking?: ThinkingConfig,
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
): Promise<GenerateTextResult<any, any>> {
  const maxAttempts = (retryOptions?.retries ?? 0) + 1;
  const validate = retryOptions?.validate ?? (maxAttempts > 1 ? DEFAULT_VALIDATE : undefined);

  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  let lastResult: GenerateTextResult<any, any> | undefined;
  let lastError: unknown;

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      // Resolve effective thinking config: per-call > global env > undefined
      const effectiveThinking = thinking ?? getGlobalThinkingConfig();
      const injectedParams = injectProviderOptions(params, effectiveThinking);

      // Wrap in thinkingContext so the custom fetch wrapper in providers.ts
      // can read the config and inject vendor-specific body params for
      // OpenAI-compatible providers.
      const result = await thinkingContext.run(effectiveThinking, () =>
        generateText(injectedParams),
      );

      // Validate result (only when retries are configured)
      if (validate && !validate(result.text)) {
        log.warn(
          `[${source}] Validation failed (attempt ${attempt}/${maxAttempts}), ${
            attempt < maxAttempts ? 'retrying...' : 'giving up'
          }`,
        );
        lastResult = result;
        continue;
      }
      return result;
    } catch (error) {
      lastError = error;
      if (attempt < maxAttempts) {
        log.warn(`[${source}] Call failed (attempt ${attempt}/${maxAttempts}), retrying...`, error);
        continue;
      }
    }
  }

  // All attempts exhausted — return last result or throw last error
  if (lastResult) return lastResult;
  throw lastError;
}
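// Usage sketch (hypothetical: the `openai` model helper would come from
// @ai-sdk/openai, and the model ID, prompt, and validator are placeholders):
//
//   const result = await callLLM(
//     { model: openai('gpt-4o'), prompt: 'Summarize the scene.' },
//     'scene-stream',
//     { retries: 2, validate: (text) => text.length > 20 },
//   );
//   console.log(result.text);
//
// With retries: 2 the call runs at most three times. Note that a result that
// still fails validate() on the final attempt is returned, not thrown.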
/**
 * Unified wrapper around `streamText`.
 *
 * Returns the same StreamTextResult.
 *
 * @param params - Same parameters as AI SDK's `streamText`
 * @param source - A short label for log grouping
 * @param thinking - Optional per-call thinking config (takes precedence over the
 *   global VITE_LLM_THINKING_DISABLED env flag)
 */
export function streamLLM<T extends StreamTextParams>(
  params: T,
  source: string,
  thinking?: ThinkingConfig,
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
): StreamTextResult<any, any> {
  // Resolve effective thinking config and wrap in thinkingContext
  const effectiveThinking = thinking ?? getGlobalThinkingConfig();
  const injectedParams = injectProviderOptions(params, effectiveThinking);
  const result = thinkingContext.run(effectiveThinking, () => streamText(injectedParams));
  return result;
}
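// Usage sketch (hypothetical: the `anthropic` model helper would come from
// @ai-sdk/anthropic, and any ThinkingConfig fields beyond mode/enabled are
// assumptions about its shape):
//
//   const stream = streamLLM(
//     { model: anthropic('claude-sonnet-4'), prompt: 'Narrate the next beat.' },
//     'pbl-chat',
//     { mode: 'enabled', enabled: true },
//   );
//   for await (const chunk of stream.textStream) process.stdout.write(chunk);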