// OpenMAIC-React: src/lib/ai/llm.ts
/**
* Unified LLM Call Layer
*
* All LLM interactions should go through callLLM / streamLLM.
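*
* @example
* // A minimal sketch; `someLanguageModel` stands in for any AI SDK model instance.
* const { text } = await callLLM(
*   { model: someLanguageModel, prompt: 'Hello' },
*   'example-source',
* );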
*/
import { generateText, streamText } from 'ai';
import type { GenerateTextResult, StreamTextResult } from 'ai';
import { createLogger } from '@/lib/logger';
import { PROVIDERS } from './providers';
import { thinkingContext } from './thinking-context';
import { getModelMetadataKey } from './model-metadata';
import type { ThinkingCapability, ThinkingConfig } from '@/lib/types/provider';
import {
getThinkingMode,
pickThinkingBudget,
pickThinkingEffort,
pickThinkingLevel,
} from '@/lib/ai/thinking-config';
const log = createLogger('LLM');
// Re-export for external use
export type { ThinkingConfig } from '@/lib/types/provider';
// Derive the parameter types accepted by the AI SDK
type GenerateTextParams = Parameters<typeof generateText>[0];
type StreamTextParams = Parameters<typeof streamText>[0];
function _extractRequestInfo(params: GenerateTextParams | StreamTextParams) {
const tools = params.tools ? Object.keys(params.tools as Record<string, unknown>) : undefined;
const p = params as Record<string, unknown>;
return {
system: p.system as string | undefined,
prompt: p.prompt as string | undefined,
messages: p.messages as unknown[] | undefined,
tools,
maxOutputTokens: p.maxOutputTokens as number | undefined,
};
}
function getModelId(params: GenerateTextParams | StreamTextParams): string {
const m = params.model;
if (typeof m === 'string') return m;
if (m && typeof m === 'object' && 'modelId' in m) return (m as { modelId: string }).modelId;
return 'unknown';
}
// ---------------------------------------------------------------------------
// Thinking / Reasoning Adapter
//
// Builds a lookup table from PROVIDERS at module load time, then uses it to
// map a unified ThinkingConfig into provider-specific providerOptions.
// Native providers (OpenAI/Anthropic/Google) are mapped to providerOptions.
// OpenAI-compatible providers are injected by the providers.ts fetch wrapper.
// ---------------------------------------------------------------------------
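//
// Example (illustrative, not exhaustive): a config like { mode: 'enabled' }
// resolved against an Anthropic budget-controlled model might produce:
//   { anthropic: { thinking: { type: 'enabled', budgetTokens: 8192 } } }
// where the budgetTokens value is just a stand-in for a real budget.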
interface ModelThinkingInfo {
thinking?: ThinkingCapability;
}
/** Provider/model → thinking capability (built once at module load) */
const MODEL_THINKING_MAP: Map<string, ModelThinkingInfo> = (() => {
const map = new Map<string, ModelThinkingInfo>();
for (const provider of Object.values(PROVIDERS)) {
for (const model of provider.models) {
map.set(getModelMetadataKey(provider.id, model.id), {
thinking: model.capabilities?.thinking,
});
}
}
return map;
})();
/** Model ID → thinking capability for IDs that are unique across providers. */
const UNIQUE_MODEL_THINKING_MAP: Map<string, ModelThinkingInfo> = (() => {
const counts = new Map<string, number>();
for (const provider of Object.values(PROVIDERS)) {
for (const model of provider.models) {
counts.set(model.id, (counts.get(model.id) ?? 0) + 1);
}
}
const map = new Map<string, ModelThinkingInfo>();
for (const provider of Object.values(PROVIDERS)) {
for (const model of provider.models) {
if (counts.get(model.id) === 1) {
map.set(model.id, {
thinking: model.capabilities?.thinking,
});
}
}
}
return map;
})();
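// Note: buildThinkingProviderOptions uses the provider-scoped map when the
// provider id can be resolved from the model instance, and falls back to this
// unique-model map otherwise (see the lookup at the top of that function).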
/** Global thinking override from environment variable */
function getGlobalThinkingConfig(): ThinkingConfig | undefined {
if (import.meta.env?.VITE_LLM_THINKING_DISABLED === 'true') {
return { mode: 'disabled', enabled: false };
}
return undefined;
}
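// Illustrative .env entry to disable thinking globally:
//   VITE_LLM_THINKING_DISABLED=true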
type ProviderOptions = Record<string, Record<string, unknown>>;
function getAnthropicEffort(
thinking: ThinkingCapability,
config: ThinkingConfig,
): 'low' | 'medium' | 'high' | 'xhigh' | 'max' | undefined {
const effort = pickThinkingEffort(thinking, config);
if (!effort || effort === 'none' || effort === 'minimal') return undefined;
return effort;
}
function getModelProviderId(params: GenerateTextParams | StreamTextParams): string | undefined {
const m = params.model;
if (!m || typeof m !== 'object' || !('provider' in m)) return undefined;
const provider = (m as { provider?: string }).provider;
if (!provider) return undefined;
if (provider in PROVIDERS) return provider;
const prefix = provider.split('.')[0];
return prefix in PROVIDERS ? prefix : undefined;
}
/**
* Map a unified ThinkingConfig to provider-specific providerOptions.
*/
function buildThinkingProviderOptions(
providerId: string | undefined,
modelId: string,
config: ThinkingConfig,
): ProviderOptions | undefined {
const info = providerId
? MODEL_THINKING_MAP.get(getModelMetadataKey(providerId, modelId))
: UNIQUE_MODEL_THINKING_MAP.get(modelId);
if (!info?.thinking) return undefined; // model has no thinking capability
const thinking = info.thinking;
if (thinking.control === 'none') return undefined;
const mode = getThinkingMode(config);
switch (thinking.requestAdapter) {
case 'openai': {
const effort = pickThinkingEffort(thinking, config);
return effort ? { openai: { reasoningEffort: effort } } : undefined;
}
case 'anthropic': {
if (mode === 'disabled') return { anthropic: { thinking: { type: 'disabled' } } };
if (thinking.control === 'toggle-budget' || thinking.control === 'budget-only') {
const budget = pickThinkingBudget(thinking, config);
return budget === undefined
? undefined
: { anthropic: { thinking: { type: 'enabled', budgetTokens: budget } } };
}
const effort = getAnthropicEffort(thinking, config);
if (!effort) return undefined;
if (thinking.anthropicThinking?.type === 'adaptive') {
return {
anthropic: {
thinking: { type: 'adaptive' },
effort,
},
};
}
const manualEffort = effort === 'xhigh' ? 'max' : effort;
const budget = thinking.anthropicThinking?.budgetByEffort?.[manualEffort];
if (!budget) return undefined;
return {
anthropic: {
thinking: { type: 'enabled', budgetTokens: budget },
effort: manualEffort,
},
};
}
case 'google': {
if (thinking.control === 'level') {
const level = pickThinkingLevel(thinking, config);
return level ? { google: { thinkingConfig: { thinkingLevel: level } } } : undefined;
}
const budget = pickThinkingBudget(thinking, config);
if (budget === undefined) return undefined;
return { google: { thinkingConfig: { thinkingBudget: budget } } };
}
default:
// OpenAI-compatible providers are injected in providers.ts fetch wrapper.
return undefined;
}
}
/**
* Inject provider-specific thinking options into LLM call params.
*
* For native providers (OpenAI/Anthropic/Google), this sets providerOptions.
* For OpenAI-compatible providers, providerOptions won't work (stripped by
* zod schema) — those are handled by the custom fetch wrapper via thinkingContext.
*
* Priority: caller's providerOptions > ThinkingConfig
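*
* @example
* // Illustrative, with `model` and `cfg` as stand-ins: because the caller set
* // providerOptions explicitly, the params are returned unchanged and the
* // ThinkingConfig is ignored here.
* injectProviderOptions({ model, prompt: 'hi', providerOptions: { openai: {} } }, cfg);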
*/
function injectProviderOptions<T extends GenerateTextParams | StreamTextParams>(
params: T,
thinking?: ThinkingConfig,
): T {
if ((params as Record<string, unknown>).providerOptions) return params; // caller explicitly set providerOptions
const modelId = getModelId(params);
const providerId = getModelProviderId(params);
if (thinking) {
const opts = buildThinkingProviderOptions(providerId, modelId, thinking);
if (opts) return { ...params, providerOptions: opts };
}
return params;
}
/**
* Options for LLM call retry on validation failure.
* This is separate from the AI SDK's built-in maxRetries (which handles network/5xx errors).
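*
* @example
* // Illustrative: retry up to twice until the response parses as JSON.
* const retry: LLMRetryOptions = {
*   retries: 2,
*   validate: (text) => { try { JSON.parse(text); return true; } catch { return false; } },
* };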
*/
export interface LLMRetryOptions {
/** Max retry attempts when validate() fails or the response is empty (default: 0 = no retry) */
retries?: number;
/**
* Custom validation function. Return true to accept the result, false to retry.
* Default: checks that response text is non-empty.
*/
validate?: (text: string) => boolean;
}
const DEFAULT_VALIDATE = (text: string) => text.trim().length > 0;
/**
* Unified wrapper around `generateText`.
*
* @param params - Same parameters as AI SDK's `generateText`
* @param source - A short label for log grouping (e.g. 'scene-stream', 'pbl-chat')
* @param retryOptions - Optional retry-on-validation-failure settings
* @param thinking - Optional per-call thinking config (overrides the global VITE_LLM_THINKING_DISABLED setting)
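*
* @example
* // A hedged sketch; `someModel` stands in for any AI SDK LanguageModel.
* const result = await callLLM(
*   { model: someModel, prompt: 'Summarize the release notes.' },
*   'release-summary',
*   { retries: 1 },
* );
* console.log(result.text);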
*/
export async function callLLM<T extends GenerateTextParams>(
params: T,
source: string,
retryOptions?: LLMRetryOptions,
thinking?: ThinkingConfig,
// eslint-disable-next-line @typescript-eslint/no-explicit-any
): Promise<GenerateTextResult<any, any>> {
const maxAttempts = (retryOptions?.retries ?? 0) + 1;
const validate = retryOptions?.validate ?? (maxAttempts > 1 ? DEFAULT_VALIDATE : undefined);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
let lastResult: GenerateTextResult<any, any> | undefined;
let lastError: unknown;
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
try {
// Resolve effective thinking config: per-call > global env > undefined
const effectiveThinking = thinking ?? getGlobalThinkingConfig();
const injectedParams = injectProviderOptions(params, effectiveThinking);
// Wrap in thinkingContext so the custom fetch wrapper in providers.ts
// can read the config and inject vendor-specific body params for
// OpenAI-compatible providers.
const result = await thinkingContext.run(effectiveThinking, () =>
generateText(injectedParams),
);
// Validate result (only when retries are configured)
if (validate && !validate(result.text)) {
log.warn(
`[${source}] Validation failed (attempt ${attempt}/${maxAttempts}), ${attempt < maxAttempts ? 'retrying...' : 'giving up'}`,
);
lastResult = result;
continue;
}
return result;
} catch (error) {
lastError = error;
if (attempt < maxAttempts) {
log.warn(`[${source}] Call failed (attempt ${attempt}/${maxAttempts}), retrying...`, error);
continue;
}
}
}
// All attempts exhausted — return last result or throw last error
if (lastResult) return lastResult;
throw lastError;
}
/**
* Unified wrapper around `streamText`.
*
* Returns the same StreamTextResult.
*
* @param params - Same parameters as AI SDK's `streamText`
* @param source - A short label for log grouping
* @param thinking - Optional per-call thinking config (overrides the global VITE_LLM_THINKING_DISABLED setting)
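*
* @example
* // Illustrative: accumulate the stream incrementally; `someModel` is a stand-in.
* const stream = streamLLM({ model: someModel, prompt: 'Tell a story.' }, 'story');
* let out = '';
* for await (const chunk of stream.textStream) out += chunk;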
*/
export function streamLLM<T extends StreamTextParams>(
params: T,
source: string,
thinking?: ThinkingConfig,
// eslint-disable-next-line @typescript-eslint/no-explicit-any
): StreamTextResult<any, any> {
// Resolve effective thinking config and wrap in thinkingContext
const effectiveThinking = thinking ?? getGlobalThinkingConfig();
const injectedParams = injectProviderOptions(params, effectiveThinking);
return thinkingContext.run(effectiveThinking, () => streamText(injectedParams));
}