| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
/**
 * Configuration accepted when initializing the ONNX LLM runtime.
 * All fields are optional; unspecified values fall back to the
 * implementation's defaults (not visible from this declaration file).
 */
export interface OnnxLLMConfig {
    /** Model to load — a key of AVAILABLE_MODELS or a raw model id string. TODO(review): confirm which forms are accepted. */
    modelId?: string;
    /** Directory where downloaded model weights are cached. */
    cacheDir?: string;
    /** Whether to load quantized weights (smaller download, lower precision). */
    quantized?: boolean;
    /** Execution backend: plain CPU or WebGPU acceleration. */
    device?: 'cpu' | 'webgpu';
    /** Maximum sequence length — presumably in tokens (prompt + generation); verify against implementation. */
    maxLength?: number;
}
/**
 * Per-call options for text generation (used by generate, generateStream,
 * and chat). All fields are optional; defaults live in the implementation.
 */
export interface GenerationConfig {
    /** Upper bound on the number of new tokens to generate. */
    maxNewTokens?: number;
    /** Sampling temperature; higher values produce more random output. */
    temperature?: number;
    /** Nucleus (top-p) sampling threshold. */
    topP?: number;
    /** Top-k sampling cutoff. */
    topK?: number;
    /** Penalty applied to already-generated tokens to discourage repetition. */
    repetitionPenalty?: number;
    /** Sequences that terminate generation when produced — presumably matched against decoded text; confirm. */
    stopSequences?: string[];
    /** System prompt prepended to the conversation/prompt. */
    systemPrompt?: string;
    /** Callback invoked with each decoded token as it is produced (streaming hook). */
    onToken?: (token: string) => void;
}
/**
 * Result of a completed generation call, including basic throughput
 * statistics alongside the generated text.
 */
export interface GenerationResult {
    /** The full generated text. */
    text: string;
    /** Number of tokens produced for this result. */
    tokensGenerated: number;
    /** Wall-clock time spent generating, in milliseconds. */
    timeMs: number;
    /** Throughput: tokensGenerated relative to timeMs, expressed per second. */
    tokensPerSecond: number;
    /** Identifier of the model that produced the text. */
    model: string;
    /** Whether this result was served from a cache rather than freshly generated — TODO(review): confirm cache semantics. */
    cached: boolean;
}
/**
 * Registry of models this wrapper knows how to load, keyed by a short
 * local alias. For each entry: `id` is the upstream (Hugging Face-style)
 * repository id, `name` and `description` are human-readable labels,
 * `size` is an approximate download size hint, and `contextLength` is
 * the model's maximum context window — presumably in tokens.
 */
export declare const AVAILABLE_MODELS: {
    readonly 'trm-tinystories': {
        readonly id: "Xenova/TinyStories-33M";
        readonly name: "TinyStories 33M (TRM)";
        readonly size: "~65MB";
        readonly description: "Ultra-tiny model for stories and basic generation";
        readonly contextLength: 512;
    };
    readonly 'trm-gpt2-tiny': {
        readonly id: "Xenova/gpt2";
        readonly name: "GPT-2 124M (TRM)";
        readonly size: "~250MB";
        readonly description: "Classic GPT-2 tiny for general text";
        readonly contextLength: 1024;
    };
    readonly 'trm-distilgpt2': {
        readonly id: "Xenova/distilgpt2";
        readonly name: "DistilGPT-2 (TRM)";
        readonly size: "~82MB";
        readonly description: "Distilled GPT-2, fastest general model";
        readonly contextLength: 1024;
    };
    readonly 'smollm-135m': {
        readonly id: "HuggingFaceTB/SmolLM-135M-Instruct";
        readonly name: "SmolLM 135M";
        readonly size: "~135MB";
        readonly description: "Smallest instruct model, very fast";
        readonly contextLength: 2048;
    };
    readonly 'smollm-360m': {
        readonly id: "HuggingFaceTB/SmolLM-360M-Instruct";
        readonly name: "SmolLM 360M";
        readonly size: "~360MB";
        readonly description: "Small model, fast, better quality";
        readonly contextLength: 2048;
    };
    readonly 'smollm2-135m': {
        readonly id: "HuggingFaceTB/SmolLM2-135M-Instruct";
        readonly name: "SmolLM2 135M";
        readonly size: "~135MB";
        readonly description: "Latest SmolLM v2, improved capabilities";
        readonly contextLength: 2048;
    };
    readonly 'smollm2-360m': {
        readonly id: "HuggingFaceTB/SmolLM2-360M-Instruct";
        readonly name: "SmolLM2 360M";
        readonly size: "~360MB";
        readonly description: "Latest SmolLM v2, better quality";
        readonly contextLength: 2048;
    };
    readonly 'qwen2.5-0.5b': {
        readonly id: "Qwen/Qwen2.5-0.5B-Instruct";
        readonly name: "Qwen2.5 0.5B";
        readonly size: "~300MB quantized";
        readonly description: "Good balance of speed and quality, multilingual";
        readonly contextLength: 4096;
    };
    readonly tinyllama: {
        readonly id: "TinyLlama/TinyLlama-1.1B-Chat-v1.0";
        readonly name: "TinyLlama 1.1B";
        readonly size: "~600MB quantized";
        readonly description: "Best small model quality, slower";
        readonly contextLength: 2048;
    };
    readonly 'codegemma-2b': {
        readonly id: "google/codegemma-2b";
        readonly name: "CodeGemma 2B";
        readonly size: "~1GB quantized";
        readonly description: "Code generation specialist";
        readonly contextLength: 8192;
    };
    readonly 'deepseek-coder-1.3b': {
        readonly id: "deepseek-ai/deepseek-coder-1.3b-instruct";
        readonly name: "DeepSeek Coder 1.3B";
        readonly size: "~700MB quantized";
        readonly description: "Excellent for code tasks";
        readonly contextLength: 4096;
    };
    readonly 'phi-2': {
        readonly id: "microsoft/phi-2";
        readonly name: "Phi-2 2.7B";
        readonly size: "~1.5GB quantized";
        readonly description: "High quality small model";
        readonly contextLength: 2048;
    };
    readonly 'phi-3-mini': {
        readonly id: "microsoft/Phi-3-mini-4k-instruct";
        readonly name: "Phi-3 Mini";
        readonly size: "~2GB quantized";
        readonly description: "Best quality tiny model";
        readonly contextLength: 4096;
    };
};
| export type ModelKey = keyof typeof AVAILABLE_MODELS; |
| |
| |
| |
| export declare function isTransformersAvailable(): Promise<boolean>; |
| |
| |
| |
| export declare function initOnnxLLM(config?: OnnxLLMConfig): Promise<boolean>; |
| |
| |
| |
| export declare function generate(prompt: string, config?: GenerationConfig): Promise<GenerationResult>; |
| |
| |
| |
| export declare function generateStream(prompt: string, config?: GenerationConfig): Promise<AsyncGenerator<string, GenerationResult, undefined>>; |
| |
| |
| |
/**
 * Chat-style generation from a list of role-tagged messages
 * (system / user / assistant), using the module-level model.
 *
 * @param messages - Conversation history in chronological order — presumably; confirm ordering contract.
 * @param config - Optional sampling/stopping options for this call.
 * @returns Promise resolving to the assistant's reply plus timing statistics.
 */
export declare function chat(messages: Array<{
    role: 'system' | 'user' | 'assistant';
    content: string;
}>, config?: GenerationConfig): Promise<GenerationResult>;
| |
| |
| |
/**
 * Returns a snapshot of the module-level runtime state: the currently
 * loaded model id (or null if none), whether the runtime is ready to
 * generate, and the full model registry.
 */
export declare function getModelInfo(): {
    model: string | null;
    ready: boolean;
    availableModels: typeof AVAILABLE_MODELS;
};
| |
| |
| |
| export declare function unload(): Promise<void>; |
/**
 * Instance-oriented wrapper mirroring the module-level API
 * (init / generate / chat / unload) with per-instance configuration.
 */
export declare class OnnxLLM {
    // Configuration captured at construction time.
    private config;
    // Tracks whether init() has completed successfully.
    private initialized;
    /** @param config - Optional runtime configuration for this instance. */
    constructor(config?: OnnxLLMConfig);
    /** Loads the configured model; resolves true on success. */
    init(): Promise<boolean>;
    /** Generates a completion for a single prompt. See module-level generate. */
    generate(prompt: string, config?: GenerationConfig): Promise<GenerationResult>;
    /** Chat-style generation from role-tagged messages. See module-level chat. */
    chat(messages: Array<{
        role: 'system' | 'user' | 'assistant';
        content: string;
    }>, config?: GenerationConfig): Promise<GenerationResult>;
    /** Unloads this instance's model and releases its resources. */
    unload(): Promise<void>;
    /** True once the instance is initialized and able to generate. */
    get ready(): boolean;
    /** Id of the loaded model, or null if none is loaded. */
    get model(): string | null;
}
export default OnnxLLM;
| |