Spaces:
Running
Running
Copy nexus_os_v2/model_registry.py from dataset for module imports
Browse files
nexus_os_v2/model_registry.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""NEXUS OS v2.1 — Model Registry with Real Ollama + Cloud Models."""
|
| 2 |
+
from dataclasses import dataclass, field
|
| 3 |
+
from typing import List, Optional, Dict
|
| 4 |
+
from enum import Enum
|
| 5 |
+
|
| 6 |
+
class Tier(Enum):
    """Deployment tier a model profile is assigned to.

    Four local tiers (named after 8/16/24/48 GB) plus one tier for models
    reached through a cloud API. The string values are the serialized form.
    """

    LOCAL_8GB = "local_8gb"
    LOCAL_16GB = "local_16gb"
    LOCAL_24GB = "local_24gb"
    LOCAL_48GB = "local_48gb"
    CLOUD_API = "cloud_api"
|
| 8 |
+
|
| 9 |
+
class Capability(Enum):
    """Tag describing something a model profile is listed as supporting.

    Profiles carry a list of these tags; ``by_cap`` filters the registry on
    membership in that list. The string values are the serialized form.
    """

    REASONING = "reasoning"
    CODING = "coding"
    VISION = "vision"
    FUNCTION_CALLING = "function_calling"
    TOOL_USE = "tool_use"
    INSTRUCT = "instruct"
    UNCHAINED = "unchained"
    ABLITERATED = "abliterated"
    FAST = "fast"
    LONG_CONTEXT = "long_context"
    MULTILINGUAL = "multilingual"
    SAFETY = "safety"
    AUDIO = "audio"
    MULTIMODAL = "multimodal"
|
| 13 |
+
|
| 14 |
+
@dataclass
class ModelProfile:
    """Static description of one model known to the registry.

    Only ``name`` is required; every other field has a default. Field order
    is part of the constructor signature and must not be rearranged.
    """

    # Human-readable display name.
    name: str
    # Tag used for locally served models; None on the cloud entries in this file.
    ollama_tag: Optional[str] = None
    # Tag used for cloud-served models; None on the local entries in this file.
    cloud_tag: Optional[str] = None
    # Deployment tier; by_tier() filters on this.
    tier: Tier = Tier.LOCAL_8GB
    # Size in GB; vram() sums this for non-cloud profiles.
    size_gb: float = 0.0
    # Parameter count, in billions.
    params_b: float = 0.0
    # Quantization label (e.g. "Q4_K_M"), or None when not specified.
    quantization: Optional[str] = None
    # Capability tags; default_factory gives each instance its own list.
    capabilities: List[Capability] = field(default_factory=list)
    # Default sampling settings.
    default_temp: float = 0.8
    default_top_p: float = 0.95
    # Maximum context length (presumably in tokens — TODO confirm).
    max_context: int = 8192
    # Free-form notes and model family label.
    description: str = ""
    family: str = ""
    # NOTE(review): the meaning of T_c, mu_base and kappa is not evident from
    # this module; document them where they are consumed.
    T_c: float = 1.0
    mu_base: float = 0.5
    kappa: float = 0.1
|
| 21 |
+
|
| 22 |
+
# Registry of known models, keyed by a short lowercase slug.
# Entries are grouped by tier in the order: LOCAL 8GB, LOCAL 16GB, LOCAL 24GB,
# LOCAL 48GB, CLOUD API. Local entries set ``ollama_tag``; cloud entries set
# ``cloud_tag`` and use size_gb=0.0.
REGISTRY: Dict[str, ModelProfile] = {
    # LOCAL 8GB
    "functiongemma": ModelProfile(name="FunctionGemma",ollama_tag="functiongemma:latest",tier=Tier.LOCAL_8GB,size_gb=0.3,params_b=0.27,capabilities=[Capability.FUNCTION_CALLING,Capability.FAST,Capability.INSTRUCT],default_temp=0.3,max_context=8192,family="gemma"),
    "huihui-granite-4.1-3b": ModelProfile(name="Huihui Granite 4.1 3B",ollama_tag="hf.co/mradermacher/Huihui-granite-4.1-3b-abliterated-i1-GGUF:Q6_K",tier=Tier.LOCAL_8GB,size_gb=2.8,params_b=3.0,quantization="Q6_K",capabilities=[Capability.REASONING,Capability.CODING,Capability.UNCHAINED,Capability.ABLITERATED,Capability.INSTRUCT],default_temp=0.7,max_context=128000,family="granite"),
    "granite-4.1-3b-sft": ModelProfile(name="Granite 4.1 3B SFT",ollama_tag="hf.co/mradermacher/Granite-4.1-3B-SFT-Claude-Opus-Reasoning-Unsloth-i1-GGUF:Q6_K",tier=Tier.LOCAL_8GB,size_gb=2.8,params_b=3.0,quantization="Q6_K",capabilities=[Capability.REASONING,Capability.CODING,Capability.INSTRUCT,Capability.LONG_CONTEXT],default_temp=0.7,max_context=128000,family="granite"),
    "trinity-nano": ModelProfile(name="Trinity Nano",ollama_tag="hf.co/bartowski/arcee-ai_Trinity-Nano-Preview-GGUF:Q4_K_M",tier=Tier.LOCAL_8GB,size_gb=3.8,params_b=4.0,quantization="Q4_K_M",capabilities=[Capability.REASONING,Capability.CODING,Capability.INSTRUCT,Capability.FAST],default_temp=0.7,max_context=32768,family="trinity"),
    "ibm-grok4-coder-1b": ModelProfile(name="IBM Grok4 Coder 1B",ollama_tag="hf.co/WithinUsAI/IBM-Grok4-Ultra.Fast.Coder-1B-GGUF:Q5_K_M",tier=Tier.LOCAL_8GB,size_gb=1.2,params_b=1.0,quantization="Q5_K_M",capabilities=[Capability.CODING,Capability.FAST,Capability.INSTRUCT],default_temp=0.3,max_context=8192,family="grok"),
    "minicpm-v-4.6": ModelProfile(name="MiniCPM-V 4.6",ollama_tag="hf.co/treadon/MiniCPM-V-4.6-Abliterated-AND-Disinhibited:Q4_K_M",tier=Tier.LOCAL_8GB,size_gb=1.6,params_b=4.0,quantization="Q4_K_M",capabilities=[Capability.VISION,Capability.REASONING,Capability.UNCHAINED,Capability.ABLITERATED],default_temp=0.7,max_context=32768,family="minicpm"),
    "qwen3.5-0.8b-heretic": ModelProfile(name="Qwen 3.5 0.8B Heretic",ollama_tag="hf.co/mradermacher/Qwen3.5-0.8B-heretic-ara-v2-GGUF:Q8_0",tier=Tier.LOCAL_8GB,size_gb=0.8,params_b=0.8,quantization="Q8_0",capabilities=[Capability.CODING,Capability.FAST,Capability.INSTRUCT,Capability.UNCHAINED],default_temp=0.8,max_context=32768,family="qwen"),
    "bonsai-1.7b": ModelProfile(name="Ternary Bonsai 1.7B",ollama_tag="hf.co/prism-ml/Ternary-Bonsai-1.7B-gguf:F16",tier=Tier.LOCAL_8GB,size_gb=3.4,params_b=1.7,quantization="F16",capabilities=[Capability.REASONING,Capability.INSTRUCT,Capability.FAST],default_temp=0.7,max_context=8192,family="bonsai"),
    "darwin-4b": ModelProfile(name="Darwin 4B",ollama_tag="hf.co/mradermacher/Darwin-4B-Genesis-GGUF:Q4_K_M",tier=Tier.LOCAL_8GB,size_gb=5.3,params_b=4.0,quantization="Q4_K_M",capabilities=[Capability.REASONING,Capability.CODING,Capability.INSTRUCT],default_temp=0.7,max_context=32768,family="darwin"),
    "dr-venus-4b-rl": ModelProfile(name="DR-Venus 4B RL",ollama_tag="hf.co/inclusionAI/DR-Venus-4B-RL-GGUF:Q6_K",tier=Tier.LOCAL_8GB,size_gb=3.6,params_b=4.0,quantization="Q6_K",capabilities=[Capability.REASONING,Capability.CODING,Capability.INSTRUCT,Capability.SAFETY],default_temp=0.7,max_context=32768,family="venus"),
    "granite-4.1-3b-abliterated": ModelProfile(name="Granite 4.1 3B Abliterated",ollama_tag="hf.co/mradermacher/granite-4.1-3b-Abliterated-AND-Disinhibited-GGUF:Q8_0",tier=Tier.LOCAL_8GB,size_gb=3.6,params_b=3.0,quantization="Q8_0",capabilities=[Capability.REASONING,Capability.CODING,Capability.UNCHAINED,Capability.ABLITERATED,Capability.LONG_CONTEXT],default_temp=0.7,max_context=128000,family="granite"),
    "gemma4-most-seen-2b": ModelProfile(name="Gemma4 Most Seen 2B",ollama_tag="hf.co/WithinUsAI/Gemma4-Most.Seen.Unseen.Reasoner-2B:Q4_K_M",tier=Tier.LOCAL_8GB,size_gb=3.4,params_b=2.0,quantization="Q4_K_M",capabilities=[Capability.REASONING,Capability.FAST,Capability.INSTRUCT],default_temp=0.7,max_context=32768,family="gemma"),
    "grape-2-mini": ModelProfile(name="GRaPE 2 Mini",ollama_tag="hf.co/mradermacher/GRaPE-2-Mini-GGUF:Q8_0",tier=Tier.LOCAL_8GB,size_gb=4.8,params_b=4.0,quantization="Q8_0",capabilities=[Capability.REASONING,Capability.CODING,Capability.INSTRUCT],default_temp=0.7,max_context=32768,family="grape"),
    "bonsai-8b-requantized": ModelProfile(name="Bonsai 8B Requantized",ollama_tag="hf.co/lilyanatia/Bonsai-8B-requantized:Bonsai-8B-Q2_K.gguf",tier=Tier.LOCAL_8GB,size_gb=3.0,params_b=8.0,quantization="Q2_K",capabilities=[Capability.REASONING,Capability.CODING,Capability.FAST,Capability.INSTRUCT],default_temp=0.7,max_context=8192,family="bonsai"),
    "frob-locooperator": ModelProfile(name="Frob LocoOperator",ollama_tag="frob/locooperator:latest",tier=Tier.LOCAL_8GB,size_gb=2.5,params_b=3.0,capabilities=[Capability.TOOL_USE,Capability.FUNCTION_CALLING,Capability.FAST,Capability.INSTRUCT],default_temp=0.3,max_context=8192,family="loco"),
    "nemotron-3-nano-4b": ModelProfile(name="Nemotron 3 Nano 4B",ollama_tag="nemotron-3-nano:4b",tier=Tier.LOCAL_8GB,size_gb=2.8,params_b=4.0,quantization="Q4_K_M",capabilities=[Capability.REASONING,Capability.CODING,Capability.SAFETY,Capability.INSTRUCT],default_temp=0.7,max_context=32768,family="nemotron"),
    # NEW: OpenSonnet-Lite-MAX (4B dense, 262K context, Qwen3 base, Apache-2.0)
    "opensonnet-lite-max": ModelProfile(name="OpenSonnet-Lite-MAX",ollama_tag="hf.co/mradermacher/OpenSonnet-Lite-MAX-GGUF:Q4_K_M",tier=Tier.LOCAL_8GB,size_gb=2.5,params_b=4.0,quantization="Q4_K_M",capabilities=[Capability.REASONING,Capability.CODING,Capability.INSTRUCT,Capability.LONG_CONTEXT,Capability.FAST],default_temp=0.6,max_context=262144,family="qwen",T_c=0.9,mu_base=0.55,kappa=0.09,description="Qwen3-4B-Thinking base, multi-stage SFT, 85.22 GSM8K. Apache-2.0."),
    # LOCAL 16GB
    "deepseek-r1-8b": ModelProfile(name="DeepSeek-R1 8B",ollama_tag="deepseek-r1:8b",tier=Tier.LOCAL_16GB,size_gb=5.2,params_b=8.0,quantization="Q4_K_M",capabilities=[Capability.REASONING,Capability.CODING,Capability.INSTRUCT,Capability.LONG_CONTEXT],default_temp=0.6,max_context=128000,family="deepseek"),
    "qwen2.5-coder-7b": ModelProfile(name="Qwen 2.5 Coder 7B",ollama_tag="qwen2.5-coder:7b",tier=Tier.LOCAL_16GB,size_gb=4.7,params_b=7.0,quantization="Q4_K_M",capabilities=[Capability.CODING,Capability.INSTRUCT,Capability.FAST],default_temp=0.3,max_context=32768,family="qwen"),
    "l3.1-dark-reasoning-8b": ModelProfile(name="L3.1 Dark Reasoning 8B",ollama_tag="hf.co/mradermacher/L3.1-Dark-Reasoning-LewdPlay-evo-Hermes-R1-Uncensored-8B-GGUF:Q5_K_M",tier=Tier.LOCAL_16GB,size_gb=5.7,params_b=8.0,quantization="Q5_K_M",capabilities=[Capability.REASONING,Capability.CODING,Capability.UNCHAINED,Capability.ABLITERATED,Capability.INSTRUCT],default_temp=0.7,max_context=32768,family="llama"),
    "omega-evolution-9b": ModelProfile(name="Omega Evolution 9B",ollama_tag="hf.co/ReadyArt/Omega-Evolution-9B-v2.0-GGUF:Q4_K_M",tier=Tier.LOCAL_16GB,size_gb=6.6,params_b=9.0,quantization="Q4_K_M",capabilities=[Capability.REASONING,Capability.CODING,Capability.INSTRUCT,Capability.VISION],default_temp=0.7,max_context=32768,family="omega"),
    "darwin-9b-opus": ModelProfile(name="Darwin 9B Opus",ollama_tag="hf.co/mradermacher/Darwin-9B-Opus-GGUF:Q4_K_M",tier=Tier.LOCAL_16GB,size_gb=6.3,params_b=9.0,quantization="Q4_K_M",capabilities=[Capability.REASONING,Capability.CODING,Capability.INSTRUCT,Capability.LONG_CONTEXT],default_temp=0.7,max_context=65536,family="darwin"),
    "qwopus-3.5-9b": ModelProfile(name="Qwopus 3.5 9B",ollama_tag="hf.co/loilkon/Qwopus3.5-9B-v3.5-Uncensored:Q4_K_M",tier=Tier.LOCAL_16GB,size_gb=5.6,params_b=9.0,quantization="Q4_K_M",capabilities=[Capability.REASONING,Capability.CODING,Capability.UNCHAINED,Capability.ABLITERATED,Capability.INSTRUCT],default_temp=0.7,max_context=32768,family="qwopus"),
    "carnice-9b": ModelProfile(name="Carnice 9B",ollama_tag="hf.co/kai-os/Carnice-9b-GGUF:Q4_K_M",tier=Tier.LOCAL_16GB,size_gb=5.6,params_b=9.0,quantization="Q4_K_M",capabilities=[Capability.REASONING,Capability.CODING,Capability.INSTRUCT,Capability.VISION],default_temp=0.7,max_context=32768,family="carnice"),
    "open-search-vl-8b": ModelProfile(name="OpenSearch VL 8B",ollama_tag="hf.co/mradermacher/OpenSearch-VL-8B-GGUF:Q5_K_M",tier=Tier.LOCAL_16GB,size_gb=6.6,params_b=8.0,quantization="Q5_K_M",capabilities=[Capability.VISION,Capability.REASONING,Capability.INSTRUCT,Capability.LONG_CONTEXT],default_temp=0.7,max_context=65536,family="opensearch"),
    "granite-4.1-8b-abliterated": ModelProfile(name="Granite 4.1 8B Abliterated",ollama_tag="hf.co/mradermacher/granite-4.1-8b-Abliterated-AND-Disinhibited-GGUF:Q4_K_M",tier=Tier.LOCAL_16GB,size_gb=5.1,params_b=8.0,quantization="Q4_K_M",capabilities=[Capability.REASONING,Capability.CODING,Capability.UNCHAINED,Capability.ABLITERATED,Capability.LONG_CONTEXT],default_temp=0.7,max_context=128000,family="granite"),
    "huihui-granite-4.1-8b": ModelProfile(name="Huihui Granite 4.1 8B",ollama_tag="hf.co/mradermacher/Huihui-granite-4.1-8b-abliterated-GGUF:Q4_K_M",tier=Tier.LOCAL_16GB,size_gb=5.3,params_b=8.0,quantization="Q4_K_M",capabilities=[Capability.REASONING,Capability.CODING,Capability.UNCHAINED,Capability.ABLITERATED,Capability.LONG_CONTEXT],default_temp=0.7,max_context=128000,family="granite"),
    "jaahas-qwen3.5-9b": ModelProfile(name="Jaahas Qwen 3.5 9B",ollama_tag="jaahas/qwen3.5-uncensored:9b",tier=Tier.LOCAL_16GB,size_gb=7.4,params_b=9.0,quantization="Q4_K_M",capabilities=[Capability.REASONING,Capability.CODING,Capability.UNCHAINED,Capability.ABLITERATED,Capability.INSTRUCT,Capability.MULTILINGUAL],default_temp=0.7,max_context=32768,family="qwen"),
    # LOCAL 24GB
    "lfm2-12b-deckard": ModelProfile(name="LFM2 12B Deckard",ollama_tag="hf.co/mradermacher/LFM2-12B-A1B-SpeedDemon-The-Deckard-II-HERETIC-Uncensored-i1-GGUF:Q3_K_L",tier=Tier.LOCAL_24GB,size_gb=5.8,params_b=12.0,quantization="Q3_K_L",capabilities=[Capability.REASONING,Capability.CODING,Capability.UNCHAINED,Capability.ABLITERATED,Capability.LONG_CONTEXT,Capability.FAST],default_temp=0.7,max_context=128000,family="lfm"),
    "gemma4-e2b-opus": ModelProfile(name="Gemma4 E2B Opus",ollama_tag="hf.co/mradermacher/Gemma4-E2B-SFT-Claude-Opus-Reasoning-Unsloth-GGUF:Q8_0",tier=Tier.LOCAL_24GB,size_gb=5.5,params_b=4.0,quantization="Q8_0",capabilities=[Capability.REASONING,Capability.CODING,Capability.INSTRUCT,Capability.LONG_CONTEXT],default_temp=0.7,max_context=128000,family="gemma"),
    "gemma4-uncensored": ModelProfile(name="Gemma 4 Uncensored",ollama_tag="hf.co/HauhauCS/Gemma-4-E2B-Uncensored-HauhauCS-Aggressive:Q6_K_P",tier=Tier.LOCAL_24GB,size_gb=4.9,params_b=4.0,quantization="Q6_K_P",capabilities=[Capability.REASONING,Capability.CODING,Capability.UNCHAINED,Capability.ABLITERATED,Capability.VISION],default_temp=0.7,max_context=32768,family="gemma"),
    "gemma4-obliterated": ModelProfile(name="Gemma 4 OBLITERATED",ollama_tag="hf.co/OBLITERATUS/gemma-4-E4B-it-OBLITERATED:Q4_K_M",tier=Tier.LOCAL_24GB,size_gb=6.3,params_b=4.0,quantization="Q4_K_M",capabilities=[Capability.REASONING,Capability.CODING,Capability.UNCHAINED,Capability.ABLITERATED,Capability.VISION],default_temp=0.7,max_context=32768,family="gemma"),
    # NOTE(review): size_gb=1.0 looks small next to params_b=27.0 at Q4_K_M — confirm the figure.
    "qwen3.6-27b-dflash": ModelProfile(name="Qwen 3.6 27B DFlash",ollama_tag="hf.co/Ardenzard/Qwen3.6-27B-DFlash-GGUF:Q4_K_M",tier=Tier.LOCAL_24GB,size_gb=1.0,params_b=27.0,quantization="Q4_K_M",capabilities=[Capability.REASONING,Capability.CODING,Capability.LONG_CONTEXT,Capability.FAST],default_temp=0.7,max_context=128000,family="qwen"),
    # LOCAL 48GB
    # NOTE(review): the tag ends in "-cloud" yet the entry is tiered LOCAL_48GB with an ollama_tag — confirm the intended tier.
    "gemma4-31b-cloud": ModelProfile(name="Gemma4 31B Cloud",ollama_tag="gemma4:31b-cloud",tier=Tier.LOCAL_48GB,size_gb=18.0,params_b=31.0,quantization="Q4_K_M",capabilities=[Capability.REASONING,Capability.CODING,Capability.VISION,Capability.LONG_CONTEXT,Capability.MULTILINGUAL],default_temp=0.7,max_context=128000,family="gemma"),
    # NEW: Nemotron-3-Nano-Omni 30B (MoE, 3B active, 256K context, multimodal)
    # NOTE(review): the tag names a "Text-Only" GGUF while the capabilities list VISION/AUDIO/MULTIMODAL — confirm.
    "nemotron-3-nano-omni-30b": ModelProfile(name="Nemotron-3 Nano-Omni 30B",ollama_tag="hf.co/batiai/Nemotron-3-Nano-Omni-30B-Text-Only-GGUF:Q4_K_M",tier=Tier.LOCAL_48GB,size_gb=18.0,params_b=30.0,quantization="Q4_K_M",capabilities=[Capability.REASONING,Capability.CODING,Capability.VISION,Capability.AUDIO,Capability.MULTIMODAL,Capability.LONG_CONTEXT,Capability.SAFETY,Capability.TOOL_USE],default_temp=0.6,max_context=256000,family="nemotron",T_c=0.85,mu_base=0.6,kappa=0.08,description="NVIDIA MoE hybrid (Mamba2-Transformer), 128 experts, 6 active/token. 256K context. Multimodal in/out."),
    # CLOUD API
    "deepseek-v4-pro": ModelProfile(name="DeepSeek V4 Pro",cloud_tag="deepseek-v4-pro:cloud",tier=Tier.CLOUD_API,size_gb=0.0,params_b=671.0,capabilities=[Capability.REASONING,Capability.CODING,Capability.LONG_CONTEXT,Capability.MULTILINGUAL,Capability.TOOL_USE],default_temp=0.6,max_context=64000,family="deepseek"),
    "deepseek-v4-flash": ModelProfile(name="DeepSeek V4 Flash",cloud_tag="deepseek-v4-flash:cloud",tier=Tier.CLOUD_API,size_gb=0.0,params_b=671.0,capabilities=[Capability.REASONING,Capability.CODING,Capability.FAST,Capability.MULTILINGUAL,Capability.TOOL_USE],default_temp=0.8,max_context=64000,family="deepseek"),
    "qwen3-coder-next": ModelProfile(name="Qwen 3 Coder Next",cloud_tag="qwen3-coder-next:cloud",tier=Tier.CLOUD_API,size_gb=0.0,params_b=32.0,capabilities=[Capability.CODING,Capability.REASONING,Capability.FAST,Capability.LONG_CONTEXT,Capability.TOOL_USE],default_temp=0.3,max_context=128000,family="qwen"),
    "kimi-k2.6": ModelProfile(name="Kimi K2.6",cloud_tag="kimi-k2.6:cloud",tier=Tier.CLOUD_API,size_gb=0.0,params_b=32.0,capabilities=[Capability.REASONING,Capability.CODING,Capability.LONG_CONTEXT,Capability.MULTILINGUAL,Capability.VISION],default_temp=0.7,max_context=200000,family="kimi"),
    "glm-5.1": ModelProfile(name="GLM 5.1",cloud_tag="glm-5.1:cloud",tier=Tier.CLOUD_API,size_gb=0.0,params_b=32.0,capabilities=[Capability.REASONING,Capability.CODING,Capability.MULTILINGUAL,Capability.TOOL_USE,Capability.VISION],default_temp=0.7,max_context=128000,family="glm"),
    "minimax-m2.7": ModelProfile(name="MiniMax M2.7",cloud_tag="minimax-m2.7:cloud",tier=Tier.CLOUD_API,size_gb=0.0,params_b=32.0,capabilities=[Capability.REASONING,Capability.CODING,Capability.MULTILINGUAL,Capability.VISION],default_temp=0.7,max_context=128000,family="minimax"),
}
|
| 72 |
+
|
| 73 |
+
# Standalone profile for the prompt-enhancer model. It is defined outside
# REGISTRY, so get(), by_tier(), by_cap() and all_names() do not return it.
# NOTE(review): unlike the Hugging Face entries in REGISTRY, this tag has no
# "hf.co/" prefix and ends in ".gguf" — confirm it resolves as intended.
SULPHUR = ModelProfile(
    name="Sulphur Prompt Enhancer",
    ollama_tag="hugging-science/sulphur_prompt_enhancer-Q4_K_M-imatrix.gguf",
    tier=Tier.LOCAL_8GB,
    size_gb=3.0,
    params_b=3.0,
    quantization="Q4_K_M",
    capabilities=[Capability.INSTRUCT, Capability.FAST],
    default_temp=0.5,
    max_context=8192,
    family="sulphur",
)
|
| 74 |
+
|
| 75 |
+
def get(name: str) -> Optional[ModelProfile]:
    """Return the profile registered under ``name``, or None if there is none."""
    profile = REGISTRY.get(name)
    return profile
|
| 76 |
+
def by_tier(t: Tier) -> List[ModelProfile]:
    """Return all registered profiles whose tier equals ``t``, in registry order."""
    matches = []
    for profile in REGISTRY.values():
        if profile.tier == t:
            matches.append(profile)
    return matches
|
| 77 |
+
def by_cap(c: Capability) -> List[ModelProfile]:
    """Return all registered profiles that list capability ``c``, in registry order."""
    matches = []
    for profile in REGISTRY.values():
        if c in profile.capabilities:
            matches.append(profile)
    return matches
|
| 78 |
+
def all_names() -> List[str]:
    """Return the registry keys as a new list, in registry order."""
    # Iterating a dict yields its keys, so no explicit .keys() call is needed.
    return list(REGISTRY)
|
| 79 |
+
def vram(names: List[str]) -> float:
    """Return the summed ``size_gb`` of the named non-cloud models.

    Names that are not registered are ignored, and profiles in
    ``Tier.CLOUD_API`` contribute nothing. A name that appears more than once
    is counted each time it appears.

    Args:
        names: Registry keys to total.

    Returns:
        The total size in GB; ``0.0`` when nothing qualifies.
    """
    # Resolve each name once instead of calling get() three times per name.
    profiles = (get(n) for n in names)
    return sum(
        (
            p.size_gb
            for p in profiles
            if p is not None and p.tier != Tier.CLOUD_API
        ),
        0.0,  # float start value so an empty result honours the -> float annotation
    )
|