specimba committed on
Commit
d17ef35
·
verified ·
1 Parent(s): 9dbdf80

Copy nexus_os_v2/model_registry.py from dataset for module imports

Browse files
Files changed (1) hide show
  1. nexus_os_v2/model_registry.py +79 -0
nexus_os_v2/model_registry.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """NEXUS OS v2.1 — Model Registry with Real Ollama + Cloud Models."""
2
+ from dataclasses import dataclass, field
3
+ from typing import List, Optional, Dict
4
+ from enum import Enum
5
+
6
class Tier(Enum):
    """Deployment tier of a model: one of four local tiers or the cloud API."""

    LOCAL_8GB = "local_8gb"
    LOCAL_16GB = "local_16gb"
    LOCAL_24GB = "local_24gb"
    LOCAL_48GB = "local_48gb"
    CLOUD_API = "cloud_api"
8
+
9
class Capability(Enum):
    """Capability tags that can be attached to a model profile."""

    REASONING = "reasoning"
    CODING = "coding"
    VISION = "vision"
    FUNCTION_CALLING = "function_calling"
    TOOL_USE = "tool_use"
    INSTRUCT = "instruct"
    UNCHAINED = "unchained"
    ABLITERATED = "abliterated"
    FAST = "fast"
    LONG_CONTEXT = "long_context"
    MULTILINGUAL = "multilingual"
    SAFETY = "safety"
    AUDIO = "audio"
    MULTIMODAL = "multimodal"
13
+
14
@dataclass
class ModelProfile:
    """Static description of one model entry in the registry.

    Only ``name`` is required; every other field has a default.
    """

    # Human-readable display name.
    name: str
    # Tag used for locally served (Ollama) models; None when not set.
    ollama_tag: Optional[str] = None
    # Tag used for cloud-hosted models; None when not set.
    cloud_tag: Optional[str] = None
    tier: Tier = Tier.LOCAL_8GB
    size_gb: float = 0.0
    params_b: float = 0.0
    quantization: Optional[str] = None
    # default_factory gives each instance its own list.
    capabilities: List[Capability] = field(default_factory=list)
    default_temp: float = 0.8
    default_top_p: float = 0.95
    max_context: int = 8192
    description: str = ""
    family: str = ""
    # NOTE(review): the meaning of T_c, mu_base and kappa is not established
    # in this module -- confirm against the code that consumes them.
    T_c: float = 1.0
    mu_base: float = 0.5
    kappa: float = 0.1
21
+
22
# Registry of known model profiles, keyed by a short lookup name.
# Entries are grouped by tier in the order: 8GB, 16GB, 24GB, 48GB, cloud API.
REGISTRY: Dict[str, ModelProfile] = {
    # LOCAL 8GB
    "functiongemma": ModelProfile(
        name="FunctionGemma",
        ollama_tag="functiongemma:latest",
        tier=Tier.LOCAL_8GB, size_gb=0.3, params_b=0.27,
        capabilities=[Capability.FUNCTION_CALLING, Capability.FAST, Capability.INSTRUCT],
        default_temp=0.3, max_context=8192, family="gemma",
    ),
    "huihui-granite-4.1-3b": ModelProfile(
        name="Huihui Granite 4.1 3B",
        ollama_tag="hf.co/mradermacher/Huihui-granite-4.1-3b-abliterated-i1-GGUF:Q6_K",
        tier=Tier.LOCAL_8GB, size_gb=2.8, params_b=3.0, quantization="Q6_K",
        capabilities=[
            Capability.REASONING, Capability.CODING, Capability.UNCHAINED,
            Capability.ABLITERATED, Capability.INSTRUCT,
        ],
        default_temp=0.7, max_context=128000, family="granite",
    ),
    "granite-4.1-3b-sft": ModelProfile(
        name="Granite 4.1 3B SFT",
        ollama_tag="hf.co/mradermacher/Granite-4.1-3B-SFT-Claude-Opus-Reasoning-Unsloth-i1-GGUF:Q6_K",
        tier=Tier.LOCAL_8GB, size_gb=2.8, params_b=3.0, quantization="Q6_K",
        capabilities=[
            Capability.REASONING, Capability.CODING, Capability.INSTRUCT,
            Capability.LONG_CONTEXT,
        ],
        default_temp=0.7, max_context=128000, family="granite",
    ),
    "trinity-nano": ModelProfile(
        name="Trinity Nano",
        ollama_tag="hf.co/bartowski/arcee-ai_Trinity-Nano-Preview-GGUF:Q4_K_M",
        tier=Tier.LOCAL_8GB, size_gb=3.8, params_b=4.0, quantization="Q4_K_M",
        capabilities=[
            Capability.REASONING, Capability.CODING, Capability.INSTRUCT,
            Capability.FAST,
        ],
        default_temp=0.7, max_context=32768, family="trinity",
    ),
    "ibm-grok4-coder-1b": ModelProfile(
        name="IBM Grok4 Coder 1B",
        ollama_tag="hf.co/WithinUsAI/IBM-Grok4-Ultra.Fast.Coder-1B-GGUF:Q5_K_M",
        tier=Tier.LOCAL_8GB, size_gb=1.2, params_b=1.0, quantization="Q5_K_M",
        capabilities=[Capability.CODING, Capability.FAST, Capability.INSTRUCT],
        default_temp=0.3, max_context=8192, family="grok",
    ),
    "minicpm-v-4.6": ModelProfile(
        name="MiniCPM-V 4.6",
        ollama_tag="hf.co/treadon/MiniCPM-V-4.6-Abliterated-AND-Disinhibited:Q4_K_M",
        tier=Tier.LOCAL_8GB, size_gb=1.6, params_b=4.0, quantization="Q4_K_M",
        capabilities=[
            Capability.VISION, Capability.REASONING, Capability.UNCHAINED,
            Capability.ABLITERATED,
        ],
        default_temp=0.7, max_context=32768, family="minicpm",
    ),
    "qwen3.5-0.8b-heretic": ModelProfile(
        name="Qwen 3.5 0.8B Heretic",
        ollama_tag="hf.co/mradermacher/Qwen3.5-0.8B-heretic-ara-v2-GGUF:Q8_0",
        tier=Tier.LOCAL_8GB, size_gb=0.8, params_b=0.8, quantization="Q8_0",
        capabilities=[
            Capability.CODING, Capability.FAST, Capability.INSTRUCT,
            Capability.UNCHAINED,
        ],
        default_temp=0.8, max_context=32768, family="qwen",
    ),
    "bonsai-1.7b": ModelProfile(
        name="Ternary Bonsai 1.7B",
        ollama_tag="hf.co/prism-ml/Ternary-Bonsai-1.7B-gguf:F16",
        tier=Tier.LOCAL_8GB, size_gb=3.4, params_b=1.7, quantization="F16",
        capabilities=[Capability.REASONING, Capability.INSTRUCT, Capability.FAST],
        default_temp=0.7, max_context=8192, family="bonsai",
    ),
    "darwin-4b": ModelProfile(
        name="Darwin 4B",
        ollama_tag="hf.co/mradermacher/Darwin-4B-Genesis-GGUF:Q4_K_M",
        tier=Tier.LOCAL_8GB, size_gb=5.3, params_b=4.0, quantization="Q4_K_M",
        capabilities=[Capability.REASONING, Capability.CODING, Capability.INSTRUCT],
        default_temp=0.7, max_context=32768, family="darwin",
    ),
    "dr-venus-4b-rl": ModelProfile(
        name="DR-Venus 4B RL",
        ollama_tag="hf.co/inclusionAI/DR-Venus-4B-RL-GGUF:Q6_K",
        tier=Tier.LOCAL_8GB, size_gb=3.6, params_b=4.0, quantization="Q6_K",
        capabilities=[
            Capability.REASONING, Capability.CODING, Capability.INSTRUCT,
            Capability.SAFETY,
        ],
        default_temp=0.7, max_context=32768, family="venus",
    ),
    "granite-4.1-3b-abliterated": ModelProfile(
        name="Granite 4.1 3B Abliterated",
        ollama_tag="hf.co/mradermacher/granite-4.1-3b-Abliterated-AND-Disinhibited-GGUF:Q8_0",
        tier=Tier.LOCAL_8GB, size_gb=3.6, params_b=3.0, quantization="Q8_0",
        capabilities=[
            Capability.REASONING, Capability.CODING, Capability.UNCHAINED,
            Capability.ABLITERATED, Capability.LONG_CONTEXT,
        ],
        default_temp=0.7, max_context=128000, family="granite",
    ),
    "gemma4-most-seen-2b": ModelProfile(
        name="Gemma4 Most Seen 2B",
        ollama_tag="hf.co/WithinUsAI/Gemma4-Most.Seen.Unseen.Reasoner-2B:Q4_K_M",
        tier=Tier.LOCAL_8GB, size_gb=3.4, params_b=2.0, quantization="Q4_K_M",
        capabilities=[Capability.REASONING, Capability.FAST, Capability.INSTRUCT],
        default_temp=0.7, max_context=32768, family="gemma",
    ),
    "grape-2-mini": ModelProfile(
        name="GRaPE 2 Mini",
        ollama_tag="hf.co/mradermacher/GRaPE-2-Mini-GGUF:Q8_0",
        tier=Tier.LOCAL_8GB, size_gb=4.8, params_b=4.0, quantization="Q8_0",
        capabilities=[Capability.REASONING, Capability.CODING, Capability.INSTRUCT],
        default_temp=0.7, max_context=32768, family="grape",
    ),
    "bonsai-8b-requantized": ModelProfile(
        name="Bonsai 8B Requantized",
        ollama_tag="hf.co/lilyanatia/Bonsai-8B-requantized:Bonsai-8B-Q2_K.gguf",
        tier=Tier.LOCAL_8GB, size_gb=3.0, params_b=8.0, quantization="Q2_K",
        capabilities=[
            Capability.REASONING, Capability.CODING, Capability.FAST,
            Capability.INSTRUCT,
        ],
        default_temp=0.7, max_context=8192, family="bonsai",
    ),
    "frob-locooperator": ModelProfile(
        name="Frob LocoOperator",
        ollama_tag="frob/locooperator:latest",
        tier=Tier.LOCAL_8GB, size_gb=2.5, params_b=3.0,
        capabilities=[
            Capability.TOOL_USE, Capability.FUNCTION_CALLING, Capability.FAST,
            Capability.INSTRUCT,
        ],
        default_temp=0.3, max_context=8192, family="loco",
    ),
    "nemotron-3-nano-4b": ModelProfile(
        name="Nemotron 3 Nano 4B",
        ollama_tag="nemotron-3-nano:4b",
        tier=Tier.LOCAL_8GB, size_gb=2.8, params_b=4.0, quantization="Q4_K_M",
        capabilities=[
            Capability.REASONING, Capability.CODING, Capability.SAFETY,
            Capability.INSTRUCT,
        ],
        default_temp=0.7, max_context=32768, family="nemotron",
    ),
    # NEW: OpenSonnet-Lite-MAX (4B dense, 262K context, Qwen3 base, Apache-2.0)
    "opensonnet-lite-max": ModelProfile(
        name="OpenSonnet-Lite-MAX",
        ollama_tag="hf.co/mradermacher/OpenSonnet-Lite-MAX-GGUF:Q4_K_M",
        tier=Tier.LOCAL_8GB, size_gb=2.5, params_b=4.0, quantization="Q4_K_M",
        capabilities=[
            Capability.REASONING, Capability.CODING, Capability.INSTRUCT,
            Capability.LONG_CONTEXT, Capability.FAST,
        ],
        default_temp=0.6, max_context=262144, family="qwen",
        T_c=0.9, mu_base=0.55, kappa=0.09,
        description="Qwen3-4B-Thinking base, multi-stage SFT, 85.22 GSM8K. Apache-2.0.",
    ),
    # LOCAL 16GB
    "deepseek-r1-8b": ModelProfile(
        name="DeepSeek-R1 8B",
        ollama_tag="deepseek-r1:8b",
        tier=Tier.LOCAL_16GB, size_gb=5.2, params_b=8.0, quantization="Q4_K_M",
        capabilities=[
            Capability.REASONING, Capability.CODING, Capability.INSTRUCT,
            Capability.LONG_CONTEXT,
        ],
        default_temp=0.6, max_context=128000, family="deepseek",
    ),
    "qwen2.5-coder-7b": ModelProfile(
        name="Qwen 2.5 Coder 7B",
        ollama_tag="qwen2.5-coder:7b",
        tier=Tier.LOCAL_16GB, size_gb=4.7, params_b=7.0, quantization="Q4_K_M",
        capabilities=[Capability.CODING, Capability.INSTRUCT, Capability.FAST],
        default_temp=0.3, max_context=32768, family="qwen",
    ),
    "l3.1-dark-reasoning-8b": ModelProfile(
        name="L3.1 Dark Reasoning 8B",
        ollama_tag="hf.co/mradermacher/L3.1-Dark-Reasoning-LewdPlay-evo-Hermes-R1-Uncensored-8B-GGUF:Q5_K_M",
        tier=Tier.LOCAL_16GB, size_gb=5.7, params_b=8.0, quantization="Q5_K_M",
        capabilities=[
            Capability.REASONING, Capability.CODING, Capability.UNCHAINED,
            Capability.ABLITERATED, Capability.INSTRUCT,
        ],
        default_temp=0.7, max_context=32768, family="llama",
    ),
    "omega-evolution-9b": ModelProfile(
        name="Omega Evolution 9B",
        ollama_tag="hf.co/ReadyArt/Omega-Evolution-9B-v2.0-GGUF:Q4_K_M",
        tier=Tier.LOCAL_16GB, size_gb=6.6, params_b=9.0, quantization="Q4_K_M",
        capabilities=[
            Capability.REASONING, Capability.CODING, Capability.INSTRUCT,
            Capability.VISION,
        ],
        default_temp=0.7, max_context=32768, family="omega",
    ),
    "darwin-9b-opus": ModelProfile(
        name="Darwin 9B Opus",
        ollama_tag="hf.co/mradermacher/Darwin-9B-Opus-GGUF:Q4_K_M",
        tier=Tier.LOCAL_16GB, size_gb=6.3, params_b=9.0, quantization="Q4_K_M",
        capabilities=[
            Capability.REASONING, Capability.CODING, Capability.INSTRUCT,
            Capability.LONG_CONTEXT,
        ],
        default_temp=0.7, max_context=65536, family="darwin",
    ),
    "qwopus-3.5-9b": ModelProfile(
        name="Qwopus 3.5 9B",
        ollama_tag="hf.co/loilkon/Qwopus3.5-9B-v3.5-Uncensored:Q4_K_M",
        tier=Tier.LOCAL_16GB, size_gb=5.6, params_b=9.0, quantization="Q4_K_M",
        capabilities=[
            Capability.REASONING, Capability.CODING, Capability.UNCHAINED,
            Capability.ABLITERATED, Capability.INSTRUCT,
        ],
        default_temp=0.7, max_context=32768, family="qwopus",
    ),
    "carnice-9b": ModelProfile(
        name="Carnice 9B",
        ollama_tag="hf.co/kai-os/Carnice-9b-GGUF:Q4_K_M",
        tier=Tier.LOCAL_16GB, size_gb=5.6, params_b=9.0, quantization="Q4_K_M",
        capabilities=[
            Capability.REASONING, Capability.CODING, Capability.INSTRUCT,
            Capability.VISION,
        ],
        default_temp=0.7, max_context=32768, family="carnice",
    ),
    "open-search-vl-8b": ModelProfile(
        name="OpenSearch VL 8B",
        ollama_tag="hf.co/mradermacher/OpenSearch-VL-8B-GGUF:Q5_K_M",
        tier=Tier.LOCAL_16GB, size_gb=6.6, params_b=8.0, quantization="Q5_K_M",
        capabilities=[
            Capability.VISION, Capability.REASONING, Capability.INSTRUCT,
            Capability.LONG_CONTEXT,
        ],
        default_temp=0.7, max_context=65536, family="opensearch",
    ),
    "granite-4.1-8b-abliterated": ModelProfile(
        name="Granite 4.1 8B Abliterated",
        ollama_tag="hf.co/mradermacher/granite-4.1-8b-Abliterated-AND-Disinhibited-GGUF:Q4_K_M",
        tier=Tier.LOCAL_16GB, size_gb=5.1, params_b=8.0, quantization="Q4_K_M",
        capabilities=[
            Capability.REASONING, Capability.CODING, Capability.UNCHAINED,
            Capability.ABLITERATED, Capability.LONG_CONTEXT,
        ],
        default_temp=0.7, max_context=128000, family="granite",
    ),
    "huihui-granite-4.1-8b": ModelProfile(
        name="Huihui Granite 4.1 8B",
        ollama_tag="hf.co/mradermacher/Huihui-granite-4.1-8b-abliterated-GGUF:Q4_K_M",
        tier=Tier.LOCAL_16GB, size_gb=5.3, params_b=8.0, quantization="Q4_K_M",
        capabilities=[
            Capability.REASONING, Capability.CODING, Capability.UNCHAINED,
            Capability.ABLITERATED, Capability.LONG_CONTEXT,
        ],
        default_temp=0.7, max_context=128000, family="granite",
    ),
    "jaahas-qwen3.5-9b": ModelProfile(
        name="Jaahas Qwen 3.5 9B",
        ollama_tag="jaahas/qwen3.5-uncensored:9b",
        tier=Tier.LOCAL_16GB, size_gb=7.4, params_b=9.0, quantization="Q4_K_M",
        capabilities=[
            Capability.REASONING, Capability.CODING, Capability.UNCHAINED,
            Capability.ABLITERATED, Capability.INSTRUCT, Capability.MULTILINGUAL,
        ],
        default_temp=0.7, max_context=32768, family="qwen",
    ),
    # LOCAL 24GB
    "lfm2-12b-deckard": ModelProfile(
        name="LFM2 12B Deckard",
        ollama_tag="hf.co/mradermacher/LFM2-12B-A1B-SpeedDemon-The-Deckard-II-HERETIC-Uncensored-i1-GGUF:Q3_K_L",
        tier=Tier.LOCAL_24GB, size_gb=5.8, params_b=12.0, quantization="Q3_K_L",
        capabilities=[
            Capability.REASONING, Capability.CODING, Capability.UNCHAINED,
            Capability.ABLITERATED, Capability.LONG_CONTEXT, Capability.FAST,
        ],
        default_temp=0.7, max_context=128000, family="lfm",
    ),
    "gemma4-e2b-opus": ModelProfile(
        name="Gemma4 E2B Opus",
        ollama_tag="hf.co/mradermacher/Gemma4-E2B-SFT-Claude-Opus-Reasoning-Unsloth-GGUF:Q8_0",
        tier=Tier.LOCAL_24GB, size_gb=5.5, params_b=4.0, quantization="Q8_0",
        capabilities=[
            Capability.REASONING, Capability.CODING, Capability.INSTRUCT,
            Capability.LONG_CONTEXT,
        ],
        default_temp=0.7, max_context=128000, family="gemma",
    ),
    "gemma4-uncensored": ModelProfile(
        name="Gemma 4 Uncensored",
        ollama_tag="hf.co/HauhauCS/Gemma-4-E2B-Uncensored-HauhauCS-Aggressive:Q6_K_P",
        tier=Tier.LOCAL_24GB, size_gb=4.9, params_b=4.0, quantization="Q6_K_P",
        capabilities=[
            Capability.REASONING, Capability.CODING, Capability.UNCHAINED,
            Capability.ABLITERATED, Capability.VISION,
        ],
        default_temp=0.7, max_context=32768, family="gemma",
    ),
    "gemma4-obliterated": ModelProfile(
        name="Gemma 4 OBLITERATED",
        ollama_tag="hf.co/OBLITERATUS/gemma-4-E4B-it-OBLITERATED:Q4_K_M",
        tier=Tier.LOCAL_24GB, size_gb=6.3, params_b=4.0, quantization="Q4_K_M",
        capabilities=[
            Capability.REASONING, Capability.CODING, Capability.UNCHAINED,
            Capability.ABLITERATED, Capability.VISION,
        ],
        default_temp=0.7, max_context=32768, family="gemma",
    ),
    # NOTE(review): size_gb=1.0 looks small next to params_b=27.0 at Q4_K_M --
    # confirm the figure is intended for this artifact.
    "qwen3.6-27b-dflash": ModelProfile(
        name="Qwen 3.6 27B DFlash",
        ollama_tag="hf.co/Ardenzard/Qwen3.6-27B-DFlash-GGUF:Q4_K_M",
        tier=Tier.LOCAL_24GB, size_gb=1.0, params_b=27.0, quantization="Q4_K_M",
        capabilities=[
            Capability.REASONING, Capability.CODING, Capability.LONG_CONTEXT,
            Capability.FAST,
        ],
        default_temp=0.7, max_context=128000, family="qwen",
    ),
    # LOCAL 48GB
    # NOTE(review): the tag ends in "-cloud" but the entry is filed under a
    # local tier with an ollama_tag -- confirm this classification.
    "gemma4-31b-cloud": ModelProfile(
        name="Gemma4 31B Cloud",
        ollama_tag="gemma4:31b-cloud",
        tier=Tier.LOCAL_48GB, size_gb=18.0, params_b=31.0, quantization="Q4_K_M",
        capabilities=[
            Capability.REASONING, Capability.CODING, Capability.VISION,
            Capability.LONG_CONTEXT, Capability.MULTILINGUAL,
        ],
        default_temp=0.7, max_context=128000, family="gemma",
    ),
    # NEW: Nemotron-3-Nano-Omni 30B (MoE, 3B active, 256K context, multimodal)
    # NOTE(review): the tag names a "Text-Only" GGUF while the capabilities
    # list VISION/AUDIO/MULTIMODAL -- confirm which is correct.
    "nemotron-3-nano-omni-30b": ModelProfile(
        name="Nemotron-3 Nano-Omni 30B",
        ollama_tag="hf.co/batiai/Nemotron-3-Nano-Omni-30B-Text-Only-GGUF:Q4_K_M",
        tier=Tier.LOCAL_48GB, size_gb=18.0, params_b=30.0, quantization="Q4_K_M",
        capabilities=[
            Capability.REASONING, Capability.CODING, Capability.VISION,
            Capability.AUDIO, Capability.MULTIMODAL, Capability.LONG_CONTEXT,
            Capability.SAFETY, Capability.TOOL_USE,
        ],
        default_temp=0.6, max_context=256000, family="nemotron",
        T_c=0.85, mu_base=0.6, kappa=0.08,
        description="NVIDIA MoE hybrid (Mamba2-Transformer), 128 experts, 6 active/token. 256K context. Multimodal in/out.",
    ),
    # CLOUD API
    "deepseek-v4-pro": ModelProfile(
        name="DeepSeek V4 Pro",
        cloud_tag="deepseek-v4-pro:cloud",
        tier=Tier.CLOUD_API, size_gb=0.0, params_b=671.0,
        capabilities=[
            Capability.REASONING, Capability.CODING, Capability.LONG_CONTEXT,
            Capability.MULTILINGUAL, Capability.TOOL_USE,
        ],
        default_temp=0.6, max_context=64000, family="deepseek",
    ),
    "deepseek-v4-flash": ModelProfile(
        name="DeepSeek V4 Flash",
        cloud_tag="deepseek-v4-flash:cloud",
        tier=Tier.CLOUD_API, size_gb=0.0, params_b=671.0,
        capabilities=[
            Capability.REASONING, Capability.CODING, Capability.FAST,
            Capability.MULTILINGUAL, Capability.TOOL_USE,
        ],
        default_temp=0.8, max_context=64000, family="deepseek",
    ),
    "qwen3-coder-next": ModelProfile(
        name="Qwen 3 Coder Next",
        cloud_tag="qwen3-coder-next:cloud",
        tier=Tier.CLOUD_API, size_gb=0.0, params_b=32.0,
        capabilities=[
            Capability.CODING, Capability.REASONING, Capability.FAST,
            Capability.LONG_CONTEXT, Capability.TOOL_USE,
        ],
        default_temp=0.3, max_context=128000, family="qwen",
    ),
    "kimi-k2.6": ModelProfile(
        name="Kimi K2.6",
        cloud_tag="kimi-k2.6:cloud",
        tier=Tier.CLOUD_API, size_gb=0.0, params_b=32.0,
        capabilities=[
            Capability.REASONING, Capability.CODING, Capability.LONG_CONTEXT,
            Capability.MULTILINGUAL, Capability.VISION,
        ],
        default_temp=0.7, max_context=200000, family="kimi",
    ),
    "glm-5.1": ModelProfile(
        name="GLM 5.1",
        cloud_tag="glm-5.1:cloud",
        tier=Tier.CLOUD_API, size_gb=0.0, params_b=32.0,
        capabilities=[
            Capability.REASONING, Capability.CODING, Capability.MULTILINGUAL,
            Capability.TOOL_USE, Capability.VISION,
        ],
        default_temp=0.7, max_context=128000, family="glm",
    ),
    "minimax-m2.7": ModelProfile(
        name="MiniMax M2.7",
        cloud_tag="minimax-m2.7:cloud",
        tier=Tier.CLOUD_API, size_gb=0.0, params_b=32.0,
        capabilities=[
            Capability.REASONING, Capability.CODING, Capability.MULTILINGUAL,
            Capability.VISION,
        ],
        default_temp=0.7, max_context=128000, family="minimax",
    ),
}
72
+
73
# Stand-alone profile for the prompt-enhancer model; defined at module level
# and not placed in REGISTRY by this statement.
SULPHUR = ModelProfile(
    name="Sulphur Prompt Enhancer",
    ollama_tag="hugging-science/sulphur_prompt_enhancer-Q4_K_M-imatrix.gguf",
    tier=Tier.LOCAL_8GB,
    size_gb=3.0,
    params_b=3.0,
    quantization="Q4_K_M",
    capabilities=[Capability.INSTRUCT, Capability.FAST],
    default_temp=0.5,
    max_context=8192,
    family="sulphur",
)
74
+
75
def get(name: str) -> Optional[ModelProfile]:
    """Return the profile registered under ``name``, or ``None`` if absent."""
    try:
        return REGISTRY[name]
    except KeyError:
        return None
76
def by_tier(t: Tier) -> List[ModelProfile]:
    """Return every registered profile whose tier equals ``t``, in registry order."""
    matches = []
    for profile in REGISTRY.values():
        if profile.tier == t:
            matches.append(profile)
    return matches
77
def by_cap(c: Capability) -> List[ModelProfile]:
    """Return every registered profile that lists capability ``c``, in registry order."""
    matches = []
    for profile in REGISTRY.values():
        if c in profile.capabilities:
            matches.append(profile)
    return matches
78
def all_names() -> List[str]:
    """Return the registry keys as a new list, in insertion order."""
    # Iterating a dict yields its keys, so this matches list(REGISTRY.keys()).
    return list(REGISTRY)
79
def vram(names: List[str]) -> float:
    """Sum ``size_gb`` over the named models, skipping unknown and cloud entries.

    Args:
        names: Registry keys to total. Keys missing from the registry are
            ignored; a key that appears more than once is counted each time.

    Returns:
        The summed ``size_gb`` of all named profiles whose tier is not
        ``Tier.CLOUD_API``. Always a float, including ``0.0`` when nothing
        contributes.
    """
    total = 0.0  # float start so empty input matches the declared return type
    for n in names:
        profile = get(n)  # look each name up once instead of three times
        if profile is None or profile.tier == Tier.CLOUD_API:
            continue
        total += profile.size_gb
    return total