specimba committed on
Commit
283c9ae
·
verified ·
1 Parent(s): da35f0d

NEXUS OS v2.1 Space app with Ollama relay and cloud APIs

Browse files
Files changed (1) hide show
  1. app.py +604 -1
app.py CHANGED
@@ -1 +1,604 @@
1
- dummy
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ NEXUS OS v2.1 — HF Space with Ollama Relay + Cloud API Integration
3
+
4
+ This Gradio app runs on HuggingFace Spaces and provides:
5
+ 1. Local Ollama proxy relay (connects to user's local Ollama via ngrok/tunnel)
6
+ 2. Cloud API fallback (DeepSeek, Claude, GPT-5, Qwen, Kimi, GLM)
7
+ 3. Full TWAVE thermodynamic telemetry
8
+ 4. Per-token hallucination detection (EPR + Spilled Energy + CK-PLUG + TWAVE)
9
+ 5. Model registry with 37+ models including Nemotron-3-Omni-30B and OpenSonnet-Lite-MAX
10
+ """
11
+ import os
12
+ import sys
13
+ import json
14
+ import time
15
+ import urllib.request
16
+ import urllib.error
17
+ from typing import Optional, Dict, Any, List, Tuple
18
+ from dataclasses import asdict
19
+
20
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
21
+
22
+ # Import NEXUS OS modules (required: these imports are not wrapped in try/except)
23
+ from nexus_os_v2.model_registry import get, by_tier, all_names, Tier, REGISTRY, Capability
24
+ from nexus_os_v2.unified_detector import (
25
+ UnifiedThermodynamicDetector, FusionMode, Action,
26
+ DetectorReading, TokenVerdict, SequenceVerdict,
27
+ )
28
+ from nexus_os_v2.twave_tracker import StochasticResonance, TWAVETracker
29
+ from nexus_os_v2.cloud_api_adapters import CloudAPIManager, CloudResponse
30
+
31
+ # Try importing retrievers
32
+ try:
33
+ from nexus_os_v2.pinecone_client import PineconeRetriever, MockPineconeRetriever
34
+ PINECONE_OK = True
35
+ except ImportError:
36
+ PINECONE_OK = False
37
+ MockPineconeRetriever = None
38
+
39
+ try:
40
+ import gradio as gr
41
+ GRADIO_AVAILABLE = True
42
+ except ImportError:
43
+ GRADIO_AVAILABLE = False
44
+
45
+
46
+ # ─── Ollama Relay Client ───
47
class OllamaRelayClient:
    """
    HTTP client for a user's Ollama server reached through a relay URL.

    The user exposes their local Ollama via ngrok, localtunnel, or a
    Cloudflare Tunnel. The endpoint is resolved in this order: the
    ``relay_url`` argument, the ``OLLAMA_RELAY_URL`` environment variable,
    then ``http://localhost:11434`` (fallback for local Space testing).
    """

    def __init__(self, relay_url: Optional[str] = None):
        """
        Resolve and normalize the relay endpoint.

        Args:
            relay_url: Public tunnel endpoint. ``None``, empty, or
                whitespace-only values fall through to the environment
                variable and then to the localhost default.
        """
        # Trim whitespace first: a padded value pasted into a text field
        # would otherwise count as "set" and keep its trailing slash.
        candidate = (relay_url or "").strip()
        if not candidate:
            candidate = os.environ.get("OLLAMA_RELAY_URL", "").strip()
        if not candidate:
            candidate = "http://localhost:11434"  # fallback for local Space testing
        self.relay_url = candidate.rstrip("/")
        self._available_models: List[str] = []

    @staticmethod
    def _chunk_text(data: Dict[str, Any]) -> str:
        """Return the generated text carried by one decoded response object."""
        if "message" in data:
            return data["message"].get("content", "")
        if "response" in data:
            return data["response"]
        return ""

    @staticmethod
    def _parse_chat_body(raw: str) -> Tuple[str, Dict[str, Any]]:
        """
        Decode a complete response body into ``(text, stats_object)``.

        A body holding one JSON document is decoded as-is. A body holding
        one JSON document per line (the shape of a streamed reply read in
        one go) is decoded line by line: the text pieces are concatenated
        and the last object is returned for the metadata lookups.

        Raises:
            json.JSONDecodeError: If the body is neither form.
        """
        try:
            data = json.loads(raw)
        except json.JSONDecodeError:
            chunks = [json.loads(line) for line in raw.splitlines() if line.strip()]
            if not chunks:
                raise
            text = "".join(OllamaRelayClient._chunk_text(chunk) for chunk in chunks)
            return text, chunks[-1]
        return OllamaRelayClient._chunk_text(data), data

    def _build_chat_request(
        self,
        model_tag: str,
        prompt: str,
        system: Optional[str],
        temperature: float,
        max_tokens: int,
        stream: bool,
    ) -> urllib.request.Request:
        """Build the POST request for ``/api/chat`` shared by both generate paths."""
        messages = []
        if system:
            messages.append({"role": "system", "content": system})
        messages.append({"role": "user", "content": prompt})

        payload = json.dumps({
            "model": model_tag,
            "messages": messages,
            "stream": stream,
            "options": {
                "temperature": temperature,
                "num_predict": max_tokens,
            },
        }).encode("utf-8")

        return urllib.request.Request(
            f"{self.relay_url}/api/chat",
            data=payload,
            headers={"Content-Type": "application/json"},
            method="POST",
        )

    def is_connected(self) -> bool:
        """
        Check if the Ollama relay is reachable.

        Issues ``GET /api/tags`` with a 10 second timeout. On success the
        model names from the reply are cached for ``list_models``.

        Returns:
            True if the request and decoding succeeded, False otherwise.
        """
        try:
            req = urllib.request.Request(
                f"{self.relay_url}/api/tags",
                headers={"Content-Type": "application/json"},
                method="GET",
            )
            with urllib.request.urlopen(req, timeout=10) as resp:
                data = json.loads(resp.read().decode("utf-8"))
                self._available_models = [
                    m.get("name", m.get("model", "")) for m in data.get("models", [])
                ]
                return True
        except Exception:
            # Deliberate best-effort probe: any failure means "not connected".
            return False

    def list_models(self) -> List[str]:
        """List model names from Ollama, probing the relay if none are cached."""
        if not self._available_models:
            self.is_connected()
        return self._available_models

    def generate(
        self,
        model_tag: str,
        prompt: str,
        system: Optional[str] = None,
        temperature: float = 0.7,
        max_tokens: int = 2048,
        stream: bool = False,
    ) -> Tuple[str, Dict[str, Any]]:
        """
        Generate via the Ollama relay and return the full reply.

        Args:
            model_tag: Model name sent as ``model``.
            prompt: User message content.
            system: Optional system message, sent first when truthy.
            temperature: Sent as ``options.temperature``.
            max_tokens: Sent as ``options.num_predict``.
            stream: Sent as ``stream``. The body is still read to the end
                before returning; a line-delimited reply is concatenated.

        Returns:
            ``(text, metadata)`` where metadata holds ``model``,
            ``latency_ms`` (wall-clock time of the request in milliseconds)
            and the ``total_duration``, ``load_duration``,
            ``prompt_eval_count`` and ``eval_count`` fields of the reply
            (0 when absent).

        Raises:
            RuntimeError: If the relay answers with an HTTP error status.
        """
        req = self._build_chat_request(
            model_tag, prompt, system, temperature, max_tokens, stream
        )

        t0 = time.time()
        try:
            with urllib.request.urlopen(req, timeout=300) as resp:
                raw = resp.read().decode("utf-8")
        except urllib.error.HTTPError as e:
            # Error pages are not guaranteed to be valid UTF-8.
            error_body = e.read().decode("utf-8", errors="replace")
            raise RuntimeError(f"Ollama relay error {e.code}: {error_body}") from e
        elapsed = (time.time() - t0) * 1000

        text, data = self._parse_chat_body(raw)
        metadata = {
            "model": data.get("model", model_tag),
            "latency_ms": elapsed,
            "total_duration": data.get("total_duration", 0),
            "load_duration": data.get("load_duration", 0),
            "prompt_eval_count": data.get("prompt_eval_count", 0),
            "eval_count": data.get("eval_count", 0),
        }
        return text, metadata

    def generate_stream(
        self,
        model_tag: str,
        prompt: str,
        system: Optional[str] = None,
        temperature: float = 0.7,
        max_tokens: int = 2048,
    ):
        """
        Stream generation via the Ollama relay.

        Yields:
            ``(token_text, done)`` for every non-blank response line that
            decodes as JSON; undecodable lines are skipped.

        Raises:
            RuntimeError: If the relay answers with an HTTP error status.
        """
        req = self._build_chat_request(
            model_tag, prompt, system, temperature, max_tokens, True
        )

        try:
            with urllib.request.urlopen(req, timeout=300) as resp:
                for line in resp:
                    if not line.strip():
                        continue
                    try:
                        data = json.loads(line.decode("utf-8"))
                    except json.JSONDecodeError:
                        continue
                    yield self._chunk_text(data), data.get("done", False)
        except urllib.error.HTTPError as e:
            error_body = e.read().decode("utf-8", errors="replace")
            raise RuntimeError(f"Ollama relay stream error {e.code}: {error_body}") from e
188
+
189
+
190
+ # ─── Cloud Generation Wrapper ───
191
class CloudGenerationWrapper:
    """Facade over CloudAPIManager that returns (text, metadata) pairs for the Space."""

    def __init__(self):
        self.manager = CloudAPIManager()

    def generate(
        self,
        model_family: str,
        prompt: str,
        max_tokens: int = 2048,
        temperature: float = 0.7,
        system: Optional[str] = None,
    ) -> Tuple[str, Dict[str, Any]]:
        """
        Generate via the cloud API manager.

        Returns:
            ``(text, metadata)``. On success metadata carries the model
            used, latency, token counts, finish reason and cost read from
            the manager's response. If the manager raises ``RuntimeError``
            the text is a bracketed error message and metadata is
            ``{"error": <message>}``; other exceptions propagate.
        """
        request_kwargs = {
            "model_family": model_family,
            "prompt": prompt,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "system": system,
        }
        try:
            reply = self.manager.generate(**request_kwargs)
            details = {
                "model": reply.model_used,
                "latency_ms": reply.latency_ms,
                "tokens_input": reply.tokens_input,
                "tokens_output": reply.tokens_output,
                "finish_reason": reply.finish_reason,
                "cost_cents": reply.cost_cents,
            }
            return reply.text, details
        except RuntimeError as err:
            return f"[Cloud API Error: {err}]", {"error": str(err)}

    def list_available(self) -> List[str]:
        """Return whatever the underlying manager lists as available."""
        available = self.manager.list_available()
        return available
227
+
228
+
229
+ # ─── Telemetry Simulator ───
230
def simulate_telemetry(
    text: str,
    model_id: str,
    complexity: float,
) -> Dict[str, Any]:
    """
    Build randomly generated ("simulated") telemetry for a response.

    Per-token verdicts are synthesized with ``random`` rather than measured:
    a fixed set of positions is marked high/critical, another
    moderate/elevated, and the rest low. The verdicts are passed to the
    detector's ``evaluate_sequence`` and its result is flattened into a dict.

    Args:
        text: Generated response; only its whitespace-separated word count
            is used, to size the token list (twice the word count, clamped
            to 20..200).
        model_id: Registry id used to look up the model profile; falls back
            to the ``deepseek-r1-8b`` profile when the lookup is falsy.
        complexity: Passed to ``StochasticResonance.recommend_temperature``.

    Returns:
        Dict with sequence-level scores, the first ten trigger positions,
        the recommended temperature, and the profile's ``T_c``, ``mu_base``
        and ``kappa``.
    """
    import random

    profile = get(model_id) or get("deepseek-r1-8b")  # fallback

    word_count = len(text.split())
    num_tokens = min(200, max(20, word_count * 2))
    detector = UnifiedThermodynamicDetector(fusion_mode=FusionMode.WEIGHTED)

    severe_positions = (5, 12, 18, 25, 35, 45)
    medium_positions = (8, 15, 22, 30, 40)
    score_bands = {
        "low": (0.0, 0.2),
        "moderate": (0.2, 0.4),
        "elevated": (0.4, 0.6),
        "high": (0.6, 0.8),
        "critical": (0.8, 1.0),
    }
    action_for = {
        "low": Action.NONE,
        "moderate": Action.NONE,
        "elevated": Action.GROUND,
        "high": Action.REFLECT,
        "critical": Action.HALT,
    }

    token_verdicts = []
    for pos in range(num_tokens):
        if pos in severe_positions:
            risk_level = random.choice(["high", "critical"])
        elif pos in medium_positions:
            risk_level = random.choice(["moderate", "elevated"])
        else:
            risk_level = "low"

        # One draw per band, in band order, then pick the band for this token.
        drawn = {
            level: random.uniform(lower, upper)
            for level, (lower, upper) in score_bands.items()
        }
        fused_score = drawn[risk_level]

        flagged = risk_level != "low"
        severe = risk_level in ("high", "critical")
        readings = [
            DetectorReading("epr", random.uniform(0, 1), 0.7, flagged, {}),
            DetectorReading("spilled", random.uniform(0, 1), 0.6, severe, {}),
            DetectorReading("twave", random.uniform(0, 1), 0.6, severe, {}),
        ]

        token_verdicts.append(
            TokenVerdict(
                position=pos,
                token_str=f"tok_{pos}",
                readings=readings,
                fused_score=fused_score,
                risk_level=risk_level,
                recommended_action=action_for[risk_level],
                confidence=0.7,
            )
        )

    summary = detector.evaluate_sequence(token_verdicts)

    return {
        "num_tokens": num_tokens,
        "hallucination_risk": summary.avg_fused_score,
        "max_risk": summary.max_fused_score,
        "risk_level": summary.overall_risk,
        "recommended_action": summary.overall_action.value,
        "detector_agreement": summary.detector_agreement,
        "trigger_positions": summary.trigger_positions[:10],
        "eep": summary.energy_entropy_product,
        "pti": summary.phase_transition_index,
        "newi": summary.newi,
        "optimal_temp": StochasticResonance.recommend_temperature(complexity, profile.T_c),
        "T_c": profile.T_c,
        "mu_base": profile.mu_base,
        "kappa": profile.kappa,
    }
301
+
302
+
303
+ # ─── Main Generation Orchestrator ───
304
def generate_with_nexus(
    prompt: str,
    vram: float,
    complexity: float,
    model_id: str,
    allow_cloud: bool,
    ollama_relay_url: str,
    use_real_ollama: bool,
    use_cloud: bool,
    system_prompt: str,
    max_tokens: int,
    fusion_mode: str,
) -> Tuple[str, str, float, float, int, float, float, float, str, str, str]:
    """
    Main generation function for the Gradio Space.

    Tries the Ollama relay, then the cloud API, then a mock response, and
    attaches simulated telemetry to whatever text was produced.

    Args:
        prompt: User prompt; ``None``/blank short-circuits with a status.
        vram: Accepted for interface compatibility; not used here.
        complexity: Forwarded to ``simulate_telemetry``.
        model_id: Registry id of the model profile to use.
        allow_cloud: Must be true (with ``use_cloud``) for the cloud path.
        ollama_relay_url: Relay endpoint handed to ``OllamaRelayClient``.
        use_real_ollama: Enables the relay path when the profile has an
            ``ollama_tag``.
        use_cloud: Enables the cloud path for ``Tier.CLOUD_API`` profiles.
        system_prompt: Optional system prompt; blank/``None`` means none.
        max_tokens: Generation limit; coerced to ``int``.
        fusion_mode: Accepted for interface compatibility; not used here.

    Returns:
        (response, model_used, risk, max_risk, tokens, eep, pti, newi,
        action, triggers, status)
    """
    def _empty_result(status: str):
        # Shape of the 11-tuple when nothing was generated.
        return "", "", 0.0, 0.0, 0, 0.0, 0.0, 0.0, "none", "[]", status

    if not (prompt or "").strip():
        return _empty_result("Please enter a prompt")

    profile = get(model_id)
    if not profile:
        return _empty_result(f"Model {model_id} not found")

    system = system_prompt if (system_prompt or "").strip() else None
    max_tokens = int(max_tokens)  # UI sliders may deliver a float

    response_text = ""
    metadata: Dict[str, Any] = {}
    status_msg = ""
    failures: List[str] = []  # reasons collected from backends that did not deliver

    # Try Ollama relay first if enabled
    if use_real_ollama and profile.ollama_tag:
        try:
            client = OllamaRelayClient(relay_url=ollama_relay_url)
            if client.is_connected():
                response_text, metadata = client.generate(
                    model_tag=profile.ollama_tag,
                    prompt=prompt,
                    system=system,
                    temperature=profile.default_temp,
                    max_tokens=max_tokens,
                )
                status_msg = f"Generated via Ollama relay ({profile.name})"
            else:
                # Report the URL actually probed; the raw argument may be
                # empty when the client fell back to env/default.
                failures.append(f"Ollama relay unreachable at {client.relay_url}")
        except Exception as e:
            failures.append(f"Ollama relay failed: {e}")

    # Fallback to cloud API if allowed and Ollama failed or not used
    if not response_text and use_cloud and allow_cloud and profile.tier == Tier.CLOUD_API:
        try:
            wrapper = CloudGenerationWrapper()
            cloud_text, cloud_meta = wrapper.generate(
                model_family=profile.family,
                prompt=prompt,
                max_tokens=max_tokens,
                temperature=profile.default_temp,
                system=system,
            )
            if cloud_meta.get("error"):
                # The wrapper reports API errors as text plus an "error"
                # key; that is a failure, not a generation.
                failures.append(f"Cloud API failed: {cloud_meta['error']}")
            else:
                response_text, metadata = cloud_text, cloud_meta
                status_msg = f"Generated via Cloud API ({profile.name})"
        except Exception as e:
            failures.append(f"Cloud API failed: {e}")

    # Final fallback: mock generation
    if not response_text:
        response_text = "\n".join([
            f"[MOCK MODE] {profile.name} ({profile.params_b:.1f}B) would respond to:",
            "",
            f'"{prompt[:100]}..."',
            "",
            "In production with real Ollama relay or cloud API keys, this would be a live generation.",
            "",
            "---",
            f"Model: {profile.name}",
            f"Family: {profile.family}",
            f"Tier: {profile.tier.value}",
            f"Context: {profile.max_context:,} tokens",
            f"T_c: {profile.T_c}, mu_0: {profile.mu_base}, kappa: {profile.kappa}",
        ])
        metadata = {"mock": True}
        status_msg = "Mock generation (no Ollama relay or cloud API available)"
        if failures:
            # Keep the diagnostic instead of overwriting it.
            status_msg = f"{status_msg} — {'; '.join(failures)}"

    # Simulate telemetry
    telemetry = simulate_telemetry(response_text, model_id, complexity)

    return (
        response_text,
        f"{profile.name} ({metadata.get('model', 'unknown')})",
        round(telemetry["hallucination_risk"], 3),
        round(telemetry["max_risk"], 3),
        telemetry["num_tokens"],
        round(telemetry["eep"], 3),
        round(telemetry["pti"], 3),
        round(telemetry["newi"], 3),
        telemetry["recommended_action"],
        str(telemetry["trigger_positions"]),
        status_msg,
    )
398
+
399
+
400
+ # ─── Gradio Interface Builder ───
401
def build_space_interface():
    """
    Build the full Gradio interface for the HF Space.

    Lays out a two-column ``gr.Blocks`` app (inputs on the left, outputs and
    telemetry fields on the right), wires the model dropdown to
    ``update_models`` and the generate button to ``generate_with_nexus``.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """

    with gr.Blocks(title="NEXUS OS v2.1 — Thermodynamic LLM Control System") as demo:

        gr.Markdown("""
        # 🔥 NEXUS OS v2.1 — Space Deployment

        **Hybrid Cloud + Local Inference with BEC Thermodynamic Hallucination Control**

        Connect your local Ollama via relay URL, or use cloud API keys for fallback.

        ---
        """)

        with gr.Row():
            # Left column: connection settings, prompt and generation controls.
            with gr.Column(scale=2):
                # Connection settings
                with gr.Accordion("⚙️ Connection Settings", open=False):
                    ollama_relay = gr.Textbox(
                        label="Ollama Relay URL",
                        placeholder="https://your-tunnel.ngrok-free.app",
                        value=os.environ.get("OLLAMA_RELAY_URL", ""),
                        info="Your local Ollama exposed via ngrok/localtunnel/Cloudflare",
                    )
                    use_ollama = gr.Checkbox(
                        label="Use Ollama Relay",
                        value=True,
                        info="Connect to your local Ollama instance",
                    )
                    use_cloud = gr.Checkbox(
                        label="Use Cloud API Fallback",
                        value=True,
                        info="Use DeepSeek/Claude/GPT-5/etc when Ollama fails",
                    )
                    allow_cloud = gr.Checkbox(
                        label="Allow Cloud Models in Routing",
                        value=True,
                    )

                # Prompt input
                prompt_input = gr.Textbox(
                    label="Your Prompt",
                    placeholder="Explain quantum entanglement in simple terms...",
                    lines=4,
                )

                system_input = gr.Textbox(
                    label="System Prompt (optional)",
                    placeholder="You are a helpful assistant...",
                    lines=2,
                    value="",
                )

                with gr.Row():
                    vram_slider = gr.Slider(
                        minimum=4, maximum=48, value=16, step=4,
                        label="Local VRAM Budget (GB)"
                    )
                    complexity_slider = gr.Slider(
                        minimum=0.0, maximum=1.0, value=0.5, step=0.05,
                        label="Estimated Complexity"
                    )

                # Starts with no choices; the list is filled by update_models
                # through the demo.load handler registered below.
                model_dropdown = gr.Dropdown(
                    label="Model",
                    choices=[],
                    value="deepseek-r1-8b",
                    info="Auto-filtered by VRAM budget",
                )

                max_tokens_slider = gr.Slider(
                    minimum=256, maximum=8192, value=2048, step=256,
                    label="Max Tokens",
                )

                fusion_mode_dropdown = gr.Dropdown(
                    label="Detector Fusion Mode",
                    choices=["weighted", "majority", "agreement", "any"],
                    value="weighted",
                )

                generate_btn = gr.Button("🔮 Generate with NEXUS OS", variant="primary")

            # Right column: response text plus read-only telemetry fields.
            with gr.Column(scale=3):
                output_text = gr.Textbox(
                    label="Generated Response",
                    lines=20,
                    interactive=False,
                )

                model_used_text = gr.Textbox(
                    label="Model Used",
                    value="",
                    interactive=False,
                )

                status_text = gr.Textbox(
                    label="Status",
                    value="Ready",
                    interactive=False,
                )

                with gr.Row():
                    risk_gauge = gr.Number(label="Hallucination Risk", value=0.0)
                    max_risk_gauge = gr.Number(label="Peak Risk", value=0.0)
                    tokens_gauge = gr.Number(label="Tokens", value=0)

                with gr.Row():
                    eep_gauge = gr.Number(label="EEP", value=0.0)
                    pti_gauge = gr.Number(label="PTI", value=0.0)
                    newi_gauge = gr.Number(label="NEWI", value=0.0)

                action_text = gr.Textbox(
                    label="Recommended Action",
                    value="none",
                    interactive=False,
                )

                trigger_text = gr.Textbox(
                    label="Trigger Positions",
                    value="[]",
                    interactive=False,
                )

        gr.Markdown("""
        ---

        ### About NEXUS OS v2.1

        **37+ real models** mapped from Ollama + HuggingFace GGUF including:
        - **Nemotron-3 Nano-Omni 30B** (NVIDIA MoE, 256K context, multimodal)
        - **OpenSonnet-Lite-MAX** (4B dense, 262K context, Apache-2.0)
        - DeepSeek-R1, Qwen, Gemma, Granite, and 30+ more

        **Four empirically-validated hallucination detectors:**
        - **EPR** (arXiv:2509.04492) — Token-level entropy production rate
        - **Spilled Energy** (arXiv:2602.18671) — Energy discrepancy in autoregressive EBMs
        - **CK-PLUG** (arXiv:2503.15888) — Confidence Gain for retrieval coupling
        - **TWAVE** — Landau-Ginzburg BEC order parameter tracking

        **Novel composite signals:** EEP, PTI, NEWI

        **Repository:** [specimba/nexus-os-v2](https://huggingface.co/datasets/specimba/nexus-os-v2)
        """)

        # Update model list when VRAM changes
        def update_models(vram, allow_cloud):
            """Return a Dropdown update listing the models that fit the budget."""
            # Imported lazily, inside the handler, rather than at module top.
            from nexus_os_v2.chimera_router import ChimeraRouter, QWAVEBudget
            router = ChimeraRouter()
            budget = QWAVEBudget(vram_budget_gb=vram, allow_cloud=allow_cloud)
            models = router.get_available_models(budget)
            # (label, value) pairs; only entries flagged as fitting the budget.
            choices = [(f"{m['name']} ({m['params_b']:.1f}B, {m['size_gb']:.1f}GB)", m['id']) for m in models if m['fits_budget']]
            # First fitting model becomes the selection; empty string if none fit.
            default = choices[0][1] if choices else ""
            return gr.Dropdown(choices=choices, value=default)

        vram_slider.change(
            fn=update_models,
            inputs=[vram_slider, allow_cloud],
            outputs=[model_dropdown],
        )
        allow_cloud.change(
            fn=update_models,
            inputs=[vram_slider, allow_cloud],
            outputs=[model_dropdown],
        )

        # Initialize model list on load
        demo.load(
            fn=update_models,
            inputs=[vram_slider, allow_cloud],
            outputs=[model_dropdown],
        )

        # Generate button
        # The input and output lists follow the parameter order and the
        # return-tuple order of generate_with_nexus respectively.
        generate_btn.click(
            fn=generate_with_nexus,
            inputs=[
                prompt_input, vram_slider, complexity_slider, model_dropdown,
                allow_cloud, ollama_relay, use_ollama, use_cloud,
                system_input, max_tokens_slider, fusion_mode_dropdown,
            ],
            outputs=[
                output_text, model_used_text, risk_gauge, max_risk_gauge, tokens_gauge,
                eep_gauge, pti_gauge, newi_gauge, action_text, trigger_text, status_text,
            ],
        )

    return demo
590
+
591
+
592
+
593
if __name__ == "__main__":
    # Script entry point: refuse to start without Gradio, otherwise build
    # the interface and serve it on all interfaces at port 7860.
    if GRADIO_AVAILABLE:
        launch_options = {
            "server_name": "0.0.0.0",
            "server_port": 7860,
            "share": False,
            "show_error": True,
        }
        demo = build_space_interface()
        demo.launch(**launch_options)
    else:
        print("ERROR: Gradio is required. Install with: pip install gradio")
        sys.exit(1)