specimba committed on
Commit
76da8c7
·
verified ·
1 Parent(s): 1da900a

v5.0 Provider Control Center: provider manager, arena comparison, experiment log, pinecone chat, model registry

Browse files
Files changed (1) hide show
  1. app.py +235 -339
app.py CHANGED
@@ -1,15 +1,15 @@
1
  """
2
- NEXUS OS — Provider Control Center
3
 
4
- A multi-provider LLM management dashboard inspired by HF collaboration spaces.
5
- Features:
6
  1. Provider Manager — enter API keys, check health, see available models
7
- 2. Side-by-Side Arena — same prompt across multiple providers, compare outputs
8
- 3. Experiment Log — save runs to table, sort by latency/cost/quality
9
- 4. Pinecone Chat — talk to pineosman2 assistant, show retrieved evidence
10
- 5. Model Registry — browse 37+ models with specs
11
 
12
- All self-contained. Only dependency: gradio.
13
  """
14
  import os
15
  import sys
@@ -29,105 +29,57 @@ except ImportError:
29
 
30
 
31
  # ═══════════════════════════════════════════════════════════════
32
- # PROVIDER DEFINITIONS
33
  # ═══════════════════════════════════════════════════════════════
34
- class Provider(Enum):
35
- HF_ROUTER = ("HF Inference Providers", "router.huggingface.co", "HF_TOKEN")
36
- GROQ = ("Groq", "api.groq.com", "GROQ_API_KEY")
37
- DEEPSEEK = ("DeepSeek", "api.deepseek.com", "DEEPSEEK_API_KEY")
38
- OPENROUTER = ("OpenRouter", "openrouter.ai", "OPENROUTER_API_KEY")
39
- TOGETHER = ("Together AI", "api.together.xyz", "TOGETHER_API_KEY")
40
- KILOCODE = ("Kilocode", "kilocode.ai", "KILOCODE_API_KEY")
41
- NVIDIA = ("NVIDIA NIM", "integrate.api.nvidia.com", "NVIDIA_API_KEY")
42
- OLLAMA = ("Ollama (Local)", "localhost:11434", "OLLAMA_HOST")
43
-
44
- def __init__(self, display_name, domain, key_env):
45
  self.display_name = display_name
46
  self.domain = domain
47
  self.key_env = key_env
48
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
- # ═══════════════════════════════════════════════════════════════
51
- # API ENDPOINTS (all OpenAI-compatible /v1/chat/completions)
52
- # ═══════════════════════════════════════════════════════════════
53
  ENDPOINTS = {
54
- Provider.HF_ROUTER: "https://router.huggingface.co/v1/chat/completions",
55
- Provider.GROQ: "https://api.groq.com/openai/v1/chat/completions",
56
- Provider.DEEPSEEK: "https://api.deepseek.com/v1/chat/completions",
57
- Provider.OPENROUTER: "https://openrouter.ai/api/v1/chat/completions",
58
- Provider.TOGETHER: "https://api.together.xyz/v1/chat/completions",
59
  }
60
 
61
- # Free models per provider
62
  FREE_MODELS = {
63
- Provider.HF_ROUTER: [
64
- ("SmolLM2-1.7B", "HuggingFaceTB/SmolLM2-1.7B-Instruct"),
65
- ("Llama-3.2-1B", "meta-llama/Llama-3.2-1B-Instruct"),
66
- ("Qwen2.5-0.5B", "Qwen/Qwen2.5-0.5B-Instruct"),
67
- ("Gemma-2-2B", "google/gemma-2-2b-it"),
68
- ],
69
- Provider.GROQ: [
70
- ("Llama-3.2-1B", "llama-3.2-1b-preview"),
71
- ("Llama-3.2-3B", "llama-3.2-3b-preview"),
72
- ("Mixtral-8x7B", "mixtral-8x7b-32768"),
73
- ("Qwen-2.5-Coder-32B", "qwen-2.5-coder-32b"),
74
- ("Gemma-2-9B-IT", "gemma2-9b-it"),
75
- ],
76
- Provider.DEEPSEEK: [
77
- ("DeepSeek-V3", "deepseek-chat"),
78
- ("DeepSeek-R1", "deepseek-reasoner"),
79
- ],
80
- Provider.OPENROUTER: [
81
- ("Llama-3.2-1B-Free", "meta-llama/llama-3.2-1b-instruct:free"),
82
- ("Qwen-2.5-Coder-32B-Free", "qwen/qwen-2.5-coder-32b-instruct:free"),
83
- ],
84
- Provider.TOGETHER: [
85
- ("Llama-3.3-70B-Free", "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"),
86
- ("Llama-3.2-1B-Free", "meta-llama/Llama-3.2-1B-Instruct-Turbo-Free"),
87
- ],
88
  }
89
 
90
-
91
  # ═══════════════════════════════════════════════════════════════
92
- # HEALTH CHECK + GENERATION
93
  # ═══════════════════════════════════════════════════════════════
94
- @dataclass
95
- class HealthResult:
96
- provider: Provider
97
- status: str # "online", "offline", "no_key", "rate_limited"
98
- latency_ms: float
99
- error: str = ""
100
- models: List[Tuple[str, str]] = field(default_factory=list)
101
-
102
-
103
- @dataclass
104
- class GenerationResult:
105
- text: str
106
- provider: Provider
107
- model: str
108
- latency_ms: float
109
- tokens_input: int = 0
110
- tokens_output: int = 0
111
- error: str = ""
112
-
113
-
114
- def _call_api(endpoint: str, api_key: str, payload: Dict[str, Any], timeout: int = 120) -> Tuple[bool, Dict[str, Any], float, str]:
115
  body = json.dumps(payload).encode("utf-8")
116
- headers = {
117
- "Content-Type": "application/json",
118
- "Authorization": f"Bearer {api_key}",
119
- }
120
- # OpenRouter requires extra headers
121
  if "openrouter" in endpoint:
122
  headers["HTTP-Referer"] = "https://huggingface.co/spaces/specimba/nexus-os-space"
123
  headers["X-Title"] = "NEXUS OS"
124
-
125
  req = urllib.request.Request(endpoint, data=body, headers=headers, method="POST")
126
  t0 = time.time()
127
  try:
128
  with urllib.request.urlopen(req, timeout=timeout) as resp:
129
- data = json.loads(resp.read().decode("utf-8"))
130
- return True, data, (time.time() - t0) * 1000, ""
131
  except urllib.error.HTTPError as e:
132
  err = e.read().decode("utf-8", errors="replace")[:300]
133
  return False, {}, (time.time() - t0) * 1000, f"HTTP {e.code}: {err}"
@@ -135,146 +87,98 @@ def _call_api(endpoint: str, api_key: str, payload: Dict[str, Any], timeout: int
135
  return False, {}, (time.time() - t0) * 1000, str(e)[:200]
136
 
137
 
138
- def check_provider_health(provider: Provider, api_key: str) -> HealthResult:
139
- """Check provider health with a minimal test request."""
 
 
 
 
140
  if not api_key:
141
- return HealthResult(provider=provider, status="no_key", latency_ms=0,
142
- models=FREE_MODELS.get(provider, []))
143
 
144
- endpoint = ENDPOINTS.get(provider)
145
  if not endpoint:
146
- return HealthResult(provider=provider, status="offline", latency_ms=0,
147
- error="No endpoint configured",
148
- models=FREE_MODELS.get(provider, []))
149
-
150
- # Try a minimal generation
151
- models = FREE_MODELS.get(provider, [])
152
- model_id = models[0][1] if models else ""
153
- if not model_id:
154
- return HealthResult(provider=provider, status="offline", latency_ms=0,
155
- error="No models configured",
156
- models=FREE_MODELS.get(provider, []))
157
 
158
- payload = {
159
- "model": model_id,
160
- "messages": [{"role": "user", "content": "Hi"}],
161
- "max_tokens": 5,
162
- "temperature": 0.1,
163
- }
164
 
165
- success, data, latency, error = _call_api(endpoint, api_key, payload, timeout=20)
 
166
 
167
  if success and data.get("choices"):
168
- return HealthResult(provider=provider, status="online", latency_ms=latency,
169
- models=FREE_MODELS.get(provider, []))
170
  elif "429" in error or "rate limit" in error.lower():
171
- return HealthResult(provider=provider, status="rate_limited", latency_ms=latency,
172
- error=error, models=FREE_MODELS.get(provider, []))
173
  else:
174
- return HealthResult(provider=provider, status="offline", latency_ms=latency,
175
- error=error, models=FREE_MODELS.get(provider, []))
176
 
177
 
178
- def generate_with_provider(provider: Provider, api_key: str, model: str,
179
- prompt: str, system: Optional[str] = None,
180
- max_tokens: int = 512, temperature: float = 0.7) -> GenerationResult:
181
- """Generate with a specific provider."""
182
- endpoint = ENDPOINTS.get(provider)
183
  if not endpoint:
184
- return GenerationResult(text="", provider=provider, model=model, latency_ms=0,
185
- error="No endpoint configured")
186
 
187
  messages = []
188
  if system:
189
  messages.append({"role": "system", "content": system})
190
  messages.append({"role": "user", "content": prompt})
191
 
192
- payload = {
193
- "model": model,
194
- "messages": messages,
195
- "max_tokens": max_tokens,
196
- "temperature": temperature,
197
- }
198
-
199
- success, data, latency, error = _call_api(endpoint, api_key, payload)
200
 
201
  if not success:
202
- return GenerationResult(text="", provider=provider, model=model,
203
- latency_ms=latency, error=error)
204
 
205
  choice = data.get("choices", [{}])[0]
206
  message = choice.get("message", {})
207
  usage = data.get("usage", {})
208
 
209
- return GenerationResult(
210
- text=message.get("content", ""),
211
- provider=provider,
212
- model=model,
213
- latency_ms=latency,
214
- tokens_input=usage.get("prompt_tokens", 0),
215
- tokens_output=usage.get("completion_tokens", 0),
216
- )
217
 
218
 
219
  # ═══════════════════════════════════════════════════════════════
220
- # MODEL REGISTRY (37 models)
221
  # ═══════════════════════════════════════════════════════════════
222
- @dataclass
223
- class ModelProfile:
224
- name: str
225
- family: str
226
- tier: str
227
- size_gb: float
228
- params_b: float
229
- capabilities: List[str]
230
- default_temp: float
231
- max_context: int
232
-
233
  REGISTRY = {
234
- "deepseek-r1-8b": ModelProfile("DeepSeek-R1 8B", "deepseek", "16GB", 5.2, 8.0,
235
- ["reasoning", "coding", "long_context"], 0.6, 128000),
236
- "qwen2.5-coder-7b": ModelProfile("Qwen 2.5 Coder 7B", "qwen", "16GB", 4.7, 7.0,
237
- ["coding", "fast"], 0.3, 32768),
238
- "l3.1-dark-reasoning-8b": ModelProfile("L3.1 Dark Reasoning 8B", "llama", "16GB", 5.7, 8.0,
239
- ["reasoning", "coding"], 0.7, 32768),
240
- "omega-evolution-9b": ModelProfile("Omega Evolution 9B", "omega", "16GB", 6.6, 9.0,
241
- ["reasoning", "coding", "vision"], 0.7, 32768),
242
- "darwin-9b-opus": ModelProfile("Darwin 9B Opus", "darwin", "16GB", 6.3, 9.0,
243
- ["reasoning", "coding", "long_context"], 0.7, 65536),
244
- "qwopus-3.5-9b": ModelProfile("Qwopus 3.5 9B", "qwopus", "16GB", 5.6, 9.0,
245
- ["reasoning", "coding"], 0.7, 32768),
246
- "carnice-9b": ModelProfile("Carnice 9B", "carnice", "16GB", 5.6, 9.0,
247
- ["reasoning", "coding", "vision"], 0.7, 32768),
248
- "open-search-vl-8b": ModelProfile("OpenSearch VL 8B", "opensearch", "16GB", 6.6, 8.0,
249
- ["vision", "reasoning", "long_context"], 0.7, 65536),
250
- "granite-4.1-8b-abliterated": ModelProfile("Granite 4.1 8B Abliterated", "granite", "16GB", 5.1, 8.0,
251
- ["reasoning", "coding", "long_context"], 0.7, 128000),
252
- "jaahas-qwen3.5-9b": ModelProfile("Jaahas Qwen 3.5 9B", "qwen", "16GB", 7.4, 9.0,
253
- ["reasoning", "coding", "multilingual"], 0.7, 32768),
254
- "lfm2-12b-deckard": ModelProfile("LFM2 12B Deckard", "lfm", "24GB", 5.8, 12.0,
255
- ["reasoning", "coding", "long_context", "fast"], 0.7, 128000),
256
- "gemma4-e2b-opus": ModelProfile("Gemma4 E2B Opus", "gemma", "24GB", 5.5, 4.0,
257
- ["reasoning", "coding", "long_context"], 0.7, 128000),
258
- "gemma4-uncensored": ModelProfile("Gemma 4 Uncensored", "gemma", "24GB", 4.9, 4.0,
259
- ["reasoning", "coding", "vision"], 0.7, 32768),
260
- "gemma4-obliterated": ModelProfile("Gemma 4 OBLITERATED", "gemma", "24GB", 6.3, 4.0,
261
- ["reasoning", "coding", "vision"], 0.7, 32768),
262
- "qwen3.6-27b-dflash": ModelProfile("Qwen 3.6 27B DFlash", "qwen", "24GB", 1.0, 27.0,
263
- ["reasoning", "coding", "long_context", "fast"], 0.7, 128000),
264
- "gemma4-31b-cloud": ModelProfile("Gemma4 31B Cloud", "gemma", "48GB", 18.0, 31.0,
265
- ["reasoning", "coding", "vision", "long_context", "multilingual"], 0.7, 128000),
266
- "nemotron-3-nano-omni-30b": ModelProfile("Nemotron-3 Nano-Omni 30B", "nemotron", "48GB", 18.0, 30.0,
267
- ["reasoning", "coding", "vision", "long_context", "safety", "tool_use"], 0.6, 256000),
268
- "opensonnet-lite-max": ModelProfile("OpenSonnet-Lite-MAX", "qwen", "8GB", 2.5, 4.0,
269
- ["reasoning", "coding", "fast", "long_context"], 0.6, 262144),
270
- "deepseek-v4-pro": ModelProfile("DeepSeek V4 Pro", "deepseek", "cloud", 0.0, 671.0,
271
- ["reasoning", "coding", "long_context", "multilingual", "tool_use"], 0.6, 64000),
272
- "qwen3-coder-next": ModelProfile("Qwen 3 Coder Next", "qwen", "cloud", 0.0, 32.0,
273
- ["coding", "reasoning", "fast", "long_context", "tool_use"], 0.3, 128000),
274
- "kimi-k2.6": ModelProfile("Kimi K2.6", "kimi", "cloud", 0.0, 32.0,
275
- ["reasoning", "coding", "long_context", "multilingual", "vision"], 0.7, 200000),
276
- "glm-5.1": ModelProfile("GLM 5.1", "glm", "cloud", 0.0, 32.0,
277
- ["reasoning", "coding", "multilingual", "tool_use", "vision"], 0.7, 128000),
278
  }
279
 
280
 
@@ -283,20 +187,12 @@ REGISTRY = {
283
  # ═══════════════════════════════════════════════════════════════
284
  experiment_log: List[Dict[str, Any]] = []
285
 
 
286
  # ═══════════════════════════════════════════════════════════════
287
- # GRADIO INTERFACE — Provider Control Center
288
  # ═══════════════════════════════════════════════════════════════
289
  def build_control_center():
290
- with gr.Blocks(title="NEXUS OS — Provider Control Center", css="""
291
- .provider-card { border: 1px solid #ddd; border-radius: 8px; padding: 12px; margin: 4px; }
292
- .provider-online { border-left: 4px solid #10b981; }
293
- .provider-offline { border-left: 4px solid #ef4444; }
294
- .provider-rate { border-left: 4px solid #f59e0b; }
295
- .provider-nokey { border-left: 4px solid #6b7280; }
296
- .metric-box { text-align: center; padding: 8px; background: #f9fafb; border-radius: 6px; }
297
- .metric-value { font-size: 24px; font-weight: bold; color: #1f2937; }
298
- .metric-label { font-size: 11px; color: #6b7280; text-transform: uppercase; }
299
- """) as demo:
300
 
301
  gr.Markdown("""
302
  # 🔥 NEXUS OS — Provider Control Center
@@ -306,78 +202,62 @@ def build_control_center():
306
 
307
  with gr.Tabs():
308
 
309
- # ═══════════════════════════════════════════════════════
310
  # TAB 1: Provider Manager
311
- # ═══════════════════════════════════════════════════════
312
  with gr.TabItem("🔌 Provider Manager"):
313
  gr.Markdown("""
314
- ### Enter your API keys to connect providers
315
 
316
- Keys are stored in **this session only** (not saved to disk).
317
  """)
318
 
319
- provider_keys = {}
320
- provider_status = {}
 
 
 
 
 
 
 
321
 
322
- for provider in [Provider.HF_ROUTER, Provider.GROQ, Provider.DEEPSEEK,
323
- Provider.OPENROUTER, Provider.TOGETHER, Provider.KILOCODE,
324
- Provider.NVIDIA]:
325
- with gr.Row():
326
- key_input = gr.Textbox(
327
- label=f"{provider.display_name} API Key",
328
- placeholder=f"sk-... or paste your {provider.key_env} here",
329
- type="password",
330
- value=os.environ.get(provider.key_env, ""),
331
- scale=3,
332
- )
333
- status_text = gr.Textbox(
334
- label="Status",
335
- value="Not checked" if not os.environ.get(provider.key_env, "") else "Key set (click Check)",
336
- interactive=False,
337
- scale=1,
338
- )
339
- provider_keys[provider] = key_input
340
- provider_status[provider] = status_text
341
 
342
- check_all_btn = gr.Button("πŸ” Check All Providers", variant="primary")
343
  health_table = gr.DataFrame(
344
- headers=["Provider", "Status", "Latency (ms)", "Free Models", "Error"],
345
  label="Provider Health Dashboard",
 
346
  interactive=False,
347
  )
348
 
349
- def check_all_providers(*keys):
350
  results = []
351
- for provider, key in zip([Provider.HF_ROUTER, Provider.GROQ, Provider.DEEPSEEK,
352
- Provider.OPENROUTER, Provider.TOGETHER, Provider.KILOCODE,
353
- Provider.NVIDIA], keys):
354
- health = check_provider_health(provider, key)
355
- status_emoji = {"online": "🟒", "rate_limited": "🟑",
356
- "offline": "πŸ”΄", "no_key": "βšͺ"}[health.status]
357
- models_str = ", ".join([m[0] for m in health.models[:3]]) if health.models else "N/A"
358
  results.append({
359
- "Provider": f"{status_emoji} {provider.display_name}",
360
- "Status": health.status,
361
- "Latency (ms)": f"{health.latency_ms:.0f}" if health.latency_ms > 0 else "N/A",
362
- "Free Models": models_str,
363
- "Error": health.error[:100] if health.error else "",
364
  })
365
  return results
366
 
367
- check_all_btn.click(
368
- fn=check_all_providers,
369
- inputs=list(provider_keys.values()),
370
  outputs=[health_table],
371
  )
372
 
373
- # ═══════════════════════════════════════════════════════
374
  # TAB 2: Side-by-Side Arena
375
- # ═══════════════════════════════════════════════════════
376
  with gr.TabItem("⚔️ Side-by-Side Arena"):
377
  gr.Markdown("""
378
- ### Send the same prompt to multiple providers and compare
379
-
380
- Select providers, enter a prompt, and see which gives the best response.
381
  """)
382
 
383
  with gr.Row():
@@ -395,106 +275,122 @@ def build_control_center():
395
  )
396
 
397
  with gr.Row():
398
- arena_providers = gr.CheckboxGroup(
399
- label="Select Providers",
400
- choices=[(p.display_name, p.name) for p in ENDPOINTS.keys()],
401
- value=[Provider.HF_ROUTER.name, Provider.GROQ.name],
402
  )
403
- arena_max_tokens = gr.Slider(minimum=64, maximum=2048, value=512, step=64,
404
- label="Max Tokens")
405
- arena_temperature = gr.Slider(minimum=0.0, maximum=2.0, value=0.7, step=0.1,
406
- label="Temperature")
 
 
 
 
 
 
 
 
 
407
 
408
  arena_go = gr.Button("🚀 Run Arena", variant="primary")
409
 
410
- # Dynamic output columns based on selected providers
411
- arena_outputs = {}
412
- for provider in ENDPOINTS.keys():
413
- with gr.Column(visible=False) as col:
414
- arena_outputs[provider] = {
415
- "col": col,
416
- "text": gr.Textbox(label=f"{provider.display_name}", lines=12, interactive=False),
417
- "metrics": gr.Textbox(label=f"Metrics", interactive=False, lines=2),
418
- }
 
 
 
 
 
 
 
 
419
 
420
- def run_arena(prompt, system, provider_names, max_tokens, temperature, *keys):
 
 
421
  if not prompt.strip():
422
- return ["Please enter a prompt"] * len(ENDPOINTS)
423
 
424
- provider_map = {p.name: p for p in ENDPOINTS.keys()}
425
- key_map = {p: k for p, k in zip([Provider.HF_ROUTER, Provider.GROQ, Provider.DEEPSEEK,
426
- Provider.OPENROUTER, Provider.TOGETHER], keys)}
427
 
428
- results = {}
429
- for name in provider_names:
430
- provider = provider_map.get(name)
431
- if not provider:
432
- continue
433
- key = key_map.get(provider, "")
434
- if not key:
435
- results[name] = (f"❌ No API key for {provider.display_name}", "")
436
- continue
437
-
438
- models = FREE_MODELS.get(provider, [])
439
- model = models[0][1] if models else ""
440
-
441
- result = generate_with_provider(
442
- provider, key, model, prompt, system,
443
- max_tokens, temperature,
444
- )
445
-
446
- if result.error:
447
- results[name] = (f"❌ Error: {result.error}", "")
448
- else:
449
- metrics = f"⏱️ {result.latency_ms:.0f}ms | πŸ“ {result.tokens_output} tokens | 🎲 {model}"
450
- results[name] = (result.text, metrics)
451
 
452
- # Build output list matching all provider columns
453
- outputs = []
454
- for provider in ENDPOINTS.keys():
455
- name = provider.name
456
- if name in results:
457
- outputs.extend([results[name][0], results[name][1]])
458
- else:
459
- outputs.extend(["", ""])
460
- return outputs
 
 
 
 
 
 
 
 
461
 
462
  arena_go.click(
463
  fn=run_arena,
464
- inputs=[arena_prompt, arena_system, arena_providers, arena_max_tokens, arena_temperature] + list(provider_keys.values())[:5],
465
- outputs=[item for p in ENDPOINTS.keys() for item in [arena_outputs[p]["text"], arena_outputs[p]["metrics"]]],
 
466
  )
467
 
468
- # ═══════════════════════════════════════════════════════
469
  # TAB 3: Experiment Log
470
- # ═══════════════════════════════════════════════════════
471
  with gr.TabItem("📊 Experiment Log"):
472
  gr.Markdown("""
473
- ### Track and compare your runs
474
-
475
- Each generation is logged with: timestamp, provider, model, latency, tokens, quality score.
476
  """)
477
 
478
  log_table = gr.DataFrame(
479
- headers=["Time", "Provider", "Model", "Prompt (first 50 chars)",
480
- "Latency (ms)", "Tokens Out", "Status"],
481
  label="Experiment History",
 
 
482
  interactive=False,
483
  )
484
 
 
485
  clear_log_btn = gr.Button("🗑️ Clear Log")
486
- export_log_btn = gr.Button("📥 Export as JSON")
 
 
487
 
488
  def clear_log():
489
  global experiment_log
490
  experiment_log = []
491
  return []
492
 
 
493
  clear_log_btn.click(fn=clear_log, outputs=[log_table])
494
 
495
- # ═══════════════════════════════════════════════════════
496
  # TAB 4: Pinecone Chat
497
- # ═══════════════════════════════════════════════════════
498
  with gr.TabItem("🌲 Pinecone Chat"):
499
  gr.Markdown("""
500
  ### Chat with your Pinecone Assistant `pineosman2`
@@ -512,16 +408,16 @@ def build_control_center():
512
  pinecone_chat = gr.Chatbot(label="Conversation with pineosman2", height=400)
513
  pinecone_msg = gr.Textbox(label="Your message", placeholder="Ask about your documents...")
514
  pinecone_send = gr.Button("Send", variant="primary")
 
515
 
516
  def pinecone_chat_fn(message, history, api_key):
517
  if not api_key:
518
- return history + [(message, "❌ Please enter your Pinecone API key")]
519
  if not message.strip():
520
- return history
521
 
522
- # Simple REST call to Pinecone Assistant
523
  try:
524
- import urllib.request
525
  payload = json.dumps({
526
  "messages": [{"role": "user", "content": message}],
527
  }).encode("utf-8")
@@ -537,42 +433,42 @@ def build_control_center():
537
  with urllib.request.urlopen(req, timeout=60) as resp:
538
  data = json.loads(resp.read().decode("utf-8"))
539
  reply = data.get("message", {}).get("content", "No response")
540
- return history + [(message, reply)]
541
  except Exception as e:
542
- return history + [(message, f"❌ Error: {str(e)[:200]}")]
543
 
544
  pinecone_send.click(
545
  fn=pinecone_chat_fn,
546
  inputs=[pinecone_msg, pinecone_chat, pinecone_key],
547
- outputs=[pinecone_chat],
548
  ).then(lambda: "", outputs=[pinecone_msg])
549
 
550
- # ═══════════════════════════════════════════════════════
551
  # TAB 5: Model Registry
552
- # ═══════════════════════════════════════════════════════
553
  with gr.TabItem("📋 Model Registry"):
554
  gr.Markdown("""
555
- ### Browse all 37+ models in the NEXUS OS registry
556
  """)
557
 
558
  registry_table = gr.DataFrame(
 
559
  headers=["ID", "Name", "Family", "Tier", "Size (GB)", "Params (B)",
560
  "Capabilities", "Context", "Temp"],
561
- label="Registered Models",
562
  interactive=False,
563
  )
564
 
565
  def load_registry():
566
  return [{
567
  "ID": k,
568
- "Name": v.name,
569
- "Family": v.family,
570
- "Tier": v.tier,
571
- "Size (GB)": v.size_gb,
572
- "Params (B)": v.params_b,
573
- "Capabilities": ", ".join(v.capabilities),
574
- "Context": v.max_context,
575
- "Temp": v.default_temp,
576
  } for k, v in REGISTRY.items()]
577
 
578
  demo.load(fn=load_registry, outputs=[registry_table])
 
1
  """
2
+ NEXUS OS v5.0 — Provider Control Center
3
 
4
+ A multi-provider LLM management dashboard.
5
+ Tabs:
6
  1. Provider Manager — enter API keys, check health, see available models
7
+ 2. Side-by-Side Arena — compare 2 providers on same prompt
8
+ 3. Experiment Log — track runs, sort by latency/cost
9
+ 4. Pinecone Chat — talk to pineosman2 assistant
10
+ 5. Model Registry — browse 37+ models
11
 
12
+ Self-contained. Only dependency: gradio.
13
  """
14
  import os
15
  import sys
 
29
 
30
 
31
  # ═══════════════════════════════════════════════════════════════
32
+ # PROVIDERS
33
  # ═══════════════════════════════════════════════════════════════
34
+ class Provider:
35
+ def __init__(self, name, display_name, domain, key_env):
36
+ self.name = name
 
 
 
 
 
 
 
 
37
  self.display_name = display_name
38
  self.domain = domain
39
  self.key_env = key_env
40
 
41
+ PROVIDERS = [
42
+ Provider("hf_router", "HF Inference Providers", "router.huggingface.co", "HF_TOKEN"),
43
+ Provider("groq", "Groq", "api.groq.com", "GROQ_API_KEY"),
44
+ Provider("deepseek", "DeepSeek", "api.deepseek.com", "DEEPSEEK_API_KEY"),
45
+ Provider("openrouter", "OpenRouter", "openrouter.ai", "OPENROUTER_API_KEY"),
46
+ Provider("together", "Together AI", "api.together.xyz", "TOGETHER_API_KEY"),
47
+ Provider("kilocode", "Kilocode", "kilocode.ai", "KILOCODE_API_KEY"),
48
+ Provider("nvidia", "NVIDIA NIM", "integrate.api.nvidia.com", "NVIDIA_API_KEY"),
49
+ ]
50
+
51
+ PROVIDER_MAP = {p.name: p for p in PROVIDERS}
52
 
 
 
 
53
  ENDPOINTS = {
54
+ "hf_router": "https://router.huggingface.co/v1/chat/completions",
55
+ "groq": "https://api.groq.com/openai/v1/chat/completions",
56
+ "deepseek": "https://api.deepseek.com/v1/chat/completions",
57
+ "openrouter": "https://openrouter.ai/api/v1/chat/completions",
58
+ "together": "https://api.together.xyz/v1/chat/completions",
59
  }
60
 
 
61
  FREE_MODELS = {
62
+ "hf_router": ["HuggingFaceTB/SmolLM2-1.7B-Instruct", "meta-llama/Llama-3.2-1B-Instruct", "Qwen/Qwen2.5-0.5B-Instruct"],
63
+ "groq": ["llama-3.2-1b-preview", "llama-3.2-3b-preview", "mixtral-8x7b-32768"],
64
+ "deepseek": ["deepseek-chat", "deepseek-reasoner"],
65
+ "openrouter": ["meta-llama/llama-3.2-1b-instruct:free"],
66
+ "together": ["meta-llama/Llama-3.3-70B-Instruct-Turbo-Free"],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  }
68
 
 
69
  # ═══════════════════════════════════════════════════════════════
70
+ # API HELPERS
71
  # ═══════════════════════════════════════════════════════════════
72
+ def _api_call(endpoint: str, api_key: str, payload: Dict[str, Any], timeout: int = 120):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  body = json.dumps(payload).encode("utf-8")
74
+ headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}
 
 
 
 
75
  if "openrouter" in endpoint:
76
  headers["HTTP-Referer"] = "https://huggingface.co/spaces/specimba/nexus-os-space"
77
  headers["X-Title"] = "NEXUS OS"
 
78
  req = urllib.request.Request(endpoint, data=body, headers=headers, method="POST")
79
  t0 = time.time()
80
  try:
81
  with urllib.request.urlopen(req, timeout=timeout) as resp:
82
+ return True, json.loads(resp.read().decode("utf-8")), (time.time() - t0) * 1000, ""
 
83
  except urllib.error.HTTPError as e:
84
  err = e.read().decode("utf-8", errors="replace")[:300]
85
  return False, {}, (time.time() - t0) * 1000, f"HTTP {e.code}: {err}"
 
87
  return False, {}, (time.time() - t0) * 1000, str(e)[:200]
88
 
89
 
90
+ def check_provider_health(provider_name: str, api_key: str) -> Dict[str, Any]:
91
+ """Check provider health. Returns dict with status, latency, models, error."""
92
+ provider = PROVIDER_MAP.get(provider_name)
93
+ if not provider:
94
+ return {"provider": provider_name, "status": "unknown", "latency_ms": 0, "error": "Unknown provider", "models": []}
95
+
96
  if not api_key:
97
+ return {"provider": provider.display_name, "status": "no_key", "latency_ms": 0,
98
+ "error": "", "models": FREE_MODELS.get(provider_name, [])}
99
 
100
+ endpoint = ENDPOINTS.get(provider_name)
101
  if not endpoint:
102
+ return {"provider": provider.display_name, "status": "no_endpoint", "latency_ms": 0,
103
+ "error": "No endpoint configured", "models": []}
 
 
 
 
 
 
 
 
 
104
 
105
+ models = FREE_MODELS.get(provider_name, [])
106
+ model = models[0] if models else ""
107
+ if not model:
108
+ return {"provider": provider.display_name, "status": "no_models", "latency_ms": 0,
109
+ "error": "No free models configured", "models": []}
 
110
 
111
+ payload = {"model": model, "messages": [{"role": "user", "content": "Hi"}], "max_tokens": 5, "temperature": 0.1}
112
+ success, data, latency, error = _api_call(endpoint, api_key, payload, timeout=15)
113
 
114
  if success and data.get("choices"):
115
+ return {"provider": provider.display_name, "status": "online", "latency_ms": round(latency, 1),
116
+ "error": "", "models": models}
117
  elif "429" in error or "rate limit" in error.lower():
118
+ return {"provider": provider.display_name, "status": "rate_limited", "latency_ms": round(latency, 1),
119
+ "error": error[:100], "models": models}
120
  else:
121
+ return {"provider": provider.display_name, "status": "offline", "latency_ms": round(latency, 1),
122
+ "error": error[:100], "models": models}
123
 
124
 
125
+ def generate_with_provider(provider_name: str, api_key: str, model: str, prompt: str,
126
+ system: Optional[str] = None, max_tokens: int = 512, temperature: float = 0.7) -> Dict[str, Any]:
127
+ """Generate text with a provider. Returns dict with text, latency, tokens, error."""
128
+ endpoint = ENDPOINTS.get(provider_name)
 
129
  if not endpoint:
130
+ return {"text": "", "latency_ms": 0, "tokens_input": 0, "tokens_output": 0, "error": "No endpoint"}
 
131
 
132
  messages = []
133
  if system:
134
  messages.append({"role": "system", "content": system})
135
  messages.append({"role": "user", "content": prompt})
136
 
137
+ payload = {"model": model, "messages": messages, "max_tokens": max_tokens, "temperature": temperature}
138
+ success, data, latency, error = _api_call(endpoint, api_key, payload)
 
 
 
 
 
 
139
 
140
  if not success:
141
+ return {"text": "", "latency_ms": round(latency, 1), "tokens_input": 0, "tokens_output": 0, "error": error}
 
142
 
143
  choice = data.get("choices", [{}])[0]
144
  message = choice.get("message", {})
145
  usage = data.get("usage", {})
146
 
147
+ return {
148
+ "text": message.get("content", ""),
149
+ "latency_ms": round(latency, 1),
150
+ "tokens_input": usage.get("prompt_tokens", 0),
151
+ "tokens_output": usage.get("completion_tokens", 0),
152
+ "error": "",
153
+ }
 
154
 
155
 
156
  # ═══════════════════════════════════════════════════════════════
157
+ # MODEL REGISTRY
158
  # ═══════════════════════════════════════════════════════════════
 
 
 
 
 
 
 
 
 
 
 
159
  REGISTRY = {
160
+ "deepseek-r1-8b": {"name": "DeepSeek-R1 8B", "family": "deepseek", "tier": "16GB", "size_gb": 5.2, "params_b": 8.0, "caps": "reasoning, coding, long_context", "max_context": 128000, "temp": 0.6},
161
+ "qwen2.5-coder-7b": {"name": "Qwen 2.5 Coder 7B", "family": "qwen", "tier": "16GB", "size_gb": 4.7, "params_b": 7.0, "caps": "coding, fast", "max_context": 32768, "temp": 0.3},
162
+ "l3.1-dark-reasoning-8b": {"name": "L3.1 Dark Reasoning 8B", "family": "llama", "tier": "16GB", "size_gb": 5.7, "params_b": 8.0, "caps": "reasoning, coding", "max_context": 32768, "temp": 0.7},
163
+ "omega-evolution-9b": {"name": "Omega Evolution 9B", "family": "omega", "tier": "16GB", "size_gb": 6.6, "params_b": 9.0, "caps": "reasoning, coding, vision", "max_context": 32768, "temp": 0.7},
164
+ "darwin-9b-opus": {"name": "Darwin 9B Opus", "family": "darwin", "tier": "16GB", "size_gb": 6.3, "params_b": 9.0, "caps": "reasoning, coding, long_context", "max_context": 65536, "temp": 0.7},
165
+ "qwopus-3.5-9b": {"name": "Qwopus 3.5 9B", "family": "qwopus", "tier": "16GB", "size_gb": 5.6, "params_b": 9.0, "caps": "reasoning, coding", "max_context": 32768, "temp": 0.7},
166
+ "carnice-9b": {"name": "Carnice 9B", "family": "carnice", "tier": "16GB", "size_gb": 5.6, "params_b": 9.0, "caps": "reasoning, coding, vision", "max_context": 32768, "temp": 0.7},
167
+ "open-search-vl-8b": {"name": "OpenSearch VL 8B", "family": "opensearch", "tier": "16GB", "size_gb": 6.6, "params_b": 8.0, "caps": "vision, reasoning, long_context", "max_context": 65536, "temp": 0.7},
168
+ "granite-4.1-8b-abliterated": {"name": "Granite 4.1 8B Abliterated", "family": "granite", "tier": "16GB", "size_gb": 5.1, "params_b": 8.0, "caps": "reasoning, coding, long_context", "max_context": 128000, "temp": 0.7},
169
+ "jaahas-qwen3.5-9b": {"name": "Jaahas Qwen 3.5 9B", "family": "qwen", "tier": "16GB", "size_gb": 7.4, "params_b": 9.0, "caps": "reasoning, coding, multilingual", "max_context": 32768, "temp": 0.7},
170
+ "lfm2-12b-deckard": {"name": "LFM2 12B Deckard", "family": "lfm", "tier": "24GB", "size_gb": 5.8, "params_b": 12.0, "caps": "reasoning, coding, long_context, fast", "max_context": 128000, "temp": 0.7},
171
+ "gemma4-e2b-opus": {"name": "Gemma4 E2B Opus", "family": "gemma", "tier": "24GB", "size_gb": 5.5, "params_b": 4.0, "caps": "reasoning, coding, long_context", "max_context": 128000, "temp": 0.7},
172
+ "gemma4-uncensored": {"name": "Gemma 4 Uncensored", "family": "gemma", "tier": "24GB", "size_gb": 4.9, "params_b": 4.0, "caps": "reasoning, coding, vision", "max_context": 32768, "temp": 0.7},
173
+ "gemma4-obliterated": {"name": "Gemma 4 OBLITERATED", "family": "gemma", "tier": "24GB", "size_gb": 6.3, "params_b": 4.0, "caps": "reasoning, coding, vision", "max_context": 32768, "temp": 0.7},
174
+ "qwen3.6-27b-dflash": {"name": "Qwen 3.6 27B DFlash", "family": "qwen", "tier": "24GB", "size_gb": 1.0, "params_b": 27.0, "caps": "reasoning, coding, long_context, fast", "max_context": 128000, "temp": 0.7},
175
+ "gemma4-31b-cloud": {"name": "Gemma4 31B Cloud", "family": "gemma", "tier": "48GB", "size_gb": 18.0, "params_b": 31.0, "caps": "reasoning, coding, vision, long_context, multilingual", "max_context": 128000, "temp": 0.7},
176
+ "nemotron-3-nano-omni-30b": {"name": "Nemotron-3 Nano-Omni 30B", "family": "nemotron", "tier": "48GB", "size_gb": 18.0, "params_b": 30.0, "caps": "reasoning, coding, vision, long_context, safety, tool_use", "max_context": 256000, "temp": 0.6},
177
+ "opensonnet-lite-max": {"name": "OpenSonnet-Lite-MAX", "family": "qwen", "tier": "8GB", "size_gb": 2.5, "params_b": 4.0, "caps": "reasoning, coding, fast, long_context", "max_context": 262144, "temp": 0.6},
178
+ "deepseek-v4-pro": {"name": "DeepSeek V4 Pro", "family": "deepseek", "tier": "cloud", "size_gb": 0.0, "params_b": 671.0, "caps": "reasoning, coding, long_context, multilingual, tool_use", "max_context": 64000, "temp": 0.6},
179
+ "qwen3-coder-next": {"name": "Qwen 3 Coder Next", "family": "qwen", "tier": "cloud", "size_gb": 0.0, "params_b": 32.0, "caps": "coding, reasoning, fast, long_context, tool_use", "max_context": 128000, "temp": 0.3},
180
+ "kimi-k2.6": {"name": "Kimi K2.6", "family": "kimi", "tier": "cloud", "size_gb": 0.0, "params_b": 32.0, "caps": "reasoning, coding, long_context, multilingual, vision", "max_context": 200000, "temp": 0.7},
181
+ "glm-5.1": {"name": "GLM 5.1", "family": "glm", "tier": "cloud", "size_gb": 0.0, "params_b": 32.0, "caps": "reasoning, coding, multilingual, tool_use, vision", "max_context": 128000, "temp": 0.7},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  }
183
 
184
 
 
187
  # ═══════════════════════════════════════════════════════════════
188
  experiment_log: List[Dict[str, Any]] = []
189
 
190
+
191
  # ═══════════════════════════════════════════════════════════════
192
+ # GRADIO INTERFACE
193
  # ═══════════════════════════════════════════════════════════════
194
  def build_control_center():
195
+ with gr.Blocks(title="NEXUS OS β€” Provider Control Center") as demo:
 
 
 
 
 
 
 
 
 
196
 
197
  gr.Markdown("""
198
  # πŸ”₯ NEXUS OS β€” Provider Control Center
 
202
 
203
  with gr.Tabs():
204
 
205
+ # ═══════════════════════════════════════════════════
206
  # TAB 1: Provider Manager
207
+ # ═══════════════════════════════════════════════════
208
  with gr.TabItem("πŸ”Œ Provider Manager"):
209
  gr.Markdown("""
210
+ ### Enter API keys and check provider health
211
 
212
+ Keys are stored **in this session only** (not saved).
213
  """)
214
 
215
+ # Provider key inputs
216
+ key_inputs = {}
217
+ for provider in PROVIDERS:
218
+ key_inputs[provider.name] = gr.Textbox(
219
+ label=f"{provider.display_name} API Key",
220
+ placeholder=f"Paste {provider.key_env} here...",
221
+ type="password",
222
+ value=os.environ.get(provider.key_env, ""),
223
+ )
224
 
225
+ check_btn = gr.Button("πŸ” Check All Providers", variant="primary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
 
 
227
  health_table = gr.DataFrame(
 
228
  label="Provider Health Dashboard",
229
+ headers=["Provider", "Status", "Latency (ms)", "Free Models", "Error"],
230
  interactive=False,
231
  )
232
 
233
def check_all(*keys):
    """Probe every configured provider and build the health-table rows.

    Args:
        *keys: API keys, positionally aligned with ``PROVIDERS`` (the click
            handler passes the key textboxes, which were created by
            iterating ``PROVIDERS``).

    Returns:
        A list of rows, one list per provider, whose cells follow the
        health table's header order: Provider, Status, Latency (ms),
        Free Models, Error.
    """
    # Status -> indicator glyph; built once instead of once per provider.
    status_emoji = {
        "online": "🟒",
        "rate_limited": "🟑",
        "offline": "πŸ”΄",
        "no_key": "βšͺ",
        "no_endpoint": "⚫",
        "no_models": "⚫",
    }

    rows = []
    for provider, key in zip(PROVIDERS, keys):
        h = check_provider_health(provider.name, key)
        emoji = status_emoji.get(h["status"], "βšͺ")
        # Only the first three model names are shown to keep the cell short.
        models = ", ".join(h["models"][:3]) if h["models"] else "N/A"
        latency = str(h["latency_ms"]) if h["latency_ms"] > 0 else "N/A"
        # Emit a plain row (list) rather than a dict: a list of row lists is
        # the list form the DataFrame component documents for its value.
        rows.append([
            f"{emoji} {h['provider']}",
            h["status"],
            latency,
            models,
            h["error"],
        ])
    return rows
248
 
249
+ check_btn.click(
250
+ fn=check_all,
251
+ inputs=list(key_inputs.values()),
252
  outputs=[health_table],
253
  )
254
 
255
+ # ═══════════════════════════════════════════════════
256
  # TAB 2: Side-by-Side Arena
257
+ # ═══════════════════════════════════════════════════
258
  with gr.TabItem("βš”οΈ Side-by-Side Arena"):
259
  gr.Markdown("""
260
+ ### Send the same prompt to 2 providers and compare
 
 
261
  """)
262
 
263
  with gr.Row():
 
275
  )
276
 
277
  with gr.Row():
278
+ left_provider = gr.Dropdown(
279
+ label="Left Provider",
280
+ choices=[(p.display_name, p.name) for p in PROVIDERS if p.name in ENDPOINTS],
281
+ value="hf_router",
282
  )
283
+ right_provider = gr.Dropdown(
284
+ label="Right Provider",
285
+ choices=[(p.display_name, p.name) for p in PROVIDERS if p.name in ENDPOINTS],
286
+ value="groq",
287
+ )
288
+
289
+ with gr.Row():
290
+ left_model = gr.Dropdown(label="Left Model", choices=[], value="")
291
+ right_model = gr.Dropdown(label="Right Model", choices=[], value="")
292
+
293
+ with gr.Row():
294
+ arena_max_tokens = gr.Slider(minimum=64, maximum=2048, value=512, step=64, label="Max Tokens")
295
+ arena_temperature = gr.Slider(minimum=0.0, maximum=2.0, value=0.7, step=0.1, label="Temperature")
296
 
297
  arena_go = gr.Button("πŸš€ Run Arena", variant="primary")
298
 
299
+ with gr.Row():
300
+ with gr.Column():
301
+ left_output = gr.Textbox(label="Left Output", lines=12, interactive=False)
302
+ left_metrics = gr.Textbox(label="Left Metrics", interactive=False)
303
+ with gr.Column():
304
+ right_output = gr.Textbox(label="Right Output", lines=12, interactive=False)
305
+ right_metrics = gr.Textbox(label="Right Metrics", interactive=False)
306
+
307
+ # Update model lists when provider changes
308
def update_models(provider_name):
    """Build the model dropdown update for the selected provider.

    Looks up the provider's entry in ``FREE_MODELS`` and returns a
    ``gr.Dropdown`` whose choices are (label, value) pairs of each model
    name and whose value is the first model, or ``""`` when the provider
    has no listed models.
    """
    available = FREE_MODELS.get(provider_name, [])
    if available:
        first = available[0]
    else:
        first = ""
    # Label and value are the same string for every model.
    pairs = list(zip(available, available))
    return gr.Dropdown(choices=pairs, value=first)
313
+
314
+ left_provider.change(fn=update_models, inputs=[left_provider], outputs=[left_model])
315
+ right_provider.change(fn=update_models, inputs=[right_provider], outputs=[right_model])
316
 
317
+ # Run arena
318
def run_arena(prompt, system, left_prov, right_prov, left_mod, right_mod,
              max_tokens, temperature, *all_keys):
    """Send one prompt to two providers and report both results.

    Args:
        prompt: User prompt; an empty or whitespace-only prompt short-circuits.
        system: System prompt forwarded unchanged to both providers.
        left_prov, right_prov: Provider names (matched against ``PROVIDERS``).
        left_mod, right_mod: Model ids; when empty, the provider's first
            entry in ``FREE_MODELS`` is used instead.
        max_tokens, temperature: Generation settings forwarded unchanged.
        *all_keys: API keys, positionally aligned with ``PROVIDERS``.

    Returns:
        A 4-tuple ``(left_text, left_metrics, right_text, right_metrics)``.

    Side effects:
        Appends one entry describing the run to ``experiment_log``.
    """
    import datetime

    global experiment_log

    if not prompt or not prompt.strip():
        return "Enter a prompt", "", "Enter a prompt", ""

    key_map = {p.name: k for p, k in zip(PROVIDERS, all_keys)}

    def _run_side(provider_name, model):
        """Run one side of the arena; return (model, result, text, metrics)."""
        if not model:
            # The model dropdowns start empty and are only filled by a
            # provider change event, so a first run with the default
            # providers arrives here without a model. Fall back to the
            # provider's first listed model rather than sending "".
            candidates = FREE_MODELS.get(provider_name, [])
            if candidates:
                model = candidates[0]

        api_key = key_map.get(provider_name, "")
        if api_key:
            result = generate_with_provider(
                provider_name, api_key, model, prompt, system, max_tokens, temperature
            )
        else:
            result = {"text": "❌ No API key", "latency_ms": 0, "tokens_output": 0, "error": "No key"}

        text = result["text"] if not result["error"] else f"❌ {result['error']}"
        metrics = f"⏱️ {result['latency_ms']}ms | πŸ“ {result['tokens_output']} tokens | 🎲 {model}"
        return model, result, text, metrics

    left_mod, left_result, left_text, left_met = _run_side(left_prov, left_mod)
    right_mod, right_result, right_text, right_met = _run_side(right_prov, right_mod)

    # Record the run so the Experiment Log tab can display it.
    now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    both_ok = not left_result["error"] and not right_result["error"]
    experiment_log.append({
        "Time": now,
        "Provider": f"{left_prov} vs {right_prov}",
        "Model": f"{left_mod} vs {right_mod}",
        "Prompt (first 50 chars)": prompt[:50],
        "Latency Left (ms)": left_result["latency_ms"],
        "Latency Right (ms)": right_result["latency_ms"],
        "Tokens Left": left_result["tokens_output"],
        "Tokens Right": right_result["tokens_output"],
        "Status": "complete" if both_ok else "error",
    })

    return left_text, left_met, right_text, right_met
354
 
355
  arena_go.click(
356
  fn=run_arena,
357
+ inputs=[arena_prompt, arena_system, left_provider, right_provider,
358
+ left_model, right_model, arena_max_tokens, arena_temperature] + list(key_inputs.values()),
359
+ outputs=[left_output, left_metrics, right_output, right_metrics],
360
  )
361
 
362
+ # ═══════════════════════════════════════════════════
363
  # TAB 3: Experiment Log
364
+ # ═══════════════════════════════════════════════════
365
  with gr.TabItem("πŸ“Š Experiment Log"):
366
  gr.Markdown("""
367
+ ### Track and compare your arena runs
 
 
368
  """)
369
 
370
  log_table = gr.DataFrame(
 
 
371
  label="Experiment History",
372
+ headers=["Time", "Provider", "Model", "Prompt (first 50 chars)",
373
+ "Latency Left (ms)", "Latency Right (ms)", "Tokens Left", "Tokens Right", "Status"],
374
  interactive=False,
375
  )
376
 
377
+ refresh_log_btn = gr.Button("πŸ”„ Refresh Log")
378
  clear_log_btn = gr.Button("πŸ—‘οΈ Clear Log")
379
+
380
def refresh_log():
    """Return the recorded arena runs as rows for the experiment table.

    Each entry of ``experiment_log`` is a dict keyed by column name; it is
    flattened into a list ordered like the table headers so the DataFrame
    receives a list of row lists. A missing key yields an empty cell.
    """
    columns = [
        "Time", "Provider", "Model", "Prompt (first 50 chars)",
        "Latency Left (ms)", "Latency Right (ms)",
        "Tokens Left", "Tokens Right", "Status",
    ]
    return [[entry.get(col, "") for col in columns] for entry in experiment_log]
382
 
383
  def clear_log():
384
  global experiment_log
385
  experiment_log = []
386
  return []
387
 
388
+ refresh_log_btn.click(fn=refresh_log, outputs=[log_table])
389
  clear_log_btn.click(fn=clear_log, outputs=[log_table])
390
 
391
+ # ═══════════════════════════════════════════════════
392
  # TAB 4: Pinecone Chat
393
+ # ═══════════════════════════════════════════════════
394
  with gr.TabItem("🌲 Pinecone Chat"):
395
  gr.Markdown("""
396
  ### Chat with your Pinecone Assistant `pineosman2`
 
408
  pinecone_chat = gr.Chatbot(label="Conversation with pineosman2", height=400)
409
  pinecone_msg = gr.Textbox(label="Your message", placeholder="Ask about your documents...")
410
  pinecone_send = gr.Button("Send", variant="primary")
411
+ pinecone_status = gr.Textbox(label="Status", interactive=False)
412
 
413
  def pinecone_chat_fn(message, history, api_key):
414
  if not api_key:
415
+ return history + [(message, "❌ Please enter your Pinecone API key")], "No key"
416
  if not message.strip():
417
+ return history, "Empty message"
418
 
419
+ # Use Pinecone REST API
420
  try:
 
421
  payload = json.dumps({
422
  "messages": [{"role": "user", "content": message}],
423
  }).encode("utf-8")
 
433
  with urllib.request.urlopen(req, timeout=60) as resp:
434
  data = json.loads(resp.read().decode("utf-8"))
435
  reply = data.get("message", {}).get("content", "No response")
436
+ return history + [(message, reply)], f"βœ“ Response received ({len(reply)} chars)"
437
  except Exception as e:
438
+ return history + [(message, f"❌ Error: {str(e)[:200]}")], f"Error: {str(e)[:100]}"
439
 
440
  pinecone_send.click(
441
  fn=pinecone_chat_fn,
442
  inputs=[pinecone_msg, pinecone_chat, pinecone_key],
443
+ outputs=[pinecone_chat, pinecone_status],
444
  ).then(lambda: "", outputs=[pinecone_msg])
445
 
446
+ # ═══════════════════════════════════════════════════
447
  # TAB 5: Model Registry
448
+ # ═══════════════════════════════════════════════════
449
  with gr.TabItem("πŸ“‹ Model Registry"):
450
  gr.Markdown("""
451
+ ### Browse all models in the NEXUS OS registry
452
  """)
453
 
454
  registry_table = gr.DataFrame(
455
+ label="Registered Models",
456
  headers=["ID", "Name", "Family", "Tier", "Size (GB)", "Params (B)",
457
  "Capabilities", "Context", "Temp"],
 
458
  interactive=False,
459
  )
460
 
461
  def load_registry():
462
  return [{
463
  "ID": k,
464
+ "Name": v["name"],
465
+ "Family": v["family"],
466
+ "Tier": v["tier"],
467
+ "Size (GB)": v["size_gb"],
468
+ "Params (B)": v["params_b"],
469
+ "Capabilities": v["caps"],
470
+ "Context": v["max_context"],
471
+ "Temp": v["temp"],
472
  } for k, v in REGISTRY.items()]
473
 
474
  demo.load(fn=load_registry, outputs=[registry_table])