beta3 committed on
Commit
4d88b3c
·
verified ·
1 Parent(s): b11ed7b

Upload 3 files

Files changed (3)
  1. app.py +1125 -0
  2. models_data.py +338 -0
  3. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,1125 @@
1
+ """
2
+ Gemma Explorer — Gradio app to explore and chat with the full Gemma model family.
3
+ Powered by Google DeepMind models on ZeroGPU (NVIDIA H200).
4
+ """
5
+
6
+ import os
7
+ import gc
8
+ import json
9
+ import datetime
10
+ import html as _html
11
+ import gradio as gr
12
+ import torch
13
+ import spaces
14
+ from transformers import (
15
+ AutoTokenizer,
16
+ AutoProcessor,
17
+ AutoModelForCausalLM,
18
+ TextIteratorStreamer,
19
+ )
20
+ from threading import Thread
21
+ from PIL import Image
22
+ import numpy as np
23
+ from models_data import MODELS, FAMILIES, get_models_by_family
24
+
25
+ _HF_TOKEN = os.environ.get("HF_TOKEN")
26
+ if _HF_TOKEN:
27
+ from huggingface_hub import login
28
+ # Pass add_to_git_credential=False to avoid token-overwrite warnings from huggingface_hub
29
+ login(token=_HF_TOKEN, add_to_git_credential=False)
30
+
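+ # Fallback, assuming the Space image may ship without torchvision (needed for image preprocessing):
+ # install it at runtime so AutoProcessor image handling keeps working.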
31
+ try:
32
+ import torchvision # noqa: F401
33
+ except ImportError:
34
+ import subprocess, sys
35
+ subprocess.run(
36
+ [sys.executable, "-m", "pip", "install", "torchvision", "-q", "--no-input"],
37
+ check=True,
38
+ )
39
+
40
+ try:
41
+ from transformers import AutoModelForMultimodalLM
42
+ _HAS_MULTIMODAL = True
43
+ except ImportError:
44
+ _HAS_MULTIMODAL = False
45
+ AutoModelForMultimodalLM = None
46
+
47
+
48
+ # ── Model state ───────────────────────────────────────────────────────────────
49
+
50
+ _model = None
51
+ _processor = None
52
+ _current_id = None
53
+
54
+
55
+ def _purge_model():
56
+ """Unload current model from CPU memory. Never touches CUDA directly —
57
+ CUDA cleanup happens inside @spaces.GPU contexts only."""
58
+ global _model, _processor, _current_id
59
+ if _model is not None:
60
+ del _model
61
+ _model = None
62
+ if _processor is not None:
63
+ del _processor
64
+ _processor = None
65
+ _current_id = None
66
+ gc.collect()
67
+
68
+
69
+ def _load_weights(model_id: str):
70
+ """Load model weights. Always purges previous model first, no exceptions."""
71
+ global _model, _processor, _current_id
72
+
73
+ meta = MODELS[model_id]
74
+ loader = meta["loader_type"]
75
+ dtype = meta["torch_dtype"]
76
+
77
+ _purge_model()
78
+
79
+ if loader == "multimodal":
80
+ if not _HAS_MULTIMODAL:
81
+ raise ImportError("AutoModelForMultimodalLM not available. Run: pip install -U transformers")
82
+ _processor = AutoProcessor.from_pretrained(model_id, token=_HF_TOKEN)
83
+ _model = AutoModelForMultimodalLM.from_pretrained(model_id, torch_dtype=dtype, token=_HF_TOKEN)
84
+ elif loader == "vision_causal":
85
+ _processor = AutoProcessor.from_pretrained(model_id, token=_HF_TOKEN)
86
+ _model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=dtype, token=_HF_TOKEN)
87
+ else:
88
+ _processor = AutoTokenizer.from_pretrained(model_id, token=_HF_TOKEN)
89
+ _model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=dtype, token=_HF_TOKEN)
90
+
91
+ _model.eval()
92
+ _current_id = model_id
93
+
94
+
95
+ def load_model_stream(model_id: str, card_html_list_len: int):
96
+ """
97
+ Two-phase generator so the loading notice is *visibly rendered* before
98
+ the blocking from_pretrained() call starts.
99
+ """
100
+ meta = MODELS[model_id]
101
+ n = card_html_list_len
102
+
103
+ # ── Phase 1: show loading notice ──────────────────────────────────────────
104
+ loading = _loading_html(meta["name"])
105
+ yield (
106
+ gr.update(visible=True, value=loading),
107
+ gr.update(visible=True, value=loading),
108
+ gr.update(),
109
+ gr.update(),
110
+ gr.update(),
111
+ gr.update(),
112
+ gr.update(),
113
+ gr.update(),
114
+ *([gr.update()] * n),
115
+ )
116
+
117
+ # ── Phase 2: actual load ──────────────────────────────────────────────────
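+ # Note: the order of each yielded tuple must mirror the outputs= list wired to the Load buttons below.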
118
+ try:
119
+ _load_weights(model_id)
120
+ status = _make_status_html(meta, state="ready")
121
+ inline = _chat_inline_status(loaded=True, name=meta["name"])
122
+ cards = [
123
+ gr.update(value=_card_html(mid, m, active=(mid == model_id)))
124
+ for mid, m in MODELS.items()
125
+ ]
126
+ yield (
127
+ gr.update(visible=False),
128
+ gr.update(visible=False),
129
+ status,
130
+ gr.update(selected="single"),
131
+ gr.update(visible=meta["supports_vision"]),
132
+ gr.update(value=model_id),
133
+ [],
134
+ inline,
135
+ *cards,
136
+ )
137
+ except Exception as exc:
138
+ error = (f'<div class="status-error">Error loading '
139
+ f'<strong>{_html.escape(meta["name"])}</strong>: '
140
+ f'{_html.escape(str(exc))}</div>')
141
+ yield (
142
+ gr.update(visible=False),
143
+ gr.update(visible=False),
144
+ error,
145
+ gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
146
+ *([gr.update()] * n),
147
+ )
148
+
149
+
150
+ # ── Inference ─────────────────────────────────────────────────────────────────
151
+
152
+ def _run_inference(message: str, image, max_new_tokens: int, temperature: float):
153
+ global _model, _processor, _current_id
154
+
155
+ if _model is None or _current_id is None or _processor is None:
156
+ yield "No model loaded. Go to Explore Models and click Load & Chat."
157
+ return
158
+
159
+ if _current_id not in MODELS:
160
+ _purge_model()
161
+ yield "Model state corrupted — please reload the model from Explore Models."
162
+ return
163
+
164
+ torch.cuda.empty_cache()
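+ # Safe to touch CUDA here: _run_inference is only entered through the @spaces.GPU wrappers below.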
165
+
166
+ max_new_tokens = max(64, min(int(max_new_tokens), 2048))
167
+ temperature = max(0.0, min(float(temperature), 1.5))
168
+
169
+ meta = MODELS[_current_id]
170
+ loader = meta["loader_type"]
171
+ device = "cuda"
172
+ _model.to(device)
173
+
174
+ pil_image = None
175
+ if image is not None:
176
+ try:
177
+ pil_image = Image.fromarray(image) if isinstance(image, np.ndarray) else image
178
+ except Exception:
179
+ pil_image = None
180
+
181
+ try:
182
+ if loader == "multimodal":
183
+ content = []
184
+ if pil_image is not None:
185
+ content.append({"type": "image", "image": pil_image})
186
+ content.append({"type": "text", "text": message})
187
+ messages = [{"role": "user", "content": content}]
188
+ inputs = _processor.apply_chat_template(
189
+ messages, tokenize=True, return_dict=True,
190
+ return_tensors="pt", add_generation_prompt=True,
191
+ ).to(device)
192
+
193
+ elif loader == "vision_causal":
194
+ if pil_image is not None:
195
+ content = [{"type": "image"}, {"type": "text", "text": message}]
196
+ messages = [{"role": "user", "content": content}]
197
+ else:
198
+ messages = [{"role": "user", "content": message}]
199
+ text = _processor.apply_chat_template(messages, add_generation_prompt=True)
200
+ inputs = _processor(text=text, images=pil_image, return_tensors="pt").to(device)
201
+
202
+ else:
203
+ if pil_image is not None:
204
+ message = "[This model does not support images] " + message
205
+ messages = [{"role": "user", "content": message}]
206
+ text = _processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
207
+ inputs = _processor(text, return_tensors="pt").to(device)
208
+
209
+ except Exception as exc:
210
+ yield f"Error preparing inputs: {exc}"
211
+ return
212
+
213
+ tok = getattr(_processor, "tokenizer", _processor)
214
+ streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True, timeout=60)
215
+
216
+ allowed_keys = {"input_ids", "attention_mask", "token_type_ids", "pixel_values",
217
+ "image_sizes", "pixel_attention_mask", "image_position_ids",
218
+ "pixel_position_ids", "token_type_ids_for_images"}
219
+ filtered_inputs = {k: v for k, v in inputs.items() if k in allowed_keys}
220
+
221
+ if "image_position_ids" in filtered_inputs:
222
+ ipi = filtered_inputs["image_position_ids"]
223
+ if not isinstance(ipi, torch.Tensor):
224
+ filtered_inputs["image_position_ids"] = torch.tensor(ipi, device=device)
225
+ elif ipi.dtype == torch.bool:
226
+ filtered_inputs["image_position_ids"] = ipi.long()
227
+
228
+ gen_kwargs: dict = {**filtered_inputs, "streamer": streamer, "max_new_tokens": max_new_tokens}
229
+ if temperature > 0.01:
230
+ gen_kwargs.update({"do_sample": True, "temperature": temperature, "top_p": 0.95})
231
+ else:
232
+ gen_kwargs["do_sample"] = False
233
+
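+ # generate() blocks, so it runs in a worker thread while the streamer is drained for token-by-token updates.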
234
+ thread = Thread(target=_model.generate, kwargs=gen_kwargs)
235
+ thread.start()
236
+
237
+ partial = ""
238
+ for token in streamer:
239
+ partial += token
240
+ yield partial
241
+ thread.join()
242
+
243
+
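+ # @spaces.GPU parameters are fixed at decoration time, so each GPU tier gets its own thin wrapper.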
244
+ @spaces.GPU(duration=120)
245
+ def infer_large(message, image, max_new_tokens, temperature):
246
+ yield from _run_inference(message, image, max_new_tokens, temperature)
247
+
248
+
249
+ @spaces.GPU(duration=180, size="xlarge")
250
+ def infer_xlarge(message, image, max_new_tokens, temperature):
251
+ yield from _run_inference(message, image, max_new_tokens, temperature)
252
+
253
+
254
+ def respond(message: str, image, max_new_tokens: int, temperature: float, history: list):
255
+ if not message.strip() and image is None:
256
+ yield history, gr.update()
257
+ return
258
+
259
+ if _current_id is None:
260
+ yield history + [
261
+ {"role": "user", "content": message or "[image attached]"},
262
+ {"role": "assistant", "content": "Please load a model first from the **Explore Models** tab."},
263
+ ], gr.update()
264
+ return
265
+
266
+ new_history = history + [{"role": "user", "content": message or "[image attached]"}]
267
+ yield new_history, gr.update()
268
+
269
+ meta = MODELS[_current_id]
270
+ infer_fn = infer_xlarge if meta["gpu_size"] == "xlarge" else infer_large
271
+
272
+ partial = ""
273
+ for chunk in infer_fn(message, image, max_new_tokens, temperature):
274
+ partial = chunk
275
+ yield new_history + [{"role": "assistant", "content": partial}], gr.update()
276
+
277
+ yield new_history + [{"role": "assistant", "content": partial}], gr.update(value=None)
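+ # The final yield clears the attached image so it is not resent with the next message.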
278
+
279
+
280
+ def export_chat(history: list) -> str:
281
+ if not history:
282
+ return ""
283
+ model_name = MODELS[_current_id]["name"] if _current_id else "unknown"
284
+ lines = [
285
+ "# Gemma Explorer — Chat Export",
286
+ f"Model: {model_name}",
287
+ f"Date: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M')}",
288
+ "---", "",
289
+ ]
290
+ for msg in history:
291
+ role = "**You**" if msg["role"] == "user" else f"**{model_name}**"
292
+ lines.append(f"{role}: {msg['content']}")
293
+ lines.append("")
294
+ return "\n".join(lines)
295
+
296
+
297
+ # ── Dual-chat inference ───────────────────────────────────────────────────────
298
+
299
+ def respond_dual(message: str, image,
300
+ max_new_tokens: int, temperature: float,
301
+ sys_a: str, sys_b: str,
302
+ model_a_id: str, model_b_id: str,
303
+ hist_a: list, hist_b: list):
304
+ if not message.strip() and image is None:
305
+ yield hist_a, hist_b
306
+ return
307
+
308
+ user_msg = message or "[image attached]"
309
+
310
+ try:
311
+ _load_weights(model_a_id)
312
+ except Exception as exc:
313
+ _purge_model()
314
+ yield (hist_a + [{"role": "user", "content": user_msg},
315
+ {"role": "assistant", "content": f"Failed to load {model_a_id}: {exc}"}],
316
+ hist_b)
317
+ return
318
+
319
+ new_hist_a = hist_a + [{"role": "user", "content": user_msg}]
320
+ yield new_hist_a, hist_b
321
+
322
+ meta_a = MODELS[model_a_id]
323
+ infer_fn = infer_xlarge if meta_a["gpu_size"] == "xlarge" else infer_large
324
+ full_a = (sys_a.strip() + "\n\n" + message) if sys_a.strip() else message
325
+ partial_a = ""
326
+ for chunk in infer_fn(full_a, image, max_new_tokens, temperature):
327
+ partial_a = chunk
328
+ yield new_hist_a + [{"role": "assistant", "content": partial_a}], hist_b
329
+ hist_a = new_hist_a + [{"role": "assistant", "content": partial_a}]
330
+
331
+ try:
332
+ _load_weights(model_b_id)
333
+ except Exception as exc:
334
+ _purge_model()
335
+ yield (hist_a,
336
+ hist_b + [{"role": "user", "content": user_msg},
337
+ {"role": "assistant", "content": f"Failed to load {model_b_id}: {exc}"}])
338
+ return
339
+
340
+ new_hist_b = hist_b + [{"role": "user", "content": user_msg}]
341
+ yield hist_a, new_hist_b
342
+
343
+ meta_b = MODELS[model_b_id]
344
+ infer_fn = infer_xlarge if meta_b["gpu_size"] == "xlarge" else infer_large
345
+ full_b = (sys_b.strip() + "\n\n" + message) if sys_b.strip() else message
346
+ partial_b = ""
347
+ for chunk in infer_fn(full_b, image, max_new_tokens, temperature):
348
+ partial_b = chunk
349
+ yield hist_a, new_hist_b + [{"role": "assistant", "content": partial_b}]
350
+ yield hist_a, new_hist_b + [{"role": "assistant", "content": partial_b}]
351
+
352
+
353
+ # ── CSS ───────────────────────────────────────────────────────────────────────
354
+
355
+ CSS = """
356
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=Space+Grotesk:wght@400;500;600;700&display=swap');
357
+
358
+ :root {
359
+ --bg: #080d18;
360
+ --surface: #0f1523;
361
+ --surface2: #161e30;
362
+ --border: rgba(80,110,220,0.16);
363
+ --blue: #4f7ef8;
364
+ --blue-dim: #3a5fc4;
365
+ --purple: #8b72f0;
366
+ --purple-dim: #6a55cc;
367
+ --text: #dde4f4;
368
+ --text-dim: #7a86a8;
369
+ --green: #34d399;
370
+ --red: #f87171;
371
+ --amber: #fbbf24;
372
+ --radius: 13px;
373
+ }
374
+
375
+ *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
376
+
377
+ .gradio-container {
378
+ font-family: 'Inter', sans-serif !important;
379
+ background: var(--bg) !important;
380
+ max-width: 1420px !important;
381
+ color: var(--text) !important;
382
+ }
383
+
384
+ footer, .gr-prose { display: none !important; }
385
+
386
+ .gemma-hero {
387
+ position: relative;
388
+ background: linear-gradient(150deg, #06091566 0%, #0b1225 60%, #0d1530 100%);
389
+ border-radius: 18px; padding: 42px 52px; margin-bottom: 6px;
390
+ overflow: hidden; border: 1px solid var(--border);
391
+ }
392
+ .hero-bg { position: absolute; inset: 0; pointer-events: none; z-index: 0; overflow: hidden; }
393
+ .hero-grid {
394
+ position: absolute; inset: 0;
395
+ background-image:
396
+ linear-gradient(rgba(79,126,248,0.055) 1px, transparent 1px),
397
+ linear-gradient(90deg, rgba(79,126,248,0.055) 1px, transparent 1px);
398
+ background-size: 44px 44px;
399
+ animation: grid-drift 22s linear infinite;
400
+ }
401
+ @keyframes grid-drift { from { background-position: 0 0; } to { background-position: 44px 44px; } }
402
+ .orb {
403
+ position: absolute; border-radius: 50%;
404
+ filter: blur(70px); opacity: 0.16;
405
+ animation: float-orb var(--dur, 13s) ease-in-out infinite var(--delay, 0s);
406
+ }
407
+ .orb-1 { width: 400px; height: 400px; background: var(--blue); top: -130px; right: -60px; --dur:15s; --delay:0s; }
408
+ .orb-2 { width: 280px; height: 280px; background: var(--purple); bottom: -90px; left: 12%; --dur:12s; --delay:-4s; }
409
+ .orb-3 { width: 200px; height: 200px; background: #60a5fa; top: 35%; left: 52%; --dur:18s; --delay:-7s; }
410
+ @keyframes float-orb {
411
+ 0%,100% { transform: translate(0,0) scale(1); }
412
+ 33% { transform: translate(16px,-20px) scale(1.05); }
413
+ 66% { transform: translate(-10px,12px) scale(0.96); }
414
+ }
415
+ .hero-stars { position: absolute; inset: 0; }
416
+ .star {
417
+ position: absolute; width: 2px; height: 2px;
418
+ background: #fff; border-radius: 50%; opacity: 0;
419
+ animation: twinkle var(--dur,3s) ease-in-out infinite var(--delay,0s);
420
+ }
421
+ @keyframes twinkle {
422
+ 0%,100% { opacity:0; transform:scale(.8); }
423
+ 50% { opacity:.65; transform:scale(1.3); }
424
+ }
425
+ .hero-inner { position: relative; z-index: 1; }
426
+ .hero-top-bar { display: flex; align-items: center; margin-bottom: 22px; }
427
+ .hero-eyebrow {
428
+ display: inline-flex; align-items: center; gap: 9px;
429
+ background: rgba(79,126,248,0.1); border: 1px solid rgba(79,126,248,0.28);
430
+ border-radius: 20px; padding: 5px 16px;
431
+ font-size: 11px; font-weight: 600; color: #7aabf8; letter-spacing: 1px; text-transform: uppercase;
432
+ }
433
+ .hero-dot-pulse {
434
+ width: 7px; height: 7px; border-radius: 50%; background: var(--green);
435
+ animation: pulse-ring 2s ease-out infinite;
436
+ }
437
+ @keyframes pulse-ring {
438
+ 0% { box-shadow: 0 0 0 0 rgba(52,211,153,.5); }
439
+ 70% { box-shadow: 0 0 0 8px rgba(52,211,153,0); }
440
+ 100% { box-shadow: 0 0 0 0 rgba(52,211,153,0); }
441
+ }
442
+ .hero-title {
443
+ font-family: 'Space Grotesk', sans-serif;
444
+ font-size: 54px; font-weight: 700; color: #fff;
445
+ line-height: 1.05; margin: 0 0 14px; letter-spacing: -2px;
446
+ }
447
+ .hero-title span {
448
+ background: linear-gradient(120deg, var(--blue) 20%, var(--purple) 80%);
449
+ -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text;
450
+ }
451
+ .hero-subtitle {
452
+ font-size: 15px; color: rgba(221,228,244,0.55);
453
+ margin: 0 0 28px; max-width: 530px; line-height: 1.7; font-weight: 400;
454
+ }
455
+ .hero-chips { display: flex; flex-wrap: wrap; gap: 7px; }
456
+ .hero-chip {
457
+ background: rgba(255,255,255,0.04); border: 1px solid rgba(255,255,255,0.09);
458
+ border-radius: 7px; padding: 5px 12px;
459
+ font-size: 12px; color: rgba(221,228,244,0.7); font-weight: 500;
460
+ }
461
+ .hero-chip strong { color: #fff; }
462
+ .hero-legend { display: flex; flex-wrap: wrap; gap: 14px; margin-top: 16px; }
463
+ .legend-dot { display: inline-flex; align-items: center; gap: 6px; font-size: 11px; color: rgba(221,228,244,0.5); font-weight: 500; }
464
+ .legend-swatch { width: 9px; height: 9px; border-radius: 3px; flex-shrink: 0; }
465
+
466
+ .tab-nav { background: transparent !important; border-bottom: 1px solid var(--border) !important; gap: 2px !important; padding: 0 4px !important; margin-top: 6px !important; }
467
+ .tab-nav button { font-family: 'Inter', sans-serif !important; font-size: 13px !important; font-weight: 500 !important; border-radius: 8px 8px 0 0 !important; padding: 10px 22px !important; color: var(--text-dim) !important; border: none !important; background: transparent !important; transition: all 0.2s !important; letter-spacing: 0.2px !important; }
468
+ .tab-nav button:hover { color: var(--text) !important; background: rgba(79,126,248,0.07) !important; }
469
+ .tab-nav button.selected { color: var(--blue) !important; background: rgba(79,126,248,0.08) !important; border-bottom: 2px solid var(--blue) !important; }
470
+
471
+ .status-bar {
472
+ display: flex; align-items: center; justify-content: space-between; flex-wrap: wrap;
473
+ background: var(--surface); border: 1px solid var(--border);
474
+ border-left: 3px solid var(--accent, var(--blue));
475
+ border-radius: var(--radius); padding: 11px 18px; gap: 10px; margin-bottom: 14px;
476
+ }
477
+ .status-left, .status-right { display: flex; align-items: center; gap: 8px; flex-wrap: wrap; }
478
+ .status-dot { width: 8px; height: 8px; border-radius: 50%; flex-shrink: 0; }
479
+ .dot-ready { background: var(--green); box-shadow: 0 0 6px rgba(52,211,153,.5); }
480
+ .dot-loading { background: var(--amber); animation: pulse-ring 1.5s ease-out infinite; }
481
+ .status-name { font-family: 'Space Grotesk', sans-serif; font-size: 14px; font-weight: 600; }
482
+ .status-chip { font-size: 11px; font-weight: 500; padding: 2px 9px; border-radius: 5px; background: rgba(255,255,255,0.05); border: 1px solid var(--border); color: var(--text-dim); }
483
+ .chip-vision { background: rgba(79,126,248,0.13); border-color: rgba(79,126,248,0.28); color: #8ab4f8; }
484
+ .chip-text { background: rgba(122,134,168,0.08); border-color: var(--border); color: var(--text-dim); }
485
+ .status-ok { font-size: 12px; color: var(--green); font-weight: 500; }
486
+ .status-error { background: rgba(248,113,113,0.08); border: 1px solid rgba(248,113,113,0.22); color: var(--red); border-radius: 10px; padding: 12px 16px; font-size: 13px; }
487
+ .status-empty { font-size: 13px; color: var(--text-dim); padding: 10px 0; font-style: italic; }
488
+
489
+ .loading-notice { display: flex; align-items: center; gap: 12px; background: rgba(79,126,248,0.07); border: 1px solid rgba(79,126,248,0.22); border-radius: var(--radius); padding: 13px 18px; margin-bottom: 14px; color: #8ab4f8; font-size: 13px; font-weight: 500; animation: notice-pulse 2s ease-in-out infinite; }
490
+ .notice-spinner { width: 15px; height: 15px; border: 2px solid rgba(79,126,248,0.25); border-top-color: var(--blue); border-radius: 50%; animation: spin .75s linear infinite; flex-shrink: 0; }
491
+ @keyframes spin { to { transform: rotate(360deg); } }
492
+ @keyframes notice-pulse { 0%,100% { border-color: rgba(79,126,248,0.22); } 50% { border-color: rgba(79,126,248,0.5); } }
493
+
494
+ .family-header { display: flex; align-items: center; gap: 14px; padding: 15px 20px; border-radius: var(--radius); margin-bottom: 12px; background: var(--surface); border: 1px solid var(--border); }
495
+ .family-icon { font-family: 'Space Grotesk', sans-serif; font-size: 17px; font-weight: 700; width: 40px; height: 40px; display: flex; align-items: center; justify-content: center; border-radius: 10px; color: #fff; flex-shrink: 0; }
496
+ .family-text h3 { font-family: 'Space Grotesk', sans-serif; font-size: 15.5px; font-weight: 600; margin: 0 0 2px; color: var(--text); }
497
+ .family-text p { font-size: 12px; color: var(--text-dim); margin: 0; }
498
+ .family-year { margin-left: auto; font-size: 11px; color: var(--text-dim); font-weight: 500; opacity: 0.7; }
499
+ .family-new-badge { margin-left: 8px; background: linear-gradient(120deg, var(--blue), var(--purple)); color: #fff; border-radius: 7px; padding: 3px 10px; font-size: 10px; font-weight: 700; letter-spacing: 0.6px; text-transform: uppercase; }
500
+
501
+ .model-card-wrap { padding: 5px !important; }
502
+ .model-card {
503
+ background: var(--surface); border: 1px solid var(--border);
504
+ border-radius: var(--radius); overflow: hidden; height: 100%;
505
+ display: flex; flex-direction: column;
506
+ transition: border-color 0.25s, transform 0.25s, box-shadow 0.25s;
507
+ min-height: 255px;
508
+ }
509
+ .model-card:hover { border-color: rgba(79,126,248,0.42); transform: translateY(-3px); box-shadow: 0 10px 32px rgba(0,0,0,0.4); }
510
+ .model-card.card-active { border-color: var(--green) !important; box-shadow: 0 0 0 1px rgba(52,211,153,0.18), 0 8px 28px rgba(0,0,0,0.35) !important; }
511
+ .card-active-badge { font-size: 9px; font-weight: 700; padding: 2px 8px; border-radius: 5px; background: rgba(52,211,153,0.13); color: var(--green); border: 1px solid rgba(52,211,153,0.28); text-transform: uppercase; letter-spacing: 0.5px; flex-shrink: 0; }
512
+ .card-accent { height: 3px; width: 100%; flex-shrink: 0; }
513
+ .card-body { padding: 17px 17px 14px; flex: 1; display: flex; flex-direction: column; }
514
+ .card-top { display: flex; align-items: flex-start; justify-content: space-between; margin-bottom: 8px; gap: 6px; flex-wrap: wrap; }
515
+ .card-name { font-family: 'Space Grotesk', sans-serif; font-size: 15.5px; font-weight: 600; color: var(--text); margin: 0; line-height: 1.2; }
516
+ .card-badge { font-size: 9px; font-weight: 700; padding: 2px 7px; border-radius: 5px; text-transform: uppercase; letter-spacing: 0.5px; flex-shrink: 0; }
517
+ .badge-NEW { background: rgba(79,126,248,0.15); color: #8ab4f8; border: 1px solid rgba(79,126,248,0.28); }
518
+ .badge-FLAGSHIP { background: rgba(139,114,240,0.15); color: #baaaf8; border: 1px solid rgba(139,114,240,0.28); }
519
+ .card-desc { font-size: 12px; color: var(--text-dim); line-height: 1.55; margin: 0 0 13px; flex: 1; }
520
+ .card-stats { display: grid; grid-template-columns: repeat(3,1fr); gap: 5px; margin-bottom: 11px; }
521
+ .card-stat { font-size: 10.5px; font-weight: 500; padding: 5px 4px; border-radius: 6px; background: rgba(255,255,255,0.03); color: var(--text-dim); border: 1px solid var(--border); text-align: center; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }
522
+ .card-tags { display: flex; flex-wrap: wrap; gap: 5px; margin-bottom: 13px; }
523
+ .tag { font-size: 10px; font-weight: 500; padding: 2px 8px; border-radius: 5px; letter-spacing: 0.2px; border: 1px solid transparent; }
524
+ .tag-vision { background: rgba(79,126,248,0.1); color: #8ab4f8; border-color: rgba(79,126,248,0.22); }
525
+ .tag-text { background: rgba(122,134,168,0.07); color: var(--text-dim); border-color: var(--border); }
526
+ .tag-apache { background: rgba(52,211,153,0.09); color: #6ee7b7; border-color: rgba(52,211,153,0.22); }
527
+ .tag-gemma { background: rgba(251,191,36,0.09); color: #fcd34d; border-color: rgba(251,191,36,0.22); }
528
+ .tag-xlarge { background: rgba(139,114,240,0.1); color: #baaaf8; border-color: rgba(139,114,240,0.25); }
529
+ .tag-instruct { background: rgba(167,139,250,0.13); color: #c4b5fd; border-color: rgba(167,139,250,0.3); }
530
+ .tag-base { background: rgba(148,163,184,0.08); color: #94a3b8; border-color: rgba(148,163,184,0.2); }
531
+
532
+ .model-card-wrap { display: flex !important; flex-direction: column !important; }
533
+ .model-card-wrap > * { width: 100% !important; }
534
+ .card-btn { width: 100% !important; margin-top: 6px !important; }
535
+ .card-btn > div { width: 100% !important; }
536
+ .card-btn button { width: 100% !important; background: linear-gradient(120deg, var(--blue), var(--blue-dim)) !important; color: #fff !important; border-radius: 8px !important; font-family: 'Inter', sans-serif !important; font-size: 12.5px !important; font-weight: 500 !important; padding: 9px !important; border: none !important; cursor: pointer !important; letter-spacing: 0.2px !important; transition: opacity 0.2s, transform 0.15s !important; }
537
+ .card-btn button:hover { opacity: 0.85 !important; transform: translateY(-1px) !important; }
538
+ .card-btn button:disabled { opacity: 0.45 !important; cursor: not-allowed !important; transform: none !important; }
539
+ .card-btn-xlarge button { background: linear-gradient(120deg, var(--purple), var(--purple-dim)) !important; }
540
+
541
+ .thinking-wrap {
542
+ display: flex; align-items: center; gap: 10px;
543
+ padding: 8px 14px 10px;
544
+ border-bottom: 1px solid var(--border);
545
+ background: var(--surface2);
546
+ font-size: 12px; color: var(--text-dim); font-style: italic;
547
+ }
548
+ .thinking-dots { display: flex; gap: 5px; align-items: center; }
549
+ .thinking-dots span {
550
+ width: 7px; height: 7px; border-radius: 50%;
551
+ background: var(--blue); opacity: 0.3;
552
+ animation: dot-bounce 1.1s ease-in-out infinite;
553
+ }
554
+ .thinking-dots span:nth-child(2) { animation-delay: 0.18s; }
555
+ .thinking-dots span:nth-child(3) { animation-delay: 0.36s; }
556
+ @keyframes dot-bounce {
557
+ 0%, 100% { opacity: 0.2; transform: translateY(0px); }
558
+ 50% { opacity: 1; transform: translateY(-5px); }
559
+ }
560
+
561
+ .dual-loading-notice {
562
+ display: flex; align-items: flex-start; gap: 12px;
563
+ background: rgba(79,126,248,0.07); border: 1px solid rgba(79,126,248,0.28);
564
+ border-radius: var(--radius); padding: 13px 18px; margin-bottom: 12px;
565
+ animation: notice-pulse 2s ease-in-out infinite;
566
+ }
567
+ .dual-loading-notice .notice-spinner { margin-top: 2px; }
568
+ .dual-loading-body { display: flex; flex-direction: column; gap: 3px; }
569
+ .dual-loading-title { font-size: 13px; font-weight: 600; color: #8ab4f8; }
570
+ .dual-loading-sub { font-size: 11px; color: var(--text-dim); }
571
+
572
+ .chat-panel { background: var(--surface); border: 1px solid var(--border); border-radius: var(--radius); overflow: hidden; }
573
+ .gradio-chatbot { background: var(--surface) !important; border: none !important; color: var(--text) !important; }
574
+
575
+ .settings-panel { background: var(--surface); border: 1px solid var(--border); border-radius: var(--radius); padding: 18px; }
576
+ .zerogpu-notice { background: rgba(245,158,11,0.08); border: 1px solid rgba(245,158,11,0.25); color: #fcd34d; border-radius: 8px; padding: 12px 14px; font-size: 11px; margin-bottom: 16px; line-height: 1.6; font-weight: 500; }
577
+ .zgn-title { font-weight: 700; font-size: 11.5px; margin-bottom: 3px; }
578
+ .zgn-divider { border-top: 1px solid rgba(245,158,11,0.2); margin: 9px 0; }
579
+ .zerogpu-notice strong { color: #fde68a; }
580
+ .settings-title { font-family: 'Space Grotesk', sans-serif; font-size: 10.5px; font-weight: 700; color: var(--text-dim); text-transform: uppercase; letter-spacing: 1.2px; margin-bottom: 14px; padding-bottom: 10px; border-bottom: 1px solid var(--border); }
581
+ .settings-hint { font-size: 11.5px; color: var(--text-dim); line-height: 1.8; margin-top: 14px; padding-top: 14px; border-top: 1px solid var(--border); }
582
+ .settings-hint strong { color: rgba(221,228,244,0.55); font-weight: 500; display: block; margin-bottom: 2px; }
583
+ .system-prompt textarea { font-size: 12px !important; min-height: 72px !important; background: var(--surface2) !important; color: var(--text) !important; border-color: var(--border) !important; border-radius: 8px !important; resize: vertical !important; }
584
+
585
+ .send-btn button { background: linear-gradient(120deg, var(--blue), var(--blue-dim)) !important; color: #fff !important; border-radius: 10px !important; font-family: 'Inter', sans-serif !important; font-weight: 500 !important; padding: 10px 20px !important; border: none !important; letter-spacing: 0.2px !important; transition: opacity 0.2s !important; }
586
+ .send-btn button:hover { opacity: 0.85 !important; }
587
+ .clear-btn button, .export-btn button, .reset-btn button { background: transparent !important; border: 1px solid var(--border) !important; color: var(--text-dim) !important; border-radius: 10px !important; font-family: 'Inter', sans-serif !important; font-weight: 500 !important; font-size: 12.5px !important; width: 100% !important; transition: all 0.2s !important; }
588
+ .clear-btn button:hover { border-color: rgba(248,113,113,0.4) !important; color: var(--red) !important; background: rgba(248,113,113,0.06) !important; }
589
+ .export-btn button:hover { border-color: rgba(79,126,248,0.4) !important; color: var(--blue) !important; background: rgba(79,126,248,0.06) !important; }
590
+ .reset-btn button:hover { border-color: rgba(251,191,36,0.4) !important; color: var(--amber) !important; background: rgba(251,191,36,0.06) !important; }
591
+
592
+ .chat-inline-status { display: flex; align-items: center; gap: 8px; padding: 6px 14px; font-size: 12px; color: var(--text-dim); border-bottom: 1px solid var(--border); background: var(--surface2); min-height: 32px; }
593
+ .csi-dot { width: 7px; height: 7px; border-radius: 50%; flex-shrink: 0; }
594
+ .csi-dot-idle { background: var(--text-dim); opacity: 0.35; }
595
+ .csi-dot-ready { background: var(--green); box-shadow: 0 0 6px rgba(52,211,153,.5); }
596
+ .csi-name { font-weight: 600; color: var(--text); }
597
+ .csi-label { color: var(--green); font-weight: 500; }
598
+ .csi-idle { font-style: italic; }
599
+
600
+ .image-upload-wrap { width: 100% !important; margin-top: 6px !important; }
601
+ .image-upload-wrap > div,
602
+ .image-upload-wrap .wrap { min-height: 260px !important; border-radius: 10px !important; }
603
+
604
+ .dual-panel { background: var(--surface); border: 1px solid var(--border); border-radius: var(--radius); overflow: hidden; }
605
+ .dual-header { display: flex; align-items: center; padding: 10px 14px; background: var(--surface2); border-bottom: 1px solid var(--border); font-size: 12px; font-weight: 600; }
606
+ .dual-label-a { color: var(--blue); }
607
+ .dual-label-b { color: var(--purple); }
608
+ .dual-send-btn button { background: linear-gradient(120deg, var(--blue), var(--purple)) !important; color: #fff !important; border-radius: 10px !important; font-family: 'Inter', sans-serif !important; font-weight: 500 !important; padding: 10px 20px !important; border: none !important; letter-spacing: 0.2px !important; transition: opacity 0.2s !important; }
609
+ .dual-send-btn button:hover { opacity: 0.85 !important; }
610
+ .dual-img-wrap { width: 100% !important; margin-top: 6px !important; }
611
+ .dual-img-wrap > div,
612
+ .dual-img-wrap .wrap { min-height: 140px !important; border-radius: 10px !important; }
613
+
614
+ input, textarea, .gr-input { background: var(--surface2) !important; color: var(--text) !important; border-color: var(--border) !important; border-radius: 10px !important; }
615
+ label { color: var(--text-dim) !important; font-size: 12px !important; }
616
+ .gr-slider input[type=range] { accent-color: var(--blue); }
617
+ .gr-box, .gr-form { background: transparent !important; }
618
+ """
619
+
620
+
621
+ # ── Static HTML fragments ─────────────────────────────────────────────────────
622
+
623
+ _STAR_COORDS = [
624
+ (8,12),(15,78),(23,45),(31,67),(42,23),(48,89),(55,34),(63,56),
625
+ (71,14),(78,72),(85,41),(92,88),(5,55),(18,33),(27,91),(36,8),
626
+ (44,62),(52,27),(59,79),(67,48),(74,19),(82,64),(89,35),(96,82),
627
+ (11,95),(20,7),(29,53),(38,74),(47,16),(56,39),
628
+ ]
629
+ _STARS_HTML = "".join(
630
+ f'<div class="star" style="left:{x}%;top:{y}%;--dur:{2.4+(i%5)*0.7}s;--delay:{-(i%8)*0.6}s"></div>'
631
+ for i,(x,y) in enumerate(_STAR_COORDS)
632
+ )
633
+
634
+ _N_MODELS = len(MODELS)
635
+ _N_FAMILIES = len(FAMILIES)
636
+
637
+ _LEGEND_HTML = "".join(
638
+ f'<span class="legend-dot">'
639
+ f'<span class="legend-swatch" style="background:{info["color"]}"></span>'
640
+ f'{_html.escape(name)}</span>'
641
+ for name, info in FAMILIES.items()
642
+ )
643
+
644
+ _HERO_HTML = f"""
645
+ <div class="gemma-hero">
646
+ <div class="hero-bg">
647
+ <div class="hero-grid"></div>
648
+ <div class="orb orb-1"></div><div class="orb orb-2"></div><div class="orb orb-3"></div>
649
+ <div class="hero-stars">{_STARS_HTML}</div>
650
+ </div>
651
+ <div class="hero-inner">
652
+ <div class="hero-top-bar">
653
+ <div class="hero-eyebrow">
654
+ <span class="hero-dot-pulse"></span>
655
+ Google DeepMind &middot; Open Models
656
+ </div>
657
+ </div>
658
+ <h1 class="hero-title">Gemma <span>Explorer</span></h1>
659
+ <p class="hero-subtitle">Explore, compare, and chat with the full Gemma open model family &mdash; from the compact 1B to the powerful 31B multimodal.</p>
660
+ <div class="hero-chips">
661
+ <span class="hero-chip"><strong>{_N_MODELS}</strong> models</span>
662
+ <span class="hero-chip"><strong>{_N_FAMILIES}</strong> generations</span>
663
+ <span class="hero-chip">Vision &mdash; Gemma 3 &amp; 4</span>
664
+ <span class="hero-chip">ZeroGPU &middot; NVIDIA H200</span>
665
+ </div>
666
+ <div class="hero-legend">{_LEGEND_HTML}</div>
667
+ </div>
668
+ </div>
669
+ """
670
+
671
+ _ZEROGPU_NOTICE = """
672
+ <div class="zerogpu-notice">
673
+ <div class="zgn-title">⚡ ZeroGPU Latency</div>
674
+ GPU allocation happens on every message in this serverless Space — expect a brief wait before the first token.
675
+ <div class="zgn-divider"></div>
676
+ <div class="zgn-title">🧠 No Memory</div>
677
+ Due to ZeroGPU constraints, each message is processed independently.
678
+ The model has <strong>no conversation history</strong> — it starts fresh on every reply.
679
+ </div>
680
+ """
681
+
682
+ _SETTINGS_HINT = """
683
+ <div class="settings-hint">
684
+ <strong>Temperature</strong>
685
+ 0 = deterministic<br>0.7 = balanced<br>1.5 = creative
686
+ <br><br>
687
+ <strong>GPU allocation</strong>
688
+ large = 70 GB H200<br>xlarge = 141 GB H200<br>(Gemma 4 31B only)
689
+ </div>
690
+ """
691
+
692
+
693
+ # ── Card / header HTML ────────────────────────────────────────────────────────
694
+
695
+ def _card_html(model_id: str, meta: dict, active: bool = False) -> str:
696
+ color = meta["family_color"]
697
+ badge = meta.get("badge")
698
+ badge_html = f'<span class="card-badge badge-{badge}">{_html.escape(badge)}</span>' if badge else ""
699
+ active_html = '<span class="card-active-badge">✓ Loaded</span>' if active else ""
700
+ vision_tag = '<span class="tag tag-vision">Vision</span>' if meta["supports_vision"] else '<span class="tag tag-text">Text only</span>'
701
+ license_tag = '<span class="tag tag-apache">Apache 2.0</span>' if meta["license_open"] else '<span class="tag tag-gemma">Gemma License</span>'
702
+ gpu_tag = '<span class="tag tag-xlarge">xlarge GPU</span>' if meta["gpu_size"] == "xlarge" else ""
703
+ instruct_tag = '<span class="tag tag-instruct">Instruct</span>' if "-it" in model_id.lower() else '<span class="tag tag-base">Base</span>'
704
+ active_cls = " card-active" if active else ""
705
+ return f"""
706
+ <div class="model-card{active_cls}">
707
+ <div class="card-accent" style="background:linear-gradient(90deg,{color},{color}66)"></div>
708
+ <div class="card-body">
709
+ <div class="card-top">
710
+ <p class="card-name">{_html.escape(meta['name'])}</p>
711
+ <span style="display:flex;gap:4px;flex-shrink:0">{badge_html}{active_html}</span>
712
+ </div>
713
+ <p class="card-desc">{_html.escape(meta['description'])}</p>
714
+ <div class="card-stats">
715
+ <span class="card-stat">{_html.escape(meta['params_short'])}</span>
716
+ <span class="card-stat">{_html.escape(meta['context'])} ctx</span>
717
+ <span class="card-stat">{_html.escape(meta['vram'])}</span>
718
+ </div>
719
+ <div class="card-tags">{vision_tag}{instruct_tag}{license_tag}{gpu_tag}</div>
720
+ </div>
721
+ </div>
722
+ """
723
+
724
+
725
+ def _family_header_html(name: str, info: dict) -> str:
726
+ new_badge = '<span class="family-new-badge">New</span>' if info.get("new") else ""
727
+ return f"""
728
+ <div class="family-header">
729
+ <div class="family-icon" style="background:linear-gradient(135deg,{info['color']},{info['color']}88)">{info['icon']}</div>
730
+ <div class="family-text"><h3>{_html.escape(name)}</h3><p>{_html.escape(info['description'])}</p></div>
731
+ <span class="family-year">{info['year']}</span>
732
+ {new_badge}
733
+ </div>
734
+ """
735
+
736
+
737
+ def _make_status_html(meta: dict, state: str = "ready") -> str:
738
+ color = meta["family_color"]
739
+ safe_name = _html.escape(meta["name"])
740
+ dot_class, label = {
741
+ "ready": ("dot-ready", "Ready"),
742
+ "already": ("dot-ready", "Already loaded"),
743
+ }.get(state, ("dot-loading", "Loading\u2026"))
744
+ vision_tag = (
745
+ '<span class="status-chip chip-vision">Vision</span>'
746
+ if meta["supports_vision"]
747
+ else '<span class="status-chip chip-text">Text only</span>'
748
+ )
749
+ return f"""
750
+ <div class="status-bar" style="--accent:{color}">
751
+ <div class="status-left">
752
+ <span class="status-dot {dot_class}"></span>
753
+ <span class="status-name" style="color:{color}">{safe_name}</span>
754
+ {vision_tag}
755
+ </div>
756
+ <div class="status-right">
757
+ <span class="status-chip">{_html.escape(meta['params_short'])}</span>
758
+ <span class="status-chip">{_html.escape(str(meta['context']))} ctx</span>
759
+ <span class="status-chip">{_html.escape(meta['vram'])}</span>
760
+ <span class="status-ok">{label}</span>
761
+ </div>
762
+ </div>
763
+ """
764
+
765
+
766
+ def _chat_inline_status(loaded: bool = False, name: str = "") -> str:
767
+ safe_name = _html.escape(name)
768
+ if loaded:
769
+ return (f'<div class="chat-inline-status">'
770
+ f'<span class="csi-dot csi-dot-ready"></span>'
771
+ f'<span class="csi-name">{safe_name}</span>'
772
+ f'<span class="csi-label">&nbsp;&middot; Ready to chat</span>'
773
+ f'</div>')
774
+ return ('<div class="chat-inline-status">'
775
+ '<span class="csi-dot csi-dot-idle"></span>'
776
+ '<span class="csi-idle">No model loaded &mdash; go to <strong>Explore Models</strong> and click <em>Load &amp; Chat</em>.</span>'
777
+ '</div>')
778
+
779
+
780
+ def _empty_status() -> str:
781
+ return '<div class="status-empty">No model loaded &mdash; select one in <strong>Explore Models</strong>.</div>'
782
+
783
+
784
+ def _loading_html(model_name: str = "") -> str:
785
+ name_part = f" <strong>{_html.escape(model_name)}</strong>" if model_name else ""
786
+ return (
787
+ '<div class="loading-notice">'
788
+ '<div class="notice-spinner"></div>'
789
+ '<div>'
790
+ f'<div>Loading{name_part}, please wait&hellip;</div>'
791
+ '<div style="font-size:11px;opacity:0.7;margin-top:3px;font-weight:400">'
792
+ '⏱&nbsp; Large models (27B, 31B) can take 1&ndash;3 min. Please be patient.'
793
+ '</div>'
794
+ '</div>'
795
+ '</div>'
796
+ )
797
+
798
+
799
+ _THINKING_HTML = (
800
+ '<div class="thinking-wrap">'
801
+ '<div class="thinking-dots"><span></span><span></span><span></span></div>'
802
+ 'Thinking&hellip;'
803
+ '</div>'
804
+ )
805
+
806
+
807
+ def _dual_loading_html(label: str, color: str, model_name: str) -> str:
808
+ return (
809
+ '<div class="dual-loading-notice">'
810
+ '<div class="notice-spinner"></div>'
811
+ '<div class="dual-loading-body">'
812
+ f'<span class="dual-loading-title" style="color:{color}">'
813
+ f'Loading {_html.escape(label)}: {_html.escape(model_name)}&hellip;'
814
+ '</span>'
815
+ '<span class="dual-loading-sub">'
816
+ '⏱&nbsp; Large models (27B, 31B) may take 1&ndash;3 min. Please be patient.'
817
+ '</span>'
818
+ '</div>'
819
+ '</div>'
820
+ )
821
+
822
+
823
+ # ── Build Gradio UI ───────────────────────────────────────────────────────────
824
+
825
+ _MODEL_CHOICES = [(meta["name"], mid) for mid, meta in MODELS.items()]
826
+
827
+ # Gradio 6.0 fix: css and theme are passed to launch() instead of Blocks()
828
+ with gr.Blocks(title="Gemma Explorer") as demo:
829
+
830
+ current_model_state = gr.State(value=None)
831
+ gr.HTML(value=_HERO_HTML)
832
+
833
+ with gr.Tabs() as main_tabs:
834
+
835
+ with gr.Tab("Explore Models", id="explore"):
836
+
837
+ status_html = gr.HTML(value=_empty_status())
838
+ loading_notice = gr.HTML(value=_loading_html(), visible=False)
839
+
840
+ card_html_components: dict[str, gr.HTML] = {}
841
+ load_btns: list[gr.Button] = []
842
+
843
+ for family_name, family_info in FAMILIES.items():
844
+ gr.HTML(_family_header_html(family_name, family_info))
845
+ family_models = list(get_models_by_family(family_name).items())
846
+
847
+ for row_start in range(0, len(family_models), 4):
848
+ row_models = family_models[row_start: row_start + 4]
849
+ with gr.Row(equal_height=True):
850
+ for model_id, meta in row_models:
851
+ with gr.Column(min_width=200, elem_classes=["model-card-wrap"]):
852
+ card_comp = gr.HTML(_card_html(model_id, meta, active=False))
853
+ card_html_components[model_id] = card_comp
854
+ btn_cls = ["card-btn", "card-btn-xlarge"] if meta["gpu_size"] == "xlarge" else ["card-btn"]
855
+ load_btn = gr.Button("Load & Chat", elem_classes=btn_cls)
856
+ load_btn._model_id = model_id
857
+ load_btns.append(load_btn)
858
+
859
+ with gr.Tab("Single Chat", id="single"):
860
+
861
+ chat_status_html = gr.HTML(value=_empty_status())
862
+ chat_loading_notice = gr.HTML(value=_loading_html(), visible=False)
863
+
864
+ with gr.Row(equal_height=False):
865
+
866
+ with gr.Column(scale=4, elem_classes=["chat-panel"]):
867
+ chat_inline = gr.HTML(value=_chat_inline_status(loaded=False))
868
+ thinking_html = gr.HTML(value="", visible=False)
869
+ chatbot = gr.Chatbot(value=[], height=480, show_label=False, placeholder="")
870
+
871
+ with gr.Row():
872
+ msg_input = gr.Textbox(
873
+ placeholder="Type your message here\u2026",
874
+ show_label=False, scale=5, lines=1, max_lines=5, autofocus=True,
875
+ )
876
+ send_btn = gr.Button("Send", variant="primary", elem_classes=["send-btn"], scale=1)
877
+
878
+ image_input = gr.Image(
879
+ type="numpy", label="Attach image (optional)",
880
+ show_label=True, visible=False,
881
+ elem_classes=["image-upload-wrap"], height=260,
882
+ )
883
+
884
+ with gr.Column(scale=1, elem_classes=["settings-panel"]):
885
+ gr.HTML(_ZEROGPU_NOTICE)
886
+ gr.HTML('<div class="settings-title">Parameters</div>')
887
+ max_tokens = gr.Slider(64, 2048, value=512, step=64, label="Max new tokens")
888
+ temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="Temperature")
889
+ system_prompt = gr.Textbox(
890
+ label="System prompt (optional)",
891
+ placeholder="e.g. You are a helpful assistant\u2026",
892
+ lines=3, max_lines=6,
893
+ elem_classes=["system-prompt"],
894
+ )
895
+ gr.HTML(_SETTINGS_HINT)
896
+ reset_btn = gr.Button("↺ Reset params", elem_classes=["reset-btn"])
897
+ clear_btn = gr.Button("Clear Chat", elem_classes=["clear-btn"])
898
+ export_btn = gr.Button("⬇ Export .md", elem_classes=["export-btn"])
899
+ export_file = gr.File(label="Download chat", visible=False)
900
+
901
+ with gr.Tab("Dual Chat", id="dual"):
902
+
903
+ gr.HTML("""
904
+ <div class="zerogpu-notice" style="margin-bottom:16px">
905
+ <div class="zgn-title">⚔️ Dual Chat — Side-by-Side Comparison</div>
906
+ Send the same prompt to two models and compare their responses.
907
+ Models are loaded and run sequentially — Model A first, then Model B.
908
+ <div class="zgn-divider"></div>
909
+ <div class="zgn-title">🧠 No Memory</div>
910
+ Same ZeroGPU constraints apply — each turn is processed independently with no context history.
911
+ </div>
912
+ """)
913
+
914
+ dual_loading_html = gr.HTML(value="", visible=False)
915
+
916
+ with gr.Row():
917
+ with gr.Column(scale=1):
918
+ dual_model_a = gr.Dropdown(choices=_MODEL_CHOICES, value=list(MODELS.keys())[0], label="Model A")
919
+ dual_sys_a = gr.Textbox(label="System prompt A (optional)", lines=2,
920
+ placeholder="e.g. Answer concisely.",
921
+ elem_classes=["system-prompt"])
922
+ with gr.Column(scale=1):
923
+ dual_model_b = gr.Dropdown(
924
+ choices=_MODEL_CHOICES,
925
+ value=list(MODELS.keys())[min(3, len(MODELS)-1)],
926
+ label="Model B",
927
+ )
928
+ dual_sys_b = gr.Textbox(label="System prompt B (optional)", lines=2,
929
+ placeholder="e.g. Answer in detail.",
930
+ elem_classes=["system-prompt"])
931
+
932
+ with gr.Row(equal_height=True):
933
+ with gr.Column(scale=1, elem_classes=["dual-panel"]):
934
+ gr.HTML('<div class="dual-header"><span class="dual-label-a">▲ Model A</span></div>')
935
+ dual_bot_a = gr.Chatbot(value=[], height=400, show_label=False)
936
+ with gr.Column(scale=1, elem_classes=["dual-panel"]):
937
+ gr.HTML('<div class="dual-header"><span class="dual-label-b">▲ Model B</span></div>')
938
+ dual_bot_b = gr.Chatbot(value=[], height=400, show_label=False)
939
+
940
+ _init_a = list(MODELS.keys())[0]
941
+ _init_b = list(MODELS.keys())[min(3, len(MODELS)-1)]
942
+ _both_vision_init = MODELS[_init_a]["supports_vision"] and MODELS[_init_b]["supports_vision"]
943
+ dual_img = gr.Image(
944
+ type="numpy",
945
+ label="Attach image — sent to both models (only available when both models support vision)",
946
+ show_label=True,
947
+ elem_classes=["dual-img-wrap"],
948
+ height=160,
949
+ visible=_both_vision_init,
950
+ )
951
+
952
+ with gr.Row():
953
+ dual_msg = gr.Textbox(placeholder="Type a prompt — it will be sent to both models\u2026",
954
+ show_label=False, scale=5, lines=1, max_lines=4)
955
+ dual_send = gr.Button("Send to Both", variant="primary",
956
+ elem_classes=["dual-send-btn"], scale=1)
957
+
958
+ with gr.Row():
959
+ dual_max_tokens = gr.Slider(64, 2048, value=512, step=64, label="Max new tokens")
960
+ dual_temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="Temperature")
961
+ dual_clear = gr.Button("Clear Both", elem_classes=["clear-btn"])
962
+
963
+ # ── Event wiring ──────────────────────────────────────────────────────────
964
+
965
+ card_html_list = [card_html_components[mid] for mid in MODELS]
966
+ _n_cards = len(card_html_list)
967
+
968
+ # Fix: use a factory instead of a lambda; lambdas cannot contain yield, and the factory binds model_id early
969
+ def make_load_fn(model_id):
970
+ def load_fn():
971
+ yield from load_model_stream(model_id, _n_cards)
972
+ return load_fn
973
+
974
+ for btn in load_btns:
975
+ btn.click(
976
+ fn=make_load_fn(btn._model_id),
977
+ inputs=[],
978
+ outputs=[
979
+ loading_notice, chat_loading_notice,
980
+ status_html, main_tabs, image_input, current_model_state,
981
+ chatbot, chat_inline,
982
+ *card_html_list,
983
+ ],
984
+ )
985
+
986
+ # ── Single chat ───────────────────────────────────────────────────────────
987
+
988
+ def _do_respond(message, image, max_toks, temp, sys_prompt, history):
989
+ full_msg = (sys_prompt.strip() + "\n\n" + message) if sys_prompt.strip() else message
990
+ last_hist = history
991
+ _lock = gr.update(interactive=False)
992
+ _unlock = gr.update(interactive=True)
993
+ _thinking = gr.update(visible=True, value=_THINKING_HTML)
994
+ _done = gr.update(visible=False, value="")
995
+
996
+ yield last_hist, gr.update(value=""), gr.update(), _lock, _lock, _thinking
997
+
998
+ for hist_update, img_update in respond(full_msg, image, max_toks, temp, history):
999
+ last_hist = hist_update
1000
+ yield last_hist, gr.update(), img_update, gr.update(), gr.update(), _done
1001
+
1002
+ yield last_hist, gr.update(), gr.update(value=None), _unlock, _unlock, _done
1003
+
1004
+ _single_inputs = [msg_input, image_input, max_tokens, temperature, system_prompt, chatbot]
1005
+ _single_outputs = [chatbot, msg_input, image_input, send_btn, msg_input, thinking_html]
1006
+
1007
+ msg_input.submit(fn=_do_respond, inputs=_single_inputs, outputs=_single_outputs)
1008
+ send_btn.click( fn=_do_respond, inputs=_single_inputs, outputs=_single_outputs)
1009
+
1010
+ clear_btn.click(fn=lambda: [], outputs=[chatbot])
1011
+ reset_btn.click(fn=lambda: (512, 0.7, ""), outputs=[max_tokens, temperature, system_prompt])
1012
+
1013
+ def _do_export(history):
1014
+ if not history:
1015
+ return gr.update(visible=False)
1016
+ content = export_chat(history)
1017
+ path = f"/tmp/gemma_chat_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.md"
1018
+ with open(path, "w") as f:
1019
+ f.write(content)
1020
+ return gr.update(value=path, visible=True)
1021
+
1022
+ export_btn.click(fn=_do_export, inputs=[chatbot], outputs=[export_file])
1023
+
1024
+ # ── Dual chat ─────────────────────────────────────────────────────────────
1025
+
1026
+ def _dual_img_visibility(model_a, model_b):
1027
+ a_ok = MODELS.get(model_a, {}).get("supports_vision", False)
1028
+ b_ok = MODELS.get(model_b, {}).get("supports_vision", False)
1029
+ return gr.update(visible=(a_ok and b_ok))
1030
+
1031
+ dual_model_a.change(
1032
+ fn=_dual_img_visibility,
1033
+ inputs=[dual_model_a, dual_model_b],
1034
+ outputs=[dual_img],
1035
+ )
1036
+ dual_model_b.change(
1037
+ fn=_dual_img_visibility,
1038
+ inputs=[dual_model_a, dual_model_b],
1039
+ outputs=[dual_img],
1040
+ )
1041
+
1042
+ def _do_dual(message, image, max_toks, temp, sys_a, sys_b, model_a, model_b, hist_a, hist_b):
1043
+ if not message.strip() and image is None:
1044
+ yield hist_a, hist_b, gr.update(), gr.update(), gr.update(), gr.update(), gr.update(visible=False)
1045
+ return
1046
+
1047
+ _lock = gr.update(interactive=False)
1048
+ _unlock = gr.update(interactive=True)
1049
+
1050
+ name_a = MODELS.get(model_a, {}).get("name", model_a)
1051
+ name_b = MODELS.get(model_b, {}).get("name", model_b)
1052
+ user_msg = message or "[image attached]"
1053
+
1054
+ yield (hist_a, hist_b, gr.update(value=""), gr.update(), _lock, _lock,
1055
+ gr.update(visible=True, value=_dual_loading_html("Model A", "var(--blue)", name_a)))
1056
+
1057
+ try:
1058
+ _load_weights(model_a)
1059
+ except Exception as exc:
1060
+ _purge_model()
1061
+ err = f"❌ Failed to load {name_a}: {exc}"
1062
+ yield (hist_a + [{"role": "user", "content": user_msg}, {"role": "assistant", "content": err}],
1063
+ hist_b, gr.update(), gr.update(value=None), _unlock, _unlock,
1064
+ gr.update(visible=False))
1065
+ return
1066
+
1067
+ new_hist_a = hist_a + [{"role": "user", "content": user_msg}]
1068
+ meta_a = MODELS[model_a]
1069
+ infer_fn = infer_xlarge if meta_a["gpu_size"] == "xlarge" else infer_large
1070
+ full_a = (sys_a.strip() + "\n\n" + message) if sys_a.strip() else message
1071
+ partial_a = ""
1072
+
1073
+ for chunk in infer_fn(full_a, image, max_toks, temp):
1074
+ partial_a = chunk
1075
+ yield (new_hist_a + [{"role": "assistant", "content": partial_a}],
1076
+ hist_b, gr.update(), gr.update(), gr.update(), gr.update(), gr.update())
1077
+
1078
+ hist_a = new_hist_a + [{"role": "assistant", "content": partial_a}]
1079
+
1080
+ yield (hist_a, hist_b, gr.update(), gr.update(), gr.update(), gr.update(),
1081
+ gr.update(visible=True, value=_dual_loading_html("Model B", "var(--purple)", name_b)))
1082
+
1083
+ try:
1084
+ _load_weights(model_b)
1085
+ except Exception as exc:
1086
+ _purge_model()
1087
+ err = f"❌ Failed to load {name_b}: {exc}"
1088
+ yield (hist_a,
1089
+ hist_b + [{"role": "user", "content": user_msg}, {"role": "assistant", "content": err}],
1090
+ gr.update(), gr.update(value=None), _unlock, _unlock, gr.update(visible=False))
1091
+ return
1092
+
1093
+ new_hist_b = hist_b + [{"role": "user", "content": user_msg}]
1094
+ meta_b = MODELS[model_b]
1095
+ infer_fn = infer_xlarge if meta_b["gpu_size"] == "xlarge" else infer_large
1096
+ full_b = (sys_b.strip() + "\n\n" + message) if sys_b.strip() else message
1097
+ partial_b = ""
1098
+
1099
+ for chunk in infer_fn(full_b, image, max_toks, temp):
1100
+ partial_b = chunk
1101
+ yield (hist_a,
1102
+ new_hist_b + [{"role": "assistant", "content": partial_b}],
1103
+ gr.update(), gr.update(), gr.update(), gr.update(), gr.update())
1104
+
1105
+ yield (hist_a, new_hist_b + [{"role": "assistant", "content": partial_b}],
1106
+ gr.update(), gr.update(value=None), _unlock, _unlock, gr.update(visible=False))
1107
+
1108
+ _dual_inputs = [dual_msg, dual_img, dual_max_tokens, dual_temperature,
1109
+ dual_sys_a, dual_sys_b, dual_model_a, dual_model_b, dual_bot_a, dual_bot_b]
1110
+ _dual_outputs = [dual_bot_a, dual_bot_b, dual_msg, dual_img,
1111
+ dual_send, dual_msg, dual_loading_html]
1112
+
1113
+ dual_send.click(fn=_do_dual, inputs=_dual_inputs, outputs=_dual_outputs)
1114
+ dual_msg.submit( fn=_do_dual, inputs=_dual_inputs, outputs=_dual_outputs)
1115
+ dual_clear.click(fn=lambda: ([], []), outputs=[dual_bot_a, dual_bot_b])
1116
+
1117
+
1118
+ if __name__ == "__main__":
1119
+ _debug = os.environ.get("GRADIO_DEBUG", "0") == "1"
1120
+ # Gradio 6.0 fix: css and theme are now passed in launch()
1121
+ demo.launch(
1122
+ debug=_debug,
1123
+ css=CSS,
1124
+ theme=gr.themes.Base()
1125
+ )
models_data.py ADDED
@@ -0,0 +1,338 @@
+ import torch
+
+ # ─── Model Registry ───────────────────────────────────────────────────────────
+ # loader_type:
+ #   "multimodal"    → AutoModelForMultimodalLM + AutoProcessor (Gemma 4)
+ #   "vision_causal" → AutoModelForCausalLM + AutoProcessor (Gemma 3 vision)
+ #   "causal"        → AutoModelForCausalLM + AutoTokenizer (text-only)
+
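+ # Illustrative sketch of how a caller might dispatch on loader_type (the real
+ # loading code lives in app.py; class names follow the table above):
+ #
+ #     meta = MODELS[model_id]
+ #     cls = {"multimodal": AutoModelForMultimodalLM,
+ #            "vision_causal": AutoModelForCausalLM,
+ #            "causal": AutoModelForCausalLM}[meta["loader_type"]]
+ #     model = cls.from_pretrained(model_id, torch_dtype=meta["torch_dtype"],
+ #                                 device_map="auto")
+ #     tok_or_proc = (AutoTokenizer if meta["loader_type"] == "causal"
+ #                    else AutoProcessor).from_pretrained(model_id)
+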
+ MODELS = {
+     # ── Gemma 4 ───────────────────────────────────────────────────────────────
+     "google/gemma-4-E2B-it": {
+         "name": "Gemma 4 E2B",
+         "short": "E2B",
+         "family": "Gemma 4",
+         "family_color": "#1a73e8",
+         "params": "2.3B active / 5.1B total",
+         "params_short": "2.3B",
+         "context": "128K",
+         "context_k": 128,
+         "gpu_size": "large",
+         "supports_vision": True,
+         "loader_type": "multimodal",
+         "torch_dtype": torch.bfloat16,
+         "description": "Most compact Gemma 4. PLE architecture with image support. Fast and efficient.",
+         "release_year": 2026,
+         "license": "Apache 2.0",
+         "license_open": True,
+         "vram": "~10 GB",
+         "lmarena": None,
+         "architecture": "Transformer + PLE",
+         "badge": "NEW",
+     },
+     "google/gemma-4-E4B-it": {
+         "name": "Gemma 4 E4B",
+         "short": "E4B",
+         "family": "Gemma 4",
+         "family_color": "#1a73e8",
+         "params": "4.5B active / 8B total",
+         "params_short": "4.5B",
+         "context": "128K",
+         "context_k": 128,
+         "gpu_size": "large",
+         "supports_vision": True,
+         "loader_type": "multimodal",
+         "torch_dtype": torch.bfloat16,
+         "description": "Greater capacity with PLE and Shared KV Cache. Image + text. Great balance.",
+         "release_year": 2026,
+         "license": "Apache 2.0",
+         "license_open": True,
+         "vram": "~16 GB",
+         "lmarena": None,
+         "architecture": "Transformer + PLE",
+         "badge": "NEW",
+     },
+     "google/gemma-4-26B-A4B-it": {
+         "name": "Gemma 4 26B MoE",
+         "short": "26B MoE",
+         "family": "Gemma 4",
+         "family_color": "#1a73e8",
+         "params": "4B active / 26B total",
+         "params_short": "26B MoE",
+         "context": "256K",
+         "context_k": 256,
+         "gpu_size": "large",
+         "supports_vision": True,
+         "loader_type": "multimodal",
+         "torch_dtype": torch.bfloat16,
+         "description": "Mixture-of-Experts with only 4B active parameters. LMArena ~1441. Image + text.",
+         "release_year": 2026,
+         "license": "Apache 2.0",
+         "license_open": True,
+         "vram": "~52 GB",
+         "lmarena": 1441,
+         "architecture": "MoE Transformer",
+         "badge": "NEW",
+     },
+     "google/gemma-4-31B-it": {
+         "name": "Gemma 4 31B",
+         "short": "31B",
+         "family": "Gemma 4",
+         "family_color": "#1a73e8",
+         "params": "31B parameters",
+         "params_short": "31B",
+         "context": "256K",
+         "context_k": 256,
+         "gpu_size": "xlarge",
+         "supports_vision": True,
+         "loader_type": "multimodal",
+         "torch_dtype": torch.bfloat16,
+         "description": "Most powerful Gemma 4. Dense Transformer. LMArena ~1452. On par with models 30× larger.",
+         "release_year": 2026,
+         "license": "Apache 2.0",
+         "license_open": True,
+         "vram": "~62 GB",
+         "lmarena": 1452,
+         "architecture": "Dense Transformer",
+         "badge": "FLAGSHIP",
+     },
+     # ── Gemma 3 ───────────────────────────────────────────────────────────────
+     "google/gemma-3-1b-it": {
+         "name": "Gemma 3 1B",
+         "short": "1B",
+         "family": "Gemma 3",
+         "family_color": "#137333",
+         "params": "1B parameters",
+         "params_short": "1B",
+         "context": "32K",
+         "context_k": 32,
+         "gpu_size": "large",
+         "supports_vision": False,
+         "loader_type": "causal",
+         "torch_dtype": torch.bfloat16,
+         "description": "Ultra-lightweight. Ideal for edge devices and low-latency tasks. Text only.",
+         "release_year": 2025,
+         "license": "Gemma",
+         "license_open": False,
+         "vram": "~2 GB",
+         "lmarena": None,
+         "architecture": "Transformer",
+         "badge": None,
+     },
+     "google/gemma-3-4b-it": {
+         "name": "Gemma 3 4B",
+         "short": "4B",
+         "family": "Gemma 3",
+         "family_color": "#137333",
+         "params": "4B parameters",
+         "params_short": "4B",
+         "context": "128K",
+         "context_k": 128,
+         "gpu_size": "large",
+         "supports_vision": True,
+         "loader_type": "vision_causal",
+         "torch_dtype": torch.bfloat16,
+         "description": "Perfect balance between size and capability. Image + text. 128K context.",
+         "release_year": 2025,
+         "license": "Gemma",
+         "license_open": False,
+         "vram": "~8 GB",
+         "lmarena": None,
+         "architecture": "Transformer",
+         "badge": None,
+     },
+     "google/gemma-3-12b-it": {
+         "name": "Gemma 3 12B",
+         "short": "12B",
+         "family": "Gemma 3",
+         "family_color": "#137333",
+         "params": "12B parameters",
+         "params_short": "12B",
+         "context": "128K",
+         "context_k": 128,
+         "gpu_size": "large",
+         "supports_vision": True,
+         "loader_type": "vision_causal",
+         "torch_dtype": torch.bfloat16,
+         "description": "High-capacity multimodal. Complex reasoning and image analysis.",
+         "release_year": 2025,
+         "license": "Gemma",
+         "license_open": False,
+         "vram": "~24 GB",
+         "lmarena": None,
+         "architecture": "Transformer",
+         "badge": None,
+     },
+     "google/gemma-3-27b-it": {
+         "name": "Gemma 3 27B",
+         "short": "27B",
+         "family": "Gemma 3",
+         "family_color": "#137333",
+         "params": "27B parameters",
+         "params_short": "27B",
+         "context": "128K",
+         "context_k": 128,
+         "gpu_size": "large",
+         "supports_vision": True,
+         "loader_type": "vision_causal",
+         "torch_dtype": torch.bfloat16,
+         "description": "Most capable Gemma 3. Advanced vision and high-level reasoning.",
+         "release_year": 2025,
+         "license": "Gemma",
+         "license_open": False,
+         "vram": "~54 GB",
+         "lmarena": None,
+         "architecture": "Transformer",
+         "badge": None,
+     },
+     # ── Gemma 2 ───────────────────────────────────────────────────────────────
+     "google/gemma-2-2b-it": {
+         "name": "Gemma 2 2B",
+         "short": "2B",
+         "family": "Gemma 2",
+         "family_color": "#e37400",
+         "params": "2B parameters",
+         "params_short": "2B",
+         "context": "8K",
+         "context_k": 8,
+         "gpu_size": "large",
+         "supports_vision": False,
+         "loader_type": "causal",
+         "torch_dtype": torch.bfloat16,
+         "description": "Fast and efficient. Sliding Window Attention. Text only.",
+         "release_year": 2024,
+         "license": "Gemma",
+         "license_open": False,
+         "vram": "~4 GB",
+         "lmarena": None,
+         "architecture": "Sliding Window Attn",
+         "badge": None,
+     },
+     "google/gemma-2-9b-it": {
+         "name": "Gemma 2 9B",
+         "short": "9B",
+         "family": "Gemma 2",
+         "family_color": "#e37400",
+         "params": "9B parameters",
+         "params_short": "9B",
+         "context": "8K",
+         "context_k": 8,
+         "gpu_size": "large",
+         "supports_vision": False,
+         "loader_type": "causal",
+         "torch_dtype": torch.bfloat16,
+         "description": "Solid text performance. Efficient architecture with sliding window.",
+         "release_year": 2024,
+         "license": "Gemma",
+         "license_open": False,
+         "vram": "~18 GB",
+         "lmarena": None,
+         "architecture": "Sliding Window Attn",
+         "badge": None,
+     },
+     "google/gemma-2-27b-it": {
+         "name": "Gemma 2 27B",
+         "short": "27B",
+         "family": "Gemma 2",
+         "family_color": "#e37400",
+         "params": "27B parameters",
+         "params_short": "27B",
+         "context": "8K",
+         "context_k": 8,
+         "gpu_size": "large",
+         "supports_vision": False,
+         "loader_type": "causal",
+         "torch_dtype": torch.bfloat16,
+         "description": "Largest Gemma 2. High performance on complex text tasks.",
+         "release_year": 2024,
+         "license": "Gemma",
+         "license_open": False,
+         "vram": "~54 GB",
+         "lmarena": None,
+         "architecture": "Sliding Window Attn",
+         "badge": None,
+     },
+     # ── Gemma 1 ───────────────────────────────────────────────────────────────
+     "google/gemma-1.1-2b-it": {
+         "name": "Gemma 1.1 2B",
+         "short": "2B",
+         "family": "Gemma 1",
+         "family_color": "#c5221f",
+         "params": "2B parameters",
+         "params_short": "2B",
+         "context": "8K",
+         "context_k": 8,
+         "gpu_size": "large",
+         "supports_vision": False,
+         "loader_type": "causal",
+         "torch_dtype": torch.float16,
+         "description": "The original foundation model. Where it all began. Text only.",
+         "release_year": 2024,
+         "license": "Gemma",
+         "license_open": False,
+         "vram": "~4 GB",
+         "lmarena": None,
+         "architecture": "Transformer",
+         "badge": None,
+     },
+     "google/gemma-1.1-7b-it": {
+         "name": "Gemma 1.1 7B",
+         "short": "7B",
+         "family": "Gemma 1",
+         "family_color": "#c5221f",
+         "params": "7B parameters",
+         "params_short": "7B",
+         "context": "8K",
+         "context_k": 8,
+         "gpu_size": "large",
+         "supports_vision": False,
+         "loader_type": "causal",
+         "torch_dtype": torch.float16,
+         "description": "The original 7B. The historical base of the entire Gemma family.",
+         "release_year": 2024,
+         "license": "Gemma",
+         "license_open": False,
+         "vram": "~14 GB",
+         "lmarena": None,
+         "architecture": "Transformer",
+         "badge": None,
+     },
+ }
+
+ FAMILIES = {
+     "Gemma 4": {
+         "color": "#1a73e8",
+         "bg": "#e8f0fe",
+         "year": 2026,
+         "description": "The newest generation. Full multimodal (image + text). Apache 2.0. Just launched!",
+         "icon": "✦",
+         "new": True,
+     },
+     "Gemma 3": {
+         "color": "#137333",
+         "bg": "#e6f4ea",
+         "year": 2025,
+         "description": "Third Gemma generation, and the first with vision. Long contexts up to 128K tokens.",
+         "icon": "◆",
+         "new": False,
+     },
+     "Gemma 2": {
+         "color": "#e37400",
+         "bg": "#fef7e0",
+         "year": 2024,
+         "description": "Optimized for text with Sliding Window Attention. Efficient and fast.",
+         "icon": "●",
+         "new": False,
+     },
+     "Gemma 1": {
+         "color": "#c5221f",
+         "bg": "#fce8e6",
+         "year": 2024,
+         "description": "The original foundation models from Google DeepMind.",
+         "icon": "◉",
+         "new": False,
+     },
+ }
+
+ def get_models_by_family(family: str):
+     return {k: v for k, v in MODELS.items() if v["family"] == family}
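+
+ # Example: get_models_by_family("Gemma 2") returns the three gemma-2 *-it
+ # entries keyed by repo id; app.py presumably uses this to build each
+ # family's model picker.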
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ gradio>=4.44.0
+ transformers>=4.51.0
+ torch>=2.3.0
+ torchvision>=0.18.0
+ accelerate>=0.30.0
+ Pillow>=10.0.0
+ sentencepiece>=0.2.0
+ huggingface_hub>=0.23.0
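+ # Note: the `spaces` (ZeroGPU) package is preinstalled on Hugging Face Spaces
+ # hardware, so it is not pinned here.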