Spaces:
Sleeping
Sleeping
Upload 3 files
Browse files- app.py +1125 -0
- models_data.py +338 -0
- requirements.txt +8 -0
app.py
ADDED
|
@@ -0,0 +1,1125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Gemma Explorer β Gradio app to explore and chat with the full Gemma model family.
|
| 3 |
+
Powered by Google DeepMind models on ZeroGPU (NVIDIA H200).
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import gc
|
| 8 |
+
import json
|
| 9 |
+
import datetime
|
| 10 |
+
import html as _html
|
| 11 |
+
import gradio as gr
|
| 12 |
+
import torch
|
| 13 |
+
import spaces
|
| 14 |
+
from transformers import (
|
| 15 |
+
AutoTokenizer,
|
| 16 |
+
AutoProcessor,
|
| 17 |
+
AutoModelForCausalLM,
|
| 18 |
+
TextIteratorStreamer,
|
| 19 |
+
)
|
| 20 |
+
from threading import Thread
|
| 21 |
+
from PIL import Image
|
| 22 |
+
import numpy as np
|
| 23 |
+
from models_data import MODELS, FAMILIES, get_models_by_family
|
| 24 |
+
|
| 25 |
+
# Authenticate against the Hugging Face Hub when a token is provided
# (required for gated Gemma checkpoints).
_HF_TOKEN = os.environ.get("HF_TOKEN")
if _HF_TOKEN:
    from huggingface_hub import login
    # add_to_git_credential=False avoids token-overwrite warnings
    login(token=_HF_TOKEN, add_to_git_credential=False)
|
| 30 |
+
|
| 31 |
+
# Runtime fallback: install torchvision if the Space image lacks it.
# NOTE(review): presumably needed by the vision processors at import
# time — confirm, and prefer pinning torchvision in requirements.txt
# instead of installing at startup.
try:
    import torchvision  # noqa: F401
except ImportError:
    import subprocess, sys
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "torchvision", "-q", "--no-input"],
        check=True,
    )
|
| 39 |
+
|
| 40 |
+
# AutoModelForMultimodalLM only exists in newer transformers releases;
# degrade gracefully when absent (the "multimodal" loader path then
# raises a clear ImportError inside _load_weights).
try:
    from transformers import AutoModelForMultimodalLM
    _HAS_MULTIMODAL = True
except ImportError:
    _HAS_MULTIMODAL = False
    AutoModelForMultimodalLM = None
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
# ββ Model state βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 49 |
+
|
| 50 |
+
_model = None        # currently loaded HF model, or None when nothing is loaded
_processor = None    # matching tokenizer/processor for _model, or None
_current_id = None   # key into MODELS for the loaded model, or None
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def _purge_model():
    """Release the currently loaded model/processor and reclaim CPU memory.

    Never touches CUDA directly — CUDA cleanup only happens inside
    @spaces.GPU contexts.
    """
    global _model, _processor, _current_id
    # Rebinding the globals to None drops the last references, which is
    # exactly what the previous del-then-reassign dance achieved.
    _model = None
    _processor = None
    _current_id = None
    gc.collect()
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def _load_weights(model_id: str):
    """Load model weights for *model_id*, replacing any previous model.

    Always purges the previous model first, no exceptions. If loading
    fails midway (e.g. the processor loaded but ``from_pretrained`` on
    the model raised), the partial state is purged again before
    re-raising, so the module never holds a processor without its model.
    (Previously a failed load could leave a stale ``_processor`` behind,
    and ``load_model_stream``'s error path does not purge.)

    Raises:
        ImportError: "multimodal" loader requested but this transformers
            build lacks ``AutoModelForMultimodalLM``.
        Exception: whatever ``from_pretrained`` raises (auth, network, OOM).
    """
    global _model, _processor, _current_id

    meta = MODELS[model_id]
    loader = meta["loader_type"]
    dtype = meta["torch_dtype"]

    _purge_model()

    try:
        if loader == "multimodal":
            if not _HAS_MULTIMODAL:
                raise ImportError("AutoModelForMultimodalLM not available. Run: pip install -U transformers")
            _processor = AutoProcessor.from_pretrained(model_id, token=_HF_TOKEN)
            _model = AutoModelForMultimodalLM.from_pretrained(model_id, torch_dtype=dtype, token=_HF_TOKEN)
        elif loader == "vision_causal":
            _processor = AutoProcessor.from_pretrained(model_id, token=_HF_TOKEN)
            _model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=dtype, token=_HF_TOKEN)
        else:
            _processor = AutoTokenizer.from_pretrained(model_id, token=_HF_TOKEN)
            _model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=dtype, token=_HF_TOKEN)
    except Exception:
        # Never leave a half-initialized model/processor pair behind.
        _purge_model()
        raise

    _model.eval()
    _current_id = model_id
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def load_model_stream(model_id: str, card_html_list_len: int):
    """
    Two-phase generator so the loading notice is *visibly rendered* before
    the blocking from_pretrained() call starts.

    Args:
        model_id: key into MODELS for the checkpoint to load.
        card_html_list_len: number of model-card HTML components wired as
            trailing outputs; each yield must pad to this length.

    NOTE(review): every yield's tuple length and order must match the
    Gradio event's ``outputs=`` wiring (defined elsewhere in this file) —
    verify before reordering anything here.
    """
    meta = MODELS[model_id]
    n = card_html_list_len

    # ── Phase 1: show loading notice ──────────────────────────────────────
    loading = _loading_html(meta["name"])
    yield (
        gr.update(visible=True, value=loading),
        gr.update(visible=True, value=loading),
        gr.update(),
        gr.update(),
        gr.update(),
        gr.update(),
        gr.update(),
        gr.update(),
        *([gr.update()] * n),  # leave all model cards untouched
    )

    # ── Phase 2: actual load ──────────────────────────────────────────────
    try:
        _load_weights(model_id)
        status = _make_status_html(meta, state="ready")
        inline = _chat_inline_status(loaded=True, name=meta["name"])
        # Re-render every card so the active one gets its highlight.
        cards = [
            gr.update(value=_card_html(mid, m, active=(mid == model_id)))
            for mid, m in MODELS.items()
        ]
        yield (
            gr.update(visible=False),
            gr.update(visible=False),
            status,
            gr.update(selected="single"),
            gr.update(visible=meta["supports_vision"]),  # image input only for vision models
            gr.update(value=model_id),
            [],      # reset chat history
            inline,
            *cards,
        )
    except Exception as exc:
        # Escape both the model name and the exception text before
        # interpolating into HTML.
        error = (f'<div class="status-error">Error loading '
                 f'<strong>{_html.escape(meta["name"])}</strong>: '
                 f'{_html.escape(str(exc))}</div>')
        yield (
            gr.update(visible=False),
            gr.update(visible=False),
            error,
            gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
            *([gr.update()] * n),
        )
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
# ββ Inference βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 151 |
+
|
| 152 |
+
def _run_inference(message: str, image, max_new_tokens: int, temperature: float):
    """Core streaming generation: yields growing partial response strings.

    Must run inside a @spaces.GPU context (see infer_large/infer_xlarge):
    it moves the model to CUDA and calls torch.cuda directly.
    """
    global _model, _processor, _current_id

    # Guard: nothing loaded yet.
    if _model is None or _current_id is None or _processor is None:
        yield "No model loaded. Go to Explore Models and click Load & Chat."
        return

    # Guard: loaded id no longer matches the catalog — reset and bail.
    if _current_id not in MODELS:
        _purge_model()
        yield "Model state corrupted β please reload the model from Explore Models."
        return

    torch.cuda.empty_cache()

    # Clamp user-supplied generation params to safe ranges.
    max_new_tokens = max(64, min(int(max_new_tokens), 2048))
    temperature = max(0.0, min(float(temperature), 1.5))

    meta = MODELS[_current_id]
    loader = meta["loader_type"]
    device = "cuda"
    _model.to(device)

    # Normalize the image input to PIL (Gradio may deliver a numpy array);
    # any conversion failure degrades to text-only.
    pil_image = None
    if image is not None:
        try:
            pil_image = Image.fromarray(image) if isinstance(image, np.ndarray) else image
        except Exception:
            pil_image = None

    try:
        if loader == "multimodal":
            # Chat-template path: the processor produces the full tensor dict.
            content = []
            if pil_image is not None:
                content.append({"type": "image", "image": pil_image})
            content.append({"type": "text", "text": message})
            messages = [{"role": "user", "content": content}]
            inputs = _processor.apply_chat_template(
                messages, tokenize=True, return_dict=True,
                return_tensors="pt", add_generation_prompt=True,
            ).to(device)

        elif loader == "vision_causal":
            # Template renders text with an image placeholder; the processor
            # call then combines text and pixels.
            if pil_image is not None:
                content = [{"type": "image"}, {"type": "text", "text": message}]
                messages = [{"role": "user", "content": content}]
            else:
                messages = [{"role": "user", "content": message}]
            text = _processor.apply_chat_template(messages, add_generation_prompt=True)
            inputs = _processor(text=text, images=pil_image, return_tensors="pt").to(device)

        else:
            # Text-only model: flag an attached image in the prompt text.
            if pil_image is not None:
                message = "[This model does not support images] " + message
            messages = [{"role": "user", "content": message}]
            text = _processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
            inputs = _processor(text, return_tensors="pt").to(device)

    except Exception as exc:
        yield f"Error preparing inputs: {exc}"
        return

    # The streamer needs a tokenizer; multimodal processors wrap one
    # under .tokenizer, plain tokenizers are used as-is.
    tok = getattr(_processor, "tokenizer", _processor)
    streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True, timeout=60)

    # Whitelist the keys generate() understands; processors can emit extras
    # that would raise as unexpected kwargs.
    allowed_keys = {"input_ids", "attention_mask", "token_type_ids", "pixel_values",
                    "image_sizes", "pixel_attention_mask", "image_position_ids",
                    "pixel_position_ids", "image_position_ids", "token_type_ids_for_images"} - {"image_position_ids"} | {"image_position_ids"}
    allowed_keys = {"input_ids", "attention_mask", "token_type_ids", "pixel_values",
                    "image_sizes", "pixel_attention_mask", "image_position_ids",
                    "pixel_position_ids", "token_type_ids_for_images"}
    filtered_inputs = {k: v for k, v in inputs.items() if k in allowed_keys}

    # Some processors return image_position_ids as a plain list or a bool
    # mask; coerce to an integer tensor on the right device.
    if "image_position_ids" in filtered_inputs:
        ipi = filtered_inputs["image_position_ids"]
        if not isinstance(ipi, torch.Tensor):
            filtered_inputs["image_position_ids"] = torch.tensor(ipi, device=device)
        elif ipi.dtype == torch.bool:
            filtered_inputs["image_position_ids"] = ipi.long()

    gen_kwargs: dict = {**filtered_inputs, "streamer": streamer, "max_new_tokens": max_new_tokens}
    # Near-zero temperature means greedy decoding; otherwise nucleus sampling.
    if temperature > 0.01:
        gen_kwargs.update({"do_sample": True, "temperature": temperature, "top_p": 0.95})
    else:
        gen_kwargs["do_sample"] = False

    # generate() runs in a worker thread; the streamer feeds decoded text
    # back into this generator as it arrives.
    thread = Thread(target=_model.generate, kwargs=gen_kwargs)
    thread.start()

    partial = ""
    for token in streamer:
        partial += token
        yield partial
    thread.join()
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
@spaces.GPU(duration=120)
def infer_large(message, image, max_new_tokens, temperature):
    """Stream generation on a standard ZeroGPU slice (up to 120 s)."""
    for piece in _run_inference(message, image, max_new_tokens, temperature):
        yield piece
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
@spaces.GPU(duration=180, size="xlarge")
def infer_xlarge(message, image, max_new_tokens, temperature):
    """Stream generation on an xlarge ZeroGPU slice (up to 180 s)."""
    for piece in _run_inference(message, image, max_new_tokens, temperature):
        yield piece
|
| 252 |
+
|
| 253 |
+
|
| 254 |
+
def respond(message: str, image, max_new_tokens: int, temperature: float, history: list):
    """Single-model chat handler: stream the assistant reply into *history*.

    Yields ``(history, image_update)`` pairs so the Chatbot refreshes as
    tokens arrive; the final yield clears the image input component.
    """
    # Fix: Gradio can deliver None for an empty textbox, which made the
    # original `message.strip()` raise AttributeError.
    text = (message or "").strip()
    if not text and image is None:
        yield history, gr.update()
        return

    user_msg = message or "[image attached]"

    if _current_id is None:
        # No model loaded — answer with a hint instead of erroring out.
        yield history + [
            {"role": "user", "content": user_msg},
            {"role": "assistant", "content": "Please load a model first from the **Explore Models** tab."},
        ], gr.update()
        return

    # Echo the user turn immediately, then stream the reply.
    new_history = history + [{"role": "user", "content": user_msg}]
    yield new_history, gr.update()

    meta = MODELS[_current_id]
    # Pick the GPU slice size declared for the loaded model.
    infer_fn = infer_xlarge if meta["gpu_size"] == "xlarge" else infer_large

    partial = ""
    for chunk in infer_fn(message, image, max_new_tokens, temperature):
        partial = chunk
        yield new_history + [{"role": "assistant", "content": partial}], gr.update()

    # Final yield clears the image input once streaming is done.
    yield new_history + [{"role": "assistant", "content": partial}], gr.update(value=None)
|
| 278 |
+
|
| 279 |
+
|
| 280 |
+
def export_chat(history: list) -> str:
    """Render the chat *history* as a Markdown transcript.

    Returns an empty string when there is nothing to export.
    """
    if not history:
        return ""
    model_name = MODELS[_current_id]["name"] if _current_id else "unknown"
    stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
    out = [
        "# Gemma Explorer β Chat Export",
        f"Model: {model_name}",
        f"Date: {stamp}",
        "---", "",
    ]
    for entry in history:
        speaker = "**You**" if entry["role"] == "user" else f"**{model_name}**"
        out.append(f"{speaker}: {entry['content']}")
        out.append("")
    return "\n".join(out)
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
# ββ Dual-chat inference βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 298 |
+
|
| 299 |
+
def respond_dual(message: str, image,
                 max_new_tokens: int, temperature: float,
                 sys_a: str, sys_b: str,
                 model_a_id: str, model_b_id: str,
                 hist_a: list, hist_b: list):
    """Side-by-side comparison: run the same prompt through model A, then
    model B, streaming each reply into its own history.

    Models are loaded sequentially because only one model is held in
    memory at a time (_load_weights purges the previous one), so B's
    load evicts A after A has finished generating.
    Yields ``(hist_a, hist_b)`` tuples for the two Chatbot components.
    """
    if not message.strip() and image is None:
        yield hist_a, hist_b
        return

    user_msg = message or "[image attached]"

    # ── Model A ──────────────────────────────────────────────────────────
    try:
        _load_weights(model_a_id)
    except Exception as exc:
        _purge_model()
        yield (hist_a + [{"role": "user", "content": user_msg},
                         {"role": "assistant", "content": f"Failed to load {model_a_id}: {exc}"}],
               hist_b)
        return

    new_hist_a = hist_a + [{"role": "user", "content": user_msg}]
    yield new_hist_a, hist_b

    meta_a = MODELS[model_a_id]
    infer_fn = infer_xlarge if meta_a["gpu_size"] == "xlarge" else infer_large
    # The "system" prompt is simply prepended to the user message.
    full_a = (sys_a.strip() + "\n\n" + message) if sys_a.strip() else message
    partial_a = ""
    for chunk in infer_fn(full_a, image, max_new_tokens, temperature):
        partial_a = chunk
        yield new_hist_a + [{"role": "assistant", "content": partial_a}], hist_b
    hist_a = new_hist_a + [{"role": "assistant", "content": partial_a}]

    # ── Model B (loading B evicts A) ─────────────────────────────────────
    try:
        _load_weights(model_b_id)
    except Exception as exc:
        _purge_model()
        yield (hist_a,
               hist_b + [{"role": "user", "content": user_msg},
                         {"role": "assistant", "content": f"Failed to load {model_b_id}: {exc}"}])
        return

    new_hist_b = hist_b + [{"role": "user", "content": user_msg}]
    yield hist_a, new_hist_b

    meta_b = MODELS[model_b_id]
    infer_fn = infer_xlarge if meta_b["gpu_size"] == "xlarge" else infer_large
    full_b = (sys_b.strip() + "\n\n" + message) if sys_b.strip() else message
    partial_b = ""
    for chunk in infer_fn(full_b, image, max_new_tokens, temperature):
        partial_b = chunk
        yield hist_a, new_hist_b + [{"role": "assistant", "content": partial_b}]
    yield hist_a, new_hist_b + [{"role": "assistant", "content": partial_b}]
|
| 351 |
+
|
| 352 |
+
|
| 353 |
+
# ββ CSS βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 354 |
+
|
| 355 |
+
CSS = """
|
| 356 |
+
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=Space+Grotesk:wght@400;500;600;700&display=swap');
|
| 357 |
+
|
| 358 |
+
:root {
|
| 359 |
+
--bg: #080d18;
|
| 360 |
+
--surface: #0f1523;
|
| 361 |
+
--surface2: #161e30;
|
| 362 |
+
--border: rgba(80,110,220,0.16);
|
| 363 |
+
--blue: #4f7ef8;
|
| 364 |
+
--blue-dim: #3a5fc4;
|
| 365 |
+
--purple: #8b72f0;
|
| 366 |
+
--purple-dim: #6a55cc;
|
| 367 |
+
--text: #dde4f4;
|
| 368 |
+
--text-dim: #7a86a8;
|
| 369 |
+
--green: #34d399;
|
| 370 |
+
--red: #f87171;
|
| 371 |
+
--amber: #fbbf24;
|
| 372 |
+
--radius: 13px;
|
| 373 |
+
}
|
| 374 |
+
|
| 375 |
+
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
|
| 376 |
+
|
| 377 |
+
.gradio-container {
|
| 378 |
+
font-family: 'Inter', sans-serif !important;
|
| 379 |
+
background: var(--bg) !important;
|
| 380 |
+
max-width: 1420px !important;
|
| 381 |
+
color: var(--text) !important;
|
| 382 |
+
}
|
| 383 |
+
|
| 384 |
+
footer, .gr-prose { display: none !important; }
|
| 385 |
+
|
| 386 |
+
.gemma-hero {
|
| 387 |
+
position: relative;
|
| 388 |
+
background: linear-gradient(150deg, #06091566 0%, #0b1225 60%, #0d1530 100%);
|
| 389 |
+
border-radius: 18px; padding: 42px 52px; margin-bottom: 6px;
|
| 390 |
+
overflow: hidden; border: 1px solid var(--border);
|
| 391 |
+
}
|
| 392 |
+
.hero-bg { position: absolute; inset: 0; pointer-events: none; z-index: 0; overflow: hidden; }
|
| 393 |
+
.hero-grid {
|
| 394 |
+
position: absolute; inset: 0;
|
| 395 |
+
background-image:
|
| 396 |
+
linear-gradient(rgba(79,126,248,0.055) 1px, transparent 1px),
|
| 397 |
+
linear-gradient(90deg, rgba(79,126,248,0.055) 1px, transparent 1px);
|
| 398 |
+
background-size: 44px 44px;
|
| 399 |
+
animation: grid-drift 22s linear infinite;
|
| 400 |
+
}
|
| 401 |
+
@keyframes grid-drift { from { background-position: 0 0; } to { background-position: 44px 44px; } }
|
| 402 |
+
.orb {
|
| 403 |
+
position: absolute; border-radius: 50%;
|
| 404 |
+
filter: blur(70px); opacity: 0.16;
|
| 405 |
+
animation: float-orb var(--dur, 13s) ease-in-out infinite var(--delay, 0s);
|
| 406 |
+
}
|
| 407 |
+
.orb-1 { width: 400px; height: 400px; background: var(--blue); top: -130px; right: -60px; --dur:15s; --delay:0s; }
|
| 408 |
+
.orb-2 { width: 280px; height: 280px; background: var(--purple); bottom: -90px; left: 12%; --dur:12s; --delay:-4s; }
|
| 409 |
+
.orb-3 { width: 200px; height: 200px; background: #60a5fa; top: 35%; left: 52%; --dur:18s; --delay:-7s; }
|
| 410 |
+
@keyframes float-orb {
|
| 411 |
+
0%,100% { transform: translate(0,0) scale(1); }
|
| 412 |
+
33% { transform: translate(16px,-20px) scale(1.05); }
|
| 413 |
+
66% { transform: translate(-10px,12px) scale(0.96); }
|
| 414 |
+
}
|
| 415 |
+
.hero-stars { position: absolute; inset: 0; }
|
| 416 |
+
.star {
|
| 417 |
+
position: absolute; width: 2px; height: 2px;
|
| 418 |
+
background: #fff; border-radius: 50%; opacity: 0;
|
| 419 |
+
animation: twinkle var(--dur,3s) ease-in-out infinite var(--delay,0s);
|
| 420 |
+
}
|
| 421 |
+
@keyframes twinkle {
|
| 422 |
+
0%,100% { opacity:0; transform:scale(.8); }
|
| 423 |
+
50% { opacity:.65; transform:scale(1.3); }
|
| 424 |
+
}
|
| 425 |
+
.hero-inner { position: relative; z-index: 1; }
|
| 426 |
+
.hero-top-bar { display: flex; align-items: center; margin-bottom: 22px; }
|
| 427 |
+
.hero-eyebrow {
|
| 428 |
+
display: inline-flex; align-items: center; gap: 9px;
|
| 429 |
+
background: rgba(79,126,248,0.1); border: 1px solid rgba(79,126,248,0.28);
|
| 430 |
+
border-radius: 20px; padding: 5px 16px;
|
| 431 |
+
font-size: 11px; font-weight: 600; color: #7aabf8; letter-spacing: 1px; text-transform: uppercase;
|
| 432 |
+
}
|
| 433 |
+
.hero-dot-pulse {
|
| 434 |
+
width: 7px; height: 7px; border-radius: 50%; background: var(--green);
|
| 435 |
+
animation: pulse-ring 2s ease-out infinite;
|
| 436 |
+
}
|
| 437 |
+
@keyframes pulse-ring {
|
| 438 |
+
0% { box-shadow: 0 0 0 0 rgba(52,211,153,.5); }
|
| 439 |
+
70% { box-shadow: 0 0 0 8px rgba(52,211,153,0); }
|
| 440 |
+
100% { box-shadow: 0 0 0 0 rgba(52,211,153,0); }
|
| 441 |
+
}
|
| 442 |
+
.hero-title {
|
| 443 |
+
font-family: 'Space Grotesk', sans-serif;
|
| 444 |
+
font-size: 54px; font-weight: 700; color: #fff;
|
| 445 |
+
line-height: 1.05; margin: 0 0 14px; letter-spacing: -2px;
|
| 446 |
+
}
|
| 447 |
+
.hero-title span {
|
| 448 |
+
background: linear-gradient(120deg, var(--blue) 20%, var(--purple) 80%);
|
| 449 |
+
-webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text;
|
| 450 |
+
}
|
| 451 |
+
.hero-subtitle {
|
| 452 |
+
font-size: 15px; color: rgba(221,228,244,0.55);
|
| 453 |
+
margin: 0 0 28px; max-width: 530px; line-height: 1.7; font-weight: 400;
|
| 454 |
+
}
|
| 455 |
+
.hero-chips { display: flex; flex-wrap: wrap; gap: 7px; }
|
| 456 |
+
.hero-chip {
|
| 457 |
+
background: rgba(255,255,255,0.04); border: 1px solid rgba(255,255,255,0.09);
|
| 458 |
+
border-radius: 7px; padding: 5px 12px;
|
| 459 |
+
font-size: 12px; color: rgba(221,228,244,0.7); font-weight: 500;
|
| 460 |
+
}
|
| 461 |
+
.hero-chip strong { color: #fff; }
|
| 462 |
+
.hero-legend { display: flex; flex-wrap: wrap; gap: 14px; margin-top: 16px; }
|
| 463 |
+
.legend-dot { display: inline-flex; align-items: center; gap: 6px; font-size: 11px; color: rgba(221,228,244,0.5); font-weight: 500; }
|
| 464 |
+
.legend-swatch { width: 9px; height: 9px; border-radius: 3px; flex-shrink: 0; }
|
| 465 |
+
|
| 466 |
+
.tab-nav { background: transparent !important; border-bottom: 1px solid var(--border) !important; gap: 2px !important; padding: 0 4px !important; margin-top: 6px !important; }
|
| 467 |
+
.tab-nav button { font-family: 'Inter', sans-serif !important; font-size: 13px !important; font-weight: 500 !important; border-radius: 8px 8px 0 0 !important; padding: 10px 22px !important; color: var(--text-dim) !important; border: none !important; background: transparent !important; transition: all 0.2s !important; letter-spacing: 0.2px !important; }
|
| 468 |
+
.tab-nav button:hover { color: var(--text) !important; background: rgba(79,126,248,0.07) !important; }
|
| 469 |
+
.tab-nav button.selected { color: var(--blue) !important; background: rgba(79,126,248,0.08) !important; border-bottom: 2px solid var(--blue) !important; }
|
| 470 |
+
|
| 471 |
+
.status-bar {
|
| 472 |
+
display: flex; align-items: center; justify-content: space-between; flex-wrap: wrap;
|
| 473 |
+
background: var(--surface); border: 1px solid var(--border);
|
| 474 |
+
border-left: 3px solid var(--accent, var(--blue));
|
| 475 |
+
border-radius: var(--radius); padding: 11px 18px; gap: 10px; margin-bottom: 14px;
|
| 476 |
+
}
|
| 477 |
+
.status-left, .status-right { display: flex; align-items: center; gap: 8px; flex-wrap: wrap; }
|
| 478 |
+
.status-dot { width: 8px; height: 8px; border-radius: 50%; flex-shrink: 0; }
|
| 479 |
+
.dot-ready { background: var(--green); box-shadow: 0 0 6px rgba(52,211,153,.5); }
|
| 480 |
+
.dot-loading { background: var(--amber); animation: pulse-ring 1.5s ease-out infinite; }
|
| 481 |
+
.status-name { font-family: 'Space Grotesk', sans-serif; font-size: 14px; font-weight: 600; }
|
| 482 |
+
.status-chip { font-size: 11px; font-weight: 500; padding: 2px 9px; border-radius: 5px; background: rgba(255,255,255,0.05); border: 1px solid var(--border); color: var(--text-dim); }
|
| 483 |
+
.chip-vision { background: rgba(79,126,248,0.13); border-color: rgba(79,126,248,0.28); color: #8ab4f8; }
|
| 484 |
+
.chip-text { background: rgba(122,134,168,0.08); border-color: var(--border); color: var(--text-dim); }
|
| 485 |
+
.status-ok { font-size: 12px; color: var(--green); font-weight: 500; }
|
| 486 |
+
.status-error { background: rgba(248,113,113,0.08); border: 1px solid rgba(248,113,113,0.22); color: var(--red); border-radius: 10px; padding: 12px 16px; font-size: 13px; }
|
| 487 |
+
.status-empty { font-size: 13px; color: var(--text-dim); padding: 10px 0; font-style: italic; }
|
| 488 |
+
|
| 489 |
+
.loading-notice { display: flex; align-items: center; gap: 12px; background: rgba(79,126,248,0.07); border: 1px solid rgba(79,126,248,0.22); border-radius: var(--radius); padding: 13px 18px; margin-bottom: 14px; color: #8ab4f8; font-size: 13px; font-weight: 500; animation: notice-pulse 2s ease-in-out infinite; }
|
| 490 |
+
.notice-spinner { width: 15px; height: 15px; border: 2px solid rgba(79,126,248,0.25); border-top-color: var(--blue); border-radius: 50%; animation: spin .75s linear infinite; flex-shrink: 0; }
|
| 491 |
+
@keyframes spin { to { transform: rotate(360deg); } }
|
| 492 |
+
@keyframes notice-pulse { 0%,100% { border-color: rgba(79,126,248,0.22); } 50% { border-color: rgba(79,126,248,0.5); } }
|
| 493 |
+
|
| 494 |
+
.family-header { display: flex; align-items: center; gap: 14px; padding: 15px 20px; border-radius: var(--radius); margin-bottom: 12px; background: var(--surface); border: 1px solid var(--border); }
|
| 495 |
+
.family-icon { font-family: 'Space Grotesk', sans-serif; font-size: 17px; font-weight: 700; width: 40px; height: 40px; display: flex; align-items: center; justify-content: center; border-radius: 10px; color: #fff; flex-shrink: 0; }
|
| 496 |
+
.family-text h3 { font-family: 'Space Grotesk', sans-serif; font-size: 15.5px; font-weight: 600; margin: 0 0 2px; color: var(--text); }
|
| 497 |
+
.family-text p { font-size: 12px; color: var(--text-dim); margin: 0; }
|
| 498 |
+
.family-year { margin-left: auto; font-size: 11px; color: var(--text-dim); font-weight: 500; opacity: 0.7; }
|
| 499 |
+
.family-new-badge { margin-left: 8px; background: linear-gradient(120deg, var(--blue), var(--purple)); color: #fff; border-radius: 7px; padding: 3px 10px; font-size: 10px; font-weight: 700; letter-spacing: 0.6px; text-transform: uppercase; }
|
| 500 |
+
|
| 501 |
+
.model-card-wrap { padding: 5px !important; }
|
| 502 |
+
.model-card {
|
| 503 |
+
background: var(--surface); border: 1px solid var(--border);
|
| 504 |
+
border-radius: var(--radius); overflow: hidden; height: 100%;
|
| 505 |
+
display: flex; flex-direction: column;
|
| 506 |
+
transition: border-color 0.25s, transform 0.25s, box-shadow 0.25s;
|
| 507 |
+
min-height: 255px;
|
| 508 |
+
}
|
| 509 |
+
.model-card:hover { border-color: rgba(79,126,248,0.42); transform: translateY(-3px); box-shadow: 0 10px 32px rgba(0,0,0,0.4); }
|
| 510 |
+
.model-card.card-active { border-color: var(--green) !important; box-shadow: 0 0 0 1px rgba(52,211,153,0.18), 0 8px 28px rgba(0,0,0,0.35) !important; }
|
| 511 |
+
.card-active-badge { font-size: 9px; font-weight: 700; padding: 2px 8px; border-radius: 5px; background: rgba(52,211,153,0.13); color: var(--green); border: 1px solid rgba(52,211,153,0.28); text-transform: uppercase; letter-spacing: 0.5px; flex-shrink: 0; }
|
| 512 |
+
.card-accent { height: 3px; width: 100%; flex-shrink: 0; }
|
| 513 |
+
.card-body { padding: 17px 17px 14px; flex: 1; display: flex; flex-direction: column; }
|
| 514 |
+
.card-top { display: flex; align-items: flex-start; justify-content: space-between; margin-bottom: 8px; gap: 6px; flex-wrap: wrap; }
|
| 515 |
+
.card-name { font-family: 'Space Grotesk', sans-serif; font-size: 15.5px; font-weight: 600; color: var(--text); margin: 0; line-height: 1.2; }
|
| 516 |
+
.card-badge { font-size: 9px; font-weight: 700; padding: 2px 7px; border-radius: 5px; text-transform: uppercase; letter-spacing: 0.5px; flex-shrink: 0; }
|
| 517 |
+
.badge-NEW { background: rgba(79,126,248,0.15); color: #8ab4f8; border: 1px solid rgba(79,126,248,0.28); }
|
| 518 |
+
.badge-FLAGSHIP { background: rgba(139,114,240,0.15); color: #baaaf8; border: 1px solid rgba(139,114,240,0.28); }
|
| 519 |
+
.card-desc { font-size: 12px; color: var(--text-dim); line-height: 1.55; margin: 0 0 13px; flex: 1; }
|
| 520 |
+
.card-stats { display: grid; grid-template-columns: repeat(3,1fr); gap: 5px; margin-bottom: 11px; }
|
| 521 |
+
.card-stat { font-size: 10.5px; font-weight: 500; padding: 5px 4px; border-radius: 6px; background: rgba(255,255,255,0.03); color: var(--text-dim); border: 1px solid var(--border); text-align: center; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }
|
| 522 |
+
.card-tags { display: flex; flex-wrap: wrap; gap: 5px; margin-bottom: 13px; }
|
| 523 |
+
.tag { font-size: 10px; font-weight: 500; padding: 2px 8px; border-radius: 5px; letter-spacing: 0.2px; border: 1px solid transparent; }
|
| 524 |
+
.tag-vision { background: rgba(79,126,248,0.1); color: #8ab4f8; border-color: rgba(79,126,248,0.22); }
|
| 525 |
+
.tag-text { background: rgba(122,134,168,0.07); color: var(--text-dim); border-color: var(--border); }
|
| 526 |
+
.tag-apache { background: rgba(52,211,153,0.09); color: #6ee7b7; border-color: rgba(52,211,153,0.22); }
|
| 527 |
+
.tag-gemma { background: rgba(251,191,36,0.09); color: #fcd34d; border-color: rgba(251,191,36,0.22); }
|
| 528 |
+
.tag-xlarge { background: rgba(139,114,240,0.1); color: #baaaf8; border-color: rgba(139,114,240,0.25); }
|
| 529 |
+
.tag-instruct { background: rgba(167,139,250,0.13); color: #c4b5fd; border-color: rgba(167,139,250,0.3); }
|
| 530 |
+
.tag-base { background: rgba(148,163,184,0.08); color: #94a3b8; border-color: rgba(148,163,184,0.2); }
|
| 531 |
+
|
| 532 |
+
.model-card-wrap { display: flex !important; flex-direction: column !important; }
|
| 533 |
+
.model-card-wrap > * { width: 100% !important; }
|
| 534 |
+
.card-btn { width: 100% !important; margin-top: 6px !important; }
|
| 535 |
+
.card-btn > div { width: 100% !important; }
|
| 536 |
+
.card-btn button { width: 100% !important; background: linear-gradient(120deg, var(--blue), var(--blue-dim)) !important; color: #fff !important; border-radius: 8px !important; font-family: 'Inter', sans-serif !important; font-size: 12.5px !important; font-weight: 500 !important; padding: 9px !important; border: none !important; cursor: pointer !important; letter-spacing: 0.2px !important; transition: opacity 0.2s, transform 0.15s !important; }
|
| 537 |
+
.card-btn button:hover { opacity: 0.85 !important; transform: translateY(-1px) !important; }
|
| 538 |
+
.card-btn button:disabled { opacity: 0.45 !important; cursor: not-allowed !important; transform: none !important; }
|
| 539 |
+
.card-btn-xlarge button { background: linear-gradient(120deg, var(--purple), var(--purple-dim)) !important; }
|
| 540 |
+
|
| 541 |
+
.thinking-wrap {
|
| 542 |
+
display: flex; align-items: center; gap: 10px;
|
| 543 |
+
padding: 8px 14px 10px;
|
| 544 |
+
border-bottom: 1px solid var(--border);
|
| 545 |
+
background: var(--surface2);
|
| 546 |
+
font-size: 12px; color: var(--text-dim); font-style: italic;
|
| 547 |
+
}
|
| 548 |
+
.thinking-dots { display: flex; gap: 5px; align-items: center; }
|
| 549 |
+
.thinking-dots span {
|
| 550 |
+
width: 7px; height: 7px; border-radius: 50%;
|
| 551 |
+
background: var(--blue); opacity: 0.3;
|
| 552 |
+
animation: dot-bounce 1.1s ease-in-out infinite;
|
| 553 |
+
}
|
| 554 |
+
.thinking-dots span:nth-child(2) { animation-delay: 0.18s; }
|
| 555 |
+
.thinking-dots span:nth-child(3) { animation-delay: 0.36s; }
|
| 556 |
+
@keyframes dot-bounce {
|
| 557 |
+
0%, 100% { opacity: 0.2; transform: translateY(0px); }
|
| 558 |
+
50% { opacity: 1; transform: translateY(-5px); }
|
| 559 |
+
}
|
| 560 |
+
|
| 561 |
+
.dual-loading-notice {
|
| 562 |
+
display: flex; align-items: flex-start; gap: 12px;
|
| 563 |
+
background: rgba(79,126,248,0.07); border: 1px solid rgba(79,126,248,0.28);
|
| 564 |
+
border-radius: var(--radius); padding: 13px 18px; margin-bottom: 12px;
|
| 565 |
+
animation: notice-pulse 2s ease-in-out infinite;
|
| 566 |
+
}
|
| 567 |
+
.dual-loading-notice .notice-spinner { margin-top: 2px; }
|
| 568 |
+
.dual-loading-body { display: flex; flex-direction: column; gap: 3px; }
|
| 569 |
+
.dual-loading-title { font-size: 13px; font-weight: 600; color: #8ab4f8; }
|
| 570 |
+
.dual-loading-sub { font-size: 11px; color: var(--text-dim); }
|
| 571 |
+
|
| 572 |
+
.chat-panel { background: var(--surface); border: 1px solid var(--border); border-radius: var(--radius); overflow: hidden; }
|
| 573 |
+
.gradio-chatbot { background: var(--surface) !important; border: none !important; color: var(--text) !important; }
|
| 574 |
+
|
| 575 |
+
.settings-panel { background: var(--surface); border: 1px solid var(--border); border-radius: var(--radius); padding: 18px; }
|
| 576 |
+
.zerogpu-notice { background: rgba(245,158,11,0.08); border: 1px solid rgba(245,158,11,0.25); color: #fcd34d; border-radius: 8px; padding: 12px 14px; font-size: 11px; margin-bottom: 16px; line-height: 1.6; font-weight: 500; }
|
| 577 |
+
.zgn-title { font-weight: 700; font-size: 11.5px; margin-bottom: 3px; }
|
| 578 |
+
.zgn-divider { border-top: 1px solid rgba(245,158,11,0.2); margin: 9px 0; }
|
| 579 |
+
.zerogpu-notice strong { color: #fde68a; }
|
| 580 |
+
.settings-title { font-family: 'Space Grotesk', sans-serif; font-size: 10.5px; font-weight: 700; color: var(--text-dim); text-transform: uppercase; letter-spacing: 1.2px; margin-bottom: 14px; padding-bottom: 10px; border-bottom: 1px solid var(--border); }
|
| 581 |
+
.settings-hint { font-size: 11.5px; color: var(--text-dim); line-height: 1.8; margin-top: 14px; padding-top: 14px; border-top: 1px solid var(--border); }
|
| 582 |
+
.settings-hint strong { color: rgba(221,228,244,0.55); font-weight: 500; display: block; margin-bottom: 2px; }
|
| 583 |
+
.system-prompt textarea { font-size: 12px !important; min-height: 72px !important; background: var(--surface2) !important; color: var(--text) !important; border-color: var(--border) !important; border-radius: 8px !important; resize: vertical !important; }
|
| 584 |
+
|
| 585 |
+
.send-btn button { background: linear-gradient(120deg, var(--blue), var(--blue-dim)) !important; color: #fff !important; border-radius: 10px !important; font-family: 'Inter', sans-serif !important; font-weight: 500 !important; padding: 10px 20px !important; border: none !important; letter-spacing: 0.2px !important; transition: opacity 0.2s !important; }
|
| 586 |
+
.send-btn button:hover { opacity: 0.85 !important; }
|
| 587 |
+
.clear-btn button, .export-btn button, .reset-btn button { background: transparent !important; border: 1px solid var(--border) !important; color: var(--text-dim) !important; border-radius: 10px !important; font-family: 'Inter', sans-serif !important; font-weight: 500 !important; font-size: 12.5px !important; width: 100% !important; transition: all 0.2s !important; }
|
| 588 |
+
.clear-btn button:hover { border-color: rgba(248,113,113,0.4) !important; color: var(--red) !important; background: rgba(248,113,113,0.06) !important; }
|
| 589 |
+
.export-btn button:hover { border-color: rgba(79,126,248,0.4) !important; color: var(--blue) !important; background: rgba(79,126,248,0.06) !important; }
|
| 590 |
+
.reset-btn button:hover { border-color: rgba(251,191,36,0.4) !important; color: var(--amber) !important; background: rgba(251,191,36,0.06) !important; }
|
| 591 |
+
|
| 592 |
+
.chat-inline-status { display: flex; align-items: center; gap: 8px; padding: 6px 14px; font-size: 12px; color: var(--text-dim); border-bottom: 1px solid var(--border); background: var(--surface2); min-height: 32px; }
|
| 593 |
+
.csi-dot { width: 7px; height: 7px; border-radius: 50%; flex-shrink: 0; }
|
| 594 |
+
.csi-dot-idle { background: var(--text-dim); opacity: 0.35; }
|
| 595 |
+
.csi-dot-ready { background: var(--green); box-shadow: 0 0 6px rgba(52,211,153,.5); }
|
| 596 |
+
.csi-name { font-weight: 600; color: var(--text); }
|
| 597 |
+
.csi-label { color: var(--green); font-weight: 500; }
|
| 598 |
+
.csi-idle { font-style: italic; }
|
| 599 |
+
|
| 600 |
+
.image-upload-wrap { width: 100% !important; margin-top: 6px !important; }
|
| 601 |
+
.image-upload-wrap > div,
|
| 602 |
+
.image-upload-wrap .wrap { min-height: 260px !important; border-radius: 10px !important; }
|
| 603 |
+
|
| 604 |
+
.dual-panel { background: var(--surface); border: 1px solid var(--border); border-radius: var(--radius); overflow: hidden; }
|
| 605 |
+
.dual-header { display: flex; align-items: center; padding: 10px 14px; background: var(--surface2); border-bottom: 1px solid var(--border); font-size: 12px; font-weight: 600; }
|
| 606 |
+
.dual-label-a { color: var(--blue); }
|
| 607 |
+
.dual-label-b { color: var(--purple); }
|
| 608 |
+
.dual-send-btn button { background: linear-gradient(120deg, var(--blue), var(--purple)) !important; color: #fff !important; border-radius: 10px !important; font-family: 'Inter', sans-serif !important; font-weight: 500 !important; padding: 10px 20px !important; border: none !important; letter-spacing: 0.2px !important; transition: opacity 0.2s !important; }
|
| 609 |
+
.dual-send-btn button:hover { opacity: 0.85 !important; }
|
| 610 |
+
.dual-img-wrap { width: 100% !important; margin-top: 6px !important; }
|
| 611 |
+
.dual-img-wrap > div,
|
| 612 |
+
.dual-img-wrap .wrap { min-height: 140px !important; border-radius: 10px !important; }
|
| 613 |
+
|
| 614 |
+
input, textarea, .gr-input { background: var(--surface2) !important; color: var(--text) !important; border-color: var(--border) !important; border-radius: 10px !important; }
|
| 615 |
+
label { color: var(--text-dim) !important; font-size: 12px !important; }
|
| 616 |
+
.gr-slider input[type=range] { accent-color: var(--blue); }
|
| 617 |
+
.gr-box, .gr-form { background: transparent !important; }
|
| 618 |
+
"""
|
| 619 |
+
|
| 620 |
+
|
| 621 |
+
# ── Static HTML fragments ──────────────────────────────────────────────────────
|
| 622 |
+
|
| 623 |
+
_STAR_COORDS = [
|
| 624 |
+
(8,12),(15,78),(23,45),(31,67),(42,23),(48,89),(55,34),(63,56),
|
| 625 |
+
(71,14),(78,72),(85,41),(92,88),(5,55),(18,33),(27,91),(36,8),
|
| 626 |
+
(44,62),(52,27),(59,79),(67,48),(74,19),(82,64),(89,35),(96,82),
|
| 627 |
+
(11,95),(20,7),(29,53),(38,74),(47,16),(56,39),
|
| 628 |
+
]
|
| 629 |
+
_STARS_HTML = "".join(
|
| 630 |
+
f'<div class="star" style="left:{x}%;top:{y}%;--dur:{2.4+(i%5)*0.7}s;--delay:{-(i%8)*0.6}s"></div>'
|
| 631 |
+
for i,(x,y) in enumerate(_STAR_COORDS)
|
| 632 |
+
)
|
| 633 |
+
|
| 634 |
+
# Counts surfaced in the hero chips.
_N_MODELS = len(MODELS)
_N_FAMILIES = len(FAMILIES)

# Colour-swatch + name legend entry for every model family.
_LEGEND_HTML = "".join(
    '<span class="legend-dot">'
    f'<span class="legend-swatch" style="background:{info["color"]}"></span>'
    f'{_html.escape(family)}</span>'
    for family, info in FAMILIES.items()
)
|
| 643 |
+
|
| 644 |
+
# Landing hero: animated background (grid, orbs, stars) plus title, subtitle,
# summary chips and the per-family colour legend.
_HERO_HTML = f"""
<div class="gemma-hero">
<div class="hero-bg">
<div class="hero-grid"></div>
<div class="orb orb-1"></div><div class="orb orb-2"></div><div class="orb orb-3"></div>
<div class="hero-stars">{_STARS_HTML}</div>
</div>
<div class="hero-inner">
<div class="hero-top-bar">
<div class="hero-eyebrow">
<span class="hero-dot-pulse"></span>
Google DeepMind · Open Models
</div>
</div>
<h1 class="hero-title">Gemma <span>Explorer</span></h1>
<p class="hero-subtitle">Explore, compare, and chat with the full Gemma open model family — from the compact 1B to the powerful 31B multimodal.</p>
<div class="hero-chips">
<span class="hero-chip"><strong>{_N_MODELS}</strong> models</span>
<span class="hero-chip"><strong>{_N_FAMILIES}</strong> generations</span>
<span class="hero-chip">Vision — Gemma 3 & 4</span>
<span class="hero-chip">ZeroGPU · NVIDIA H200</span>
</div>
<div class="hero-legend">{_LEGEND_HTML}</div>
</div>
</div>
"""
|
| 670 |
+
|
| 671 |
+
# Sidebar notice about ZeroGPU cold starts and the lack of chat memory.
# Glyphs restored from mojibake in the source ("β‘" -> "⚡", "π§" -> "🧠",
# bare "β" -> em dash) — the original bytes were UTF-8 read as ISO-8859-7.
_ZEROGPU_NOTICE = """
<div class="zerogpu-notice">
<div class="zgn-title">⚡ ZeroGPU Latency</div>
GPU allocation happens on every message in this serverless Space — expect a brief wait before the first token.
<div class="zgn-divider"></div>
<div class="zgn-title">🧠 No Memory</div>
Due to ZeroGPU constraints, each message is processed independently.
The model has <strong>no conversation history</strong> — it starts fresh on every reply.
</div>
"""
|
| 681 |
+
|
| 682 |
+
# Sidebar explainer for the generation parameters and the two GPU tiers.
_SETTINGS_HINT = """
<div class="settings-hint">
<strong>Temperature</strong>
0 = deterministic<br>0.7 = balanced<br>1.5 = creative
<br><br>
<strong>GPU allocation</strong>
large = 70 GB H200<br>xlarge = 141 GB H200<br>(Gemma 4 31B only)
</div>
"""
|
| 691 |
+
|
| 692 |
+
|
| 693 |
+
# ── Card / header HTML ─────────────────────────────────────────────────────────
|
| 694 |
+
|
| 695 |
+
def _card_html(model_id: str, meta: dict, active: bool = False) -> str:
    """Render one model card as an HTML fragment.

    Args:
        model_id: Repo id of the model; "-it" in the id marks an instruct tune.
        meta: Model metadata dict — uses name, description, params_short,
              context, vram, family_color, supports_vision, license_open,
              gpu_size and (optionally) badge.
        active: Whether this model is the one currently loaded.

    Returns:
        An HTML string for the card (accent bar, name, badges, stats, tags).
    """
    color = meta["family_color"]
    badge = meta.get("badge")
    badge_html = f'<span class="card-badge badge-{badge}">{_html.escape(badge)}</span>' if badge else ""
    # NOTE(review): the "Loaded" glyph was mojibake in the source ("β");
    # a filled dot is assumed here — confirm against the deployed UI.
    active_html = '<span class="card-active-badge">● Loaded</span>' if active else ""
    vision_tag = '<span class="tag tag-vision">Vision</span>' if meta["supports_vision"] else '<span class="tag tag-text">Text only</span>'
    license_tag = '<span class="tag tag-apache">Apache 2.0</span>' if meta["license_open"] else '<span class="tag tag-gemma">Gemma License</span>'
    gpu_tag = '<span class="tag tag-xlarge">xlarge GPU</span>' if meta["gpu_size"] == "xlarge" else ""
    instruct_tag = '<span class="tag tag-instruct">Instruct</span>' if "-it" in model_id.lower() else '<span class="tag tag-base">Base</span>'
    active_cls = " card-active" if active else ""
    # str() around context for consistency with _make_status_html, which
    # escapes str(meta['context']) — context may not always be a string.
    return f"""
<div class="model-card{active_cls}">
<div class="card-accent" style="background:linear-gradient(90deg,{color},{color}66)"></div>
<div class="card-body">
<div class="card-top">
<p class="card-name">{_html.escape(meta['name'])}</p>
<span style="display:flex;gap:4px;flex-shrink:0">{badge_html}{active_html}</span>
</div>
<p class="card-desc">{_html.escape(meta['description'])}</p>
<div class="card-stats">
<span class="card-stat">{_html.escape(meta['params_short'])}</span>
<span class="card-stat">{_html.escape(str(meta['context']))} ctx</span>
<span class="card-stat">{_html.escape(meta['vram'])}</span>
</div>
<div class="card-tags">{vision_tag}{instruct_tag}{license_tag}{gpu_tag}</div>
</div>
</div>
"""
|
| 723 |
+
|
| 724 |
+
|
| 725 |
+
def _family_header_html(name: str, info: dict) -> str:
    """Render the banner shown above each model-family section.

    info must carry color, icon, description and year; an optional truthy
    "new" key adds the gradient "New" badge.
    """
    color = info["color"]
    icon_div = (
        f'<div class="family-icon" '
        f'style="background:linear-gradient(135deg,{color},{color}88)">{info["icon"]}</div>'
    )
    text_div = (
        f'<div class="family-text"><h3>{_html.escape(name)}</h3>'
        f'<p>{_html.escape(info["description"])}</p></div>'
    )
    badge = '<span class="family-new-badge">New</span>' if info.get("new") else ""
    return f"""
<div class="family-header">
{icon_div}
{text_div}
<span class="family-year">{info['year']}</span>
{badge}
</div>
"""
|
| 735 |
+
|
| 736 |
+
|
| 737 |
+
def _make_status_html(meta: dict, state: str = "ready") -> str:
    """Render the status bar for the currently selected model.

    state "ready" and "already" both show the green dot (with different
    labels); any other value is treated as an in-progress load.
    """
    accent = meta["family_color"]
    safe_name = _html.escape(meta["name"])
    if state in ("ready", "already"):
        dot_class = "dot-ready"
        label = "Ready" if state == "ready" else "Already loaded"
    else:
        dot_class, label = "dot-loading", "Loading\u2026"
    if meta["supports_vision"]:
        vision_tag = '<span class="status-chip chip-vision">Vision</span>'
    else:
        vision_tag = '<span class="status-chip chip-text">Text only</span>'
    return f"""
<div class="status-bar" style="--accent:{accent}">
<div class="status-left">
<span class="status-dot {dot_class}"></span>
<span class="status-name" style="color:{accent}">{safe_name}</span>
{vision_tag}
</div>
<div class="status-right">
<span class="status-chip">{_html.escape(meta['params_short'])}</span>
<span class="status-chip">{_html.escape(str(meta['context']))} ctx</span>
<span class="status-chip">{_html.escape(meta['vram'])}</span>
<span class="status-ok">{label}</span>
</div>
</div>
"""
|
| 764 |
+
|
| 765 |
+
|
| 766 |
+
def _chat_inline_status(loaded: bool = False, name: str = "") -> str:
    """Inline status strip above the chatbot: loaded model name, or a hint
    pointing at the Explore Models tab when nothing is loaded."""
    if not loaded:
        return (
            '<div class="chat-inline-status">'
            '<span class="csi-dot csi-dot-idle"></span>'
            '<span class="csi-idle">No model loaded — go to <strong>Explore Models</strong> and click <em>Load & Chat</em>.</span>'
            '</div>'
        )
    return (
        '<div class="chat-inline-status">'
        '<span class="csi-dot csi-dot-ready"></span>'
        f'<span class="csi-name">{_html.escape(name)}</span>'
        '<span class="csi-label"> · Ready to chat</span>'
        '</div>'
    )
|
| 778 |
+
|
| 779 |
+
|
| 780 |
+
def _empty_status() -> str:
    """Placeholder status shown before any model has been loaded."""
    return ('<div class="status-empty">No model loaded — select one in '
            '<strong>Explore Models</strong>.</div>')
|
| 782 |
+
|
| 783 |
+
|
| 784 |
+
def _loading_html(model_name: str = "") -> str:
    """Banner shown while a model is being loaded.

    model_name, when given, is HTML-escaped and shown in bold inside the
    banner text; otherwise a generic "Loading, please wait…" is rendered.
    """
    name_part = f" <strong>{_html.escape(model_name)}</strong>" if model_name else ""
    return (
        '<div class="loading-notice">'
        '<div class="notice-spinner"></div>'
        '<div>'
        f'<div>Loading{name_part}, please wait…</div>'
        '<div style="font-size:11px;opacity:0.7;margin-top:3px;font-weight:400">'
        # "⏱" restored from a mojibake glyph ("β±") in the source.
        '⏱ Large models (27B, 31B) can take 1–3 min. Please be patient.'
        '</div>'
        '</div>'
        '</div>'
    )
|
| 797 |
+
|
| 798 |
+
|
| 799 |
+
# Static "Thinking…" indicator (three bouncing dots) shown while a reply
# is being generated.
_THINKING_HTML = (
    '<div class="thinking-wrap">'
    '<div class="thinking-dots"><span></span><span></span><span></span></div>'
    'Thinking…'
    '</div>'
)
|
| 805 |
+
|
| 806 |
+
|
| 807 |
+
def _dual_loading_html(label: str, color: str, model_name: str) -> str:
    """Banner shown while one side of the dual chat loads its model.

    Args:
        label: Which side is loading (e.g. "Model A" / "Model B").
        color: CSS colour expression applied to the title text.
        model_name: Display name of the model being loaded (HTML-escaped).
    """
    return (
        '<div class="dual-loading-notice">'
        '<div class="notice-spinner"></div>'
        '<div class="dual-loading-body">'
        f'<span class="dual-loading-title" style="color:{color}">'
        f'Loading {_html.escape(label)}: {_html.escape(model_name)}…'
        '</span>'
        '<span class="dual-loading-sub">'
        # "⏱" restored from a mojibake glyph ("β±") in the source.
        '⏱ Large models (27B, 31B) may take 1–3 min. Please be patient.'
        '</span>'
        '</div>'
        '</div>'
    )
|
| 821 |
+
|
| 822 |
+
|
| 823 |
+
# ── Build Gradio UI ────────────────────────────────────────────────────────────
|
| 824 |
+
|
| 825 |
+
# Dropdown choices as (display name, model id) pairs, in MODELS order.
_MODEL_CHOICES = [(info["name"], model_id) for model_id, info in MODELS.items()]
|
| 826 |
+
|
| 827 |
+
# Gradio 6.0 fix: css and theme were moved to launch()
|
| 828 |
+
with gr.Blocks(title="Gemma Explorer") as demo:
|
| 829 |
+
|
| 830 |
+
current_model_state = gr.State(value=None)
|
| 831 |
+
gr.HTML(value=_HERO_HTML)
|
| 832 |
+
|
| 833 |
+
with gr.Tabs() as main_tabs:
|
| 834 |
+
|
| 835 |
+
with gr.Tab("Explore Models", id="explore"):
|
| 836 |
+
|
| 837 |
+
status_html = gr.HTML(value=_empty_status())
|
| 838 |
+
loading_notice = gr.HTML(value=_loading_html(), visible=False)
|
| 839 |
+
|
| 840 |
+
card_html_components: dict[str, gr.HTML] = {}
|
| 841 |
+
load_btns: list[gr.Button] = []
|
| 842 |
+
|
| 843 |
+
for family_name, family_info in FAMILIES.items():
|
| 844 |
+
gr.HTML(_family_header_html(family_name, family_info))
|
| 845 |
+
family_models = list(get_models_by_family(family_name).items())
|
| 846 |
+
|
| 847 |
+
for row_start in range(0, len(family_models), 4):
|
| 848 |
+
row_models = family_models[row_start: row_start + 4]
|
| 849 |
+
with gr.Row(equal_height=True):
|
| 850 |
+
for model_id, meta in row_models:
|
| 851 |
+
with gr.Column(min_width=200, elem_classes=["model-card-wrap"]):
|
| 852 |
+
card_comp = gr.HTML(_card_html(model_id, meta, active=False))
|
| 853 |
+
card_html_components[model_id] = card_comp
|
| 854 |
+
btn_cls = ["card-btn", "card-btn-xlarge"] if meta["gpu_size"] == "xlarge" else ["card-btn"]
|
| 855 |
+
load_btn = gr.Button("Load & Chat", elem_classes=btn_cls)
|
| 856 |
+
load_btn._model_id = model_id
|
| 857 |
+
load_btns.append(load_btn)
|
| 858 |
+
|
| 859 |
+
with gr.Tab("Single Chat", id="single"):
|
| 860 |
+
|
| 861 |
+
chat_status_html = gr.HTML(value=_empty_status())
|
| 862 |
+
chat_loading_notice = gr.HTML(value=_loading_html(), visible=False)
|
| 863 |
+
|
| 864 |
+
with gr.Row(equal_height=False):
|
| 865 |
+
|
| 866 |
+
with gr.Column(scale=4, elem_classes=["chat-panel"]):
|
| 867 |
+
chat_inline = gr.HTML(value=_chat_inline_status(loaded=False))
|
| 868 |
+
thinking_html = gr.HTML(value="", visible=False)
|
| 869 |
+
chatbot = gr.Chatbot(value=[], height=480, show_label=False, placeholder="")
|
| 870 |
+
|
| 871 |
+
with gr.Row():
|
| 872 |
+
msg_input = gr.Textbox(
|
| 873 |
+
placeholder="Type your message here\u2026",
|
| 874 |
+
show_label=False, scale=5, lines=1, max_lines=5, autofocus=True,
|
| 875 |
+
)
|
| 876 |
+
send_btn = gr.Button("Send", variant="primary", elem_classes=["send-btn"], scale=1)
|
| 877 |
+
|
| 878 |
+
image_input = gr.Image(
|
| 879 |
+
type="numpy", label="Attach image (optional)",
|
| 880 |
+
show_label=True, visible=False,
|
| 881 |
+
elem_classes=["image-upload-wrap"], height=260,
|
| 882 |
+
)
|
| 883 |
+
|
| 884 |
+
with gr.Column(scale=1, elem_classes=["settings-panel"]):
|
| 885 |
+
gr.HTML(_ZEROGPU_NOTICE)
|
| 886 |
+
gr.HTML('<div class="settings-title">Parameters</div>')
|
| 887 |
+
max_tokens = gr.Slider(64, 2048, value=512, step=64, label="Max new tokens")
|
| 888 |
+
temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="Temperature")
|
| 889 |
+
system_prompt = gr.Textbox(
|
| 890 |
+
label="System prompt (optional)",
|
| 891 |
+
placeholder="e.g. You are a helpful assistant\u2026",
|
| 892 |
+
lines=3, max_lines=6,
|
| 893 |
+
elem_classes=["system-prompt"],
|
| 894 |
+
)
|
| 895 |
+
gr.HTML(_SETTINGS_HINT)
|
| 896 |
+
reset_btn = gr.Button("βΊ Reset params", elem_classes=["reset-btn"])
|
| 897 |
+
clear_btn = gr.Button("Clear Chat", elem_classes=["clear-btn"])
|
| 898 |
+
export_btn = gr.Button("β¬ Export .md", elem_classes=["export-btn"])
|
| 899 |
+
export_file = gr.File(label="Download chat", visible=False)
|
| 900 |
+
|
| 901 |
+
with gr.Tab("Dual Chat", id="dual"):
|
| 902 |
+
|
| 903 |
+
gr.HTML("""
|
| 904 |
+
<div class="zerogpu-notice" style="margin-bottom:16px">
|
| 905 |
+
<div class="zgn-title">βοΈ Dual Chat β Side-by-Side Comparison</div>
|
| 906 |
+
Send the same prompt to two models and compare their responses.
|
| 907 |
+
Models are loaded and run sequentially β Model A first, then Model B.
|
| 908 |
+
<div class="zgn-divider"></div>
|
| 909 |
+
<div class="zgn-title">π§ No Memory</div>
|
| 910 |
+
Same ZeroGPU constraints apply β each turn is processed independently with no context history.
|
| 911 |
+
</div>
|
| 912 |
+
""")
|
| 913 |
+
|
| 914 |
+
dual_loading_html = gr.HTML(value="", visible=False)
|
| 915 |
+
|
| 916 |
+
with gr.Row():
|
| 917 |
+
with gr.Column(scale=1):
|
| 918 |
+
dual_model_a = gr.Dropdown(choices=_MODEL_CHOICES, value=list(MODELS.keys())[0], label="Model A")
|
| 919 |
+
dual_sys_a = gr.Textbox(label="System prompt A (optional)", lines=2,
|
| 920 |
+
placeholder="e.g. Answer concisely.",
|
| 921 |
+
elem_classes=["system-prompt"])
|
| 922 |
+
with gr.Column(scale=1):
|
| 923 |
+
dual_model_b = gr.Dropdown(
|
| 924 |
+
choices=_MODEL_CHOICES,
|
| 925 |
+
value=list(MODELS.keys())[min(3, len(MODELS)-1)],
|
| 926 |
+
label="Model B",
|
| 927 |
+
)
|
| 928 |
+
dual_sys_b = gr.Textbox(label="System prompt B (optional)", lines=2,
|
| 929 |
+
placeholder="e.g. Answer in detail.",
|
| 930 |
+
elem_classes=["system-prompt"])
|
| 931 |
+
|
| 932 |
+
with gr.Row(equal_height=True):
|
| 933 |
+
with gr.Column(scale=1, elem_classes=["dual-panel"]):
|
| 934 |
+
gr.HTML('<div class="dual-header"><span class="dual-label-a">β² Model A</span></div>')
|
| 935 |
+
dual_bot_a = gr.Chatbot(value=[], height=400, show_label=False)
|
| 936 |
+
with gr.Column(scale=1, elem_classes=["dual-panel"]):
|
| 937 |
+
gr.HTML('<div class="dual-header"><span class="dual-label-b">β² Model B</span></div>')
|
| 938 |
+
dual_bot_b = gr.Chatbot(value=[], height=400, show_label=False)
|
| 939 |
+
|
| 940 |
+
_init_a = list(MODELS.keys())[0]
|
| 941 |
+
_init_b = list(MODELS.keys())[min(3, len(MODELS)-1)]
|
| 942 |
+
_both_vision_init = MODELS[_init_a]["supports_vision"] and MODELS[_init_b]["supports_vision"]
|
| 943 |
+
dual_img = gr.Image(
|
| 944 |
+
type="numpy",
|
| 945 |
+
label="Attach image β sent to both models (only available when both models support vision)",
|
| 946 |
+
show_label=True,
|
| 947 |
+
elem_classes=["dual-img-wrap"],
|
| 948 |
+
height=160,
|
| 949 |
+
visible=_both_vision_init,
|
| 950 |
+
)
|
| 951 |
+
|
| 952 |
+
with gr.Row():
|
| 953 |
+
dual_msg = gr.Textbox(placeholder="Type a prompt β it will be sent to both models\u2026",
|
| 954 |
+
show_label=False, scale=5, lines=1, max_lines=4)
|
| 955 |
+
dual_send = gr.Button("Send to Both", variant="primary",
|
| 956 |
+
elem_classes=["dual-send-btn"], scale=1)
|
| 957 |
+
|
| 958 |
+
with gr.Row():
|
| 959 |
+
dual_max_tokens = gr.Slider(64, 2048, value=512, step=64, label="Max new tokens")
|
| 960 |
+
dual_temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.05, label="Temperature")
|
| 961 |
+
dual_clear = gr.Button("Clear Both", elem_classes=["clear-btn"])
|
| 962 |
+
|
| 963 |
+
# ── Event wiring ───────────────────────────────────────────────────────────
|
| 964 |
+
|
| 965 |
+
card_html_list = [card_html_components[mid] for mid in MODELS]
|
| 966 |
+
_n_cards = len(card_html_list)
|
| 967 |
+
|
| 968 |
+
# Fix: use a helper function (closure) instead of a lambda, avoiding the
# late-binding/generator issue when wiring load handlers in a loop
|
| 969 |
+
def make_load_fn(model_id):
    """Build a zero-argument generator function that streams the load of
    *model_id* through load_model_stream.

    A closure (rather than a lambda created inside the wiring loop) binds
    model_id at definition time and keeps the generator semantics Gradio
    needs for streaming updates.
    """
    # NOTE: relies on _n_cards from the enclosing scope.
    def _stream():
        yield from load_model_stream(model_id, _n_cards)
    return _stream
|
| 973 |
+
|
| 974 |
+
for btn in load_btns:
|
| 975 |
+
btn.click(
|
| 976 |
+
fn=make_load_fn(btn._model_id),
|
| 977 |
+
inputs=[],
|
| 978 |
+
outputs=[
|
| 979 |
+
loading_notice, chat_loading_notice,
|
| 980 |
+
status_html, main_tabs, image_input, current_model_state,
|
| 981 |
+
chatbot, chat_inline,
|
| 982 |
+
*card_html_list,
|
| 983 |
+
],
|
| 984 |
+
)
|
| 985 |
+
|
| 986 |
+
# ── Single chat ────────────────────────────────────────────────────────────
|
| 987 |
+
|
| 988 |
+
def _do_respond(message, image, max_toks, temp, sys_prompt, history):
    """Stream a single-chat reply.

    Yields tuples for (chatbot, msg_input, image_input, send_btn,
    msg_input, thinking_html): first clears the textbox, disables the send
    controls and shows the thinking indicator; then forwards streamed
    history/image updates from respond(); finally clears the image input
    and re-enables the controls.
    """
    prompt = sys_prompt.strip()
    full_msg = f"{prompt}\n\n{message}" if prompt else message

    lock = gr.update(interactive=False)
    unlock = gr.update(interactive=True)
    show_thinking = gr.update(visible=True, value=_THINKING_HTML)
    hide_thinking = gr.update(visible=False, value="")

    latest = history
    # Immediately clear the input, lock the controls, show the indicator.
    yield latest, gr.update(value=""), gr.update(), lock, lock, show_thinking

    for latest, img_update in respond(full_msg, image, max_toks, temp, history):
        yield latest, gr.update(), img_update, gr.update(), gr.update(), hide_thinking

    # Drop the attached image and unlock the send controls.
    yield latest, gr.update(), gr.update(value=None), unlock, unlock, hide_thinking
|
| 1003 |
+
|
| 1004 |
+
_single_inputs = [msg_input, image_input, max_tokens, temperature, system_prompt, chatbot]
|
| 1005 |
+
_single_outputs = [chatbot, msg_input, image_input, send_btn, msg_input, thinking_html]
|
| 1006 |
+
|
| 1007 |
+
msg_input.submit(fn=_do_respond, inputs=_single_inputs, outputs=_single_outputs)
|
| 1008 |
+
send_btn.click( fn=_do_respond, inputs=_single_inputs, outputs=_single_outputs)
|
| 1009 |
+
|
| 1010 |
+
clear_btn.click(fn=lambda: [], outputs=[chatbot])
|
| 1011 |
+
reset_btn.click(fn=lambda: (512, 0.7, ""), outputs=[max_tokens, temperature, system_prompt])
|
| 1012 |
+
|
| 1013 |
+
def _do_export(history):
    """Write the chat transcript to a temp markdown file for download.

    Args:
        history: chatbot message list; a falsy value means nothing to export.

    Returns:
        gr.update hiding the file widget (empty history) or pointing it at
        the freshly written transcript file.
    """
    if not history:
        return gr.update(visible=False)
    import tempfile  # local import: only needed on the export path
    content = export_chat(history)
    stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    # tempfile.gettempdir() instead of a hard-coded /tmp so export also works
    # outside Linux; explicit UTF-8 so markdown/emoji survive any locale.
    path = os.path.join(tempfile.gettempdir(), f"gemma_chat_{stamp}.md")
    with open(path, "w", encoding="utf-8") as f:
        f.write(content)
    return gr.update(value=path, visible=True)

export_btn.click(fn=_do_export, inputs=[chatbot], outputs=[export_file])
|
| 1023 |
+
|
| 1024 |
+
# ββ Dual chat βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 1025 |
+
|
| 1026 |
+
def _dual_img_visibility(model_a, model_b):
    """Show the shared image input only when BOTH selected models accept images."""
    both_vision = all(
        MODELS.get(model_id, {}).get("supports_vision", False)
        for model_id in (model_a, model_b)
    )
    return gr.update(visible=both_vision)
|
| 1030 |
+
|
| 1031 |
+
# Re-evaluate the shared image input's visibility whenever either model
# picker changes (it is shown only if both models support vision).
dual_model_a.change(
    fn=_dual_img_visibility,
    inputs=[dual_model_a, dual_model_b],
    outputs=[dual_img],
)
dual_model_b.change(
    fn=_dual_img_visibility,
    inputs=[dual_model_a, dual_model_b],
    outputs=[dual_img],
)
|
| 1041 |
+
|
| 1042 |
+
def _do_dual(message, image, max_toks, temp, sys_a, sys_b, model_a, model_b, hist_a, hist_b):
    """Run the same prompt through model A, then model B, sequentially.

    Generator wired to seven outputs:
    (chatbot A, chatbot B, message box, image box, send button, message box,
    thinking banner). Models are loaded one at a time — presumably because
    the Space holds a single model in memory — so A streams to completion
    before B even starts loading.
    """
    # Nothing to send: keep everything as-is and hide the loading banner.
    if not message.strip() and image is None:
        yield hist_a, hist_b, gr.update(), gr.update(), gr.update(), gr.update(), gr.update(visible=False)
        return

    _lock = gr.update(interactive=False)
    _unlock = gr.update(interactive=True)

    name_a = MODELS.get(model_a, {}).get("name", model_a)
    name_b = MODELS.get(model_b, {}).get("name", model_b)
    user_msg = message or "[image attached]"

    # Lock the controls and announce that model A's weights are loading.
    yield (hist_a, hist_b, gr.update(value=""), gr.update(), _lock, _lock,
           gr.update(visible=True, value=_dual_loading_html("Model A", "var(--blue)", name_a)))

    try:
        _load_weights(model_a)
    except Exception as exc:
        # Free anything partially loaded, surface the error in chat A's
        # history, and unlock the UI so the user can retry.
        _purge_model()
        err = f"β Failed to load {name_a}: {exc}"
        yield (hist_a + [{"role": "user", "content": user_msg}, {"role": "assistant", "content": err}],
               hist_b, gr.update(), gr.update(value=None), _unlock, _unlock,
               gr.update(visible=False))
        return

    # ---- Model A -----------------------------------------------------------
    new_hist_a = hist_a + [{"role": "user", "content": user_msg}]
    meta_a = MODELS[model_a]
    # xlarge-class models go through the bigger-GPU inference wrapper.
    infer_fn = infer_xlarge if meta_a["gpu_size"] == "xlarge" else infer_large
    full_a = (sys_a.strip() + "\n\n" + message) if sys_a.strip() else message
    partial_a = ""

    for chunk in infer_fn(full_a, image, max_toks, temp):
        # chunk appears to be the full text-so-far, not a delta — TODO confirm
        # against infer_large/infer_xlarge.
        partial_a = chunk
        yield (new_hist_a + [{"role": "assistant", "content": partial_a}],
               hist_b, gr.update(), gr.update(), gr.update(), gr.update(), gr.update())

    hist_a = new_hist_a + [{"role": "assistant", "content": partial_a}]

    # Announce model B loading while keeping A's finished answer on screen.
    yield (hist_a, hist_b, gr.update(), gr.update(), gr.update(), gr.update(),
           gr.update(visible=True, value=_dual_loading_html("Model B", "var(--purple)", name_b)))

    try:
        _load_weights(model_b)
    except Exception as exc:
        _purge_model()
        err = f"β Failed to load {name_b}: {exc}"
        yield (hist_a,
               hist_b + [{"role": "user", "content": user_msg}, {"role": "assistant", "content": err}],
               gr.update(), gr.update(value=None), _unlock, _unlock, gr.update(visible=False))
        return

    # ---- Model B -----------------------------------------------------------
    new_hist_b = hist_b + [{"role": "user", "content": user_msg}]
    meta_b = MODELS[model_b]
    infer_fn = infer_xlarge if meta_b["gpu_size"] == "xlarge" else infer_large
    full_b = (sys_b.strip() + "\n\n" + message) if sys_b.strip() else message
    partial_b = ""

    for chunk in infer_fn(full_b, image, max_toks, temp):
        partial_b = chunk
        yield (hist_a,
               new_hist_b + [{"role": "assistant", "content": partial_b}],
               gr.update(), gr.update(), gr.update(), gr.update(), gr.update())

    # Final frame: commit B's answer, clear the image, unlock, hide banner.
    yield (hist_a, new_hist_b + [{"role": "assistant", "content": partial_b}],
           gr.update(), gr.update(value=None), _unlock, _unlock, gr.update(visible=False))
|
| 1107 |
+
|
| 1108 |
+
# Same handler for both the Send button and Enter in the textbox.
_dual_inputs = [dual_msg, dual_img, dual_max_tokens, dual_temperature,
                dual_sys_a, dual_sys_b, dual_model_a, dual_model_b, dual_bot_a, dual_bot_b]
# NOTE: dual_msg appears twice — slot 3 clears its value, slot 6 toggles
# its interactivity (matches _do_dual's seven-element yields).
_dual_outputs = [dual_bot_a, dual_bot_b, dual_msg, dual_img,
                 dual_send, dual_msg, dual_loading_html]

dual_send.click(fn=_do_dual, inputs=_dual_inputs, outputs=_dual_outputs)
dual_msg.submit( fn=_do_dual, inputs=_dual_inputs, outputs=_dual_outputs)
dual_clear.click(fn=lambda: ([], []), outputs=[dual_bot_a, dual_bot_b])
|
| 1116 |
+
|
| 1117 |
+
|
| 1118 |
+
if __name__ == "__main__":
    # Opt into Gradio debug mode via the environment (GRADIO_DEBUG=1).
    _debug = os.environ.get("GRADIO_DEBUG", "0") == "1"
    # Gradio 6.0 change: css and theme are now passed to launch().
    demo.launch(
        debug=_debug,
        css=CSS,
        theme=gr.themes.Base()
    )
|
models_data.py
ADDED
|
@@ -0,0 +1,338 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch

# --- Model Registry ----------------------------------------------------------
# loader_type:
#   "multimodal"    -> AutoModelForMultimodalLM + AutoProcessor (Gemma 4)
#   "vision_causal" -> AutoModelForCausalLM + AutoProcessor (Gemma 3 vision)
#   "causal"        -> AutoModelForCausalLM + AutoTokenizer (text-only)

# Accent colour per family, shared by every entry of that family.
_FAMILY_COLORS = {
    "Gemma 4": "#1a73e8",
    "Gemma 3": "#137333",
    "Gemma 2": "#e37400",
    "Gemma 1": "#c5221f",
}


def _model(name, short, family, params, params_short, context_k, description,
           release_year, vram, architecture, *, gpu_size="large",
           supports_vision=False, loader_type="causal",
           torch_dtype=torch.bfloat16, license_name="Gemma",
           license_open=False, lmarena=None, badge=None):
    """Build one MODELS registry entry.

    Keyword defaults cover the most common values (large GPU, text-only
    causal loader, bfloat16, proprietary Gemma license, no arena score, no
    badge); callers override only what differs. Key order matches the
    original hand-written literals so iteration order is unchanged.
    """
    return {
        "name": name,
        "short": short,
        "family": family,
        "family_color": _FAMILY_COLORS[family],
        "params": params,
        "params_short": params_short,
        "context": f"{context_k}K",  # display string derived from context_k
        "context_k": context_k,
        "gpu_size": gpu_size,
        "supports_vision": supports_vision,
        "loader_type": loader_type,
        "torch_dtype": torch_dtype,
        "description": description,
        "release_year": release_year,
        "license": license_name,
        "license_open": license_open,
        "vram": vram,
        "lmarena": lmarena,
        "architecture": architecture,
        "badge": badge,
    }


MODELS = {
    # -- Gemma 4 ---------------------------------------------------------------
    "google/gemma-4-E2B-it": _model(
        "Gemma 4 E2B", "E2B", "Gemma 4",
        "2.3B active / 5.1B total", "2.3B", 128,
        "Most compact Gemma 4. PLE architecture with image support. Fast and efficient.",
        2026, "~10 GB", "Transformer + PLE",
        supports_vision=True, loader_type="multimodal",
        license_name="Apache 2.0", license_open=True, badge="NEW",
    ),
    "google/gemma-4-E4B-it": _model(
        "Gemma 4 E4B", "E4B", "Gemma 4",
        "4.5B active / 8B total", "4.5B", 128,
        "Greater capacity with PLE and Shared KV Cache. Image + text. Great balance.",
        2026, "~16 GB", "Transformer + PLE",
        supports_vision=True, loader_type="multimodal",
        license_name="Apache 2.0", license_open=True, badge="NEW",
    ),
    "google/gemma-4-26B-A4B-it": _model(
        "Gemma 4 26B MoE", "26B MoE", "Gemma 4",
        "4B active / 26B total", "26B MoE", 256,
        "Mixture-of-Experts with only 4B active parameters. LMArena ~1441. Image + text.",
        2026, "~52 GB", "MoE Transformer",
        supports_vision=True, loader_type="multimodal",
        license_name="Apache 2.0", license_open=True, lmarena=1441, badge="NEW",
    ),
    "google/gemma-4-31B-it": _model(
        "Gemma 4 31B", "31B", "Gemma 4",
        "31B parameters", "31B", 256,
        "Most powerful Gemma 4. Dense Transformer. LMArena ~1452. On par with models 30Γ larger.",
        2026, "~62 GB", "Dense Transformer",
        gpu_size="xlarge", supports_vision=True, loader_type="multimodal",
        license_name="Apache 2.0", license_open=True, lmarena=1452, badge="FLAGSHIP",
    ),
    # -- Gemma 3 ---------------------------------------------------------------
    "google/gemma-3-1b-it": _model(
        "Gemma 3 1B", "1B", "Gemma 3",
        "1B parameters", "1B", 32,
        "Ultra-lightweight. Ideal for edge devices and low-latency tasks. Text only.",
        2025, "~2 GB", "Transformer",
    ),
    "google/gemma-3-4b-it": _model(
        "Gemma 3 4B", "4B", "Gemma 3",
        "4B parameters", "4B", 128,
        "Perfect balance between size and capability. Image + text. 128K context.",
        2025, "~8 GB", "Transformer",
        supports_vision=True, loader_type="vision_causal",
    ),
    "google/gemma-3-12b-it": _model(
        "Gemma 3 12B", "12B", "Gemma 3",
        "12B parameters", "12B", 128,
        "High-capacity multimodal. Complex reasoning and image analysis.",
        2025, "~24 GB", "Transformer",
        supports_vision=True, loader_type="vision_causal",
    ),
    "google/gemma-3-27b-it": _model(
        "Gemma 3 27B", "27B", "Gemma 3",
        "27B parameters", "27B", 128,
        "Most capable Gemma 3. Advanced vision and high-level reasoning.",
        2025, "~54 GB", "Transformer",
        supports_vision=True, loader_type="vision_causal",
    ),
    # -- Gemma 2 ---------------------------------------------------------------
    "google/gemma-2-2b-it": _model(
        "Gemma 2 2B", "2B", "Gemma 2",
        "2B parameters", "2B", 8,
        "Fast and efficient. Sliding Window Attention. Text only.",
        2024, "~4 GB", "Sliding Window Attn",
    ),
    "google/gemma-2-9b-it": _model(
        "Gemma 2 9B", "9B", "Gemma 2",
        "9B parameters", "9B", 8,
        "Solid text performance. Efficient architecture with sliding window.",
        2024, "~18 GB", "Sliding Window Attn",
    ),
    "google/gemma-2-27b-it": _model(
        "Gemma 2 27B", "27B", "Gemma 2",
        "27B parameters", "27B", 8,
        "Largest Gemma 2. High performance on complex text tasks.",
        2024, "~54 GB", "Sliding Window Attn",
    ),
    # -- Gemma 1 ---------------------------------------------------------------
    "google/gemma-1.1-2b-it": _model(
        "Gemma 1.1 2B", "2B", "Gemma 1",
        "2B parameters", "2B", 8,
        "The original foundation model. Where it all began. Text only.",
        2024, "~4 GB", "Transformer",
        torch_dtype=torch.float16,  # Gemma 1 ships fp16, not bf16
    ),
    "google/gemma-1.1-7b-it": _model(
        "Gemma 1.1 7B", "7B", "Gemma 1",
        "7B parameters", "7B", 8,
        "The original 7B. The historical base of the entire Gemma family.",
        2024, "~14 GB", "Transformer",
        torch_dtype=torch.float16,
    ),
}
|
| 301 |
+
|
| 302 |
+
# Per-family display metadata for the model-browser UI.
# Keys mirror the "family" field of MODELS entries; "color"/"bg" are the
# accent and card-background colours, "new" presumably drives a NEW ribbon.
FAMILIES = {
    "Gemma 4": {
        "color": "#1a73e8",
        "bg": "#e8f0fe",
        "year": 2026,
        "description": "The newest generation. Full multimodal (image + text). Apache 2.0. Just launched!",
        "icon": "β¦",
        "new": True,
    },
    "Gemma 3": {
        "color": "#137333",
        "bg": "#e6f4ea",
        "year": 2025,
        "description": "Second generation with vision. Long contexts up to 128K tokens.",
        "icon": "β",
        "new": False,
    },
    "Gemma 2": {
        "color": "#e37400",
        "bg": "#fef7e0",
        "year": 2024,
        "description": "Optimized for text with Sliding Window Attention. Efficient and fast.",
        "icon": "β",
        "new": False,
    },
    "Gemma 1": {
        "color": "#c5221f",
        "bg": "#fce8e6",
        "year": 2024,
        "description": "The original foundation models from Google DeepMind.",
        "icon": "β",
        "new": False,
    },
}
|
| 336 |
+
|
| 337 |
+
def get_models_by_family(family: str):
    """Return the subset of MODELS whose "family" field equals *family*."""
    selected = {}
    for model_id, meta in MODELS.items():
        if meta["family"] == family:
            selected[model_id] = meta
    return selected
|
requirements.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio>=4.44.0
|
| 2 |
+
transformers>=4.51.0
|
| 3 |
+
torch>=2.3.0
|
| 4 |
+
torchvision>=0.18.0
|
| 5 |
+
accelerate>=0.30.0
|
| 6 |
+
Pillow>=10.0.0
|
| 7 |
+
sentencepiece>=0.2.0
|
| 8 |
+
huggingface_hub>=0.23.0
|