import gradio as gr
from transformers import AutoModelForImageTextToText, AutoProcessor
import torch
import os
# 1. Setup Model & Token
# Hub id of the instruction-tuned Gemma 3n checkpoint; HF_TOKEN is read from
# the environment (needed because the Gemma weights are gated on the Hub).
model_id = "google/gemma-3n-E2B-it"
hf_token = os.getenv("HF_TOKEN")
# Inference target used later when moving tokenized inputs; this Space is CPU-only.
device = "cpu"
print("Loading Gemma 3n (10GB)... This takes a few minutes.")
# We add low_cpu_mem_usage=True to prevent crashing on load
# (weights are streamed in instead of materialized twice in RAM).
processor = AutoProcessor.from_pretrained(model_id, token=hf_token)
model = AutoModelForImageTextToText.from_pretrained(
    model_id,
    token=hf_token,
    torch_dtype=torch.float32,  # full precision: CPU has no fast half-precision path
    low_cpu_mem_usage=True,
    device_map="auto"  # NOTE(review): combined with device="cpu" above, presumably resolves to CPU — confirm
)
def chat_function(message, history):
    """Generate one Gemma 3n reply for *message*, conditioned on *history*.

    Parameters
    ----------
    message : str
        The user's newest message.
    history : list
        Gradio chat history. Accepts both the legacy ``(user, assistant)``
        tuple format and the newer ``{"role": ..., "content": ...}`` message
        dicts, so the function works across gradio versions.

    Returns
    -------
    str
        The decoded model reply with special tokens stripped.
    """
    # Rebuild the conversation in the structured format the processor's chat
    # template expects. Gemma's template names the assistant role "model".
    msgs = []
    for turn in history:
        if isinstance(turn, dict):
            # gradio "messages" format: one dict per turn
            role = "model" if turn.get("role") == "assistant" else turn.get("role")
            text = turn.get("content")
            if text:
                msgs.append({"role": role, "content": [{"type": "text", "text": text}]})
        else:
            # legacy tuple format: (user_msg, assistant_msg) per exchange
            user_msg, assistant_msg = turn
            if user_msg:
                msgs.append({"role": "user", "content": [{"type": "text", "text": user_msg}]})
            if assistant_msg:
                msgs.append({"role": "model", "content": [{"type": "text", "text": assistant_msg}]})

    # Add new message
    msgs.append({"role": "user", "content": [{"type": "text", "text": message}]})

    # Apply template. return_dict=True is required: without it,
    # apply_chat_template(tokenize=True) returns a bare tensor, and both
    # **inputs and inputs['input_ids'] below would raise at runtime.
    inputs = processor.apply_chat_template(
        msgs,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(device)

    # Generate
    with torch.no_grad():  # inference only — skip autograd bookkeeping to save memory
        outputs = model.generate(
            **inputs,
            max_new_tokens=400,
            do_sample=True,
            temperature=0.4,
        )

    # Slice off the prompt tokens so only the newly generated text is decoded.
    response = processor.decode(
        outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
    )
    return response
# 5. The Interface — wire chat_function into Gradio's chat UI.
demo = gr.ChatInterface(
    chat_function,
    title="Gemma 3n E2B (Fixed)",
    description="Now with 'timm' installed and optimized for 16GB RAM!",
)

# Launch only when run as a script (Spaces executes this module directly).
if __name__ == "__main__":
    demo.launch()
|
@@ -1,65 +0,0 @@
|
|
| 1 |
-
import gradio as gr
|
| 2 |
-
from transformers import AutoModelForImageTextToText, AutoProcessor
|
| 3 |
-
import torch
|
| 4 |
-
import os
|
| 5 |
-
|
| 6 |
-
# 1. Setup Model & Token
|
| 7 |
-
model_id = "google/gemma-3n-E2B-it"
|
| 8 |
-
hf_token = os.getenv("HF_TOKEN")
|
| 9 |
-
|
| 10 |
-
# Use CPU for Free Tier
|
| 11 |
-
device = "cpu"
|
| 12 |
-
|
| 13 |
-
print("Loading Gemma 3n... This takes about 2 minutes on CPU.")
|
| 14 |
-
|
| 15 |
-
# Gemma 3n uses AutoProcessor and AutoModelForImageTextToText for its multimodal brain
|
| 16 |
-
processor = AutoProcessor.from_pretrained(model_id, token=hf_token)
|
| 17 |
-
model = AutoModelForImageTextToText.from_pretrained(
|
| 18 |
-
model_id,
|
| 19 |
-
token=hf_token,
|
| 20 |
-
torch_dtype=torch.float32,
|
| 21 |
-
device_map="auto"
|
| 22 |
-
)
|
| 23 |
-
|
| 24 |
-
def chat_function(message, history):
|
| 25 |
-
# 2. Correct History Formatting
|
| 26 |
-
# This prevents the 'list error' you saw before
|
| 27 |
-
msgs = []
|
| 28 |
-
for user_msg, assistant_msg in history:
|
| 29 |
-
if user_msg: msgs.append({"role": "user", "content": [{"type": "text", "text": user_msg}]})
|
| 30 |
-
if assistant_msg: msgs.append({"role": "model", "content": [{"type": "text", "text": assistant_msg}]})
|
| 31 |
-
|
| 32 |
-
# Add the new message
|
| 33 |
-
msgs.append({"role": "user", "content": [{"type": "text", "text": message}]})
|
| 34 |
-
|
| 35 |
-
# 3. Prepare Inputs
|
| 36 |
-
inputs = processor.apply_chat_template(
|
| 37 |
-
msgs,
|
| 38 |
-
add_generation_prompt=True,
|
| 39 |
-
tokenize=True,
|
| 40 |
-
return_tensors="pt"
|
| 41 |
-
).to(device)
|
| 42 |
-
|
| 43 |
-
# 4. Generate Answer
|
| 44 |
-
# We use a lower temperature (0.4) for better school/fact accuracy
|
| 45 |
-
outputs = model.generate(
|
| 46 |
-
**inputs,
|
| 47 |
-
max_new_tokens=512,
|
| 48 |
-
do_sample=True,
|
| 49 |
-
temperature=0.4
|
| 50 |
-
)
|
| 51 |
-
|
| 52 |
-
# Extract only the response
|
| 53 |
-
response = processor.decode(outputs[0][inputs['input_ids'].shape[-1]:], skip_special_tokens=True)
|
| 54 |
-
return response
|
| 55 |
-
|
| 56 |
-
# 5. The Interface
|
| 57 |
-
demo = gr.ChatInterface(
|
| 58 |
-
fn=chat_function,
|
| 59 |
-
title="Gemma 3n E2B: The Smart Assistant",
|
| 60 |
-
description="I have much more 'sense' than the 135M model. Try me with logic riddles or school assignments!",
|
| 61 |
-
theme="ocean"
|
| 62 |
-
)
|
| 63 |
-
|
| 64 |
-
if __name__ == "__main__":
|
| 65 |
-
demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|