Update app.py

app.py
import gradio as gr
from transformers import AutoModelForImageTextToText, AutoProcessor
import torch
import os

# 1. Setup Model & Token
model_id = "google/gemma-3n-E2B-it"
hf_token = os.getenv("HF_TOKEN")
device = "cpu"

print("Loading Gemma 3n (10GB)... This takes a few minutes.")

# We add low_cpu_mem_usage=True to prevent crashing on load
processor = AutoProcessor.from_pretrained(model_id, token=hf_token)
model = AutoModelForImageTextToText.from_pretrained(
    model_id,
    token=hf_token,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
    device_map="auto"
)
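
# Note: HF_TOKEN must be available in the environment (on Spaces, set it as
# a secret) so the gated Gemma weights can be downloaded, and 'timm' must be
# listed in requirements.txt (the fix the interface description below refers to).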

def chat_function(message, history):
    # Prepare history for the model (Gradio passes (user, assistant) pairs)
    msgs = []
    for user_msg, assistant_msg in history:
        if user_msg:
            msgs.append({"role": "user", "content": [{"type": "text", "text": user_msg}]})
        if assistant_msg:
            msgs.append({"role": "model", "content": [{"type": "text", "text": assistant_msg}]})

    # Add new message
    msgs.append({"role": "user", "content": [{"type": "text", "text": message}]})

    # Apply template. return_dict=True is required so this returns a dict of
    # tensors (input_ids, attention_mask) instead of a bare tensor, which
    # **inputs and the prompt-length slice below depend on.
    inputs = processor.apply_chat_template(
        msgs,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt"
    ).to(device)

    # Generate
    with torch.no_grad():  # Saves memory during generation
        outputs = model.generate(
            **inputs,
            max_new_tokens=400,
            do_sample=True,
            temperature=0.4
        )

    # Decode only the newly generated tokens (slice off the prompt)
    response = processor.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
    return response
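
# Optional streaming variant (a sketch, not wired in): generate() runs in a
# background thread while partial text is yielded from transformers'
# TextIteratorStreamer. History handling is dropped for brevity; to try it,
# pass fn=chat_function_streaming to gr.ChatInterface below.
from threading import Thread
from transformers import TextIteratorStreamer

def chat_function_streaming(message, history):
    msgs = [{"role": "user", "content": [{"type": "text", "text": message}]}]
    inputs = processor.apply_chat_template(
        msgs, add_generation_prompt=True, tokenize=True,
        return_dict=True, return_tensors="pt"
    ).to(device)
    streamer = TextIteratorStreamer(
        processor.tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    # generate() blocks, so run it in a thread and stream from the main one
    Thread(target=model.generate, kwargs=dict(
        **inputs, streamer=streamer, max_new_tokens=400,
        do_sample=True, temperature=0.4
    )).start()
    partial = ""
    for chunk in streamer:
        partial += chunk
        yield partial  # gr.ChatInterface renders each yielded string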

# 2. The Interface
demo = gr.ChatInterface(
    fn=chat_function,
    title="Gemma 3n E2B (Fixed)",
    description="Now with 'timm' installed and optimized for 16GB RAM!",
)

if __name__ == "__main__":
    demo.launch()
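
# requirements.txt for this Space (an assumption, inferred from the imports
# and loading options above):
#   gradio
#   torch
#   transformers
#   accelerate  # needed for low_cpu_mem_usage=True / device_map="auto"
#   timm        # needed by Gemma 3n's vision components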