dpv007 committed on
Commit
7d00133
·
verified ·
1 Parent(s): 5ccfc4c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -23
app.py CHANGED
@@ -1,10 +1,9 @@
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoProcessor, AutoModelForImageTextToText
4
- from PIL import Image
5
 
6
  # =========================
7
- # Load model
8
  # =========================
9
  model_id = "microsoft/GUI-Actor-Verifier-2B"
10
 
@@ -16,24 +15,25 @@ processor = AutoProcessor.from_pretrained(
16
  model = AutoModelForImageTextToText.from_pretrained(
17
  model_id,
18
  trust_remote_code=True,
19
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
20
- device_map="auto"
 
21
  )
22
 
 
 
23
 
24
  # =========================
25
- # Inference function
26
  # =========================
27
  def run_model(image, prompt):
28
  try:
29
- # Safety check
30
  if image is None:
31
  return "❌ Please upload an image."
32
 
33
- if prompt is None or prompt.strip() == "":
34
  prompt = "Describe this image."
35
 
36
- # Build message properly
37
  messages = [
38
  {
39
  "role": "user",
@@ -44,7 +44,6 @@ def run_model(image, prompt):
44
  }
45
  ]
46
 
47
- # Prepare inputs
48
  inputs = processor.apply_chat_template(
49
  messages,
50
  add_generation_prompt=True,
@@ -53,17 +52,16 @@ def run_model(image, prompt):
53
  return_tensors="pt",
54
  )
55
 
56
- # Move to device
57
- inputs = {k: v.to(model.device) for k, v in inputs.items()}
58
 
59
- # Generate output
60
  with torch.no_grad():
61
  outputs = model.generate(
62
  **inputs,
63
- max_new_tokens=100
 
64
  )
65
 
66
- # Decode response
67
  result = processor.decode(
68
  outputs[0][inputs["input_ids"].shape[-1]:],
69
  skip_special_tokens=True
@@ -82,17 +80,11 @@ demo = gr.Interface(
82
  fn=run_model,
83
  inputs=[
84
  gr.Image(type="pil", label="Upload Image"),
85
- gr.Textbox(
86
- label="Your Question",
87
- placeholder="What is happening in this image?"
88
- )
89
  ],
90
  outputs=gr.Textbox(label="Model Output"),
91
- title="🧠 GUI Actor Verifier 2B",
92
- description="Upload an image and ask questions about it."
93
  )
94
 
95
- # =========================
96
- # Launch
97
- # =========================
98
  demo.launch()
 
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoProcessor, AutoModelForImageTextToText
 
4
 
5
  # =========================
6
+ # Load model (CPU optimized)
7
  # =========================
8
  model_id = "microsoft/GUI-Actor-Verifier-2B"
9
 
 
15
  model = AutoModelForImageTextToText.from_pretrained(
16
  model_id,
17
  trust_remote_code=True,
18
+ torch_dtype=torch.float32, # CPU needs float32
19
+ device_map="cpu", # force CPU
20
+ low_cpu_mem_usage=True
21
  )
22
 
23
+ model.eval()
24
+
25
 
26
  # =========================
27
+ # Inference
28
  # =========================
29
  def run_model(image, prompt):
30
  try:
 
31
  if image is None:
32
  return "❌ Please upload an image."
33
 
34
+ if not prompt or prompt.strip() == "":
35
  prompt = "Describe this image."
36
 
 
37
  messages = [
38
  {
39
  "role": "user",
 
44
  }
45
  ]
46
 
 
47
  inputs = processor.apply_chat_template(
48
  messages,
49
  add_generation_prompt=True,
 
52
  return_tensors="pt",
53
  )
54
 
55
+ # Move tensors to CPU explicitly
56
+ inputs = {k: v.to("cpu") for k, v in inputs.items()}
57
 
 
58
  with torch.no_grad():
59
  outputs = model.generate(
60
  **inputs,
61
+ max_new_tokens=50, # IMPORTANT: keep small for CPU
62
+ do_sample=False
63
  )
64
 
 
65
  result = processor.decode(
66
  outputs[0][inputs["input_ids"].shape[-1]:],
67
  skip_special_tokens=True
 
80
  fn=run_model,
81
  inputs=[
82
  gr.Image(type="pil", label="Upload Image"),
83
+ gr.Textbox(label="Your Question")
 
 
 
84
  ],
85
  outputs=gr.Textbox(label="Model Output"),
86
+ title="GUI Actor Verifier (CPU Mode)",
87
+ description="⚠️ Running on CPU responses may be slow."
88
  )
89
 
 
 
 
90
  demo.launch()