dpv007 committed on
Commit
7d00133
·
verified ·
1 Parent(s): 5ccfc4c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -23
app.py CHANGED
@@ -1,10 +1,9 @@
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoProcessor, AutoModelForImageTextToText
4
- from PIL import Image
5
 
6
  # =========================
7
- # Load model
8
  # =========================
9
  model_id = "microsoft/GUI-Actor-Verifier-2B"
10
 
@@ -16,24 +15,25 @@ processor = AutoProcessor.from_pretrained(
16
  model = AutoModelForImageTextToText.from_pretrained(
17
  model_id,
18
  trust_remote_code=True,
19
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
20
- device_map="auto"
 
21
  )
22
 
 
 
23
 
24
  # =========================
25
- # Inference function
26
  # =========================
27
  def run_model(image, prompt):
28
  try:
29
- # Safety check
30
  if image is None:
31
  return "❌ Please upload an image."
32
 
33
- if prompt is None or prompt.strip() == "":
34
  prompt = "Describe this image."
35
 
36
- # Build message properly
37
  messages = [
38
  {
39
  "role": "user",
@@ -44,7 +44,6 @@ def run_model(image, prompt):
44
  }
45
  ]
46
 
47
- # Prepare inputs
48
  inputs = processor.apply_chat_template(
49
  messages,
50
  add_generation_prompt=True,
@@ -53,17 +52,16 @@ def run_model(image, prompt):
53
  return_tensors="pt",
54
  )
55
 
56
- # Move to device
57
- inputs = {k: v.to(model.device) for k, v in inputs.items()}
58
 
59
- # Generate output
60
  with torch.no_grad():
61
  outputs = model.generate(
62
  **inputs,
63
- max_new_tokens=100
 
64
  )
65
 
66
- # Decode response
67
  result = processor.decode(
68
  outputs[0][inputs["input_ids"].shape[-1]:],
69
  skip_special_tokens=True
@@ -82,17 +80,11 @@ demo = gr.Interface(
82
  fn=run_model,
83
  inputs=[
84
  gr.Image(type="pil", label="Upload Image"),
85
- gr.Textbox(
86
- label="Your Question",
87
- placeholder="What is happening in this image?"
88
- )
89
  ],
90
  outputs=gr.Textbox(label="Model Output"),
91
- title="🧠 GUI Actor Verifier 2B",
92
- description="Upload an image and ask questions about it."
93
  )
94
 
95
- # =========================
96
- # Launch
97
- # =========================
98
  demo.launch()
 
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoProcessor, AutoModelForImageTextToText
 
4
 
5
  # =========================
6
+ # Load model (CPU optimized)
7
  # =========================
8
  model_id = "microsoft/GUI-Actor-Verifier-2B"
9
 
 
15
  model = AutoModelForImageTextToText.from_pretrained(
16
  model_id,
17
  trust_remote_code=True,
18
+ torch_dtype=torch.float32, # CPU needs float32
19
+ device_map="cpu", # force CPU
20
+ low_cpu_mem_usage=True
21
  )
22
 
23
+ model.eval()
24
+
25
 
26
  # =========================
27
+ # Inference
28
  # =========================
29
  def run_model(image, prompt):
30
  try:
 
31
  if image is None:
32
  return "❌ Please upload an image."
33
 
34
+ if not prompt or prompt.strip() == "":
35
  prompt = "Describe this image."
36
 
 
37
  messages = [
38
  {
39
  "role": "user",
 
44
  }
45
  ]
46
 
 
47
  inputs = processor.apply_chat_template(
48
  messages,
49
  add_generation_prompt=True,
 
52
  return_tensors="pt",
53
  )
54
 
55
+ # Move tensors to CPU explicitly
56
+ inputs = {k: v.to("cpu") for k, v in inputs.items()}
57
 
 
58
  with torch.no_grad():
59
  outputs = model.generate(
60
  **inputs,
61
+ max_new_tokens=50, # IMPORTANT: keep small for CPU
62
+ do_sample=False
63
  )
64
 
 
65
  result = processor.decode(
66
  outputs[0][inputs["input_ids"].shape[-1]:],
67
  skip_special_tokens=True
 
80
  fn=run_model,
81
  inputs=[
82
  gr.Image(type="pil", label="Upload Image"),
83
+ gr.Textbox(label="Your Question")
 
 
 
84
  ],
85
  outputs=gr.Textbox(label="Model Output"),
86
+ title="GUI Actor Verifier (CPU Mode)",
87
+ description="⚠️ Running on CPU responses may be slow."
88
  )
89
 
 
 
 
90
  demo.launch()