# omni / image_utils.py
# Samfredoly's picture
# Upload 14 files
# 2a729e6 verified
"""
Image processing helper functions for OmniParser
"""
from pathlib import Path
from typing import List, Optional, Tuple

import cv2
import numpy as np
from PIL import Image
def load_image(image_path: str) -> Image.Image:
    """Open the image file at ``image_path`` with Pillow.

    Args:
        image_path: Location of the image file; forwarded unchanged to
            ``Image.open``.

    Returns:
        The image object produced by ``Image.open``.
    """
    opened = Image.open(image_path)
    return opened
def resize_image(
    image: Image.Image,
    max_size: Tuple[int, int] = (1920, 1080),
    *,
    copy: bool = False,
) -> Image.Image:
    """Shrink an image to fit within ``max_size`` while keeping its aspect ratio.

    The resize is done with ``Image.thumbnail`` using LANCZOS resampling.
    ``thumbnail`` works in place, so by default the object passed in is
    itself modified and then returned. Pass ``copy=True`` to leave the
    caller's image untouched and get a resized copy instead.

    Args:
        image: Image to resize.
        max_size: ``(width, height)`` bounding box handed to ``thumbnail``.
        copy: When true, resize a copy of ``image`` rather than ``image``
            itself. Defaults to ``False`` to preserve the historical
            in-place behaviour.

    Returns:
        The resized image: ``image`` itself when ``copy`` is false,
        otherwise a new image.
    """
    target = image.copy() if copy else image
    target.thumbnail(max_size, Image.Resampling.LANCZOS)
    return target
def capture_screenshot() -> Optional[Image.Image]:
    """Capture a screenshot with ``mss`` and return it as an RGB image.

    ``mss`` is imported lazily so the rest of the module works without it.

    Returns:
        The captured image, or ``None`` when ``mss`` cannot be imported
        (an install hint is printed in that case).
    """
    # Guard only the import: an ImportError raised later, during the actual
    # capture, should surface as-is instead of being reported as
    # "mss not installed".
    try:
        import mss
    except ImportError:
        print("⚠️ mss not installed. Install with: pip install mss")
        return None

    with mss.mss() as sct:
        monitor = sct.monitors[1]  # Primary monitor
        shot = sct.grab(monitor)
        return Image.frombytes('RGB', shot.size, shot.rgb)
def annotate_image(image: Image.Image, elements: List[dict]) -> Image.Image:
    """Return a copy of ``image`` with a box and caption drawn for each element.

    Args:
        image: Source image. It is copied first and never modified.
        elements: Dicts describing the elements to draw. Keys read here:

            * ``bbox`` - sequence whose first four values are used as
              ``x1, y1, x2, y2`` for the rectangle. Entries whose ``bbox``
              is missing, ``None`` or shorter than four values are skipped.
            * ``element_type`` - selects the outline colour; unknown types
              are drawn in white.
            * ``label`` / ``confidence`` - combined into the caption
              ``"<label> (<confidence to 2 decimals>)"``; default to
              ``"elem"`` and ``0``.

    Returns:
        The annotated copy of ``image``.
    """
    from PIL import ImageDraw

    annotated = image.copy()
    draw = ImageDraw.Draw(annotated)

    colors = {
        "button": "red",
        "textfield": "blue",
        "icon": "green",
        "text": "yellow",
        "image": "purple",
    }

    for elem in elements:
        bbox = elem.get("bbox")
        # Explicit None check (not truthiness) so array-like boxes still work.
        if bbox is None or len(bbox) < 4:
            continue
        x1, y1, x2, y2 = bbox[:4]

        color = colors.get(elem.get("element_type", "unknown"), "white")

        # Draw bounding box
        draw.rectangle([x1, y1, x2, y2], outline=color, width=2)

        # A present-but-None confidence would break the ".2f" format.
        confidence = elem.get("confidence")
        if confidence is None:
            confidence = 0
        label = f"{elem.get('label', 'elem')} ({confidence:.2f})"

        # Draw the caption just above the box, clamped to the top edge so it
        # is not drawn off-canvas for boxes near y = 0.
        draw.text((x1, max(y1 - 10, 0)), label, fill=color)

    return annotated
def save_image(image: Image.Image, output_path: str) -> None:
    """Write ``image`` to ``output_path`` and print a confirmation line.

    Args:
        image: Image to write; its ``save`` method is called.
        output_path: Destination passed unchanged to ``image.save``.
    """
    image.save(output_path)
    # NOTE(review): the leading characters of this message look like a
    # mis-decoded check-mark emoji - confirm the file's encoding before
    # changing the text.
    confirmation = f"βœ… Image saved: {output_path}"
    print(confirmation)
def create_sample_screenshot() -> Image.Image:
    """Build an 800x600 white test image containing a few mock UI widgets.

    The widgets are a button, a search box, three stacked menu items and a
    large status panel, each drawn as a rectangle with a text caption.

    Returns:
        The newly created RGB image.
    """
    from PIL import ImageDraw

    canvas = Image.new('RGB', (800, 600), color='white')
    pen = ImageDraw.Draw(canvas)

    # "Click Me" button in the top-left corner.
    pen.rectangle([50, 50, 200, 100], fill='lightblue', outline='blue', width=2)
    pen.text((80, 65), "Click Me", fill='black')

    # Search box to the right of the button.
    pen.rectangle([250, 50, 700, 100], fill='white', outline='gray', width=2)
    pen.text((260, 65), "Search...", fill='gray')

    # Menu entries stacked vertically, 50 px apart, starting at y = 150.
    menu_captions = ('Home', 'About', 'Contact')
    for top, caption in zip(range(150, 300, 50), menu_captions):
        pen.rectangle([50, top, 200, top + 40], fill='lightgray', outline='black', width=1)
        pen.text((70, top + 10), caption, fill='black')

    # Status panel filling the area beside the menu.
    pen.rectangle([250, 150, 700, 500], fill='lightyellow', outline='orange', width=2)
    pen.text((260, 160), "Status Area", fill='black')

    return canvas
if __name__ == "__main__":
    # Demo run: build the sample image, write it to disk, then resize it.
    divider = "=" * 60
    print("Image Processing Examples")
    print(divider)

    # Step 1: generate the sample image and save it in the working directory.
    print("πŸ“· Creating sample screenshot...")
    demo_image = create_sample_screenshot()
    demo_image.save("sample_screenshot.png")
    print("βœ… Sample saved as: sample_screenshot.png")

    # Step 2: shrink the sample to fit within 640x480 and report the size.
    print("\nπŸ“ Resizing image...")
    target_size = (640, 480)
    shrunk = resize_image(demo_image, target_size)
    print(f"βœ… Resized to: {shrunk.size}")

    print("\nβœ… All examples completed!")