""" Image processing helper functions for OmniParser """ import cv2 import numpy as np from PIL import Image from pathlib import Path from typing import Tuple, List def load_image(image_path: str) -> Image.Image: """Load image from file""" return Image.open(image_path) def resize_image(image: Image.Image, max_size: Tuple[int, int] = (1920, 1080)) -> Image.Image: """Resize image to max dimensions while maintaining aspect ratio""" image.thumbnail(max_size, Image.Resampling.LANCZOS) return image def capture_screenshot() -> Image.Image: """Capture screenshot (requires mss or similar)""" try: import mss with mss.mss() as sct: monitor = sct.monitors[1] # Primary monitor screenshot = sct.grab(monitor) return Image.frombytes('RGB', screenshot.size, screenshot.rgb) except ImportError: print("āš ļø mss not installed. Install with: pip install mss") return None def annotate_image(image: Image.Image, elements: List[dict]) -> Image.Image: """Draw bounding boxes on image for visualization""" img_copy = image.copy() from PIL import ImageDraw, ImageFont draw = ImageDraw.Draw(img_copy) colors = { "button": "red", "textfield": "blue", "icon": "green", "text": "yellow", "image": "purple" } for i, elem in enumerate(elements): bbox = elem.get("bbox", []) if len(bbox) >= 4: x1, y1, x2, y2 = bbox[:4] elem_type = elem.get("element_type", "unknown") color = colors.get(elem_type, "white") # Draw bounding box draw.rectangle([x1, y1, x2, y2], outline=color, width=2) # Draw label label = f"{elem.get('label', 'elem')} ({elem.get('confidence', 0):.2f})" draw.text((x1, y1 - 10), label, fill=color) return img_copy def save_image(image: Image.Image, output_path: str): """Save image to file""" image.save(output_path) print(f"āœ… Image saved: {output_path}") def create_sample_screenshot() -> Image.Image: """Create a simple sample image for testing""" # Create a blank image with some shapes img = Image.new('RGB', (800, 600), color='white') from PIL import ImageDraw draw = ImageDraw.Draw(img) # Draw some sample UI elements # Button draw.rectangle([50, 50, 200, 100], fill='lightblue', outline='blue', width=2) draw.text((80, 65), "Click Me", fill='black') # Search box draw.rectangle([250, 50, 700, 100], fill='white', outline='gray', width=2) draw.text((260, 65), "Search...", fill='gray') # Menu items for i, text in enumerate(['Home', 'About', 'Contact']): y = 150 + i * 50 draw.rectangle([50, y, 200, y + 40], fill='lightgray', outline='black', width=1) draw.text((70, y + 10), text, fill='black') # Status area draw.rectangle([250, 150, 700, 500], fill='lightyellow', outline='orange', width=2) draw.text((260, 160), "Status Area", fill='black') return img if __name__ == "__main__": print("Image Processing Examples") print("=" * 60) # Create sample image print("šŸ“· Creating sample screenshot...") sample_img = create_sample_screenshot() sample_img.save("sample_screenshot.png") print("āœ… Sample saved as: sample_screenshot.png") # Resize example print("\nšŸ“ Resizing image...") resized = resize_image(sample_img, (640, 480)) print(f"āœ… Resized to: {resized.size}") print("\nāœ… All examples completed!")